Index: Makefile
===================================================================
--- Makefile
+++ Makefile
@@ -54,13 +54,13 @@
 endif
 GCC_PLUGIN_DIR=$(shell $(GCC) -print-file-name=plugin)
-GCC_VERSION=$(shell $(GCC) -dumpversion).0
-GCC_MAJOR=$(word 1, $(subst ., ,$(GCC_VERSION)))
-GCC_MINOR=$(word 2, $(subst ., ,$(GCC_VERSION)))
-GCC_MICRO=$(word 3, $(subst ., ,$(GCC_VERSION)))
+GCC_VERSION_STRING=$(shell $(GCC) -dumpversion).0
+GCC_MAJOR=$(word 1, $(subst ., ,$(GCC_VERSION_STRING)))
+GCC_MINOR=$(word 2, $(subst ., ,$(GCC_VERSION_STRING)))
+GCC_MICRO=$(word 3, $(subst ., ,$(GCC_VERSION_STRING)))
 TARGET_TRIPLE=$(shell $(GCC) -dumpmachine)
-LLVM_VERSION=$(shell $(LLVM_CONFIG) --version)
+LLVM_VERSION_STRING=$(shell $(LLVM_CONFIG) --version)
 PLUGIN=dragonegg.so
 PLUGIN_OBJECTS=Aliasing.o Backend.o Cache.o ConstantConversion.o Convert.o \
@@ -77,7 +77,7 @@
 CPP_OPTIONS+=$(CPPFLAGS) $(shell $(LLVM_CONFIG) --cppflags) \
 -fno-rtti \
 -MD -MP \
- -DIN_GCC -DLLVM_VERSION=\"$(LLVM_VERSION)\" \
+ -DIN_GCC -DLLVM_VERSION_STRING=\"$(LLVM_VERSION_STRING)\" \
 -DTARGET_TRIPLE=\"$(TARGET_TRIPLE)\" \
 -DGCC_MAJOR=$(GCC_MAJOR) -DGCC_MINOR=$(GCC_MINOR) \
 -DGCC_MICRO=$(GCC_MICRO) \
@@ -191,6 +191,8 @@
 # The following target exists for the benefit of the dragonegg maintainers, and
 # is not used in a normal build. You need to specify the path to the GCC build
 # directory in GCC_BUILD_DIR.
+# FIXME: gengtype does not support macros; see https://gcc.gnu.org/ml/gcc/2017-07/msg00061.html
+# You have to comment out any #if (GCC_MAJOR == XXX) block that does not fit your GCC version.
 GENGTYPE_INPUT=$(SRC_DIR)/Cache.cpp
 GENGTYPE_OUTPUT=$(INCLUDE_DIR)/dragonegg/gt-cache-$(GCC_MAJOR).$(GCC_MINOR).inc
 .PHONY: gt-cache.inc
Index: include/dragonegg/ABI.h
===================================================================
--- include/dragonegg/ABI.h
+++ include/dragonegg/ABI.h
@@ -142,20 +142,26 @@
   llvm::Type *Ty = ConvertType(type);
   uint64_t Size = getDataLayout().getTypeAllocSize(Ty);
   *Offset = 0;
+  llvm::LLVMContext &Context =
+#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8)
+      Ty->getContext();
+#else
+      getGlobalContext();
+#endif
   if (Size == 0)
-    return llvm::Type::getVoidTy(llvm::getGlobalContext());
+    return llvm::Type::getVoidTy(Context);
   else if (Size == 1)
-    return llvm::Type::getInt8Ty(llvm::getGlobalContext());
+    return llvm::Type::getInt8Ty(Context);
   else if (Size == 2)
-    return llvm::Type::getInt16Ty(llvm::getGlobalContext());
+    return llvm::Type::getInt16Ty(Context);
   else if (Size <= 4)
-    return llvm::Type::getInt32Ty(llvm::getGlobalContext());
+    return llvm::Type::getInt32Ty(Context);
   else if (Size <= 8)
-    return llvm::Type::getInt64Ty(llvm::getGlobalContext());
+    return llvm::Type::getInt64Ty(Context);
   else if (Size <= 16)
-    return llvm::IntegerType::get(llvm::getGlobalContext(), 128);
+    return llvm::IntegerType::get(Context, 128);
   else if (Size <= 32)
-    return llvm::IntegerType::get(llvm::getGlobalContext(), 256);
+    return llvm::IntegerType::get(Context, 256);
   return NULL;
 }
Index: include/dragonegg/Debug.h
===================================================================
--- include/dragonegg/Debug.h
+++ include/dragonegg/Debug.h
@@ -35,6 +35,24 @@
 // System headers
 #include 
+#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8)
+typedef llvm::DIType * MigDIType;
+typedef llvm::DIScope * MigDIScope;
+typedef llvm::DINamespace * MigDINamespace;
+typedef llvm::DISubprogram * MigDISubprogram;
+typedef llvm::DIFile * MigDIFile;
+typedef llvm::DINodeArray MigDINodeArray;
+typedef llvm::DICompositeType * MigDICompositeType;
+#else
+typedef llvm::DIType
MigDIType; +typedef llvm::DIDescriptor MigDIScope; +typedef llvm::DINameSpace MigDINamespace; +typedef llvm::DISubprogram MigDISubprogram; +typedef llvm::DIFile MigDIFile; +typedef llvm::DIArray MigDINodeArray; +typedef llvm::DICompositeType MigDICompositeType; +#endif + // Forward declarations namespace llvm { class AllocaInst; @@ -116,41 +134,40 @@ /// getOrCreateType - Get the type from the cache or create a new type if /// necessary. - llvm::DIType getOrCreateType(tree_node *type); + MigDIType getOrCreateType(tree_node *type); /// createBasicType - Create BasicType. - llvm::DIType createBasicType(tree_node *type); + MigDIType createBasicType(tree_node *type); /// createMethodType - Create MethodType. - llvm::DIType createMethodType(tree_node *type); + MigDIType createMethodType(tree_node *type); /// createPointerType - Create PointerType. - llvm::DIType createPointerType(tree_node *type); + MigDIType createPointerType(tree_node *type); /// createArrayType - Create ArrayType. - llvm::DIType createArrayType(tree_node *type); + MigDIType createArrayType(tree_node *type); /// createEnumType - Create EnumType. - llvm::DIType createEnumType(tree_node *type); + MigDIType createEnumType(tree_node *type); /// createStructType - Create StructType for struct or union or class. - llvm::DIType createStructType(tree_node *type); + MigDIType createStructType(tree_node *type); /// createVarinatType - Create variant type or return MainTy. - llvm::DIType createVariantType(tree_node *type, llvm::DIType MainTy); + MigDIType createVariantType(tree_node *type, MigDIType MainTy); /// getOrCreateCompileUnit - Create a new compile unit. void getOrCreateCompileUnit(const char *FullPath, bool isMain = false); /// getOrCreateFile - Get DIFile descriptor. - llvm::DIFile getOrCreateFile(const char *FullPath); + MigDIFile getOrCreateFile(const char *FullPath); /// findRegion - Find tree_node N's region. - llvm::DIDescriptor findRegion(tree_node *n); + MigDIScope findRegion(tree_node *n); /// getOrCreateNameSpace - Get name space descriptor for the tree node. - llvm::DINameSpace getOrCreateNameSpace(tree_node *Node, - llvm::DIDescriptor Context); + MigDINamespace getOrCreateNameSpace(tree_node *Node, MigDIScope Context); /// getFunctionName - Get function name for the given FnDecl. If the /// name is constructred on demand (e.g. C++ destructor) then the name @@ -160,36 +177,45 @@ private: /// CreateDerivedType - Create a derived type like const qualified type, /// pointer, typedef, etc. - llvm::DIDerivedType CreateDerivedType( - unsigned Tag, llvm::DIDescriptor Context, llvm::StringRef Name, - llvm::DIFile F, unsigned LineNumber, uint64_t SizeInBits, +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + llvm::DIDerivedType * +#else + llvm::DIDerivedType +#endif + CreateDerivedType( + unsigned Tag, MigDIScope Context, llvm::StringRef Name, + MigDIFile F, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, - llvm::DIType DerivedFrom); + MigDIType DerivedFrom); /// CreateCompositeType - Create a composite type like array, struct, etc. 
- llvm::DICompositeType CreateCompositeType( - unsigned Tag, llvm::DIDescriptor Context, llvm::StringRef Name, - llvm::DIFile F, unsigned LineNumber, uint64_t SizeInBits, + MigDICompositeType CreateCompositeType( + unsigned Tag, MigDIScope Context, llvm::StringRef Name, + MigDIFile F, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, - llvm::DIType DerivedFrom, llvm::DIArray Elements, + MigDIType DerivedFrom, MigDINodeArray Elements, unsigned RunTimeLang = 0, llvm::MDNode *ContainingType = 0); /// CreateSubprogram - Create a new descriptor for the specified subprogram. /// See comments in DISubprogram for descriptions of these fields. - llvm::DISubprogram CreateSubprogram( - llvm::DIDescriptor Context, llvm::StringRef Name, - llvm::StringRef DisplayName, llvm::StringRef LinkageName, llvm::DIFile F, - unsigned LineNo, llvm::DIType Ty, bool isLocalToUnit, bool isDefinition, - unsigned VK = 0, unsigned VIndex = 0, - llvm::DIType ContainingType = llvm::DIType(), unsigned Flags = 0, - bool isOptimized = false, llvm::Function *Fn = 0); + MigDISubprogram CreateSubprogram(MigDIScope Context, llvm::StringRef Name, + llvm::StringRef DisplayName, llvm::StringRef LinkageName, MigDIFile F, + unsigned LineNo, MigDIType Ty, bool isLocalToUnit, bool isDefinition, + MigDIType ContainingType, unsigned VK = 0, unsigned VIndex = 0, + unsigned Flags = 0, bool isOptimized = false, llvm::Function *Fn = 0); /// CreateSubprogramDefinition - Create new subprogram descriptor for the /// given declaration. - llvm::DISubprogram - CreateSubprogramDefinition(llvm::DISubprogram &SPDeclaration, + MigDISubprogram + CreateSubprogramDefinition( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + llvm::DISubprogram *SPDeclaration, +#else + llvm::DISubprogram &SPDeclaration, +#endif unsigned LineNo, llvm::Function *Fn); +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. llvm::Instruction * InsertDeclare(llvm::Value *Storage, llvm::DIVariable D, @@ -209,6 +235,7 @@ llvm::Instruction *InsertDbgValueIntrinsic(llvm::Value *V, uint64_t Offset, llvm::DIVariable D, llvm::Instruction *InsertBefore); +#endif }; #endif /* DRAGONEGG_DEBUG_H */ Index: include/dragonegg/Internals.h =================================================================== --- include/dragonegg/Internals.h +++ include/dragonegg/Internals.h @@ -33,8 +33,32 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" +#define LLVM_VERSION(major, minor) (((major) << 8) | (minor)) +#define LLVM_VERSION_CODE LLVM_VERSION(LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR) + +#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 0) +# define LLVM_TYPE_Q +#else +# define LLVM_TYPE_Q const +#endif + +#define GCC_VERSION(major, minor) (((major) << 8) | (minor)) +#define GCC_VERSION_CODE GCC_VERSION(GCC_MAJOR, GCC_MINOR) + +#if __has_attribute(sentinel) || LLVM_GNUC_PREREQ(3, 0, 0) +#define LLVM_END_WITH_NULL __attribute__((sentinel)) +#else +#define LLVM_END_WITH_NULL +#endif + struct basic_block_def; union gimple_statement_d; +#if (GCC_MAJOR > 4) +struct gimple; +typedef struct gimple GimpleTy; +#else +typedef union gimple_statement_d GimpleTy; +#endif union tree_node; namespace llvm { @@ -57,7 +81,11 @@ } class DebugInfo; -typedef llvm::IRBuilder LLVMBuilder; +typedef llvm::IRBuilder< +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) + true, +#endif + llvm::TargetFolder> LLVMBuilder; // Global state. 
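Note (illustrative, not part of the patch): the Internals.h hunk above replaces bare GCC_MINOR checks with the encoded GCC_VERSION_CODE / GCC_VERSION comparison. A minimal self-contained sketch of why that matters is below; GCC_MAJOR and GCC_MINOR are normally passed via -D by the Makefile and are hard-coded here only as example values for GCC 6.3, and the main()/printf scaffolding is example code, not dragonegg's.

// Illustrative sketch only, not part of the patch.
#include <cstdio>

#define GCC_MAJOR 6   // example value, normally -DGCC_MAJOR from the Makefile
#define GCC_MINOR 3   // example value, normally -DGCC_MINOR from the Makefile

// The encoding added by the patch: pack (major, minor) into one integer so
// whole versions can be compared with a single relational test.
#define GCC_VERSION(major, minor) (((major) << 8) | (minor))
#define GCC_VERSION_CODE GCC_VERSION(GCC_MAJOR, GCC_MINOR)

int main() {
  // Old style guard: "(GCC_MINOR < 7)" is true for 6.3 (minor 3 < 7), so
  // GCC 6.3 would wrongly be treated as older than 4.7.
#if (GCC_MINOR < 7)
  std::puts("old-style check: looks like GCC < 4.7 (wrong for 6.3)");
#endif
  // New style guard from the patch: 6.3 encodes to 0x603 and 4.7 to 0x407,
  // so the comparison correctly reports a newer compiler.
#if GCC_VERSION_CODE < GCC_VERSION(4, 7)
  std::puts("new-style check: GCC < 4.7");
#else
  std::puts("new-style check: GCC >= 4.7");
#endif
  return 0;
}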
@@ -208,7 +236,7 @@ /// PhiRecord - This struct holds the LLVM PHI node associated with a GCC phi. struct PhiRecord { - gimple_statement_d *gcc_phi; + GimpleTy *gcc_phi; llvm::PHINode *PHI; }; @@ -464,27 +492,27 @@ //===------------------ Render* - Convert GIMPLE to LLVM ----------------===// - void RenderGIMPLE_ASM(gimple_statement_d *stmt); - void RenderGIMPLE_ASSIGN(gimple_statement_d *stmt); - void RenderGIMPLE_CALL(gimple_statement_d *stmt); - void RenderGIMPLE_COND(gimple_statement_d *stmt); - void RenderGIMPLE_EH_DISPATCH(gimple_statement_d *stmt); - void RenderGIMPLE_GOTO(gimple_statement_d *stmt); - void RenderGIMPLE_RESX(gimple_statement_d *stmt); - void RenderGIMPLE_RETURN(gimple_statement_d *stmt); - void RenderGIMPLE_SWITCH(gimple_statement_d *stmt); + void RenderGIMPLE_ASM(GimpleTy *stmt); + void RenderGIMPLE_ASSIGN(GimpleTy *stmt); + void RenderGIMPLE_CALL(GimpleTy *stmt); + void RenderGIMPLE_COND(GimpleTy *stmt); + void RenderGIMPLE_EH_DISPATCH(GimpleTy *stmt); + void RenderGIMPLE_GOTO(GimpleTy *stmt); + void RenderGIMPLE_RESX(GimpleTy *stmt); + void RenderGIMPLE_RETURN(GimpleTy *stmt); + void RenderGIMPLE_SWITCH(GimpleTy *stmt); // Render helpers. /// EmitAssignRHS - Convert the RHS of a scalar GIMPLE_ASSIGN to LLVM. - llvm::Value *EmitAssignRHS(gimple_statement_d *stmt); + llvm::Value *EmitAssignRHS(GimpleTy *stmt); /// EmitAssignSingleRHS - Helper for EmitAssignRHS. Handles those RHS that /// are not register expressions. llvm::Value *EmitAssignSingleRHS(tree_node *rhs); /// OutputCallRHS - Convert the RHS of a GIMPLE_CALL. - llvm::Value *OutputCallRHS(gimple_statement_d *stmt, const MemRef *DestLoc); + llvm::Value *OutputCallRHS(GimpleTy *stmt, const MemRef *DestLoc); /// WriteScalarToLHS - Store RHS, a non-aggregate value, into the given LHS. void WriteScalarToLHS(tree_node *lhs, llvm::Value *Scalar); @@ -565,7 +593,7 @@ llvm::Value *EmitReg_TRUNC_DIV_EXPR(tree_node *op0, tree_node *op1, bool isExact); llvm::Value *EmitReg_TRUNC_MOD_EXPR(tree_node *op0, tree_node *op1); -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) llvm::Value *EmitReg_VEC_EXTRACT_EVEN_EXPR(tree_node *op0, tree_node *op1); llvm::Value *EmitReg_VEC_EXTRACT_ODD_EXPR(tree_node *op0, tree_node *op1); llvm::Value *EmitReg_VEC_INTERLEAVE_HIGH_EXPR(tree_node *op0, tree_node *op1); @@ -584,10 +612,10 @@ // Ternary expressions. 
llvm::Value *EmitReg_CondExpr(tree_node *op0, tree_node *op1, tree_node *op2); -#if (GCC_MINOR > 5) +#if GCC_VERSION_CODE > GCC_VERSION(4, 5) llvm::Value *EmitReg_FMA_EXPR(tree_node *op0, tree_node *op1, tree_node *op2); #endif -#if (GCC_MINOR > 6) +#if GCC_VERSION_CODE > GCC_VERSION(4, 6) llvm::Value *EmitReg_VEC_PERM_EXPR(tree_node *op0, tree_node *op1, tree_node *op2); #endif @@ -595,10 +623,10 @@ llvm::Value *EmitLoadOfLValue(tree_node *exp); llvm::Value *EmitOBJ_TYPE_REF(tree_node *exp); llvm::Value *EmitADDR_EXPR(tree_node *exp); -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) llvm::Value *EmitCondExpr(tree_node *exp); #endif - llvm::Value *EmitCallOf(llvm::Value *Callee, gimple_statement_d *stmt, + llvm::Value *EmitCallOf(llvm::Value *Callee, GimpleTy *stmt, const MemRef *DestLoc, const llvm::AttributeSet &PAL); llvm::CallInst *EmitSimpleCall(llvm::StringRef CalleeName, tree_node *ret_type, @@ -613,74 +641,71 @@ llvm::Value *BuildVector(const std::vector &Elts); llvm::Value *BuildVector(llvm::Value *Elt, ...); llvm::Value *BuildVectorShuffle(llvm::Value *InVec1, llvm::Value *InVec2, ...); - llvm::Value *BuildBinaryAtomic(gimple_statement_d *stmt, + llvm::Value *BuildBinaryAtomic(GimpleTy *stmt, llvm::AtomicRMWInst::BinOp Kind, unsigned PostOp = 0); llvm::Value * - BuildCmpAndSwapAtomic(gimple_statement_d *stmt, unsigned Bits, bool isBool); + BuildCmpAndSwapAtomic(GimpleTy *stmt, unsigned Bits, bool isBool); // Builtin Function Expansion. - bool EmitBuiltinCall(gimple_statement_d *stmt, tree_node *fndecl, + bool EmitBuiltinCall(GimpleTy *stmt, tree_node *fndecl, const MemRef *DestLoc, llvm::Value *&Result); - bool EmitFrontendExpandedBuiltinCall(gimple_statement_d *stmt, - tree_node *fndecl, const MemRef *DestLoc, + bool EmitFrontendExpandedBuiltinCall(GimpleTy *stmt, tree_node *fndecl, + const MemRef *DestLoc, llvm::Value *&Result); bool EmitBuiltinUnaryOp(llvm::Value *InVal, llvm::Value *&Result, llvm::Intrinsic::ID Id); llvm::Value * - EmitBuiltinBitCountIntrinsic(gimple_statement_d *stmt, - llvm::Intrinsic::ID Id); - llvm::Value *EmitBuiltinSQRT(gimple_statement_d *stmt); - llvm::Value *EmitBuiltinPOWI(gimple_statement_d *stmt); - llvm::Value *EmitBuiltinPOW(gimple_statement_d *stmt); - llvm::Value *EmitBuiltinLCEIL(gimple_statement_d *stmt); - llvm::Value *EmitBuiltinLFLOOR(gimple_statement_d *stmt); - llvm::Value *EmitBuiltinLROUND(gimple_statement_d *stmt); - llvm::Value *EmitBuiltinCEXPI(gimple_statement_d *stmt); - llvm::Value *EmitBuiltinSIGNBIT(gimple_statement_d *stmt); - - bool EmitBuiltinAdjustTrampoline(gimple_statement_d *stmt, - llvm::Value *&Result); - bool EmitBuiltinAlloca(gimple_statement_d *stmt, llvm::Value *&Result); - bool EmitBuiltinAllocaWithAlign(gimple_statement_d *stmt, - llvm::Value *&Result); -#if (GCC_MINOR > 6) - bool EmitBuiltinAssumeAligned(gimple_statement_d *stmt, llvm::Value *&Result); + EmitBuiltinBitCountIntrinsic(GimpleTy *stmt, llvm::Intrinsic::ID Id); + llvm::Value *EmitBuiltinSQRT(GimpleTy *stmt); + llvm::Value *EmitBuiltinPOWI(GimpleTy *stmt); + llvm::Value *EmitBuiltinPOW(GimpleTy *stmt); + llvm::Value *EmitBuiltinLCEIL(GimpleTy *stmt); + llvm::Value *EmitBuiltinLFLOOR(GimpleTy *stmt); + llvm::Value *EmitBuiltinLROUND(GimpleTy *stmt); + llvm::Value *EmitBuiltinCEXPI(GimpleTy *stmt); + llvm::Value *EmitBuiltinSIGNBIT(GimpleTy *stmt); + + bool EmitBuiltinAdjustTrampoline(GimpleTy *stmt, llvm::Value *&Result); + bool EmitBuiltinAlloca(GimpleTy *stmt, llvm::Value *&Result); + bool EmitBuiltinAllocaWithAlign(GimpleTy *stmt, 
llvm::Value *&Result); +#if GCC_VERSION_CODE > GCC_VERSION(4, 6) + bool EmitBuiltinAssumeAligned(GimpleTy *stmt, llvm::Value *&Result); #endif - bool EmitBuiltinBZero(gimple_statement_d *stmt, llvm::Value *&Result); - bool EmitBuiltinConstantP(gimple_statement_d *stmt, llvm::Value *&Result); - bool EmitBuiltinExpect(gimple_statement_d *stmt, llvm::Value *&Result); - bool EmitBuiltinExtendPointer(gimple_statement_d *stmt, llvm::Value *&Result); - bool EmitBuiltinExtractReturnAddr(gimple_statement_d *stmt, + bool EmitBuiltinBZero(GimpleTy *stmt, llvm::Value *&Result); + bool EmitBuiltinConstantP(GimpleTy *stmt, llvm::Value *&Result); + bool EmitBuiltinExpect(GimpleTy *stmt, llvm::Value *&Result); + bool EmitBuiltinExtendPointer(GimpleTy *stmt, llvm::Value *&Result); + bool EmitBuiltinExtractReturnAddr(GimpleTy *stmt, llvm::Value *&Result); - bool EmitBuiltinFrobReturnAddr(gimple_statement_d *stmt, + bool EmitBuiltinFrobReturnAddr(GimpleTy *stmt, llvm::Value *&Result); - bool EmitBuiltinInitTrampoline(gimple_statement_d *stmt, bool OnStack); - bool EmitBuiltinMemCopy(gimple_statement_d *stmt, llvm::Value *&Result, + bool EmitBuiltinInitTrampoline(GimpleTy *stmt, bool OnStack); + bool EmitBuiltinMemCopy(GimpleTy *stmt, llvm::Value *&Result, bool isMemMove, bool SizeCheck); - bool EmitBuiltinMemSet(gimple_statement_d *stmt, llvm::Value *&Result, + bool EmitBuiltinMemSet(GimpleTy *stmt, llvm::Value *&Result, bool SizeCheck); - bool EmitBuiltinPrefetch(gimple_statement_d *stmt); - bool EmitBuiltinReturnAddr(gimple_statement_d *stmt, llvm::Value *&Result, + bool EmitBuiltinPrefetch(GimpleTy *stmt); + bool EmitBuiltinReturnAddr(GimpleTy *stmt, llvm::Value *&Result, bool isFrame); - bool EmitBuiltinStackRestore(gimple_statement_d *stmt); - bool EmitBuiltinStackSave(gimple_statement_d *stmt, llvm::Value *&Result); + bool EmitBuiltinStackRestore(GimpleTy *stmt); + bool EmitBuiltinStackSave(GimpleTy *stmt, llvm::Value *&Result); bool EmitBuiltinUnreachable(); - bool EmitBuiltinVACopy(gimple_statement_d *stmt); - bool EmitBuiltinVAEnd(gimple_statement_d *stmt); - bool EmitBuiltinVAStart(gimple_statement_d *stmt); - - bool EmitBuiltinEHCopyValues(gimple_statement_d *stmt); - bool EmitBuiltinEHFilter(gimple_statement_d *stmt, llvm::Value *&Result); - bool EmitBuiltinEHPointer(gimple_statement_d *stmt, llvm::Value *&Result); - bool EmitBuiltinDwarfCFA(gimple_statement_d *stmt, llvm::Value *&Result); - bool EmitBuiltinDwarfSPColumn(gimple_statement_d *stmt, llvm::Value *&Result); - bool EmitBuiltinEHReturnDataRegno(gimple_statement_d *stmt, + bool EmitBuiltinVACopy(GimpleTy *stmt); + bool EmitBuiltinVAEnd(GimpleTy *stmt); + bool EmitBuiltinVAStart(GimpleTy *stmt); + + bool EmitBuiltinEHCopyValues(GimpleTy *stmt); + bool EmitBuiltinEHFilter(GimpleTy *stmt, llvm::Value *&Result); + bool EmitBuiltinEHPointer(GimpleTy *stmt, llvm::Value *&Result); + bool EmitBuiltinDwarfCFA(GimpleTy *stmt, llvm::Value *&Result); + bool EmitBuiltinDwarfSPColumn(GimpleTy *stmt, llvm::Value *&Result); + bool EmitBuiltinEHReturnDataRegno(GimpleTy *stmt, llvm::Value *&Result); - bool EmitBuiltinEHReturn(gimple_statement_d *stmt, llvm::Value *&Result); - bool EmitBuiltinInitDwarfRegSizes(gimple_statement_d *stmt, + bool EmitBuiltinEHReturn(GimpleTy *stmt, llvm::Value *&Result); + bool EmitBuiltinInitDwarfRegSizes(GimpleTy *stmt, llvm::Value *&Result); - bool EmitBuiltinUnwindInit(gimple_statement_d *stmt, llvm::Value *&Result); + bool EmitBuiltinUnwindInit(GimpleTy *stmt, llvm::Value *&Result); // Complex Math Expressions. 
llvm::Value *CreateComplex(llvm::Value *Real, llvm::Value *Imag); @@ -693,10 +718,10 @@ LValue EmitLV_COMPONENT_REF(tree_node *exp); LValue EmitLV_DECL(tree_node *exp); LValue EmitLV_INDIRECT_REF(tree_node *exp); -#if (GCC_MINOR > 5) +#if GCC_VERSION_CODE > GCC_VERSION(4, 5) LValue EmitLV_MEM_REF(tree_node *exp); #endif -#if (GCC_MINOR < 6) +#if GCC_VERSION_CODE < GCC_VERSION(4, 6) LValue EmitLV_MISALIGNED_INDIRECT_REF(tree_node *exp); #endif LValue EmitLV_VIEW_CONVERT_EXPR(tree_node *exp); @@ -763,7 +788,7 @@ private: // Optional target defined builtin intrinsic expanding function. - bool TargetIntrinsicLower(gimple_statement_d *stmt, tree_node *fndecl, + bool TargetIntrinsicLower(GimpleTy *stmt, tree_node *fndecl, const MemRef *DestLoc, llvm::Value *&Result, llvm::Type *ResultType, std::vector &Ops); Index: include/dragonegg/Trees.h =================================================================== --- include/dragonegg/Trees.h +++ include/dragonegg/Trees.h @@ -23,7 +23,7 @@ #ifndef DRAGONEGG_TREES_H #define DRAGONEGG_TREES_H -#if (GCC_MINOR < 7) +#if (GCC_MAJOR < 5 && GCC_MINOR < 7) #include "flags.h" // For TYPE_OVERFLOW_UNDEFINED. #endif @@ -142,7 +142,7 @@ bool isBitfield(const_tree field_decl); // Compatibility hacks for older versions of GCC. -#if (GCC_MINOR < 8) +#if (GCC_MAJOR < 5 && GCC_MINOR < 8) // Supported allocation types: struct va_gc { }; // Allocation uses ggc_alloc. Index: include/dragonegg/gt-cache-6.3.inc =================================================================== --- /dev/null +++ include/dragonegg/gt-cache-6.3.inc @@ -0,0 +1,1289 @@ +/* Type information for GCC. + Copyright (C) 2004-2016 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* This file is machine generated. Do not edit. */ + +/* GC marker procedures. */ +/* Macros and declarations. 
*/ +#define gt_ggc_m_29hash_table_WeakVHCacheHasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_WeakVHCacheHasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_WeakVHCacheHasher_ (void *); +#define gt_ggc_m_26hash_table_TypeCacheHaser_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_TypeCacheHaser_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_TypeCacheHaser_ (void *); +#define gt_ggc_m_26hash_table_intCacheHasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_intCacheHasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_intCacheHasher_ (void *); +#define gt_ggc_m_24vec_ivarref_entry_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_ivarref_entry_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_ivarref_entry_va_gc_ (void *); +#define gt_ggc_m_26vec_prot_list_entry_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_prot_list_entry_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_prot_list_entry_va_gc_ (void *); +#define gt_ggc_m_23vec_msgref_entry_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_msgref_entry_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_msgref_entry_va_gc_ (void *); +#define gt_ggc_m_27vec_ident_data_tuple_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_ident_data_tuple_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_ident_data_tuple_va_gc_ (void *); +#define gt_ggc_m_30hash_table_objc_string_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_objc_string_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_objc_string_hasher_ (void *); +#define gt_ggc_m_17string_descriptor(X) do { \ + if (X != NULL) gt_ggc_mx_string_descriptor (X);\ + } while (0) +extern void gt_ggc_mx_string_descriptor (void *); +#define gt_ggc_m_9imp_entry(X) do { \ + if (X != NULL) gt_ggc_mx_imp_entry (X);\ + } while (0) +extern void gt_ggc_mx_imp_entry (void *); +#define gt_ggc_m_16hashed_attribute(X) do { \ + if (X != NULL) gt_ggc_mx_hashed_attribute (X);\ + } while (0) +extern void gt_ggc_mx_hashed_attribute (void *); +#define gt_ggc_m_12hashed_entry(X) do { \ + if (X != NULL) gt_ggc_mx_hashed_entry (X);\ + } while (0) +extern void gt_ggc_mx_hashed_entry (void *); +#define gt_ggc_m_16objc_map_private(X) do { \ + if (X != NULL) gt_ggc_mx_objc_map_private (X);\ + } while (0) +extern void gt_ggc_mx_objc_map_private (void *); +#define gt_ggc_m_33hash_table_type_assertion_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_type_assertion_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_type_assertion_hasher_ (void *); +#define gt_ggc_m_23vec_method_entry_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_method_entry_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_method_entry_va_gc_ (void *); +#define gt_ggc_m_14type_assertion(X) do { \ + if (X != NULL) gt_ggc_mx_type_assertion (X);\ + } while (0) +extern void gt_ggc_mx_type_assertion (void *); +#define gt_ggc_m_22hash_table_ict_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_ict_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_ict_hasher_ (void *); +#define gt_ggc_m_26hash_table_treetreehasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_treetreehasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_treetreehasher_ (void *); +#define gt_ggc_m_18treetreehash_entry(X) do { \ + if (X != NULL) gt_ggc_mx_treetreehash_entry (X);\ + } while (0) +extern void gt_ggc_mx_treetreehash_entry (void *); +#define gt_ggc_m_5CPool(X) do { \ + if (X != NULL) gt_ggc_mx_CPool (X);\ + } while (0) +extern void gt_ggc_mx_CPool (void *); +#define gt_ggc_m_3JCF(X) 
do { \ + if (X != NULL) gt_ggc_mx_JCF (X);\ + } while (0) +extern void gt_ggc_mx_JCF (void *); +#define gt_ggc_m_30hash_table_module_decl_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_module_decl_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_module_decl_hasher_ (void *); +#define gt_ggc_m_17module_htab_entry(X) do { \ + if (X != NULL) gt_ggc_mx_module_htab_entry (X);\ + } while (0) +extern void gt_ggc_mx_module_htab_entry (void *); +#define gt_ggc_m_25hash_table_module_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_module_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_module_hasher_ (void *); +#define gt_ggc_m_13binding_level(X) do { \ + if (X != NULL) gt_ggc_mx_binding_level (X);\ + } while (0) +extern void gt_ggc_mx_binding_level (void *); +#define gt_ggc_m_33hash_table_constexpr_call_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_constexpr_call_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_constexpr_call_hasher_ (void *); +#define gt_ggc_m_14constexpr_call(X) do { \ + if (X != NULL) gt_ggc_mx_constexpr_call (X);\ + } while (0) +extern void gt_ggc_mx_constexpr_call (void *); +#define gt_ggc_m_35hash_table_constexpr_fundef_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_constexpr_fundef_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_constexpr_fundef_hasher_ (void *); +#define gt_ggc_m_16constexpr_fundef(X) do { \ + if (X != NULL) gt_ggc_mx_constexpr_fundef (X);\ + } while (0) +extern void gt_ggc_mx_constexpr_fundef (void *); +#define gt_ggc_m_27vec_pending_noexcept_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_pending_noexcept_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_pending_noexcept_va_gc_ (void *); +#define gt_ggc_m_32hash_table_abstract_type_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_abstract_type_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_abstract_type_hasher_ (void *); +#define gt_ggc_m_21pending_abstract_type(X) do { \ + if (X != NULL) gt_ggc_mx_pending_abstract_type (X);\ + } while (0) +extern void gt_ggc_mx_pending_abstract_type (void *); +#define gt_ggc_m_19vec_tree_int_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_tree_int_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_tree_int_va_gc_ (void *); +#define gt_ggc_m_9cp_parser(X) do { \ + if (X != NULL) gt_ggc_mx_cp_parser (X);\ + } while (0) +extern void gt_ggc_mx_cp_parser (void *); +#define gt_ggc_m_38vec_cp_unparsed_functions_entry_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_cp_unparsed_functions_entry_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_cp_unparsed_functions_entry_va_gc_ (void *); +#define gt_ggc_m_17cp_parser_context(X) do { \ + if (X != NULL) gt_ggc_mx_cp_parser_context (X);\ + } while (0) +extern void gt_ggc_mx_cp_parser_context (void *); +#define gt_ggc_m_31vec_cp_default_arg_entry_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_cp_default_arg_entry_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_cp_default_arg_entry_va_gc_ (void *); +#define gt_ggc_m_8cp_lexer(X) do { \ + if (X != NULL) gt_ggc_mx_cp_lexer (X);\ + } while (0) +extern void gt_ggc_mx_cp_lexer (void *); +#define gt_ggc_m_19vec_cp_token_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_cp_token_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_cp_token_va_gc_ (void *); +#define gt_ggc_m_10tree_check(X) do { \ + if (X != NULL) gt_ggc_mx_tree_check (X);\ + } while (0) +extern void gt_ggc_mx_tree_check (void *); +#define gt_ggc_m_23hash_table_list_hasher_(X) do { \ + if (X != NULL) 
gt_ggc_mx_hash_table_list_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_list_hasher_ (void *); +#define gt_ggc_m_30hash_table_cplus_array_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_cplus_array_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_cplus_array_hasher_ (void *); +#define gt_ggc_m_26vec_deferred_access_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_deferred_access_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_deferred_access_va_gc_ (void *); +#define gt_ggc_m_32vec_deferred_access_check_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_deferred_access_check_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_deferred_access_check_va_gc_ (void *); +#define gt_ggc_m_30hash_table_subsumption_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_subsumption_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_subsumption_hasher_ (void *); +#define gt_ggc_m_17subsumption_entry(X) do { \ + if (X != NULL) gt_ggc_mx_subsumption_entry (X);\ + } while (0) +extern void gt_ggc_mx_subsumption_entry (void *); +#define gt_ggc_m_31hash_table_concept_spec_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_concept_spec_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_concept_spec_hasher_ (void *); +#define gt_ggc_m_33hash_table_constraint_sat_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_constraint_sat_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_constraint_sat_hasher_ (void *); +#define gt_ggc_m_18concept_spec_entry(X) do { \ + if (X != NULL) gt_ggc_mx_concept_spec_entry (X);\ + } while (0) +extern void gt_ggc_mx_concept_spec_entry (void *); +#define gt_ggc_m_20constraint_sat_entry(X) do { \ + if (X != NULL) gt_ggc_mx_constraint_sat_entry (X);\ + } while (0) +extern void gt_ggc_mx_constraint_sat_entry (void *); +#define gt_ggc_m_25hash_table_constr_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_constr_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_constr_hasher_ (void *); +#define gt_ggc_m_12constr_entry(X) do { \ + if (X != NULL) gt_ggc_mx_constr_entry (X);\ + } while (0) +extern void gt_ggc_mx_constr_entry (void *); +#define gt_ggc_m_23hash_table_spec_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_spec_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_spec_hasher_ (void *); +#define gt_ggc_m_10spec_entry(X) do { \ + if (X != NULL) gt_ggc_mx_spec_entry (X);\ + } while (0) +extern void gt_ggc_mx_spec_entry (void *); +#define gt_ggc_m_16pending_template(X) do { \ + if (X != NULL) gt_ggc_mx_pending_template (X);\ + } while (0) +extern void gt_ggc_mx_pending_template (void *); +#define gt_ggc_m_27hash_table_typename_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_typename_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_typename_hasher_ (void *); +#define gt_ggc_m_25vec_incomplete_var_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_incomplete_var_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_incomplete_var_va_gc_ (void *); +#define gt_ggc_m_21named_label_use_entry(X) do { \ + if (X != NULL) gt_ggc_mx_named_label_use_entry (X);\ + } while (0) +extern void gt_ggc_mx_named_label_use_entry (void *); +#define gt_ggc_m_22vec_tree_pair_s_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_tree_pair_s_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_tree_pair_s_va_gc_ (void *); +#define gt_ggc_m_35hash_table_cxx_int_tree_map_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_cxx_int_tree_map_hasher_ (X);\ + } while (0) 
+extern void gt_ggc_mx_hash_table_cxx_int_tree_map_hasher_ (void *); +#define gt_ggc_m_30hash_table_named_label_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_named_label_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_named_label_hasher_ (void *); +#define gt_ggc_m_17named_label_entry(X) do { \ + if (X != NULL) gt_ggc_mx_named_label_entry (X);\ + } while (0) +extern void gt_ggc_mx_named_label_entry (void *); +#define gt_ggc_m_28vec_cxx_saved_binding_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_cxx_saved_binding_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_cxx_saved_binding_va_gc_ (void *); +#define gt_ggc_m_36vec_qualified_typedef_usage_t_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_qualified_typedef_usage_t_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_qualified_typedef_usage_t_va_gc_ (void *); +#define gt_ggc_m_14cp_token_cache(X) do { \ + if (X != NULL) gt_ggc_mx_cp_token_cache (X);\ + } while (0) +extern void gt_ggc_mx_cp_token_cache (void *); +#define gt_ggc_m_11saved_scope(X) do { \ + if (X != NULL) gt_ggc_mx_saved_scope (X);\ + } while (0) +extern void gt_ggc_mx_saved_scope (void *); +#define gt_ggc_m_16cxx_int_tree_map(X) do { \ + if (X != NULL) gt_ggc_mx_cxx_int_tree_map (X);\ + } while (0) +extern void gt_ggc_mx_cxx_int_tree_map (void *); +#define gt_ggc_m_27vec_cp_label_binding_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_cp_label_binding_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_cp_label_binding_va_gc_ (void *); +#define gt_ggc_m_27vec_cp_class_binding_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_cp_class_binding_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_cp_class_binding_va_gc_ (void *); +#define gt_ggc_m_16cp_binding_level(X) do { \ + if (X != NULL) gt_ggc_mx_cp_binding_level (X);\ + } while (0) +extern void gt_ggc_mx_cp_binding_level (void *); +#define gt_ggc_m_11cxx_binding(X) do { \ + if (X != NULL) gt_ggc_mx_cxx_binding (X);\ + } while (0) +extern void gt_ggc_mx_cxx_binding (void *); +#define gt_ggc_m_15binding_entry_s(X) do { \ + if (X != NULL) gt_ggc_mx_binding_entry_s (X);\ + } while (0) +extern void gt_ggc_mx_binding_entry_s (void *); +#define gt_ggc_m_15binding_table_s(X) do { \ + if (X != NULL) gt_ggc_mx_binding_table_s (X);\ + } while (0) +extern void gt_ggc_mx_binding_table_s (void *); +#define gt_ggc_m_28hash_table_conv_type_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_conv_type_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_conv_type_hasher_ (void *); +#define gt_ggc_m_11tinst_level(X) do { \ + if (X != NULL) gt_ggc_mx_tinst_level (X);\ + } while (0) +extern void gt_ggc_mx_tinst_level (void *); +#define gt_ggc_m_18vec_tinfo_s_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_tinfo_s_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_tinfo_s_va_gc_ (void *); +#define gt_ggc_m_8c_parser(X) do { \ + if (X != NULL) gt_ggc_mx_c_parser (X);\ + } while (0) +extern void gt_ggc_mx_c_parser (void *); +#define gt_ggc_m_18vec_c_token_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_c_token_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_c_token_va_gc_ (void *); +#define gt_ggc_m_9opt_stack(X) do { \ + if (X != NULL) gt_ggc_mx_opt_stack (X);\ + } while (0) +extern void gt_ggc_mx_opt_stack (void *); +#define gt_ggc_m_31vec_pending_redefinition_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_pending_redefinition_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_pending_redefinition_va_gc_ (void *); +#define gt_ggc_m_23vec_pending_weak_va_gc_(X) do { \ + if (X != 
NULL) gt_ggc_mx_vec_pending_weak_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_pending_weak_va_gc_ (void *); +#define gt_ggc_m_11align_stack(X) do { \ + if (X != NULL) gt_ggc_mx_align_stack (X);\ + } while (0) +extern void gt_ggc_mx_align_stack (void *); +#define gt_ggc_m_22vec_tree_gc_vec_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_tree_gc_vec_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_tree_gc_vec_va_gc_ (void *); +#define gt_ggc_m_23vec_const_char_p_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_const_char_p_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_const_char_p_va_gc_ (void *); +#define gt_ggc_m_25hash_table_c_type_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_c_type_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_c_type_hasher_ (void *); +#define gt_ggc_m_18sorted_fields_type(X) do { \ + if (X != NULL) gt_ggc_mx_sorted_fields_type (X);\ + } while (0) +extern void gt_ggc_mx_sorted_fields_type (void *); +#define gt_ggc_m_15c_inline_static(X) do { \ + if (X != NULL) gt_ggc_mx_c_inline_static (X);\ + } while (0) +extern void gt_ggc_mx_c_inline_static (void *); +#define gt_ggc_m_28vec_c_goto_bindings_p_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_c_goto_bindings_p_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_c_goto_bindings_p_va_gc_ (void *); +#define gt_ggc_m_15c_goto_bindings(X) do { \ + if (X != NULL) gt_ggc_mx_c_goto_bindings (X);\ + } while (0) +extern void gt_ggc_mx_c_goto_bindings (void *); +#define gt_ggc_m_7c_scope(X) do { \ + if (X != NULL) gt_ggc_mx_c_scope (X);\ + } while (0) +extern void gt_ggc_mx_c_scope (void *); +#define gt_ggc_m_9c_binding(X) do { \ + if (X != NULL) gt_ggc_mx_c_binding (X);\ + } while (0) +extern void gt_ggc_mx_c_binding (void *); +#define gt_ggc_m_12c_label_vars(X) do { \ + if (X != NULL) gt_ggc_mx_c_label_vars (X);\ + } while (0) +extern void gt_ggc_mx_c_label_vars (void *); +#define gt_ggc_m_27hash_table_pad_type_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_pad_type_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_pad_type_hasher_ (void *); +#define gt_ggc_m_13pad_type_hash(X) do { \ + if (X != NULL) gt_ggc_mx_pad_type_hash (X);\ + } while (0) +extern void gt_ggc_mx_pad_type_hash (void *); +#define gt_ggc_m_18gnat_binding_level(X) do { \ + if (X != NULL) gt_ggc_mx_gnat_binding_level (X);\ + } while (0) +extern void gt_ggc_mx_gnat_binding_level (void *); +#define gt_ggc_m_20vec_loop_info_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_loop_info_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_loop_info_va_gc_ (void *); +#define gt_ggc_m_11loop_info_d(X) do { \ + if (X != NULL) gt_ggc_mx_loop_info_d (X);\ + } while (0) +extern void gt_ggc_mx_loop_info_d (void *); +#define gt_ggc_m_27vec_range_check_info_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_range_check_info_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_range_check_info_va_gc_ (void *); +#define gt_ggc_m_18range_check_info_d(X) do { \ + if (X != NULL) gt_ggc_mx_range_check_info_d (X);\ + } while (0) +extern void gt_ggc_mx_range_check_info_d (void *); +#define gt_ggc_m_9elab_info(X) do { \ + if (X != NULL) gt_ggc_mx_elab_info (X);\ + } while (0) +extern void gt_ggc_mx_elab_info (void *); +#define gt_ggc_m_10stmt_group(X) do { \ + if (X != NULL) gt_ggc_mx_stmt_group (X);\ + } while (0) +extern void gt_ggc_mx_stmt_group (void *); +#define gt_ggc_m_20vec_parm_attr_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_parm_attr_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_parm_attr_va_gc_ 
(void *); +#define gt_ggc_m_11parm_attr_d(X) do { \ + if (X != NULL) gt_ggc_mx_parm_attr_d (X);\ + } while (0) +extern void gt_ggc_mx_parm_attr_d (void *); +#define gt_ggc_m_35hash_table_value_annotation_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_value_annotation_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_value_annotation_hasher_ (void *); +#define gt_ggc_m_38vec_hsa_decl_kernel_map_element_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_hsa_decl_kernel_map_element_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_hsa_decl_kernel_map_element_va_gc_ (void *); +#define gt_ggc_m_19vec_odr_type_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_odr_type_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_odr_type_va_gc_ (void *); +#define gt_ggc_m_38hash_table_tree_type_map_cache_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_tree_type_map_cache_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_tree_type_map_cache_hasher_ (void *); +#define gt_ggc_m_13tree_type_map(X) do { \ + if (X != NULL) gt_ggc_mx_tree_type_map (X);\ + } while (0) +extern void gt_ggc_mx_tree_type_map (void *); +#define gt_ggc_m_33function_summary_inline_summary__(X) do { \ + if (X != NULL) gt_ggc_mx_function_summary_inline_summary__ (X);\ + } while (0) +extern void gt_ggc_mx_function_summary_inline_summary__ (void *); +#define gt_ggc_m_26vec_size_time_entry_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_size_time_entry_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_size_time_entry_va_gc_ (void *); +#define gt_ggc_m_20vec_condition_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_condition_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_condition_va_gc_ (void *); +#define gt_ggc_m_29hash_table_decl_state_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_decl_state_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_decl_state_hasher_ (void *); +#define gt_ggc_m_29hash_table_tm_wrapper_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_tm_wrapper_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_tm_wrapper_hasher_ (void *); +#define gt_ggc_m_24vec_ipa_edge_args_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_ipa_edge_args_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_ipa_edge_args_va_gc_ (void *); +#define gt_ggc_m_38vec_ipcp_transformation_summary_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_ipcp_transformation_summary_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_ipcp_transformation_summary_va_gc_ (void *); +#define gt_ggc_m_39vec_ipa_polymorphic_call_context_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_ipa_polymorphic_call_context_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_ipa_polymorphic_call_context_va_gc_ (void *); +#define gt_ggc_m_24vec_ipa_jump_func_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_ipa_jump_func_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_ipa_jump_func_va_gc_ (void *); +#define gt_ggc_m_24vec_ipa_alignment_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_ipa_alignment_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_ipa_alignment_va_gc_ (void *); +#define gt_ggc_m_26vec_ipa_agg_jf_item_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_ipa_agg_jf_item_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_ipa_agg_jf_item_va_gc_ (void *); +#define gt_ggc_m_18vec_gimple__va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_gimple__va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_gimple__va_gc_ (void *); +#define gt_ggc_m_28hash_table_dllimport_hasher_(X) do { \ 
+ if (X != NULL) gt_ggc_mx_hash_table_dllimport_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_dllimport_hasher_ (void *); +#define gt_ggc_m_28grid_launch_attributes_trees(X) do { \ + if (X != NULL) gt_ggc_mx_grid_launch_attributes_trees (X);\ + } while (0) +extern void gt_ggc_mx_grid_launch_attributes_trees (void *); +#define gt_ggc_m_20ssa_operand_memory_d(X) do { \ + if (X != NULL) gt_ggc_mx_ssa_operand_memory_d (X);\ + } while (0) +extern void gt_ggc_mx_ssa_operand_memory_d (void *); +#define gt_ggc_m_28hash_table_scev_info_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_scev_info_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_scev_info_hasher_ (void *); +#define gt_ggc_m_13scev_info_str(X) do { \ + if (X != NULL) gt_ggc_mx_scev_info_str (X);\ + } while (0) +extern void gt_ggc_mx_scev_info_str (void *); +#define gt_ggc_m_28vec_mem_addr_template_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_mem_addr_template_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_mem_addr_template_va_gc_ (void *); +#define gt_ggc_m_29hash_table_tm_restart_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_tm_restart_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_tm_restart_hasher_ (void *); +#define gt_ggc_m_27hash_table_ssa_name_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_ssa_name_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_ssa_name_hasher_ (void *); +#define gt_ggc_m_19hash_map_tree_tree_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_map_tree_tree_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_map_tree_tree_ (void *); +#define gt_ggc_m_15tm_restart_node(X) do { \ + if (X != NULL) gt_ggc_mx_tm_restart_node (X);\ + } while (0) +extern void gt_ggc_mx_tm_restart_node (void *); +#define gt_ggc_m_27hash_table_tm_clone_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_tm_clone_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_tm_clone_hasher_ (void *); +#define gt_ggc_m_33hash_table_const_rtx_desc_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_const_rtx_desc_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_const_rtx_desc_hasher_ (void *); +#define gt_ggc_m_34hash_table_tree_descriptor_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_tree_descriptor_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_tree_descriptor_hasher_ (void *); +#define gt_ggc_m_31hash_table_object_block_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_object_block_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_object_block_hasher_ (void *); +#define gt_ggc_m_26hash_table_section_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_section_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_section_hasher_ (void *); +#define gt_ggc_m_37hash_table_tree_vec_map_cache_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_tree_vec_map_cache_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_tree_vec_map_cache_hasher_ (void *); +#define gt_ggc_m_38hash_table_tree_decl_map_cache_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_tree_decl_map_cache_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_tree_decl_map_cache_hasher_ (void *); +#define gt_ggc_m_28hash_table_cl_option_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_cl_option_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_cl_option_hasher_ (void *); +#define gt_ggc_m_26hash_table_int_cst_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_int_cst_hasher_ 
(X);\ + } while (0) +extern void gt_ggc_mx_hash_table_int_cst_hasher_ (void *); +#define gt_ggc_m_29hash_table_type_cache_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_type_cache_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_type_cache_hasher_ (void *); +#define gt_ggc_m_9type_hash(X) do { \ + if (X != NULL) gt_ggc_mx_type_hash (X);\ + } while (0) +extern void gt_ggc_mx_type_hash (void *); +#define gt_ggc_m_16string_pool_data(X) do { \ + if (X != NULL) gt_ggc_mx_string_pool_data (X);\ + } while (0) +extern void gt_ggc_mx_string_pool_data (void *); +#define gt_ggc_m_31hash_table_libfunc_decl_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_libfunc_decl_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_libfunc_decl_hasher_ (void *); +#define gt_ggc_m_24hash_map_tree_hash_tree_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_map_tree_hash_tree_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_map_tree_hash_tree_ (void *); +#define gt_ggc_m_31hash_table_temp_address_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_temp_address_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_temp_address_hasher_ (void *); +#define gt_ggc_m_23temp_slot_address_entry(X) do { \ + if (X != NULL) gt_ggc_mx_temp_slot_address_entry (X);\ + } while (0) +extern void gt_ggc_mx_temp_slot_address_entry (void *); +#define gt_ggc_m_29hash_table_insn_cache_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_insn_cache_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_insn_cache_hasher_ (void *); +#define gt_ggc_m_21hash_map_gimple__int_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_map_gimple__int_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_map_gimple__int_ (void *); +#define gt_ggc_m_25vec_eh_landing_pad_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_eh_landing_pad_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_eh_landing_pad_va_gc_ (void *); +#define gt_ggc_m_20vec_eh_region_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_eh_region_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_eh_region_va_gc_ (void *); +#define gt_ggc_m_10eh_catch_d(X) do { \ + if (X != NULL) gt_ggc_mx_eh_catch_d (X);\ + } while (0) +extern void gt_ggc_mx_eh_catch_d (void *); +#define gt_ggc_m_16eh_landing_pad_d(X) do { \ + if (X != NULL) gt_ggc_mx_eh_landing_pad_d (X);\ + } while (0) +extern void gt_ggc_mx_eh_landing_pad_d (void *); +#define gt_ggc_m_11eh_region_d(X) do { \ + if (X != NULL) gt_ggc_mx_eh_region_d (X);\ + } while (0) +extern void gt_ggc_mx_eh_region_d (void *); +#define gt_ggc_m_30hash_table_const_fixed_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_const_fixed_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_const_fixed_hasher_ (void *); +#define gt_ggc_m_31hash_table_const_double_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_const_double_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_const_double_hasher_ (void *); +#define gt_ggc_m_27hash_table_reg_attr_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_reg_attr_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_reg_attr_hasher_ (void *); +#define gt_ggc_m_33hash_table_const_wide_int_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_const_wide_int_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_const_wide_int_hasher_ (void *); +#define gt_ggc_m_28hash_table_const_int_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_const_int_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_const_int_hasher_ (void 
*); +#define gt_ggc_m_22vec_temp_slot_p_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_temp_slot_p_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_temp_slot_p_va_gc_ (void *); +#define gt_ggc_m_20initial_value_struct(X) do { \ + if (X != NULL) gt_ggc_mx_initial_value_struct (X);\ + } while (0) +extern void gt_ggc_mx_initial_value_struct (void *); +#define gt_ggc_m_9temp_slot(X) do { \ + if (X != NULL) gt_ggc_mx_temp_slot (X);\ + } while (0) +extern void gt_ggc_mx_temp_slot (void *); +#define gt_ggc_m_23hash_table_addr_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_addr_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_addr_hasher_ (void *); +#define gt_ggc_m_24vec_die_arg_entry_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_die_arg_entry_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_die_arg_entry_va_gc_ (void *); +#define gt_ggc_m_24vec_macinfo_entry_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_macinfo_entry_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_macinfo_entry_va_gc_ (void *); +#define gt_ggc_m_24vec_pubname_entry_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_pubname_entry_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_pubname_entry_va_gc_ (void *); +#define gt_ggc_m_30vec_dw_line_info_table__va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_dw_line_info_table__va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_dw_line_info_table__va_gc_ (void *); +#define gt_ggc_m_30hash_table_dw_loc_list_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_dw_loc_list_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_dw_loc_list_hasher_ (void *); +#define gt_ggc_m_22cached_dw_loc_list_def(X) do { \ + if (X != NULL) gt_ggc_mx_cached_dw_loc_list_def (X);\ + } while (0) +extern void gt_ggc_mx_cached_dw_loc_list_def (void *); +#define gt_ggc_m_27hash_table_decl_loc_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_decl_loc_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_decl_loc_hasher_ (void *); +#define gt_ggc_m_17call_arg_loc_node(X) do { \ + if (X != NULL) gt_ggc_mx_call_arg_loc_node (X);\ + } while (0) +extern void gt_ggc_mx_call_arg_loc_node (void *); +#define gt_ggc_m_16var_loc_list_def(X) do { \ + if (X != NULL) gt_ggc_mx_var_loc_list_def (X);\ + } while (0) +extern void gt_ggc_mx_var_loc_list_def (void *); +#define gt_ggc_m_12var_loc_node(X) do { \ + if (X != NULL) gt_ggc_mx_var_loc_node (X);\ + } while (0) +extern void gt_ggc_mx_var_loc_node (void *); +#define gt_ggc_m_28hash_table_block_die_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_block_die_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_block_die_hasher_ (void *); +#define gt_ggc_m_27hash_table_decl_die_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_decl_die_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_decl_die_hasher_ (void *); +#define gt_ggc_m_29hash_table_dwarf_file_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_dwarf_file_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_dwarf_file_hasher_ (void *); +#define gt_ggc_m_16limbo_die_struct(X) do { \ + if (X != NULL) gt_ggc_mx_limbo_die_struct (X);\ + } while (0) +extern void gt_ggc_mx_limbo_die_struct (void *); +#define gt_ggc_m_18dw_ranges_by_label(X) do { \ + if (X != NULL) gt_ggc_mx_dw_ranges_by_label (X);\ + } while (0) +extern void gt_ggc_mx_dw_ranges_by_label (void *); +#define gt_ggc_m_9dw_ranges(X) do { \ + if (X != NULL) gt_ggc_mx_dw_ranges (X);\ + } while (0) +extern void gt_ggc_mx_dw_ranges (void *); +#define 
gt_ggc_m_23vec_dw_attr_node_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_dw_attr_node_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_dw_attr_node_va_gc_ (void *); +#define gt_ggc_m_18dw_line_info_table(X) do { \ + if (X != NULL) gt_ggc_mx_dw_line_info_table (X);\ + } while (0) +extern void gt_ggc_mx_dw_line_info_table (void *); +#define gt_ggc_m_29vec_dw_line_info_entry_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_dw_line_info_entry_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_dw_line_info_entry_va_gc_ (void *); +#define gt_ggc_m_16comdat_type_node(X) do { \ + if (X != NULL) gt_ggc_mx_comdat_type_node (X);\ + } while (0) +extern void gt_ggc_mx_comdat_type_node (void *); +#define gt_ggc_m_34hash_table_indirect_string_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_indirect_string_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_indirect_string_hasher_ (void *); +#define gt_ggc_m_21vec_dw_fde_ref_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_dw_fde_ref_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_dw_fde_ref_va_gc_ (void *); +#define gt_ggc_m_17reg_saved_in_data(X) do { \ + if (X != NULL) gt_ggc_mx_reg_saved_in_data (X);\ + } while (0) +extern void gt_ggc_mx_reg_saved_in_data (void *); +#define gt_ggc_m_10dw_cfi_row(X) do { \ + if (X != NULL) gt_ggc_mx_dw_cfi_row (X);\ + } while (0) +extern void gt_ggc_mx_dw_cfi_row (void *); +#define gt_ggc_m_20hash_map_char__tree_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_map_char__tree_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_map_char__tree_ (void *); +#define gt_ggc_m_15dwarf_file_data(X) do { \ + if (X != NULL) gt_ggc_mx_dwarf_file_data (X);\ + } while (0) +extern void gt_ggc_mx_dwarf_file_data (void *); +#define gt_ggc_m_20indirect_string_node(X) do { \ + if (X != NULL) gt_ggc_mx_indirect_string_node (X);\ + } while (0) +extern void gt_ggc_mx_indirect_string_node (void *); +#define gt_ggc_m_16addr_table_entry(X) do { \ + if (X != NULL) gt_ggc_mx_addr_table_entry (X);\ + } while (0) +extern void gt_ggc_mx_addr_table_entry (void *); +#define gt_ggc_m_21vec_dw_cfi_ref_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_dw_cfi_ref_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_dw_cfi_ref_va_gc_ (void *); +#define gt_ggc_m_18dw_discr_list_node(X) do { \ + if (X != NULL) gt_ggc_mx_dw_discr_list_node (X);\ + } while (0) +extern void gt_ggc_mx_dw_discr_list_node (void *); +#define gt_ggc_m_18dw_loc_list_struct(X) do { \ + if (X != NULL) gt_ggc_mx_dw_loc_list_struct (X);\ + } while (0) +extern void gt_ggc_mx_dw_loc_list_struct (void *); +#define gt_ggc_m_17dw_loc_descr_node(X) do { \ + if (X != NULL) gt_ggc_mx_dw_loc_descr_node (X);\ + } while (0) +extern void gt_ggc_mx_dw_loc_descr_node (void *); +#define gt_ggc_m_11dw_cfi_node(X) do { \ + if (X != NULL) gt_ggc_mx_dw_cfi_node (X);\ + } while (0) +extern void gt_ggc_mx_dw_cfi_node (void *); +#define gt_ggc_m_8typeinfo(X) do { \ + if (X != NULL) gt_ggc_mx_typeinfo (X);\ + } while (0) +extern void gt_ggc_mx_typeinfo (void *); +#define gt_ggc_m_10odr_type_d(X) do { \ + if (X != NULL) gt_ggc_mx_odr_type_d (X);\ + } while (0) +extern void gt_ggc_mx_odr_type_d (void *); +#define gt_ggc_m_14inline_summary(X) do { \ + if (X != NULL) gt_ggc_mx_inline_summary (X);\ + } while (0) +extern void gt_ggc_mx_inline_summary (void *); +#define gt_ggc_m_25ipa_agg_replacement_value(X) do { \ + if (X != NULL) gt_ggc_mx_ipa_agg_replacement_value (X);\ + } while (0) +extern void gt_ggc_mx_ipa_agg_replacement_value (void *); +#define gt_ggc_m_17lto_in_decl_state(X) do { \ 
+ if (X != NULL) gt_ggc_mx_lto_in_decl_state (X);\ + } while (0) +extern void gt_ggc_mx_lto_in_decl_state (void *); +#define gt_ggc_m_35hash_table_function_version_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_function_version_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_function_version_hasher_ (void *); +#define gt_ggc_m_27vec_alias_set_entry__va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_alias_set_entry__va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_alias_set_entry__va_gc_ (void *); +#define gt_ggc_m_15alias_set_entry(X) do { \ + if (X != NULL) gt_ggc_mx_alias_set_entry (X);\ + } while (0) +extern void gt_ggc_mx_alias_set_entry (void *); +#define gt_ggc_m_28hash_map_alias_set_hash_int_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_map_alias_set_hash_int_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_map_alias_set_hash_int_ (void *); +#define gt_ggc_m_24constant_descriptor_tree(X) do { \ + if (X != NULL) gt_ggc_mx_constant_descriptor_tree (X);\ + } while (0) +extern void gt_ggc_mx_constant_descriptor_tree (void *); +#define gt_ggc_m_42hash_map_symtab_node__symbol_priority_map_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_map_symtab_node__symbol_priority_map_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_map_symtab_node__symbol_priority_map_ (void *); +#define gt_ggc_m_26hash_table_asmname_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_asmname_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_asmname_hasher_ (void *); +#define gt_ggc_m_31hash_table_section_name_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_section_name_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_section_name_hasher_ (void *); +#define gt_ggc_m_12symbol_table(X) do { \ + if (X != NULL) gt_ggc_mx_symbol_table (X);\ + } while (0) +extern void gt_ggc_mx_symbol_table (void *); +#define gt_ggc_m_8asm_node(X) do { \ + if (X != NULL) gt_ggc_mx_asm_node (X);\ + } while (0) +extern void gt_ggc_mx_asm_node (void *); +#define gt_ggc_m_25cgraph_indirect_call_info(X) do { \ + if (X != NULL) gt_ggc_mx_cgraph_indirect_call_info (X);\ + } while (0) +extern void gt_ggc_mx_cgraph_indirect_call_info (void *); +#define gt_ggc_m_30hash_table_cgraph_edge_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_cgraph_edge_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_cgraph_edge_hasher_ (void *); +#define gt_ggc_m_11cgraph_edge(X) do { \ + if (X != NULL) gt_ggc_mx_cgraph_edge (X);\ + } while (0) +extern void gt_ggc_mx_cgraph_edge (void *); +#define gt_ggc_m_28cgraph_function_version_info(X) do { \ + if (X != NULL) gt_ggc_mx_cgraph_function_version_info (X);\ + } while (0) +extern void gt_ggc_mx_cgraph_function_version_info (void *); +#define gt_ggc_m_17cgraph_simd_clone(X) do { \ + if (X != NULL) gt_ggc_mx_cgraph_simd_clone (X);\ + } while (0) +extern void gt_ggc_mx_cgraph_simd_clone (void *); +#define gt_ggc_m_27vec_ipa_replace_map__va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_ipa_replace_map__va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_ipa_replace_map__va_gc_ (void *); +#define gt_ggc_m_15ipa_replace_map(X) do { \ + if (X != NULL) gt_ggc_mx_ipa_replace_map (X);\ + } while (0) +extern void gt_ggc_mx_ipa_replace_map (void *); +#define gt_ggc_m_18lto_file_decl_data(X) do { \ + if (X != NULL) gt_ggc_mx_lto_file_decl_data (X);\ + } while (0) +extern void gt_ggc_mx_lto_file_decl_data (void *); +#define gt_ggc_m_18section_hash_entry(X) do { \ + if (X != NULL) gt_ggc_mx_section_hash_entry (X);\ + } while (0) +extern void 
gt_ggc_mx_section_hash_entry (void *); +#define gt_ggc_m_20vec_ipa_ref_t_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_ipa_ref_t_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_ipa_ref_t_va_gc_ (void *); +#define gt_ggc_m_15vec_edge_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_edge_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_edge_va_gc_ (void *); +#define gt_ggc_m_11rtl_bb_info(X) do { \ + if (X != NULL) gt_ggc_mx_rtl_bb_info (X);\ + } while (0) +extern void gt_ggc_mx_rtl_bb_info (void *); +#define gt_ggc_m_22vec_basic_block_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_basic_block_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_basic_block_va_gc_ (void *); +#define gt_ggc_m_28hash_table_loop_exit_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_loop_exit_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_loop_exit_hasher_ (void *); +#define gt_ggc_m_10niter_desc(X) do { \ + if (X != NULL) gt_ggc_mx_niter_desc (X);\ + } while (0) +extern void gt_ggc_mx_niter_desc (void *); +#define gt_ggc_m_17vec_loop_p_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_loop_p_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_loop_p_va_gc_ (void *); +#define gt_ggc_m_10control_iv(X) do { \ + if (X != NULL) gt_ggc_mx_control_iv (X);\ + } while (0) +extern void gt_ggc_mx_control_iv (void *); +#define gt_ggc_m_4loop(X) do { \ + if (X != NULL) gt_ggc_mx_loop (X);\ + } while (0) +extern void gt_ggc_mx_loop (void *); +#define gt_ggc_m_9loop_exit(X) do { \ + if (X != NULL) gt_ggc_mx_loop_exit (X);\ + } while (0) +extern void gt_ggc_mx_loop_exit (void *); +#define gt_ggc_m_13nb_iter_bound(X) do { \ + if (X != NULL) gt_ggc_mx_nb_iter_bound (X);\ + } while (0) +extern void gt_ggc_mx_nb_iter_bound (void *); +#define gt_ggc_m_28hash_table_used_type_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_used_type_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_used_type_hasher_ (void *); +#define gt_ggc_m_24types_used_by_vars_entry(X) do { \ + if (X != NULL) gt_ggc_mx_types_used_by_vars_entry (X);\ + } while (0) +extern void gt_ggc_mx_types_used_by_vars_entry (void *); +#define gt_ggc_m_14hash_set_tree_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_set_tree_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_set_tree_ (void *); +#define gt_ggc_m_17language_function(X) do { \ + if (X != NULL) gt_ggc_mx_language_function (X);\ + } while (0) +extern void gt_ggc_mx_language_function (void *); +#define gt_ggc_m_5loops(X) do { \ + if (X != NULL) gt_ggc_mx_loops (X);\ + } while (0) +extern void gt_ggc_mx_loops (void *); +#define gt_ggc_m_18control_flow_graph(X) do { \ + if (X != NULL) gt_ggc_mx_control_flow_graph (X);\ + } while (0) +extern void gt_ggc_mx_control_flow_graph (void *); +#define gt_ggc_m_9eh_status(X) do { \ + if (X != NULL) gt_ggc_mx_eh_status (X);\ + } while (0) +extern void gt_ggc_mx_eh_status (void *); +#define gt_ggc_m_11stack_usage(X) do { \ + if (X != NULL) gt_ggc_mx_stack_usage (X);\ + } while (0) +extern void gt_ggc_mx_stack_usage (void *); +#define gt_ggc_m_11frame_space(X) do { \ + if (X != NULL) gt_ggc_mx_frame_space (X);\ + } while (0) +extern void gt_ggc_mx_frame_space (void *); +#define gt_ggc_m_17rtx_constant_pool(X) do { \ + if (X != NULL) gt_ggc_mx_rtx_constant_pool (X);\ + } while (0) +extern void gt_ggc_mx_rtx_constant_pool (void *); +#define gt_ggc_m_11dw_fde_node(X) do { \ + if (X != NULL) gt_ggc_mx_dw_fde_node (X);\ + } while (0) +extern void gt_ggc_mx_dw_fde_node (void *); +#define gt_ggc_m_9gimple_df(X) do { \ + if (X != NULL) 
gt_ggc_mx_gimple_df (X);\ + } while (0) +extern void gt_ggc_mx_gimple_df (void *); +#define gt_ggc_m_27vec_call_site_record_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_call_site_record_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_call_site_record_va_gc_ (void *); +#define gt_ggc_m_16vec_uchar_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_uchar_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_uchar_va_gc_ (void *); +#define gt_ggc_m_18call_site_record_d(X) do { \ + if (X != NULL) gt_ggc_mx_call_site_record_d (X);\ + } while (0) +extern void gt_ggc_mx_call_site_record_d (void *); +#define gt_ggc_m_14sequence_stack(X) do { \ + if (X != NULL) gt_ggc_mx_sequence_stack (X);\ + } while (0) +extern void gt_ggc_mx_sequence_stack (void *); +#define gt_ggc_m_15target_libfuncs(X) do { \ + if (X != NULL) gt_ggc_mx_target_libfuncs (X);\ + } while (0) +extern void gt_ggc_mx_target_libfuncs (void *); +#define gt_ggc_m_26hash_table_libfunc_hasher_(X) do { \ + if (X != NULL) gt_ggc_mx_hash_table_libfunc_hasher_ (X);\ + } while (0) +extern void gt_ggc_mx_hash_table_libfunc_hasher_ (void *); +#define gt_ggc_m_13libfunc_entry(X) do { \ + if (X != NULL) gt_ggc_mx_libfunc_entry (X);\ + } while (0) +extern void gt_ggc_mx_libfunc_entry (void *); +#define gt_ggc_m_21vec_alias_pair_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_alias_pair_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_alias_pair_va_gc_ (void *); +#define gt_ggc_m_12tree_vec_map(X) do { \ + if (X != NULL) gt_ggc_mx_tree_vec_map (X);\ + } while (0) +extern void gt_ggc_mx_tree_vec_map (void *); +#define gt_ggc_m_12tree_int_map(X) do { \ + if (X != NULL) gt_ggc_mx_tree_int_map (X);\ + } while (0) +extern void gt_ggc_mx_tree_int_map (void *); +#define gt_ggc_m_13tree_decl_map(X) do { \ + if (X != NULL) gt_ggc_mx_tree_decl_map (X);\ + } while (0) +extern void gt_ggc_mx_tree_decl_map (void *); +#define gt_ggc_m_8tree_map(X) do { \ + if (X != NULL) gt_ggc_mx_tree_map (X);\ + } while (0) +extern void gt_ggc_mx_tree_map (void *); +#define gt_ggc_m_14lang_tree_node(X) do { \ + if (X != NULL) gt_ggc_mx_lang_tree_node (X);\ + } while (0) +extern void gt_ggc_mx_lang_tree_node (void *); +#define gt_ggc_m_14target_globals(X) do { \ + if (X != NULL) gt_ggc_mx_target_globals (X);\ + } while (0) +extern void gt_ggc_mx_target_globals (void *); +#define gt_ggc_m_24tree_statement_list_node(X) do { \ + if (X != NULL) gt_ggc_mx_tree_statement_list_node (X);\ + } while (0) +extern void gt_ggc_mx_tree_statement_list_node (void *); +#define gt_ggc_m_11symtab_node(X) do { \ + if (X != NULL) gt_ggc_mx_symtab_node (X);\ + } while (0) +extern void gt_ggc_mx_symtab_node (void *); +#define gt_ggc_m_9lang_decl(X) do { \ + if (X != NULL) gt_ggc_mx_lang_decl (X);\ + } while (0) +extern void gt_ggc_mx_lang_decl (void *); +#define gt_ggc_m_9lang_type(X) do { \ + if (X != NULL) gt_ggc_mx_lang_type (X);\ + } while (0) +extern void gt_ggc_mx_lang_type (void *); +#define gt_ggc_m_15vec_tree_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_tree_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_tree_va_gc_ (void *); +#define gt_ggc_m_26vec_constructor_elt_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_constructor_elt_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_constructor_elt_va_gc_ (void *); +#define gt_ggc_m_10die_struct(X) do { \ + if (X != NULL) gt_ggc_mx_die_struct (X);\ + } while (0) +extern void gt_ggc_mx_die_struct (void *); +#define gt_ggc_m_14range_info_def(X) do { \ + if (X != NULL) gt_ggc_mx_range_info_def (X);\ + } while (0) +extern void 
gt_ggc_mx_range_info_def (void *); +#define gt_ggc_m_12ptr_info_def(X) do { \ + if (X != NULL) gt_ggc_mx_ptr_info_def (X);\ + } while (0) +extern void gt_ggc_mx_ptr_info_def (void *); +#define gt_ggc_m_15cgraph_rtl_info(X) do { \ + if (X != NULL) gt_ggc_mx_cgraph_rtl_info (X);\ + } while (0) +extern void gt_ggc_mx_cgraph_rtl_info (void *); +#define gt_ggc_m_10target_rtl(X) do { \ + if (X != NULL) gt_ggc_mx_target_rtl (X);\ + } while (0) +extern void gt_ggc_mx_target_rtl (void *); +#define gt_ggc_m_8function(X) do { \ + if (X != NULL) gt_ggc_mx_function (X);\ + } while (0) +extern void gt_ggc_mx_function (void *); +#define gt_ggc_m_23constant_descriptor_rtx(X) do { \ + if (X != NULL) gt_ggc_mx_constant_descriptor_rtx (X);\ + } while (0) +extern void gt_ggc_mx_constant_descriptor_rtx (void *); +#define gt_ggc_m_11fixed_value(X) do { \ + if (X != NULL) gt_ggc_mx_fixed_value (X);\ + } while (0) +extern void gt_ggc_mx_fixed_value (void *); +#define gt_ggc_m_10real_value(X) do { \ + if (X != NULL) gt_ggc_mx_real_value (X);\ + } while (0) +extern void gt_ggc_mx_real_value (void *); +#define gt_ggc_m_14vec_rtx_va_gc_(X) do { \ + if (X != NULL) gt_ggc_mx_vec_rtx_va_gc_ (X);\ + } while (0) +extern void gt_ggc_mx_vec_rtx_va_gc_ (void *); +#define gt_ggc_m_12object_block(X) do { \ + if (X != NULL) gt_ggc_mx_object_block (X);\ + } while (0) +extern void gt_ggc_mx_object_block (void *); +#define gt_ggc_m_9reg_attrs(X) do { \ + if (X != NULL) gt_ggc_mx_reg_attrs (X);\ + } while (0) +extern void gt_ggc_mx_reg_attrs (void *); +#define gt_ggc_m_9mem_attrs(X) do { \ + if (X != NULL) gt_ggc_mx_mem_attrs (X);\ + } while (0) +extern void gt_ggc_mx_mem_attrs (void *); +#define gt_ggc_m_13coverage_data(X) do { \ + if (X != NULL) gt_ggc_mx_coverage_data (X);\ + } while (0) +extern void gt_ggc_mx_coverage_data (void *); +#define gt_ggc_m_34generic_wide_int_wide_int_storage_(X) do { \ + if (X != NULL) gt_ggc_mx_generic_wide_int_wide_int_storage_ (X);\ + } while (0) +extern void gt_ggc_mx_generic_wide_int_wide_int_storage_ (void *); +#define gt_ggc_m_14bitmap_obstack(X) do { \ + if (X != NULL) gt_ggc_mx_bitmap_obstack (X);\ + } while (0) +extern void gt_ggc_mx_bitmap_obstack (void *); +#define gt_ggc_m_14bitmap_element(X) do { \ + if (X != NULL) gt_ggc_mx_bitmap_element (X);\ + } while (0) +extern void gt_ggc_mx_bitmap_element (void *); +#define gt_ggc_m_16machine_function(X) do { \ + if (X != NULL) gt_ggc_mx_machine_function (X);\ + } while (0) +extern void gt_ggc_mx_machine_function (void *); +#define gt_ggc_m_17stack_local_entry(X) do { \ + if (X != NULL) gt_ggc_mx_stack_local_entry (X);\ + } while (0) +extern void gt_ggc_mx_stack_local_entry (void *); +#define gt_ggc_m_15basic_block_def(X) do { \ + if (X != NULL) gt_ggc_mx_basic_block_def (X);\ + } while (0) +extern void gt_ggc_mx_basic_block_def (void *); +#define gt_ggc_m_8edge_def(X) do { \ + if (X != NULL) gt_ggc_mx_edge_def (X);\ + } while (0) +extern void gt_ggc_mx_edge_def (void *); +#define gt_ggc_m_15cl_optimization(X) do { \ + if (X != NULL) gt_ggc_mx_cl_optimization (X);\ + } while (0) +extern void gt_ggc_mx_cl_optimization (void *); +#define gt_ggc_m_16cl_target_option(X) do { \ + if (X != NULL) gt_ggc_mx_cl_target_option (X);\ + } while (0) +extern void gt_ggc_mx_cl_target_option (void *); +#define gt_ggc_m_7section(X) do { \ + if (X != NULL) gt_ggc_mx_section (X);\ + } while (0) +extern void gt_ggc_mx_section (void *); +#define gt_ggc_m_6gimple(X) do { \ + if (X != NULL) gt_ggc_mx_gimple (X);\ + } while (0) +extern void gt_ggc_mx_gimple (void *); 
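// (Illustrative aside, not part of the generated gt-cache include.)  Every marker macro
// in this listing has the same shape: a NULL check wrapping the type-specific
// gt_ggc_mx_* walker.  A hand-written walker for a hypothetical GC-managed struct would
// use them like this; gengtype emits the real equivalents automatically:
struct demo_cache_entry {
  tree decl;     /* GC-managed pointer, must be marked */
  int value;     /* plain data, not walked */
};

void gt_ggc_mx_demo_cache_entry(void *x_p) {
  struct demo_cache_entry *x = (struct demo_cache_entry *)x_p;
  if (ggc_test_and_set_mark(x))
    gt_ggc_m_9tree_node(x->decl);   /* marker macro defined a few entries below */
}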
+#define gt_ggc_m_9rtvec_def(X) do { \ + if (X != NULL) gt_ggc_mx_rtvec_def (X);\ + } while (0) +extern void gt_ggc_mx_rtvec_def (void *); +#define gt_ggc_m_7rtx_def(X) do { \ + if (X != NULL) gt_ggc_mx_rtx_def (X);\ + } while (0) +extern void gt_ggc_mx_rtx_def (void *); +#define gt_ggc_m_11bitmap_head(X) do { \ + if (X != NULL) gt_ggc_mx_bitmap_head (X);\ + } while (0) +extern void gt_ggc_mx_bitmap_head (void *); +#define gt_ggc_m_6answer(X) do { \ + if (X != NULL) gt_ggc_mx_answer (X);\ + } while (0) +extern void gt_ggc_mx_answer (void *); +#define gt_ggc_m_9cpp_macro(X) do { \ + if (X != NULL) gt_ggc_mx_cpp_macro (X);\ + } while (0) +extern void gt_ggc_mx_cpp_macro (void *); +#define gt_ggc_m_9cpp_token(X) do { \ + if (X != NULL) gt_ggc_mx_cpp_token (X);\ + } while (0) +extern void gt_ggc_mx_cpp_token (void *); +#define gt_ggc_m_9line_maps(X) do { \ + if (X != NULL) gt_ggc_mx_line_maps (X);\ + } while (0) +extern void gt_ggc_mx_line_maps (void *); +#define gt_ggc_m_9tree_node(X) do { \ + if (X != NULL) gt_ggc_mx_tree_node (X);\ + } while (0) +#define gt_ggc_mx_tree_node gt_ggc_mx_lang_tree_node + +/* functions code */ + +void +gt_ggc_mx_hash_table_WeakVHCacheHasher_ (void *x_p) +{ + hash_table * const x = (hash_table *)x_p; + if (ggc_test_and_set_mark (x)) + { + gt_ggc_mx (x); + } +} + +void +gt_ggc_mx (struct WeakVHCacheHasher& x_r ATTRIBUTE_UNUSED) +{ + struct WeakVHCacheHasher * ATTRIBUTE_UNUSED x = &x_r; +} + +void +gt_ggc_mx_hash_table_TypeCacheHaser_ (void *x_p) +{ + hash_table * const x = (hash_table *)x_p; + if (ggc_test_and_set_mark (x)) + { + gt_ggc_mx (x); + } +} + +void +gt_ggc_mx (struct TypeCacheHaser& x_r ATTRIBUTE_UNUSED) +{ + struct TypeCacheHaser * ATTRIBUTE_UNUSED x = &x_r; +} + +void +gt_ggc_mx_hash_table_intCacheHasher_ (void *x_p) +{ + hash_table * const x = (hash_table *)x_p; + if (ggc_test_and_set_mark (x)) + { + gt_ggc_mx (x); + } +} + +void +gt_ggc_mx (struct intCacheHasher& x_r ATTRIBUTE_UNUSED) +{ + struct intCacheHasher * ATTRIBUTE_UNUSED x = &x_r; +} + +/* GC roots. */ + +EXPORTED_CONST struct ggc_root_tab gt_ggc_r__gt_cache_inc[] = { + { + &WeakVHCache, + 1, + sizeof (WeakVHCache), + >_ggc_mx_hash_table_WeakVHCacheHasher_, + NULL + }, + { + &TypeCache, + 1, + sizeof (TypeCache), + >_ggc_mx_hash_table_TypeCacheHaser_, + NULL + }, + { + &intCache, + 1, + sizeof (intCache), + >_ggc_mx_hash_table_intCacheHasher_, + NULL + }, + LAST_GGC_ROOT_TAB +}; + +void +gt_clear_caches__gt_cache_inc () +{ + gt_cleare_cache (WeakVHCache); + gt_cleare_cache (TypeCache); + gt_cleare_cache (intCache); +} + Index: include/mips/dragonegg/Target.h =================================================================== --- /dev/null +++ include/mips/dragonegg/Target.h @@ -0,0 +1,27 @@ +//==----- Target.h - Target hooks for GCC to LLVM conversion -----*- C++ -*-==// +// +// Copyright (C) 2017 Leslie Zhai +// Copyright (C) 2007 to 2013 Anton Korobeynikov, Duncan Sands et al. +// +// This file is part of DragonEgg. +// +// DragonEgg is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later version. +// +// DragonEgg is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +// A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License along with +// DragonEgg; see the file COPYING. If not, write to the Free Software +// Foundation, 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA. +// +//===----------------------------------------------------------------------===// +// This file declares some target-specific hooks for GCC to LLVM conversion. +//===----------------------------------------------------------------------===// + +#ifndef DRAGONEGG_TARGET_H +#define DRAGONEGG_TARGET_H + +#endif /* DRAGONEGG_TARGET_H */ Index: src/Aliasing.cpp =================================================================== --- src/Aliasing.cpp +++ src/Aliasing.cpp @@ -22,6 +22,7 @@ //===----------------------------------------------------------------------===// // Plugin headers +#include "dragonegg/Internals.h" #include "dragonegg/Aliasing.h" #include "llvm/ADT/SmallVector.h" @@ -59,7 +60,12 @@ using namespace llvm; +// https://reviews.llvm.org/D19094 +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) +static LLVMContext Context; +#else static LLVMContext &Context = getGlobalContext(); +#endif /// getTBAARoot - Return the root of the TBAA tree for this compilation unit. static MDNode *getTBAARoot() { Index: src/Backend.cpp =================================================================== --- src/Backend.cpp +++ src/Backend.cpp @@ -31,7 +31,21 @@ // LLVM headers #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" +#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 9) +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 9) +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" +#endif +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetMachine.h" +#else #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/PassManager.h" +#include "llvm/Target/TargetLibraryInfo.h" +#endif #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IRPrintingPasses.h" @@ -39,12 +53,10 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/SubtargetFeature.h" -#include "llvm/PassManager.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" @@ -76,6 +88,12 @@ #include "diagnostic.h" #include "flags.h" #include "gcc-plugin.h" +#if (GCC_MAJOR > 4) +#include "cgraph.h" +#include "stor-layout.h" +#include "context.h" +#include "stringpool.h" +#endif #include "intl.h" #include "langhooks.h" #include "output.h" @@ -85,7 +103,11 @@ #endif #include "target.h" // For targetm. #include "toplev.h" +#if (GCC_MAJOR == 4) #include "tree-flow.h" +#else +#include "tree-cfg.h" +#endif #include "tree-pass.h" #include "version.h" @@ -98,8 +120,9 @@ // Trees header. #include "dragonegg/Trees.h" -#if (GCC_MAJOR != 4) -#error Unsupported GCC major version +#if (GCC_MAJOR < 4 || LLVM_VERSION_MAJOR < 3) +#pragma error("Experimental only support GCC v4.x, v5.x, v6.x, v7.x, v8.x and " + "LLVM v3.x, v4.x, v5.x") #endif using namespace llvm; @@ -109,7 +132,7 @@ // Whether -fno-builtin was specified. // In GCC < 4.6, this variable is only defined in C family front ends. 
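// Note on the version checks introduced below: GCC_VERSION_CODE / GCC_VERSION and
// LLVM_VERSION_CODE / LLVM_VERSION are helper macros whose definitions are not part of
// this excerpt.  One workable shape (an assumption, shown only so the comparisons read
// unambiguously) is:
#define GCC_VERSION(major, minor)   (((major) << 8) | (minor))
#define GCC_VERSION_CODE            GCC_VERSION(GCC_MAJOR, GCC_MINOR)
#define LLVM_VERSION(major, minor)  (((major) << 8) | (minor))
#define LLVM_VERSION_CODE           LLVM_VERSION(LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR)
// GCC_MAJOR/GCC_MINOR come from the plugin's build flags and LLVM_VERSION_MAJOR/
// LLVM_VERSION_MINOR from llvm/Config/llvm-config.h, so a test such as
// "#if GCC_VERSION_CODE < GCC_VERSION(4, 6)" is the multi-major-version-safe
// replacement for the old "#if (GCC_MINOR < 6)" style checks this patch retires.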
-#if (GCC_MINOR < 6) +#if GCC_VERSION_CODE < GCC_VERSION(4, 6) extern int flag_no_builtin __attribute__((weak)); #endif @@ -132,7 +155,12 @@ TargetMachine *TheTarget = 0; TargetFolder *TheFolder = 0; raw_ostream *OutStream = 0; // Stream to write assembly code to. -formatted_raw_ostream FormattedOutStream; +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) +std::shared_ptr +#else +formatted_raw_ostream +#endif + FormattedOutStream; static bool DebugPassArguments; static bool DebugPassStructure; @@ -151,15 +179,27 @@ /// PerFunctionPasses - This is the list of cleanup passes run per-function /// as each is compiled. In cases where we are not doing IPO, it includes the /// code generator. +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) +static legacy::FunctionPassManager *PerFunctionPasses = 0; +static legacy::PassManager *PerModulePasses = 0; +static legacy::PassManager *CodeGenPasses = 0; +#else static FunctionPassManager *PerFunctionPasses = 0; static PassManager *PerModulePasses = 0; static PassManager *CodeGenPasses = 0; +#endif + +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) +static LLVMContext TheContext; +#else +static LLVMContext &TheContext = getGlobalContext(); +#endif static void createPerFunctionOptimizationPasses(); static void createPerModuleOptimizationPasses(); // Compatibility hacks for older versions of GCC. -#if (GCC_MINOR < 8) +#if GCC_VERSION_CODE < GCC_VERSION(4, 8) static struct cgraph_node *cgraph_symbol(struct cgraph_node *N) { return N; } static struct varpool_node *varpool_symbol(struct varpool_node *N) { return N; } @@ -178,6 +218,28 @@ #define FOR_EACH_VARIABLE(node) \ for ((node) = varpool_nodes; (node); (node) = (node)->next) +#elif (GCC_MAJOR > 4) + +#define ipa_ref_list_referring_iterate(L,I,P) \ + (L)->referring.iterate ((I), &(P)) + +static inline struct cgraph_node * +ipa_ref_referring_node(struct ipa_ref *ref) { + return reinterpret_cast(ref->referring); +} + +static inline struct varpool_node * +ipa_ref_referring_varpool_node(struct ipa_ref *ref) { + return reinterpret_cast(ref->referring); +} + +static symtab_node *cgraph_symbol(cgraph_node *N) { + return symtab_node::get(N->orig_decl); +} +static symtab_node *varpool_symbol(varpool_node *N) { + return symtab_node::get(N->get_constructor()); +} + #else static symtab_node_base *cgraph_symbol(cgraph_node *N) { return &N->symbol; } @@ -314,10 +376,20 @@ // TODO: Change getTypeSizeInBits for aggregate types so it is no longer // rounded up to the alignment. uint64_t gcc_size = getInt64(DECL_SIZE(decl), true); - const DataLayout *DL = TheTarget->getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + TheTarget->createDataLayout(); +#else + TheTarget->getSubtargetImpl()->getDataLayout(); +#endif unsigned Align = 8 * DL->getABITypeAlignment(Ty); - return TheTarget->getSubtargetImpl()->getDataLayout()->getTypeAllocSizeInBits( - Ty) == ((gcc_size + Align - 1) / Align) * Align; + return +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + TheTarget->createDataLayout()->getTypeAllocSizeInBits(Ty) +#else + TheTarget->getSubtargetImpl()->getDataLayout()->getTypeAllocSizeInBits(Ty) +#endif + == ((gcc_size + Align - 1) / Align) * Align; } #endif @@ -468,7 +540,12 @@ // The target can set LLVM_SET_RELOC_MODEL to configure the relocation model // used by the LLVM backend. 
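// LLVM 3.9 removed Reloc::Default, and createTargetMachine() now takes an
// Optional<Reloc::Model>, where an unset value means "let the backend decide".  A
// hedged sketch of that alternative to hard-coding Reloc::Static (pickRelocModel is a
// hypothetical helper, not patch code; requires llvm/ADT/Optional.h):
static llvm::Optional<llvm::Reloc::Model> pickRelocModel() {
  llvm::Optional<llvm::Reloc::Model> RM;   // unset == backend default
  if (flag_pic)
    RM = llvm::Reloc::PIC_;                // honour -fpic/-fPIC
  return RM;
}
// On LLVM >= 3.9 the result can be passed straight through as the relocation-model
// argument of createTargetMachine().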
- Reloc::Model RelocModel = Reloc::Default; + Reloc::Model RelocModel = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Reloc::Static; +#else + Reloc::Default; +#endif #ifdef LLVM_SET_RELOC_MODEL LLVM_SET_RELOC_MODEL(RelocModel); #endif @@ -482,13 +559,16 @@ TargetOptions Options; - // Set frame pointer elimination mode. if (flag_omit_frame_pointer) { // Eliminate frame pointers everywhere. +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) Options.NoFramePointerElim = false; +#endif } else { // Keep frame pointers everywhere. +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) Options.NoFramePointerElim = true; +#endif } // If a target has an option to eliminate frame pointers in leaf functions // only then it should set @@ -511,8 +591,8 @@ Options.NoNaNsFPMath = flag_finite_math_only; Options.NoZerosInBSS = !flag_zero_initialized_in_bss; Options.UnsafeFPMath = -#if (GCC_MINOR > 5) - fast_math_flags_set_p(&global_options); +#if GCC_VERSION_CODE > GCC_VERSION(4, 5) + fast_math_flags_set_p(&global_options); #else fast_math_flags_set_p(); #endif @@ -522,11 +602,17 @@ // TODO: DisableTailCalls. // TODO: TrapFuncName. // TODO: -fsplit-stack + // https://reviews.llvm.org/D19733 +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) Options.PositionIndependentExecutable = flag_pie; +#endif #ifdef LLVM_SET_TARGET_MACHINE_OPTIONS + // https://reviews.llvm.org/D9830 +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) LLVM_SET_TARGET_MACHINE_OPTIONS(Options); #endif +#endif // Binutils does not yet support the use of file directives with an explicit // directory. FIXME: Once GCC learns to detect support for this, condition // on what GCC detected. @@ -534,14 +620,18 @@ TheTarget = TME->createTargetMachine(TargetTriple, CPU, FeatureStr, Options, RelocModel, CMModel, CodeGenOptLevel()); - assert(TheTarget->getSubtargetImpl()->getDataLayout()->isBigEndian() == - BYTES_BIG_ENDIAN); + +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + assert(TheTarget->createDataLayout()->isBigEndian() == BYTES_BIG_ENDIAN); +#else + assert(TheTarget->getSubtargetImpl()->getDataLayout()->isBigEndian() == BYTES_BIG_ENDIAN); +#endif } /// output_ident - Insert a .ident directive that identifies the plugin. static void output_ident(const char *ident_str) { const char *ident_asm_op = "\t.ident\t"; -#if (GCC_MINOR < 8) +#if GCC_VERSION_CODE < GCC_VERSION(4, 8) #ifdef IDENT_ASM_OP ident_asm_op = IDENT_ASM_OP; #endif @@ -550,7 +640,7 @@ Directive += "\""; Directive += ident_str; Directive += " LLVM: "; - Directive += LLVM_VERSION; + Directive += LLVM_VERSION_STRING; Directive += "\""; TheModule->setModuleInlineAsm(Directive); } @@ -559,9 +649,9 @@ static void CreateModule(const std::string &TargetTriple) { // Create the module itself. StringRef ModuleID = main_input_filename ? main_input_filename : ""; - TheModule = new Module(ModuleID, getGlobalContext()); + TheModule = new Module(ModuleID, TheContext); -#if (GCC_MINOR < 8) +#if GCC_VERSION_CODE < GCC_VERSION(4, 8) #ifdef IDENT_ASM_OP if (!flag_no_ident) { std::string IdentString; @@ -583,9 +673,14 @@ // Install information about the target triple and data layout into the module // for optimizer use. 
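// Since LLVM 3.7 (the D11103 change cited just below), TargetMachine::createDataLayout()
// returns a DataLayout by value and Module::setDataLayout() accepts it directly, so the
// string round-trip is only needed on the old code path.  Minimal sketch, assuming TM is
// the already-created target machine and M the module being configured:
static void applyDataLayout(llvm::TargetMachine &TM, llvm::Module &M) {
  M.setDataLayout(TM.createDataLayout());   // by-value DataLayout, no string detour
}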
TheModule->setTargetTriple(TargetTriple); + // https://reviews.llvm.org/D11103 +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + TheModule->setDataLayout(TheTarget->createDataLayout()); +#else TheModule->setDataLayout(TheTarget->getSubtargetImpl() ->getDataLayout() ->getStringRepresentation()); +#endif } /// flag_default_initialize_globals - Whether global variables with no explicit @@ -646,7 +741,11 @@ // Create a module to hold the generated LLVM IR. CreateModule(TargetTriple); +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + TheFolder = new TargetFolder(TheTarget->createDataLayout()); +#else TheFolder = new TargetFolder(TheTarget->getSubtargetImpl()->getDataLayout()); +#endif if (debug_info_level > DINFO_LEVEL_NONE) { TheDebugInfo = new DebugInfo(TheModule); @@ -664,8 +763,13 @@ // PassBuilder.SLPVectorize = flag_tree_slp_vectorize; PassBuilder.LoopVectorize = flag_tree_vectorize; +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + PassBuilder.LibraryInfo = + new TargetLibraryInfoImpl((Triple) TheModule->getTargetTriple()); +#else PassBuilder.LibraryInfo = new TargetLibraryInfo((Triple) TheModule->getTargetTriple()); +#endif if (flag_no_simplify_libcalls) PassBuilder.LibraryInfo->disableAllFunctions(); @@ -683,8 +787,13 @@ if (EC) report_fatal_error(EC.message()); + // https://reviews.llvm.org/rL234535 +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + FormattedOutStream = std::make_shared(*OutStream); +#else FormattedOutStream.setStream(*OutStream, formatted_raw_ostream::PRESERVE_STREAM); +#endif } static void createPerFunctionOptimizationPasses() { @@ -693,9 +802,18 @@ // Create and set up the per-function pass manager. // FIXME: Move the code generator to be function-at-a-time. +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + PerFunctionPasses = new legacy::FunctionPassManager(TheModule); +#else PerFunctionPasses = new FunctionPassManager(TheModule); +#endif + // https://reviews.llvm.org/D7992 +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) PerFunctionPasses->add(new DataLayoutPass()); +#endif +#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 3) && LLVM_VERSION_CODE <= LLVM_VERSION(3, 6) TheTarget->addAnalysisPasses(*PerFunctionPasses); +#endif #ifndef NDEBUG PerFunctionPasses->add(createVerifierPass()); @@ -711,7 +829,11 @@ // FIXME: This is disabled right now until bugs can be worked out. Reenable // this for fast -O0 compiles! if (!EmitIR && 0) { +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + legacy::FunctionPassManager *PM = PerFunctionPasses; +#else FunctionPassManager *PM = PerFunctionPasses; +#endif // Request that addPassesToEmitFile run the Verifier after running // passes which modify the IR. 
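// The addPassesToEmitFile() call below is compiled out for LLVM >= 3.9 because, since
// LLVM 3.7, the code generator writes to a raw_pwrite_stream rather than a
// formatted_raw_ostream.  Sketch of the newer call shape (addEmitPasses is a
// hypothetical helper; OS would have to be the underlying raw_fd_ostream):
static bool addEmitPasses(llvm::TargetMachine &TM, llvm::legacy::PassManagerBase &PM,
                          llvm::raw_pwrite_stream &OS,
                          llvm::TargetMachine::CodeGenFileType FT, bool DisableVerify) {
  // Mirrors addPassesToEmitFile(): returns true if the target cannot emit this file type.
  return TM.addPassesToEmitFile(PM, OS, FT, DisableVerify);
}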
@@ -727,8 +849,12 @@ TargetMachine::CodeGenFileType CGFT = TargetMachine::CGFT_AssemblyFile; if (EmitObj) CGFT = TargetMachine::CGFT_ObjectFile; +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) if (TheTarget->addPassesToEmitFile(*PM, FormattedOutStream, CGFT, DisableVerify)) +#else + if (0) +#endif llvm_unreachable("Error interfacing to target machine!"); } @@ -739,9 +865,18 @@ if (PerModulePasses) return; +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + PerModulePasses = new legacy::PassManager(); +#else PerModulePasses = new PassManager(); +#endif + // https://reviews.llvm.org/D7992 +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) PerModulePasses->add(new DataLayoutPass()); +#endif +#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 3) && LLVM_VERSION_CODE <= LLVM_VERSION(3, 6) TheTarget->addAnalysisPasses(*PerModulePasses); +#endif Pass *InliningPass; if (!LLVMIROptimizeArg) @@ -763,7 +898,11 @@ } else { // Run the always-inline pass to handle functions marked as always_inline. // TODO: Consider letting the GCC inliner do this. +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 9) + InliningPass = createAlwaysInlinerLegacyPass(); +#else InliningPass = createAlwaysInlinerPass(); +#endif } PassBuilder.OptLevel = ModuleOptLevel(); @@ -783,9 +922,16 @@ // FIXME: This is disabled right now until bugs can be worked out. Reenable // this for fast -O0 compiles! if (PerModulePasses || 1) { +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + legacy::PassManager *PM = CodeGenPasses = new legacy::PassManager(); +#else PassManager *PM = CodeGenPasses = new PassManager(); + // FIXME: https://reviews.llvm.org/D7992 PM->add(new DataLayoutPass()); +#endif +#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 3) && LLVM_VERSION_CODE <= LLVM_VERSION(3, 6) TheTarget->addAnalysisPasses(*PM); +#endif // Request that addPassesToEmitFile run the Verifier after running // passes which modify the IR. @@ -801,9 +947,11 @@ TargetMachine::CodeGenFileType CGFT = TargetMachine::CGFT_AssemblyFile; if (EmitObj) CGFT = TargetMachine::CGFT_ObjectFile; +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) if (TheTarget->addPassesToEmitFile(*PM, FormattedOutStream, CGFT, DisableVerify)) llvm_unreachable("Error interfacing to target machine!"); +#endif } } } @@ -816,7 +964,7 @@ std::vector StructInit; StructInit.resize(2); - LLVMContext &Context = getGlobalContext(); + LLVMContext &Context = TheContext; Type *FPTy = FunctionType::get(Type::getVoidTy(Context), std::vector(), false); @@ -840,7 +988,7 @@ /// global if possible. Constant *ConvertMetadataStringToGV(const char *str) { - Constant *Init = ConstantDataArray::getString(getGlobalContext(), str); + Constant *Init = ConstantDataArray::getString(TheContext, str); // Use cached string if it exists. static std::map StringCSTCache; @@ -861,7 +1009,7 @@ /// AddAnnotateAttrsToGlobal - Adds decls that have a annotate attribute to a /// vector to be emitted later. void AddAnnotateAttrsToGlobal(GlobalValue *GV, tree decl) { - LLVMContext &Context = getGlobalContext(); + LLVMContext &Context = TheContext; // Handle annotate attribute on global. 
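// LLVM 3.9 dropped llvm::getGlobalContext() (https://reviews.llvm.org/D19094, cited in
// the Aliasing.cpp hunk earlier in this patch), which is why the conversion code now
// either keeps a file-static TheContext or recovers the context from an existing IR
// object, as in the hunk above.  Minimal sketch of the latter pattern (contextOf is an
// illustrative name, not patch code):
static llvm::LLVMContext &contextOf(llvm::Type *Ty) {
  return Ty->getContext();   // every Type, Value and Module knows its owning context
}
// e.g. llvm::Type::getInt32Ty(contextOf(Ty)) replaces
//      llvm::Type::getInt32Ty(llvm::getGlobalContext()).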
tree annotateAttr = lookup_attribute("annotate", DECL_ATTRIBUTES(decl)); @@ -945,9 +1093,17 @@ target = TREE_CHAIN(target); if (isa(target)) { +#if (GCC_MAJOR > 4) + if (struct cgraph_node *fnode = cgraph_node::get_for_asmname(target)) +#else if (struct cgraph_node *fnode = cgraph_node_for_asm(target)) +#endif target = cgraph_symbol(fnode)->decl; +#if (GCC_MAJOR > 4) + else if (struct varpool_node *vnode = varpool_node::get_for_asmname(target)) +#else else if (struct varpool_node *vnode = varpool_node_for_asm(target)) +#endif target = varpool_symbol(vnode)->decl; } @@ -1011,7 +1167,7 @@ /// emit_varpool_aliases - Output any aliases associated with the given varpool /// node. static void emit_varpool_aliases(struct varpool_node *node) { -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) for (struct varpool_node *alias = node->extra_name; alias; alias = alias->next) emit_alias(alias->decl, node->decl); @@ -1026,7 +1182,13 @@ if (lookup_attribute("weakref", DECL_ATTRIBUTES(varpool_symbol(alias)->decl))) continue; - emit_alias(varpool_symbol(alias)->decl, alias->alias_of); + emit_alias(varpool_symbol(alias)->decl, +#if (GCC_MAJOR > 4) + alias->get_constructor() +#else + alias->alias_of +#endif + ); emit_varpool_aliases(alias); } #endif @@ -1136,14 +1298,23 @@ // is not taken). However if -fmerge-all-constants was specified then allow // merging even if the address was taken. Note that merging will only happen // if the global is constant or later proved to be constant by the optimizers. - GV->setUnnamedAddr(flag_merge_constants >= 2 || !TREE_ADDRESSABLE(decl)); + GV->setUnnamedAddr(flag_merge_constants >= 2 || !TREE_ADDRESSABLE(decl) +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ? llvm::GlobalValue::UnnamedAddr::Global + : llvm::GlobalValue::UnnamedAddr::Local +#endif + ); handleVisibility(decl, GV); // Set the section for the global. if (isa(decl)) { if (DECL_SECTION_NAME(decl)) { - GV->setSection(TREE_STRING_POINTER(DECL_SECTION_NAME(decl))); +#if (GCC_MAJOR > 4) + GV->setSection(StringRef(DECL_SECTION_NAME(decl))); +#else + GV->setSection(StringRef(TREE_STRING_POINTER(DECL_SECTION_NAME(decl)))); +#endif #ifdef LLVM_IMPLICIT_TARGET_GLOBAL_VAR_SECTION } else if (const char *Section = LLVM_IMPLICIT_TARGET_GLOBAL_VAR_SECTION(decl)) { @@ -1207,8 +1378,10 @@ // Output any associated aliases. if (isa(decl)) if (struct varpool_node *vnode = -#if (GCC_MINOR < 6) - varpool_node(decl) +#if GCC_VERSION_CODE < GCC_VERSION(4, 6) + varpool_node(decl) +#elif (GCC_MAJOR > 4) + varpool_node::get(decl) #else varpool_get_node(decl) #endif @@ -1283,7 +1456,12 @@ if (errorcount || sorrycount) return NULL; // Do not process broken code. - LLVMContext &Context = getGlobalContext(); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(TREE_TYPE(decl))->getContext(); +#else + TheContext; +#endif // Global register variable with asm name, e.g.: // register unsigned long esp __asm__("ebp"); @@ -1315,7 +1493,11 @@ // Specifying a section attribute on a variable forces it into a // non-.bss section, and thus it cannot be common. 
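// GCC 5 changed DECL_SECTION_NAME() to return a plain "const char *" instead of a
// STRING_CST tree, which is what the GCC_MAJOR checks around setSection() above and the
// NULL vs. NULL_TREE comparison below are coping with.  A sketch of folding both cases
// into one accessor (decl_section_string is a hypothetical helper, not part of the patch):
static const char *decl_section_string(tree decl) {
#if (GCC_MAJOR > 4)
  return DECL_SECTION_NAME(decl);                  // already a C string (or NULL)
#else
  tree section = DECL_SECTION_NAME(decl);          // a STRING_CST (or NULL_TREE)
  return section ? TREE_STRING_POINTER(section) : NULL;
#endif
}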
+#if (GCC_MAJOR > 4) + if (isa(decl) && DECL_SECTION_NAME(decl) != NULL && +#else if (isa(decl) && DECL_SECTION_NAME(decl) != NULL_TREE && +#endif DECL_INITIAL(decl) == NULL_TREE && DECL_COMMON(decl)) DECL_COMMON(decl) = 0; @@ -1339,7 +1521,9 @@ FnEntry = Function::Create(Ty, Function::ExternalLinkage, Name, TheModule); FnEntry->setCallingConv(CC); +#if LLVM_VERSION_CODE < LLVM_VERSION(4, 0) FnEntry->setAttributes(PAL); +#endif // Check for external weak linkage. if (DECL_EXTERNAL(decl) && DECL_WEAK(decl)) @@ -1613,7 +1797,7 @@ EmitIR |= flag_generate_lto != 0; // We have the same needs as GCC's LTO. Always claim to be doing LTO. flag_lto = -#if (GCC_MINOR > 5) +#if GCC_VERSION_CODE > GCC_VERSION(4, 5) ""; #else 1; @@ -1646,7 +1830,7 @@ /// emit_cgraph_aliases - Output any aliases associated with the given cgraph /// node. static void emit_cgraph_aliases(struct cgraph_node *node) { -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) struct cgraph_node *alias, *next; for (alias = node->same_body; alias && alias->next; alias = alias->next) ; @@ -1689,7 +1873,11 @@ } // Output any associated aliases. +#if (GCC_MAJOR > 4) + emit_cgraph_aliases(cgraph_node::get(current_function_decl)); +#else emit_cgraph_aliases(cgraph_get_node(current_function_decl)); +#endif if (!errorcount && !sorrycount) { // Do not process broken code. createPerFunctionOptimizationPasses(); @@ -1713,14 +1901,18 @@ } // Free tree-ssa data structures. -#if (GCC_MINOR < 8) +#if GCC_VERSION_CODE < GCC_VERSION(4, 8) execute_free_datastructures(); #else free_dominance_info(CDI_DOMINATORS); free_dominance_info(CDI_POST_DOMINATORS); // And get rid of annotations we no longer need. +#if (GCC_MAJOR > 4) + delete_tree_cfg_annotations(DECL_STRUCT_FUNCTION(current_function_decl)); +#else delete_tree_cfg_annotations(); #endif +#endif // Finally, we have written out this function! TREE_ASM_WRITTEN(current_function_decl) = 1; @@ -1728,6 +1920,7 @@ } /// pass_rtl_emit_function - RTL pass that converts a function to LLVM IR. +#if (GCC_MAJOR < 5) static struct rtl_opt_pass pass_rtl_emit_function = { { RTL_PASS, "rtl_emit_function", /* name */ #if (GCC_MINOR >= 8) @@ -1744,20 +1937,47 @@ PROP_ssa | PROP_trees, /* properties_destroyed */ TODO_verify_ssa | TODO_verify_flow | TODO_verify_stmts } }; +#else +const pass_data pass_data_rtl_emit_function = { + RTL_PASS, /* type */ + "rtl_emit_function", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_ssa | PROP_gimple_leh | PROP_cfg, /* properties_required */ + 0, /* properties_provided */ + PROP_ssa | PROP_trees, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_rtl_emit_function : public rtl_opt_pass { +public: + pass_rtl_emit_function(gcc::context *ctxt) + : rtl_opt_pass(pass_data_rtl_emit_function, ctxt) {} + + virtual unsigned int execute(function *) { return rtl_emit_function(); } +}; +#endif /// emit_file_scope_asms - Output any file-scope assembly. static void emit_file_scope_asms() { +#if (GCC_MAJOR > 4) + for (struct asm_node *anode = symtab->first_asm_symbol(); anode; anode = anode->next) { +#else for (struct asm_node *anode = asm_nodes; anode; anode = anode->next) { +#endif tree string = anode->asm_str; if (isa(string)) string = TREE_OPERAND(string, 0); TheModule->appendModuleInlineAsm(TREE_STRING_POINTER(string)); } // Remove the asms so gcc doesn't waste time outputting them. 
+#if (GCC_MAJOR < 5) asm_nodes = NULL; +#endif } -#if (GCC_MINOR > 6) +#if GCC_VERSION_CODE > GCC_VERSION(4, 6) /// get_alias_symbol - Return the name of the aliasee for this alias. static tree get_alias_symbol(tree decl) { tree alias = lookup_attribute("alias", DECL_ATTRIBUTES(decl)); @@ -1784,12 +2004,17 @@ if (vnode->alias && DECL_EXTERNAL(varpool_symbol(vnode)->decl) && lookup_attribute("weakref", DECL_ATTRIBUTES(varpool_symbol(vnode)->decl))) - emit_alias(varpool_symbol(vnode)->decl, vnode->alias_of ? vnode->alias_of + emit_alias(varpool_symbol(vnode)->decl, +#if (GCC_MAJOR > 4) + vnode->get_constructor() ? vnode->get_constructor() +#else + vnode->alias_of ? vnode->alias_of +#endif : get_alias_symbol(varpool_symbol(vnode)->decl)); } #endif -#if (GCC_MINOR < 8) +#if GCC_VERSION_CODE < GCC_VERSION(4, 8) INSTANTIATE_VECTOR(alias_pair); #endif @@ -1810,7 +2035,7 @@ struct varpool_node *vnode; FOR_EACH_VARIABLE(vnode) { // If the node is explicitly marked as not being needed, then skip it. -#if (GCC_MINOR < 8) +#if GCC_VERSION_CODE < GCC_VERSION(4, 8) if (!vnode->needed) continue; #endif @@ -1891,7 +2116,7 @@ TheDebugInfo = 0; } - LLVMContext &Context = getGlobalContext(); + LLVMContext &Context = TheContext; createPerFunctionOptimizationPasses(); @@ -1986,7 +2211,11 @@ Context.setInlineAsmDiagnosticHandler(OldHandler, OldHandlerData); } +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + FormattedOutStream->flush(); +#else FormattedOutStream.flush(); +#endif OutStream->flush(); //TODO timevar_pop(TV_LLVM_PERFILE); @@ -2004,6 +2233,7 @@ static bool gate_null(void) { return false; } /// pass_gimple_null - Gimple pass that does nothing. +#if (GCC_MAJOR < 5) static struct gimple_opt_pass pass_gimple_null = { { GIMPLE_PASS, "*gimple_null", /* name */ #if (GCC_MINOR >= 8) @@ -2021,12 +2251,33 @@ 0, /* todo_flags_start */ 0 /* todo_flags_finish */ } }; +#else +const pass_data pass_data_gimple_null = { + GIMPLE_PASS, /* type */ + "*gimple_null", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_gimple_null : public gimple_opt_pass { +public: + pass_gimple_null(gcc::context *ctxt) + : gimple_opt_pass(pass_data_gimple_null, ctxt) {} +}; +#endif /// execute_correct_state - Correct the cgraph state to ensure that newly /// inserted functions are processed before being converted to LLVM IR. static unsigned int execute_correct_state(void) { +#if (GCC_MAJOR < 5) if (cgraph_state < CGRAPH_STATE_IPA_SSA) cgraph_state = CGRAPH_STATE_IPA_SSA; +#endif return 0; } @@ -2035,6 +2286,7 @@ /// pass_gimple_correct_state - Gimple pass that corrects the cgraph state so /// newly inserted functions are processed before being converted to LLVM IR. 
+#if (GCC_MAJOR < 5) static struct gimple_opt_pass pass_gimple_correct_state = { { GIMPLE_PASS, "*gimple_correct_state", /* name */ #if (GCC_MINOR >= 8) @@ -2052,8 +2304,32 @@ 0, /* todo_flags_start */ 0 /* todo_flags_finish */ } }; +#else +const pass_data pass_data_gimple_correct_state = { + GIMPLE_PASS, + "*gimple_correct_state", + OPTGROUP_NONE, + TV_NONE, + 0, + 0, + 0, + 0, + 0, +}; + +class pass_gimple_correct_state : public gimple_opt_pass { +public: + pass_gimple_correct_state(gcc::context *ctxt) + : gimple_opt_pass(pass_data_gimple_correct_state, ctxt) {} + + virtual bool gate(function *) { return gate_correct_state(); } + + virtual unsigned int execute(function *) { return execute_correct_state(); } +}; +#endif /// pass_ipa_null - IPA pass that does nothing. +#if (GCC_MAJOR < 5) static struct ipa_opt_pass_d pass_ipa_null = { { IPA_PASS, "*ipa_null", /* name */ #if (GCC_MINOR >= 8) @@ -2085,8 +2361,38 @@ NULL, /* function_transform */ NULL /* variable_transform */ }; +#else +const pass_data pass_data_ipa_null = { + IPA_PASS, /* type */ + "*ipa_null", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_ipa_null : public ipa_opt_pass_d { +public: + pass_ipa_null(gcc::context *ctxt) + : ipa_opt_pass_d(pass_data_ipa_null, ctxt, + NULL, /* generate_summary */ + NULL, /* write_summary */ + NULL, /* read_summary */ + NULL, /* write_optimization_summary */ + NULL, /* read_optimization_summary */ + NULL, /* stmt_fixup */ + 0, /* function_transform_todo_flags_start */ + NULL, /* function_transform */ + NULL) /* variable_transform */ + {} +}; +#endif /// pass_rtl_null - RTL pass that does nothing. +#if (GCC_MAJOR < 5) static struct rtl_opt_pass pass_rtl_null = { { RTL_PASS, "*rtl_null", /* name */ #if (GCC_MINOR >= 8) OPTGROUP_NONE,/* optinfo_flags */ @@ -2103,8 +2409,27 @@ 0, /* todo_flags_start */ 0 /* todo_flags_finish */ } }; +#else +const pass_data pass_data_rtl_null = { + RTL_PASS, /* type */ + "*rtl_null", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_rtl_null : public rtl_opt_pass { +public: + pass_rtl_null(gcc::context *ctxt) : rtl_opt_pass(pass_data_rtl_null, ctxt) {} +}; +#endif /// pass_simple_ipa_null - Simple IPA pass that does nothing. +#if (GCC_MAJOR < 5) static struct simple_ipa_opt_pass pass_simple_ipa_null = { { SIMPLE_IPA_PASS, "*simple_ipa_null", /* name */ #if (GCC_MINOR >= 8) @@ -2122,6 +2447,25 @@ 0, /* todo_flags_start */ 0 /* todo_flags_finish */ } }; +#else +const pass_data pass_data_simple_ipa_null = { + SIMPLE_IPA_PASS, /* type */ + "*simple_ipa_null", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_simple_ipa_null : public simple_ipa_opt_pass { +public: + pass_simple_ipa_null(gcc::context *ctxt) + : simple_ipa_opt_pass(pass_data_simple_ipa_null, ctxt) {} +}; +#endif // Garbage collector roots. extern const struct ggc_cache_tab gt_ggc_rc__gt_cache_h[]; @@ -2144,9 +2488,9 @@ /// llvm_plugin_info - Information about this plugin. Users can access this /// using "gcc --help -v". 
static struct plugin_info llvm_plugin_info = { - LLVM_VERSION, // version - // TODO provide something useful here - NULL // help + LLVM_VERSION_STRING, // version + // TODO provide something useful here + NULL // help }; #ifndef DISABLE_VERSION_CHECK @@ -2270,7 +2614,12 @@ TakeoverAsmOutput(); // Register our garbage collector roots. + // https://gcc.gnu.org/ml/gcc-patches/2014-11/msg02965.html +#if (GCC_MAJOR < 5) register_callback(plugin_name, PLUGIN_REGISTER_GGC_CACHES, NULL, +#else + register_callback(plugin_name, PLUGIN_REGISTER_GGC_ROOTS, NULL, +#endif const_cast(gt_ggc_rc__gt_cache_h)); // Perform late initialization just before processing the compilation unit. @@ -2286,7 +2635,7 @@ // Leave pass_ipa_function_and_variable_visibility. Needed for correctness. -#if (GCC_MINOR < 6) +#if GCC_VERSION_CODE < GCC_VERSION(4, 6) // Turn off pass_ipa_early_inline. pass_info.pass = &pass_simple_ipa_null.pass; pass_info.reference_pass_name = "einline_ipa"; @@ -2306,7 +2655,12 @@ // Leave pass_early_local_passes::pass_build_ssa. // Turn off pass_lower_vector. - pass_info.pass = &pass_gimple_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_gimple_null.pass; +#else + new pass_gimple_null(g); +#endif pass_info.reference_pass_name = "veclower"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; @@ -2325,14 +2679,24 @@ // Insert a pass that ensures that any newly inserted functions, for example // those generated by OMP expansion, are processed before being converted to // LLVM IR. - pass_info.pass = &pass_gimple_correct_state.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_gimple_correct_state.pass; +#else + new pass_gimple_correct_state(g); +#endif pass_info.reference_pass_name = "early_optimizations"; pass_info.ref_pass_instance_number = 1; pass_info.pos_op = PASS_POS_INSERT_BEFORE; register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); // Turn off pass_early_local_passes::pass_all_early_optimizations. - pass_info.pass = &pass_gimple_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_gimple_null.pass; +#else + new pass_gimple_null(g); +#endif pass_info.reference_pass_name = "early_optimizations"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; @@ -2348,13 +2712,18 @@ // Leave pass pass_early_local_passes::pass_tree_profile. // Turn off pass_ipa_increase_alignment. - pass_info.pass = &pass_simple_ipa_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_simple_ipa_null.pass; +#else + new pass_simple_ipa_null(g); +#endif pass_info.reference_pass_name = "increase_alignment"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); -#if (GCC_MINOR < 8) +#if GCC_VERSION_CODE < GCC_VERSION(4, 8) // Turn off pass_ipa_matrix_reorg. pass_info.pass = &pass_simple_ipa_null.pass; pass_info.reference_pass_name = "matrix-reorg"; @@ -2372,7 +2741,12 @@ // Leave pass_ipa_profile. ??? // Turn off pass_ipa_cp. - pass_info.pass = &pass_ipa_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_ipa_null.pass; +#else + new pass_ipa_null(g); +#endif pass_info.reference_pass_name = "cp"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; @@ -2381,27 +2755,42 @@ // Leave pass_ipa_cdtor_merge. // Turn off pass_ipa_inline. 
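// The "replace pass X with a do-nothing pass" sequence below repeats for a dozen passes,
// each time choosing between the old struct-style pass and a heap-allocated new-style
// pass object.  A sketch of how the GCC 5+ branch could be factored
// (replace_with_null_gimple_pass is hypothetical; g is GCC's global gcc::context from
// context.h):
#if (GCC_MAJOR > 4)
static void replace_with_null_gimple_pass(const char *plugin_name, const char *ref_name) {
  struct register_pass_info info;
  info.pass = new pass_gimple_null(g);   // new-style pass, owned by the pass manager
  info.reference_pass_name = ref_name;   // pass to replace
  info.ref_pass_instance_number = 0;     // 0 == every instance
  info.pos_op = PASS_POS_REPLACE;
  register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &info);
}
#endif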
- pass_info.pass = &pass_ipa_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_ipa_null.pass; +#else + new pass_ipa_null(g); +#endif pass_info.reference_pass_name = "inline"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); // Turn off pass_ipa_pure_const. - pass_info.pass = &pass_ipa_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_ipa_null.pass; +#else + new pass_ipa_null(g); +#endif pass_info.reference_pass_name = "pure-const"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); // Turn off pass_ipa_reference. - pass_info.pass = &pass_ipa_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_ipa_null.pass; +#else + new pass_ipa_null(g); +#endif pass_info.reference_pass_name = "static-var"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) // Turn off pass_ipa_type_escape. pass_info.pass = &pass_simple_ipa_null.pass; pass_info.reference_pass_name = "type-escape-var"; @@ -2411,13 +2800,18 @@ #endif // Turn off pass_ipa_pta. - pass_info.pass = &pass_simple_ipa_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_simple_ipa_null.pass; +#else + new pass_simple_ipa_null(g); +#endif pass_info.reference_pass_name = "pta"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) // Turn off pass_ipa_struct_reorg. pass_info.pass = &pass_simple_ipa_null.pass; pass_info.reference_pass_name = "ipa_struct_reorg"; @@ -2428,19 +2822,29 @@ } // Disable all LTO passes. - pass_info.pass = &pass_ipa_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_ipa_null.pass; +#else + new pass_ipa_null(g); +#endif pass_info.reference_pass_name = "lto_gimple_out"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); - pass_info.pass = &pass_ipa_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_ipa_null.pass; +#else + new pass_ipa_null(g); +#endif pass_info.reference_pass_name = "lto_decls_out"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); -#if (GCC_MINOR < 6) +#if GCC_VERSION_CODE < GCC_VERSION(4, 6) pass_info.pass = &pass_ipa_null.pass; pass_info.reference_pass_name = "lto_wpa_fixup"; pass_info.ref_pass_instance_number = 0; @@ -2457,14 +2861,24 @@ if (!EnableGCCOptimizations) { // Disable pass_lower_eh_dispatch. - pass_info.pass = &pass_gimple_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_gimple_null.pass; +#else + new pass_gimple_null(g); +#endif pass_info.reference_pass_name = "ehdisp"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); // Disable pass_all_optimizations. 
- pass_info.pass = &pass_gimple_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_gimple_null.pass; +#else + new pass_gimple_null(g); +#endif pass_info.reference_pass_name = "*all_optimizations"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; @@ -2473,42 +2887,72 @@ // Leave pass_tm_init. // Disable pass_lower_complex_O0. - pass_info.pass = &pass_gimple_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_gimple_null.pass; +#else + new pass_gimple_null(g); +#endif pass_info.reference_pass_name = "cplxlower0"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); // Disable pass_cleanup_eh. - pass_info.pass = &pass_gimple_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_gimple_null.pass; +#else + new pass_gimple_null(g); +#endif pass_info.reference_pass_name = "ehcleanup"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); // Disable pass_lower_resx. - pass_info.pass = &pass_gimple_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_gimple_null.pass; +#else + new pass_gimple_null(g); +#endif pass_info.reference_pass_name = "resx"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); // Disable pass_nrv. - pass_info.pass = &pass_gimple_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_gimple_null.pass; +#else + new pass_gimple_null(g); +#endif pass_info.reference_pass_name = "nrv"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); // Disable pass_mudflap_2. ??? - pass_info.pass = &pass_gimple_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_gimple_null.pass; +#else + new pass_gimple_null(g); +#endif pass_info.reference_pass_name = "mudflap2"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); // Disable pass_cleanup_cfg_post_optimizing. - pass_info.pass = &pass_gimple_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_gimple_null.pass; +#else + new pass_gimple_null(g); +#endif pass_info.reference_pass_name = "optimized"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; @@ -2518,24 +2962,39 @@ } // Replace rtl expansion with a pass that converts functions to LLVM IR. - pass_info.pass = &pass_rtl_emit_function.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_rtl_emit_function.pass; +#else + new pass_rtl_emit_function(g); +#endif pass_info.reference_pass_name = "expand"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); // Turn off all other rtl passes. 
-#if (GCC_MINOR < 8) +#if GCC_VERSION_CODE < GCC_VERSION(4, 8) pass_info.pass = &pass_gimple_null.pass; #else - pass_info.pass = &pass_rtl_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_rtl_null.pass; +#else + new pass_rtl_null(g); +#endif #endif pass_info.reference_pass_name = "*rest_of_compilation"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); - pass_info.pass = &pass_rtl_null.pass; + pass_info.pass = +#if (GCC_MAJOR < 5) + &pass_rtl_null.pass; +#else + new pass_rtl_null(g); +#endif pass_info.reference_pass_name = "*clean_state"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; Index: src/Cache.cpp =================================================================== --- src/Cache.cpp +++ src/Cache.cpp @@ -46,6 +46,9 @@ #include "coretypes.h" #include "tm.h" #include "tree.h" +#if (GCC_MAJOR > 4) +#include "tree-core.h" +#endif #include "ggc.h" #ifndef ENABLE_BUILD_WITH_CXX @@ -65,8 +68,26 @@ #define tree2int_hash tree_map_base_hash #define tree2int_marked_p tree_map_base_marked_p +#if (GCC_MAJOR < 5) +// FIXME: gengtype does not support macro https://gcc.gnu.org/ml/gcc/2017-07/msg00061.html static GTY((if_marked("tree2int_marked_p"), param_is(struct tree2int))) htab_t intCache; +#else +#if (GCC_MAJOR == 5) +struct intCacheHasher : ggc_cache_hasher { +#else +struct intCacheHasher : ggc_cache_ptr_hash { +#endif + static inline hashval_t hash(tree2int *t2i) { + return tree_map_base_hash(&t2i->base); + } + + static inline bool equal(tree2int *a, tree2int *b) { + return a->base.from == b->base.from; + } +}; +static GTY((cache)) hash_table *intCache; +#endif // Hash table mapping trees to Type*. @@ -87,8 +108,26 @@ #define tree2Type_hash tree_map_base_hash #define tree2Type_marked_p tree_map_base_marked_p +#if (GCC_MAJOR < 5) +// FIXME: gengtype does not support macro https://gcc.gnu.org/ml/gcc/2017-07/msg00061.html static GTY((if_marked("tree2Type_marked_p"), param_is(struct tree2Type))) htab_t TypeCache; +#else +#if (GCC_MAJOR == 5) +struct TypeCacheHaser : ggc_cache_hasher { +#else +struct TypeCacheHaser : ggc_cache_ptr_hash { +#endif + static inline hashval_t hash(tree2Type *t2T) { + return tree_map_base_hash(&t2T->base); + } + + static inline bool equal(tree2Type *a, tree2Type *b) { + return a->base.from == b->base.from; + } +}; +static GTY((cache)) hash_table *TypeCache; +#endif // Hash table mapping trees to WeakVH. @@ -109,18 +148,46 @@ #define tree2WeakVH_hash tree_map_base_hash #define tree2WeakVH_marked_p tree_map_base_marked_p +#if (GCC_MAJOR < 5) +// FIXME: gengtype does not support macro https://gcc.gnu.org/ml/gcc/2017-07/msg00061.html static GTY((if_marked("tree2WeakVH_marked_p"), param_is(struct tree2WeakVH))) htab_t WeakVHCache; +#else +#if (GCC_MAJOR == 5) +struct WeakVHCacheHasher : ggc_cache_hasher { +#else +struct WeakVHCacheHasher : ggc_cache_ptr_hash { +#endif + static inline hashval_t hash(tree2WeakVH *t2W) { + return tree_map_base_hash(&t2W->base); + } + + static inline bool equal(tree2WeakVH *a, tree2WeakVH *b) { + return a->base.from == b->base.from; + } + + static int keep_cache_entry(tree2WeakVH *&t2W) { + return ggc_marked_p(t2W->base.from); + } +}; +static GTY((cache)) hash_table *WeakVHCache; +#endif // Include the garbage collector header. 
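// The GCC >= 5 caches above replace the old if_marked htab_t with a GTY((cache))
// hash_table keyed by a hasher derived from ggc_cache_hasher (GCC 5) or
// ggc_cache_ptr_hash (GCC 6+).  Minimal sketch of the lookup side of that pattern, using
// the GCC 6 spelling (demoHasher/demoCache/demo_lookup are illustrative names only):
struct demoHasher : ggc_cache_ptr_hash<tree2int> {
  static hashval_t hash(tree2int *e) { return tree_map_base_hash(&e->base); }
  static bool equal(tree2int *a, tree2int *b) { return a->base.from == b->base.from; }
};
static GTY((cache)) hash_table<demoHasher> *demoCache;

static int *demo_lookup(tree t) {
  if (!demoCache)
    demoCache = hash_table<demoHasher>::create_ggc(64);  // GC-allocated table
  tree2int key;
  key.base.from = t;
  tree2int *entry = demoCache->find(&key);               // NULL if t was never cached
  return entry ? &entry->val : NULL;                     // insertion/allocation elided
}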
#ifndef ENABLE_BUILD_WITH_CXX extern "C" { #endif -#if (GCC_MINOR > 5) +#if (GCC_MAJOR < 5) +#if GCC_VERSION_CODE > GCC_VERSION(4, 5) #include "dragonegg/gt-cache-4.6.inc" #else #include "dragonegg/gt-cache-4.5.inc" #endif +#else +#if (GCC_MAJOR == 6) +#include "dragonegg/gt-cache-6.3.inc" +#endif +#endif #ifndef ENABLE_BUILD_WITH_CXX } // extern "C" #endif @@ -128,8 +195,14 @@ bool getCachedInteger(tree t, int &Val) { if (!intCache) return false; +#if (GCC_MAJOR < 5) tree_map_base in = { t }; tree2int *h = (tree2int *)htab_find(intCache, &in); +#else + tree2int in; + in.base.from = t; + tree2int *h = intCache->find(&in); +#endif if (!h) return false; Val = h->val; @@ -138,20 +211,32 @@ void setCachedInteger(tree t, int Val) { if (!intCache) +#if (GCC_MAJOR < 5) intCache = htab_create_ggc(1024, tree2int_hash, tree2int_eq, 0); +#else + intCache = hash_table::create_ggc(1024); +#endif +#if (GCC_MAJOR < 5) tree_map_base in = { t }; tree2int **slot = (tree2int **)htab_find_slot(intCache, &in, INSERT); +#else + tree2int in; + in.base.from = t; + tree2int **slot = intCache->find_slot(&in, INSERT); +#endif assert(slot && "Failed to create hash table slot!"); if (!*slot) { +#if (GCC_MAJOR < 5) *slot = -#if (GCC_MINOR > 5) +#if GCC_VERSION_CODE > GCC_VERSION(4, 5) ggc_alloc_tree2int(); #else GGC_NEW(struct tree2int); #endif (*slot)->base.from = t; +#endif } (*slot)->val = Val; @@ -160,36 +245,61 @@ Type *getCachedType(tree t) { if (!TypeCache) return 0; +#if (GCC_MAJOR < 5) tree_map_base in = { t }; tree2Type *h = (tree2Type *)htab_find(TypeCache, &in); +#else + tree2Type in; + in.base.from = t; + tree2Type *h = TypeCache->find(&in); +#endif return h ? h->Ty : 0; } void setCachedType(tree t, Type *Ty) { +#if (GCC_MAJOR < 5) tree_map_base in = { t }; +#else + tree2Type in; + in.base.from = t; +#endif /* If deleting, remove the slot. */ if (!Ty) { if (TypeCache) +#if (GCC_MAJOR < 5) htab_remove_elt(TypeCache, &in); +#else + TypeCache->remove_elt(&in); +#endif return; } if (!TypeCache) +#if (GCC_MAJOR < 5) TypeCache = htab_create_ggc(1024, tree2Type_hash, tree2Type_eq, 0); +#else + TypeCache = hash_table::create_ggc(1024); +#endif +#if (GCC_MAJOR < 5) tree2Type **slot = (tree2Type **)htab_find_slot(TypeCache, &in, INSERT); +#else + tree2Type **slot = TypeCache->find_slot(&in, INSERT); +#endif assert(slot && "Failed to create hash table slot!"); +#if (GCC_MAJOR < 5) if (!*slot) { *slot = -#if (GCC_MINOR > 5) +#if GCC_VERSION_CODE > GCC_VERSION(4, 5) ggc_alloc_tree2Type(); #else GGC_NEW(struct tree2Type); #endif (*slot)->base.from = t; } +#endif (*slot)->Ty = Ty; } @@ -199,8 +309,14 @@ Value *getCachedValue(tree t) { if (!WeakVHCache) return 0; +#if (GCC_MAJOR < 5) tree_map_base in = { t }; tree2WeakVH *h = (tree2WeakVH *)htab_find(WeakVHCache, &in); +#else + tree2WeakVH in; + in.base.from = t; + tree2WeakVH *h = WeakVHCache->find(&in); +#endif return h ? h->V : 0; } @@ -212,20 +328,37 @@ /// given GCC tree. The association is removed if tree is garbage collected /// or the value deleted. void setCachedValue(tree t, Value *V) { +#if (GCC_MAJOR < 5) tree_map_base in = { t }; +#else + tree2WeakVH in; + in.base.from = t; +#endif // If deleting, remove the slot. 
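// Hedged sketch of the GCC >= 5 garbage-collected cache idiom used for the three
// caches in this file.  The old htab_t plus if_marked/param_is annotations become
// a typed hash_table whose hasher supplies hash and equal (plus keep_cache_entry
// where the default is not wanted); tree2int and tree_map_base_hash come from the
// surrounding code, and the template arguments written out here restore what the
// hasher base classes and table declarations above are meant to carry.
#if (GCC_MAJOR == 5)
struct intCacheHasher : ggc_cache_hasher<tree2int *> {
#else
struct intCacheHasher : ggc_cache_ptr_hash<tree2int> {
#endif
  static inline hashval_t hash(tree2int *t2i) { return tree_map_base_hash(&t2i->base); }
  static inline bool equal(tree2int *a, tree2int *b) { return a->base.from == b->base.from; }
};
static GTY((cache)) hash_table<intCacheHasher> *intCache;
// Lookup and insertion then go through the typed interface instead of htab_*:
//   intCache = hash_table<intCacheHasher>::create_ggc(1024);
//   tree2int key;  key.base.from = t;
//   tree2int *hit   = intCache->find(&key);
//   tree2int **slot = intCache->find_slot(&key, INSERT);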
if (!V) { if (WeakVHCache) +#if (GCC_MAJOR < 5) htab_remove_elt(WeakVHCache, &in); +#else + WeakVHCache->remove_elt(&in); +#endif return; } if (!WeakVHCache) WeakVHCache = +#if (GCC_MAJOR < 5) htab_create_ggc(1024, tree2WeakVH_hash, tree2WeakVH_eq, DestructWeakVH); +#else + hash_table::create_ggc(1024); +#endif +#if (GCC_MAJOR < 5) tree2WeakVH **slot = (tree2WeakVH **)htab_find_slot(WeakVHCache, &in, INSERT); +#else + tree2WeakVH **slot = WeakVHCache->find_slot(&in, INSERT); +#endif assert(slot && "Failed to create hash table slot!"); if (*slot) { @@ -233,8 +366,9 @@ return; } +#if (GCC_MAJOR < 5) *slot = -#if (GCC_MINOR > 5) +#if GCC_VERSION_CODE > GCC_VERSION(4, 5) ggc_alloc_tree2WeakVH(); #else GGC_NEW(struct tree2WeakVH); @@ -243,4 +377,5 @@ WeakVH *W = new (&(*slot)->V) WeakVH(V); assert(W == &(*slot)->V && "Pointer was displaced!"); (void)W; +#endif } Index: src/ConstantConversion.cpp =================================================================== --- src/ConstantConversion.cpp +++ src/ConstantConversion.cpp @@ -33,6 +33,9 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/Host.h" +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) +#include "llvm/Target/TargetMachine.h" +#endif // System headers #include @@ -50,8 +53,13 @@ #include "coretypes.h" #include "tm.h" #include "tree.h" +#if (GCC_MAJOR > 4) +#include "print-tree.h" +#include "stor-layout.h" +#include "fold-const.h" +#endif -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) #include "flags.h" // For POINTER_TYPE_OVERFLOW_UNDEFINED. #endif #include "tm_p.h" // For CONSTANT_ALIGNMENT. @@ -64,7 +72,9 @@ using namespace llvm; -static LLVMContext &Context = getGlobalContext(); +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) +static LLVMContext &TheContext = getGlobalContext(); +#endif // Forward declarations. static Constant *ConvertInitializerImpl(tree, TargetFolder &); @@ -103,11 +113,11 @@ } /// ExtendRange - Extend the slice to a wider range. All added bits are zero. - BitSlice ExtendRange(SignedRange r, TargetFolder &Folder) const; + BitSlice ExtendRange(SignedRange r, TargetFolder &Folder, Type *Ty) const; /// ReduceRange - Reduce the slice to a smaller range discarding any bits that /// do not belong to the new range. - BitSlice ReduceRange(SignedRange r, TargetFolder &Folder) const; + BitSlice ReduceRange(SignedRange r, TargetFolder &Folder, Type *Ty) const; public: /// BitSlice - Default constructor: empty bit range. @@ -150,23 +160,29 @@ /// returned value corresponds to the first bit of the range (aka "First"), /// while on big-endian machines it corresponds to the last bit of the range /// (aka "Last-1"). - Constant *getBits(SignedRange r, TargetFolder &Folder) const; + Constant *getBits(SignedRange r, TargetFolder &Folder, Type *Ty) const; /// Merge - Join the slice with another (which must be disjoint), forming the /// convex hull of the ranges. The bits in the range of one of the slices are /// those of that slice. Any other bits have an undefined value. - void Merge(const BitSlice &other, TargetFolder &Folder); + void Merge(const BitSlice &other, TargetFolder &Folder, Type *Ty); }; } // Unnamed namespace. /// ExtendRange - Extend the slice to a wider range. All added bits are zero. -BitSlice BitSlice::ExtendRange(SignedRange r, TargetFolder &Folder) const { +BitSlice BitSlice::ExtendRange(SignedRange r, TargetFolder &Folder, Type *Ty) const { assert(r.contains(R) && "Not an extension!"); // Quick exit if the range did not actually increase. 
if (R == r) return *this; assert(!r.empty() && "Empty ranges did not evaluate as equal?"); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif Type *ExtTy = IntegerType::get(Context, (unsigned) r.getWidth()); // If the slice contains no bits then every bit of the extension is zero. if (empty()) @@ -195,19 +211,25 @@ /// returned value corresponds to the first bit of the range (aka "First"), /// while on big-endian machines it corresponds to the last bit of the range /// (aka "Last-1"). -Constant *BitSlice::getBits(SignedRange r, TargetFolder &Folder) const { +Constant *BitSlice::getBits(SignedRange r, TargetFolder &Folder, Type *Ty) const { assert(!r.empty() && "Bit range is empty!"); // Quick exit if the desired range matches that of the slice. if (R == r) return Contents; + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif Type *RetTy = IntegerType::get(Context, (unsigned) r.getWidth()); // If the slice contains no bits then every returned bit is undefined. if (empty()) return UndefValue::get(RetTy); // Extend to the convex hull of the two ranges. - BitSlice Slice = ExtendRange(R.Join(r), Folder); + BitSlice Slice = ExtendRange(R.Join(r), Folder, Ty); // Chop the slice down to the requested range. - Slice = Slice.ReduceRange(r, Folder); + Slice = Slice.ReduceRange(r, Folder, Ty); // Now we can just return the bits contained in the slice. return Slice.Contents; } @@ -215,7 +237,7 @@ /// Merge - Join the slice with another (which must be disjoint), forming the /// convex hull of the ranges. The bits in the range of one of the slices are /// those of that slice. Any other bits have an undefined value. -void BitSlice::Merge(const BitSlice &other, TargetFolder &Folder) { +void BitSlice::Merge(const BitSlice &other, TargetFolder &Folder, Type *Ty) { // If the other slice is empty, the result is this slice. if (other.empty()) return; @@ -228,8 +250,8 @@ // Extend each slice to the convex hull of the ranges. SignedRange Hull = R.Join(other.getRange()); - BitSlice ExtThis = ExtendRange(Hull, Folder); - BitSlice ExtOther = other.ExtendRange(Hull, Folder); + BitSlice ExtThis = ExtendRange(Hull, Folder, Ty); + BitSlice ExtOther = other.ExtendRange(Hull, Folder, Ty); // Since the slices are disjoint and all added bits are zero they can be // joined via a simple 'or'. @@ -238,7 +260,7 @@ /// ReduceRange - Reduce the slice to a smaller range discarding any bits that /// do not belong to the new range. -BitSlice BitSlice::ReduceRange(SignedRange r, TargetFolder &Folder) const { +BitSlice BitSlice::ReduceRange(SignedRange r, TargetFolder &Folder, Type *Ty) const { assert(R.contains(r) && "Not a reduction!"); // Quick exit if the range did not actually decrease. if (R == r) @@ -261,6 +283,12 @@ C = Folder.CreateLShr(C, ShiftAmt); } // Truncate to the new type. + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif Type *RedTy = IntegerType::get(Context, (unsigned) r.getWidth()); C = Folder.CreateTruncOrBitCast(C, RedTy); return BitSlice(r, C); @@ -276,6 +304,12 @@ // Sanitize the range to make life easier in what follows. 
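// Hedged sketch of the context-recovery pattern repeated throughout this file:
// llvm::getGlobalContext() is gone after LLVM 3.8, so the context is taken from
// whatever Type or Value is already in hand.  contextOf is a hypothetical helper,
// not something the patch itself adds.
static llvm::LLVMContext &contextOf(llvm::Type *Ty) {
#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8)
  return Ty->getContext();           // every Type and Value knows its owning context
#else
  (void)Ty;
  return llvm::getGlobalContext();   // the pre-3.9 process-wide context
#endif
}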
Type *Ty = C->getType(); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif int StoreSize = getDataLayout().getTypeStoreSizeInBits(Ty); R = R.Meet(SignedRange(0, StoreSize)); @@ -334,7 +368,7 @@ assert(!NeededBits.empty() && "Used element computation wrong!"); BitSlice EltBits = ViewAsBits(Elt, NeededBits, Folder); // Add to the already known bits. - Bits.Merge(EltBits.Displace(EltOffsetInBits), Folder); + Bits.Merge(EltBits.Displace(EltOffsetInBits), Folder, Ty); } return Bits; } @@ -362,7 +396,7 @@ if (!NeededBits.empty()) { // No field bits needed if only using padding. BitSlice FieldBits = ViewAsBits(Field, NeededBits, Folder); // Add to the already known bits. - Bits.Merge(FieldBits.Displace(FieldOffsetInBits), Folder); + Bits.Merge(FieldBits.Displace(FieldOffsetInBits), Folder, Ty); } } return Bits; @@ -392,7 +426,7 @@ assert(!NeededBits.empty() && "Used element computation wrong!"); BitSlice EltBits = ViewAsBits(Elt, NeededBits, Folder); // Add to the already known bits. - Bits.Merge(EltBits.Displace(EltOffsetInBits), Folder); + Bits.Merge(EltBits.Displace(EltOffsetInBits), Folder, Ty); } return Bits; } @@ -416,6 +450,13 @@ if (C->isNullValue()) return Constant::getNullValue(Ty); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif + // The general case. switch (Ty->getTypeID()) { default: @@ -434,8 +475,8 @@ // the end on little-endian machines. Bits = Bits.Displace(-StartingBit); return BYTES_BIG_ENDIAN - ? Bits.getBits(SignedRange(StoreSize - BitWidth, StoreSize), Folder) - : Bits.getBits(SignedRange(0, BitWidth), Folder); + ? Bits.getBits(SignedRange(StoreSize - BitWidth, StoreSize), Folder, Ty) + : Bits.getBits(SignedRange(0, BitWidth), Folder, Ty); } case Type::PointerTyID: { @@ -523,6 +564,13 @@ // This roundabout approach means we get the right result on both little and // big endian machines. unsigned Size = GET_MODE_BITSIZE(TYPE_MODE(type)); + Type *Ty = ConvertType(type); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif Type *MemTy = IntegerType::get(Context, Size); C = InterpretAsType(C, MemTy, StartingBit, Folder); return Folder.CreateTruncOrBitCast(C, getRegType(type)); @@ -538,7 +586,7 @@ return ConstantStruct::getAnon(Vals); } -#if (GCC_MINOR > 5) +#if GCC_VERSION_CODE > GCC_VERSION(4, 5) case NULLPTR_TYPE: #endif case OFFSET_TYPE: @@ -572,7 +620,11 @@ /// byte StartingByte. Constant * ExtractRegisterFromConstant(Constant *C, tree type, int StartingByte) { +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + TargetFolder Folder(TheTarget->createDataLayout()); +#else TargetFolder Folder(&getDataLayout()); +#endif return ExtractRegisterFromConstantImpl(C, type, StartingByte, Folder); } @@ -596,6 +648,13 @@ // NOTE: Needs to be kept in sync with ExtractRegisterFromConstant. assert(C->getType() == getRegType(type) && "Constant has wrong type!"); Constant *Result; + Type *Ty = ConvertType(type); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif switch (TREE_CODE(type)) { @@ -629,7 +688,7 @@ break; } -#if (GCC_MINOR > 5) +#if GCC_VERSION_CODE > GCC_VERSION(4, 5) case NULLPTR_TYPE: #endif case OFFSET_TYPE: @@ -730,6 +789,13 @@ (void) CharsWritten; // Avoid unused variable warning when assertions disabled. // Turn it into an LLVM byte array. 
+ Type *Ty = ConvertType(type); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif return ConstantDataArray::get(Context, Buffer); } @@ -738,6 +804,12 @@ // just those with a byte component type; then ConvertCST can handle strings. ArrayType *StrTy = cast(ConvertType(TREE_TYPE(exp))); Type *ElTy = StrTy->getElementType(); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ElTy->getContext(); +#else + TheContext; +#endif unsigned Len = (unsigned) TREE_STRING_LENGTH(exp); @@ -816,6 +888,12 @@ tree init_type = main_type(exp); Type *InitTy = ConvertType(init_type); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + InitTy->getContext(); +#else + TheContext; +#endif tree elt_type = main_type(init_type); Type *EltTy = ConvertType(elt_type); @@ -884,14 +962,23 @@ assert(host_integerp(first, 1) && host_integerp(last, 1) && "Unknown range_expr!"); +#if (GCC_MAJOR > 4) + FirstIndex = tree_to_shwi(first); + LastIndex = tree_to_shwi(last); +#else FirstIndex = tree_low_cst(first, 1); LastIndex = tree_low_cst(last, 1); +#endif } else { // Subtract off the lower bound if any to ensure indices start from zero. if (lower_bnd != NULL_TREE) index = fold_build2(MINUS_EXPR, main_type(index), index, lower_bnd); assert(host_integerp(index, 1)); +#if (GCC_MAJOR > 4) + FirstIndex = tree_to_shwi(index); +#else FirstIndex = tree_low_cst(index, 1); +#endif LastIndex = FirstIndex; } @@ -1001,6 +1088,12 @@ Constant *getAsBits() const { if (R.empty()) return 0; + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + C->getType()->getContext(); +#else + TheContext; +#endif Type *IntTy = IntegerType::get(Context, R.getWidth()); return InterpretAsType(C, IntTy, R.getFirst() - Starts, Folder); } @@ -1040,7 +1133,9 @@ R = other.R; C = other.C; Starts = other.Starts; +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) Folder = other.Folder; +#endif return *this; } @@ -1066,6 +1161,12 @@ /// in the range then just return it. if (isSafeToReturnContentsDirectly(DL)) return C; + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + C->getType()->getContext(); +#else + TheContext; +#endif // If the range is empty then return a constant with zero size. if (R.empty()) { // Return an empty array. Remember the returned value as an optimization @@ -1080,7 +1181,11 @@ Type *Ty = C->getType(); assert(Ty->isIntegerTy() && "Non-integer type with non-byte size!"); unsigned BitWidth = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + alignTo(Ty->getPrimitiveSizeInBits(), BITS_PER_UNIT); +#else RoundUpToAlignment(Ty->getPrimitiveSizeInBits(), BITS_PER_UNIT); +#endif Ty = IntegerType::get(Context, BitWidth); C = TheFolder->CreateZExtOrBitCast(C, Ty); if (isSafeToReturnContentsDirectly(DL)) @@ -1116,9 +1221,9 @@ // together. This can result in a nasty integer constant expression, but as // we only get here for bitfields that's mostly harmless. BitSlice Bits(R, getAsBits()); - Bits.Merge(BitSlice(S.R, S.getAsBits()), Folder); + Bits.Merge(BitSlice(S.R, S.getAsBits()), Folder, C->getType()); R = Bits.getRange(); - C = Bits.getBits(R, Folder); + C = Bits.getBits(R, Folder, C->getType()); Starts = R.empty() ? 
0 : R.getFirst(); } @@ -1129,6 +1234,12 @@ const DataLayout &DL = getDataLayout(); tree type = main_type(exp); Type *Ty = ConvertType(type); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif uint64_t TypeSize = DL.getTypeAllocSizeInBits(Ty); // Ensure that fields without an initial value are default initialized by @@ -1294,7 +1405,7 @@ // Okay, we're done. Return the computed elements as a constant with the type // of exp if possible. - if (StructType *STy = dyn_cast(Ty)) + if (StructType *STy = llvm::dyn_cast(Ty)) if (STy->isPacked() == Pack && STy->getNumElements() == Elts.size()) { bool EltTypesMatch = true; for (unsigned i = 0, e = Elts.size(); i != e; ++i) { @@ -1363,12 +1474,23 @@ static Constant *ConvertPOINTER_PLUS_EXPR(tree exp, TargetFolder &Folder) { Constant *Ptr = getAsRegister(TREE_OPERAND(exp, 0), Folder); // Pointer Constant *Idx = getAsRegister(TREE_OPERAND(exp, 1), Folder); // Offset (units) + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(main_type(exp))->getContext(); +#else + TheContext; +#endif // Convert the pointer into an i8* and add the offset to it. Ptr = Folder.CreateBitCast(Ptr, GetUnitPointerType(Context)); Constant *Result = POINTER_TYPE_OVERFLOW_UNDEFINED +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ? Folder.CreateInBoundsGetElementPtr(nullptr, Ptr, Idx) + : Folder.CreateGetElementPtr(nullptr, Ptr, Idx); +#else ? Folder.CreateInBoundsGetElementPtr(Ptr, Idx) : Folder.CreateGetElementPtr(Ptr, Idx); +#endif // The result may be of a different pointer type. Result = Folder.CreateBitCast(Result, getRegType(TREE_TYPE(exp))); @@ -1472,7 +1594,11 @@ /// initial value may exceed the alloc size of the LLVM memory type generated /// for the GCC type (see ConvertType); it is never smaller than the alloc size. Constant *ConvertInitializer(tree exp) { +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + TargetFolder Folder(TheTarget->createDataLayout()); +#else TargetFolder Folder(&getDataLayout()); +#endif return ConvertInitializerImpl(exp, Folder); } @@ -1503,7 +1629,13 @@ // Allow identical constants to be merged if the user allowed it. // FIXME: maybe this flag should be set unconditionally, and instead the // ConstantMerge pass should be disabled if flag_merge_constants is zero. +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Slot->setUnnamedAddr(flag_merge_constants >= 2 ? + llvm::GlobalValue::UnnamedAddr::Global : + llvm::GlobalValue::UnnamedAddr::Local); +#else Slot->setUnnamedAddr(flag_merge_constants); +#endif return Slot; } @@ -1538,8 +1670,13 @@ ArrayAddr = Folder.CreateBitCast(ArrayAddr, EltTy->getPointerTo()); return POINTER_TYPE_OVERFLOW_UNDEFINED +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ? Folder.CreateInBoundsGetElementPtr(nullptr, ArrayAddr, IndexVal) + : Folder.CreateGetElementPtr(nullptr, ArrayAddr, IndexVal); +#else ? Folder.CreateInBoundsGetElementPtr(ArrayAddr, IndexVal) : Folder.CreateGetElementPtr(ArrayAddr, IndexVal); +#endif } /// AddressOfCOMPONENT_REF - Return the address of a field in a record. 
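// Hedged sketch of the GEP signature change handled in the hunks above: since
// LLVM 3.7 the constant-folder and IRBuilder GEP helpers take the pointee type
// as an explicit leading argument, and passing nullptr lets it be recomputed
// from the pointer operand, preserving the old behaviour.  buildOffsetGEP is a
// hypothetical wrapper, not part of the patch.
static llvm::Constant *buildOffsetGEP(llvm::TargetFolder &Folder, llvm::Constant *Ptr,
                                      llvm::Constant *Idx, bool InBounds) {
#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8)
  return InBounds ? Folder.CreateInBoundsGetElementPtr(nullptr, Ptr, Idx)
                  : Folder.CreateGetElementPtr(nullptr, Ptr, Idx);
#else
  return InBounds ? Folder.CreateInBoundsGetElementPtr(Ptr, Idx)
                  : Folder.CreateGetElementPtr(Ptr, Idx);
#endif
}
// IRBuilder::CreateStructGEP and CreateConstInBoundsGEP1_32 gained the same kind
// of leading type parameter, which is why later hunks thread StructTy through
// those calls.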
@@ -1574,10 +1711,20 @@ assert(BitStart == 0 && "It's a bitfield reference or we didn't get to the field!"); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(main_type(exp))->getContext(); +#else + TheContext; +#endif Type *UnitPtrTy = GetUnitPointerType(Context); Constant *StructAddr = AddressOfImpl(TREE_OPERAND(exp, 0), Folder); Constant *FieldPtr = Folder.CreateBitCast(StructAddr, UnitPtrTy); +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + FieldPtr = Folder.CreateInBoundsGetElementPtr(nullptr, FieldPtr, Offset); +#else FieldPtr = Folder.CreateInBoundsGetElementPtr(FieldPtr, Offset); +#endif return FieldPtr; } @@ -1670,7 +1817,7 @@ Addr = AddressOfDecl(exp, Folder); break; case INDIRECT_REF: -#if (GCC_MINOR < 6) +#if GCC_VERSION_CODE < GCC_VERSION(4, 6) case MISALIGNED_INDIRECT_REF: #endif Addr = AddressOfINDIRECT_REF(exp, Folder); @@ -1688,6 +1835,12 @@ // Ensure that the address has the expected type. It is simpler to do this // once here rather than in every AddressOf helper. Type *Ty; + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(main_type(exp))->getContext(); +#else + TheContext; +#endif if (isa(TREE_TYPE(exp))) Ty = GetUnitPointerType(Context); // void* -> i8*. else @@ -1702,6 +1855,10 @@ /// type of the pointee is the memory type that corresponds to the type of exp /// (see ConvertType). Constant *AddressOf(tree exp) { +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + TargetFolder Folder(TheTarget->createDataLayout()); +#else TargetFolder Folder(&getDataLayout()); +#endif return AddressOfImpl(exp, Folder); } Index: src/Convert.cpp =================================================================== --- src/Convert.cpp +++ src/Convert.cpp @@ -57,7 +57,7 @@ #include "diagnostic.h" #include "except.h" #include "flags.h" -#if (GCC_MINOR > 6) +#if GCC_VERSION_CODE > GCC_VERSION(4, 6) #include "gimple-pretty-print.h" #endif #include "langhooks.h" @@ -67,12 +67,41 @@ #include "target.h" // For targetm. #include "tm_p.h" #include "toplev.h" +#if (GCC_MAJOR < 5) #include "tree-flow.h" +#else +#include "builtins.h" +#include "stor-layout.h" +#include "print-tree.h" +#include "function.h" +#include "cfg.h" +#include "basic-block.h" +#include "gimple.h" +#include "tree-cfg.h" +#include "gimple-iterator.h" +#include "tree-eh.h" +#include "emit-rtl.h" +#include "fold-const.h" +#include "stmt.h" +#endif #include "tree-pass.h" +#if (GCC_MAJOR > 4) +#define ENTRY_BLOCK_PTR (cfun->cfg->x_entry_block_ptr) +#define FOR_EACH_BB(BB) FOR_EACH_BB_FN (BB, cfun) +#define MAX_RECOG_OPERANDS 101 +#define MIG_TO_GCALL(STMT) as_a(STMT) +#define MIG_TO_GASM(STMT) as_a(STMT) +#define MIG_TO_GSWITCH(STMT) as_a(STMT) +#else +#define MIG_TO_GCALL(STMT) STMT +#define MIG_TO_GASM(STMT) STMT +#define MIG_TO_GSWITCH(STMT) STMT +#endif + using namespace llvm; -#if (GCC_MINOR < 6) +#if GCC_VERSION_CODE < GCC_VERSION(4, 6) extern enum machine_mode reg_raw_mode[FIRST_PSEUDO_REGISTER]; #else // TODO: Submit a GCC patch to install "regs.h" as a plugin header. @@ -86,9 +115,13 @@ #define reg_raw_mode (default_target_regs.x_reg_raw_mode) #endif +#if (GCC_MAJOR < 5) #if (GCC_MINOR == 6) extern void debug_gimple_stmt(union gimple_statement_d *); #endif +#else +extern void debug_gimple_stmt(gimple *stmt); +#endif #ifndef ENABLE_BUILD_WITH_CXX } // extern "C" @@ -97,7 +130,11 @@ // Trees header. 
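// Hedged reconstruction of what the MIG_TO_* wrappers above are meant to expand
// to.  In GCC 5 the flat gimple statement type became a class hierarchy, and the
// typed accessors (gimple_call_arg, gimple_asm_string, gimple_switch_*, ...)
// expect the derived statement types, so the macros wrap a checked downcast:
//
//   #define MIG_TO_GCALL(STMT)   as_a<gcall *>(STMT)
//   #define MIG_TO_GASM(STMT)    as_a<gasm *>(STMT)
//   #define MIG_TO_GSWITCH(STMT) as_a<gswitch *>(STMT)
//
// On GCC 4.x they are identity wrappers, so a call site such as
// gimple_call_arg(MIG_TO_GCALL(stmt), i) compiles unchanged on both sides.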
#include "dragonegg/Trees.h" -static LLVMContext &Context = getGlobalContext(); +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) +static LLVMContext TheContext; +#else +static LLVMContext &TheContext = getGlobalContext(); +#endif #define DEBUG_TYPE "dragonegg" STATISTIC(NumBasicBlocks, "Number of basic blocks converted"); @@ -108,7 +145,7 @@ static unsigned int getPointerAlignment(tree exp) { assert(isa(TREE_TYPE(exp)) && "Expected a pointer type!"); unsigned int align = -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) get_pointer_alignment(exp, BIGGEST_ALIGNMENT); #else get_pointer_alignment(exp); @@ -170,9 +207,19 @@ DisplaceLocationByUnits(MemRef Loc, int32_t Offset, LLVMBuilder &Builder) { // Convert to a byte pointer and displace by the offset. unsigned AddrSpace = Loc.Ptr->getType()->getPointerAddressSpace(); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Loc.Ptr->getType()->getContext(); +#else + TheContext; +#endif Type *UnitPtrTy = GetUnitPointerType(Context, AddrSpace); Value *Ptr = Builder.CreateBitCast(Loc.Ptr, UnitPtrTy); - Ptr = Builder.CreateConstInBoundsGEP1_32(Ptr, Offset, + Ptr = Builder.CreateConstInBoundsGEP1_32( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + UnitPtrTy, +#endif + Ptr, Offset, flag_verbose_asm ? "dsplc" : ""); Ptr = Builder.CreateBitCast(Ptr, Loc.Ptr->getType()); uint32_t Align = MinAlign(Loc.getAlignment(), Offset); @@ -332,6 +379,12 @@ // Unlike GCC's, LLVM ranges do not include the upper end point. ++Hi; + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(TREE_TYPE(type))->getContext(); +#else + TheContext; +#endif MDBuilder MDHelper(Context); return MDHelper.createRange(Lo, Hi); } @@ -411,6 +464,13 @@ return LI; } + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(TREE_TYPE(type))->getContext(); +#else + TheContext; +#endif + // There is a discrepancy between the in-register type and the in-memory type. switch (TREE_CODE(type)) { default: @@ -496,6 +556,13 @@ return; } + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(TREE_TYPE(type))->getContext(); +#else + TheContext; +#endif + // There is a discrepancy between the in-register type and the in-memory type. switch (TREE_CODE(type)) { default: @@ -571,7 +638,11 @@ TreeToLLVM *TheTreeToLLVM = 0; const DataLayout &getDataLayout() { +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + return TheTarget->createDataLayout(); +#else return *TheTarget->getSubtargetImpl()->getDataLayout(); +#endif } /// EmitDebugInfo - Return true if debug info is to be emitted for current @@ -583,7 +654,13 @@ } TreeToLLVM::TreeToLLVM(tree fndecl) - : DL(getDataLayout()), Builder(Context, *TheFolder) { + : DL(getDataLayout()), Builder( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(TREE_TYPE(fndecl))->getContext(), +#else + TheContext, +#endif + *TheFolder) { FnDecl = fndecl; AllocaInsertionPoint = 0; Fn = 0; @@ -689,6 +766,12 @@ assert(!BYTES_BIG_ENDIAN && "Unsupported case - please report"); // Do byte wise store because actual argument type does not match LLVMTy. 
assert(ArgVal->getType()->isIntegerTy() && "Expected an integer value!"); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + LLVMTy->getContext(); +#else + TheContext; +#endif Type *StoreType = IntegerType::get(Context, RealSize * 8); Loc = Builder.CreateBitCast(Loc, StoreType->getPointerTo()); if (ArgVal->getType()->getPrimitiveSizeInBits() >= @@ -760,7 +843,7 @@ tree ResultDecl = DECL_RESULT(FunctionDecl); tree RetTy = TREE_TYPE(TREE_TYPE(FunctionDecl)); if (TREE_CODE(RetTy) == TREE_CODE(TREE_TYPE(ResultDecl))) { - TheTreeToLLVM->set_decl_local(ResultDecl, AI); + TheTreeToLLVM->set_decl_local(ResultDecl, llvm::dyn_cast(AI)); ++AI; return; } @@ -770,11 +853,17 @@ "Not type match and not passing by reference?"); // Create an alloca for the ResultDecl. Value *Tmp = TheTreeToLLVM->CreateTemporary(AI->getType()); - Builder.CreateStore(AI, Tmp); + Builder.CreateStore(llvm::dyn_cast(AI), Tmp); TheTreeToLLVM->set_decl_local(ResultDecl, Tmp); if (TheDebugInfo && !DECL_IGNORED_P(FunctionDecl)) { - TheDebugInfo->EmitDeclare(ResultDecl, dwarf::DW_TAG_auto_variable, + // https://reviews.llvm.org/rL243774 + TheDebugInfo->EmitDeclare(ResultDecl, +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + dwarf::DW_TAG_invalid, +#else + dwarf::DW_TAG_auto_variable, +#endif "agg.result", RetTy, Tmp, Builder); } ++AI; @@ -785,13 +874,13 @@ "No explicit return value?"); AI->setName("scalar.result"); isShadowRet = true; - TheTreeToLLVM->set_decl_local(DECL_RESULT(FunctionDecl), AI); + TheTreeToLLVM->set_decl_local(DECL_RESULT(FunctionDecl), llvm::dyn_cast(AI)); ++AI; } void HandleScalarArgument(llvm::Type *LLVMTy, tree /*type*/, unsigned RealSize = 0) { - Value *ArgVal = AI; + Value *ArgVal = llvm::dyn_cast(AI); if (ArgVal->getType() != LLVMTy) { if (ArgVal->getType()->isPointerTy() && LLVMTy->isPointerTy()) { // If this is GCC being sloppy about pointer types, insert a bitcast. @@ -827,11 +916,17 @@ // bytes, but only 10 are copied. If the object is really a union // we might need the other bytes. We must also be careful to use // the smaller alignment. + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(TREE_TYPE(type))->getContext(); +#else + TheContext; +#endif Type *SBP = Type::getInt8PtrTy(Context); Type *IntPtr = getDataLayout().getIntPtrType(Context, 0); Value *Ops[5] = { Builder.CreateCast(Instruction::BitCast, Loc, SBP), - Builder.CreateCast(Instruction::BitCast, AI, SBP), + Builder.CreateCast(Instruction::BitCast, llvm::dyn_cast(AI), SBP), ConstantInt::get(IntPtr, TREE_INT_CST_LOW(TYPE_SIZE_UNIT(type))), Builder.getInt32(LLVM_BYVAL_ALIGNMENT(type)), Builder.getFalse() }; @@ -849,7 +944,7 @@ // Store the FCA argument into alloca. assert(!LocStack.empty()); Value *Loc = LocStack.back(); - Builder.CreateStore(AI, Loc); + Builder.CreateStore(llvm::dyn_cast(AI), Loc); AI->setName(NameStack.back()); ++AI; } @@ -865,7 +960,11 @@ // This cast only involves pointers, therefore BitCast. Loc = Builder.CreateBitCast(Loc, StructTy->getPointerTo()); - Loc = Builder.CreateStructGEP(Loc, FieldNo, flag_verbose_asm ? "ntr" : ""); + Loc = Builder.CreateStructGEP( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + StructTy, +#endif + Loc, FieldNo, flag_verbose_asm ? 
"ntr" : ""); LocStack.push_back(Loc); } void ExitField() { @@ -907,6 +1006,12 @@ FunctionType *FTy; CallingConv::ID CallingConv; AttributeSet PAL; + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + FTy->getContext(); +#else + TheContext; +#endif // If this is a K&R-style function: with a type that takes no arguments but // with arguments none the less, then calculate the LLVM type from the list @@ -989,7 +1094,12 @@ TARGET_ADJUST_LLVM_LINKAGE(Fn, FnDecl); #endif /* TARGET_ADJUST_LLVM_LINKAGE */ - Fn->setUnnamedAddr(!TREE_ADDRESSABLE(FnDecl)); + Fn->setUnnamedAddr(!TREE_ADDRESSABLE(FnDecl) +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ? llvm::GlobalValue::UnnamedAddr::Global + : llvm::GlobalValue::UnnamedAddr::Local +#endif + ); // Handle visibility style handleVisibility(FnDecl, Fn); @@ -1006,7 +1116,13 @@ // Handle functions in specified sections. if (DECL_SECTION_NAME(FnDecl)) - Fn->setSection(TREE_STRING_POINTER(DECL_SECTION_NAME(FnDecl))); + Fn->setSection( +#if (GCC_MAJOR > 4) + StringRef(DECL_SECTION_NAME(FnDecl)) +#else + TREE_STRING_POINTER(DECL_SECTION_NAME(FnDecl)) +#endif + ); // Handle used Functions if (lookup_attribute("used", DECL_ATTRIBUTES(FnDecl))) @@ -1115,10 +1231,16 @@ // alignment of the type (examples are x86-32 aggregates containing long // double and large x86-64 vectors), we need to make the copy. AI->setName(Name); - SET_DECL_LOCAL(Args, AI); + SET_DECL_LOCAL(Args, llvm::dyn_cast(AI)); if (!isInvRef && EmitDebugInfo()) - TheDebugInfo->EmitDeclare(Args, dwarf::DW_TAG_arg_variable, Name, - TREE_TYPE(Args), AI, Builder); + TheDebugInfo->EmitDeclare(Args, +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + dwarf::DW_TAG_invalid, +#else + dwarf::DW_TAG_arg_variable, +#endif + Name, TREE_TYPE(Args), + llvm::dyn_cast(AI), Builder); ABIConverter.HandleArgument(TREE_TYPE(Args), ScalarArgs); } else { // Otherwise, we create an alloca to hold the argument value and provide @@ -1128,8 +1250,13 @@ Tmp->setName(Name + "_addr"); SET_DECL_LOCAL(Args, Tmp); if (EmitDebugInfo()) { - TheDebugInfo->EmitDeclare(Args, dwarf::DW_TAG_arg_variable, Name, - TREE_TYPE(Args), Tmp, Builder); + TheDebugInfo->EmitDeclare(Args, +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + dwarf::DW_TAG_invalid, +#else + dwarf::DW_TAG_arg_variable, +#endif + Name, TREE_TYPE(Args), Tmp, Builder); } // Emit annotate intrinsic if arg has annotate attr @@ -1223,7 +1350,11 @@ // Extract the incoming value for each predecessor from the GCC phi node. for (unsigned i = 0, e = gimple_phi_num_args(P.gcc_phi); i != e; ++i) { // The incoming GCC basic block. - basic_block bb = gimple_phi_arg_edge(P.gcc_phi, i)->src; + basic_block bb = gimple_phi_arg_edge( +#if (GCC_MAJOR > 4) + (gphi *) +#endif + P.gcc_phi, i)->src; // The corresponding LLVM basic block. DenseMap::iterator BI = BasicBlocks.find(bb); @@ -1244,7 +1375,13 @@ for (++FI; FI != FE && !FI->hasName(); ++FI) { assert(FI->getSinglePredecessor() == IncomingValues.back().first && "Anonymous block does not continue predecessor!"); - IncomingValues.push_back(std::make_pair(FI, val)); + IncomingValues.push_back(std::make_pair( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + llvm::dyn_cast(FI), +#else + FI, +#endif + val)); } } @@ -1355,6 +1492,12 @@ } else { // Advance to the point we want to load from. 
if (ReturnOffset) { + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Fn->getReturnType()->getContext(); +#else + TheContext; +#endif ResultLV.Ptr = Builder .CreateBitCast(ResultLV.Ptr, Type::getInt8PtrTy(Context)); ResultLV.Ptr = Builder.CreateGEP( @@ -1488,7 +1631,7 @@ return I->second; // Otherwise, create a new LLVM basic block. - BasicBlock *BB = BasicBlock::Create(Context); + BasicBlock *BB = BasicBlock::Create(TheContext); // All basic blocks that directly correspond to GCC basic blocks (those // created here) must have a name. All artificial basic blocks produced @@ -1499,9 +1642,13 @@ // use the same naming scheme as GCC. if (flag_verbose_asm) { // If BB contains labels, name the LLVM basic block after the first label. - gimple stmt = first_stmt(bb); + GimpleTy *stmt = first_stmt(bb); if (stmt && gimple_code(stmt) == GIMPLE_LABEL) { - tree label = gimple_label_label(stmt); + tree label = gimple_label_label( +#if (GCC_MAJOR > 4) + as_a +#endif + (stmt)); const std::string &LabelName = getDescriptiveName(label); if (!LabelName.empty()) BB->setName("<" + LabelName + ">"); @@ -1549,7 +1696,7 @@ // the phi uses may not have been defined yet - phis are special this way. for (gimple_stmt_iterator gsi = gsi_start_phis(bb); !gsi_end_p(gsi); gsi_next(&gsi)) { - gimple gcc_phi = gsi_stmt(gsi); + GimpleTy *gcc_phi = gsi_stmt(gsi); // Skip virtual operands. if (!is_gimple_reg(gimple_phi_result(gcc_phi))) continue; @@ -1573,7 +1720,7 @@ // Render statements. for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) { - gimple stmt = gsi_stmt(gsi); + GimpleTy *stmt = gsi_stmt(gsi); input_location = gimple_location(stmt); ++NumStatements; @@ -1671,7 +1818,11 @@ FMF.setAllowReciprocal(); if (flag_unsafe_math_optimizations && flag_finite_math_only) FMF.setUnsafeAlgebra(); +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Builder.setFastMathFlags(FMF); +#else Builder.SetFastMathFlags(FMF); +#endif // Set up parameters and prepare for return, for the function. StartFunctionBody(); @@ -1783,7 +1934,7 @@ case INDIRECT_REF: LV = EmitLV_INDIRECT_REF(exp); break; -#if (GCC_MINOR < 6) +#if GCC_VERSION_CODE < GCC_VERSION(4, 6) case MISALIGNED_INDIRECT_REF: LV = EmitLV_MISALIGNED_INDIRECT_REF(exp); break; @@ -1821,6 +1972,12 @@ if (!CastInst::isCastable(SrcTy, DestTy)) { unsigned SrcBits = SrcTy->getScalarSizeInBits(); unsigned DestBits = DestTy->getScalarSizeInBits(); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + SrcTy->getContext(); +#else + TheContext; +#endif if (SrcBits && !isa(SrcTy)) { Type *IntTy = IntegerType::get(Context, SrcBits); Src = Builder.CreateBitCast(Src, IntTy); @@ -1856,6 +2013,12 @@ if (!CastInst::isCastable(SrcTy, DestTy)) { unsigned SrcBits = SrcTy->getScalarSizeInBits(); unsigned DestBits = DestTy->getScalarSizeInBits(); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + SrcTy->getContext(); +#else + TheContext; +#endif if (SrcBits && !isa(SrcTy)) { Type *IntTy = IntegerType::get(Context, SrcBits); Src = TheFolder->CreateBitCast(Src, IntTy); @@ -1915,6 +2078,12 @@ // Everything else. 
assert(OrigEltTy->isFloatingPointTy() && "Expected a floating point type!"); unsigned BitWidth = OrigEltTy->getPrimitiveSizeInBits(); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + OrigTy->getContext(); +#else + TheContext; +#endif Type *NewEltTy = IntegerType::get(Context, BitWidth); if (VectorType *VecTy = llvm::dyn_cast(OrigTy)) { Type *NewTy = VectorType::get(NewEltTy, VecTy->getNumElements()); @@ -1979,6 +2148,12 @@ // alloc instructions before. It doesn't matter what this instruction is, // it is dead. This allows us to insert allocas in order without having to // scan for an insertion point. Use BitCast for int -> int + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif AllocaInsertionPoint = CastInst::Create( Instruction::BitCast, Constant::getNullValue(Type::getInt32Ty(Context)), Type::getInt32Ty(Context), "alloca point"); @@ -2111,8 +2286,14 @@ int FieldIdx = GetFieldIndex(Field, Ty); assert(FieldIdx != INT_MAX && "Should not be copying if no LLVM field!"); Value *DestFieldPtr = Builder.CreateStructGEP( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty, +#endif DestLoc.Ptr, FieldIdx, flag_verbose_asm ? "df" : ""); Value *SrcFieldPtr = Builder.CreateStructGEP( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty, +#endif SrcLoc.Ptr, FieldIdx, flag_verbose_asm ? "sf" : ""); // Compute the field's alignment. @@ -2146,8 +2327,14 @@ Value *DestCompPtr = DestLoc.Ptr, *SrcCompPtr = SrcLoc.Ptr; if (i) { DestCompPtr = Builder.CreateConstInBoundsGEP1_32( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + CompType, +#endif DestCompPtr, i, flag_verbose_asm ? "da" : ""); SrcCompPtr = Builder.CreateConstInBoundsGEP1_32( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + CompType, +#endif SrcCompPtr, i, flag_verbose_asm ? "sa" : ""); } @@ -2213,7 +2400,11 @@ // Get the address of the field. int FieldIdx = GetFieldIndex(Field, Ty); assert(FieldIdx != INT_MAX && "Should not be zeroing if no LLVM field!"); - Value *FieldPtr = Builder.CreateStructGEP(DestLoc.Ptr, FieldIdx, + Value *FieldPtr = Builder.CreateStructGEP( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty, +#endif + DestLoc.Ptr, FieldIdx, flag_verbose_asm ? "zf" : ""); // Compute the field's alignment. @@ -2242,6 +2433,9 @@ Value *CompPtr = DestLoc.Ptr; if (i) CompPtr = Builder.CreateConstInBoundsGEP1_32( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + CompType, +#endif CompPtr, i, flag_verbose_asm ? "za" : ""); // Compute the component's alignment. 
@@ -2274,7 +2468,12 @@ Value *TreeToLLVM::EmitMemCpy(Value *DestPtr, Value *SrcPtr, Value *Size, unsigned Align) { - + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + DestPtr->getType()->getContext(); +#else + TheContext; +#endif Type *SBP = Type::getInt8PtrTy(Context); Type *IntPtr = DL.getIntPtrType(DestPtr->getType()); Value *Ops[5] = { Builder.CreateBitCast(DestPtr, SBP), @@ -2290,6 +2489,12 @@ Value *TreeToLLVM::EmitMemMove(Value *DestPtr, Value *SrcPtr, Value *Size, unsigned Align) { + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + DestPtr->getType()->getContext(); +#else + TheContext; +#endif Type *SBP = Type::getInt8PtrTy(Context); Type *IntPtr = DL.getIntPtrType(DestPtr->getType()); Value *Ops[5] = { Builder.CreateBitCast(DestPtr, SBP), @@ -2305,6 +2510,12 @@ Value *TreeToLLVM::EmitMemSet(Value *DestPtr, Value *SrcVal, Value *Size, unsigned Align) { + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + DestPtr->getType()->getContext(); +#else + TheContext; +#endif Type *SBP = Type::getInt8PtrTy(Context); Type *IntPtr = DL.getIntPtrType(DestPtr->getType()); Value *Ops[5] = { Builder.CreateBitCast(DestPtr, SBP), @@ -2328,6 +2539,12 @@ // The idea is that it's a pointer to type "Value" // which is opaque* but the routine expects i8** and i8*. + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + V->getType()->getContext(); +#else + TheContext; +#endif PointerType *Ty = Type::getInt8PtrTy(Context); V = Builder.CreateBitCast(V, Ty->getPointerTo()); @@ -2348,6 +2565,12 @@ Function *annotateFun = Intrinsic::getDeclaration(TheModule, Intrinsic::var_annotation); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + V->getType()->getContext(); +#else + TheContext; +#endif // Get file and line number Constant *lineNo = ConstantInt::get(Type::getInt32Ty(Context), DECL_SOURCE_LINE(decl)); @@ -2413,6 +2636,12 @@ } else { // Compute the variable's size in bytes. 
Size = EmitRegister(DECL_SIZE_UNIT(decl)); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Size->getType()->getContext(); +#else + TheContext; +#endif Ty = Type::getInt8Ty(Context); } @@ -2455,7 +2684,12 @@ if (EmitDebugInfo()) { if (DECL_NAME(decl) || isa(decl)) { - TheDebugInfo->EmitDeclare(decl, dwarf::DW_TAG_auto_variable, + TheDebugInfo->EmitDeclare(decl, +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + dwarf::DW_TAG_invalid, +#else + dwarf::DW_TAG_auto_variable, +#endif AI->getName(), TREE_TYPE(decl), AI, Builder); } } @@ -2489,6 +2723,12 @@ AllocaInst *&ExceptionPtr = ExceptionPtrs[RegionNo]; if (!ExceptionPtr) { + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ExceptionPtr->getAllocatedType()->getContext(); +#else + TheContext; +#endif ExceptionPtr = CreateTemporary(Type::getInt8PtrTy(Context)); ExceptionPtr->setName("exc_tmp"); } @@ -2507,6 +2747,12 @@ AllocaInst *&ExceptionFilter = ExceptionFilters[RegionNo]; if (!ExceptionFilter) { + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ExceptionFilter->getAllocatedType()->getContext(); +#else + TheContext; +#endif ExceptionFilter = CreateTemporary(Type::getInt32Ty(Context)); ExceptionFilter->setName("filt_tmp"); } @@ -2525,7 +2771,7 @@ BasicBlock *&FailureBlock = FailureBlocks[RegionNo]; if (!FailureBlock) - FailureBlock = BasicBlock::Create(Context, "fail"); + FailureBlock = BasicBlock::Create(TheContext, "fail"); return FailureBlock; } @@ -2560,6 +2806,12 @@ continue; // Create the LLVM landing pad right before the GCC post landing pad. + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + PostPad->getContext(); +#else + TheContext; +#endif BasicBlock *LPad = BasicBlock::Create(Context, "lpad", Fn, PostPad); // Redirect invoke unwind edges from the GCC post landing pad to LPad. @@ -2628,7 +2880,12 @@ unsigned RegionNo = region->index; // Insert instructions at the start of the landing pad, but after any phis. - Builder.SetInsertPoint(LPad, LPad->getFirstNonPHI()); + // https://reviews.llvm.org/rL249925 + Builder.SetInsertPoint( +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) + LPad, +#endif + LPad->getFirstNonPHI()); // Create the landingpad instruction without any clauses. Clauses are added // below. @@ -2638,8 +2895,13 @@ "No exception handling personality!"); personality = lang_hooks.eh_personality(); } + // https://reviews.llvm.org/D10429 LandingPadInst *LPadInst = Builder.CreateLandingPad( - UnwindDataTy, DECL_LLVM(personality), 0, "exc"); + UnwindDataTy, +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) + DECL_LLVM(personality), +#endif + 0, "exc"); // Store the exception pointer if made use of elsewhere. 
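// Hedged sketch of the landing-pad API change referenced above (D10429): from
// LLVM 3.7 the personality function is a property of the enclosing Function and
// CreateLandingPad no longer takes it.  One way to express that, assuming Fn is
// the current llvm::Function and DECL_LLVM(personality) yields the personality
// routine as elsewhere in dragonegg:
#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8)
  Fn->setPersonalityFn(llvm::cast<llvm::Constant>(DECL_LLVM(personality)));
  llvm::LandingPadInst *LPadInst = Builder.CreateLandingPad(UnwindDataTy, 0, "exc");
#else
  llvm::LandingPadInst *LPadInst =
      Builder.CreateLandingPad(UnwindDataTy, DECL_LLVM(personality), 0, "exc");
#endif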
if (RegionNo < ExceptionPtrs.size() && ExceptionPtrs[RegionNo]) { @@ -2725,6 +2987,12 @@ if (!FailureBlock) continue; + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + FailureBlock->getContext(); +#else + TheContext; +#endif eh_region region = get_eh_region_from_number(RegionNo); assert(region->type == ERT_MUST_NOT_THROW && "Unexpected region type!"); @@ -2766,8 +3034,13 @@ StructType::get(Builder.getInt8PtrTy(), Builder.getInt32Ty(), NULL); tree personality = DECL_FUNCTION_PERSONALITY(FnDecl); assert(personality && "No-throw region but no personality function!"); + // https://reviews.llvm.org/D10429 LandingPadInst *LPadInst = Builder.CreateLandingPad( - UnwindDataTy, DECL_LLVM(personality), 1, "exc"); + UnwindDataTy, +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) + DECL_LLVM(personality), +#endif + 1, "exc"); ArrayType *FilterTy = ArrayType::get(Builder.getInt8PtrTy(), 0); LPadInst->addClause(ConstantArray::get(FilterTy, ArrayRef())); @@ -2855,7 +3128,17 @@ // Load the minimum number of bytes that covers the field. unsigned LoadSizeInBits = LV.BitStart + LV.BitSize; +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + LoadSizeInBits = alignTo(LoadSizeInBits, BITS_PER_UNIT); +#else LoadSizeInBits = RoundUpToAlignment(LoadSizeInBits, BITS_PER_UNIT); +#endif + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif Type *LoadType = IntegerType::get(Context, LoadSizeInBits); // Load the bits. @@ -2914,7 +3197,7 @@ return Builder.CreateBitCast(LV.Ptr, getRegType(TREE_TYPE(exp))); } -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) Value *TreeToLLVM::EmitCondExpr(tree exp) { return TriviallyTypeConvert( EmitReg_CondExpr(TREE_OPERAND(exp, 0), TREE_OPERAND(exp, 1), @@ -2928,7 +3211,7 @@ getRegType(TREE_TYPE(exp))); } -#if (GCC_MINOR < 8) +#if GCC_VERSION_CODE < GCC_VERSION(4, 8) INSTANTIATE_VECTOR(constructor_elt); #endif @@ -3021,6 +3304,12 @@ // Not clear what this is supposed to do on big endian machines... assert(!BYTES_BIG_ENDIAN && "Unsupported case - please report"); assert(LLVMTy->isIntegerTy() && "Expected an integer value!"); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + LLVMTy->getContext(); +#else + TheContext; +#endif Type *LoadType = IntegerType::get(Context, RealSize * 8); L = Builder.CreateBitCast(L, LoadType->getPointerTo()); Value *Val = Builder.CreateLoad(L); @@ -3267,7 +3556,11 @@ Value *Loc = getAddress(); Loc = Builder.CreateBitCast(Loc, StructTy->getPointerTo()); pushAddress( - Builder.CreateStructGEP(Loc, FieldNo, flag_verbose_asm ? "elt" : "")); + Builder.CreateStructGEP( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + StructTy, +#endif + Loc, FieldNo, flag_verbose_asm ? "elt" : "")); } void ExitField() { assert(!LocStack.empty()); @@ -3279,8 +3572,8 @@ /// EmitCallOf - Emit a call to the specified callee with the operands specified /// in the GIMPLE_CALL 'stmt'. If the result of the call is a scalar, return the /// result, otherwise store it in DestLoc. -Value *TreeToLLVM::EmitCallOf(Value *Callee, gimple stmt, const MemRef *DestLoc, - const AttributeSet &InPAL) { +Value *TreeToLLVM::EmitCallOf(Value *Callee, GimpleTy *stmt, + const MemRef *DestLoc, const AttributeSet &InPAL) { BasicBlock *LandingPad = 0; // Non-zero indicates an invoke. int LPadNo = 0; @@ -3320,7 +3613,7 @@ } tree fndecl = gimple_call_fndecl(stmt); -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) tree fntype = fndecl ? 
TREE_TYPE(fndecl) : TREE_TYPE(TREE_TYPE(gimple_call_fn(stmt))); #else @@ -3336,24 +3629,30 @@ SmallVector CallOperands; PointerType *PFTy = cast(Callee->getType()); FunctionType *FTy = cast(PFTy->getElementType()); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + FTy->getContext(); +#else + TheContext; +#endif FunctionCallArgumentConversion Client(CallOperands, FTy, DestLoc, - gimple_call_return_slot_opt_p(stmt), - Builder, CallingConvention); + gimple_call_return_slot_opt_p(MIG_TO_GCALL(stmt)), Builder, + CallingConvention); DefaultABI ABIConverter(Client); // Handle the result, including struct returns. - ABIConverter.HandleReturnType(gimple_call_return_type(stmt), + ABIConverter.HandleReturnType(gimple_call_return_type(MIG_TO_GCALL(stmt)), fndecl ? fndecl : fntype, fndecl ? DECL_BUILT_IN(fndecl) : false); // Pass the static chain, if any, as the first parameter. - if (gimple_call_chain(stmt)) - CallOperands.push_back(EmitMemory(gimple_call_chain(stmt))); + if (gimple_call_chain(MIG_TO_GCALL(stmt))) + CallOperands.push_back(EmitMemory(gimple_call_chain(MIG_TO_GCALL(stmt)))); // Loop over the arguments, expanding them and adding them to the op list. std::vector ScalarArgs; - for (unsigned i = 0, e = gimple_call_num_args(stmt); i != e; ++i) { - tree arg = gimple_call_arg(stmt, i); + for (unsigned i = 0, e = gimple_call_num_args(MIG_TO_GCALL(stmt)); i != e; ++i) { + tree arg = gimple_call_arg(MIG_TO_GCALL(stmt), i); tree type = TREE_TYPE(arg); Type *ArgTy = ConvertType(type); @@ -3449,11 +3748,11 @@ // If the call statement has void type then either the callee does not return // a result, or it does but the result should be discarded. - if (isa(gimple_call_return_type(stmt))) + if (isa(gimple_call_return_type(MIG_TO_GCALL(stmt)))) return 0; if (Client.isShadowReturn()) - return Client.EmitShadowResult(gimple_call_return_type(stmt), DestLoc); + return Client.EmitShadowResult(gimple_call_return_type(MIG_TO_GCALL(stmt)), DestLoc); if (Client.isAggrReturn()) { MemRef Target; @@ -3462,7 +3761,7 @@ else // Destination is a first class value (eg: a complex number). Extract to // a temporary then load the value out later. - Target = CreateTempLoc(ConvertType(gimple_call_return_type(stmt))); + Target = CreateTempLoc(ConvertType(gimple_call_return_type(MIG_TO_GCALL(stmt)))); if (DL.getTypeAllocSize(Call->getType()) <= DL.getTypeAllocSize(cast(Target.Ptr->getType()) @@ -3484,14 +3783,15 @@ Target, MemRef(Builder.CreateBitCast(biggerTmp, Call->getType()->getPointerTo()), Target.getAlignment(), Target.Volatile), - gimple_call_return_type(stmt)); + gimple_call_return_type(MIG_TO_GCALL(stmt))); } return DestLoc ? 0 : Builder.CreateLoad(Target.Ptr); } if (!DestLoc) { - Type *RetTy = ConvertType(gimple_call_return_type(stmt)); + Type *RetTy = ConvertType(gimple_call_return_type( + MIG_TO_GCALL(stmt))); if (Call->getType() == RetTy) return Call; // Normal scalar return. @@ -3581,6 +3881,12 @@ #endif va_end(ops); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(TREE_TYPE(ret_type))->getContext(); +#else + TheContext; +#endif Type *RetTy = isa(ret_type) ? Type::getVoidTy(Context) : getRegType(ret_type); @@ -3670,6 +3976,12 @@ // Turn this into a 'call void asm sideeffect "", "{reg}"(Ty %RHS)'. 
std::vector ArgTys; ArgTys.push_back(RHS->getType()); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + RHS->getType()->getContext(); +#else + TheContext; +#endif FunctionType *FTy = FunctionType::get(Type::getVoidTy(Context), ArgTys, false); @@ -3697,13 +4009,13 @@ /// punctuation. /// Other %xN expressions are turned into LLVM ${N:x} operands. /// -static std::string ConvertInlineAsmStr(gimple stmt, unsigned NumOperands) { - const char *AsmStr = gimple_asm_string(stmt); +static std::string ConvertInlineAsmStr(GimpleTy *stmt, unsigned NumOperands) { + const char *AsmStr = gimple_asm_string(MIG_TO_GASM(stmt)); // gimple_asm_input_p - This flag is set if this is a non-extended ASM, // which means that the asm string should not be interpreted, other than // to escape $'s. - if (gimple_asm_input_p(stmt)) { + if (gimple_asm_input_p(MIG_TO_GASM(stmt))) { const char *InStr = AsmStr; std::string Result; while (1) { @@ -3794,13 +4106,13 @@ /// isOperandMentioned - Return true if the given operand is explicitly /// mentioned in the asm string. For example if passed operand 1 then /// this routine checks that the asm string does not contain "%1". -static bool isOperandMentioned(gimple stmt, unsigned OpNum) { +static bool isOperandMentioned(GimpleTy *stmt, unsigned OpNum) { // If this is a non-extended ASM then the contents of the asm string are not // to be interpreted. - if (gimple_asm_input_p(stmt)) + if (gimple_asm_input_p(MIG_TO_GASM(stmt))) return false; // Search for a non-escaped '%' character followed by OpNum. - for (const char *AsmStr = gimple_asm_string(stmt); * AsmStr; ++AsmStr) { + for (const char *AsmStr = gimple_asm_string(MIG_TO_GASM(stmt)); * AsmStr; ++AsmStr) { if (*AsmStr != '%') // Not a '%', move on to next character. continue; @@ -3884,7 +4196,12 @@ // REG_CLASS_FROM_CONSTRAINT doesn't support 'r' for some reason. RegClass = GENERAL_REGS; else - RegClass = REG_CLASS_FROM_CONSTRAINT(Constraint[-1], Constraint - 1); + RegClass = +#if (GCC_MAJOR > 4) + reg_class_for_constraint(lookup_constraint(Constraint - 1)); +#else + REG_CLASS_FROM_CONSTRAINT(Constraint[-1], Constraint - 1); +#endif if (RegClass == NO_REGS) { // not a reg class. Result += ConstraintChar; @@ -3949,7 +4266,12 @@ if (*p == 'r') RegClass = GENERAL_REGS; else - RegClass = REG_CLASS_FROM_CONSTRAINT(*p, p); + RegClass = +#if (GCC_MAJOR > 4) + reg_class_for_constraint(lookup_constraint(p)); +#else + REG_CLASS_FROM_CONSTRAINT(*p, p); +#endif if (RegClass != NO_REGS && TEST_HARD_REG_BIT(reg_class_contents[RegClass], RegNum)) { RetVal = 1; @@ -3993,11 +4315,11 @@ /// gcc's algorithm for picking "the best" tuple is quite complicated, and /// is performed after things like SROA, not before. At the moment we are /// just trying to pick one that will work. This may get refined. 
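// Hedged sketch of the constraint-class query shown above: GCC 5 dropped the
// REG_CLASS_FROM_CONSTRAINT macro in favour of lookup_constraint plus
// reg_class_for_constraint.  regClassForConstraint is a hypothetical wrapper.
static enum reg_class regClassForConstraint(const char *p) {
#if (GCC_MAJOR > 4)
  return reg_class_for_constraint(lookup_constraint(p));
#else
  return REG_CLASS_FROM_CONSTRAINT(*p, p);
#endif
}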
-static void ChooseConstraintTuple(gimple stmt, const char **Constraints, +static void ChooseConstraintTuple(GimpleTy *stmt, const char **Constraints, unsigned NumChoices, BumpPtrAllocator &StringStorage) { - unsigned NumInputs = gimple_asm_ninputs(stmt); - unsigned NumOutputs = gimple_asm_noutputs(stmt); + unsigned NumInputs = gimple_asm_ninputs(MIG_TO_GASM(stmt)); + unsigned NumOutputs = gimple_asm_noutputs(MIG_TO_GASM(stmt)); int MaxWeight = -1; unsigned int CommasToSkip = 0; @@ -4013,7 +4335,7 @@ for (unsigned i = 0; i != NumChoices; ++i) { Weights[i] = 0; for (unsigned j = 0; j != NumOutputs; ++j) { - tree Output = gimple_asm_output_op(stmt, j); + tree Output = gimple_asm_output_op(MIG_TO_GASM(stmt), j); if (i == 0) RunningConstraints[j]++; // skip leading = const char *p = RunningConstraints[j]; @@ -4039,7 +4361,7 @@ RunningConstraints[j] = p; } for (unsigned j = 0; j != NumInputs; ++j) { - tree Input = gimple_asm_input_op(stmt, j); + tree Input = gimple_asm_input_op(MIG_TO_GASM(stmt), j); const char *p = RunningConstraints[NumOutputs + j]; if (Weights[i] != -1) { int w = MatchWeight(p, TREE_VALUE(Input)); @@ -4149,6 +4471,12 @@ assert(InVec1->getType()->isVectorTy() && InVec1->getType() == InVec2->getType() && "Invalid shuffle!"); unsigned NumElements = cast(InVec1->getType())->getNumElements(); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + InVec1->getType()->getContext(); +#else + TheContext; +#endif // Get all the indexes from varargs. SmallVector Idxs; @@ -4179,14 +4507,15 @@ /// /// This method returns true if the builtin is handled, otherwise false. /// -bool TreeToLLVM::EmitFrontendExpandedBuiltinCall( - gimple stmt, tree fndecl, const MemRef *DestLoc, Value *&Result) { +bool TreeToLLVM::EmitFrontendExpandedBuiltinCall(GimpleTy *stmt, tree fndecl, + const MemRef *DestLoc, + Value *&Result) { #ifdef LLVM_TARGET_INTRINSIC_LOWER // Get the result type and operand line in an easy to consume format. Type *ResultType = ConvertType(TREE_TYPE(TREE_TYPE(fndecl))); std::vector Operands; - for (unsigned i = 0, e = gimple_call_num_args(stmt); i != e; ++i) { - tree OpVal = gimple_call_arg(stmt, i); + for (unsigned i = 0, e = gimple_call_num_args(MIG_TO_GCALL(stmt)); i != e; ++i) { + tree OpVal = gimple_call_arg(MIG_TO_GCALL(stmt), i); if (isa(TREE_TYPE(OpVal))) { MemRef OpLoc = CreateTempLoc(ConvertType(TREE_TYPE(OpVal))); EmitAggregate(OpVal, OpLoc); @@ -4212,12 +4541,14 @@ /// builtin number. 
static std::vector TargetBuiltinCache; -Value *TreeToLLVM::BuildBinaryAtomic(gimple stmt, AtomicRMWInst::BinOp Kind, +Value *TreeToLLVM::BuildBinaryAtomic(GimpleTy *stmt, AtomicRMWInst::BinOp Kind, unsigned PostOp) { - tree return_type = gimple_call_return_type(stmt); + tree return_type = gimple_call_return_type(MIG_TO_GCALL(stmt)); Type *ResultTy = ConvertType(return_type); - Value *C[2] = { EmitMemory(gimple_call_arg(stmt, 0)), - EmitMemory(gimple_call_arg(stmt, 1)) }; + Value *C[2] = { + EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)), + EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 1)) + }; Type *Ty[2]; Ty[0] = ResultTy; Ty[1] = ResultTy->getPointerTo(); @@ -4225,7 +4556,7 @@ C[1] = Builder.CreateIntCast( C[1], Ty[0], /*isSigned*/ !TYPE_UNSIGNED(return_type), "cast"); Value *Result = - Builder.CreateAtomicRMW(Kind, C[0], C[1], SequentiallyConsistent); + Builder.CreateAtomicRMW(Kind, C[0], C[1], AtomicOrdering::SequentiallyConsistent); if (PostOp) Result = Builder.CreateBinOp(Instruction::BinaryOps(PostOp), Result, C[1]); @@ -4234,12 +4565,18 @@ } Value * -TreeToLLVM::BuildCmpAndSwapAtomic(gimple stmt, unsigned Bits, bool isBool) { - tree ptr = gimple_call_arg(stmt, 0); - tree old_val = gimple_call_arg(stmt, 1); - tree new_val = gimple_call_arg(stmt, 2); +TreeToLLVM::BuildCmpAndSwapAtomic(GimpleTy *stmt, unsigned Bits, bool isBool) { + tree ptr = gimple_call_arg(MIG_TO_GCALL(stmt), 0); + tree old_val = gimple_call_arg(MIG_TO_GCALL(stmt), 1); + tree new_val = gimple_call_arg(MIG_TO_GCALL(stmt), 2); // The type loaded from/stored to memory. + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(TREE_TYPE(ptr))->getContext(); +#else + TheContext; +#endif Type *MemTy = IntegerType::get(Context, Bits); Type *MemPtrTy = MemTy->getPointerTo(); @@ -4254,15 +4591,15 @@ Value *C[3] = { Ptr, Old_Val, New_Val }; Value *Result = Builder.CreateAtomicCmpXchg(C[0], C[1], C[2], - SequentiallyConsistent, - SequentiallyConsistent); + AtomicOrdering::SequentiallyConsistent, + AtomicOrdering::SequentiallyConsistent); // AtomicCmpXchg has the type {i1,iN}. Result = Builder.CreateExtractValue(Result, 0); if (isBool) Result = Builder.CreateICmpEQ(Result, Old_Val); - tree return_type = gimple_call_return_type(stmt); + tree return_type = gimple_call_return_type(MIG_TO_GCALL(stmt)); Result = CastToAnyType(Result, !TYPE_UNSIGNED(return_type), getRegType(return_type), !TYPE_UNSIGNED(return_type)); return Reg2Mem(Result, return_type, Builder); @@ -4271,7 +4608,7 @@ /// EmitBuiltinCall - stmt is a call to fndecl, a builtin function. Try to emit /// the call in a special way, setting Result to the scalar result if necessary. /// If we can't handle the builtin, return false, otherwise return true. 
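// Hedged sketch of the atomic-ordering change applied above: LLVM 3.9 turned the
// memory orderings into the scoped enum llvm::AtomicOrdering, so the bare
// SequentiallyConsistent spelling no longer resolves.  atomicFetchAddSeqCst is a
// hypothetical helper showing the builder call with the new spelling.
static llvm::Value *atomicFetchAddSeqCst(llvm::IRBuilder<> &B,
                                         llvm::Value *Ptr, llvm::Value *Val) {
  return B.CreateAtomicRMW(llvm::AtomicRMWInst::Add, Ptr, Val,
                           llvm::AtomicOrdering::SequentiallyConsistent);
}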
-bool TreeToLLVM::EmitBuiltinCall(gimple stmt, tree fndecl, +bool TreeToLLVM::EmitBuiltinCall(GimpleTy *stmt, tree fndecl, const MemRef *DestLoc, Value *&Result) { if (DECL_BUILT_IN_CLASS(fndecl) == BUILT_IN_MD) { unsigned FnCode = DECL_FUNCTION_CODE(fndecl); @@ -4296,7 +4633,7 @@ Intrinsic::getIntrinsicForGCCBuiltin(TargetPrefix, BuiltinName); if (IntrinsicID == Intrinsic::not_intrinsic) { error("unsupported target builtin %<%s%> used", BuiltinName); - Type *ResTy = ConvertType(gimple_call_return_type(stmt)); + Type *ResTy = ConvertType(gimple_call_return_type(MIG_TO_GCALL(stmt))); if (ResTy->isSingleValueType()) Result = UndefValue::get(ResTy); return true; @@ -4312,6 +4649,12 @@ return true; } + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(TREE_TYPE(fndecl))->getContext(); +#else + TheContext; +#endif enum built_in_function fcode = DECL_FUNCTION_CODE(fndecl); switch (fcode) { default: @@ -4352,7 +4695,7 @@ return EmitBuiltinFrobReturnAddr(stmt, Result); case BUILT_IN_INIT_TRAMPOLINE: return EmitBuiltinInitTrampoline(stmt, true); -#if (GCC_MINOR > 6) +#if GCC_VERSION_CODE > GCC_VERSION(4, 6) case BUILT_IN_INIT_HEAP_TRAMPOLINE: return EmitBuiltinInitTrampoline(stmt, false); #endif @@ -4406,11 +4749,11 @@ return EmitBuiltinUnwindInit(stmt, Result); case BUILT_IN_OBJECT_SIZE: { - if (!validate_gimple_arglist(stmt, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE)) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), POINTER_TYPE, INTEGER_TYPE, VOID_TYPE)) { error("Invalid builtin_object_size argument types"); return false; } - tree ObjSizeTree = gimple_call_arg(stmt, 1); + tree ObjSizeTree = gimple_call_arg(MIG_TO_GCALL(stmt), 1); STRIP_NOPS(ObjSizeTree); if (!isa(ObjSizeTree) || tree_int_cst_sgn(ObjSizeTree) < 0 || compare_tree_int(ObjSizeTree, 3) > 0) { @@ -4419,7 +4762,7 @@ } // LLVM doesn't handle type 1 or type 3. Deal with that here. - Value *Tmp = EmitMemory(gimple_call_arg(stmt, 1)); + Value *Tmp = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 1)); ConstantInt *CI = cast(Tmp); @@ -4429,12 +4772,12 @@ Value *NewTy = ConstantInt::get(Tmp->getType(), val); - Value *Args[] = { EmitMemory(gimple_call_arg(stmt, 0)), NewTy }; + Value *Args[] = { EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)), NewTy }; Type *Int8PtrTy = Type::getInt8PtrTy(Context); // Grab the current return type. Type *Ty[2] = { - ConvertType(gimple_call_return_type(stmt)), + ConvertType(gimple_call_return_type(MIG_TO_GCALL(stmt))), Int8PtrTy }; @@ -4463,11 +4806,11 @@ case BUILT_IN_PARITYLL: case BUILT_IN_PARITYL: case BUILT_IN_PARITY: { - Value *Amt = EmitMemory(gimple_call_arg(stmt, 0)); + Value *Amt = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); EmitBuiltinUnaryOp(Amt, Result, Intrinsic::ctpop); Result = Builder.CreateBinOp(Instruction::And, Result, ConstantInt::get(Result->getType(), 1)); - tree return_type = gimple_call_return_type(stmt); + tree return_type = gimple_call_return_type(MIG_TO_GCALL(stmt)); Type *DestTy = ConvertType(return_type); Result = Builder.CreateIntCast( Result, DestTy, /*isSigned*/ !TYPE_UNSIGNED(return_type), "cast"); @@ -4476,9 +4819,9 @@ case BUILT_IN_POPCOUNT: // These GCC builtins always return int. 
case BUILT_IN_POPCOUNTL: case BUILT_IN_POPCOUNTLL: { - Value *Amt = EmitMemory(gimple_call_arg(stmt, 0)); + Value *Amt = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); EmitBuiltinUnaryOp(Amt, Result, Intrinsic::ctpop); - tree return_type = gimple_call_return_type(stmt); + tree return_type = gimple_call_return_type(MIG_TO_GCALL(stmt)); Type *DestTy = ConvertType(return_type); Result = Builder.CreateIntCast( Result, DestTy, /*isSigned*/ !TYPE_UNSIGNED(return_type), "cast"); @@ -4486,9 +4829,9 @@ } case BUILT_IN_BSWAP32: case BUILT_IN_BSWAP64: { - Value *Amt = EmitMemory(gimple_call_arg(stmt, 0)); + Value *Amt = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); EmitBuiltinUnaryOp(Amt, Result, Intrinsic::bswap); - tree return_type = gimple_call_return_type(stmt); + tree return_type = gimple_call_return_type(MIG_TO_GCALL(stmt)); Type *DestTy = ConvertType(return_type); Result = Builder.CreateIntCast( Result, DestTy, /*isSigned*/ !TYPE_UNSIGNED(return_type), "cast"); @@ -4521,9 +4864,9 @@ case BUILT_IN_LOGL: // If errno math has been disabled, expand these to llvm.log calls. if (!flag_errno_math) { - Value *Amt = EmitMemory(gimple_call_arg(stmt, 0)); + Value *Amt = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); EmitBuiltinUnaryOp(Amt, Result, Intrinsic::log); - Result = CastToFPType(Result, ConvertType(gimple_call_return_type(stmt))); + Result = CastToFPType(Result, ConvertType(gimple_call_return_type(MIG_TO_GCALL(stmt)))); return true; } break; @@ -4532,9 +4875,10 @@ case BUILT_IN_LOG2L: // If errno math has been disabled, expand these to llvm.log2 calls. if (!flag_errno_math) { - Value *Amt = EmitMemory(gimple_call_arg(stmt, 0)); + Value *Amt = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); EmitBuiltinUnaryOp(Amt, Result, Intrinsic::log2); - Result = CastToFPType(Result, ConvertType(gimple_call_return_type(stmt))); + Result = CastToFPType(Result, ConvertType(gimple_call_return_type( + MIG_TO_GCALL(stmt)))); return true; } break; @@ -4543,9 +4887,9 @@ case BUILT_IN_LOG10L: // If errno math has been disabled, expand these to llvm.log10 calls. if (!flag_errno_math) { - Value *Amt = EmitMemory(gimple_call_arg(stmt, 0)); + Value *Amt = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); EmitBuiltinUnaryOp(Amt, Result, Intrinsic::log10); - Result = CastToFPType(Result, ConvertType(gimple_call_return_type(stmt))); + Result = CastToFPType(Result, ConvertType(gimple_call_return_type(MIG_TO_GCALL(stmt)))); return true; } break; @@ -4554,9 +4898,9 @@ case BUILT_IN_EXPL: // If errno math has been disabled, expand these to llvm.exp calls. if (!flag_errno_math) { - Value *Amt = EmitMemory(gimple_call_arg(stmt, 0)); + Value *Amt = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); EmitBuiltinUnaryOp(Amt, Result, Intrinsic::exp); - Result = CastToFPType(Result, ConvertType(gimple_call_return_type(stmt))); + Result = CastToFPType(Result, ConvertType(gimple_call_return_type(MIG_TO_GCALL(stmt)))); return true; } break; @@ -4565,9 +4909,9 @@ case BUILT_IN_EXP2L: // If errno math has been disabled, expand these to llvm.exp2 calls. if (!flag_errno_math) { - Value *Amt = EmitMemory(gimple_call_arg(stmt, 0)); + Value *Amt = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); EmitBuiltinUnaryOp(Amt, Result, Intrinsic::exp2); - Result = CastToFPType(Result, ConvertType(gimple_call_return_type(stmt))); + Result = CastToFPType(Result, ConvertType(gimple_call_return_type(MIG_TO_GCALL(stmt)))); return true; } break; @@ -4576,13 +4920,21 @@ case BUILT_IN_FFSLL: { // FFS(X) -> (x == 0 ? 
0 : CTTZ(x)+1) // The argument and return type of cttz should match the argument type of // the ffs, but should ignore the return type of ffs. - Value *Amt = EmitMemory(gimple_call_arg(stmt, 0)); - Result = Builder.CreateCall2( + Value *Amt = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); + // https://reviews.llvm.org/rL237624 + Result = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Builder.CreateCall( + Intrinsic::getDeclaration(TheModule, Intrinsic::cttz, Amt->getType()), + {Amt, Builder.getTrue()}); +#else + Builder.CreateCall2( Intrinsic::getDeclaration(TheModule, Intrinsic::cttz, Amt->getType()), Amt, Builder.getTrue()); +#endif Result = Builder.CreateAdd(Result, ConstantInt::get(Result->getType(), 1)); Result = Builder.CreateIntCast( - Result, ConvertType(gimple_call_return_type(stmt)), /*isSigned*/ false); + Result, ConvertType(gimple_call_return_type(MIG_TO_GCALL(stmt))), /*isSigned*/ false); Value *Cond = Builder.CreateICmpEQ(Amt, Constant::getNullValue(Amt->getType())); Result = Builder.CreateSelect( @@ -4680,13 +5032,19 @@ //TODO return true; //TODO } -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_SYNCHRONIZE: #else case BUILT_IN_SYNC_SYNCHRONIZE: #endif // We assume like gcc appears to, that this only applies to cached memory. - Builder.CreateFence(llvm::SequentiallyConsistent); + Builder.CreateFence( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + AtomicOrdering::SequentiallyConsistent +#else + llvm::SequentiallyConsistent +#endif + ); return true; #if defined(TARGET_ALPHA) || defined(TARGET_386) || defined(TARGET_POWERPC) || \ defined(TARGET_ARM) @@ -4699,28 +5057,28 @@ // enough, we have to key off the opcode. // Note that Intrinsic::getDeclaration expects the type list in reversed // order, while CreateCall expects the parameter list in normal order. -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_BOOL_COMPARE_AND_SWAP_1: #else case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1: #endif Result = BuildCmpAndSwapAtomic(stmt, BITS_PER_UNIT, true); return true; -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_BOOL_COMPARE_AND_SWAP_2: #else case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2: #endif Result = BuildCmpAndSwapAtomic(stmt, 2 * BITS_PER_UNIT, true); return true; -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_BOOL_COMPARE_AND_SWAP_4: #else case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4: #endif Result = BuildCmpAndSwapAtomic(stmt, 4 * BITS_PER_UNIT, true); return true; -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_BOOL_COMPARE_AND_SWAP_8: #else case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8: @@ -4733,28 +5091,28 @@ return true; // Fall through. 
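The #if GCC_VERSION_CODE < GCC_VERSION(4, 7) guards that replace the bare GCC_MINOR tests above and below rely on version-code macros defined elsewhere in the patch. Conceptually they pack major and minor into one comparable integer; a sketch of the idea, not the patch's actual definitions:

#define GCC_VERSION(major, minor) (((major) << 16) | (minor))
#define GCC_VERSION_CODE GCC_VERSION(GCC_MAJOR, GCC_MINOR)
// The LLVM_VERSION / LLVM_VERSION_CODE comparisons follow the same packing
// scheme for the detected LLVM version.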
-#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_VAL_COMPARE_AND_SWAP_1: #else case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_1: #endif Result = BuildCmpAndSwapAtomic(stmt, BITS_PER_UNIT, false); return true; -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_VAL_COMPARE_AND_SWAP_2: #else case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_2: #endif Result = BuildCmpAndSwapAtomic(stmt, 2 * BITS_PER_UNIT, false); return true; -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_VAL_COMPARE_AND_SWAP_4: #else case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_4: #endif Result = BuildCmpAndSwapAtomic(stmt, 4 * BITS_PER_UNIT, false); return true; -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_VAL_COMPARE_AND_SWAP_8: #else case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_8: @@ -4766,7 +5124,7 @@ Result = BuildCmpAndSwapAtomic(stmt, 8 * BITS_PER_UNIT, false); return true; -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_FETCH_AND_ADD_8: #else case BUILT_IN_SYNC_FETCH_AND_ADD_8: @@ -4775,7 +5133,7 @@ if (!TARGET_64BIT) return false; #endif -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_FETCH_AND_ADD_1: case BUILT_IN_FETCH_AND_ADD_2: case BUILT_IN_FETCH_AND_ADD_4: { @@ -4787,7 +5145,7 @@ Result = BuildBinaryAtomic(stmt, AtomicRMWInst::Add); return true; } -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_FETCH_AND_SUB_8: #else case BUILT_IN_SYNC_FETCH_AND_SUB_8: @@ -4796,7 +5154,7 @@ if (!TARGET_64BIT) return false; #endif -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_FETCH_AND_SUB_1: case BUILT_IN_FETCH_AND_SUB_2: case BUILT_IN_FETCH_AND_SUB_4: { @@ -4808,7 +5166,7 @@ Result = BuildBinaryAtomic(stmt, AtomicRMWInst::Sub); return true; } -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_FETCH_AND_OR_8: #else case BUILT_IN_SYNC_FETCH_AND_OR_8: @@ -4817,7 +5175,7 @@ if (!TARGET_64BIT) return false; #endif -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_FETCH_AND_OR_1: case BUILT_IN_FETCH_AND_OR_2: case BUILT_IN_FETCH_AND_OR_4: { @@ -4829,7 +5187,7 @@ Result = BuildBinaryAtomic(stmt, AtomicRMWInst::Or); return true; } -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_FETCH_AND_AND_8: #else case BUILT_IN_SYNC_FETCH_AND_AND_8: @@ -4838,7 +5196,7 @@ if (!TARGET_64BIT) return false; #endif -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_FETCH_AND_AND_1: case BUILT_IN_FETCH_AND_AND_2: case BUILT_IN_FETCH_AND_AND_4: { @@ -4850,7 +5208,7 @@ Result = BuildBinaryAtomic(stmt, AtomicRMWInst::And); return true; } -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_FETCH_AND_XOR_8: #else case BUILT_IN_SYNC_FETCH_AND_XOR_8: @@ -4859,7 +5217,7 @@ if (!TARGET_64BIT) return false; #endif -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_FETCH_AND_XOR_1: case BUILT_IN_FETCH_AND_XOR_2: case BUILT_IN_FETCH_AND_XOR_4: { @@ -4871,7 +5229,7 @@ Result = BuildBinaryAtomic(stmt, AtomicRMWInst::Xor); return true; } -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_FETCH_AND_NAND_8: #else case BUILT_IN_SYNC_FETCH_AND_NAND_8: @@ -4880,7 +5238,7 @@ if (!TARGET_64BIT) return false; #endif -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_FETCH_AND_NAND_1: case BUILT_IN_FETCH_AND_NAND_2: case BUILT_IN_FETCH_AND_NAND_4: { @@ -4892,7 
+5250,7 @@ Result = BuildBinaryAtomic(stmt, AtomicRMWInst::Nand); return true; } -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_LOCK_TEST_AND_SET_8: #else case BUILT_IN_SYNC_LOCK_TEST_AND_SET_8: @@ -4901,7 +5259,7 @@ if (!TARGET_64BIT) return false; #endif -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_LOCK_TEST_AND_SET_1: case BUILT_IN_LOCK_TEST_AND_SET_2: case BUILT_IN_LOCK_TEST_AND_SET_4: { @@ -4914,7 +5272,7 @@ return true; } -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_ADD_AND_FETCH_8: #else case BUILT_IN_SYNC_ADD_AND_FETCH_8: @@ -4923,7 +5281,7 @@ if (!TARGET_64BIT) return false; #endif -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_ADD_AND_FETCH_1: case BUILT_IN_ADD_AND_FETCH_2: case BUILT_IN_ADD_AND_FETCH_4: @@ -4934,7 +5292,7 @@ #endif Result = BuildBinaryAtomic(stmt, AtomicRMWInst::Add, Instruction::Add); return true; -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_SUB_AND_FETCH_8: #else case BUILT_IN_SYNC_SUB_AND_FETCH_8: @@ -4943,7 +5301,7 @@ if (!TARGET_64BIT) return false; #endif -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_SUB_AND_FETCH_1: case BUILT_IN_SUB_AND_FETCH_2: case BUILT_IN_SUB_AND_FETCH_4: @@ -4954,7 +5312,7 @@ #endif Result = BuildBinaryAtomic(stmt, AtomicRMWInst::Sub, Instruction::Sub); return true; -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_OR_AND_FETCH_8: #else case BUILT_IN_SYNC_OR_AND_FETCH_8: @@ -4963,7 +5321,7 @@ if (!TARGET_64BIT) return false; #endif -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_OR_AND_FETCH_1: case BUILT_IN_OR_AND_FETCH_2: case BUILT_IN_OR_AND_FETCH_4: @@ -4974,7 +5332,7 @@ #endif Result = BuildBinaryAtomic(stmt, AtomicRMWInst::Or, Instruction::Or); return true; -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_AND_AND_FETCH_8: #else case BUILT_IN_SYNC_AND_AND_FETCH_8: @@ -4983,7 +5341,7 @@ if (!TARGET_64BIT) return false; #endif -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_AND_AND_FETCH_1: case BUILT_IN_AND_AND_FETCH_2: case BUILT_IN_AND_AND_FETCH_4: @@ -4994,7 +5352,7 @@ #endif Result = BuildBinaryAtomic(stmt, AtomicRMWInst::And, Instruction::And); return true; -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_XOR_AND_FETCH_8: #else case BUILT_IN_SYNC_XOR_AND_FETCH_8: @@ -5003,7 +5361,7 @@ if (!TARGET_64BIT) return false; #endif -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_XOR_AND_FETCH_1: case BUILT_IN_XOR_AND_FETCH_2: case BUILT_IN_XOR_AND_FETCH_4: @@ -5014,7 +5372,7 @@ #endif Result = BuildBinaryAtomic(stmt, AtomicRMWInst::Xor, Instruction::Xor); return true; -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_NAND_AND_FETCH_8: #else case BUILT_IN_SYNC_NAND_AND_FETCH_8: @@ -5023,7 +5381,7 @@ if (!TARGET_64BIT) return false; #endif -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_NAND_AND_FETCH_1: case BUILT_IN_NAND_AND_FETCH_2: case BUILT_IN_NAND_AND_FETCH_4: { @@ -5032,22 +5390,29 @@ case BUILT_IN_SYNC_NAND_AND_FETCH_2: case BUILT_IN_SYNC_NAND_AND_FETCH_4: { #endif - tree return_type = gimple_call_return_type(stmt); + tree return_type = gimple_call_return_type(MIG_TO_GCALL(stmt)); Type *ResultTy = ConvertType(return_type); - Value *C[2] = { EmitMemory(gimple_call_arg(stmt, 0)), - EmitMemory(gimple_call_arg(stmt, 1)) }; 
+ Value *C[2] = { + EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)), + EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 1)) + }; C[0] = Builder.CreateBitCast(C[0], ResultTy->getPointerTo()); C[1] = Builder.CreateIntCast( C[1], ResultTy, /*isSigned*/ !TYPE_UNSIGNED(return_type), "cast"); Result = Builder.CreateAtomicRMW(AtomicRMWInst::Nand, C[0], C[1], - SequentiallyConsistent); +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) + SequentiallyConsistent +#else + AtomicOrdering::SequentiallyConsistent +#endif + ); Result = Builder.CreateAnd(Builder.CreateNot(Result), C[1]); Result = Builder.CreateIntToPtr(Result, ResultTy); return true; } -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_LOCK_RELEASE_1: case BUILT_IN_LOCK_RELEASE_2: case BUILT_IN_LOCK_RELEASE_4: @@ -5069,35 +5434,35 @@ // to use "store atomic [...] release". Type *Ty; switch (DECL_FUNCTION_CODE(fndecl)) { -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_LOCK_RELEASE_16: // not handled; should use SSE on x86 #else case BUILT_IN_SYNC_LOCK_RELEASE_16: // not handled; should use SSE on x86 #endif default: llvm_unreachable("Not handled; should use SSE on x86!"); -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_LOCK_RELEASE_1: #else case BUILT_IN_SYNC_LOCK_RELEASE_1: #endif Ty = Type::getInt8Ty(Context); break; -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_LOCK_RELEASE_2: #else case BUILT_IN_SYNC_LOCK_RELEASE_2: #endif Ty = Type::getInt16Ty(Context); break; -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_LOCK_RELEASE_4: #else case BUILT_IN_SYNC_LOCK_RELEASE_4: #endif Ty = Type::getInt32Ty(Context); break; -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case BUILT_IN_LOCK_RELEASE_8: #else case BUILT_IN_SYNC_LOCK_RELEASE_8: @@ -5105,7 +5470,7 @@ Ty = Type::getInt64Ty(Context); break; } - Value *Ptr = EmitMemory(gimple_call_arg(stmt, 0)); + Value *Ptr = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); Ptr = Builder.CreateBitCast(Ptr, Ty->getPointerTo()); Builder.CreateStore(Constant::getNullValue(Ty), Ptr, true); Result = 0; @@ -5116,8 +5481,8 @@ #if 1 // FIXME: Should handle these GCC extensions eventually. case BUILT_IN_LONGJMP: { - if (validate_gimple_arglist(stmt, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE)) { - tree value = gimple_call_arg(stmt, 1); + if (validate_gimple_arglist(MIG_TO_GCALL(stmt), POINTER_TYPE, INTEGER_TYPE, VOID_TYPE)) { + tree value = gimple_call_arg(MIG_TO_GCALL(stmt), 1); if (!isa(value) || cast(EmitMemory(value))->getValue() != 1) { @@ -5139,19 +5504,21 @@ case BUILT_IN_APPLY: case BUILT_IN_RETURN: case BUILT_IN_SAVEREGS: -#if (GCC_MINOR < 6) +#if GCC_VERSION_CODE < GCC_VERSION(4, 6) case BUILT_IN_ARGS_INFO: #endif case BUILT_IN_NEXT_ARG: case BUILT_IN_CLASSIFY_TYPE: case BUILT_IN_AGGREGATE_INCOMING_ADDRESS: case BUILT_IN_SETJMP_SETUP: +#if (GCC_MAJOR < 5) case BUILT_IN_SETJMP_DISPATCHER: +#endif case BUILT_IN_SETJMP_RECEIVER: case BUILT_IN_UPDATE_SETJMP_BUF: // FIXME: HACK: Just ignore these. 
{ - Type *Ty = ConvertType(gimple_call_return_type(stmt)); + Type *Ty = ConvertType(gimple_call_return_type(MIG_TO_GCALL(stmt))); if (!Ty->isVoidTy()) Result = Constant::getNullValue(Ty); return true; @@ -5173,33 +5540,46 @@ return true; } - Value *TreeToLLVM::EmitBuiltinBitCountIntrinsic(gimple stmt, + Value *TreeToLLVM::EmitBuiltinBitCountIntrinsic(GimpleTy *stmt, Intrinsic::ID Id) { - Value *Amt = EmitMemory(gimple_call_arg(stmt, 0)); - Value *Result = Builder.CreateCall2( + Value *Amt = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); + Value *Result = +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) + Builder.CreateCall2( Intrinsic::getDeclaration(TheModule, Id, Amt->getType()), Amt, Builder.getTrue()); - tree return_type = gimple_call_return_type(stmt); +#else + Builder.CreateCall( + Intrinsic::getDeclaration(TheModule, Id, Amt->getType()), + {Amt, Builder.getTrue()}); +#endif + tree return_type = gimple_call_return_type(MIG_TO_GCALL(stmt)); Type *DestTy = ConvertType(return_type); return Builder.CreateIntCast( Result, DestTy, /*isSigned*/ !TYPE_UNSIGNED(return_type), "cast"); } - Value *TreeToLLVM::EmitBuiltinSQRT(gimple stmt) { - Value *Amt = EmitMemory(gimple_call_arg(stmt, 0)); + Value *TreeToLLVM::EmitBuiltinSQRT(GimpleTy *stmt) { + Value *Amt = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); Type *Ty = Amt->getType(); return Builder.CreateCall( Intrinsic::getDeclaration(TheModule, Intrinsic::sqrt, Ty), Amt); } - Value *TreeToLLVM::EmitBuiltinPOWI(gimple stmt) { - if (!validate_gimple_arglist(stmt, REAL_TYPE, INTEGER_TYPE, VOID_TYPE)) + Value *TreeToLLVM::EmitBuiltinPOWI(GimpleTy *stmt) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), REAL_TYPE, INTEGER_TYPE, VOID_TYPE)) return 0; - Value *Val = EmitMemory(gimple_call_arg(stmt, 0)); - Value *Pow = EmitMemory(gimple_call_arg(stmt, 1)); + Value *Val = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); + Value *Pow = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 1)); Type *Ty = Val->getType(); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif Pow = Builder.CreateIntCast(Pow, Type::getInt32Ty(Context), /*isSigned*/ true); @@ -5210,12 +5590,12 @@ Intrinsic::getDeclaration(TheModule, Intrinsic::powi, Ty), Args); } - Value *TreeToLLVM::EmitBuiltinPOW(gimple stmt) { - if (!validate_gimple_arglist(stmt, REAL_TYPE, REAL_TYPE, VOID_TYPE)) + Value *TreeToLLVM::EmitBuiltinPOW(GimpleTy *stmt) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), REAL_TYPE, REAL_TYPE, VOID_TYPE)) return 0; - Value *Val = EmitMemory(gimple_call_arg(stmt, 0)); - Value *Pow = EmitMemory(gimple_call_arg(stmt, 1)); + Value *Val = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); + Value *Pow = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 1)); Type *Ty = Val->getType(); SmallVector Args; @@ -5225,13 +5605,13 @@ Intrinsic::getDeclaration(TheModule, Intrinsic::pow, Ty), Args); } - Value *TreeToLLVM::EmitBuiltinLCEIL(gimple stmt) { - if (!validate_gimple_arglist(stmt, REAL_TYPE, VOID_TYPE)) + Value *TreeToLLVM::EmitBuiltinLCEIL(GimpleTy *stmt) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), REAL_TYPE, VOID_TYPE)) return 0; // Cast the result of "ceil" to the appropriate integer type. // First call the appropriate version of "ceil". 
- tree op = gimple_call_arg(stmt, 0); + tree op = gimple_call_arg(MIG_TO_GCALL(stmt), 0); StringRef Name = SelectFPName(TREE_TYPE(op), "ceilf", "ceil", "ceill"); assert(!Name.empty() && "Unsupported floating point type!"); CallInst *Call = EmitSimpleCall(Name, TREE_TYPE(op), op, NULL); @@ -5239,19 +5619,19 @@ Call->setDoesNotAccessMemory(); // Then type cast the result of the "ceil" call. - tree type = gimple_call_return_type(stmt); + tree type = gimple_call_return_type(MIG_TO_GCALL(stmt)); Type *RetTy = getRegType(type); return TYPE_UNSIGNED(type) ? Builder.CreateFPToUI(Call, RetTy) : Builder.CreateFPToSI(Call, RetTy); } - Value *TreeToLLVM::EmitBuiltinLFLOOR(gimple stmt) { - if (!validate_gimple_arglist(stmt, REAL_TYPE, VOID_TYPE)) + Value *TreeToLLVM::EmitBuiltinLFLOOR(GimpleTy *stmt) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), REAL_TYPE, VOID_TYPE)) return 0; // Cast the result of "floor" to the appropriate integer type. // First call the appropriate version of "floor". - tree op = gimple_call_arg(stmt, 0); + tree op = gimple_call_arg(MIG_TO_GCALL(stmt), 0); StringRef Name = SelectFPName(TREE_TYPE(op), "floorf", "floor", "floorl"); assert(!Name.empty() && "Unsupported floating point type!"); CallInst *Call = EmitSimpleCall(Name, TREE_TYPE(op), op, NULL); @@ -5259,19 +5639,19 @@ Call->setDoesNotAccessMemory(); // Then type cast the result of the "floor" call. - tree type = gimple_call_return_type(stmt); + tree type = gimple_call_return_type(MIG_TO_GCALL(stmt)); Type *RetTy = getRegType(type); return TYPE_UNSIGNED(type) ? Builder.CreateFPToUI(Call, RetTy) : Builder.CreateFPToSI(Call, RetTy); } - Value *TreeToLLVM::EmitBuiltinLROUND(gimple stmt) { - if (!validate_gimple_arglist(stmt, REAL_TYPE, VOID_TYPE)) + Value *TreeToLLVM::EmitBuiltinLROUND(GimpleTy *stmt) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), REAL_TYPE, VOID_TYPE)) return 0; // Cast the result of "lround" to the appropriate integer type. // First call the appropriate version of "lround". - tree op = gimple_call_arg(stmt, 0); + tree op = gimple_call_arg(MIG_TO_GCALL(stmt), 0); StringRef Name = SelectFPName(TREE_TYPE(op), "lroundf", "lround", "lroundl"); assert(!Name.empty() && "Unsupported floating point type!"); @@ -5280,19 +5660,23 @@ Call->setDoesNotAccessMemory(); // Then type cast the result of the "lround" call. - tree type = gimple_call_return_type(stmt); + tree type = gimple_call_return_type(MIG_TO_GCALL(stmt)); Type *RetTy = getRegType(type); return Builder.CreateTrunc(Call, RetTy); } - Value *TreeToLLVM::EmitBuiltinCEXPI(gimple stmt) { - if (!validate_gimple_arglist(stmt, REAL_TYPE, VOID_TYPE)) + Value *TreeToLLVM::EmitBuiltinCEXPI(GimpleTy *stmt) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), REAL_TYPE, VOID_TYPE)) return 0; +#if (GCC_MAJOR > 4) + if (targetm.libc_has_function(function_sincos)) { +#else if (TARGET_HAS_SINCOS) { +#endif // exp(i*arg) = cos(arg) + i*sin(arg). Emit a call to sincos. First // determine which version of sincos to call. - tree arg = gimple_call_arg(stmt, 0); + tree arg = gimple_call_arg(MIG_TO_GCALL(stmt), 0); tree arg_type = TREE_TYPE(arg); StringRef Name = SelectFPName(arg_type, "sincosf", "sincos", "sincosl"); assert(!Name.empty() && "Unsupported floating point type!"); @@ -5305,6 +5689,12 @@ // Get the LLVM function declaration for sincos. 
Type *ArgTys[3] = { Val->getType(), SinPtr->getType(), CosPtr->getType() }; + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Val->getType()->getContext(); +#else + TheContext; +#endif FunctionType *FTy = FunctionType::get(Type::getVoidTy(Context), ArgTys, /*isVarArg*/ false); Constant *Func = TheModule->getOrInsertFunction(Name, FTy); @@ -5338,13 +5728,13 @@ return CreateComplex(Cos, Sin); } else { // Emit a call to cexp. First determine which version of cexp to call. - tree arg = gimple_call_arg(stmt, 0); + tree arg = gimple_call_arg(MIG_TO_GCALL(stmt), 0); tree arg_type = TREE_TYPE(arg); StringRef Name = SelectFPName(arg_type, "cexpf", "cexp", "cexpl"); assert(!Name.empty() && "Unsupported floating point type!"); // Get the GCC and LLVM function types for cexp. - tree cplx_type = gimple_call_return_type(stmt); + tree cplx_type = gimple_call_return_type(MIG_TO_GCALL(stmt)); tree fntype = build_function_type_list(cplx_type, cplx_type, NULL_TREE); FunctionType *FTy = cast(ConvertType(fntype)); @@ -5440,32 +5830,39 @@ } } - Value *TreeToLLVM::EmitBuiltinSIGNBIT(gimple stmt) { - Value *Arg = EmitRegister(gimple_call_arg(stmt, 0)); + Value *TreeToLLVM::EmitBuiltinSIGNBIT(GimpleTy *stmt) { + Value *Arg = EmitRegister(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); Type *ArgTy = Arg->getType(); unsigned ArgWidth = ArgTy->getPrimitiveSizeInBits(); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ArgTy->getContext(); +#else + TheContext; +#endif Type *ArgIntTy = IntegerType::get(Context, ArgWidth); Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy); Value *ZeroCmp = Constant::getNullValue(ArgIntTy); Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp); return Builder.CreateZExt(Result, - ConvertType(gimple_call_return_type(stmt))); + ConvertType(gimple_call_return_type(MIG_TO_GCALL(stmt)))); } - bool TreeToLLVM::EmitBuiltinConstantP(gimple stmt, Value * &Result) { + bool TreeToLLVM::EmitBuiltinConstantP(GimpleTy *stmt, Value * &Result) { Result = - Constant::getNullValue(ConvertType(gimple_call_return_type(stmt))); + Constant::getNullValue(ConvertType(gimple_call_return_type(MIG_TO_GCALL(stmt)))); return true; } - bool TreeToLLVM::EmitBuiltinExtendPointer(gimple stmt, Value * &Result) { - tree arg0 = gimple_call_arg(stmt, 0); + bool TreeToLLVM::EmitBuiltinExtendPointer(GimpleTy *stmt, Value * &Result) { + tree arg0 = gimple_call_arg(MIG_TO_GCALL(stmt), 0); Value *Amt = EmitMemory(arg0); bool AmtIsSigned = !TYPE_UNSIGNED(TREE_TYPE(arg0)); - bool ExpIsSigned = !TYPE_UNSIGNED(gimple_call_return_type(stmt)); + bool ExpIsSigned = + !TYPE_UNSIGNED(gimple_call_return_type(MIG_TO_GCALL(stmt))); Result = CastToAnyType(Amt, AmtIsSigned, - ConvertType(gimple_call_return_type(stmt)), - ExpIsSigned); + ConvertType(gimple_call_return_type(MIG_TO_GCALL(stmt))), + ExpIsSigned); return true; } @@ -5473,7 +5870,7 @@ /// size checking builtin calls (e.g. __builtin___memcpy_chk into the /// plain non-checking calls. If the size of the argument is either -1 (unknown) /// or large enough to ensure no overflow (> len), then it's safe to do so. 
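A compact sketch of the check described in the comment above (the helper name and the restriction to constant operands are illustrative; the real routine also bails out on the non-constant cases):

#include "llvm/IR/Constants.h"
using namespace llvm;

// __builtin___memcpy_chk(dst, src, len, objsize) may be folded to the plain
// call when objsize is unknown (-1, i.e. all ones) or objsize >= len.
static bool sizeCheckIsFoldable(ConstantInt *Len, ConstantInt *ObjSize) {
  if (ObjSize->isAllOnesValue())
    return true;
  return !ObjSize->getValue().ult(Len->getValue());
}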
- static bool OptimizeIntoPlainBuiltIn(gimple stmt, Value * Len, + static bool OptimizeIntoPlainBuiltIn(GimpleTy *stmt, Value * Len, Value * Size) { if (BitCastInst *SizeBC = llvm::dyn_cast(Size)) Size = SizeBC->getOperand(0); @@ -5491,7 +5888,7 @@ return false; if (SizeCI->getValue().ult(LenCI->getValue())) { warning(0, "call to %D will always overflow destination buffer", - gimple_call_fndecl(stmt)); + gimple_call_fndecl(MIG_TO_GCALL(stmt))); return false; } return true; @@ -5499,28 +5896,29 @@ /// EmitBuiltinMemCopy - Emit an llvm.memcpy or llvm.memmove intrinsic, /// depending on the value of isMemMove. - bool TreeToLLVM::EmitBuiltinMemCopy(gimple stmt, Value * &Result, + bool TreeToLLVM::EmitBuiltinMemCopy(GimpleTy *stmt, Value * &Result, bool isMemMove, bool SizeCheck) { if (SizeCheck) { - if (!validate_gimple_arglist(stmt, POINTER_TYPE, POINTER_TYPE, - INTEGER_TYPE, INTEGER_TYPE, VOID_TYPE)) + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), + POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, + INTEGER_TYPE, VOID_TYPE)) return false; } else { - if (!validate_gimple_arglist(stmt, POINTER_TYPE, POINTER_TYPE, - INTEGER_TYPE, VOID_TYPE)) + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), POINTER_TYPE, + POINTER_TYPE, INTEGER_TYPE, VOID_TYPE)) return false; } - tree Dst = gimple_call_arg(stmt, 0); - tree Src = gimple_call_arg(stmt, 1); + tree Dst = gimple_call_arg(MIG_TO_GCALL(stmt), 0); + tree Src = gimple_call_arg(MIG_TO_GCALL(stmt), 1); unsigned SrcAlign = getPointerAlignment(Src); unsigned DstAlign = getPointerAlignment(Dst); Value *DstV = EmitMemory(Dst); Value *SrcV = EmitMemory(Src); - Value *Len = EmitMemory(gimple_call_arg(stmt, 2)); + Value *Len = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 2)); if (SizeCheck) { - tree SizeArg = gimple_call_arg(stmt, 3); + tree SizeArg = gimple_call_arg(MIG_TO_GCALL(stmt), 3); Value *Size = EmitMemory(SizeArg); if (!OptimizeIntoPlainBuiltIn(stmt, Len, Size)) return false; @@ -5532,26 +5930,27 @@ return true; } - bool TreeToLLVM::EmitBuiltinMemSet(gimple stmt, Value * &Result, + bool TreeToLLVM::EmitBuiltinMemSet(GimpleTy *stmt, Value * &Result, bool SizeCheck) { if (SizeCheck) { - if (!validate_gimple_arglist(stmt, POINTER_TYPE, INTEGER_TYPE, - INTEGER_TYPE, INTEGER_TYPE, VOID_TYPE)) + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), POINTER_TYPE, + INTEGER_TYPE, INTEGER_TYPE, INTEGER_TYPE, + VOID_TYPE)) return false; } else { - if (!validate_gimple_arglist(stmt, POINTER_TYPE, INTEGER_TYPE, - INTEGER_TYPE, VOID_TYPE)) + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), POINTER_TYPE, + INTEGER_TYPE, INTEGER_TYPE, VOID_TYPE)) return false; } - tree Dst = gimple_call_arg(stmt, 0); + tree Dst = gimple_call_arg(MIG_TO_GCALL(stmt), 0); unsigned DstAlign = getPointerAlignment(Dst); Value *DstV = EmitMemory(Dst); - Value *Val = EmitMemory(gimple_call_arg(stmt, 1)); - Value *Len = EmitMemory(gimple_call_arg(stmt, 2)); + Value *Val = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 1)); + Value *Len = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 2)); if (SizeCheck) { - tree SizeArg = gimple_call_arg(stmt, 3); + tree SizeArg = gimple_call_arg(MIG_TO_GCALL(stmt), 3); Value *Size = EmitMemory(SizeArg); if (!OptimizeIntoPlainBuiltIn(stmt, Len, Size)) return false; @@ -5560,31 +5959,44 @@ return true; } - bool TreeToLLVM::EmitBuiltinBZero(gimple stmt, Value * &/*Result*/) { - if (!validate_gimple_arglist(stmt, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE)) + bool TreeToLLVM::EmitBuiltinBZero(GimpleTy *stmt, Value * &/*Result*/) { + if 
(!validate_gimple_arglist(MIG_TO_GCALL(stmt), POINTER_TYPE, + INTEGER_TYPE, VOID_TYPE)) return false; - tree Dst = gimple_call_arg(stmt, 0); + tree Dst = gimple_call_arg(MIG_TO_GCALL(stmt), 0); unsigned DstAlign = getPointerAlignment(Dst); Value *DstV = EmitMemory(Dst); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + DstV->getType()->getContext(); +#else + TheContext; +#endif Value *Val = Constant::getNullValue(Type::getInt32Ty(Context)); - Value *Len = EmitMemory(gimple_call_arg(stmt, 1)); + Value *Len = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 1)); EmitMemSet(DstV, Val, Len, DstAlign); return true; } - bool TreeToLLVM::EmitBuiltinPrefetch(gimple stmt) { - if (!validate_gimple_arglist(stmt, POINTER_TYPE, 0)) + bool TreeToLLVM::EmitBuiltinPrefetch(GimpleTy *stmt) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), POINTER_TYPE, 0)) return false; - Value *Ptr = EmitMemory(gimple_call_arg(stmt, 0)); + Value *Ptr = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ptr->getType()->getContext(); +#else + TheContext; +#endif Value *ReadWrite = 0; Value *Locality = 0; Value *Data = 0; - if (gimple_call_num_args(stmt) > 1) { // Args 1/2 are optional - ReadWrite = EmitMemory(gimple_call_arg(stmt, 1)); + if (gimple_call_num_args(MIG_TO_GCALL(stmt)) > 1) { // Args 1/2 are optional + ReadWrite = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 1)); if (!isa(ReadWrite)) { error("second argument to %<__builtin_prefetch%> must be a constant"); ReadWrite = 0; @@ -5598,8 +6010,8 @@ /*isSigned*/ false); } - if (gimple_call_num_args(stmt) > 2) { - Locality = EmitMemory(gimple_call_arg(stmt, 2)); + if (gimple_call_num_args(MIG_TO_GCALL(stmt)) > 2) { + Locality = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 2)); if (!isa(Locality)) { error( "third argument to %<__builtin_prefetch%> must be a constant"); @@ -5626,21 +6038,27 @@ Ptr = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context)); +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Builder.CreateCall( + Intrinsic::getDeclaration(TheModule, Intrinsic::prefetch), + {Ptr, ReadWrite, Locality, Data}); +#else Builder.CreateCall4( Intrinsic::getDeclaration(TheModule, Intrinsic::prefetch), Ptr, ReadWrite, Locality, Data); +#endif return true; } /// EmitBuiltinReturnAddr - Emit an llvm.returnaddress or llvm.frameaddress /// instruction, depending on whether isFrame is true or not. 
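Both the FFS and the prefetch hunks hinge on the same IRBuilder change (the rL237624 removal of CreateCall2/CreateCall4 cited in the FFS hunk): operands are now passed as a brace-initialized list. A sketch against the LLVM 3.9-era intrinsic signatures, with illustrative helper names:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// cttz(x, /*is_zero_undef=*/true), the core of the FFS lowering.
static Value *emitCttz(IRBuilder<> &B, Module *M, Value *X) {
  Function *F = Intrinsic::getDeclaration(M, Intrinsic::cttz, X->getType());
  return B.CreateCall(F, {X, B.getTrue()});
}

// llvm.prefetch(ptr, rw, locality, /*cache type: 1 = data*/).
static void emitPrefetch(IRBuilder<> &B, Module *M, Value *I8Ptr,
                         unsigned RW, unsigned Locality) {
  Function *F = Intrinsic::getDeclaration(M, Intrinsic::prefetch);
  B.CreateCall(F, {I8Ptr, B.getInt32(RW), B.getInt32(Locality), B.getInt32(1)});
}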
- bool TreeToLLVM::EmitBuiltinReturnAddr(gimple stmt, Value * &Result, + bool TreeToLLVM::EmitBuiltinReturnAddr(GimpleTy *stmt, Value * &Result, bool isFrame) { - if (!validate_gimple_arglist(stmt, INTEGER_TYPE, VOID_TYPE)) + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), INTEGER_TYPE, VOID_TYPE)) return false; ConstantInt *Level = - llvm::dyn_cast(EmitMemory(gimple_call_arg(stmt, 0))); + llvm::dyn_cast(EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0))); if (!Level) { if (isFrame) error("invalid argument to %<__builtin_frame_address%>"); @@ -5654,13 +6072,13 @@ Result = Builder.CreateCall(Intrinsic::getDeclaration(TheModule, IID), Level); Result = Builder.CreateBitCast( - Result, ConvertType(gimple_call_return_type(stmt))); + Result, ConvertType(gimple_call_return_type(MIG_TO_GCALL(stmt)))); return true; } - bool TreeToLLVM::EmitBuiltinExtractReturnAddr(gimple stmt, + bool TreeToLLVM::EmitBuiltinExtractReturnAddr(GimpleTy *stmt, Value * &Result) { - Value *Ptr = EmitMemory(gimple_call_arg(stmt, 0)); + Value *Ptr = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); // FIXME: Actually we should do something like this: // @@ -5668,28 +6086,39 @@ // offset are defined. This seems to be needed for: ARM, MIPS, Sparc. // Unfortunately, these constants are defined as RTL expressions and // should be handled separately. - + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ptr->getType()->getContext(); +#else + TheContext; +#endif Result = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context)); return true; } - bool TreeToLLVM::EmitBuiltinFrobReturnAddr(gimple stmt, Value * &Result) { - Value *Ptr = EmitMemory(gimple_call_arg(stmt, 0)); + bool TreeToLLVM::EmitBuiltinFrobReturnAddr(GimpleTy *stmt, + Value * &Result) { + Value *Ptr = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); // FIXME: Actually we should do something like this: // // Result = Ptr - RETURN_ADDR_OFFSET, if offset is defined. This seems to be // needed for: MIPS, Sparc. Unfortunately, these constants are defined // as RTL expressions and should be handled separately. - + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ptr->getType()->getContext(); +#else + TheContext; +#endif Result = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context)); return true; } - bool TreeToLLVM::EmitBuiltinStackSave(gimple stmt, Value * &Result) { - if (!validate_gimple_arglist(stmt, VOID_TYPE)) + bool TreeToLLVM::EmitBuiltinStackSave(GimpleTy *stmt, Value * &Result) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), VOID_TYPE)) return false; Result = Builder.CreateCall( @@ -5704,9 +6133,19 @@ // Exception handling builtins. - bool TreeToLLVM::EmitBuiltinEHCopyValues(gimple stmt) { - unsigned DstRegionNo = tree_low_cst(gimple_call_arg(stmt, 0), 0); - unsigned SrcRegionNo = tree_low_cst(gimple_call_arg(stmt, 1), 0); + bool TreeToLLVM::EmitBuiltinEHCopyValues(GimpleTy *stmt) { + unsigned DstRegionNo = +#if (GCC_MAJOR > 4) + tree_to_shwi(gimple_call_arg(as_a(stmt), 0)); +#else + tree_low_cst(gimple_call_arg(stmt, 0), 0); +#endif + unsigned SrcRegionNo = +#if (GCC_MAJOR > 4) + tree_to_shwi(gimple_call_arg(as_a(stmt), 1)); +#else + tree_low_cst(gimple_call_arg(stmt, 1), 0); +#endif // Copy the exception pointer. 
Value *ExcPtr = Builder.CreateLoad(getExceptionPtr(SrcRegionNo)); Builder.CreateStore(ExcPtr, getExceptionPtr(DstRegionNo)); @@ -5716,27 +6155,37 @@ return true; } - bool TreeToLLVM::EmitBuiltinEHFilter(gimple stmt, Value * &Result) { + bool TreeToLLVM::EmitBuiltinEHFilter(GimpleTy *stmt, Value * &Result) { // Lookup the local that holds the selector value for this region. - unsigned RegionNo = tree_low_cst(gimple_call_arg(stmt, 0), 0); + unsigned RegionNo = +#if (GCC_MAJOR > 4) + tree_to_shwi(gimple_call_arg(as_a(stmt), 0)); +#else + tree_low_cst(gimple_call_arg(stmt, 0), 0); +#endif AllocaInst *Filter = getExceptionFilter(RegionNo); // Load the selector value out. Result = Builder.CreateLoad(Filter); // Ensure the returned value has the right integer type. - tree type = gimple_call_return_type(stmt); + tree type = gimple_call_return_type(MIG_TO_GCALL(stmt)); Result = CastToAnyType(Result, /*isSigned*/ true, getRegType(type), /*isSigned*/ !TYPE_UNSIGNED(type)); return true; } - bool TreeToLLVM::EmitBuiltinEHPointer(gimple stmt, Value * &Result) { + bool TreeToLLVM::EmitBuiltinEHPointer(GimpleTy *stmt, Value * &Result) { // Lookup the local that holds the exception pointer for this region. - unsigned RegionNo = tree_low_cst(gimple_call_arg(stmt, 0), 0); + unsigned RegionNo = +#if (GCC_MAJOR > 4) + tree_to_shwi(gimple_call_arg(as_a(stmt), 0)); +#else + tree_low_cst(gimple_call_arg(stmt, 0), 0); +#endif AllocaInst *ExcPtr = getExceptionPtr(RegionNo); // Load the exception pointer out. Result = Builder.CreateLoad(ExcPtr); // Ensure the returned value has the right pointer type. - tree type = gimple_call_return_type(stmt); + tree type = gimple_call_return_type(MIG_TO_GCALL(stmt)); Result = Builder.CreateBitCast(Result, getRegType(type)); return true; } @@ -5767,8 +6216,8 @@ #define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) 0 #endif - bool TreeToLLVM::EmitBuiltinDwarfCFA(gimple stmt, Value * &Result) { - if (!validate_gimple_arglist(stmt, VOID_TYPE)) + bool TreeToLLVM::EmitBuiltinDwarfCFA(GimpleTy *stmt, Value * &Result) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), VOID_TYPE)) return false; int cfa_offset = ARG_POINTER_CFA_OFFSET(exp); @@ -5781,24 +6230,24 @@ return true; } - bool TreeToLLVM::EmitBuiltinDwarfSPColumn(gimple stmt, Value * &Result) { - if (!validate_gimple_arglist(stmt, VOID_TYPE)) + bool TreeToLLVM::EmitBuiltinDwarfSPColumn(GimpleTy *stmt, + Value * &Result) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), VOID_TYPE)) return false; unsigned int dwarf_regnum = DWARF_FRAME_REGNUM(STACK_POINTER_REGNUM); - Result = ConstantInt::get(ConvertType(gimple_call_return_type(stmt)), - dwarf_regnum); + Result = ConstantInt::get(ConvertType(gimple_call_return_type(MIG_TO_GCALL(stmt))), dwarf_regnum); return true; } - bool TreeToLLVM::EmitBuiltinEHReturnDataRegno(gimple stmt, + bool TreeToLLVM::EmitBuiltinEHReturnDataRegno(GimpleTy *stmt, Value * &Result) { #ifdef EH_RETURN_DATA_REGNO - if (!validate_gimple_arglist(stmt, INTEGER_TYPE, VOID_TYPE)) + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), INTEGER_TYPE, VOID_TYPE)) return false; - tree which = gimple_call_arg(stmt, 0); + tree which = gimple_call_arg(MIG_TO_GCALL(stmt), 0); unsigned HOST_WIDE_INT iwhich; if (!isa(which)) { @@ -5806,7 +6255,12 @@ return false; } - iwhich = tree_low_cst(which, 1); + iwhich = +#if (GCC_MAJOR > 4) + tree_to_shwi(which); +#else + tree_low_cst(which, 1); +#endif iwhich = EH_RETURN_DATA_REGNO(iwhich); if (iwhich == INVALID_REGNUM) return false; @@ -5814,19 +6268,27 @@ iwhich = 
DWARF_FRAME_REGNUM(iwhich); Result = - ConstantInt::get(ConvertType(gimple_call_return_type(stmt)), iwhich); + ConstantInt::get(ConvertType(gimple_call_return_type( + MIG_TO_GCALL(stmt))), iwhich); #endif return true; } - bool TreeToLLVM::EmitBuiltinEHReturn(gimple stmt, Value * &/*Result*/) { - if (!validate_gimple_arglist(stmt, INTEGER_TYPE, POINTER_TYPE, VOID_TYPE)) + bool TreeToLLVM::EmitBuiltinEHReturn(GimpleTy *stmt, Value * &/*Result*/) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), INTEGER_TYPE, + POINTER_TYPE, VOID_TYPE)) return false; + Value *Offset = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Offset->getType()->getContext(); +#else + TheContext; +#endif Type *IntPtr = DL.getIntPtrType(Context, 0); - Value *Offset = EmitMemory(gimple_call_arg(stmt, 0)); - Value *Handler = EmitMemory(gimple_call_arg(stmt, 1)); + Value *Handler = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 1)); Intrinsic::ID IID = IntPtr->isIntegerTy(32) ? Intrinsic::eh_return_i32 : Intrinsic::eh_return_i64; @@ -5842,14 +6304,14 @@ return true; } - bool TreeToLLVM::EmitBuiltinInitDwarfRegSizes(gimple stmt, + bool TreeToLLVM::EmitBuiltinInitDwarfRegSizes(GimpleTy *stmt, Value * &/*Result*/) { #ifdef DWARF2_UNWIND_INFO unsigned int i; bool wrote_return_column = false; static bool reg_modes_initialized = false; - if (!validate_gimple_arglist(stmt, POINTER_TYPE, VOID_TYPE)) + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), POINTER_TYPE, VOID_TYPE)) return false; if (!reg_modes_initialized) { @@ -5857,7 +6319,15 @@ reg_modes_initialized = true; } - Value *Addr = Builder.CreateBitCast(EmitMemory(gimple_call_arg(stmt, 0)), + Value *Ptr = EmitMemory(gimple_call_arg( + MIG_TO_GCALL(stmt), 0)); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ptr->getType()->getContext(); +#else + TheContext; +#endif + Value *Addr = Builder.CreateBitCast(EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)), Type::getInt8PtrTy(Context)); Constant *Size, *Idx; @@ -5911,8 +6381,9 @@ return true; } - bool TreeToLLVM::EmitBuiltinUnwindInit(gimple stmt, Value * &/*Result*/) { - if (!validate_gimple_arglist(stmt, VOID_TYPE)) + bool TreeToLLVM::EmitBuiltinUnwindInit(GimpleTy *stmt, + Value * &/*Result*/) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), VOID_TYPE)) return false; Builder.CreateCall( @@ -5921,11 +6392,17 @@ return true; } - bool TreeToLLVM::EmitBuiltinStackRestore(gimple stmt) { - if (!validate_gimple_arglist(stmt, POINTER_TYPE, VOID_TYPE)) + bool TreeToLLVM::EmitBuiltinStackRestore(GimpleTy *stmt) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), POINTER_TYPE, VOID_TYPE)) return false; - Value *Ptr = EmitMemory(gimple_call_arg(stmt, 0)); + Value *Ptr = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ptr->getType()->getContext(); +#else + TheContext; +#endif Ptr = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context)); Builder.CreateCall( @@ -5933,60 +6410,81 @@ return true; } - bool TreeToLLVM::EmitBuiltinAlloca(gimple stmt, Value * &Result) { - if (!validate_gimple_arglist(stmt, INTEGER_TYPE, VOID_TYPE)) + bool TreeToLLVM::EmitBuiltinAlloca(GimpleTy *stmt, Value * &Result) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), INTEGER_TYPE, VOID_TYPE)) return false; - Value *Amt = EmitMemory(gimple_call_arg(stmt, 0)); + Value *Amt = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); + LLVMContext &Context = +#if LLVM_VERSION_CODE > 
LLVM_VERSION(3, 8) + Amt->getType()->getContext(); +#else + TheContext; +#endif AllocaInst *Alloca = Builder.CreateAlloca(Type::getInt8Ty(Context), Amt); Alloca->setAlignment(BIGGEST_ALIGNMENT / 8); Result = Alloca; return true; } - bool TreeToLLVM::EmitBuiltinAllocaWithAlign(gimple stmt, Value * &Result) { - if (!validate_gimple_arglist(stmt, INTEGER_TYPE, INTEGER_TYPE, VOID_TYPE)) + bool TreeToLLVM::EmitBuiltinAllocaWithAlign(GimpleTy *stmt, + Value * &Result) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), INTEGER_TYPE, + INTEGER_TYPE, VOID_TYPE)) return false; - Value *Amt = EmitMemory(gimple_call_arg(stmt, 0)); - uint64_t Align = getInt64(gimple_call_arg(stmt, 1), true); + Value *Amt = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); + uint64_t Align = getInt64(gimple_call_arg(MIG_TO_GCALL(stmt), 1), true); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Amt->getType()->getContext(); +#else + TheContext; +#endif AllocaInst *Alloca = Builder.CreateAlloca(Type::getInt8Ty(Context), Amt); Alloca->setAlignment(Align / 8); Result = Alloca; return true; } -#if (GCC_MINOR > 6) - bool TreeToLLVM::EmitBuiltinAssumeAligned(gimple stmt, Value * &Result) { - if (!validate_gimple_arglist(stmt, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE)) +#if GCC_VERSION_CODE > GCC_VERSION(4, 6) + bool TreeToLLVM::EmitBuiltinAssumeAligned(GimpleTy *stmt, + Value * &Result) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), POINTER_TYPE, + INTEGER_TYPE, VOID_TYPE)) return false; // Return the pointer argument. TODO: Pass the alignment information on to // the optimizers. - Value *Ptr = EmitRegister(gimple_call_arg(stmt, 0)); + Value *Ptr = EmitRegister(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); // Bitcast it to the return type. Ptr = - TriviallyTypeConvert(Ptr, getRegType(gimple_call_return_type(stmt))); - Result = Reg2Mem(Ptr, gimple_call_return_type(stmt), Builder); + TriviallyTypeConvert(Ptr, getRegType(gimple_call_return_type(MIG_TO_GCALL(stmt)))); + Result = Reg2Mem(Ptr, gimple_call_return_type(MIG_TO_GCALL(stmt)), Builder); return true; } #endif - bool TreeToLLVM::EmitBuiltinExpect(gimple stmt, Value * &Result) { - tree type = gimple_call_return_type(stmt); - if (gimple_call_num_args(stmt) < 2) { + bool TreeToLLVM::EmitBuiltinExpect(GimpleTy *stmt, Value * &Result) { + tree type = gimple_call_return_type(MIG_TO_GCALL(stmt)); + if (gimple_call_num_args(MIG_TO_GCALL(stmt)) < 2) { Result = Constant::getNullValue(ConvertType(type)); return true; } Type *ArgTy = getRegType(type); Value *ExpectIntr = Intrinsic::getDeclaration(TheModule, Intrinsic::expect, ArgTy); - Value *ArgValue = EmitRegister(gimple_call_arg(stmt, 0)); - Value *ExpectedValue = EmitRegister(gimple_call_arg(stmt, 1)); - Result = Builder.CreateCall2(ExpectIntr, ArgValue, ExpectedValue); + Value *ArgValue = EmitRegister(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); + Value *ExpectedValue = EmitRegister(gimple_call_arg(MIG_TO_GCALL(stmt), 1)); + Result = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Builder.CreateCall(ExpectIntr, {ArgValue, ExpectedValue}); +#else + Builder.CreateCall2(ExpectIntr, ArgValue, ExpectedValue); +#endif Result = Reg2Mem(Result, type, Builder); return true; } - bool TreeToLLVM::EmitBuiltinVAStart(gimple stmt) { - if (gimple_call_num_args(stmt) < 2) { + bool TreeToLLVM::EmitBuiltinVAStart(GimpleTy *stmt) { + if (gimple_call_num_args(MIG_TO_GCALL(stmt)) < 2) { error("too few arguments to function %"); return true; } @@ -6000,23 +6498,35 @@ Constant *va_start = Intrinsic::getDeclaration(TheModule, 
Intrinsic::vastart); - Value *ArgVal = EmitMemory(gimple_call_arg(stmt, 0)); + Value *ArgVal = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ArgVal->getType()->getContext(); +#else + TheContext; +#endif ArgVal = Builder.CreateBitCast(ArgVal, Type::getInt8PtrTy(Context)); Builder.CreateCall(va_start, ArgVal); return true; } - bool TreeToLLVM::EmitBuiltinVAEnd(gimple stmt) { - Value *Arg = EmitMemory(gimple_call_arg(stmt, 0)); + bool TreeToLLVM::EmitBuiltinVAEnd(GimpleTy *stmt) { + Value *Arg = EmitMemory(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Arg->getType()->getContext(); +#else + TheContext; +#endif Arg = Builder.CreateBitCast(Arg, Type::getInt8PtrTy(Context)); Builder.CreateCall(Intrinsic::getDeclaration(TheModule, Intrinsic::vaend), Arg); return true; } - bool TreeToLLVM::EmitBuiltinVACopy(gimple stmt) { - tree Arg1T = gimple_call_arg(stmt, 0); - tree Arg2T = gimple_call_arg(stmt, 1); + bool TreeToLLVM::EmitBuiltinVACopy(GimpleTy *stmt) { + tree Arg1T = gimple_call_arg(MIG_TO_GCALL(stmt), 0); + tree Arg2T = gimple_call_arg(MIG_TO_GCALL(stmt), 1); Value *Arg1 = EmitMemory(Arg1T); // Emit the address of the destination. // The second arg of llvm.va_copy is a pointer to a valist. @@ -6033,6 +6543,12 @@ Arg2 = EmitMemory(Arg2T); } + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Arg1->getType()->getContext(); +#else + TheContext; +#endif static Type *VPTy = Type::getInt8PtrTy(Context); // FIXME: This ignores alignment and volatility of the arguments. @@ -6045,26 +6561,27 @@ return true; } - bool TreeToLLVM::EmitBuiltinAdjustTrampoline(gimple stmt, Value * &Result) { - if (!validate_gimple_arglist(stmt, POINTER_TYPE, VOID_TYPE)) + bool TreeToLLVM::EmitBuiltinAdjustTrampoline(GimpleTy *stmt, + Value * &Result) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), POINTER_TYPE, VOID_TYPE)) return false; Function *Intr = Intrinsic::getDeclaration(TheModule, Intrinsic::adjust_trampoline); - Value *Arg = Builder.CreateBitCast(EmitRegister(gimple_call_arg(stmt, 0)), + Value *Arg = Builder.CreateBitCast(EmitRegister(gimple_call_arg(MIG_TO_GCALL(stmt), 0)), Builder.getInt8PtrTy()); Result = Builder.CreateCall(Intr, Arg); return true; } - bool TreeToLLVM::EmitBuiltinInitTrampoline(gimple stmt, bool OnStack) { - if (!validate_gimple_arglist(stmt, POINTER_TYPE, POINTER_TYPE, + bool TreeToLLVM::EmitBuiltinInitTrampoline(GimpleTy *stmt, bool OnStack) { + if (!validate_gimple_arglist(MIG_TO_GCALL(stmt), POINTER_TYPE, POINTER_TYPE, POINTER_TYPE, VOID_TYPE)) return false; - Value *Tramp = EmitRegister(gimple_call_arg(stmt, 0)); - Value *Func = EmitRegister(gimple_call_arg(stmt, 1)); - Value *Chain = EmitRegister(gimple_call_arg(stmt, 2)); + Value *Tramp = EmitRegister(gimple_call_arg(MIG_TO_GCALL(stmt), 0)); + Value *Func = EmitRegister(gimple_call_arg(MIG_TO_GCALL(stmt), 1)); + Value *Chain = EmitRegister(gimple_call_arg(MIG_TO_GCALL(stmt), 2)); Type *VPTy = Builder.getInt8PtrTy(); Value *Ops[3] = { Builder.CreateBitCast(Tramp, VPTy), @@ -6075,9 +6592,10 @@ Intrinsic::getDeclaration(TheModule, Intrinsic::init_trampoline); Builder.CreateCall(Intr, Ops); -#if (GCC_MINOR > 5) +#if GCC_VERSION_CODE > GCC_VERSION(4, 5) if (OnStack) { - tree target = TREE_OPERAND(gimple_call_arg(stmt, 1), 0); + tree target = + TREE_OPERAND(gimple_call_arg(MIG_TO_GCALL(stmt), 1), 0); warning_at(DECL_SOURCE_LOCATION(target), OPT_Wtrampolines, "trampoline generated 
for nested function %qD", target); } @@ -6115,6 +6633,12 @@ tree AnnotateAttr = lookup_attribute("annotate", DECL_ATTRIBUTES(FieldDecl)); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + FieldPtr->getType()->getContext(); +#else + TheContext; +#endif Type *SBP = Type::getInt8PtrTy(Context); Function *An = @@ -6224,7 +6748,12 @@ // Otherwise, just do raw, low-level pointer arithmetic. FIXME: this could be // much nicer in cases like: // float foo(int w, float A[][w], int g) { return A[g][0]; } - + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + IntPtrTy->getContext(); +#else + TheContext; +#endif if (isa(TREE_TYPE(ArrayTreeType))) { ArrayAddr = Builder.CreateBitCast(ArrayAddr, Type::getInt8PtrTy(Context)); @@ -6332,7 +6861,11 @@ if (MemberIndex < INT_MAX) { assert(!TREE_OPERAND(exp, 2) && "Constant not gimple min invariant?"); // Get a pointer to the byte in which the GCC field starts. - FieldPtr = Builder.CreateStructGEP(StructAddrLV.Ptr, MemberIndex, + FieldPtr = Builder.CreateStructGEP( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + FieldTy, +#endif + StructAddrLV.Ptr, MemberIndex, flag_verbose_asm ? "cr" : ""); // Within that byte, the bit at which the GCC field starts. BitStart = FieldBitOffset & 7; @@ -6369,6 +6902,12 @@ BitStart -= ByteOffset * 8; } + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + StructTy->getContext(); +#else + TheContext; +#endif Type *BytePtrTy = Type::getInt8PtrTy(Context); FieldPtr = Builder.CreateBitCast(StructAddrLV.Ptr, BytePtrTy); FieldPtr = Builder.CreateInBoundsGEP(FieldPtr, Offset, @@ -6415,6 +6954,12 @@ } Type *Ty = ConvertType(TREE_TYPE(exp)); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Decl->getType()->getContext(); +#else + TheContext; +#endif // If we have "extern void foo", make the global have type {} instead of // type void. if (Ty->isVoidTy()) @@ -6438,12 +6983,18 @@ return LV; } -#if (GCC_MINOR > 5) +#if GCC_VERSION_CODE > GCC_VERSION(4, 5) LValue TreeToLLVM::EmitLV_MEM_REF(tree exp) { // The address is the first operand offset in bytes by the second. Value *Addr = EmitRegister(TREE_OPERAND(exp, 0)); if (!integer_zerop(TREE_OPERAND(exp, 1))) { // Convert to a byte pointer and displace by the offset. + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Addr->getType()->getContext(); +#else + TheContext; +#endif Addr = Builder.CreateBitCast(Addr, GetUnitPointerType(Context)); APInt Offset = getAPIntValue(TREE_OPERAND(exp, 1)); // The address is always inside the referenced object, so "inbounds". @@ -6456,13 +7007,13 @@ Addr = Builder.CreateBitCast(Addr, getPointerToType(TREE_TYPE(exp))); unsigned Alignment = -#if (GCC_MINOR < 6) +#if GCC_VERSION_CODE < GCC_VERSION(4, 6) get_object_alignment(exp, TYPE_ALIGN(TREE_TYPE(exp)), BIGGEST_ALIGNMENT); -#elif(GCC_MINOR < 7) +#elif GCC_VERSION_CODE < GCC_VERSION(4, 7) std::max(get_object_alignment(exp, BIGGEST_ALIGNMENT), TYPE_ALIGN(TREE_TYPE(exp))); -#elif (GCC_MINOR < 8) +#elif GCC_VERSION_CODE < GCC_VERSION(4, 8) get_object_or_type_alignment(exp); #else get_object_alignment(exp); @@ -6473,10 +7024,15 @@ } #endif -#if (GCC_MINOR < 6) +#if GCC_VERSION_CODE < GCC_VERSION(4, 6) LValue TreeToLLVM::EmitLV_MISALIGNED_INDIRECT_REF(tree exp) { // The lvalue is just the address. The alignment is given by operand 1. 
- unsigned Alignment = tree_low_cst(TREE_OPERAND(exp, 1), true); + unsigned Alignment = +#if (GCC_MAJOR > 4) + tree_to_shwi(TREE_OPERAND(exp, 1)); +#else + tree_low_cst(TREE_OPERAND(exp, 1), true); +#endif // The alignment need not be a power of two, so replace it with the largest // power of two that divides it. Alignment &= -Alignment; @@ -6518,7 +7074,11 @@ // IMAGPART alignment = MinAlign(Ptr.Alignment, sizeof field); Alignment = MinAlign(Ptr.getAlignment(), DL.getTypeAllocSize(Ptr.Ptr->getType())); - return LValue(Builder.CreateStructGEP(Ptr.Ptr, Idx, + return LValue(Builder.CreateStructGEP( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ptr.Ptr->getType(), +#endif + Ptr.Ptr, Idx, flag_verbose_asm ? "prtxpr" : ""), Alignment); } @@ -6535,7 +7095,7 @@ Value *Addr; Value *Delta = 0; // Offset from base pointer in units -#if (GCC_MINOR > 5) +#if GCC_VERSION_CODE > GCC_VERSION(4, 5) // Starting with gcc 4.6 the address is base + index * step + index2 + offset. Addr = EmitRegister(TMR_BASE(exp)); if (TMR_INDEX2(exp) && !integer_zerop(TMR_INDEX2(exp))) @@ -6555,6 +7115,12 @@ } #endif + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Addr->getType()->getContext(); +#else + TheContext; +#endif if (TMR_INDEX(exp)) { Value *Index = EmitRegister(TMR_INDEX(exp)); if (TMR_STEP(exp) && !integer_onep(TMR_STEP(exp))) @@ -6580,13 +7146,13 @@ // The result can be of a different pointer type even if we didn't advance it. Addr = Builder.CreateBitCast(Addr, getPointerToType(TREE_TYPE(exp))); unsigned Alignment = -#if (GCC_MINOR < 6) +#if GCC_VERSION_CODE < GCC_VERSION(4, 6) get_object_alignment(exp, TYPE_ALIGN(TREE_TYPE(exp)), BIGGEST_ALIGNMENT); -#elif(GCC_MINOR < 7) +#elif GCC_VERSION_CODE < GCC_VERSION(4, 7) std::max(get_object_alignment(exp, BIGGEST_ALIGNMENT), TYPE_ALIGN(TREE_TYPE(exp))); -#elif (GCC_MINOR < 8) +#elif GCC_VERSION_CODE < GCC_VERSION(4, 8) get_object_or_type_alignment(exp); #else get_object_alignment(exp); @@ -6622,7 +7188,12 @@ assert(is_gimple_reg_type(TREE_TYPE(addr)) && "Not of register type!"); // Any generated code goes in the entry block. - BasicBlock *EntryBlock = Fn->begin(); + BasicBlock *EntryBlock = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + &(*Fn->begin()); +#else + Fn->begin(); +#endif // Note the current builder position. BasicBlock *SavedInsertBB = Builder.GetInsertBlock(); @@ -6733,10 +7304,16 @@ /// EmitIntegerRegisterConstant - Turn the given INTEGER_CST into an LLVM /// constant of the corresponding register type. Constant *TreeToLLVM::EmitIntegerRegisterConstant(tree reg) { - ConstantInt *CI = ConstantInt::get(Context, getAPIntValue(reg)); // The destination can be a pointer, integer or floating point type so we need // a generalized cast here Type *Ty = getRegType(TREE_TYPE(reg)); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif + ConstantInt *CI = ConstantInt::get(Context, getAPIntValue(reg)); Instruction::CastOps opcode = CastInst::getCastOpcode( CI, false, Ty, !TYPE_UNSIGNED(TREE_TYPE(reg))); return TheFolder->CreateCast(opcode, CI, Ty); @@ -6788,6 +7365,12 @@ // Form an APInt from the buffer, an APFloat from the APInt, and the desired // floating point constant from the APFloat, phew! 
const APInt &I = APInt(Precision, Words, Parts); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif return ConstantFP::get(Context, APFloat(Ty->getFltSemantics(), I)); } @@ -6805,7 +7388,7 @@ Constant *TreeToLLVM::EmitVectorRegisterConstant(tree reg) { // If there are no elements then immediately return the default value for a // small speedup. -#if (GCC_MINOR < 8) +#if GCC_VERSION_CODE < GCC_VERSION(4, 8) if (!TREE_VECTOR_CST_ELTS(reg)) #else if (!VECTOR_CST_NELTS(reg)) @@ -6815,7 +7398,7 @@ // Convert the elements. SmallVector Elts; tree elt_type = TREE_TYPE(TREE_TYPE(reg)); -#if (GCC_MINOR < 8) +#if GCC_VERSION_CODE < GCC_VERSION(4, 8) for (tree ch = TREE_VECTOR_CST_ELTS(reg); ch; ch = TREE_CHAIN(ch)) { tree elt = TREE_VALUE(ch); #else @@ -6950,8 +7533,18 @@ // with their initial values, and before any modifications to their values. // Create a builder that inserts code before the SSAInsertionPoint marker. + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(TREE_TYPE(reg))->getContext(); +#else + TheContext; +#endif LLVMBuilder SSABuilder(Context, Builder.getFolder()); - SSABuilder.SetInsertPoint(SSAInsertionPoint->getParent(), + // https://reviews.llvm.org/rL249925 + SSABuilder.SetInsertPoint( +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) + SSAInsertionPoint->getParent(), +#endif SSAInsertionPoint); // Use it to load the parameter value. @@ -6966,6 +7559,12 @@ // Unary expressions. Value *TreeToLLVM::EmitReg_ABS_EXPR(tree op) { + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(TREE_TYPE(op))->getContext(); +#else + TheContext; +#endif if (!isa(TREE_TYPE(op))) { Value *Op = EmitRegister(op); Value *OpN = Builder.CreateNeg(Op, Op->getName() + "neg"); @@ -7220,6 +7819,12 @@ assert(Length > 1 && !(Length & (Length - 1)) && "Length not a power of 2!"); SmallVector Mask(Length); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif Constant *UndefIndex = UndefValue::get(Type::getInt32Ty(Context)); for (unsigned Elts = Length >> 1; Elts; Elts >>= 1) { // In the extracted vectors, elements with index Elts and on are undefined. @@ -7264,6 +7869,12 @@ assert(Length > 1 && !(Length & (Length - 1)) && "Length not a power of 2!"); SmallVector Mask(Length); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif Constant *UndefIndex = UndefValue::get(Type::getInt32Ty(Context)); for (unsigned Elts = Length >> 1; Elts; Elts >>= 1) { // In the extracted vectors, elements with index Elts and on are undefined. @@ -7337,6 +7948,12 @@ /*isSigned*/ false); RHS = Builder.CreateInsertElement(UndefValue::get(VecTy), RHS, Builder.getInt32(0)); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + VecTy->getContext(); +#else + TheContext; +#endif Type *MaskTy = VectorType::get(Type::getInt32Ty(Context), VecTy->getNumElements()); RHS = Builder.CreateShuffleVector(RHS, UndefValue::get(VecTy), @@ -7352,6 +7969,12 @@ Value *Amt = EmitRegister(op1); // An integer. VectorType *VecTy = cast(LHS->getType()); unsigned Bits = VecTy->getPrimitiveSizeInBits(); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + VecTy->getContext(); +#else + TheContext; +#endif // If the shift is by a multiple of the element size then emit a shuffle. 
if (ConstantInt *CI = llvm::dyn_cast(Amt)) { @@ -7712,6 +8335,12 @@ Value *TreeToLLVM::EmitReg_POINTER_PLUS_EXPR(tree op0, tree op1) { Value *Ptr = EmitRegister(op0); // The pointer. Value *Idx = EmitRegister(op1); // The offset in units. + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Idx->getType()->getContext(); +#else + TheContext; +#endif // Convert the pointer into an i8* and add the offset to it. Ptr = Builder.CreateBitCast(Ptr, GetUnitPointerType(Context)); @@ -7898,7 +8527,7 @@ : Builder.CreateSRem(LHS, RHS); } -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) Value *TreeToLLVM::EmitReg_VEC_EXTRACT_EVEN_EXPR(tree op0, tree op1) { Value *LHS = EmitRegister(op0); Value *RHS = EmitRegister(op1); @@ -7911,7 +8540,7 @@ } #endif -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) Value *TreeToLLVM::EmitReg_VEC_EXTRACT_ODD_EXPR(tree op0, tree op1) { Value *LHS = EmitRegister(op0); Value *RHS = EmitRegister(op1); @@ -7924,7 +8553,7 @@ } #endif -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) Value *TreeToLLVM::EmitReg_VEC_INTERLEAVE_HIGH_EXPR(tree op0, tree op1) { Value *LHS = EmitRegister(op0); Value *RHS = EmitRegister(op1); @@ -7940,7 +8569,7 @@ } #endif -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) Value *TreeToLLVM::EmitReg_VEC_INTERLEAVE_LOW_EXPR(tree op0, tree op1) { Value *LHS = EmitRegister(op0); Value *RHS = EmitRegister(op1); @@ -7981,7 +8610,7 @@ return Builder.CreateShuffleVector(LHS, RHS, ConstantVector::get(Mask)); } -#if (GCC_MINOR > 6) +#if (GCC_MAJOR < 5 && GCC_MINOR > 6) Value *TreeToLLVM::EmitReg_VEC_PERM_EXPR(tree op0, tree op1, tree op2) { unsigned Length = (unsigned) TYPE_VECTOR_SUBPARTS(TREE_TYPE(op0)); @@ -8047,7 +8676,7 @@ } #endif -#if (GCC_MINOR > 5) +#if (GCC_MAJOR < 5 && GCC_MINOR > 5) Value *TreeToLLVM::EmitReg_FMA_EXPR(tree op0, tree op1, tree op2) { Value *V0 = EmitRegister(op0); Value *V1 = EmitRegister(op1); @@ -8115,7 +8744,7 @@ // ... Render* - Convert GIMPLE to LLVM ... //===----------------------------------------------------------------------===// - void TreeToLLVM::RenderGIMPLE_ASM(gimple stmt) { + void TreeToLLVM::RenderGIMPLE_ASM(GimpleTy *stmt) { // A gimple asm statement consists of an asm string, a list of outputs, a list // of inputs, a list of clobbers, a list of labels and a "volatile" flag. // These correspond directly to the elements of an asm statement. For example @@ -8162,14 +8791,14 @@ // and TREE_VALUE holding the appropriate LABEL_DECL. // TODO: Add support for labels. - if (gimple_asm_nlabels(stmt) > 0) { + if (gimple_asm_nlabels(MIG_TO_GASM(stmt)) > 0) { sorry("'asm goto' not supported"); return; } - const unsigned NumOutputs = gimple_asm_noutputs(stmt); - const unsigned NumInputs = gimple_asm_ninputs(stmt); - const unsigned NumClobbers = gimple_asm_nclobbers(stmt); + const unsigned NumOutputs = gimple_asm_noutputs(MIG_TO_GASM(stmt)); + const unsigned NumInputs = gimple_asm_ninputs(MIG_TO_GASM(stmt)); + const unsigned NumClobbers = gimple_asm_nclobbers(MIG_TO_GASM(stmt)); /// Constraints - The output/input constraints, concatenated together in array /// form instead of list form. 
This way of doing things is forced on us by @@ -8178,33 +8807,48 @@ const char **Constraints = (const char **)alloca( (NumOutputs + NumInputs) * sizeof(const char *)); +#if (GCC_MAJOR > 4) + auto_vec GConstraints; + auto_vec OutputRvec; + auto_vec InputRvec; + GConstraints.safe_grow(NumOutputs + NumInputs); + OutputRvec.safe_grow(NumOutputs); + InputRvec.safe_grow(NumInputs); +#endif + // Initialize the Constraints array. for (unsigned i = 0; i != NumOutputs; ++i) { - tree Output = gimple_asm_output_op(stmt, i); + tree Output = gimple_asm_output_op(MIG_TO_GASM(stmt), i); // If there's an erroneous arg then bail out. if (TREE_TYPE(TREE_VALUE(Output)) == error_mark_node) return; // Record the output constraint. const char *Constraint = TREE_STRING_POINTER(TREE_VALUE(TREE_PURPOSE(Output))); - Constraints[i] = Constraint; +#if (GCC_MAJOR > 4) + GConstraints[i] = +#endif + Constraints[i] = Constraint; } for (unsigned i = 0; i != NumInputs; ++i) { - tree Input = gimple_asm_input_op(stmt, i); + tree Input = gimple_asm_input_op(MIG_TO_GASM(stmt), i); // If there's an erroneous arg then bail out. if (TREE_TYPE(TREE_VALUE(Input)) == error_mark_node) return; // Record the input constraint. const char *Constraint = TREE_STRING_POINTER(TREE_VALUE(TREE_PURPOSE(Input))); - Constraints[NumOutputs + i] = Constraint; +#if (GCC_MAJOR > 4) + GConstraints[i] = +#endif + Constraints[NumOutputs + i] = Constraint; } // Look for multiple alternative constraints: multiple alternatives separated // by commas. unsigned NumChoices = 0; // sentinal; real value is always at least 1. for (unsigned i = 0; i != NumInputs; ++i) { - tree Input = gimple_asm_input_op(stmt, i); + tree Input = gimple_asm_input_op(MIG_TO_GASM(stmt), i); unsigned NumInputChoices = 1; for (const char * p = TREE_STRING_POINTER(TREE_VALUE(TREE_PURPOSE(Input))); @@ -8220,7 +8864,7 @@ NumChoices = NumInputChoices; } for (unsigned i = 0; i != NumOutputs; ++i) { - tree Output = gimple_asm_output_op(stmt, i); + tree Output = gimple_asm_output_op(MIG_TO_GASM(stmt), i); unsigned NumOutputChoices = 1; for (const char * p = TREE_STRING_POINTER(TREE_VALUE(TREE_PURPOSE(Output))); @@ -8245,7 +8889,7 @@ // HasSideEffects - Whether the LLVM inline asm should be marked as having // side effects. - bool HasSideEffects = gimple_asm_volatile_p(stmt) || (NumOutputs == 0); + bool HasSideEffects = gimple_asm_volatile_p(MIG_TO_GASM(stmt)) || (NumOutputs == 0); // CallResultTypes - The inline asm call may return one or more results. The // types of the results are recorded here along with a flag indicating whether @@ -8280,7 +8924,7 @@ // Process outputs. for (unsigned i = 0; i != NumOutputs; ++i) { - tree Output = gimple_asm_output_op(stmt, i); + tree Output = gimple_asm_output_op(MIG_TO_GASM(stmt), i); tree Operand = TREE_VALUE(Output); // Parse the output constraint. @@ -8355,7 +8999,7 @@ // Process inputs. for (unsigned i = 0; i != NumInputs; ++i) { - tree Input = gimple_asm_input_op(stmt, i); + tree Input = gimple_asm_input_op(MIG_TO_GASM(stmt), i); tree Val = TREE_VALUE(Input); tree type = TREE_TYPE(Val); bool IsSigned = !TYPE_UNSIGNED(type); @@ -8371,6 +9015,12 @@ if (AllowsReg || !AllowsMem) { // Register operand. Type *LLVMTy = ConvertType(type); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + LLVMTy->getContext(); +#else + TheContext; +#endif Value *Op = 0; Type *OpTy = LLVMTy; if (LLVMTy->isSingleValueType()) { @@ -8529,32 +9179,38 @@ // Create input, output & clobber lists for the benefit of md_asm_clobbers. 
tree outputs = NULL_TREE; if (NumOutputs) { - tree t = outputs = gimple_asm_output_op(stmt, 0); + tree t = outputs = gimple_asm_output_op(MIG_TO_GASM(stmt), 0); for (unsigned i = 1; i < NumOutputs; i++) { - TREE_CHAIN(t) = gimple_asm_output_op(stmt, i); - t = gimple_asm_output_op(stmt, i); + TREE_CHAIN(t) = gimple_asm_output_op(MIG_TO_GASM(stmt), i); + t = gimple_asm_output_op(MIG_TO_GASM(stmt), i); } } tree inputs = NULL_TREE; if (NumInputs) { - tree t = inputs = gimple_asm_input_op(stmt, 0); + tree t = inputs = gimple_asm_input_op(MIG_TO_GASM(stmt), 0); for (unsigned i = 1; i < NumInputs; i++) { - TREE_CHAIN(t) = gimple_asm_input_op(stmt, i); - t = gimple_asm_input_op(stmt, i); + TREE_CHAIN(t) = gimple_asm_input_op(MIG_TO_GASM(stmt), i); + t = gimple_asm_input_op(MIG_TO_GASM(stmt), i); } } tree clobbers = NULL_TREE; if (NumClobbers) { - tree t = clobbers = gimple_asm_clobber_op(stmt, 0); + tree t = clobbers = gimple_asm_clobber_op(MIG_TO_GASM(stmt), 0); for (unsigned i = 1; i < NumClobbers; i++) { - TREE_CHAIN(t) = gimple_asm_clobber_op(stmt, i); - t = gimple_asm_clobber_op(stmt, i); + TREE_CHAIN(t) = gimple_asm_clobber_op(MIG_TO_GASM(stmt), i); + t = gimple_asm_clobber_op(MIG_TO_GASM(stmt), i); } } - Clobbers = targetm.md_asm_clobbers(outputs, inputs, clobbers); + Clobbers = +#if (GCC_MAJOR > 4) + // FIXME targetm.md_asm_adjust(OutputRvec, InputRvec, GConstraints, clobbers); + 0; +#else + targetm.md_asm_clobbers(outputs, inputs, clobbers); +#endif } for (; Clobbers; Clobbers = TREE_CHAIN(Clobbers)) { @@ -8587,7 +9243,7 @@ switch (CallResultTypes.size()) { // If there are no results then the return type is void! case 0: - CallResultType = Type::getVoidTy(Context); + CallResultType = Type::getVoidTy(TheContext); break; // If there is one result then use the result's type as the return type. case 1: @@ -8599,7 +9255,7 @@ SmallVector Fields((unsigned) CallResultTypes.size()); for (unsigned i = 0, e = (unsigned) CallResultTypes.size(); i != e; ++i) Fields[i] = CallResultTypes[i].first; - CallResultType = StructType::get(Context, Fields); + CallResultType = StructType::get(TheContext, Fields); break; } @@ -8631,7 +9287,10 @@ if (gimple_has_location(stmt)) { // Pass the location of the asm using a !srcloc metadata. Constant *LocationCookie = Builder.getInt64(gimple_location(stmt)); + // FIXME +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) CV->setMetadata("srcloc", MDNode::get(Context, LocationCookie)); +#endif } // If the call produces a value, store it into the destination. @@ -8660,15 +9319,17 @@ // Give the backend a chance to upgrade the inline asm to LLVM code. This // handles some common cases that LLVM has intrinsics for, e.g. x86 bswap -> // llvm.bswap. +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) if (const TargetLowering *TLI = TheTarget->getSubtargetImpl()->getTargetLowering()) TLI->ExpandInlineAsm(CV); +#endif } - void TreeToLLVM::RenderGIMPLE_ASSIGN(gimple stmt) { + void TreeToLLVM::RenderGIMPLE_ASSIGN(GimpleTy *stmt) { tree lhs = gimple_assign_lhs(stmt); -#if (GCC_MINOR > 6) +#if (GCC_MAJOR < 5 && GCC_MINOR > 6) // Assigning a right-hand side with TREE_CLOBBER_P says that the left-hand // side is dead from this point on. Output an llvm.lifetime.end intrinsic. if (get_gimple_rhs_class(gimple_expr_code(stmt)) == GIMPLE_SINGLE_RHS && @@ -8703,18 +9364,18 @@ WriteScalarToLHS(lhs, EmitAssignRHS(stmt)); } - void TreeToLLVM::RenderGIMPLE_CALL(gimple stmt) { + void TreeToLLVM::RenderGIMPLE_CALL(GimpleTy *stmt) { tree lhs = gimple_call_lhs(stmt); if (!lhs) { // The returned value is not used. 
- if (!isa(gimple_call_return_type(stmt))) { + if (!isa(gimple_call_return_type(MIG_TO_GCALL(stmt)))) { OutputCallRHS(stmt, 0); return; } // Create a temporary to hold the returned value. // TODO: Figure out how to avoid creating this temporary and the // associated useless code that stores the returned value into it. - MemRef Loc = CreateTempLoc(ConvertType(gimple_call_return_type(stmt))); + MemRef Loc = CreateTempLoc(ConvertType(gimple_call_return_type(MIG_TO_GCALL(stmt)))); OutputCallRHS(stmt, &Loc); return; } @@ -8728,7 +9389,7 @@ WriteScalarToLHS(lhs, OutputCallRHS(stmt, 0)); } - void TreeToLLVM::RenderGIMPLE_COND(gimple stmt) { + void TreeToLLVM::RenderGIMPLE_COND(GimpleTy *stmt) { // Emit the comparison. Value *Cond = EmitCompare(gimple_cond_lhs(stmt), gimple_cond_rhs(stmt), gimple_cond_code(stmt)); @@ -8744,8 +9405,12 @@ Builder.CreateCondBr(Cond, IfTrue, IfFalse); } - void TreeToLLVM::RenderGIMPLE_EH_DISPATCH(gimple stmt) { - int RegionNo = gimple_eh_dispatch_region(stmt); + void TreeToLLVM::RenderGIMPLE_EH_DISPATCH(GimpleTy *stmt) { + int RegionNo = gimple_eh_dispatch_region( +#if (GCC_MAJOR > 4) + as_a +#endif + (stmt)); eh_region region = get_eh_region_from_number(RegionNo); switch (region->type) { @@ -8754,6 +9419,12 @@ case ERT_ALLOWED_EXCEPTIONS: { // Filter. BasicBlock *Dest = getLabelDeclBlock(region->u.allowed.label); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Dest->getContext(); +#else + TheContext; +#endif if (!region->u.allowed.type_list) { // Not allowed to throw. Branch directly to the post landing pad. @@ -8786,6 +9457,12 @@ SmallSet AlreadyCaught; // Typeinfos known caught. Function *TypeIDIntr = Intrinsic::getDeclaration(TheModule, Intrinsic::eh_typeid_for); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + TypeIDIntr->getContext(); +#else + TheContext; +#endif for (eh_catch c = region->u.eh_try.first_catch; c; c = c->next_catch) { BasicBlock *Dest = getLabelDeclBlock(c->label); if (!c->type_list) { @@ -8825,7 +9502,7 @@ } } - void TreeToLLVM::RenderGIMPLE_GOTO(gimple stmt) { + void TreeToLLVM::RenderGIMPLE_GOTO(GimpleTy *stmt) { tree dest = gimple_goto_dest(stmt); if (isa(dest)) { @@ -8846,14 +9523,18 @@ Br->addDestination(getBasicBlock(e->dest)); } - void TreeToLLVM::RenderGIMPLE_RESX(gimple stmt) { + void TreeToLLVM::RenderGIMPLE_RESX(GimpleTy *stmt) { // Reraise an exception. If this statement is inside an exception handling // region then the reraised exception may be caught by the current function, // in which case it can be simplified into a branch. int DstLPadNo = lookup_stmt_eh_lp(stmt); eh_region dst_rgn = DstLPadNo ? get_eh_region_from_lp_number(DstLPadNo) : NULL; - eh_region src_rgn = get_eh_region_from_number(gimple_resx_region(stmt)); + eh_region src_rgn = get_eh_region_from_number(gimple_resx_region( +#if (GCC_MAJOR > 4) + as_a +#endif + (stmt))); if (!src_rgn) { // Unreachable block? @@ -8898,8 +9579,12 @@ Builder.CreateResume(UnwindData); } - void TreeToLLVM::RenderGIMPLE_RETURN(gimple stmt) { - tree retval = gimple_return_retval(stmt); + void TreeToLLVM::RenderGIMPLE_RETURN(GimpleTy *stmt) { + tree retval = gimple_return_retval( +#if (GCC_MAJOR > 4) + as_a +#endif + (stmt)); tree result = DECL_RESULT(current_function_decl); if (retval && retval != error_mark_node && retval != result) { @@ -8914,6 +9599,12 @@ } } + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(TREE_TYPE(retval))->getContext(); +#else + TheContext; +#endif // Emit a branch to the exit label. 
if (!ReturnBB) // Create a new block for the return node, but don't insert it yet. @@ -8922,21 +9613,21 @@ Builder.CreateBr(ReturnBB); } - void TreeToLLVM::RenderGIMPLE_SWITCH(gimple stmt) { + void TreeToLLVM::RenderGIMPLE_SWITCH(GimpleTy *stmt) { // Emit the condition. - Value *Index = EmitRegister(gimple_switch_index(stmt)); - tree index_type = TREE_TYPE(gimple_switch_index(stmt)); + Value *Index = EmitRegister(gimple_switch_index(MIG_TO_GSWITCH(stmt))); + tree index_type = TREE_TYPE(gimple_switch_index(MIG_TO_GSWITCH(stmt))); // Create the switch instruction. - tree default_label = CASE_LABEL(gimple_switch_label(stmt, 0)); + tree default_label = CASE_LABEL(gimple_switch_label(MIG_TO_GSWITCH(stmt), 0)); SwitchInst *SI = Builder.CreateSwitch(Index, getLabelDeclBlock(default_label), - gimple_switch_num_labels(stmt)); + gimple_switch_num_labels(MIG_TO_GSWITCH(stmt))); // Add the switch cases. BasicBlock *IfBlock = 0; // Set if a range was output as an "if". - for (unsigned i = 1, e = gimple_switch_num_labels(stmt); i != e; ++i) { - tree label = gimple_switch_label(stmt, i); + for (unsigned i = 1, e = gimple_switch_num_labels(MIG_TO_GSWITCH(stmt)); i != e; ++i) { + tree label = gimple_switch_label(MIG_TO_GSWITCH(stmt), i); BasicBlock *Dest = getLabelDeclBlock(CASE_LABEL(label)); // Convert the integer to the right type. @@ -8954,6 +9645,12 @@ ConstantInt *HighC = cast(Val); APInt Range = HighC->getValue() - LowC->getValue(); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Val->getType()->getContext(); +#else + TheContext; +#endif if (Range.ult(APInt(Range.getBitWidth(), 64))) { // Add all of the necessary successors to the switch. APInt CurrentValue = LowC->getValue(); @@ -8990,7 +9687,7 @@ //===----------------------------------------------------------------------===// /// EmitAssignRHS - Convert the RHS of a scalar GIMPLE_ASSIGN to LLVM. - Value *TreeToLLVM::EmitAssignRHS(gimple stmt) { + Value *TreeToLLVM::EmitAssignRHS(GimpleTy *stmt) { // Loads from memory and other non-register expressions are handled by // EmitAssignSingleRHS. if (get_gimple_rhs_class(gimple_expr_code(stmt)) == GIMPLE_SINGLE_RHS) { @@ -9006,7 +9703,7 @@ tree_code code = gimple_assign_rhs_code(stmt); tree rhs1 = gimple_assign_rhs1(stmt); tree rhs2 = gimple_assign_rhs2(stmt); -#if (GCC_MINOR > 5) +#if (GCC_MAJOR < 5 && GCC_MINOR > 5) tree rhs3 = gimple_assign_rhs3(stmt); #endif @@ -9155,7 +9852,7 @@ case TRUTH_XOR_EXPR: RHS = EmitReg_TruthOp(type, rhs1, rhs2, Instruction::Xor); break; -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case VEC_EXTRACT_EVEN_EXPR: RHS = EmitReg_VEC_EXTRACT_EVEN_EXPR(rhs1, rhs2); break; @@ -9169,16 +9866,20 @@ RHS = EmitReg_VEC_INTERLEAVE_LOW_EXPR(rhs1, rhs2); break; #endif +#if (GCC_MAJOR < 5) case VEC_LSHIFT_EXPR: RHS = EmitReg_VecShiftOp(rhs1, rhs2, /*isLeftShift*/ true); break; +#endif case VEC_PACK_FIX_TRUNC_EXPR: case VEC_PACK_TRUNC_EXPR: RHS = EmitReg_VEC_PACK_TRUNC_EXPR(type, rhs1, rhs2); break; +#if (GCC_MAJOR < 5) case VEC_RSHIFT_EXPR: RHS = EmitReg_VecShiftOp(rhs1, rhs2, /*isLeftShift*/ false); break; +#endif case VEC_UNPACK_FLOAT_HI_EXPR: case VEC_UNPACK_HI_EXPR: RHS = EmitReg_VecUnpackHiExpr(type, rhs1); @@ -9198,12 +9899,12 @@ break; // Ternary expressions. 
-#if (GCC_MINOR > 5) +#if (GCC_MAJOR < 5 && GCC_MINOR > 5) case FMA_EXPR: RHS = EmitReg_FMA_EXPR(rhs1, rhs2, rhs3); break; #endif -#if (GCC_MINOR > 6) +#if (GCC_MAJOR < 5 && GCC_MINOR > 6) case COND_EXPR: case VEC_COND_EXPR: RHS = EmitReg_CondExpr(rhs1, rhs2, rhs3); @@ -9230,7 +9931,7 @@ // Expressions (tcc_expression). case ADDR_EXPR: return EmitADDR_EXPR(rhs); -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) case COND_EXPR: case VEC_COND_EXPR: return EmitCondExpr(rhs); @@ -9251,10 +9952,10 @@ case COMPONENT_REF: case IMAGPART_EXPR: case INDIRECT_REF: -#if (GCC_MINOR > 5) +#if GCC_VERSION_CODE > GCC_VERSION(4, 5) case MEM_REF: #endif -#if (GCC_MINOR < 6) +#if GCC_VERSION_CODE < GCC_VERSION(4, 6) case MISALIGNED_INDIRECT_REF: #endif case REALPART_EXPR: @@ -9275,7 +9976,7 @@ } /// OutputCallRHS - Convert the RHS of a GIMPLE_CALL. - Value *TreeToLLVM::OutputCallRHS(gimple stmt, const MemRef * DestLoc) { + Value *TreeToLLVM::OutputCallRHS(GimpleTy *stmt, const MemRef * DestLoc) { // Check for a built-in function call. If we can lower it directly, do so // now. tree fndecl = gimple_call_fndecl(stmt); @@ -9283,16 +9984,16 @@ DECL_BUILT_IN_CLASS(fndecl) != BUILT_IN_FRONTEND) { Value *Res = 0; if (EmitBuiltinCall(stmt, fndecl, DestLoc, Res)) - return Res ? Mem2Reg(Res, gimple_call_return_type(stmt), Builder) : 0; + return Res ? Mem2Reg(Res, gimple_call_return_type(MIG_TO_GCALL(stmt)), Builder) : 0; } - tree call_expr = gimple_call_fn(stmt); + tree call_expr = gimple_call_fn(MIG_TO_GCALL(stmt)); assert(TREE_TYPE(call_expr) && (isa(TREE_TYPE(call_expr)) || isa(TREE_TYPE(call_expr))) && "Not calling a function pointer?"); -#if (GCC_MINOR < 7) +#if GCC_VERSION_CODE < GCC_VERSION(4, 7) tree function_type = TREE_TYPE(TREE_TYPE(call_expr)); #else tree function_type = gimple_call_fntype(stmt); @@ -9331,10 +10032,16 @@ // fall into the subsequent block. if (gimple_call_flags(stmt) & ECF_NORETURN) { Builder.CreateUnreachable(); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Result->getType()->getContext(); +#else + TheContext; +#endif BeginBlock(BasicBlock::Create(Context)); } - return Result ? Mem2Reg(Result, gimple_call_return_type(stmt), Builder) + return Result ? Mem2Reg(Result, gimple_call_return_type(MIG_TO_GCALL(stmt)), Builder) : 0; } @@ -9378,7 +10085,17 @@ // Load and store the minimum number of bytes that covers the field. unsigned LoadSizeInBits = LV.BitStart + LV.BitSize; LoadSizeInBits = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + (unsigned) alignTo(LoadSizeInBits, BITS_PER_UNIT); +#else (unsigned) RoundUpToAlignment(LoadSizeInBits, BITS_PER_UNIT); +#endif + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + RHS->getType()->getContext(); +#else + TheContext; +#endif Type *LoadType = IntegerType::get(Context, LoadSizeInBits); // Load the existing bits. 
Index: src/Debug.cpp =================================================================== --- src/Debug.cpp +++ src/Debug.cpp @@ -25,6 +25,10 @@ // LLVM headers #include "llvm/IR/Module.h" +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/IR/IntrinsicInst.h" +#endif // System headers #include @@ -198,7 +202,11 @@ tree decl_name = DECL_NAME(Node); if (decl_name != NULL && IDENTIFIER_POINTER(decl_name) != NULL) { if (TREE_PUBLIC(Node) && DECL_ASSEMBLER_NAME(Node) != DECL_NAME(Node) && +#if (GCC_MAJOR > 4) + !DECL_ABSTRACT_P(Node)) { +#else !DECL_ABSTRACT(Node)) { +#endif return StringRef(IDENTIFIER_POINTER(DECL_ASSEMBLER_NAME(Node))); } } @@ -231,19 +239,28 @@ /// EmitFunctionStart - Constructs the debug code for entering a function. void DebugInfo::EmitFunctionStart(tree FnDecl, Function *Fn) { - DIType FNType = getOrCreateType(TREE_TYPE(FnDecl)); + MigDIType FNType = getOrCreateType(TREE_TYPE(FnDecl)); unsigned lineno = CurLineNo; std::map::iterator I = SPCache.find(FnDecl); if (I != SPCache.end()) { +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + DISubprogram *SPDecl = llvm::getDISubprogram(cast(I->second)); +#else DISubprogram SPDecl(cast(I->second)); - DISubprogram SP = CreateSubprogramDefinition(SPDecl, lineno, Fn); +#endif + MigDISubprogram SP = CreateSubprogramDefinition(SPDecl, lineno, Fn); SPDecl->replaceAllUsesWith(SP); // Push function on region stack. - RegionStack.push_back(WeakVH(SP)); +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + RegionStack.push_back(WeakVH(cast(SP))); + RegionMap[FnDecl] = WeakVH(cast(SP)); +#else + RegionStack.push_back(SP); RegionMap[FnDecl] = WeakVH(SP); +#endif return; } @@ -254,21 +271,35 @@ DECL_ABSTRACT_ORIGIN(FnDecl) != FnDecl) ArtificialFnWithAbstractOrigin = true; - DIDescriptor SPContext = - ArtificialFnWithAbstractOrigin ? getOrCreateFile(main_input_filename) - : findRegion(DECL_CONTEXT(FnDecl)); + MigDIScope SPContext = + ArtificialFnWithAbstractOrigin ? +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + dyn_cast_or_null(getOrCreateFile(main_input_filename)) +#else + getOrCreateFile(main_input_filename) +#endif + : findRegion(DECL_CONTEXT(FnDecl)); // Creating context may have triggered creation of this SP descriptor. So // check the cache again. I = SPCache.find(FnDecl); if (I != SPCache.end()) { +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + DISubprogram *SPDecl = llvm::getDISubprogram(cast(I->second)); +#else DISubprogram SPDecl(cast(I->second)); - DISubprogram SP = CreateSubprogramDefinition(SPDecl, lineno, Fn); +#endif + MigDISubprogram SP = CreateSubprogramDefinition(SPDecl, lineno, Fn); SPDecl->replaceAllUsesWith(SP); // Push function on region stack. 
+#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + RegionStack.push_back(WeakVH(cast(SP))); + RegionMap[FnDecl] = WeakVH(cast(SP)); +#else RegionStack.push_back(WeakVH(SP)); RegionMap[FnDecl] = WeakVH(SP); +#endif return; } @@ -278,67 +309,107 @@ unsigned Virtuality = 0; unsigned VIndex = 0; - DIType ContainingType; + MigDIType ContainingType; if (DECL_VINDEX(FnDecl) && DECL_CONTEXT(FnDecl) && isa((DECL_CONTEXT(FnDecl)))) { // Workaround GCC PR42653 +#if (GCC_MAJOR > 4) + if (tree_fits_uhwi_p(DECL_VINDEX(FnDecl))) + VIndex = tree_to_shwi(DECL_VINDEX(FnDecl)); +#else if (host_integerp(DECL_VINDEX(FnDecl), 0)) VIndex = tree_low_cst(DECL_VINDEX(FnDecl), 0); +#endif Virtuality = dwarf::DW_VIRTUALITY_virtual; ContainingType = getOrCreateType(DECL_CONTEXT(FnDecl)); } StringRef FnName = getFunctionName(FnDecl); - DISubprogram SP = CreateSubprogram( - SPContext, FnName, FnName, LinkageName, getOrCreateFile(Loc.file), lineno, - FNType, Fn->hasInternalLinkage(), true /*definition*/, Virtuality, VIndex, - ContainingType, DECL_ARTIFICIAL(FnDecl), optimize, Fn); + MigDISubprogram SP = CreateSubprogram( + SPContext, FnName, FnName, LinkageName, getOrCreateFile(Loc.file), + lineno, FNType, Fn->hasInternalLinkage(), true /*definition*/, + ContainingType, Virtuality, VIndex, DECL_ARTIFICIAL(FnDecl), + optimize, Fn); - SPCache[FnDecl] = WeakVH(SP); +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + SPCache[FnDecl] = WeakVH(cast(SP)); // Push function on region stack. + RegionStack.push_back(WeakVH(cast(SP))); + RegionMap[FnDecl] = WeakVH(cast(SP)); +#else + SPCache[FnDecl] = WeakVH(SP); RegionStack.push_back(WeakVH(SP)); RegionMap[FnDecl] = WeakVH(SP); +#endif } /// getOrCreateNameSpace - Get name space descriptor for the tree node. -DINameSpace DebugInfo::getOrCreateNameSpace(tree Node, DIDescriptor Context) { +MigDINamespace DebugInfo::getOrCreateNameSpace(tree Node, MigDIScope Context) { std::map::iterator I = NameSpaceCache.find(Node); if (I != NameSpaceCache.end()) +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + return dyn_cast_or_null(cast(I->second)); +#else return DINameSpace(cast(I->second)); +#endif expanded_location Loc = GetNodeLocation(Node, false); - DINameSpace DNS = Builder.createNameSpace( + MigDINamespace DNS = Builder.createNameSpace( Context, GetNodeName(Node), getOrCreateFile(Loc.file), Loc.line); - NameSpaceCache[Node] = WeakVH(DNS); + NameSpaceCache[Node] = WeakVH( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + cast +#endif + (DNS)); return DNS; } /// findRegion - Find tree_node N's region. 
-DIDescriptor DebugInfo::findRegion(tree Node) { +MigDIScope DebugInfo::findRegion(tree Node) { if (Node == NULL_TREE) +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + return dyn_cast_or_null(getOrCreateFile(main_input_filename)); +#else return getOrCreateFile(main_input_filename); +#endif std::map::iterator I = RegionMap.find(Node); if (I != RegionMap.end()) - if (MDNode *R = dyn_cast_or_null(&*I->second)) + if (MDNode *R = cast(&*I->second)) +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + return dyn_cast_or_null(R); +#else return DIDescriptor(R); +#endif if (isa(Node)) { - DIType Ty = getOrCreateType(Node); + MigDIType Ty = getOrCreateType(Node); +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + return dyn_cast_or_null(Ty); +#else return DIDescriptor(Ty); +#endif } else if (DECL_P(Node)) { if (isa(Node)) { - DIDescriptor NSContext = findRegion(DECL_CONTEXT(Node)); - DINameSpace NS = getOrCreateNameSpace(Node, NSContext); + MigDIScope NSContext = findRegion(DECL_CONTEXT(Node)); + MigDINamespace NS = getOrCreateNameSpace(Node, NSContext); +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + return dyn_cast_or_null(NS); +#else return DIDescriptor(NS); +#endif } return findRegion(DECL_CONTEXT(Node)); } // Otherwise main compile unit covers everything. +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + return dyn_cast_or_null(getOrCreateFile(main_input_filename)); +#else return getOrCreateFile(main_input_filename); +#endif } /// EmitFunctionEnd - Pop the region stack and reset current lexical block. @@ -367,17 +438,32 @@ expanded_location Loc = GetNodeLocation(decl, false); // Construct variable. +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + DIScope *VarScope = dyn_cast_or_null(cast(RegionStack.back())); +#else DIScope VarScope = DIScope(cast(RegionStack.back())); - DIType Ty = getOrCreateType(type); +#endif + MigDIType Ty = getOrCreateType(type); if (Ty && DECL_ARTIFICIAL(decl)) Ty = Builder.createArtificialType(Ty); // If type info is not available then do not emit debug info for this var. if (!Ty) return; + + // https://reviews.llvm.org/rL243764 +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + llvm::DILocalVariable *D = Builder.createAutoVariable( + VarScope, Name, getOrCreateFile(Loc.file), Loc.line, Ty, optimize); + DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(AI); +#else llvm::DIVariable D = Builder.createLocalVariable( Tag, VarScope, Name, getOrCreateFile(Loc.file), Loc.line, Ty, optimize); +#endif Instruction *Call = Builder.insertDeclare(AI, D, Builder.createExpression(), +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + DbgDecl->getDebugLoc(), +#endif IRBuilder.GetInsertBlock()); Call->setDebugLoc(DebugLoc::get(Loc.line, 0, VarScope)); @@ -410,7 +496,7 @@ return; // Gather location information. expanded_location Loc = expand_location(DECL_SOURCE_LOCATION(decl)); - DIType TyD = getOrCreateType(TREE_TYPE(decl)); + MigDIType TyD = getOrCreateType(TREE_TYPE(decl)); StringRef DispName = GV->getName(); if (DispName.empty()) DispName = "__unknown__"; @@ -429,7 +515,7 @@ } /// createBasicType - Create BasicType. -DIType DebugInfo::createBasicType(tree type) { +MigDIType DebugInfo::createBasicType(tree type) { StringRef TypeName = GetNodeName(type); if (TypeName.empty()) @@ -484,22 +570,44 @@ } /// createMethodType - Create MethodType. -DIType DebugInfo::createMethodType(tree type) { +MigDIType DebugInfo::createMethodType(tree type) { // Create a place holder type first. The may be used as a context // for the argument types. 
- llvm::DIType FwdType = Builder.createReplaceableForwardDecl( + // https://reviews.llvm.org/rL228852 +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + DICompositeType *FwdType = Builder.createReplaceableCompositeType( +#else + DIType FwdType = Builder.createReplaceableForwardDecl( +#endif llvm::dwarf::DW_TAG_subroutine_type, StringRef(), findRegion(TYPE_CONTEXT(type)), getOrCreateFile(main_input_filename), 0, 0, 0, 0); +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + llvm::Value *FTN = cast(FwdType); + llvm::TrackingVH +#else llvm::MDNode *FTN = FwdType; - llvm::TrackingVH FwdTypeNode = FTN; + llvm::TrackingVH +#endif + FwdTypeNode = FTN; +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + TypeCache[type] = WeakVH(cast(FwdType)); + RegionStack.push_back(WeakVH(cast(FwdType))); + RegionMap[type] = WeakVH(cast(FwdType)); +#else TypeCache[type] = WeakVH(FwdType); // Push the struct on region stack. RegionStack.push_back(WeakVH(FwdType)); RegionMap[type] = WeakVH(FwdType); +#endif - llvm::SmallVector EltTys; +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + llvm::SmallVector +#else + llvm::SmallVector +#endif + EltTys; // Add the result type at least. EltTys.push_back(getOrCreateType(TREE_TYPE(type))); @@ -510,9 +618,9 @@ tree formal_type = TREE_VALUE(arg); if (formal_type == void_type_node) break; - llvm::DIType FormalType = getOrCreateType(formal_type); + MigDIType FormalType = getOrCreateType(formal_type); if (!ProcessedFirstArg && isArtificialArgumentType(formal_type, type)) { - DIType AFormalType = Builder.createArtificialType(FormalType); + MigDIType AFormalType = Builder.createArtificialType(FormalType); EltTys.push_back(AFormalType); } else EltTys.push_back(FormalType); @@ -520,28 +628,39 @@ ProcessedFirstArg = true; } +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + llvm::DITypeRefArray EltTypeArray = Builder.getOrCreateTypeArray(makeArrayRef(EltTys)); +#else llvm::DITypeArray EltTypeArray = Builder.getOrCreateTypeArray(EltTys); +#endif RegionStack.pop_back(); std::map::iterator RI = RegionMap.find(type); if (RI != RegionMap.end()) RegionMap.erase(RI); - llvm::DIType RealType = Builder.createSubroutineType( - getOrCreateFile(main_input_filename), - EltTypeArray); + MigDIType RealType = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Builder.createSubroutineType(EltTypeArray); +#else + Builder.createSubroutineType( + getOrCreateFile(main_input_filename), + EltTypeArray); +#endif // Now that we have a real decl for the struct, replace anything using the // old decl with the new one. This will recursively update the debug info. +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) llvm::DIType(FwdTypeNode).replaceAllUsesWith(RealType); +#endif return RealType; } /// createPointerType - Create PointerType. -DIType DebugInfo::createPointerType(tree type) { +MigDIType DebugInfo::createPointerType(tree type) { - DIType FromTy = getOrCreateType(TREE_TYPE(type)); + MigDIType FromTy = getOrCreateType(TREE_TYPE(type)); // type* and type& // FIXME: Should BLOCK_POINTER_TYP have its own DW_TAG? 
unsigned Tag = @@ -552,16 +671,25 @@ if (tree TyName = TYPE_NAME(type)) if (isa(TyName) && !DECL_ORIGINAL_TYPE(TyName)) { expanded_location TypeNameLoc = GetNodeLocation(TyName); - DIType Ty = CreateDerivedType( + MigDIType Ty = CreateDerivedType( Tag, findRegion(DECL_CONTEXT(TyName)), GetNodeName(TyName), getOrCreateFile(TypeNameLoc.file), TypeNameLoc.line, 0 /*size*/, 0 /*align*/, 0 /*offset */, 0 /*flags*/, FromTy); - TypeCache[TyName] = WeakVH(Ty); + TypeCache[TyName] = WeakVH( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + cast +#endif + (Ty)); return Ty; } - StringRef PName = FromTy.getName(); - DIType PTy = CreateDerivedType( + StringRef PName = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + FromTy->getName(); +#else + FromTy.getName(); +#endif + MigDIType PTy = CreateDerivedType( Tag, findRegion(TYPE_CONTEXT(type)), Tag == DW_TAG_pointer_type ? StringRef() : PName, getOrCreateFile(main_input_filename), 0 /*line no*/, NodeSizeInBits(type), @@ -570,11 +698,16 @@ } /// createArrayType - Create ArrayType. -DIType DebugInfo::createArrayType(tree type) { +MigDIType DebugInfo::createArrayType(tree type) { // Add the dimensions of the array. FIXME: This loses CV qualifiers from // interior arrays, do we care? Why aren't nested arrays represented the // obvious/recursive way? - llvm::SmallVector Subscripts; +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + llvm::SmallVector +#else + llvm::SmallVector +#endif + Subscripts; // There will be ARRAY_TYPE nodes for each rank. Followed by the derived // type. @@ -603,7 +736,11 @@ Subscripts.push_back(Builder.getOrCreateSubrange(0, Length)); } - llvm::DIArray SubscriptArray = Builder.getOrCreateArray(Subscripts); + MigDINodeArray SubscriptArray = Builder.getOrCreateArray( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + makeArrayRef +#endif + (Subscripts)); expanded_location Loc = GetNodeLocation(type); return CreateCompositeType( llvm::dwarf::DW_TAG_array_type, findRegion(TYPE_CONTEXT(type)), @@ -612,9 +749,14 @@ } /// createEnumType - Create EnumType. -DIType DebugInfo::createEnumType(tree type) { +MigDIType DebugInfo::createEnumType(tree type) { // enum { a, b, ..., z }; - llvm::SmallVector Elements; +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + llvm::SmallVector +#else + llvm::SmallVector +#endif + Elements; if (TYPE_SIZE(type)) { for (tree Link = TYPE_VALUES(type); Link; Link = TREE_CHAIN(Link)) { @@ -627,7 +769,11 @@ } } - llvm::DIArray EltArray = Builder.getOrCreateArray(Elements); + MigDINodeArray EltArray = Builder.getOrCreateArray( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + makeArrayRef +#endif + (Elements)); expanded_location Loc = {}; @@ -638,12 +784,17 @@ return CreateCompositeType( llvm::dwarf::DW_TAG_enumeration_type, findRegion(TYPE_CONTEXT(type)), GetNodeName(type), getOrCreateFile(Loc.file), Loc.line, - NodeSizeInBits(type), NodeAlignInBits(type), 0, 0, llvm::DIType(), + NodeSizeInBits(type), NodeAlignInBits(type), 0, 0, +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + nullptr, +#else + llvm::DIType(), +#endif EltArray); } /// createStructType - Create StructType for struct or union or class. -DIType DebugInfo::createStructType(tree type) { +MigDIType DebugInfo::createStructType(tree type) { // struct { a; b; ... z; }; | union { a; b; ... z; }; unsigned Tag = @@ -678,18 +829,27 @@ // final definition. 
expanded_location Loc = GetNodeLocation(TREE_CHAIN(type), false); unsigned SFlags = 0; - DIDescriptor TyContext = findRegion(TYPE_CONTEXT(type)); + MigDIScope TyContext = findRegion(TYPE_CONTEXT(type)); // Check if this type is created while creating context information // descriptor. { std::map::iterator I = TypeCache.find(type); if (I != TypeCache.end()) - if (MDNode *TN = dyn_cast_or_null(&*I->second)) + if (MDNode *TN = cast(&*I->second)) +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + return dyn_cast_or_null(TN); +#else return DIType(TN); +#endif } - llvm::DIType FwdDecl = Builder.createReplaceableForwardDecl( + MigDIType FwdDecl = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Builder.createReplaceableCompositeType( +#else + Builder.createReplaceableForwardDecl( +#endif Tag, GetNodeName(type), TyContext, getOrCreateFile(Loc.file), Loc.line, 0, 0, 0); @@ -698,6 +858,13 @@ return FwdDecl; // Insert into the TypeCache so that recursive uses will find it. +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + llvm::Value *FDN = cast(FwdDecl); + llvm::TrackingVH FwdDeclNode = FDN; + TypeCache[type] = WeakVH(cast(FwdDecl)); + RegionStack.push_back(WeakVH(cast(FwdDecl))); + RegionMap[type] = WeakVH(cast(FwdDecl)); +#else llvm::MDNode *FDN = FwdDecl; llvm::TrackingVH FwdDeclNode = FDN; TypeCache[type] = WeakVH(FwdDecl); @@ -705,15 +872,21 @@ // Push the struct on region stack. RegionStack.push_back(WeakVH(FwdDecl)); RegionMap[type] = WeakVH(FwdDecl); +#endif // Convert all the elements. - llvm::SmallVector EltTys; +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + llvm::SmallVector +#else + llvm::SmallVector +#endif + EltTys; if (tree binfo = TYPE_BINFO(type)) { for (unsigned i = 0, e = BINFO_N_BASE_BINFOS(binfo); i != e; ++i) { tree BInfo = BINFO_BASE_BINFO(binfo, i); tree BInfoType = BINFO_TYPE(BInfo); - DIType BaseClass = getOrCreateType(BInfoType); + MigDIType BaseClass = getOrCreateType(BInfoType); unsigned BFlags = 0; if (BINFO_VIRTUAL_P(BInfo)) BFlags = llvm::DIType::FlagVirtual; @@ -733,8 +906,14 @@ if (BINFO_VIRTUAL_P(BInfo)) Offset = 0 - getInt64(BINFO_VPTR_FIELD(BInfo), false); // FIXME : name, size, align etc... - DIType DTy = CreateDerivedType( - DW_TAG_inheritance, findRegion(type), StringRef(), llvm::DIFile(), 0, + MigDIType DTy = CreateDerivedType( + DW_TAG_inheritance, findRegion(type), StringRef(), +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + nullptr, +#else + llvm::DIFile(), +#endif + 0, 0, 0, Offset, BFlags, BaseClass); EltTys.push_back(DTy); } @@ -766,7 +945,7 @@ // Field type is the declared type of the field. tree FieldNodeType = FieldType(Member); - DIType MemberType = getOrCreateType(FieldNodeType); + MigDIType MemberType = getOrCreateType(FieldNodeType); StringRef MemberName = GetNodeName(Member); unsigned MFlags = 0; if (TREE_PROTECTED(Member)) @@ -774,7 +953,7 @@ else if (TREE_PRIVATE(Member)) MFlags = llvm::DIType::FlagPrivate; - DIType DTy = CreateDerivedType( + MigDIType DTy = CreateDerivedType( DW_TAG_member, findRegion(DECL_CONTEXT(Member)), MemberName, getOrCreateFile(MemLoc.file), MemLoc.line, NodeSizeInBits(Member), NodeAlignInBits(FieldNodeType), int_bit_position(Member), MFlags, @@ -795,74 +974,108 @@ std::map::iterator I = SPCache.find(Member); if (I != SPCache.end()) - EltTys.push_back(DISubprogram(cast(I->second))); + EltTys.push_back( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + dyn_cast_or_null(cast(I->second)) +#else + DISubprogram(cast(I->second)) +#endif + ); else { // Get the location of the member. 
expanded_location MemLoc = GetNodeLocation(Member, false); StringRef MemberName = getFunctionName(Member); StringRef LinkageName = getLinkageName(Member); - DIType SPTy = getOrCreateType(TREE_TYPE(Member)); + MigDIType SPTy = getOrCreateType(TREE_TYPE(Member)); unsigned Virtuality = 0; unsigned VIndex = 0; - DIType ContainingType; + MigDIType ContainingType; if (DECL_VINDEX(Member)) { +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + if (tree_fits_uhwi_p(DECL_VINDEX(Member))) + VIndex = tree_to_shwi(DECL_VINDEX(Member)); +#else if (host_integerp(DECL_VINDEX(Member), 0)) VIndex = tree_low_cst(DECL_VINDEX(Member), 0); +#endif Virtuality = dwarf::DW_VIRTUALITY_virtual; ContainingType = getOrCreateType(DECL_CONTEXT(Member)); } - DISubprogram SP = CreateSubprogram( + MigDISubprogram SP = CreateSubprogram( findRegion(DECL_CONTEXT(Member)), MemberName, MemberName, LinkageName, getOrCreateFile(MemLoc.file), MemLoc.line, SPTy, false, false, - Virtuality, VIndex, ContainingType, DECL_ARTIFICIAL(Member), + ContainingType, Virtuality, VIndex, DECL_ARTIFICIAL(Member), optimize); EltTys.push_back(SP); - SPCache[Member] = WeakVH(SP); + SPCache[Member] = WeakVH( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + cast +#endif + (SP)); } } - llvm::DIArray Elements = Builder.getOrCreateArray(EltTys); + MigDINodeArray Elements = Builder.getOrCreateArray(EltTys); RegionStack.pop_back(); std::map::iterator RI = RegionMap.find(type); if (RI != RegionMap.end()) RegionMap.erase(RI); - llvm::DIType ContainingType; + MigDIType ContainingType; if (TYPE_VFIELD(type)) { tree vtype = DECL_FCONTEXT(TYPE_VFIELD(type)); ContainingType = getOrCreateType(vtype); } - llvm::DICompositeType RealDecl = CreateCompositeType( + MigDICompositeType RealDecl = CreateCompositeType( Tag, findRegion(TYPE_CONTEXT(type)), GetNodeName(type), getOrCreateFile(Loc.file), Loc.line, NodeSizeInBits(type), - NodeAlignInBits(type), 0, SFlags, llvm::DIType(), Elements, RunTimeLang, + NodeAlignInBits(type), 0, SFlags, +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + nullptr, +#else + llvm::DIType(), +#endif + Elements, RunTimeLang, ContainingType); - RegionMap[type] = WeakVH(RealDecl); + RegionMap[type] = WeakVH( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + cast +#endif + (RealDecl)); // Now that we have a real decl for the struct, replace anything using the - // old decl with the new one. This will recursively update the debug info. + // old decl with the new one. This will recursively update the debug info +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) llvm::DIType(FwdDeclNode).replaceAllUsesWith(RealDecl); +#endif return RealDecl; } /// createVariantType - Create variant type or return MainTy. 
-DIType DebugInfo::createVariantType(tree type, DIType MainTy) { - - DIType Ty; +MigDIType DebugInfo::createVariantType(tree type, MigDIType MainTy) { + MigDIType Ty; if (tree TyDef = TYPE_NAME(type)) { std::map::iterator I = TypeCache.find(TyDef); if (I != TypeCache.end()) if (I->second) +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + return dyn_cast_or_null(cast(I->second)); +#else return DIType(cast(I->second)); +#endif if (isa(TyDef) && DECL_ORIGINAL_TYPE(TyDef)) { expanded_location TypeDefLoc = GetNodeLocation(TyDef); Ty = CreateDerivedType( DW_TAG_typedef, findRegion(DECL_CONTEXT(TyDef)), GetNodeName(TyDef), getOrCreateFile(TypeDefLoc.file), TypeDefLoc.line, 0 /*size*/, 0 /*align*/, 0 /*offset */, 0 /*flags*/, MainTy); - TypeCache[TyDef] = WeakVH(Ty); + TypeCache[TyDef] = WeakVH( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + cast +#endif + (Ty)); return Ty; } } @@ -884,7 +1097,11 @@ 0 /* flags */, MainTy); if (TYPE_VOLATILE(type) || TYPE_READONLY(type)) { - TypeCache[type] = WeakVH(Ty); + TypeCache[type] = WeakVH( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + cast +#endif + (Ty)); return Ty; } @@ -894,37 +1111,47 @@ /// getOrCreateType - Get the type from the cache or create a new type if /// necessary. -DIType DebugInfo::getOrCreateType(tree type) { +MigDIType DebugInfo::getOrCreateType(tree type) { if (type == NULL_TREE || type == error_mark_node) llvm_unreachable("Not a type."); // Should only be void if a pointer/reference/return type. Returning NULL // allows the caller to produce a non-derived type. if (isa(type)) +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + return nullptr; +#else return DIType(); +#endif // Check to see if the compile unit already has created this type. std::map::iterator I = TypeCache.find(type); if (I != TypeCache.end()) if (I->second) +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + return dyn_cast_or_null(cast(I->second)); +#else return DIType(cast(I->second)); +#endif if (type != TYPE_MAIN_VARIANT(type) && TYPE_MAIN_VARIANT(type)) { - DIType MainTy = getOrCreateType(TYPE_MAIN_VARIANT(type)); - DIType Ty = createVariantType(type, MainTy); + MigDIType MainTy = getOrCreateType(TYPE_MAIN_VARIANT(type)); + MigDIType Ty = createVariantType(type, MainTy); +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) if (Ty.isValid()) +#endif return Ty; } // Work out details of type. - DIType Ty; + MigDIType Ty; switch (TREE_CODE(type)) { case ERROR_MARK: case TRANSLATION_UNIT_DECL: default: llvm_unreachable("Unsupported type"); -#if (GCC_MINOR > 5) +#if GCC_VERSION_CODE > GCC_VERSION(4, 5) case NULLPTR_TYPE: #endif case LANG_TYPE: { @@ -967,7 +1194,11 @@ Ty = createBasicType(type); break; } - TypeCache[type] = WeakVH(Ty); + TypeCache[type] = WeakVH( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + cast +#endif + (Ty)); return Ty; } @@ -1038,7 +1269,7 @@ } /// getOrCreateFile - Get DIFile descriptor. -DIFile DebugInfo::getOrCreateFile(const char *FullPath) { +MigDIFile DebugInfo::getOrCreateFile(const char *FullPath) { if (!FullPath) FullPath = main_input_filename; if (!strcmp(FullPath, "")) @@ -1059,10 +1290,14 @@ /// CreateDerivedType - Create a derived type like const qualified type, /// pointer, typedef, etc. 
-DIDerivedType DebugInfo::CreateDerivedType( - unsigned Tag, DIDescriptor Context, StringRef Name, DIFile F, - unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, DIType DerivedFrom) { +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) +DIDerivedType * +#else +DIDerivedType +#endif +DebugInfo::CreateDerivedType(unsigned Tag, MigDIScope Context, StringRef Name, + MigDIFile F, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, MigDIType DerivedFrom) { switch (Tag) { case dwarf::DW_TAG_typedef: return Builder.createTypedef(DerivedFrom, Name, F, LineNumber, Context); @@ -1081,7 +1316,13 @@ AlignInBits, OffsetInBits, Flags, DerivedFrom); case dwarf::DW_TAG_inheritance: - return Builder.createInheritance(DIType(Context), DerivedFrom, OffsetInBits, + return Builder.createInheritance( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + dyn_cast_or_null(Context), +#else + DIType(Context), +#endif + DerivedFrom, OffsetInBits, Flags); case dwarf::DW_TAG_friend: case dwarf::DW_TAG_ptr_to_member_type: @@ -1091,11 +1332,11 @@ } /// CreateCompositeType - Create a composite type like array, struct, etc. -DICompositeType DebugInfo::CreateCompositeType( - unsigned Tag, DIDescriptor Context, StringRef Name, DIFile F, +MigDICompositeType DebugInfo::CreateCompositeType( + unsigned Tag, MigDIScope Context, StringRef Name, MigDIFile F, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, DIType DerivedFrom, DIArray Elements, - unsigned RuntimeLang, MDNode *ContainingType) { + uint64_t OffsetInBits, unsigned Flags, MigDIType DerivedFrom, + MigDINodeArray Elements, unsigned RuntimeLang, MDNode *ContainingType) { switch (Tag) { case dwarf::DW_TAG_array_type: return Builder.createArrayType(SizeInBits, AlignInBits, DerivedFrom, @@ -1103,7 +1344,13 @@ case dwarf::DW_TAG_structure_type: return Builder.createStructType(Context, Name, F, LineNumber, SizeInBits, AlignInBits, Flags, DerivedFrom, Elements, - 0, DIType(ContainingType)); + 0, +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + dyn_cast_or_null(ContainingType) +#else + DIType(ContainingType) +#endif + ); case dwarf::DW_TAG_union_type: return Builder.createUnionType(Context, Name, F, LineNumber, SizeInBits, AlignInBits, Flags, Elements, RuntimeLang); @@ -1122,41 +1369,76 @@ /// CreateSubprogram - Create a new descriptor for the specified subprogram. /// See comments in DISubprogram for descriptions of these fields. This /// method does not unique the generated descriptors. 
-DISubprogram DebugInfo::CreateSubprogram( - DIDescriptor Context, StringRef Name, StringRef DisplayName, - StringRef LinkageName, DIFile F, unsigned LineNo, DIType Ty, - bool isLocalToUnit, bool isDefinition, unsigned VK, unsigned VIndex, - DIType ContainingType, unsigned Flags, bool isOptimized, Function *Fn) { +MigDISubprogram DebugInfo::CreateSubprogram( + MigDIScope Context, StringRef Name, StringRef DisplayName, + StringRef LinkageName, MigDIFile F, unsigned LineNo, MigDIType Ty, + bool isLocalToUnit, bool isDefinition, MigDIType ContainingType, + unsigned VK, unsigned VIndex, unsigned Flags, bool isOptimized, + Function *Fn) { +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + DISubroutineType *CTy = dyn_cast_or_null(Ty); +#else DICompositeType CTy = getDICompositeType(Ty); assert(CTy.Verify() && "Expected a composite type!"); - if (ContainingType.isValid() || VK || VIndex) +#endif + if ( +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) + ContainingType.isValid() || +#endif + VK || VIndex) return Builder.createMethod(Context, Name, LinkageName, F, LineNo, CTy, isLocalToUnit, isDefinition, VK, VIndex, +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + 0, /* ThisAdjustment */ + nullptr, +#else DIType(), - Flags, isOptimized, Fn, NULL); +#endif + Flags, isOptimized, +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) + Fn, +#endif + NULL); return Builder.createFunction(Context, Name, LinkageName, F, LineNo, CTy, isLocalToUnit, isDefinition, LineNo, Flags, - isOptimized, Fn, NULL, NULL); + isOptimized, +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) + Fn, +#endif + NULL, NULL); } /// CreateSubprogramDefinition - Create new subprogram descriptor for the /// given declaration. -DISubprogram DebugInfo::CreateSubprogramDefinition( +MigDISubprogram DebugInfo::CreateSubprogramDefinition( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + DISubprogram *SP, unsigned LineNo, Function *Fn) { + if (SP->isDefinition()) + return SP; + + MigDIFile File = Builder.createFile(SP->getFilename(), SP->getDirectory()); + return Builder.createFunction(dyn_cast_or_null(SP->getScope()), + SP->getName(), SP->getLinkageName(), File, LineNo, SP->getType(), + SP->isLocalToUnit(), true, LineNo, SP->getFlags(), SP->isOptimized(), + SP->getTemplateParams(), SP); +#else DISubprogram &SP, unsigned LineNo, Function *Fn) { if (SP.isDefinition()) return DISubprogram(SP); - DIFile File = Builder.createFile(SP.getFilename(), SP.getDirectory()); + MigDIFile File = Builder.createFile(SP.getFilename(), SP.getDirectory()); return Builder.createFunction( SP.getContext(), SP.getName(), SP.getLinkageName(), File, SP.getLineNumber(), SP.getType(), SP.isLocalToUnit(), true, LineNo, SP.getFlags(), SP.isOptimized(), Fn, SP.getTemplateParams(), SP); +#endif } //===----------------------------------------------------------------------===// // Routines for inserting code into a function //===----------------------------------------------------------------------===// +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. 
Instruction *DebugInfo::InsertDeclare(Value *Storage, DIVariable D, Instruction *InsertBefore) { @@ -1214,3 +1496,4 @@ D }; return CallInst::Create(ValueFn, Args, "", InsertAtEnd); } +#endif Index: src/DefaultABI.cpp =================================================================== --- src/DefaultABI.cpp +++ src/DefaultABI.cpp @@ -39,6 +39,9 @@ #include "coretypes.h" #include "tm.h" #include "tree.h" +#if (GCC_MAJOR > 4) +#include "function.h" +#endif #ifndef ENABLE_BUILD_WITH_CXX } // extern "C" #endif @@ -116,7 +119,7 @@ ? isSingleElementStructOrArray(FoundField, ignoreZeroLength, false) : 0; case ARRAY_TYPE: - ArrayType *Ty = dyn_cast(ConvertType(type)); + ArrayType *Ty = llvm::dyn_cast(ConvertType(type)); if (!Ty || Ty->getNumElements() != 1) return 0; return isSingleElementStructOrArray(TREE_TYPE(type), false, false); @@ -204,7 +207,13 @@ std::vector Elts; if (Ty->isVoidTy()) { // Handle void explicitly as a {} type. - Type *OpTy = StructType::get(getGlobalContext()); + Type *OpTy = StructType::get( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext() +#else + getGlobalContext() +#endif + ); C.HandleScalarArgument(OpTy, type); ScalarElts.push_back(OpTy); } else if (isPassedByInvisibleReference(type)) { // variable size -> by-ref. @@ -343,6 +352,12 @@ Size = origSize; else Size = TREE_INT_CST_LOW(TYPE_SIZE(type)) / 8; +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Type *Ty = ConvertType(type); + LLVMContext &Context = Ty->getContext(); +#else + LLVMContext &Context = getGlobalContext(); +#endif // FIXME: We should preserve all aggregate value alignment information. // Work around to preserve some aggregate value alignment information: @@ -350,7 +365,7 @@ // from Int64 alignment. ARM backend needs this. unsigned Align = TYPE_ALIGN(type) / 8; unsigned Int64Align = - getDataLayout().getABITypeAlignment(Type::getInt64Ty(getGlobalContext())); + getDataLayout().getABITypeAlignment(Type::getInt64Ty(Context)); bool UseInt64 = (DontCheckAlignment || Align >= Int64Align); unsigned ElementSize = UseInt64 ? 8 : 4; @@ -361,8 +376,8 @@ Type *ArrayElementType = NULL; if (ArraySize) { Size = Size % ElementSize; - ArrayElementType = (UseInt64 ? Type::getInt64Ty(getGlobalContext()) - : Type::getInt32Ty(getGlobalContext())); + ArrayElementType = (UseInt64 ? Type::getInt64Ty(Context) + : Type::getInt32Ty(Context)); ATy = ArrayType::get(ArrayElementType, ArraySize); } @@ -370,13 +385,13 @@ unsigned LastEltRealSize = 0; llvm::Type *LastEltTy = 0; if (Size > 4) { - LastEltTy = Type::getInt64Ty(getGlobalContext()); + LastEltTy = Type::getInt64Ty(Context); } else if (Size > 2) { - LastEltTy = Type::getInt32Ty(getGlobalContext()); + LastEltTy = Type::getInt32Ty(Context); } else if (Size > 1) { - LastEltTy = Type::getInt16Ty(getGlobalContext()); + LastEltTy = Type::getInt16Ty(Context); } else if (Size > 0) { - LastEltTy = Type::getInt8Ty(getGlobalContext()); + LastEltTy = Type::getInt8Ty(Context); } if (LastEltTy) { if (Size != getDataLayout().getTypeAllocSize(LastEltTy)) @@ -388,7 +403,7 @@ Elts.push_back(ATy); if (LastEltTy) Elts.push_back(LastEltTy); - StructType *STy = StructType::get(getGlobalContext(), Elts, false); + StructType *STy = StructType::get(Context, Elts, false); unsigned i = 0; if (ArraySize) { @@ -419,14 +434,20 @@ // that occupies storage but has no useful information, and is not passed // anywhere". Happens on x86-64. 
std::vector Elts(OrigElts); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + getGlobalContext(); +#endif Type *wordType = getDataLayout().getPointerSize(0) == 4 - ? Type::getInt32Ty(getGlobalContext()) - : Type::getInt64Ty(getGlobalContext()); + ? Type::getInt32Ty(Context) + : Type::getInt64Ty(Context); for (unsigned i = 0, e = Elts.size(); i != e; ++i) if (OrigElts[i]->isVoidTy()) Elts[i] = wordType; - StructType *STy = StructType::get(getGlobalContext(), Elts, false); + StructType *STy = StructType::get(Context, Elts, false); unsigned Size = getDataLayout().getTypeAllocSize(STy); unsigned InSize = 0; Index: src/Trees.cpp =================================================================== --- src/Trees.cpp +++ src/Trees.cpp @@ -163,7 +163,11 @@ /// the truncated value must sign-/zero-extend to the original. APInt getAPIntValue(const_tree exp, unsigned Bitwidth) { assert(isa(exp) && "Expected an integer constant!"); +#if (GCC_MAJOR > 4) + offset_int val = wi::to_offset(TREE_OPERAND(exp, 0)); +#else double_int val = tree_to_double_int(exp); +#endif unsigned DefaultWidth = TYPE_PRECISION(TREE_TYPE(exp)); APInt DefaultValue; @@ -174,8 +178,13 @@ "Unsupported host integer width!"); unsigned ShiftAmt = HOST_BITS_PER_WIDE_INT; integerPart Part = +#if (GCC_MAJOR > 4) + integerPart((unsigned HOST_WIDE_INT) val.ulow()) + + (integerPart((unsigned HOST_WIDE_INT) val.uhigh()) << ShiftAmt); +#else integerPart((unsigned HOST_WIDE_INT) val.low) + (integerPart((unsigned HOST_WIDE_INT) val.high) << ShiftAmt); +#endif DefaultValue = APInt(DefaultWidth, Part); } @@ -203,7 +212,13 @@ if (!t) return false; if (HOST_BITS_PER_WIDE_INT == 64) - return host_integerp(t, Unsigned) && !TREE_OVERFLOW(t); + return +#if (GCC_MAJOR > 4) + tree_fits_uhwi_p(t) +#else + host_integerp(t, Unsigned) +#endif + && !TREE_OVERFLOW(t); assert(HOST_BITS_PER_WIDE_INT == 32 && "Only 32- and 64-bit hosts supported!"); return (isa(t) && !TREE_OVERFLOW(t)) && @@ -211,7 +226,11 @@ // If the constant is signed and we want an unsigned result, check // that the value is non-negative. If the constant is unsigned and // we want a signed result, check it fits in 63 bits. +#if (GCC_MAJOR > 4) + (HOST_WIDE_INT) TREE_INT_CST_NUNITS(t) >= 0); +#else (HOST_WIDE_INT) TREE_INT_CST_HIGH(t) >= 0); +#endif } /// getInt64 - Extract the value of an INTEGER_CST as a 64 bit integer. If @@ -227,7 +246,12 @@ } else { assert(HOST_BITS_PER_WIDE_INT == 32 && "Only 32- and 64-bit hosts supported!"); - unsigned HOST_WIDE_INT HI = (unsigned HOST_WIDE_INT) TREE_INT_CST_HIGH(t); + unsigned HOST_WIDE_INT HI = (unsigned HOST_WIDE_INT) +#if (GCC_MAJOR > 4) + TREE_INT_CST_NUNITS(t); +#else + TREE_INT_CST_HIGH(t); +#endif return ((uint64_t) HI << 32) | (uint64_t) LO; } } Index: src/TypeConversion.cpp =================================================================== --- src/TypeConversion.cpp +++ src/TypeConversion.cpp @@ -50,6 +50,10 @@ #include "tree.h" #include "flags.h" +#if (GCC_MAJOR > 4) +#include "print-tree.h" +#include "calls.h" +#endif #ifndef ENABLE_BUILD_WITH_CXX } // extern "C" #endif @@ -59,7 +63,11 @@ using namespace llvm; -static LLVMContext &Context = getGlobalContext(); +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) +static LLVMContext TheContext; +#else +static LLVMContext &TheContext = getGlobalContext(); +#endif /// SCCInProgress - Set of mutually dependent types currently being converted. static const std::vector *SCCInProgress; @@ -264,7 +272,7 @@ // O(N) rather than O(N log N) if all N fields are used. 
It's not clear if it // would really be a win though. - StructType *STy = dyn_cast(Ty); + StructType *STy = llvm::dyn_cast(Ty); // If this is not a struct type, then for sure there is no corresponding LLVM // field (we do not require GCC record types to be converted to LLVM structs). if (!STy) @@ -300,9 +308,16 @@ /// getPointerToType - Returns the LLVM register type to use for a pointer to /// the given GCC type. Type *getPointerToType(tree type) { - if (isa(type)) + if (isa(type)) { // void* -> byte* + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(type)->getContext(); +#else + TheContext; +#endif return GetUnitPointerType(Context); + } // FIXME: Handle address spaces. return ConvertType(type)->getPointerTo(); } @@ -437,6 +452,12 @@ assert(!isa(type) && "Registers must have a scalar type!"); assert(!isa(type) && "Registers cannot have void type!"); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(type)->getContext(); +#else + TheContext; +#endif switch (TREE_CODE(type)) { default: @@ -504,6 +525,12 @@ static Type *ConvertArrayTypeRecursive(tree type) { Type *ElementTy = ConvertType(TREE_TYPE(type)); uint64_t NumElements = ArrayLengthOf(type); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ElementTy->getContext(); +#else + TheContext; +#endif if (NumElements == NO_LENGTH) // Variable length array? NumElements = 0; @@ -575,6 +602,12 @@ void HandleShadowResult(PointerType *PtrArgTy, bool RetPtr) { // This function either returns void or the shadow argument, // depending on the target. + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + PtrArgTy->getContext(); +#else + TheContext; +#endif RetTy = RetPtr ? PtrArgTy : Type::getVoidTy(Context); // In any case, there is a dummy shadow argument though! @@ -608,8 +641,15 @@ if (type == float_type_node) LLVMTy = ConvertType(double_type_node); else if (LLVMTy->isIntegerTy(16) || LLVMTy->isIntegerTy(8) || - LLVMTy->isIntegerTy(1)) + LLVMTy->isIntegerTy(1)) { + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(type)->getContext(); +#else + TheContext; +#endif LLVMTy = Type::getInt32Ty(Context); + } } ArgTypes.push_back(LLVMTy); } @@ -659,6 +699,12 @@ CallingConv::ID &CallingConv, AttributeSet &PAL) { tree ReturnType = TREE_TYPE(type); SmallVector ArgTys; + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(type)->getContext(); +#else + TheContext; +#endif Type *RetTy(Type::getVoidTy(Context)); FunctionTypeConversion Client(RetTy, ArgTys, CallingConv, KNRPromotion); @@ -672,7 +718,6 @@ ABIConverter.HandleReturnType(ReturnType, current_function_decl, false); SmallVector Attrs; - LLVMContext &Context = RetTy->getContext(); // Compute whether the result needs to be zext or sext'd. AttrBuilder RAttrBuilder; @@ -729,6 +774,12 @@ FunctionType * ConvertFunctionType(tree type, tree decl, tree static_chain, CallingConv::ID &CallingConv, AttributeSet &PAL) { + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(type)->getContext(); +#else + TheContext; +#endif Type *RetTy = Type::getVoidTy(Context); SmallVector ArgTypes; FunctionTypeConversion Client(RetTy, ArgTypes, CallingConv, @@ -784,7 +835,6 @@ } // Compute whether the result needs to be zext or sext'd. 
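/* Illustrative sketch, not part of the patch: the hunk above (in the code
   path controlled by the KNRPromotion flag visible in the surrounding
   conversion logic) preserves the usual default argument promotions --
   float promotes to double, and integer types narrower than 32 bits promote
   to i32.  A condensed form of that rule, checking the converted LLVM type
   instead of the GCC tree (helper name hypothetical): */
static llvm::Type *PromoteKNRArgument(llvm::Type *Ty, llvm::LLVMContext &C) {
  if (Ty->isFloatTy())
    return llvm::Type::getDoubleTy(C);  /* float -> double */
  if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16))
    return llvm::Type::getInt32Ty(C);   /* i1/i8/i16 -> i32 */
  return Ty;                            /* already wide enough */
}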
- LLVMContext &Context = RetTy->getContext(); AttrBuilder RAttrBuilder; HandleArgumentExtension(TREE_TYPE(type), RAttrBuilder); @@ -836,7 +886,7 @@ for (; Args && TREE_VALUE(Args) != void_type_node; Args = TREE_CHAIN(Args)) { tree ArgTy = TREE_VALUE(Args); if (!isPassedByInvisibleReference(ArgTy)) - if (const StructType *STy = dyn_cast(ConvertType(ArgTy))) + if (const StructType *STy = llvm::dyn_cast(ConvertType(ArgTy))) if (STy->isOpaque()) { // If we are passing an opaque struct by value, we don't know how many // arguments it will turn into. Because we can't handle this yet, @@ -914,6 +964,12 @@ // pointed to if this would cause trouble (the pointer type is turned into // {}* instead). tree pointee = main_type(type); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(type)->getContext(); +#else + TheContext; +#endif // The pointer type is in the strongly connected component (SCC) currently // being converted. Check whether the pointee is as well. If there is more @@ -1032,6 +1088,12 @@ /// which usually means a multiple of 8. Type *extractContents(const DataLayout &DL) { assert(R.getWidth() % BITS_PER_UNIT == 0 && "Boundaries not aligned?"); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif /// If the current value for the type can be used to represent the bits in /// the range then just return it. if (isSafeToReturnContentsDirectly(DL)) @@ -1048,7 +1110,11 @@ // byte. This is not needed for correctness, but helps the optimizers. if ((Ty->getPrimitiveSizeInBits() % BITS_PER_UNIT) != 0) { unsigned BitWidth = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + alignTo(Ty->getPrimitiveSizeInBits(), BITS_PER_UNIT); +#else RoundUpToAlignment(Ty->getPrimitiveSizeInBits(), BITS_PER_UNIT); +#endif Ty = IntegerType::get(Context, BitWidth); if (isSafeToReturnContentsDirectly(DL)) return Ty; @@ -1082,6 +1148,12 @@ // integer like this is pretty nasty, but as we only get here for bitfields // it is fairly harmless. R = R.Join(S.R); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif Ty = IntegerType::get(Context, R.getWidth()); Starts = R.getFirst(); } @@ -1090,6 +1162,12 @@ // FIXME: This new logic, especially the handling of bitfields, is untested // and probably wrong on big-endian machines. assert(TYPE_SIZE(type) && "Incomplete types should be handled elsewhere!"); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(type)->getContext(); +#else + TheContext; +#endif IntervalList Layout; const DataLayout &DL = getDataLayout(); @@ -1380,6 +1458,13 @@ assert(type == TYPE_MAIN_VARIANT(type) && "Not converting the main variant!"); assert(!mayRecurse(type) && "Expected a non-recursive type!"); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ConvertType(type)->getContext(); +#else + TheContext; +#endif + // If we already converted the type, reuse the previous conversion. Note that // this fires for types which are really recursive, such as pointer types, but // that we don't consider recursive any more because already converted. @@ -1573,6 +1658,7 @@ } Type *ConvertType(tree type) { + LLVMContext &Context = TheContext; if (type == error_mark_node) return Type::getInt32Ty(Context); @@ -1604,6 +1690,7 @@ // will be visited first. 
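/* Illustrative sketch, not part of the patch: RoundUpToAlignment() was
   renamed to alignTo() in newer LLVM (llvm/Support/MathExtras.h); both
   round the first argument up to the next multiple of the second, which
   is conceptually just: */
static uint64_t RoundUpToMultiple(uint64_t Value, uint64_t Align) {
  /* Assumes Align != 0, as both LLVM helpers do. */
  return ((Value + Align - 1) / Align) * Align;
}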
Note that this analysis is performed only once: the // results of the type conversion are cached, and any future conversion of one // of the visited types will just return the cached value. +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) for (scc_iterator I = scc_begin(type), E = scc_end(type); I != E; ++I) { const std::vector &SCC = *I; @@ -1668,6 +1755,7 @@ } } } +#endif // At this point every type reachable from this one has been converted, and // the conversion results cached. Return the value computed for the type. Index: src/mips/ABIHack.inc =================================================================== --- /dev/null +++ src/mips/ABIHack.inc @@ -0,0 +1,20 @@ +/* Subroutines used for code generation on MIPS. + Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, + 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ Index: src/mips/Target.cpp =================================================================== --- /dev/null +++ src/mips/Target.cpp @@ -0,0 +1,22 @@ +//===--------------- Target.cpp - Implements the MIPS ABI. ---------------===// +// +// Copyright (C) 2017 Leslie Zhai +// Copyright (C) 2005 to 2013 Evan Cheng, Duncan Sands et al. +// +// This file is part of DragonEgg. +// +// DragonEgg is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free Software +// Foundation; either version 2, or (at your option) any later version. +// +// DragonEgg is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along with +// DragonEgg; see the file COPYING. If not, write to the Free Software +// Foundation, 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA. +// +//===----------------------------------------------------------------------===// +// This file implements specific LLVM MIPS ABI. +//===----------------------------------------------------------------------===// Index: src/mips/mips_builtins =================================================================== --- /dev/null +++ src/mips/mips_builtins @@ -0,0 +1 @@ +// Unsupported builtins are commented out. Index: src/x86/ABIHack6.inc =================================================================== --- /dev/null +++ src/x86/ABIHack6.inc @@ -0,0 +1,55056 @@ +/* Subroutines used for code generation on IA-32. + Copyright (C) 1988-2016 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "rtl.h" +#include "tree.h" +#include "gimple.h" +#include "cfghooks.h" +#include "cfgloop.h" +#include "df.h" +#include "tm_p.h" +#include "stringpool.h" +#include "expmed.h" +//#include "optabs.h" +#include "regs.h" +#include "emit-rtl.h" +//#include "recog.h" +#include "cgraph.h" +#include "diagnostic.h" +#include "cfgbuild.h" +#include "alias.h" +#include "fold-const.h" +#include "attribs.h" +#include "calls.h" +#include "stor-layout.h" +#include "varasm.h" +#include "output.h" +//#include "insn-attr.h" +#include "flags.h" +#include "except.h" +#include "explow.h" +#include "expr.h" +#include "cfgrtl.h" +//#include "common/common-target.h" +#include "langhooks.h" +//#include "reload.h" +#include "gimplify.h" +//#include "dwarf2.h" +//#include "tm-constrs.h" +#include "params.h" +#include "cselib.h" +#include "sched-int.h" +#include "opts.h" +#include "tree-pass.h" +#include "context.h" +#include "pass_manager.h" +#include "target-globals.h" +#include "gimple-iterator.h" +#include "tree-vectorizer.h" +#include "shrink-wrap.h" +#include "builtins.h" +#include "rtl-iter.h" +#include "tree-iterator.h" +#include "tree-chkp.h" +#include "rtl-chkp.h" +#include "dbgcnt.h" +//#include "case-cfn-macros.h" +#include "regrename.h" +#include "dojump.h" +#include "fold-const-call.h" +#include "tree-ssanames.h" + +/* This file should be included last. */ +//#include "target-def.h" + +static rtx legitimize_dllimport_symbol (rtx, bool); +static rtx legitimize_pe_coff_extern_decl (rtx, bool); +static rtx legitimize_pe_coff_symbol (rtx, bool); +static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool); + +#ifndef CHECK_STACK_LIMIT +#define CHECK_STACK_LIMIT (-1) +#endif + +/* Return index of given mode in mult and division cost tables. */ +#define MODE_INDEX(mode) \ + ((mode) == QImode ? 0 \ + : (mode) == HImode ? 1 \ + : (mode) == SImode ? 2 \ + : (mode) == DImode ? 3 \ + : 4) + +/* Processor costs (relative to an add) */ +/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. 
*/ +#define COSTS_N_BYTES(N) ((N) * 2) + +#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}} + +static stringop_algs ix86_size_memcpy[2] = { + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}}; +static stringop_algs ix86_size_memset[2] = { + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}}; + +const +struct processor_costs ix86_size_cost = {/* costs for tuning for size */ + COSTS_N_BYTES (2), /* cost of an add instruction */ + COSTS_N_BYTES (3), /* cost of a lea instruction */ + COSTS_N_BYTES (2), /* variable shift costs */ + COSTS_N_BYTES (3), /* constant shift costs */ + {COSTS_N_BYTES (3), /* cost of starting multiply for QI */ + COSTS_N_BYTES (3), /* HI */ + COSTS_N_BYTES (3), /* SI */ + COSTS_N_BYTES (3), /* DI */ + COSTS_N_BYTES (5)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */ + COSTS_N_BYTES (3), /* HI */ + COSTS_N_BYTES (3), /* SI */ + COSTS_N_BYTES (3), /* DI */ + COSTS_N_BYTES (5)}, /* other */ + COSTS_N_BYTES (3), /* cost of movsx */ + COSTS_N_BYTES (3), /* cost of movzx */ + 0, /* "large" insn */ + 2, /* MOVE_RATIO */ + 2, /* cost for loading QImode using movzbl */ + {2, 2, 2}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 2, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {2, 2, 2}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {2, 2, 2}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 3, /* cost of moving MMX register */ + {3, 3}, /* cost of loading MMX registers + in SImode and DImode */ + {3, 3}, /* cost of storing MMX registers + in SImode and DImode */ + 3, /* cost of moving SSE register */ + {3, 3, 3}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {3, 3, 3}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 3, /* MMX or SSE register to integer */ + 0, /* size of l1 cache */ + 0, /* size of l2 cache */ + 0, /* size of prefetch block */ + 0, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */ + COSTS_N_BYTES (2), /* cost of FMUL instruction. */ + COSTS_N_BYTES (2), /* cost of FDIV instruction. */ + COSTS_N_BYTES (2), /* cost of FABS instruction. */ + COSTS_N_BYTES (2), /* cost of FCHS instruction. */ + COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ + ix86_size_memcpy, + ix86_size_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 1, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 1, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. 
*/ +}; + +/* Processor costs (relative to an add) */ +static stringop_algs i386_memcpy[2] = { + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs i386_memset[2] = { + {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, + DUMMY_STRINGOP_ALGS}; + +static const +struct processor_costs i386_cost = { /* 386 specific costs */ + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (3), /* variable shift costs */ + COSTS_N_INSNS (2), /* constant shift costs */ + {COSTS_N_INSNS (6), /* cost of starting multiply for QI */ + COSTS_N_INSNS (6), /* HI */ + COSTS_N_INSNS (6), /* SI */ + COSTS_N_INSNS (6), /* DI */ + COSTS_N_INSNS (6)}, /* other */ + COSTS_N_INSNS (1), /* cost of multiply per each bit set */ + {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (23), /* HI */ + COSTS_N_INSNS (23), /* SI */ + COSTS_N_INSNS (23), /* DI */ + COSTS_N_INSNS (23)}, /* other */ + COSTS_N_INSNS (3), /* cost of movsx */ + COSTS_N_INSNS (2), /* cost of movzx */ + 15, /* "large" insn */ + 3, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {2, 4, 2}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 4, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {8, 8, 8}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {8, 8, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {4, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 8, 16}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 8, 16}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 3, /* MMX or SSE register to integer */ + 0, /* size of l1 cache */ + 0, /* size of l2 cache */ + 0, /* size of prefetch block */ + 0, /* number of parallel prefetches */ + 1, /* Branch cost */ + COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (27), /* cost of FMUL instruction. */ + COSTS_N_INSNS (88), /* cost of FDIV instruction. */ + COSTS_N_INSNS (22), /* cost of FABS instruction. */ + COSTS_N_INSNS (24), /* cost of FCHS instruction. */ + COSTS_N_INSNS (122), /* cost of FSQRT instruction. */ + i386_memcpy, + i386_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. 
*/ +}; + +static stringop_algs i486_memcpy[2] = { + {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs i486_memset[2] = { + {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}}, + DUMMY_STRINGOP_ALGS}; + +static const +struct processor_costs i486_cost = { /* 486 specific costs */ + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (3), /* variable shift costs */ + COSTS_N_INSNS (2), /* constant shift costs */ + {COSTS_N_INSNS (12), /* cost of starting multiply for QI */ + COSTS_N_INSNS (12), /* HI */ + COSTS_N_INSNS (12), /* SI */ + COSTS_N_INSNS (12), /* DI */ + COSTS_N_INSNS (12)}, /* other */ + 1, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (40), /* HI */ + COSTS_N_INSNS (40), /* SI */ + COSTS_N_INSNS (40), /* DI */ + COSTS_N_INSNS (40)}, /* other */ + COSTS_N_INSNS (3), /* cost of movsx */ + COSTS_N_INSNS (2), /* cost of movzx */ + 15, /* "large" insn */ + 3, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {2, 4, 2}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 4, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {8, 8, 8}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {8, 8, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {4, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 8, 16}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 8, 16}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 3, /* MMX or SSE register to integer */ + 4, /* size of l1 cache. 486 has 8kB cache + shared for code and data, so 4kB is + not really precise. */ + 4, /* size of l2 cache */ + 0, /* size of prefetch block */ + 0, /* number of parallel prefetches */ + 1, /* Branch cost */ + COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (16), /* cost of FMUL instruction. */ + COSTS_N_INSNS (73), /* cost of FDIV instruction. */ + COSTS_N_INSNS (3), /* cost of FABS instruction. */ + COSTS_N_INSNS (3), /* cost of FCHS instruction. */ + COSTS_N_INSNS (83), /* cost of FSQRT instruction. */ + i486_memcpy, + i486_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. 
*/ +}; + +static stringop_algs pentium_memcpy[2] = { + {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs pentium_memset[2] = { + {libcall, {{-1, rep_prefix_4_byte, false}}}, + DUMMY_STRINGOP_ALGS}; + +static const +struct processor_costs pentium_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (4), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ + COSTS_N_INSNS (11), /* HI */ + COSTS_N_INSNS (11), /* SI */ + COSTS_N_INSNS (11), /* DI */ + COSTS_N_INSNS (11)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (25), /* HI */ + COSTS_N_INSNS (25), /* SI */ + COSTS_N_INSNS (25), /* DI */ + COSTS_N_INSNS (25)}, /* other */ + COSTS_N_INSNS (3), /* cost of movsx */ + COSTS_N_INSNS (2), /* cost of movzx */ + 8, /* "large" insn */ + 6, /* MOVE_RATIO */ + 6, /* cost for loading QImode using movzbl */ + {2, 4, 2}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 4, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {2, 2, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 6}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 8, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 8, 16}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 8, 16}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 3, /* MMX or SSE register to integer */ + 8, /* size of l1 cache. */ + 8, /* size of l2 cache */ + 0, /* size of prefetch block */ + 0, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (3), /* cost of FMUL instruction. */ + COSTS_N_INSNS (39), /* cost of FDIV instruction. */ + COSTS_N_INSNS (1), /* cost of FABS instruction. */ + COSTS_N_INSNS (1), /* cost of FCHS instruction. */ + COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ + pentium_memcpy, + pentium_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. 
*/ +}; + +static const +struct processor_costs lakemont_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ + COSTS_N_INSNS (11), /* HI */ + COSTS_N_INSNS (11), /* SI */ + COSTS_N_INSNS (11), /* DI */ + COSTS_N_INSNS (11)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (25), /* HI */ + COSTS_N_INSNS (25), /* SI */ + COSTS_N_INSNS (25), /* DI */ + COSTS_N_INSNS (25)}, /* other */ + COSTS_N_INSNS (3), /* cost of movsx */ + COSTS_N_INSNS (2), /* cost of movzx */ + 8, /* "large" insn */ + 9, /* MOVE_RATIO */ + 6, /* cost for loading QImode using movzbl */ + {2, 4, 2}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 4, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {2, 2, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 6}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 8, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 8, 16}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 8, 16}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 3, /* MMX or SSE register to integer */ + 8, /* size of l1 cache. */ + 8, /* size of l2 cache */ + 0, /* size of prefetch block */ + 0, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (3), /* cost of FMUL instruction. */ + COSTS_N_INSNS (39), /* cost of FDIV instruction. */ + COSTS_N_INSNS (1), /* cost of FABS instruction. */ + COSTS_N_INSNS (1), /* cost of FCHS instruction. */ + COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ + pentium_memcpy, + pentium_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + +/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes + (we ensure the alignment). For small blocks inline loop is still a + noticeable win, for bigger blocks either rep movsl or rep movsb is + way to go. Rep movsb has apparently more expensive startup time in CPU, + but after 4K the difference is down in the noise. 
*/ +static stringop_algs pentiumpro_memcpy[2] = { + {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false}, + {8192, rep_prefix_4_byte, false}, + {-1, rep_prefix_1_byte, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs pentiumpro_memset[2] = { + {rep_prefix_4_byte, {{1024, unrolled_loop, false}, + {8192, rep_prefix_4_byte, false}, + {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static const +struct processor_costs pentiumpro_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (4), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (4)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (17), /* HI */ + COSTS_N_INSNS (17), /* SI */ + COSTS_N_INSNS (17), /* DI */ + COSTS_N_INSNS (17)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 6, /* MOVE_RATIO */ + 2, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 2, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {2, 2, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 6}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {2, 2}, /* cost of loading MMX registers + in SImode and DImode */ + {2, 2}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {2, 2, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {2, 2, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 3, /* MMX or SSE register to integer */ + 8, /* size of l1 cache. */ + 256, /* size of l2 cache */ + 32, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (5), /* cost of FMUL instruction. */ + COSTS_N_INSNS (56), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ + pentiumpro_memcpy, + pentiumpro_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. 
*/ +}; + +static stringop_algs geode_memcpy[2] = { + {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs geode_memset[2] = { + {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static const +struct processor_costs geode_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (2), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (7), /* SI */ + COSTS_N_INSNS (7), /* DI */ + COSTS_N_INSNS (7)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (23), /* HI */ + COSTS_N_INSNS (39), /* SI */ + COSTS_N_INSNS (39), /* DI */ + COSTS_N_INSNS (39)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 4, /* MOVE_RATIO */ + 1, /* cost for loading QImode using movzbl */ + {1, 1, 1}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {1, 1, 1}, /* cost of storing integer registers */ + 1, /* cost of reg,reg fld/fst */ + {1, 1, 1}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 6, 6}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + + 2, /* cost of moving MMX register */ + {2, 2}, /* cost of loading MMX registers + in SImode and DImode */ + {2, 2}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {2, 2, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {2, 2, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 3, /* MMX or SSE register to integer */ + 64, /* size of l1 cache. */ + 128, /* size of l2 cache. */ + 32, /* size of prefetch block */ + 1, /* number of parallel prefetches */ + 1, /* Branch cost */ + COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (11), /* cost of FMUL instruction. */ + COSTS_N_INSNS (47), /* cost of FDIV instruction. */ + COSTS_N_INSNS (1), /* cost of FABS instruction. */ + COSTS_N_INSNS (1), /* cost of FCHS instruction. */ + COSTS_N_INSNS (54), /* cost of FSQRT instruction. */ + geode_memcpy, + geode_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. 
*/ +}; + +static stringop_algs k6_memcpy[2] = { + {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs k6_memset[2] = { + {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static const +struct processor_costs k6_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (2), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (3), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (3), /* DI */ + COSTS_N_INSNS (3)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (18), /* HI */ + COSTS_N_INSNS (18), /* SI */ + COSTS_N_INSNS (18), /* DI */ + COSTS_N_INSNS (18)}, /* other */ + COSTS_N_INSNS (2), /* cost of movsx */ + COSTS_N_INSNS (2), /* cost of movzx */ + 8, /* "large" insn */ + 4, /* MOVE_RATIO */ + 3, /* cost for loading QImode using movzbl */ + {4, 5, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 3, 2}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {6, 6, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 4}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {2, 2}, /* cost of loading MMX registers + in SImode and DImode */ + {2, 2}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {2, 2, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {2, 2, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 6, /* MMX or SSE register to integer */ + 32, /* size of l1 cache. */ + 32, /* size of l2 cache. Some models + have integrated l2 cache, but + optimizing for k6 is not important + enough to worry about that. */ + 32, /* size of prefetch block */ + 1, /* number of parallel prefetches */ + 1, /* Branch cost */ + COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (2), /* cost of FMUL instruction. */ + COSTS_N_INSNS (56), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ + k6_memcpy, + k6_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + +/* For some reason, Athlon deals better with REP prefix (relative to loops) + compared to K8. Alignment becomes important after 8 bytes for memcpy and + 128 bytes for memset. 
*/ +static stringop_algs athlon_memcpy[2] = { + {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs athlon_memset[2] = { + {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; +static const +struct processor_costs athlon_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (2), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (5), /* cost of starting multiply for QI */ + COSTS_N_INSNS (5), /* HI */ + COSTS_N_INSNS (5), /* SI */ + COSTS_N_INSNS (5), /* DI */ + COSTS_N_INSNS (5)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 9, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {3, 4, 3}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {3, 4, 3}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {4, 4, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {4, 4}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 4, 6}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 4, 5}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 64, /* size of l1 cache. */ + 256, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 5, /* Branch cost */ + COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (4), /* cost of FMUL instruction. */ + COSTS_N_INSNS (24), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ + athlon_memcpy, + athlon_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + +/* K8 has optimized REP instruction for medium sized blocks, but for very + small blocks it is better to use loop. For large blocks, libcall can + do nontemporary accesses and beat inline considerably. 
*/ +static stringop_algs k8_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs k8_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; +static const +struct processor_costs k8_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (2), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (5)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 9, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {3, 4, 3}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {3, 4, 3}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {4, 4, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {3, 3}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 3, 6}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 4, 5}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 64, /* size of l1 cache. */ + 512, /* size of l2 cache. */ + 64, /* size of prefetch block */ + /* New AMD processors never drop prefetches; if they cannot be performed + immediately, they are queued. We set number of simultaneous prefetches + to a large constant to reflect this (it probably is not a good idea not + to limit number of prefetches at all, as their execution also takes some + time). */ + 100, /* number of parallel prefetches */ + 3, /* Branch cost */ + COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (4), /* cost of FMUL instruction. */ + COSTS_N_INSNS (19), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ + + k8_memcpy, + k8_memset, + 4, /* scalar_stmt_cost. */ + 2, /* scalar load_cost. */ + 2, /* scalar_store_cost. */ + 5, /* vec_stmt_cost. */ + 0, /* vec_to_scalar_cost. */ + 2, /* scalar_to_vec_cost. */ + 2, /* vec_align_load_cost. */ + 3, /* vec_unalign_load_cost. */ + 3, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 2, /* cond_not_taken_branch_cost. */ +}; + +/* AMDFAM10 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall can + do nontemporary accesses and beat inline considerably. 
*/ +static stringop_algs amdfam10_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs amdfam10_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +struct processor_costs amdfam10_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (2), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (5)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (35), /* HI */ + COSTS_N_INSNS (51), /* SI */ + COSTS_N_INSNS (83), /* DI */ + COSTS_N_INSNS (83)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 9, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {3, 4, 3}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {3, 4, 3}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {4, 4, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {3, 3}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 4, 3}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 4, 5}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 3, /* MMX or SSE register to integer */ + /* On K8: + MOVD reg64, xmmreg Double FSTORE 4 + MOVD reg32, xmmreg Double FSTORE 4 + On AMDFAM10: + MOVD reg64, xmmreg Double FADD 3 + 1/1 1/1 + MOVD reg32, xmmreg Double FADD 3 + 1/1 1/1 */ + 64, /* size of l1 cache. */ + 512, /* size of l2 cache. */ + 64, /* size of prefetch block */ + /* New AMD processors never drop prefetches; if they cannot be performed + immediately, they are queued. We set number of simultaneous prefetches + to a large constant to reflect this (it probably is not a good idea not + to limit number of prefetches at all, as their execution also takes some + time). */ + 100, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (4), /* cost of FMUL instruction. */ + COSTS_N_INSNS (19), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ + + amdfam10_memcpy, + amdfam10_memset, + 4, /* scalar_stmt_cost. */ + 2, /* scalar load_cost. */ + 2, /* scalar_store_cost. */ + 6, /* vec_stmt_cost. */ + 0, /* vec_to_scalar_cost. */ + 2, /* scalar_to_vec_cost. */ + 2, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 2, /* vec_store_cost. */ + 2, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. 
*/ +}; + +/* BDVER1 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall + can do nontemporary accesses and beat inline considerably. */ +static stringop_algs bdver1_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs bdver1_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; + +const struct processor_costs bdver1_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (4), /* SI */ + COSTS_N_INSNS (6), /* DI */ + COSTS_N_INSNS (6)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (35), /* HI */ + COSTS_N_INSNS (51), /* SI */ + COSTS_N_INSNS (83), /* DI */ + COSTS_N_INSNS (83)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 9, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {5, 5, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {5, 5, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {4, 4}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 4, 4}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 4, 4}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 2, /* MMX or SSE register to integer */ + /* On K8: + MOVD reg64, xmmreg Double FSTORE 4 + MOVD reg32, xmmreg Double FSTORE 4 + On AMDFAM10: + MOVD reg64, xmmreg Double FADD 3 + 1/1 1/1 + MOVD reg32, xmmreg Double FADD 3 + 1/1 1/1 */ + 16, /* size of l1 cache. */ + 2048, /* size of l2 cache. */ + 64, /* size of prefetch block */ + /* New AMD processors never drop prefetches; if they cannot be performed + immediately, they are queued. We set number of simultaneous prefetches + to a large constant to reflect this (it probably is not a good idea not + to limit number of prefetches at all, as their execution also takes some + time). */ + 100, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (6), /* cost of FMUL instruction. */ + COSTS_N_INSNS (42), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ + + bdver1_memcpy, + bdver1_memset, + 6, /* scalar_stmt_cost. */ + 4, /* scalar load_cost. */ + 4, /* scalar_store_cost. */ + 6, /* vec_stmt_cost. */ + 0, /* vec_to_scalar_cost. */ + 2, /* scalar_to_vec_cost. 
*/ + 4, /* vec_align_load_cost. */ + 4, /* vec_unalign_load_cost. */ + 4, /* vec_store_cost. */ + 4, /* cond_taken_branch_cost. */ + 2, /* cond_not_taken_branch_cost. */ +}; + +/* BDVER2 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall + can do nontemporary accesses and beat inline considerably. */ + +static stringop_algs bdver2_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs bdver2_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; + +const struct processor_costs bdver2_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (4), /* SI */ + COSTS_N_INSNS (6), /* DI */ + COSTS_N_INSNS (6)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (35), /* HI */ + COSTS_N_INSNS (51), /* SI */ + COSTS_N_INSNS (83), /* DI */ + COSTS_N_INSNS (83)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 9, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {5, 5, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {5, 5, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {4, 4}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 4, 4}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 4, 4}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 2, /* MMX or SSE register to integer */ + /* On K8: + MOVD reg64, xmmreg Double FSTORE 4 + MOVD reg32, xmmreg Double FSTORE 4 + On AMDFAM10: + MOVD reg64, xmmreg Double FADD 3 + 1/1 1/1 + MOVD reg32, xmmreg Double FADD 3 + 1/1 1/1 */ + 16, /* size of l1 cache. */ + 2048, /* size of l2 cache. */ + 64, /* size of prefetch block */ + /* New AMD processors never drop prefetches; if they cannot be performed + immediately, they are queued. We set number of simultaneous prefetches + to a large constant to reflect this (it probably is not a good idea not + to limit number of prefetches at all, as their execution also takes some + time). */ + 100, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (6), /* cost of FMUL instruction. */ + COSTS_N_INSNS (42), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (52), /* cost of FSQRT instruction. 
*/ + + bdver2_memcpy, + bdver2_memset, + 6, /* scalar_stmt_cost. */ + 4, /* scalar load_cost. */ + 4, /* scalar_store_cost. */ + 6, /* vec_stmt_cost. */ + 0, /* vec_to_scalar_cost. */ + 2, /* scalar_to_vec_cost. */ + 4, /* vec_align_load_cost. */ + 4, /* vec_unalign_load_cost. */ + 4, /* vec_store_cost. */ + 4, /* cond_taken_branch_cost. */ + 2, /* cond_not_taken_branch_cost. */ +}; + + + /* BDVER3 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall + can do nontemporary accesses and beat inline considerably. */ +static stringop_algs bdver3_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs bdver3_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +struct processor_costs bdver3_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (4), /* SI */ + COSTS_N_INSNS (6), /* DI */ + COSTS_N_INSNS (6)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (35), /* HI */ + COSTS_N_INSNS (51), /* SI */ + COSTS_N_INSNS (83), /* DI */ + COSTS_N_INSNS (83)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 9, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {5, 5, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {5, 5, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {4, 4}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 4, 4}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 4, 4}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 2, /* MMX or SSE register to integer */ + 16, /* size of l1 cache. */ + 2048, /* size of l2 cache. */ + 64, /* size of prefetch block */ + /* New AMD processors never drop prefetches; if they cannot be performed + immediately, they are queued. We set number of simultaneous prefetches + to a large constant to reflect this (it probably is not a good idea not + to limit number of prefetches at all, as their execution also takes some + time). */ + 100, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (6), /* cost of FMUL instruction. */ + COSTS_N_INSNS (42), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (52), /* cost of FSQRT instruction. 
*/ + + bdver3_memcpy, + bdver3_memset, + 6, /* scalar_stmt_cost. */ + 4, /* scalar load_cost. */ + 4, /* scalar_store_cost. */ + 6, /* vec_stmt_cost. */ + 0, /* vec_to_scalar_cost. */ + 2, /* scalar_to_vec_cost. */ + 4, /* vec_align_load_cost. */ + 4, /* vec_unalign_load_cost. */ + 4, /* vec_store_cost. */ + 4, /* cond_taken_branch_cost. */ + 2, /* cond_not_taken_branch_cost. */ +}; + +/* BDVER4 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall + can do nontemporary accesses and beat inline considerably. */ +static stringop_algs bdver4_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs bdver4_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +struct processor_costs bdver4_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (4), /* SI */ + COSTS_N_INSNS (6), /* DI */ + COSTS_N_INSNS (6)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (35), /* HI */ + COSTS_N_INSNS (51), /* SI */ + COSTS_N_INSNS (83), /* DI */ + COSTS_N_INSNS (83)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 9, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {5, 5, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {5, 5, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {4, 4}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 4, 4}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 4, 4}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 2, /* MMX or SSE register to integer */ + 16, /* size of l1 cache. */ + 2048, /* size of l2 cache. */ + 64, /* size of prefetch block */ + /* New AMD processors never drop prefetches; if they cannot be performed + immediately, they are queued. We set number of simultaneous prefetches + to a large constant to reflect this (it probably is not a good idea not + to limit number of prefetches at all, as their execution also takes some + time). */ + 100, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (6), /* cost of FMUL instruction. */ + COSTS_N_INSNS (42), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (52), /* cost of FSQRT instruction. 
*/ + + bdver4_memcpy, + bdver4_memset, + 6, /* scalar_stmt_cost. */ + 4, /* scalar load_cost. */ + 4, /* scalar_store_cost. */ + 6, /* vec_stmt_cost. */ + 0, /* vec_to_scalar_cost. */ + 2, /* scalar_to_vec_cost. */ + 4, /* vec_align_load_cost. */ + 4, /* vec_unalign_load_cost. */ + 4, /* vec_store_cost. */ + 4, /* cond_taken_branch_cost. */ + 2, /* cond_not_taken_branch_cost. */ +}; + + +/* ZNVER1 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall + can do nontemporary accesses and beat inline considerably. */ +static stringop_algs znver1_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs znver1_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +struct processor_costs znver1_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction. */ + COSTS_N_INSNS (1), /* cost of a lea instruction. */ + COSTS_N_INSNS (1), /* variable shift costs. */ + COSTS_N_INSNS (1), /* constant shift costs. */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */ + COSTS_N_INSNS (3), /* HI. */ + COSTS_N_INSNS (3), /* SI. */ + COSTS_N_INSNS (4), /* DI. */ + COSTS_N_INSNS (4)}, /* other. */ + 0, /* cost of multiply per each bit + set. */ + {COSTS_N_INSNS (19), /* cost of a divide/mod for QI. */ + COSTS_N_INSNS (35), /* HI. */ + COSTS_N_INSNS (51), /* SI. */ + COSTS_N_INSNS (83), /* DI. */ + COSTS_N_INSNS (83)}, /* other. */ + COSTS_N_INSNS (1), /* cost of movsx. */ + COSTS_N_INSNS (1), /* cost of movzx. */ + 8, /* "large" insn. */ + 9, /* MOVE_RATIO. */ + 4, /* cost for loading QImode using + movzbl. */ + {5, 5, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer + registers. */ + 2, /* cost of reg,reg fld/fst. */ + {5, 5, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode. */ + {4, 4, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode. */ + 2, /* cost of moving MMX register. */ + {4, 4}, /* cost of loading MMX registers + in SImode and DImode. */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode. */ + 2, /* cost of moving SSE register. */ + {4, 4, 4}, /* cost of loading SSE registers + in SImode, DImode and TImode. */ + {4, 4, 4}, /* cost of storing SSE registers + in SImode, DImode and TImode. */ + 2, /* MMX or SSE register to integer. */ + 32, /* size of l1 cache. */ + 512, /* size of l2 cache. */ + 64, /* size of prefetch block. */ + /* New AMD processors never drop prefetches; if they cannot be performed + immediately, they are queued. We set number of simultaneous prefetches + to a large constant to reflect this (it probably is not a good idea not + to limit number of prefetches at all, as their execution also takes some + time). */ + 100, /* number of parallel prefetches. */ + 2, /* Branch cost. */ + COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (6), /* cost of FMUL instruction. */ + COSTS_N_INSNS (42), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (52), /* cost of FSQRT instruction. 
*/ + + znver1_memcpy, + znver1_memset, + 6, /* scalar_stmt_cost. */ + 4, /* scalar load_cost. */ + 4, /* scalar_store_cost. */ + 6, /* vec_stmt_cost. */ + 0, /* vec_to_scalar_cost. */ + 2, /* scalar_to_vec_cost. */ + 4, /* vec_align_load_cost. */ + 4, /* vec_unalign_load_cost. */ + 4, /* vec_store_cost. */ + 4, /* cond_taken_branch_cost. */ + 2, /* cond_not_taken_branch_cost. */ +}; + + /* BTVER1 has optimized REP instruction for medium sized blocks, but for + very small blocks it is better to use loop. For large blocks, libcall can + do nontemporary accesses and beat inline considerably. */ +static stringop_algs btver1_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs btver1_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +const struct processor_costs btver1_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (2), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (5)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (35), /* HI */ + COSTS_N_INSNS (51), /* SI */ + COSTS_N_INSNS (83), /* DI */ + COSTS_N_INSNS (83)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 9, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {3, 4, 3}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {3, 4, 3}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {4, 4, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {3, 3}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 4, 3}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 4, 5}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 3, /* MMX or SSE register to integer */ + /* On K8: + MOVD reg64, xmmreg Double FSTORE 4 + MOVD reg32, xmmreg Double FSTORE 4 + On AMDFAM10: + MOVD reg64, xmmreg Double FADD 3 + 1/1 1/1 + MOVD reg32, xmmreg Double FADD 3 + 1/1 1/1 */ + 32, /* size of l1 cache. */ + 512, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 100, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (4), /* cost of FMUL instruction. */ + COSTS_N_INSNS (19), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ + + btver1_memcpy, + btver1_memset, + 4, /* scalar_stmt_cost. */ + 2, /* scalar load_cost. */ + 2, /* scalar_store_cost. */ + 6, /* vec_stmt_cost. 
*/ + 0, /* vec_to_scalar_cost. */ + 2, /* scalar_to_vec_cost. */ + 2, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 2, /* vec_store_cost. */ + 2, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + +static stringop_algs btver2_memcpy[2] = { + {libcall, {{6, loop, false}, {14, unrolled_loop, false}, + {-1, rep_prefix_4_byte, false}}}, + {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs btver2_memset[2] = { + {libcall, {{8, loop, false}, {24, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +const struct processor_costs btver2_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (2), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (5)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (35), /* HI */ + COSTS_N_INSNS (51), /* SI */ + COSTS_N_INSNS (83), /* DI */ + COSTS_N_INSNS (83)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 9, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {3, 4, 3}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {3, 4, 3}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {4, 4, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {3, 3}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 4, 3}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 4, 5}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 3, /* MMX or SSE register to integer */ + /* On K8: + MOVD reg64, xmmreg Double FSTORE 4 + MOVD reg32, xmmreg Double FSTORE 4 + On AMDFAM10: + MOVD reg64, xmmreg Double FADD 3 + 1/1 1/1 + MOVD reg32, xmmreg Double FADD 3 + 1/1 1/1 */ + 32, /* size of l1 cache. */ + 2048, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 100, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (4), /* cost of FMUL instruction. */ + COSTS_N_INSNS (19), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ + btver2_memcpy, + btver2_memset, + 4, /* scalar_stmt_cost. */ + 2, /* scalar load_cost. */ + 2, /* scalar_store_cost. */ + 6, /* vec_stmt_cost. */ + 0, /* vec_to_scalar_cost. */ + 2, /* scalar_to_vec_cost. */ + 2, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 2, /* vec_store_cost. */ + 2, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. 
*/ +}; + +static stringop_algs pentium4_memcpy[2] = { + {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}}, + DUMMY_STRINGOP_ALGS}; +static stringop_algs pentium4_memset[2] = { + {libcall, {{6, loop_1_byte, false}, {48, loop, false}, + {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + DUMMY_STRINGOP_ALGS}; + +static const +struct processor_costs pentium4_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (3), /* cost of a lea instruction */ + COSTS_N_INSNS (4), /* variable shift costs */ + COSTS_N_INSNS (4), /* constant shift costs */ + {COSTS_N_INSNS (15), /* cost of starting multiply for QI */ + COSTS_N_INSNS (15), /* HI */ + COSTS_N_INSNS (15), /* SI */ + COSTS_N_INSNS (15), /* DI */ + COSTS_N_INSNS (15)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (56), /* HI */ + COSTS_N_INSNS (56), /* SI */ + COSTS_N_INSNS (56), /* DI */ + COSTS_N_INSNS (56)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 16, /* "large" insn */ + 6, /* MOVE_RATIO */ + 2, /* cost for loading QImode using movzbl */ + {4, 5, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 3, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {2, 2, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 6}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {2, 2}, /* cost of loading MMX registers + in SImode and DImode */ + {2, 2}, /* cost of storing MMX registers + in SImode and DImode */ + 12, /* cost of moving SSE register */ + {12, 12, 12}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {2, 2, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 10, /* MMX or SSE register to integer */ + 8, /* size of l1 cache. */ + 256, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (7), /* cost of FMUL instruction. */ + COSTS_N_INSNS (43), /* cost of FDIV instruction. */ + COSTS_N_INSNS (2), /* cost of FABS instruction. */ + COSTS_N_INSNS (2), /* cost of FCHS instruction. */ + COSTS_N_INSNS (43), /* cost of FSQRT instruction. */ + pentium4_memcpy, + pentium4_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. 
*/ +}; + +static stringop_algs nocona_memcpy[2] = { + {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}}, + {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false}, + {100000, unrolled_loop, false}, {-1, libcall, false}}}}; + +static stringop_algs nocona_memset[2] = { + {libcall, {{6, loop_1_byte, false}, {48, loop, false}, + {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{24, loop, false}, {64, unrolled_loop, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; + +static const +struct processor_costs nocona_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (10), /* cost of starting multiply for QI */ + COSTS_N_INSNS (10), /* HI */ + COSTS_N_INSNS (10), /* SI */ + COSTS_N_INSNS (10), /* DI */ + COSTS_N_INSNS (10)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (66), /* HI */ + COSTS_N_INSNS (66), /* SI */ + COSTS_N_INSNS (66), /* DI */ + COSTS_N_INSNS (66)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 16, /* "large" insn */ + 17, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 3, /* cost of reg,reg fld/fst */ + {12, 12, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 4}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 6, /* cost of moving MMX register */ + {12, 12}, /* cost of loading MMX registers + in SImode and DImode */ + {12, 12}, /* cost of storing MMX registers + in SImode and DImode */ + 6, /* cost of moving SSE register */ + {12, 12, 12}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {12, 12, 12}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 8, /* MMX or SSE register to integer */ + 8, /* size of l1 cache. */ + 1024, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 8, /* number of parallel prefetches */ + 1, /* Branch cost */ + COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (8), /* cost of FMUL instruction. */ + COSTS_N_INSNS (40), /* cost of FDIV instruction. */ + COSTS_N_INSNS (3), /* cost of FABS instruction. */ + COSTS_N_INSNS (3), /* cost of FCHS instruction. */ + COSTS_N_INSNS (44), /* cost of FSQRT instruction. */ + nocona_memcpy, + nocona_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. 
*/ +}; + +static stringop_algs atom_memcpy[2] = { + {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, + {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; +static stringop_algs atom_memset[2] = { + {libcall, {{8, loop, false}, {15, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{24, loop, false}, {32, unrolled_loop, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; +static const +struct processor_costs atom_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (2)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 17, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {12, 12, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {8, 8, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {8, 8, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 32, /* size of l1 cache. */ + 256, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 3, /* Branch cost */ + COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (8), /* cost of FMUL instruction. */ + COSTS_N_INSNS (20), /* cost of FDIV instruction. */ + COSTS_N_INSNS (8), /* cost of FABS instruction. */ + COSTS_N_INSNS (8), /* cost of FCHS instruction. */ + COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + atom_memcpy, + atom_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. 
*/ +}; + +static stringop_algs slm_memcpy[2] = { + {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, + {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; +static stringop_algs slm_memset[2] = { + {libcall, {{8, loop, false}, {15, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{24, loop, false}, {32, unrolled_loop, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; +static const +struct processor_costs slm_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (3), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (2)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 17, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {12, 12, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {8, 8, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {8, 8, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 32, /* size of l1 cache. */ + 256, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 3, /* Branch cost */ + COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (8), /* cost of FMUL instruction. */ + COSTS_N_INSNS (20), /* cost of FDIV instruction. */ + COSTS_N_INSNS (8), /* cost of FABS instruction. */ + COSTS_N_INSNS (8), /* cost of FCHS instruction. */ + COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + slm_memcpy, + slm_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 4, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. 
*/ +}; + +static stringop_algs intel_memcpy[2] = { + {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, + {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; +static stringop_algs intel_memset[2] = { + {libcall, {{8, loop, false}, {15, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{24, loop, false}, {32, unrolled_loop, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; +static const +struct processor_costs intel_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (3), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (2)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 17, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {12, 12, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {8, 8, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {8, 8, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 32, /* size of l1 cache. */ + 256, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 3, /* Branch cost */ + COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (8), /* cost of FMUL instruction. */ + COSTS_N_INSNS (20), /* cost of FDIV instruction. */ + COSTS_N_INSNS (8), /* cost of FABS instruction. */ + COSTS_N_INSNS (8), /* cost of FCHS instruction. */ + COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + intel_memcpy, + intel_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 4, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + +/* Generic should produce code tuned for Core-i7 (and newer chips) + and btver1 (and newer chips). 
*/ + +static stringop_algs generic_memcpy[2] = { + {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, + {-1, libcall, false}}}, + {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static stringop_algs generic_memset[2] = { + {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, + {-1, libcall, false}}}, + {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, + {-1, libcall, false}}}}; +static const +struct processor_costs generic_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + /* On all chips taken into consideration, lea is 2 cycles or more. With + this cost, however, our current implementation of synth_mult results in + the use of unnecessary temporary registers, causing regressions on several + SPECfp benchmarks. */ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (2)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 17, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {12, 12, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {8, 8, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {8, 8, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 32, /* size of l1 cache. */ + 512, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + /* Benchmarks show large regressions on the K8 sixtrack benchmark when this + value is increased to the perhaps more appropriate value of 5. */ + 3, /* Branch cost */ + COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (8), /* cost of FMUL instruction. */ + COSTS_N_INSNS (20), /* cost of FDIV instruction. */ + COSTS_N_INSNS (8), /* cost of FABS instruction. */ + COSTS_N_INSNS (8), /* cost of FCHS instruction. */ + COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + generic_memcpy, + generic_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + +/* core_cost should produce code tuned for Core family of CPUs. 
*/ +static stringop_algs core_memcpy[2] = { + {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}}, + {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true}, + {-1, libcall, false}}}}; +static stringop_algs core_memset[2] = { + {libcall, {{6, loop_1_byte, true}, + {24, loop, true}, + {8192, rep_prefix_4_byte, true}, + {-1, libcall, false}}}, + {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true}, + {-1, libcall, false}}}}; + +static const +struct processor_costs core_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + /* On all chips taken into consideration lea is 2 cycles and more. With + this cost however our current implementation of synth_mult results in + use of unnecessary temporary registers causing regression on several + SPECfp benchmarks. */ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (2)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 17, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {12, 12, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {8, 8, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {8, 8, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 64, /* size of l1 cache. */ + 512, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + /* FIXME perhaps more appropriate value is 5. */ + 3, /* Branch cost */ + COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (8), /* cost of FMUL instruction. */ + COSTS_N_INSNS (20), /* cost of FDIV instruction. */ + COSTS_N_INSNS (8), /* cost of FABS instruction. */ + COSTS_N_INSNS (8), /* cost of FCHS instruction. */ + COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + core_memcpy, + core_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + + +/* Set by -mtune. */ +const struct processor_costs *ix86_tune_cost = &pentium_cost; + +/* Set by -mtune or -Os. */ +const struct processor_costs *ix86_cost = &pentium_cost; + +/* Processor feature/optimization bitmasks. 
*/ +#define m_386 (1<get_passes ()->execute_pass_mode_switching (); + return 0; +} + +/* Return 1 if INSN uses or defines a hard register. + Hard register uses in a memory address are ignored. + Clobbers and flags definitions are ignored. */ + +static bool +has_non_address_hard_reg (rtx_insn *insn) +{ + df_ref ref; + FOR_EACH_INSN_DEF (ref, insn) + if (HARD_REGISTER_P (DF_REF_REAL_REG (ref)) + && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER) + && DF_REF_REGNO (ref) != FLAGS_REG) + return true; + + FOR_EACH_INSN_USE (ref, insn) + if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref))) + return true; + + return false; +} + +/* Check if comparison INSN may be transformed + into vector comparison. Currently we transform + zero checks only which look like: + + (set (reg:CCZ 17 flags) + (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4) + (subreg:SI (reg:DI x) 0)) + (const_int 0 [0]))) */ + +static bool +convertible_comparison_p (rtx_insn *insn) +{ + if (!TARGET_SSE4_1) + return false; + + rtx def_set = single_set (insn); + + gcc_assert (def_set); + + rtx src = SET_SRC (def_set); + rtx dst = SET_DEST (def_set); + + gcc_assert (GET_CODE (src) == COMPARE); + + if (GET_CODE (dst) != REG + || REGNO (dst) != FLAGS_REG + || GET_MODE (dst) != CCZmode) + return false; + + rtx op1 = XEXP (src, 0); + rtx op2 = XEXP (src, 1); + + if (op2 != CONST0_RTX (GET_MODE (op2))) + return false; + + if (GET_CODE (op1) != IOR) + return false; + + op2 = XEXP (op1, 1); + op1 = XEXP (op1, 0); + + if (!SUBREG_P (op1) + || !SUBREG_P (op2) + || GET_MODE (op1) != SImode + || GET_MODE (op2) != SImode + || ((SUBREG_BYTE (op1) != 0 + || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode)) + && (SUBREG_BYTE (op2) != 0 + || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode)))) + return false; + + op1 = SUBREG_REG (op1); + op2 = SUBREG_REG (op2); + + if (op1 != op2 + || !REG_P (op1) + || GET_MODE (op1) != DImode) + return false; + + return true; +} + +/* Return 1 if INSN may be converted into vector + instruction. */ + +static bool +scalar_to_vector_candidate_p (rtx_insn *insn) +{ + rtx def_set = single_set (insn); + + if (!def_set) + return false; + + if (has_non_address_hard_reg (insn)) + return false; + + rtx src = SET_SRC (def_set); + rtx dst = SET_DEST (def_set); + + if (GET_CODE (src) == COMPARE) + return convertible_comparison_p (insn); + + /* We are interested in DImode promotion only. */ + if (GET_MODE (src) != DImode + || GET_MODE (dst) != DImode) + return false; + + if (!REG_P (dst) && !MEM_P (dst)) + return false; + + switch (GET_CODE (src)) + { + case PLUS: + case MINUS: + case IOR: + case XOR: + case AND: + break; + + case REG: + return true; + + case MEM: + return REG_P (dst); + + default: + return false; + } + + if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0)) + /* Check for andnot case. */ + && (GET_CODE (src) != AND + || GET_CODE (XEXP (src, 0)) != NOT + || !REG_P (XEXP (XEXP (src, 0), 0)))) + return false; + + if (!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1))) + return false; + + if (GET_MODE (XEXP (src, 0)) != DImode + || GET_MODE (XEXP (src, 1)) != DImode) + return false; + + return true; +} + +/* For a given bitmap of insn UIDs scans all instruction and + remove insn from CANDIDATES in case it has both convertible + and not convertible definitions. + + All insns in a bitmap are conversion candidates according to + scalar_to_vector_candidate_p. Currently it implies all insns + are single_set. 
*/ + +static void +remove_non_convertible_regs (bitmap candidates) +{ + bitmap_iterator bi; + unsigned id; + bitmap regs = BITMAP_ALLOC (NULL); + + EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi) + { + rtx def_set = single_set (DF_INSN_UID_GET (id)->insn); + rtx reg = SET_DEST (def_set); + + if (!REG_P (reg) + || bitmap_bit_p (regs, REGNO (reg)) + || HARD_REGISTER_P (reg)) + continue; + + for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg)); + def; + def = DF_REF_NEXT_REG (def)) + { + if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) + { + if (dump_file) + fprintf (dump_file, + "r%d has non convertible definition in insn %d\n", + REGNO (reg), DF_REF_INSN_UID (def)); + + bitmap_set_bit (regs, REGNO (reg)); + break; + } + } + } + + EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi) + { + for (df_ref def = DF_REG_DEF_CHAIN (id); + def; + def = DF_REF_NEXT_REG (def)) + if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) + { + if (dump_file) + fprintf (dump_file, "Removing insn %d from candidates list\n", + DF_REF_INSN_UID (def)); + + bitmap_clear_bit (candidates, DF_REF_INSN_UID (def)); + } + } + + BITMAP_FREE (regs); +} + +class scalar_chain +{ + public: + scalar_chain (); + ~scalar_chain (); + + static unsigned max_id; + + /* ID of a chain. */ + unsigned int chain_id; + /* A queue of instructions to be included into a chain. */ + bitmap queue; + /* Instructions included into a chain. */ + bitmap insns; + /* All registers defined by a chain. */ + bitmap defs; + /* Registers used in both vector and scalar modes. */ + bitmap defs_conv; + + void build (bitmap candidates, unsigned insn_uid); + int compute_convert_gain (); + int convert (); + + private: + void add_insn (bitmap candidates, unsigned insn_uid); + void add_to_queue (unsigned insn_uid); + void mark_dual_mode_def (df_ref def); + void analyze_register_chain (bitmap candidates, df_ref ref); + rtx replace_with_subreg (rtx x, rtx reg, rtx subreg); + void emit_conversion_insns (rtx insns, rtx_insn *pos); + void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg); + void convert_insn (rtx_insn *insn); + void convert_op (rtx *op, rtx_insn *insn); + void convert_reg (unsigned regno); + void make_vector_copies (unsigned regno); +}; + +unsigned scalar_chain::max_id = 0; + +/* Initialize new chain. */ + +scalar_chain::scalar_chain () +{ + chain_id = ++max_id; + + if (dump_file) + fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id); + + bitmap_obstack_initialize (NULL); + insns = BITMAP_ALLOC (NULL); + defs = BITMAP_ALLOC (NULL); + defs_conv = BITMAP_ALLOC (NULL); + queue = NULL; +} + +/* Free chain's data. */ + +scalar_chain::~scalar_chain () +{ + BITMAP_FREE (insns); + BITMAP_FREE (defs); + BITMAP_FREE (defs_conv); + bitmap_obstack_release (NULL); +} + +/* Add an instruction into the chain's queue. */ + +void +scalar_chain::add_to_queue (unsigned insn_uid) +{ + if (bitmap_bit_p (insns, insn_uid) + || bitmap_bit_p (queue, insn_uid)) + return; + + if (dump_file) + fprintf (dump_file, " Adding insn %d into chain's #%d queue\n", + insn_uid, chain_id); + bitmap_set_bit (queue, insn_uid); +} + +/* Mark register defined by DEF as requiring conversion. 
*/ + +void +scalar_chain::mark_dual_mode_def (df_ref def) +{ + gcc_assert (DF_REF_REG_DEF_P (def)); + + if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def))) + return; + + if (dump_file) + fprintf (dump_file, + " Mark r%d def in insn %d as requiring both modes in chain #%d\n", + DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id); + + bitmap_set_bit (defs_conv, DF_REF_REGNO (def)); +} + +/* Check REF's chain to add new insns into a queue + and find registers requiring conversion. */ + +void +scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref) +{ + df_link *chain; + + gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)) + || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))); + add_to_queue (DF_REF_INSN_UID (ref)); + + for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next) + { + unsigned uid = DF_REF_INSN_UID (chain->ref); + + if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref))) + continue; + + if (!DF_REF_REG_MEM_P (chain->ref)) + { + if (bitmap_bit_p (insns, uid)) + continue; + + if (bitmap_bit_p (candidates, uid)) + { + add_to_queue (uid); + continue; + } + } + + if (DF_REF_REG_DEF_P (chain->ref)) + { + if (dump_file) + fprintf (dump_file, " r%d def in insn %d isn't convertible\n", + DF_REF_REGNO (chain->ref), uid); + mark_dual_mode_def (chain->ref); + } + else + { + if (dump_file) + fprintf (dump_file, " r%d use in insn %d isn't convertible\n", + DF_REF_REGNO (chain->ref), uid); + mark_dual_mode_def (ref); + } + } +} + +/* Add instruction into a chain. */ + +void +scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid) +{ + if (bitmap_bit_p (insns, insn_uid)) + return; + + if (dump_file) + fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id); + + bitmap_set_bit (insns, insn_uid); + + rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn; + rtx def_set = single_set (insn); + if (def_set && REG_P (SET_DEST (def_set)) + && !HARD_REGISTER_P (SET_DEST (def_set))) + bitmap_set_bit (defs, REGNO (SET_DEST (def_set))); + + df_ref ref; + df_ref def; + for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref)) + if (!HARD_REGISTER_P (DF_REF_REG (ref))) + for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref)); + def; + def = DF_REF_NEXT_REG (def)) + analyze_register_chain (candidates, def); + for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref)) + if (!DF_REF_REG_MEM_P (ref)) + analyze_register_chain (candidates, ref); +} + +/* Build new chain starting from insn INSN_UID recursively + adding all dependent uses and definitions. */ + +void +scalar_chain::build (bitmap candidates, unsigned insn_uid) +{ + queue = BITMAP_ALLOC (NULL); + bitmap_set_bit (queue, insn_uid); + + if (dump_file) + fprintf (dump_file, "Building chain #%d...\n", chain_id); + + while (!bitmap_empty_p (queue)) + { + insn_uid = bitmap_first_set_bit (queue); + bitmap_clear_bit (queue, insn_uid); + bitmap_clear_bit (candidates, insn_uid); + add_insn (candidates, insn_uid); + } + + if (dump_file) + { + fprintf (dump_file, "Collected chain #%d...\n", chain_id); + fprintf (dump_file, " insns: "); + dump_bitmap (dump_file, insns); + if (!bitmap_empty_p (defs_conv)) + { + bitmap_iterator bi; + unsigned id; + const char *comma = ""; + fprintf (dump_file, " defs to convert: "); + EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi) + { + fprintf (dump_file, "%sr%d", comma, id); + comma = ", "; + } + fprintf (dump_file, "\n"); + } + } + + BITMAP_FREE (queue); +} + +/* Compute a gain for chain conversion. 
*/ + +int +scalar_chain::compute_convert_gain () +{ + bitmap_iterator bi; + unsigned insn_uid; + int gain = 0; + int cost = 0; + + if (dump_file) + fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id); + + EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi) + { + rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn; + rtx def_set = single_set (insn); + rtx src = SET_SRC (def_set); + rtx dst = SET_DEST (def_set); + + if (REG_P (src) && REG_P (dst)) + gain += COSTS_N_INSNS (2) - ix86_cost->sse_move; + else if (REG_P (src) && MEM_P (dst)) + gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1]; + else if (MEM_P (src) && REG_P (dst)) + gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1]; + else if (GET_CODE (src) == PLUS + || GET_CODE (src) == MINUS + || GET_CODE (src) == IOR + || GET_CODE (src) == XOR + || GET_CODE (src) == AND) + gain += ix86_cost->add; + else if (GET_CODE (src) == COMPARE) + { + /* Assume comparison cost is the same. */ + } + else + gcc_unreachable (); + } + + if (dump_file) + fprintf (dump_file, " Instruction conversion gain: %d\n", gain); + + EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi) + cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer; + + if (dump_file) + fprintf (dump_file, " Registers conversion cost: %d\n", cost); + + gain -= cost; + + if (dump_file) + fprintf (dump_file, " Total gain: %d\n", gain); + + return gain; +} + +/* Replace REG in X with a V2DI subreg of NEW_REG. */ + +rtx +scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg) +{ + if (x == reg) + return gen_rtx_SUBREG (V2DImode, new_reg, 0); + + const char *fmt = GET_RTX_FORMAT (GET_CODE (x)); + int i, j; + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') + XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg); + else if (fmt[i] == 'E') + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j), + reg, new_reg); + } + + return x; +} + +/* Replace REG in INSN with a V2DI subreg of NEW_REG. */ + +void +scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx new_reg) +{ + replace_with_subreg (single_set (insn), reg, new_reg); +} + +/* Insert generated conversion instruction sequence INSNS + after instruction AFTER. New BB may be required in case + instruction has EH region attached. */ + +void +scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after) +{ + if (!control_flow_insn_p (after)) + { + emit_insn_after (insns, after); + return; + } + + basic_block bb = BLOCK_FOR_INSN (after); + edge e = find_fallthru_edge (bb->succs); + gcc_assert (e); + + basic_block new_bb = split_edge (e); + emit_insn_after (insns, BB_HEAD (new_bb)); +} + +/* Make vector copies for all register REGNO definitions + and replace its uses in a chain. 
*/ + +void +scalar_chain::make_vector_copies (unsigned regno) +{ + rtx reg = regno_reg_rtx[regno]; + rtx vreg = gen_reg_rtx (DImode); + df_ref ref; + + for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) + if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) + { + rtx_insn *insn = DF_REF_INSN (ref); + + start_sequence (); + if (TARGET_SSE4_1) + { + emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0), + CONST0_RTX (V4SImode), + gen_rtx_SUBREG (SImode, reg, 0))); + emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0), + gen_rtx_SUBREG (V4SImode, vreg, 0), + gen_rtx_SUBREG (SImode, reg, 4), + GEN_INT (2))); + } + else if (TARGET_INTER_UNIT_MOVES_TO_VEC) + { + rtx tmp = gen_reg_rtx (DImode); + emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0), + CONST0_RTX (V4SImode), + gen_rtx_SUBREG (SImode, reg, 0))); + emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0), + CONST0_RTX (V4SImode), + gen_rtx_SUBREG (SImode, reg, 4))); + emit_insn (gen_vec_interleave_lowv4si + (gen_rtx_SUBREG (V4SImode, vreg, 0), + gen_rtx_SUBREG (V4SImode, vreg, 0), + gen_rtx_SUBREG (V4SImode, tmp, 0))); + } + else + { + rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP); + emit_move_insn (adjust_address (tmp, SImode, 0), + gen_rtx_SUBREG (SImode, reg, 0)); + emit_move_insn (adjust_address (tmp, SImode, 4), + gen_rtx_SUBREG (SImode, reg, 4)); + emit_move_insn (vreg, tmp); + } + rtx_insn *seq = get_insns (); + end_sequence (); + emit_conversion_insns (seq, insn); + + if (dump_file) + fprintf (dump_file, + " Copied r%d to a vector register r%d for insn %d\n", + regno, REGNO (vreg), DF_REF_INSN_UID (ref)); + } + + for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) + if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) + { + replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, vreg); + + if (dump_file) + fprintf (dump_file, " Replaced r%d with r%d in insn %d\n", + regno, REGNO (vreg), DF_REF_INSN_UID (ref)); + } +} + +/* Convert all definitions of register REGNO + and fix its uses. Scalar copies may be created + in case register is used in not convertible insn. 
*/ + +void +scalar_chain::convert_reg (unsigned regno) +{ + bool scalar_copy = bitmap_bit_p (defs_conv, regno); + rtx reg = regno_reg_rtx[regno]; + rtx scopy = NULL_RTX; + df_ref ref; + bitmap conv; + + conv = BITMAP_ALLOC (NULL); + bitmap_copy (conv, insns); + + if (scalar_copy) + scopy = gen_reg_rtx (DImode); + + for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) + { + rtx_insn *insn = DF_REF_INSN (ref); + rtx def_set = single_set (insn); + rtx src = SET_SRC (def_set); + rtx reg = DF_REF_REG (ref); + + if (!MEM_P (src)) + { + replace_with_subreg_in_insn (insn, reg, reg); + bitmap_clear_bit (conv, INSN_UID (insn)); + } + + if (scalar_copy) + { + rtx vcopy = gen_reg_rtx (V2DImode); + + start_sequence (); + if (TARGET_INTER_UNIT_MOVES_FROM_VEC) + { + emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0)); + emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0), + gen_rtx_SUBREG (SImode, vcopy, 0)); + emit_move_insn (vcopy, + gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32))); + emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4), + gen_rtx_SUBREG (SImode, vcopy, 0)); + } + else + { + rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP); + emit_move_insn (tmp, reg); + emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0), + adjust_address (tmp, SImode, 0)); + emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4), + adjust_address (tmp, SImode, 4)); + } + rtx_insn *seq = get_insns (); + end_sequence (); + emit_conversion_insns (seq, insn); + + if (dump_file) + fprintf (dump_file, + " Copied r%d to a scalar register r%d for insn %d\n", + regno, REGNO (scopy), INSN_UID (insn)); + } + } + + for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) + if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) + { + if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref))) + { + rtx def_set = single_set (DF_REF_INSN (ref)); + if (!MEM_P (SET_DEST (def_set)) + || !REG_P (SET_SRC (def_set))) + replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, reg); + bitmap_clear_bit (conv, DF_REF_INSN_UID (ref)); + } + } + /* Skip debug insns and uninitialized uses. */ + else if (DF_REF_CHAIN (ref) + && NONDEBUG_INSN_P (DF_REF_INSN (ref))) + { + gcc_assert (scopy); + replace_rtx (DF_REF_INSN (ref), reg, scopy); + df_insn_rescan (DF_REF_INSN (ref)); + } + + BITMAP_FREE (conv); +} + +/* Convert operand OP in INSN. We should handle + memory operands and uninitialized registers. + All other register uses are converted during + registers conversion. */ + +void +scalar_chain::convert_op (rtx *op, rtx_insn *insn) +{ + *op = copy_rtx_if_shared (*op); + + if (GET_CODE (*op) == NOT) + { + convert_op (&XEXP (*op, 0), insn); + PUT_MODE (*op, V2DImode); + } + else if (MEM_P (*op)) + { + rtx tmp = gen_reg_rtx (DImode); + + emit_insn_before (gen_move_insn (tmp, *op), insn); + *op = gen_rtx_SUBREG (V2DImode, tmp, 0); + + if (dump_file) + fprintf (dump_file, " Preloading operand for insn %d into r%d\n", + INSN_UID (insn), REGNO (tmp)); + } + else if (REG_P (*op)) + { + /* We may have not converted register usage in case + this register has no definition. Otherwise it + should be converted in convert_reg. */ + df_ref ref; + FOR_EACH_INSN_USE (ref, insn) + if (DF_REF_REGNO (ref) == REGNO (*op)) + { + gcc_assert (!DF_REF_CHAIN (ref)); + break; + } + *op = gen_rtx_SUBREG (V2DImode, *op, 0); + } + else + { + gcc_assert (SUBREG_P (*op)); + gcc_assert (GET_MODE (*op) == V2DImode); + } +} + +/* Convert INSN to vector mode. 
*/ + +void +scalar_chain::convert_insn (rtx_insn *insn) +{ + rtx def_set = single_set (insn); + rtx src = SET_SRC (def_set); + rtx dst = SET_DEST (def_set); + rtx subreg; + + if (MEM_P (dst) && !REG_P (src)) + { + /* There are no scalar integer instructions and therefore + temporary register usage is required. */ + rtx tmp = gen_reg_rtx (DImode); + emit_conversion_insns (gen_move_insn (dst, tmp), insn); + dst = gen_rtx_SUBREG (V2DImode, tmp, 0); + } + + switch (GET_CODE (src)) + { + case PLUS: + case MINUS: + case IOR: + case XOR: + case AND: + convert_op (&XEXP (src, 0), insn); + convert_op (&XEXP (src, 1), insn); + PUT_MODE (src, V2DImode); + break; + + case MEM: + if (!REG_P (dst)) + convert_op (&src, insn); + break; + + case REG: + if (!MEM_P (dst)) + convert_op (&src, insn); + break; + + case SUBREG: + gcc_assert (GET_MODE (src) == V2DImode); + break; + + case COMPARE: + src = SUBREG_REG (XEXP (XEXP (src, 0), 0)); + + gcc_assert ((REG_P (src) && GET_MODE (src) == DImode) + || (SUBREG_P (src) && GET_MODE (src) == V2DImode)); + + if (REG_P (src)) + subreg = gen_rtx_SUBREG (V2DImode, src, 0); + else + subreg = copy_rtx_if_shared (src); + emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg), + copy_rtx_if_shared (subreg), + copy_rtx_if_shared (subreg)), + insn); + dst = gen_rtx_REG (CCmode, FLAGS_REG); + src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src), + copy_rtx_if_shared (src)), + UNSPEC_PTEST); + break; + + default: + gcc_unreachable (); + } + + SET_SRC (def_set) = src; + SET_DEST (def_set) = dst; + + /* Drop possible dead definitions. */ + PATTERN (insn) = def_set; + + INSN_CODE (insn) = -1; + recog_memoized (insn); + df_insn_rescan (insn); +} + +/* Convert whole chain creating required register + conversions and copies. */ + +int +scalar_chain::convert () +{ + bitmap_iterator bi; + unsigned id; + int converted_insns = 0; + + if (!dbg_cnt (stv_conversion)) + return 0; + + if (dump_file) + fprintf (dump_file, "Converting chain #%d...\n", chain_id); + + EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi) + convert_reg (id); + + EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi) + make_vector_copies (id); + + EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi) + { + convert_insn (DF_INSN_UID_GET (id)->insn); + converted_insns++; + } + + return converted_insns; +} + +/* Main STV pass function. Find and convert scalar + instructions into vector mode when profitable. */ + +static unsigned int +convert_scalars_to_vector () +{ + basic_block bb; + bitmap candidates; + int converted_insns = 0; + + bitmap_obstack_initialize (NULL); + candidates = BITMAP_ALLOC (NULL); + + calculate_dominance_info (CDI_DOMINATORS); + df_set_flags (DF_DEFER_INSN_RESCAN); + df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN); + df_md_add_problem (); + df_analyze (); + + /* Find all instructions we want to convert into vector mode. 
*/ + if (dump_file) + fprintf (dump_file, "Searching for mode conversion candidates...\n"); + + FOR_EACH_BB_FN (bb, cfun) + { + rtx_insn *insn; + FOR_BB_INSNS (bb, insn) + if (scalar_to_vector_candidate_p (insn)) + { + if (dump_file) + fprintf (dump_file, " insn %d is marked as a candidate\n", + INSN_UID (insn)); + + bitmap_set_bit (candidates, INSN_UID (insn)); + } + } + + remove_non_convertible_regs (candidates); + + if (bitmap_empty_p (candidates)) + if (dump_file) + fprintf (dump_file, "There are no candidates for optimization.\n"); + + while (!bitmap_empty_p (candidates)) + { + unsigned uid = bitmap_first_set_bit (candidates); + scalar_chain chain; + + /* Find instructions chain we want to convert to vector mode. + Check all uses and definitions to estimate all required + conversions. */ + chain.build (candidates, uid); + + if (chain.compute_convert_gain () > 0) + converted_insns += chain.convert (); + else + if (dump_file) + fprintf (dump_file, "Chain #%d conversion is not profitable\n", + chain.chain_id); + } + + if (dump_file) + fprintf (dump_file, "Total insns converted: %d\n", converted_insns); + + BITMAP_FREE (candidates); + bitmap_obstack_release (NULL); + df_process_deferred_rescans (); + + /* Conversion means we may have 128bit register spills/fills + which require aligned stack. */ + if (converted_insns) + { + if (crtl->stack_alignment_needed < 128) + crtl->stack_alignment_needed = 128; + if (crtl->stack_alignment_estimated < 128) + crtl->stack_alignment_estimated = 128; + } + + return 0; +} + +namespace { + +const pass_data pass_data_insert_vzeroupper = +{ + RTL_PASS, /* type */ + "vzeroupper", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ +}; + +class pass_insert_vzeroupper : public rtl_opt_pass +{ +public: + pass_insert_vzeroupper(gcc::context *ctxt) + : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return TARGET_AVX && !TARGET_AVX512F + && TARGET_VZEROUPPER && flag_expensive_optimizations + && !optimize_size; + } + + virtual unsigned int execute (function *) + { + return rest_of_handle_insert_vzeroupper (); + } + +}; // class pass_insert_vzeroupper + +const pass_data pass_data_stv = +{ + RTL_PASS, /* type */ + "stv", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ +}; + +class pass_stv : public rtl_opt_pass +{ +public: + pass_stv (gcc::context *ctxt) + : rtl_opt_pass (pass_data_stv, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return !TARGET_64BIT && TARGET_STV && TARGET_SSE2 && optimize > 1; + } + + virtual unsigned int execute (function *) + { + return convert_scalars_to_vector (); + } + +}; // class pass_stv + +} // anon namespace + +rtl_opt_pass * +make_pass_insert_vzeroupper (gcc::context *ctxt) +{ + return new pass_insert_vzeroupper (ctxt); +} + +rtl_opt_pass * +make_pass_stv (gcc::context *ctxt) +{ + return new pass_stv (ctxt); +} + +/* Return true if a red-zone is in use. */ + +bool +ix86_using_red_zone (void) +{ + return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI; +} + +/* Return a string that documents the current -m options. The caller is + responsible for freeing the string. 
*/ + +static char * +ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch, + const char *tune, enum fpmath_unit fpmath, + bool add_nl_p) +{ + struct ix86_target_opts + { + const char *option; /* option string */ + HOST_WIDE_INT mask; /* isa mask options */ + }; + + /* This table is ordered so that options like -msse4.2 that imply + preceding options while match those first. */ + static struct ix86_target_opts isa_opts[] = + { + { "-mfma4", OPTION_MASK_ISA_FMA4 }, + { "-mfma", OPTION_MASK_ISA_FMA }, + { "-mxop", OPTION_MASK_ISA_XOP }, + { "-mlwp", OPTION_MASK_ISA_LWP }, + { "-mavx512f", OPTION_MASK_ISA_AVX512F }, + { "-mavx512er", OPTION_MASK_ISA_AVX512ER }, + { "-mavx512cd", OPTION_MASK_ISA_AVX512CD }, + { "-mavx512pf", OPTION_MASK_ISA_AVX512PF }, + { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ }, + { "-mavx512bw", OPTION_MASK_ISA_AVX512BW }, + { "-mavx512vl", OPTION_MASK_ISA_AVX512VL }, + { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA }, + { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI }, + { "-msse4a", OPTION_MASK_ISA_SSE4A }, + { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, + { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, + { "-mssse3", OPTION_MASK_ISA_SSSE3 }, + { "-msse3", OPTION_MASK_ISA_SSE3 }, + { "-msse2", OPTION_MASK_ISA_SSE2 }, + { "-msse", OPTION_MASK_ISA_SSE }, + { "-m3dnow", OPTION_MASK_ISA_3DNOW }, + { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A }, + { "-mmmx", OPTION_MASK_ISA_MMX }, + { "-mabm", OPTION_MASK_ISA_ABM }, + { "-mbmi", OPTION_MASK_ISA_BMI }, + { "-mbmi2", OPTION_MASK_ISA_BMI2 }, + { "-mlzcnt", OPTION_MASK_ISA_LZCNT }, + { "-mhle", OPTION_MASK_ISA_HLE }, + { "-mfxsr", OPTION_MASK_ISA_FXSR }, + { "-mrdseed", OPTION_MASK_ISA_RDSEED }, + { "-mprfchw", OPTION_MASK_ISA_PRFCHW }, + { "-madx", OPTION_MASK_ISA_ADX }, + { "-mtbm", OPTION_MASK_ISA_TBM }, + { "-mpopcnt", OPTION_MASK_ISA_POPCNT }, + { "-mmovbe", OPTION_MASK_ISA_MOVBE }, + { "-mcrc32", OPTION_MASK_ISA_CRC32 }, + { "-maes", OPTION_MASK_ISA_AES }, + { "-msha", OPTION_MASK_ISA_SHA }, + { "-mpclmul", OPTION_MASK_ISA_PCLMUL }, + { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE }, + { "-mrdrnd", OPTION_MASK_ISA_RDRND }, + { "-mf16c", OPTION_MASK_ISA_F16C }, + { "-mrtm", OPTION_MASK_ISA_RTM }, + { "-mxsave", OPTION_MASK_ISA_XSAVE }, + { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT }, + { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 }, + { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT }, + { "-mxsavec", OPTION_MASK_ISA_XSAVEC }, + { "-mxsaves", OPTION_MASK_ISA_XSAVES }, + { "-mmpx", OPTION_MASK_ISA_MPX }, + { "-mclwb", OPTION_MASK_ISA_CLWB }, + { "-mmwaitx", OPTION_MASK_ISA_MWAITX }, + { "-mclzero", OPTION_MASK_ISA_CLZERO }, + { "-mpku", OPTION_MASK_ISA_PKU }, + }; + + /* Flag options. 
*/ + static struct ix86_target_opts flag_opts[] = + { + { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE }, + { "-mlong-double-128", MASK_LONG_DOUBLE_128 }, + { "-mlong-double-64", MASK_LONG_DOUBLE_64 }, + { "-m80387", MASK_80387 }, + { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS }, + { "-malign-double", MASK_ALIGN_DOUBLE }, + { "-mcld", MASK_CLD }, + { "-mfp-ret-in-387", MASK_FLOAT_RETURNS }, + { "-mieee-fp", MASK_IEEE_FP }, + { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS }, + { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY }, + { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT }, + { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS }, + { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 }, + { "-mno-push-args", MASK_NO_PUSH_ARGS }, + { "-mno-red-zone", MASK_NO_RED_ZONE }, + { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER }, + { "-mrecip", MASK_RECIP }, + { "-mrtd", MASK_RTD }, + { "-msseregparm", MASK_SSEREGPARM }, + { "-mstack-arg-probe", MASK_STACK_PROBE }, + { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS }, + { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS }, + { "-m8bit-idiv", MASK_USE_8BIT_IDIV }, + { "-mvzeroupper", MASK_VZEROUPPER }, + { "-mstv", MASK_STV}, + { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD}, + { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE}, + { "-mprefer-avx128", MASK_PREFER_AVX128}, + }; + + const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2]; + + char isa_other[40]; + char target_other[40]; + unsigned num = 0; + unsigned i, j; + char *ret; + char *ptr; + size_t len; + size_t line_len; + size_t sep_len; + const char *abi; + + memset (opts, '\0', sizeof (opts)); + + /* Add -march= option. */ + if (arch) + { + opts[num][0] = "-march="; + opts[num++][1] = arch; + } + + /* Add -mtune= option. */ + if (tune) + { + opts[num][0] = "-mtune="; + opts[num++][1] = tune; + } + + /* Add -m32/-m64/-mx32. */ + if ((isa & OPTION_MASK_ISA_64BIT) != 0) + { + if ((isa & OPTION_MASK_ABI_64) != 0) + abi = "-m64"; + else + abi = "-mx32"; + isa &= ~ (OPTION_MASK_ISA_64BIT + | OPTION_MASK_ABI_64 + | OPTION_MASK_ABI_X32); + } + else + abi = "-m32"; + opts[num++][0] = abi; + + /* Pick out the options in isa options. */ + for (i = 0; i < ARRAY_SIZE (isa_opts); i++) + { + if ((isa & isa_opts[i].mask) != 0) + { + opts[num++][0] = isa_opts[i].option; + isa &= ~ isa_opts[i].mask; + } + } + + if (isa && add_nl_p) + { + opts[num++][0] = isa_other; + sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)", + isa); + } + + /* Add flag options. */ + for (i = 0; i < ARRAY_SIZE (flag_opts); i++) + { + if ((flags & flag_opts[i].mask) != 0) + { + opts[num++][0] = flag_opts[i].option; + flags &= ~ flag_opts[i].mask; + } + } + + if (flags && add_nl_p) + { + opts[num++][0] = target_other; + sprintf (target_other, "(other flags: %#x)", flags); + } + + /* Add -fpmath= option. */ + if (fpmath) + { + opts[num][0] = "-mfpmath="; + switch ((int) fpmath) + { + case FPMATH_387: + opts[num++][1] = "387"; + break; + + case FPMATH_SSE: + opts[num++][1] = "sse"; + break; + + case FPMATH_387 | FPMATH_SSE: + opts[num++][1] = "sse+387"; + break; + + default: + gcc_unreachable (); + } + } + + /* Any options? */ + if (num == 0) + return NULL; + + gcc_assert (num < ARRAY_SIZE (opts)); + + /* Size the string. */ + len = 0; + sep_len = (add_nl_p) ? 3 : 1; + for (i = 0; i < num; i++) + { + len += sep_len; + for (j = 0; j < 2; j++) + if (opts[i][j]) + len += strlen (opts[i][j]); + } + + /* Build the string. 
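+
+     The allocation below relies on the sizing loop above: every entry was
+     charged sep_len bytes, which reserves room for ' ', '\\' and '\n' when
+     add_nl_p, so the 70-column wrapping in the loop that follows cannot
+     overflow (the unused separator of the first entry also covers the
+     terminating '\0').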
*/ + ret = ptr = (char *) xmalloc (len); + line_len = 0; + + for (i = 0; i < num; i++) + { + size_t len2[2]; + + for (j = 0; j < 2; j++) + len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0; + + if (i != 0) + { + *ptr++ = ' '; + line_len++; + + if (add_nl_p && line_len + len2[0] + len2[1] > 70) + { + *ptr++ = '\\'; + *ptr++ = '\n'; + line_len = 0; + } + } + + for (j = 0; j < 2; j++) + if (opts[i][j]) + { + memcpy (ptr, opts[i][j], len2[j]); + ptr += len2[j]; + line_len += len2[j]; + } + } + + *ptr = '\0'; + gcc_assert (ret + len >= ptr); + + return ret; +} + +/* Return true, if profiling code should be emitted before + prologue. Otherwise it returns false. + Note: For x86 with "hotfix" it is sorried. */ +static bool +ix86_profile_before_prologue (void) +{ + return flag_fentry != 0; +} + +/* Function that is callable from the debugger to print the current + options. */ +void ATTRIBUTE_UNUSED +ix86_debug_options (void) +{ + char *opts = ix86_target_string (ix86_isa_flags, target_flags, + ix86_arch_string, ix86_tune_string, + ix86_fpmath, true); + + if (opts) + { + fprintf (stderr, "%s\n\n", opts); + free (opts); + } + else + fputs ("\n\n", stderr); + + return; +} + +/* Return true if T is one of the bytes we should avoid with + -fmitigate-rop. */ + +static bool +ix86_rop_should_change_byte_p (int t) +{ + return t == 0xc2 || t == 0xc3 || t == 0xca || t == 0xcb; +} + +static const char *stringop_alg_names[] = { +#define DEF_ENUM +#define DEF_ALG(alg, name) #name, +#include "stringop.def" +#undef DEF_ENUM +#undef DEF_ALG +}; + +/* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=. + The string is of the following form (or comma separated list of it): + + strategy_alg:max_size:[align|noalign] + + where the full size range for the strategy is either [0, max_size] or + [min_size, max_size], in which min_size is the max_size + 1 of the + preceding range. The last size range must have max_size == -1. + + Examples: + + 1. + -mmemcpy-strategy=libcall:-1:noalign + + this is equivalent to (for known size memcpy) -mstringop-strategy=libcall + + + 2. + -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign + + This is to tell the compiler to use the following strategy for memset + 1) when the expected size is between [1, 16], use rep_8byte strategy; + 2) when the size is between [17, 2048], use vector_loop; + 3) when the size is > 2048, use libcall. */ + +struct stringop_size_range +{ + int max; + stringop_alg alg; + bool noalign; +}; + +static void +ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset) +{ + const struct stringop_algs *default_algs; + stringop_size_range input_ranges[MAX_STRINGOP_ALGS]; + char *curr_range_str, *next_range_str; + int i = 0, n = 0; + + if (is_memset) + default_algs = &ix86_cost->memset[TARGET_64BIT != 0]; + else + default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0]; + + curr_range_str = strategy_str; + + do + { + int maxs; + char alg_name[128]; + char align[16]; + next_range_str = strchr (curr_range_str, ','); + if (next_range_str) + *next_range_str++ = '\0'; + + if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s", + alg_name, &maxs, align)) + { + error ("wrong arg %s to option %s", curr_range_str, + is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + + if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1)) + { + error ("size ranges of option %s should be increasing", + is_memset ? 
"-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + + for (i = 0; i < last_alg; i++) + if (!strcmp (alg_name, stringop_alg_names[i])) + break; + + if (i == last_alg) + { + error ("wrong stringop strategy name %s specified for option %s", + alg_name, + is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + + if ((stringop_alg) i == rep_prefix_8_byte + && !TARGET_64BIT) + { + /* rep; movq isn't available in 32-bit code. */ + error ("stringop strategy name %s specified for option %s " + "not supported for 32-bit code", + alg_name, + is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + + input_ranges[n].max = maxs; + input_ranges[n].alg = (stringop_alg) i; + if (!strcmp (align, "align")) + input_ranges[n].noalign = false; + else if (!strcmp (align, "noalign")) + input_ranges[n].noalign = true; + else + { + error ("unknown alignment %s specified for option %s", + align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + n++; + curr_range_str = next_range_str; + } + while (curr_range_str); + + if (input_ranges[n - 1].max != -1) + { + error ("the max value for the last size range should be -1" + " for option %s", + is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + + if (n > MAX_STRINGOP_ALGS) + { + error ("too many size ranges specified in option %s", + is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + + /* Now override the default algs array. */ + for (i = 0; i < n; i++) + { + *const_cast(&default_algs->size[i].max) = input_ranges[i].max; + *const_cast(&default_algs->size[i].alg) + = input_ranges[i].alg; + *const_cast(&default_algs->size[i].noalign) + = input_ranges[i].noalign; + } +} + + +/* parse -mtune-ctrl= option. When DUMP is true, + print the features that are explicitly set. */ + +static void +parse_mtune_ctrl_str (bool dump) +{ + if (!ix86_tune_ctrl_string) + return; + + char *next_feature_string = NULL; + char *curr_feature_string = xstrdup (ix86_tune_ctrl_string); + char *orig = curr_feature_string; + int i; + do + { + bool clear = false; + + next_feature_string = strchr (curr_feature_string, ','); + if (next_feature_string) + *next_feature_string++ = '\0'; + if (*curr_feature_string == '^') + { + curr_feature_string++; + clear = true; + } + for (i = 0; i < X86_TUNE_LAST; i++) + { + if (!strcmp (curr_feature_string, ix86_tune_feature_names[i])) + { + ix86_tune_features[i] = !clear; + if (dump) + fprintf (stderr, "Explicitly %s feature %s\n", + clear ? "clear" : "set", ix86_tune_feature_names[i]); + break; + } + } + if (i == X86_TUNE_LAST) + error ("Unknown parameter to option -mtune-ctrl: %s", + clear ? curr_feature_string - 1 : curr_feature_string); + curr_feature_string = next_feature_string; + } + while (curr_feature_string); + free (orig); +} + +/* Helper function to set ix86_tune_features. IX86_TUNE is the + processor type. */ + +static void +set_ix86_tune_features (enum processor_type ix86_tune, bool dump) +{ + unsigned int ix86_tune_mask = 1u << ix86_tune; + int i; + + for (i = 0; i < X86_TUNE_LAST; ++i) + { + if (ix86_tune_no_default) + ix86_tune_features[i] = 0; + else + ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask); + } + + if (dump) + { + fprintf (stderr, "List of x86 specific tuning parameter names:\n"); + for (i = 0; i < X86_TUNE_LAST; i++) + fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i], + ix86_tune_features[i] ? 
"on" : "off"); + } + + parse_mtune_ctrl_str (dump); +} + + +/* Default align_* from the processor table. */ + +static void +ix86_default_align (struct gcc_options *opts) +{ + if (opts->x_align_loops == 0) + { + opts->x_align_loops = processor_target_table[ix86_tune].align_loop; + align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip; + } + if (opts->x_align_jumps == 0) + { + opts->x_align_jumps = processor_target_table[ix86_tune].align_jump; + align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip; + } + if (opts->x_align_functions == 0) + { + opts->x_align_functions = processor_target_table[ix86_tune].align_func; + } +} + +/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */ + +static void +ix86_override_options_after_change (void) +{ + ix86_default_align (&global_options); +} + +/* Override various settings based on options. If MAIN_ARGS_P, the + options are from the command line, otherwise they are from + attributes. */ + +static void +ix86_option_override_internal (bool main_args_p, + struct gcc_options *opts, + struct gcc_options *opts_set) +{ + int i; + unsigned int ix86_arch_mask; + const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL); + const char *prefix; + const char *suffix; + const char *sw; + +#define PTA_3DNOW (HOST_WIDE_INT_1 << 0) +#define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1) +#define PTA_64BIT (HOST_WIDE_INT_1 << 2) +#define PTA_ABM (HOST_WIDE_INT_1 << 3) +#define PTA_AES (HOST_WIDE_INT_1 << 4) +#define PTA_AVX (HOST_WIDE_INT_1 << 5) +#define PTA_BMI (HOST_WIDE_INT_1 << 6) +#define PTA_CX16 (HOST_WIDE_INT_1 << 7) +#define PTA_F16C (HOST_WIDE_INT_1 << 8) +#define PTA_FMA (HOST_WIDE_INT_1 << 9) +#define PTA_FMA4 (HOST_WIDE_INT_1 << 10) +#define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11) +#define PTA_LWP (HOST_WIDE_INT_1 << 12) +#define PTA_LZCNT (HOST_WIDE_INT_1 << 13) +#define PTA_MMX (HOST_WIDE_INT_1 << 14) +#define PTA_MOVBE (HOST_WIDE_INT_1 << 15) +#define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16) +#define PTA_PCLMUL (HOST_WIDE_INT_1 << 17) +#define PTA_POPCNT (HOST_WIDE_INT_1 << 18) +#define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19) +#define PTA_RDRND (HOST_WIDE_INT_1 << 20) +#define PTA_SSE (HOST_WIDE_INT_1 << 21) +#define PTA_SSE2 (HOST_WIDE_INT_1 << 22) +#define PTA_SSE3 (HOST_WIDE_INT_1 << 23) +#define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24) +#define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25) +#define PTA_SSE4A (HOST_WIDE_INT_1 << 26) +#define PTA_SSSE3 (HOST_WIDE_INT_1 << 27) +#define PTA_TBM (HOST_WIDE_INT_1 << 28) +#define PTA_XOP (HOST_WIDE_INT_1 << 29) +#define PTA_AVX2 (HOST_WIDE_INT_1 << 30) +#define PTA_BMI2 (HOST_WIDE_INT_1 << 31) +#define PTA_RTM (HOST_WIDE_INT_1 << 32) +#define PTA_HLE (HOST_WIDE_INT_1 << 33) +#define PTA_PRFCHW (HOST_WIDE_INT_1 << 34) +#define PTA_RDSEED (HOST_WIDE_INT_1 << 35) +#define PTA_ADX (HOST_WIDE_INT_1 << 36) +#define PTA_FXSR (HOST_WIDE_INT_1 << 37) +#define PTA_XSAVE (HOST_WIDE_INT_1 << 38) +#define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39) +#define PTA_AVX512F (HOST_WIDE_INT_1 << 40) +#define PTA_AVX512ER (HOST_WIDE_INT_1 << 41) +#define PTA_AVX512PF (HOST_WIDE_INT_1 << 42) +#define PTA_AVX512CD (HOST_WIDE_INT_1 << 43) +#define PTA_MPX (HOST_WIDE_INT_1 << 44) +#define PTA_SHA (HOST_WIDE_INT_1 << 45) +#define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46) +#define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47) +#define PTA_XSAVEC (HOST_WIDE_INT_1 << 48) +#define PTA_XSAVES (HOST_WIDE_INT_1 << 49) +#define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50) +#define PTA_AVX512BW (HOST_WIDE_INT_1 << 51) +#define PTA_AVX512VL 
(HOST_WIDE_INT_1 << 52) +#define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53) +#define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54) +#define PTA_CLWB (HOST_WIDE_INT_1 << 55) +#define PTA_MWAITX (HOST_WIDE_INT_1 << 56) +#define PTA_CLZERO (HOST_WIDE_INT_1 << 57) +#define PTA_NO_80387 (HOST_WIDE_INT_1 << 58) +#define PTA_PKU (HOST_WIDE_INT_1 << 59) + +#define PTA_CORE2 \ + (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \ + | PTA_CX16 | PTA_FXSR) +#define PTA_NEHALEM \ + (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT) +#define PTA_WESTMERE \ + (PTA_NEHALEM | PTA_AES | PTA_PCLMUL) +#define PTA_SANDYBRIDGE \ + (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT) +#define PTA_IVYBRIDGE \ + (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C) +#define PTA_HASWELL \ + (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \ + | PTA_FMA | PTA_MOVBE | PTA_HLE) +#define PTA_BROADWELL \ + (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED) +#define PTA_SKYLAKE \ + (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES) +#define PTA_SKYLAKE_AVX512 \ + (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \ + | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU) +#define PTA_KNL \ + (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD) +#define PTA_BONNELL \ + (PTA_CORE2 | PTA_MOVBE) +#define PTA_SILVERMONT \ + (PTA_WESTMERE | PTA_MOVBE) + +/* if this reaches 64, need to widen struct pta flags below */ + + static struct pta + { + const char *const name; /* processor name or nickname. */ + const enum processor_type processor; + const enum attr_cpu schedule; + const unsigned HOST_WIDE_INT flags; + } + const processor_alias_table[] = + { + {"i386", PROCESSOR_I386, CPU_NONE, 0}, + {"i486", PROCESSOR_I486, CPU_NONE, 0}, + {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0}, + {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0}, + {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387}, + {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX}, + {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX}, + {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW}, + {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW}, + {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, + PTA_MMX | PTA_SSE | PTA_FXSR}, + {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0}, + {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0}, + {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR}, + {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, + PTA_MMX | PTA_SSE | PTA_FXSR}, + {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, + PTA_MMX | PTA_SSE | PTA_FXSR}, + {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, + PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR}, + {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE, + PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR}, + {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE, + PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR}, + {"prescott", PROCESSOR_NOCONA, CPU_NONE, + PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR}, + {"nocona", PROCESSOR_NOCONA, CPU_NONE, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR}, + {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2}, + {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM}, + {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM}, + {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE}, + {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM, + PTA_SANDYBRIDGE}, + {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM, + PTA_SANDYBRIDGE}, + {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM, + PTA_IVYBRIDGE}, + 
{"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM, + PTA_IVYBRIDGE}, + {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL}, + {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL}, + {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL}, + {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE}, + {"skylake-avx512", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE_AVX512}, + {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL}, + {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL}, + {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT}, + {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT}, + {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL}, + {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM}, + {"geode", PROCESSOR_GEODE, CPU_GEODE, + PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE}, + {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX}, + {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW}, + {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW}, + {"athlon", PROCESSOR_ATHLON, CPU_ATHLON, + PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE}, + {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON, + PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE}, + {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON, + PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR}, + {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON, + PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR}, + {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON, + PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR}, + {"x86-64", PROCESSOR_K8, CPU_K8, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR}, + {"k8", PROCESSOR_K8, CPU_K8, + PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE + | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR}, + {"k8-sse3", PROCESSOR_K8, CPU_K8, + PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE + | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR}, + {"opteron", PROCESSOR_K8, CPU_K8, + PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE + | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR}, + {"opteron-sse3", PROCESSOR_K8, CPU_K8, + PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE + | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR}, + {"athlon64", PROCESSOR_K8, CPU_K8, + PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE + | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR}, + {"athlon64-sse3", PROCESSOR_K8, CPU_K8, + PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE + | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR}, + {"athlon-fx", PROCESSOR_K8, CPU_K8, + PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE + | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR}, + {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10, + PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 + | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR}, + {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10, + PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 + | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR}, + {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 + | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4 + | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE}, + {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 + | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4 + | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C + | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE}, + 
{"bdver3", PROCESSOR_BDVER3, CPU_BDVER3, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 + | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4 + | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C + | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE + | PTA_XSAVEOPT | PTA_FSGSBASE}, + {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 + | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 + | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2 + | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR + | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND + | PTA_MOVBE | PTA_MWAITX}, + {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 + | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 + | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW + | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE + | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED + | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES + | PTA_SHA | PTA_LZCNT | PTA_POPCNT}, + {"btver1", PROCESSOR_BTVER1, CPU_GENERIC, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW + | PTA_FXSR | PTA_XSAVE}, + {"btver2", PROCESSOR_BTVER2, CPU_BTVER2, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1 + | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX + | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW + | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT}, + + {"generic", PROCESSOR_GENERIC, CPU_GENERIC, + PTA_64BIT + | PTA_HLE /* flags are only used for -march switch. */ }, + }; + + /* -mrecip options. */ + static struct + { + const char *string; /* option name */ + unsigned int mask; /* mask bits to set */ + } + const recip_options[] = + { + { "all", RECIP_MASK_ALL }, + { "none", RECIP_MASK_NONE }, + { "div", RECIP_MASK_DIV }, + { "sqrt", RECIP_MASK_SQRT }, + { "vec-div", RECIP_MASK_VEC_DIV }, + { "vec-sqrt", RECIP_MASK_VEC_SQRT }, + }; + + int const pta_size = ARRAY_SIZE (processor_alias_table); + + /* Set up prefix/suffix so the error messages refer to either the command + line argument, or the attribute(target). */ + if (main_args_p) + { + prefix = "-m"; + suffix = ""; + sw = "switch"; + } + else + { + prefix = "option(\""; + suffix = "\")"; + sw = "attribute"; + } + + /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if + TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */ + if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags)) + opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32); +#ifdef TARGET_BI_ARCH + else + { +#if TARGET_BI_ARCH == 1 + /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64 + is on and OPTION_MASK_ABI_X32 is off. We turn off + OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by + -mx32. */ + if (TARGET_X32_P (opts->x_ix86_isa_flags)) + opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64; +#else + /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is + on and OPTION_MASK_ABI_64 is off. We turn off + OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by + -m64 or OPTION_MASK_CODE16 is turned on by -m16. 
*/ + if (TARGET_LP64_P (opts->x_ix86_isa_flags) + || TARGET_16BIT_P (opts->x_ix86_isa_flags)) + opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32; +#endif + if (TARGET_64BIT_P (opts->x_ix86_isa_flags) + && TARGET_IAMCU_P (opts->x_target_flags)) + sorry ("Intel MCU psABI isn%'t supported in %s mode", + TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit"); + } +#endif + + if (TARGET_X32_P (opts->x_ix86_isa_flags)) + { + /* Always turn on OPTION_MASK_ISA_64BIT and turn off + OPTION_MASK_ABI_64 for TARGET_X32. */ + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT; + opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64; + } + else if (TARGET_16BIT_P (opts->x_ix86_isa_flags)) + opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT + | OPTION_MASK_ABI_X32 + | OPTION_MASK_ABI_64); + else if (TARGET_LP64_P (opts->x_ix86_isa_flags)) + { + /* Always turn on OPTION_MASK_ISA_64BIT and turn off + OPTION_MASK_ABI_X32 for TARGET_LP64. */ + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT; + opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32; + } + +#ifdef SUBTARGET_OVERRIDE_OPTIONS + SUBTARGET_OVERRIDE_OPTIONS; +#endif + +#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS + SUBSUBTARGET_OVERRIDE_OPTIONS; +#endif + + /* -fPIC is the default for x86_64. */ + if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags)) + opts->x_flag_pic = 2; + + /* Need to check -mtune=generic first. */ + if (opts->x_ix86_tune_string) + { + /* As special support for cross compilers we read -mtune=native + as -mtune=generic. With native compilers we won't see the + -mtune=native, as it was changed by the driver. */ + if (!strcmp (opts->x_ix86_tune_string, "native")) + { + opts->x_ix86_tune_string = "generic"; + } + else if (!strcmp (opts->x_ix86_tune_string, "x86-64")) + warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use " + "%stune=k8%s or %stune=generic%s instead as appropriate", + prefix, suffix, prefix, suffix, prefix, suffix); + } + else + { + if (opts->x_ix86_arch_string) + opts->x_ix86_tune_string = opts->x_ix86_arch_string; + if (!opts->x_ix86_tune_string) + { + opts->x_ix86_tune_string + = processor_target_table[TARGET_CPU_DEFAULT].name; + ix86_tune_defaulted = 1; + } + + /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string + or defaulted. We need to use a sensible tune option. */ + if (!strcmp (opts->x_ix86_tune_string, "x86-64")) + { + opts->x_ix86_tune_string = "generic"; + } + } + + if (opts->x_ix86_stringop_alg == rep_prefix_8_byte + && !TARGET_64BIT_P (opts->x_ix86_isa_flags)) + { + /* rep; movq isn't available in 32-bit code. */ + error ("-mstringop-strategy=rep_8byte not supported for 32-bit code"); + opts->x_ix86_stringop_alg = no_stringop; + } + + if (!opts->x_ix86_arch_string) + opts->x_ix86_arch_string + = TARGET_64BIT_P (opts->x_ix86_isa_flags) + ? "x86-64" : SUBTARGET32_DEFAULT_CPU; + else + ix86_arch_specified = 1; + + if (opts_set->x_ix86_pmode) + { + if ((TARGET_LP64_P (opts->x_ix86_isa_flags) + && opts->x_ix86_pmode == PMODE_SI) + || (!TARGET_64BIT_P (opts->x_ix86_isa_flags) + && opts->x_ix86_pmode == PMODE_DI)) + error ("address mode %qs not supported in the %s bit mode", + TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long", + TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32"); + } + else + opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags) + ? PMODE_DI : PMODE_SI; + + if (!opts_set->x_ix86_abi) + opts->x_ix86_abi = DEFAULT_ABI; + + /* For targets using ms ABI enable ms-extensions, if not + explicit turned off. For non-ms ABI we turn off this + option. 
*/ + if (!opts_set->x_flag_ms_extensions) + opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI); + + if (opts_set->x_ix86_cmodel) + { + switch (opts->x_ix86_cmodel) + { + case CM_SMALL: + case CM_SMALL_PIC: + if (opts->x_flag_pic) + opts->x_ix86_cmodel = CM_SMALL_PIC; + if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) + error ("code model %qs not supported in the %s bit mode", + "small", "32"); + break; + + case CM_MEDIUM: + case CM_MEDIUM_PIC: + if (opts->x_flag_pic) + opts->x_ix86_cmodel = CM_MEDIUM_PIC; + if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) + error ("code model %qs not supported in the %s bit mode", + "medium", "32"); + else if (TARGET_X32_P (opts->x_ix86_isa_flags)) + error ("code model %qs not supported in x32 mode", + "medium"); + break; + + case CM_LARGE: + case CM_LARGE_PIC: + if (opts->x_flag_pic) + opts->x_ix86_cmodel = CM_LARGE_PIC; + if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) + error ("code model %qs not supported in the %s bit mode", + "large", "32"); + else if (TARGET_X32_P (opts->x_ix86_isa_flags)) + error ("code model %qs not supported in x32 mode", + "large"); + break; + + case CM_32: + if (opts->x_flag_pic) + error ("code model %s does not support PIC mode", "32"); + if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) + error ("code model %qs not supported in the %s bit mode", + "32", "64"); + break; + + case CM_KERNEL: + if (opts->x_flag_pic) + { + error ("code model %s does not support PIC mode", "kernel"); + opts->x_ix86_cmodel = CM_32; + } + if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) + error ("code model %qs not supported in the %s bit mode", + "kernel", "32"); + break; + + default: + gcc_unreachable (); + } + } + else + { + /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the + use of rip-relative addressing. This eliminates fixups that + would otherwise be needed if this object is to be placed in a + DLL, and is essentially just as efficient as direct addressing. */ + if (TARGET_64BIT_P (opts->x_ix86_isa_flags) + && (TARGET_RDOS || TARGET_PECOFF)) + opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1; + else if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) + opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL; + else + opts->x_ix86_cmodel = CM_32; + } + if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL) + { + error ("-masm=intel not supported in this configuration"); + opts->x_ix86_asm_dialect = ASM_ATT; + } + if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0) + != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0)) + sorry ("%i-bit mode not compiled in", + (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32); + + for (i = 0; i < pta_size; i++) + if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name)) + { + ix86_schedule = processor_alias_table[i].schedule; + ix86_arch = processor_alias_table[i].processor; + /* Default cpu tuning to the architecture. 
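+           (so plain -march=haswell with no -mtune also schedules for
+           Haswell; an explicit or defaulted -mtune is looked up again in
+           processor_alias_table further below).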
*/ + ix86_tune = ix86_arch; + + if (TARGET_64BIT_P (opts->x_ix86_isa_flags) + && !(processor_alias_table[i].flags & PTA_64BIT)) + error ("CPU you selected does not support x86-64 " + "instruction set"); + + if (processor_alias_table[i].flags & PTA_MMX + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX; + if (processor_alias_table[i].flags & PTA_3DNOW + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW; + if (processor_alias_table[i].flags & PTA_3DNOW_A + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A; + if (processor_alias_table[i].flags & PTA_SSE + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE; + if (processor_alias_table[i].flags & PTA_SSE2 + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2; + if (processor_alias_table[i].flags & PTA_SSE3 + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3; + if (processor_alias_table[i].flags & PTA_SSSE3 + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3; + if (processor_alias_table[i].flags & PTA_SSE4_1 + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1; + if (processor_alias_table[i].flags & PTA_SSE4_2 + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2; + if (processor_alias_table[i].flags & PTA_AVX + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX; + if (processor_alias_table[i].flags & PTA_AVX2 + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2; + if (processor_alias_table[i].flags & PTA_FMA + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA; + if (processor_alias_table[i].flags & PTA_SSE4A + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A; + if (processor_alias_table[i].flags & PTA_FMA4 + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4; + if (processor_alias_table[i].flags & PTA_XOP + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP; + if (processor_alias_table[i].flags & PTA_LWP + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP; + if (processor_alias_table[i].flags & PTA_ABM + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM; + if (processor_alias_table[i].flags & PTA_BMI + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI; + if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM) + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT; + if (processor_alias_table[i].flags & PTA_TBM + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM; + if (processor_alias_table[i].flags & PTA_BMI2 + && !(opts->x_ix86_isa_flags_explicit & 
OPTION_MASK_ISA_BMI2)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2; + if (processor_alias_table[i].flags & PTA_CX16 + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16; + if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM) + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; + if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags) + && (processor_alias_table[i].flags & PTA_NO_SAHF)) + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF; + if (processor_alias_table[i].flags & PTA_MOVBE + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE; + if (processor_alias_table[i].flags & PTA_AES + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES)) + ix86_isa_flags |= OPTION_MASK_ISA_AES; + if (processor_alias_table[i].flags & PTA_SHA + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA)) + ix86_isa_flags |= OPTION_MASK_ISA_SHA; + if (processor_alias_table[i].flags & PTA_PCLMUL + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL; + if (processor_alias_table[i].flags & PTA_FSGSBASE + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE; + if (processor_alias_table[i].flags & PTA_RDRND + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND; + if (processor_alias_table[i].flags & PTA_F16C + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C; + if (processor_alias_table[i].flags & PTA_RTM + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM; + if (processor_alias_table[i].flags & PTA_HLE + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE; + if (processor_alias_table[i].flags & PTA_PRFCHW + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW; + if (processor_alias_table[i].flags & PTA_RDSEED + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED; + if (processor_alias_table[i].flags & PTA_ADX + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX; + if (processor_alias_table[i].flags & PTA_FXSR + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR; + if (processor_alias_table[i].flags & PTA_XSAVE + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE; + if (processor_alias_table[i].flags & PTA_XSAVEOPT + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT; + if (processor_alias_table[i].flags & PTA_AVX512F + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F; + if (processor_alias_table[i].flags & PTA_AVX512ER + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER; + if (processor_alias_table[i].flags & PTA_AVX512PF + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF)) + opts->x_ix86_isa_flags 
|= OPTION_MASK_ISA_AVX512PF; + if (processor_alias_table[i].flags & PTA_AVX512CD + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD; + if (processor_alias_table[i].flags & PTA_PREFETCHWT1 + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1; + if (processor_alias_table[i].flags & PTA_CLWB + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB; + if (processor_alias_table[i].flags & PTA_CLFLUSHOPT + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT; + if (processor_alias_table[i].flags & PTA_CLZERO + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO; + if (processor_alias_table[i].flags & PTA_XSAVEC + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC; + if (processor_alias_table[i].flags & PTA_XSAVES + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES; + if (processor_alias_table[i].flags & PTA_AVX512DQ + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ; + if (processor_alias_table[i].flags & PTA_AVX512BW + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW; + if (processor_alias_table[i].flags & PTA_AVX512VL + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL; + if (processor_alias_table[i].flags & PTA_MPX + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX; + if (processor_alias_table[i].flags & PTA_AVX512VBMI + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI; + if (processor_alias_table[i].flags & PTA_AVX512IFMA + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA; + if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)) + x86_prefetch_sse = true; + if (processor_alias_table[i].flags & PTA_MWAITX + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX; + if (processor_alias_table[i].flags & PTA_PKU + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU; + + if (!(opts_set->x_target_flags & MASK_80387)) + { + if (processor_alias_table[i].flags & PTA_NO_80387) + opts->x_target_flags &= ~MASK_80387; + else + opts->x_target_flags |= MASK_80387; + } + break; + } + + if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX)) + error ("Intel MPX does not support x32"); + + if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX)) + error ("Intel MPX does not support x32"); + + if (!strcmp (opts->x_ix86_arch_string, "generic")) + error ("generic CPU can be used only for %stune=%s %s", + prefix, suffix, sw); + else if (!strcmp (opts->x_ix86_arch_string, "intel")) + error ("intel CPU can be used only for %stune=%s %s", + prefix, suffix, sw); + else if (i == pta_size) + error ("bad value (%s) for %sarch=%s %s", + opts->x_ix86_arch_string, prefix, suffix, sw); + + ix86_arch_mask = 1u << ix86_arch; + 
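+
+  /* With the -march CPU resolved above, derive the per-architecture feature
+     bits; the loop over processor_alias_table that follows repeats the same
+     lookup for the -mtune CPU.  */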
for (i = 0; i < X86_ARCH_LAST; ++i) + ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask); + + for (i = 0; i < pta_size; i++) + if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name)) + { + ix86_schedule = processor_alias_table[i].schedule; + ix86_tune = processor_alias_table[i].processor; + if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) + { + if (!(processor_alias_table[i].flags & PTA_64BIT)) + { + if (ix86_tune_defaulted) + { + opts->x_ix86_tune_string = "x86-64"; + for (i = 0; i < pta_size; i++) + if (! strcmp (opts->x_ix86_tune_string, + processor_alias_table[i].name)) + break; + ix86_schedule = processor_alias_table[i].schedule; + ix86_tune = processor_alias_table[i].processor; + } + else + error ("CPU you selected does not support x86-64 " + "instruction set"); + } + } + /* Intel CPUs have always interpreted SSE prefetch instructions as + NOPs; so, we can enable SSE prefetch instructions even when + -mtune (rather than -march) points us to a processor that has them. + However, the VIA C3 gives a SIGILL, so we only do that for i686 and + higher processors. */ + if (TARGET_CMOV + && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))) + x86_prefetch_sse = true; + break; + } + + if (ix86_tune_specified && i == pta_size) + error ("bad value (%s) for %stune=%s %s", + opts->x_ix86_tune_string, prefix, suffix, sw); + + set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes); + +#ifndef USE_IX86_FRAME_POINTER +#define USE_IX86_FRAME_POINTER 0 +#endif + +#ifndef USE_X86_64_FRAME_POINTER +#define USE_X86_64_FRAME_POINTER 0 +#endif + + /* Set the default values for switches whose default depends on TARGET_64BIT + in case they weren't overwritten by command line options. */ + if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) + { + if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer) + opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER; + if (opts->x_flag_asynchronous_unwind_tables + && !opts_set->x_flag_unwind_tables + && TARGET_64BIT_MS_ABI) + opts->x_flag_unwind_tables = 1; + if (opts->x_flag_asynchronous_unwind_tables == 2) + opts->x_flag_unwind_tables + = opts->x_flag_asynchronous_unwind_tables = 1; + if (opts->x_flag_pcc_struct_return == 2) + opts->x_flag_pcc_struct_return = 0; + } + else + { + if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer) + opts->x_flag_omit_frame_pointer + = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size); + if (opts->x_flag_asynchronous_unwind_tables == 2) + opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER; + if (opts->x_flag_pcc_struct_return == 2) + { + /* Intel MCU psABI specifies that -freg-struct-return should + be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1, + we check -miamcu so that -freg-struct-return is always + turned on if -miamcu is used. */ + if (TARGET_IAMCU_P (opts->x_target_flags)) + opts->x_flag_pcc_struct_return = 0; + else + opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; + } + } + + ix86_tune_cost = processor_target_table[ix86_tune].cost; + /* TODO: ix86_cost should be chosen at instruction or function granuality + so for cold code we use size_cost even in !optimize_size compilation. */ + if (opts->x_optimize_size) + ix86_cost = &ix86_size_cost; + else + ix86_cost = ix86_tune_cost; + + /* Arrange to set up i386_stack_locals for all functions. */ + init_machine_status = ix86_init_machine_status; + + /* Validate -mregparm= value. 
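+
+     As a usage illustration (32-bit code only): -mregparm=3 makes ordinary
+     calls pass the first three integer arguments in registers, the same
+     convention a single declaration would get from
+
+       int __attribute__ ((regparm (3))) f (int a, int b, int c);
+
+     anything above REGPARM_MAX is diagnosed and reset below.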
*/ + if (opts_set->x_ix86_regparm) + { + if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) + warning (0, "-mregparm is ignored in 64-bit mode"); + else if (TARGET_IAMCU_P (opts->x_target_flags)) + warning (0, "-mregparm is ignored for Intel MCU psABI"); + if (opts->x_ix86_regparm > REGPARM_MAX) + { + error ("-mregparm=%d is not between 0 and %d", + opts->x_ix86_regparm, REGPARM_MAX); + opts->x_ix86_regparm = 0; + } + } + if (TARGET_IAMCU_P (opts->x_target_flags) + || TARGET_64BIT_P (opts->x_ix86_isa_flags)) + opts->x_ix86_regparm = REGPARM_MAX; + + /* Default align_* from the processor table. */ + ix86_default_align (opts); + + /* Provide default for -mbranch-cost= value. */ + if (!opts_set->x_ix86_branch_cost) + opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost; + + if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) + { + opts->x_target_flags + |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags; + + /* Enable by default the SSE and MMX builtins. Do allow the user to + explicitly disable any of these. In particular, disabling SSE and + MMX for kernel code is extremely useful. */ + if (!ix86_arch_specified) + opts->x_ix86_isa_flags + |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX + | TARGET_SUBTARGET64_ISA_DEFAULT) + & ~opts->x_ix86_isa_flags_explicit); + + if (TARGET_RTD_P (opts->x_target_flags)) + warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix); + } + else + { + opts->x_target_flags + |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags; + + if (!ix86_arch_specified) + opts->x_ix86_isa_flags + |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit; + + /* i386 ABI does not specify red zone. It still makes sense to use it + when programmer takes care to stack from being destroyed. */ + if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE)) + opts->x_target_flags |= MASK_NO_RED_ZONE; + } + + /* Keep nonleaf frame pointers. */ + if (opts->x_flag_omit_frame_pointer) + opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; + else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags)) + opts->x_flag_omit_frame_pointer = 1; + + /* If we're doing fast math, we don't care about comparison order + wrt NaNs. This lets us use a shorter comparison sequence. */ + if (opts->x_flag_finite_math_only) + opts->x_target_flags &= ~MASK_IEEE_FP; + + /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, + since the insns won't need emulation. */ + if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387]) + opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387; + + /* Likewise, if the target doesn't have a 387, or we've specified + software floating point, don't use 387 inline intrinsics. */ + if (!TARGET_80387_P (opts->x_target_flags)) + opts->x_target_flags |= MASK_NO_FANCY_MATH_387; + + /* Turn on MMX builtins for -msse. */ + if (TARGET_SSE_P (opts->x_ix86_isa_flags)) + opts->x_ix86_isa_flags + |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit; + + /* Enable SSE prefetch. */ + if (TARGET_SSE_P (opts->x_ix86_isa_flags) + || (TARGET_PRFCHW_P (opts->x_ix86_isa_flags) + && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)) + || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags)) + x86_prefetch_sse = true; + + /* Enable popcnt instruction for -msse4.2 or -mabm. */ + if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags) + || TARGET_ABM_P (opts->x_ix86_isa_flags)) + opts->x_ix86_isa_flags + |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit; + + /* Enable lzcnt instruction for -mabm. 
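+     (-mabm implies both popcnt and lzcnt; popcnt was already turned on
+     just above together with -msse4.2).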
*/ + if (TARGET_ABM_P(opts->x_ix86_isa_flags)) + opts->x_ix86_isa_flags + |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit; + + /* Validate -mpreferred-stack-boundary= value or default it to + PREFERRED_STACK_BOUNDARY_DEFAULT. */ + ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT; + if (opts_set->x_ix86_preferred_stack_boundary_arg) + { + int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags) + ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2); + int max = (TARGET_SEH ? 4 : 12); + + if (opts->x_ix86_preferred_stack_boundary_arg < min + || opts->x_ix86_preferred_stack_boundary_arg > max) + { + if (min == max) + error ("-mpreferred-stack-boundary is not supported " + "for this target"); + else + error ("-mpreferred-stack-boundary=%d is not between %d and %d", + opts->x_ix86_preferred_stack_boundary_arg, min, max); + } + else + ix86_preferred_stack_boundary + = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT; + } + + /* Set the default value for -mstackrealign. */ + if (opts->x_ix86_force_align_arg_pointer == -1) + opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT; + + ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY; + + /* Validate -mincoming-stack-boundary= value or default it to + MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */ + ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary; + if (opts_set->x_ix86_incoming_stack_boundary_arg) + { + int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2; + + if (opts->x_ix86_incoming_stack_boundary_arg < min + || opts->x_ix86_incoming_stack_boundary_arg > 12) + error ("-mincoming-stack-boundary=%d is not between %d and 12", + opts->x_ix86_incoming_stack_boundary_arg, min); + else + { + ix86_user_incoming_stack_boundary + = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT; + ix86_incoming_stack_boundary + = ix86_user_incoming_stack_boundary; + } + } + +#ifndef NO_PROFILE_COUNTERS + if (flag_nop_mcount) + error ("-mnop-mcount is not compatible with this target"); +#endif + if (flag_nop_mcount && flag_pic) + error ("-mnop-mcount is not implemented for -fPIC"); + + /* Accept -msseregparm only if at least SSE support is enabled. */ + if (TARGET_SSEREGPARM_P (opts->x_target_flags) + && ! TARGET_SSE_P (opts->x_ix86_isa_flags)) + error ("%ssseregparm%s used without SSE enabled", prefix, suffix); + + if (opts_set->x_ix86_fpmath) + { + if (opts->x_ix86_fpmath & FPMATH_SSE) + { + if (!TARGET_SSE_P (opts->x_ix86_isa_flags)) + { + if (TARGET_80387_P (opts->x_target_flags)) + { + warning (0, "SSE instruction set disabled, using 387 arithmetics"); + opts->x_ix86_fpmath = FPMATH_387; + } + } + else if ((opts->x_ix86_fpmath & FPMATH_387) + && !TARGET_80387_P (opts->x_target_flags)) + { + warning (0, "387 instruction set disabled, using SSE arithmetics"); + opts->x_ix86_fpmath = FPMATH_SSE; + } + } + } + /* For all chips supporting SSE2, -mfpmath=sse performs better than + fpmath=387. The second is however default at many targets since the + extra 80bit precision of temporaries is considered to be part of ABI. + Overwrite the default at least for -ffast-math. + TODO: -mfpmath=both seems to produce same performing code with bit + smaller binaries. It is however not clear if register allocation is + ready for this setting. + Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE + codegen. We may switch to 387 with -ffast-math for size optimized + functions. 
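+
+     Concretely, a plain "-m32 -msse2 -ffast-math" compilation falls into
+     the else-if that follows and so defaults to -mfpmath=sse, while the
+     same command line without -ffast-math keeps the usual 387 default on
+     most 32-bit configurations.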
*/ + else if (fast_math_flags_set_p (&global_options) + && TARGET_SSE2_P (opts->x_ix86_isa_flags)) + opts->x_ix86_fpmath = FPMATH_SSE; + else + opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags); + + /* Use external vectorized library in vectorizing intrinsics. */ + if (opts_set->x_ix86_veclibabi_type) + switch (opts->x_ix86_veclibabi_type) + { + case ix86_veclibabi_type_svml: + ix86_veclib_handler = ix86_veclibabi_svml; + break; + + case ix86_veclibabi_type_acml: + ix86_veclib_handler = ix86_veclibabi_acml; + break; + + default: + gcc_unreachable (); + } + + if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS] + && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) + opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; + + /* If stack probes are required, the space used for large function + arguments on the stack must also be probed, so enable + -maccumulate-outgoing-args so this happens in the prologue. */ + if (TARGET_STACK_PROBE_P (opts->x_target_flags) + && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) + { + if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS) + warning (0, "stack probing requires %saccumulate-outgoing-args%s " + "for correctness", prefix, suffix); + opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; + } + + /* Stack realignment without -maccumulate-outgoing-args requires %ebp, + so enable -maccumulate-outgoing-args when %ebp is fixed. */ + if (fixed_regs[BP_REG] + && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) + { + if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS) + warning (0, "fixed ebp register requires %saccumulate-outgoing-args%s", + prefix, suffix); + opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; + } + + /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ + { + char *p; + ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); + p = strchr (internal_label_prefix, 'X'); + internal_label_prefix_len = p - internal_label_prefix; + *p = '\0'; + } + + /* When scheduling description is not available, disable scheduler pass + so it won't slow down the compilation and make x87 code slower. */ + if (!TARGET_SCHEDULE) + opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0; + + maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, + ix86_tune_cost->simultaneous_prefetches, + opts->x_param_values, + opts_set->x_param_values); + maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, + ix86_tune_cost->prefetch_block, + opts->x_param_values, + opts_set->x_param_values); + maybe_set_param_value (PARAM_L1_CACHE_SIZE, + ix86_tune_cost->l1_cache_size, + opts->x_param_values, + opts_set->x_param_values); + maybe_set_param_value (PARAM_L2_CACHE_SIZE, + ix86_tune_cost->l2_cache_size, + opts->x_param_values, + opts_set->x_param_values); + + /* Restrict number of if-converted SET insns to 1. */ + if (TARGET_ONE_IF_CONV_INSN) + maybe_set_param_value (PARAM_MAX_RTL_IF_CONVERSION_INSNS, + 1, + opts->x_param_values, + opts_set->x_param_values); + + /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */ + if (opts->x_flag_prefetch_loop_arrays < 0 + && HAVE_prefetch + && (opts->x_optimize >= 3 || opts->x_flag_profile_use) + && !opts->x_optimize_size + && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL) + opts->x_flag_prefetch_loop_arrays = 1; + + /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0) + can be opts->x_optimized to ap = __builtin_next_arg (0). 
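+
+     A sketch of the case in question (ordinary 32-bit varargs code):
+
+       #include <stdarg.h>
+
+       int
+       sum (int n, ...)
+       {
+         int s = 0;
+         va_list ap;
+         va_start (ap, n);
+         while (n-- > 0)
+           s += va_arg (ap, int);
+         va_end (ap);
+         return s;
+       }
+
+     here va_list is plain char *, so the va_start above lowers to a single
+     pointer assignment instead of going through the target hook that is
+     cleared below.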
*/ + if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack) + targetm.expand_builtin_va_start = NULL; + + if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) + { + ix86_gen_leave = gen_leave_rex64; + if (Pmode == DImode) + { + ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di; + ix86_gen_tls_local_dynamic_base_64 + = gen_tls_local_dynamic_base_64_di; + } + else + { + ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si; + ix86_gen_tls_local_dynamic_base_64 + = gen_tls_local_dynamic_base_64_si; + } + } + else + ix86_gen_leave = gen_leave; + + if (Pmode == DImode) + { + ix86_gen_add3 = gen_adddi3; + ix86_gen_sub3 = gen_subdi3; + ix86_gen_sub3_carry = gen_subdi3_carry; + ix86_gen_one_cmpl2 = gen_one_cmpldi2; + ix86_gen_andsp = gen_anddi3; + ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di; + ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi; + ix86_gen_probe_stack_range = gen_probe_stack_rangedi; + ix86_gen_monitor = gen_sse3_monitor_di; + ix86_gen_monitorx = gen_monitorx_di; + ix86_gen_clzero = gen_clzero_di; + } + else + { + ix86_gen_add3 = gen_addsi3; + ix86_gen_sub3 = gen_subsi3; + ix86_gen_sub3_carry = gen_subsi3_carry; + ix86_gen_one_cmpl2 = gen_one_cmplsi2; + ix86_gen_andsp = gen_andsi3; + ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si; + ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi; + ix86_gen_probe_stack_range = gen_probe_stack_rangesi; + ix86_gen_monitor = gen_sse3_monitor_si; + ix86_gen_monitorx = gen_monitorx_si; + ix86_gen_clzero = gen_clzero_si; + } + +#ifdef USE_IX86_CLD + /* Use -mcld by default for 32-bit code if configured with --enable-cld. */ + if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) + opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags; +#endif + + if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic) + { + if (opts->x_flag_fentry > 0) + sorry ("-mfentry isn%'t supported for 32-bit in combination " + "with -fpic"); + opts->x_flag_fentry = 0; + } + else if (TARGET_SEH) + { + if (opts->x_flag_fentry == 0) + sorry ("-mno-fentry isn%'t compatible with SEH"); + opts->x_flag_fentry = 1; + } + else if (opts->x_flag_fentry < 0) + { +#if defined(PROFILE_BEFORE_PROLOGUE) + opts->x_flag_fentry = 1; +#else + opts->x_flag_fentry = 0; +#endif + } + + if (!(opts_set->x_target_flags & MASK_VZEROUPPER)) + opts->x_target_flags |= MASK_VZEROUPPER; + if (!(opts_set->x_target_flags & MASK_STV)) + opts->x_target_flags |= MASK_STV; + /* Disable STV if -mpreferred-stack-boundary={2,3} or + -mincoming-stack-boundary={2,3} or -mstackrealign - the needed + stack realignment will be extra cost the pass doesn't take into + account and the pass can't realign the stack. */ + if (ix86_preferred_stack_boundary < 128 + || ix86_incoming_stack_boundary < 128 + || opts->x_ix86_force_align_arg_pointer) + opts->x_target_flags &= ~MASK_STV; + if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL] + && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) + opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; + if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL] + && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE)) + opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; + /* Enable 128-bit AVX instruction generation + for the auto-vectorizer. 
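+     On tunings that set X86_TUNE_AVX128_OPTIMAL (several AMD cores, as an
+     assumed example) this acts like an implicit -mprefer-avx128 unless
+     the user set or cleared the flag explicitly.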
*/ + if (TARGET_AVX128_OPTIMAL + && !(opts_set->x_target_flags & MASK_PREFER_AVX128)) + opts->x_target_flags |= MASK_PREFER_AVX128; + + if (opts->x_ix86_recip_name) + { + char *p = ASTRDUP (opts->x_ix86_recip_name); + char *q; + unsigned int mask, i; + bool invert; + + while ((q = strtok (p, ",")) != NULL) + { + p = NULL; + if (*q == '!') + { + invert = true; + q++; + } + else + invert = false; + + if (!strcmp (q, "default")) + mask = RECIP_MASK_ALL; + else + { + for (i = 0; i < ARRAY_SIZE (recip_options); i++) + if (!strcmp (q, recip_options[i].string)) + { + mask = recip_options[i].mask; + break; + } + + if (i == ARRAY_SIZE (recip_options)) + { + error ("unknown option for -mrecip=%s", q); + invert = false; + mask = RECIP_MASK_NONE; + } + } + + opts->x_recip_mask_explicit |= mask; + if (invert) + opts->x_recip_mask &= ~mask; + else + opts->x_recip_mask |= mask; + } + } + + if (TARGET_RECIP_P (opts->x_target_flags)) + opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit; + else if (opts_set->x_target_flags & MASK_RECIP) + opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit); + + /* Default long double to 64-bit for 32-bit Bionic and to __float128 + for 64-bit Bionic. Also default long double to 64-bit for Intel + MCU psABI. */ + if ((TARGET_HAS_BIONIC || TARGET_IAMCU) + && !(opts_set->x_target_flags + & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128))) + opts->x_target_flags |= (TARGET_64BIT + ? MASK_LONG_DOUBLE_128 + : MASK_LONG_DOUBLE_64); + + /* Only one of them can be active. */ + gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0 + || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0); + + /* Save the initial options in case the user does function specific + options. */ + if (main_args_p) + target_option_default_node = target_option_current_node + = build_target_option_node (opts); + + /* Handle stack protector */ + if (!opts_set->x_ix86_stack_protector_guard) + opts->x_ix86_stack_protector_guard + = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS; + + /* Handle -mmemcpy-strategy= and -mmemset-strategy= */ + if (opts->x_ix86_tune_memcpy_strategy) + { + char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy); + ix86_parse_stringop_strategy_string (str, false); + free (str); + } + + if (opts->x_ix86_tune_memset_strategy) + { + char *str = xstrdup (opts->x_ix86_tune_memset_strategy); + ix86_parse_stringop_strategy_string (str, true); + free (str); + } +} + +/* Implement the TARGET_OPTION_OVERRIDE hook. */ + +static void +ix86_option_override (void) +{ + opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g); + struct register_pass_info insert_vzeroupper_info + = { pass_insert_vzeroupper, "reload", + 1, PASS_POS_INSERT_AFTER + }; + opt_pass *pass_stv = make_pass_stv (g); + struct register_pass_info stv_info + = { pass_stv, "combine", + 1, PASS_POS_INSERT_AFTER + }; + + ix86_option_override_internal (true, &global_options, &global_options_set); + + + /* This needs to be done at start up. It's convenient to do it here. */ + register_pass (&insert_vzeroupper_info); + register_pass (&stv_info); +} + +/* Implement the TARGET_OFFLOAD_OPTIONS hook. */ +static char * +ix86_offload_options (void) +{ + if (TARGET_LP64) + return xstrdup ("-foffload-abi=lp64"); + return xstrdup ("-foffload-abi=ilp32"); +} + +/* Update register usage after having seen the compiler flags. */ + +static void +ix86_conditional_register_usage (void) +{ + int i, c_mask; + + /* For 32-bit targets, squash the REX registers. */ + if (! 
TARGET_64BIT) + { + for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++) + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; + for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; + for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; + } + + /* See the definition of CALL_USED_REGISTERS in i386.h. */ + c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI); + + CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]); + + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { + /* Set/reset conditionally defined registers from + CALL_USED_REGISTERS initializer. */ + if (call_used_regs[i] > 1) + call_used_regs[i] = !!(call_used_regs[i] & c_mask); + + /* Calculate registers of CLOBBERED_REGS register set + as call used registers from GENERAL_REGS register set. */ + if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i) + && call_used_regs[i]) + SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i); + } + + /* If MMX is disabled, squash the registers. */ + if (! TARGET_MMX) + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i)) + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; + + /* If SSE is disabled, squash the registers. */ + if (! TARGET_SSE) + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i)) + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; + + /* If the FPU is disabled, squash the registers. */ + if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387)) + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i)) + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; + + /* If AVX512F is disabled, squash the registers. */ + if (! TARGET_AVX512F) + { + for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; + + for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++) + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; + } + + /* If MPX is disabled, squash the registers. */ + if (! 
TARGET_MPX) + for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++) + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; +} + + +/* Save the current options */ + +static void +ix86_function_specific_save (struct cl_target_option *ptr, + struct gcc_options *opts) +{ + ptr->arch = ix86_arch; + ptr->schedule = ix86_schedule; + ptr->prefetch_sse = x86_prefetch_sse; + ptr->tune = ix86_tune; + ptr->branch_cost = ix86_branch_cost; + ptr->tune_defaulted = ix86_tune_defaulted; + ptr->arch_specified = ix86_arch_specified; + ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit; + ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit; + ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit; + ptr->x_ix86_arch_string = opts->x_ix86_arch_string; + ptr->x_ix86_tune_string = opts->x_ix86_tune_string; + ptr->x_ix86_cmodel = opts->x_ix86_cmodel; + ptr->x_ix86_abi = opts->x_ix86_abi; + ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect; + ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost; + ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes; + ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer; + ptr->x_ix86_force_drap = opts->x_ix86_force_drap; + ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg; + ptr->x_ix86_pmode = opts->x_ix86_pmode; + ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg; + ptr->x_ix86_recip_name = opts->x_ix86_recip_name; + ptr->x_ix86_regparm = opts->x_ix86_regparm; + ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold; + ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx; + ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard; + ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg; + ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect; + ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string; + ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy; + ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy; + ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default; + ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type; + + /* The fields are char but the variables are not; make sure the + values fit in the fields. */ + gcc_assert (ptr->arch == ix86_arch); + gcc_assert (ptr->schedule == ix86_schedule); + gcc_assert (ptr->tune == ix86_tune); + gcc_assert (ptr->branch_cost == ix86_branch_cost); +} + +/* Restore the current options */ + +static void +ix86_function_specific_restore (struct gcc_options *opts, + struct cl_target_option *ptr) +{ + enum processor_type old_tune = ix86_tune; + enum processor_type old_arch = ix86_arch; + unsigned int ix86_arch_mask; + int i; + + /* We don't change -fPIC. 
*/ + opts->x_flag_pic = flag_pic; + + ix86_arch = (enum processor_type) ptr->arch; + ix86_schedule = (enum attr_cpu) ptr->schedule; + ix86_tune = (enum processor_type) ptr->tune; + x86_prefetch_sse = ptr->prefetch_sse; + opts->x_ix86_branch_cost = ptr->branch_cost; + ix86_tune_defaulted = ptr->tune_defaulted; + ix86_arch_specified = ptr->arch_specified; + opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit; + opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit; + opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit; + opts->x_ix86_arch_string = ptr->x_ix86_arch_string; + opts->x_ix86_tune_string = ptr->x_ix86_tune_string; + opts->x_ix86_cmodel = ptr->x_ix86_cmodel; + opts->x_ix86_abi = ptr->x_ix86_abi; + opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect; + opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost; + opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes; + opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer; + opts->x_ix86_force_drap = ptr->x_ix86_force_drap; + opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg; + opts->x_ix86_pmode = ptr->x_ix86_pmode; + opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg; + opts->x_ix86_recip_name = ptr->x_ix86_recip_name; + opts->x_ix86_regparm = ptr->x_ix86_regparm; + opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold; + opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx; + opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard; + opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg; + opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect; + opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string; + opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy; + opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy; + opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default; + opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type; + ix86_tune_cost = processor_target_table[ix86_tune].cost; + /* TODO: ix86_cost should be chosen at instruction or function granuality + so for cold code we use size_cost even in !optimize_size compilation. */ + if (opts->x_optimize_size) + ix86_cost = &ix86_size_cost; + else + ix86_cost = ix86_tune_cost; + + /* Recreate the arch feature tests if the arch changed */ + if (old_arch != ix86_arch) + { + ix86_arch_mask = 1u << ix86_arch; + for (i = 0; i < X86_ARCH_LAST; ++i) + ix86_arch_features[i] + = !!(initial_ix86_arch_features[i] & ix86_arch_mask); + } + + /* Recreate the tune optimization tests */ + if (old_tune != ix86_tune) + set_ix86_tune_features (ix86_tune, false); +} + +/* Adjust target options after streaming them in. This is mainly about + reconciling them with global options. */ + +static void +ix86_function_specific_post_stream_in (struct cl_target_option *ptr) +{ + /* flag_pic is a global option, but ix86_cmodel is target saved option + partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel + for PIC, or error out. 
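+     As an assumed illustration: options streamed from an LTO object built
+     with -mcmodel=medium become CM_MEDIUM_PIC here when the final link
+     uses -fpic, and are switched back to CM_MEDIUM when it does not.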
*/ + if (flag_pic) + switch (ptr->x_ix86_cmodel) + { + case CM_SMALL: + ptr->x_ix86_cmodel = CM_SMALL_PIC; + break; + + case CM_MEDIUM: + ptr->x_ix86_cmodel = CM_MEDIUM_PIC; + break; + + case CM_LARGE: + ptr->x_ix86_cmodel = CM_LARGE_PIC; + break; + + case CM_KERNEL: + error ("code model %s does not support PIC mode", "kernel"); + break; + + default: + break; + } + else + switch (ptr->x_ix86_cmodel) + { + case CM_SMALL_PIC: + ptr->x_ix86_cmodel = CM_SMALL; + break; + + case CM_MEDIUM_PIC: + ptr->x_ix86_cmodel = CM_MEDIUM; + break; + + case CM_LARGE_PIC: + ptr->x_ix86_cmodel = CM_LARGE; + break; + + default: + break; + } +} + +/* Print the current options */ + +static void +ix86_function_specific_print (FILE *file, int indent, + struct cl_target_option *ptr) +{ + char *target_string + = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags, + NULL, NULL, ptr->x_ix86_fpmath, false); + + gcc_assert (ptr->arch < PROCESSOR_max); + fprintf (file, "%*sarch = %d (%s)\n", + indent, "", + ptr->arch, processor_target_table[ptr->arch].name); + + gcc_assert (ptr->tune < PROCESSOR_max); + fprintf (file, "%*stune = %d (%s)\n", + indent, "", + ptr->tune, processor_target_table[ptr->tune].name); + + fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost); + + if (target_string) + { + fprintf (file, "%*s%s\n", indent, "", target_string); + free (target_string); + } +} + + +/* Inner function to process the attribute((target(...))), take an argument and + set the current options from the argument. If we have a list, recursively go + over the list. */ + +static bool +ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], + struct gcc_options *opts, + struct gcc_options *opts_set, + struct gcc_options *enum_opts_set) +{ + char *next_optstr; + bool ret = true; + +#define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 } +#define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 } +#define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 } +#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M } +#define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M } + + enum ix86_opt_type + { + ix86_opt_unknown, + ix86_opt_yes, + ix86_opt_no, + ix86_opt_str, + ix86_opt_enum, + ix86_opt_isa + }; + + static const struct + { + const char *string; + size_t len; + enum ix86_opt_type type; + int opt; + int mask; + } attrs[] = { + /* isa options */ + IX86_ATTR_ISA ("3dnow", OPT_m3dnow), + IX86_ATTR_ISA ("abm", OPT_mabm), + IX86_ATTR_ISA ("bmi", OPT_mbmi), + IX86_ATTR_ISA ("bmi2", OPT_mbmi2), + IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt), + IX86_ATTR_ISA ("tbm", OPT_mtbm), + IX86_ATTR_ISA ("aes", OPT_maes), + IX86_ATTR_ISA ("sha", OPT_msha), + IX86_ATTR_ISA ("avx", OPT_mavx), + IX86_ATTR_ISA ("avx2", OPT_mavx2), + IX86_ATTR_ISA ("avx512f", OPT_mavx512f), + IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf), + IX86_ATTR_ISA ("avx512er", OPT_mavx512er), + IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd), + IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq), + IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw), + IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl), + IX86_ATTR_ISA ("mmx", OPT_mmmx), + IX86_ATTR_ISA ("pclmul", OPT_mpclmul), + IX86_ATTR_ISA ("popcnt", OPT_mpopcnt), + IX86_ATTR_ISA ("sse", OPT_msse), + IX86_ATTR_ISA ("sse2", OPT_msse2), + IX86_ATTR_ISA ("sse3", OPT_msse3), + IX86_ATTR_ISA ("sse4", OPT_msse4), + IX86_ATTR_ISA ("sse4.1", OPT_msse4_1), + IX86_ATTR_ISA ("sse4.2", OPT_msse4_2), + IX86_ATTR_ISA ("sse4a", OPT_msse4a), + IX86_ATTR_ISA ("ssse3", OPT_mssse3), + IX86_ATTR_ISA ("fma4", 
OPT_mfma4), + IX86_ATTR_ISA ("fma", OPT_mfma), + IX86_ATTR_ISA ("xop", OPT_mxop), + IX86_ATTR_ISA ("lwp", OPT_mlwp), + IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase), + IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd), + IX86_ATTR_ISA ("f16c", OPT_mf16c), + IX86_ATTR_ISA ("rtm", OPT_mrtm), + IX86_ATTR_ISA ("hle", OPT_mhle), + IX86_ATTR_ISA ("prfchw", OPT_mprfchw), + IX86_ATTR_ISA ("rdseed", OPT_mrdseed), + IX86_ATTR_ISA ("adx", OPT_madx), + IX86_ATTR_ISA ("fxsr", OPT_mfxsr), + IX86_ATTR_ISA ("xsave", OPT_mxsave), + IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt), + IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1), + IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt), + IX86_ATTR_ISA ("xsavec", OPT_mxsavec), + IX86_ATTR_ISA ("xsaves", OPT_mxsaves), + IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi), + IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma), + IX86_ATTR_ISA ("clwb", OPT_mclwb), + IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx), + IX86_ATTR_ISA ("clzero", OPT_mclzero), + IX86_ATTR_ISA ("pku", OPT_mpku), + + /* enum options */ + IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), + + /* string options */ + IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH), + IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE), + + /* flag options */ + IX86_ATTR_YES ("cld", + OPT_mcld, + MASK_CLD), + + IX86_ATTR_NO ("fancy-math-387", + OPT_mfancy_math_387, + MASK_NO_FANCY_MATH_387), + + IX86_ATTR_YES ("ieee-fp", + OPT_mieee_fp, + MASK_IEEE_FP), + + IX86_ATTR_YES ("inline-all-stringops", + OPT_minline_all_stringops, + MASK_INLINE_ALL_STRINGOPS), + + IX86_ATTR_YES ("inline-stringops-dynamically", + OPT_minline_stringops_dynamically, + MASK_INLINE_STRINGOPS_DYNAMICALLY), + + IX86_ATTR_NO ("align-stringops", + OPT_mno_align_stringops, + MASK_NO_ALIGN_STRINGOPS), + + IX86_ATTR_YES ("recip", + OPT_mrecip, + MASK_RECIP), + + }; + + /* If this is a list, recurse to get the options. */ + if (TREE_CODE (args) == TREE_LIST) + { + bool ret = true; + + for (; args; args = TREE_CHAIN (args)) + if (TREE_VALUE (args) + && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), + p_strings, opts, opts_set, + enum_opts_set)) + ret = false; + + return ret; + } + + else if (TREE_CODE (args) != STRING_CST) + { + error ("attribute % argument not a string"); + return false; + } + + /* Handle multiple arguments separated by commas. */ + next_optstr = ASTRDUP (TREE_STRING_POINTER (args)); + + while (next_optstr && *next_optstr != '\0') + { + char *p = next_optstr; + char *orig_p = p; + char *comma = strchr (next_optstr, ','); + const char *opt_string; + size_t len, opt_len; + int opt; + bool opt_set_p; + char ch; + unsigned i; + enum ix86_opt_type type = ix86_opt_unknown; + int mask = 0; + + if (comma) + { + *comma = '\0'; + len = comma - next_optstr; + next_optstr = comma + 1; + } + else + { + len = strlen (p); + next_optstr = NULL; + } + + /* Recognize no-xxx. */ + if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-') + { + opt_set_p = false; + p += 3; + len -= 3; + } + else + opt_set_p = true; + + /* Find the option. */ + ch = *p; + opt = N_OPTS; + for (i = 0; i < ARRAY_SIZE (attrs); i++) + { + type = attrs[i].type; + opt_len = attrs[i].len; + if (ch == attrs[i].string[0] + && ((type != ix86_opt_str && type != ix86_opt_enum) + ? len == opt_len + : len > opt_len) + && memcmp (p, attrs[i].string, opt_len) == 0) + { + opt = attrs[i].opt; + mask = attrs[i].mask; + opt_string = attrs[i].string; + break; + } + } + + /* Process the option. 
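+     As an assumed illustration,
+        __attribute__ ((target ("no-avx,fpmath=sse,arch=core2")))
+     is split at the commas into an ISA entry with opt_set_p clear, an
+     enum entry and a string entry, each dispatched on its type below.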
*/ + if (opt == N_OPTS) + { + error ("attribute(target(\"%s\")) is unknown", orig_p); + ret = false; + } + + else if (type == ix86_opt_isa) + { + struct cl_decoded_option decoded; + + generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded); + ix86_handle_option (opts, opts_set, + &decoded, input_location); + } + + else if (type == ix86_opt_yes || type == ix86_opt_no) + { + if (type == ix86_opt_no) + opt_set_p = !opt_set_p; + + if (opt_set_p) + opts->x_target_flags |= mask; + else + opts->x_target_flags &= ~mask; + } + + else if (type == ix86_opt_str) + { + if (p_strings[opt]) + { + error ("option(\"%s\") was already specified", opt_string); + ret = false; + } + else + p_strings[opt] = xstrdup (p + opt_len); + } + + else if (type == ix86_opt_enum) + { + bool arg_ok; + int value; + + arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET); + if (arg_ok) + set_option (opts, enum_opts_set, opt, value, + p + opt_len, DK_UNSPECIFIED, input_location, + global_dc); + else + { + error ("attribute(target(\"%s\")) is unknown", orig_p); + ret = false; + } + } + + else + gcc_unreachable (); + } + + return ret; +} + +/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */ + +tree +ix86_valid_target_attribute_tree (tree args, + struct gcc_options *opts, + struct gcc_options *opts_set) +{ + const char *orig_arch_string = opts->x_ix86_arch_string; + const char *orig_tune_string = opts->x_ix86_tune_string; + enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath; + int orig_tune_defaulted = ix86_tune_defaulted; + int orig_arch_specified = ix86_arch_specified; + char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL }; + tree t = NULL_TREE; + int i; + struct cl_target_option *def + = TREE_TARGET_OPTION (target_option_default_node); + struct gcc_options enum_opts_set; + + memset (&enum_opts_set, 0, sizeof (enum_opts_set)); + + /* Process each of the options on the chain. */ + if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts, + opts_set, &enum_opts_set)) + return error_mark_node; + + /* If the changed options are different from the default, rerun + ix86_option_override_internal, and then save the options away. + The string options are attribute options, and will be undone + when we copy the save structure. */ + if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags + || opts->x_target_flags != def->x_target_flags + || option_strings[IX86_FUNCTION_SPECIFIC_ARCH] + || option_strings[IX86_FUNCTION_SPECIFIC_TUNE] + || enum_opts_set.x_ix86_fpmath) + { + /* If we are using the default tune= or arch=, undo the string assigned, + and use the default. */ + if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]) + { + opts->x_ix86_arch_string + = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]); + + /* If arch= is set, clear all bits in x_ix86_isa_flags, + except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */ + opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT + | OPTION_MASK_ABI_64 + | OPTION_MASK_ABI_X32 + | OPTION_MASK_CODE16); + + } + else if (!orig_arch_specified) + opts->x_ix86_arch_string = NULL; + + if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]) + opts->x_ix86_tune_string + = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]); + else if (orig_tune_defaulted) + opts->x_ix86_tune_string = NULL; + + /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. 
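+     As an assumed illustration, attribute ((target ("sse2"))) on a 32-bit
+     function that gives no fpmath= clause picks up FPMATH_SSE here (plus
+     FPMATH_387 when the 387 is still enabled).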
*/ + if (enum_opts_set.x_ix86_fpmath) + opts_set->x_ix86_fpmath = (enum fpmath_unit) 1; + else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) + && TARGET_SSE_P (opts->x_ix86_isa_flags)) + { + if (TARGET_80387_P (opts->x_target_flags)) + opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE + | FPMATH_387); + else + opts->x_ix86_fpmath = (enum fpmath_unit) FPMATH_SSE; + opts_set->x_ix86_fpmath = (enum fpmath_unit) 1; + } + + /* Do any overrides, such as arch=xxx, or tune=xxx support. */ + ix86_option_override_internal (false, opts, opts_set); + + /* Add any builtin functions with the new isa if any. */ + ix86_add_new_builtins (opts->x_ix86_isa_flags); + + /* Save the current options unless we are validating options for + #pragma. */ + t = build_target_option_node (opts); + + opts->x_ix86_arch_string = orig_arch_string; + opts->x_ix86_tune_string = orig_tune_string; + opts_set->x_ix86_fpmath = orig_fpmath_set; + + /* Free up memory allocated to hold the strings */ + for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++) + free (option_strings[i]); + } + + return t; +} + +/* Hook to validate attribute((target("string"))). */ + +static bool +ix86_valid_target_attribute_p (tree fndecl, + tree ARG_UNUSED (name), + tree args, + int ARG_UNUSED (flags)) +{ + struct gcc_options func_options; + tree new_target, new_optimize; + bool ret = true; + + /* attribute((target("default"))) does nothing, beyond + affecting multi-versioning. */ + if (TREE_VALUE (args) + && TREE_CODE (TREE_VALUE (args)) == STRING_CST + && TREE_CHAIN (args) == NULL_TREE + && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0) + return true; + + tree old_optimize = build_optimization_node (&global_options); + + /* Get the optimization options of the current function. */ + tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); + + if (!func_optimize) + func_optimize = old_optimize; + + /* Init func_options. */ + memset (&func_options, 0, sizeof (func_options)); + init_options_struct (&func_options, NULL); + lang_hooks.init_options_struct (&func_options); + + cl_optimization_restore (&func_options, + TREE_OPTIMIZATION (func_optimize)); + + /* Initialize func_options to the default before its target options can + be set. */ + cl_target_option_restore (&func_options, + TREE_TARGET_OPTION (target_option_default_node)); + + new_target = ix86_valid_target_attribute_tree (args, &func_options, + &global_options_set); + + new_optimize = build_optimization_node (&func_options); + + if (new_target == error_mark_node) + ret = false; + + else if (fndecl && new_target) + { + DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target; + + if (old_optimize != new_optimize) + DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; + } + + finalize_options_struct (&func_options); + + return ret; +} + + +/* Hook to determine if one function can safely inline another. */ + +static bool +ix86_can_inline_p (tree caller, tree callee) +{ + bool ret = false; + tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); + tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); + + /* If callee has no option attributes, then it is ok to inline. */ + if (!callee_tree) + ret = true; + + /* If caller has no option attributes, but callee does then it is not ok to + inline. */ + else if (!caller_tree) + ret = false; + + else + { + struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); + struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); + + /* Callee's isa options should a subset of the caller's, i.e. 
a SSE4 function + can inline a SSE2 function but a SSE2 function can't inline a SSE4 + function. */ + if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags) + != callee_opts->x_ix86_isa_flags) + ret = false; + + /* See if we have the same non-isa options. */ + else if (caller_opts->x_target_flags != callee_opts->x_target_flags) + ret = false; + + /* See if arch, tune, etc. are the same. */ + else if (caller_opts->arch != callee_opts->arch) + ret = false; + + else if (caller_opts->tune != callee_opts->tune) + ret = false; + + else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath) + ret = false; + + else if (caller_opts->branch_cost != callee_opts->branch_cost) + ret = false; + + else + ret = true; + } + + return ret; +} + + +/* Remember the last target of ix86_set_current_function. */ +static GTY(()) tree ix86_previous_fndecl; + +/* Set targets globals to the default (or current #pragma GCC target + if active). Invalidate ix86_previous_fndecl cache. */ + +void +ix86_reset_previous_fndecl (void) +{ + tree new_tree = target_option_current_node; + cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree)); + if (TREE_TARGET_GLOBALS (new_tree)) + restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); + else if (new_tree == target_option_default_node) + restore_target_globals (&default_target_globals); + else + TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); + ix86_previous_fndecl = NULL_TREE; +} + +/* Establish appropriate back-end context for processing the function + FNDECL. The argument might be NULL to indicate processing at top + level, outside of any function scope. */ +static void +ix86_set_current_function (tree fndecl) +{ + /* Only change the context if the function changes. This hook is called + several times in the course of compiling a function, and we don't want to + slow things down too much or call target_reinit when it isn't safe. */ + if (fndecl == ix86_previous_fndecl) + return; + + tree old_tree; + if (ix86_previous_fndecl == NULL_TREE) + old_tree = target_option_current_node; + else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)) + old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl); + else + old_tree = target_option_default_node; + + if (fndecl == NULL_TREE) + { + if (old_tree != target_option_current_node) + ix86_reset_previous_fndecl (); + return; + } + + tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); + if (new_tree == NULL_TREE) + new_tree = target_option_default_node; + + if (old_tree != new_tree) + { + cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree)); + if (TREE_TARGET_GLOBALS (new_tree)) + restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); + else if (new_tree == target_option_default_node) + restore_target_globals (&default_target_globals); + else + TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); + } + ix86_previous_fndecl = fndecl; + + /* 64-bit MS and SYSV ABI have different set of call used registers. + Avoid expensive re-initialization of init_regs each time we switch + function context. */ + if (TARGET_64BIT + && (call_used_regs[SI_REG] + == (cfun->machine->call_abi == MS_ABI))) + reinit_regs (); +} + + +/* Return true if this goes in large data/bss. */ + +static bool +ix86_in_large_data_p (tree exp) +{ + if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC) + return false; + + /* Functions are never large data. 
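+     (Only CM_MEDIUM / CM_MEDIUM_PIC reach this point at all, per the
+     check above.)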
*/ + if (TREE_CODE (exp) == FUNCTION_DECL) + return false; + + /* Automatic variables are never large data. */ + if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp)) + return false; + + if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) + { + const char *section = DECL_SECTION_NAME (exp); + if (strcmp (section, ".ldata") == 0 + || strcmp (section, ".lbss") == 0) + return true; + return false; + } + else + { + HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); + + /* If this is an incomplete type with size 0, then we can't put it + in data because it might be too big when completed. Also, + int_size_in_bytes returns -1 if size can vary or is larger than + an integer in which case also it is safer to assume that it goes in + large data. */ + if (size <= 0 || size > ix86_section_threshold) + return true; + } + + return false; +} + +/* Switch to the appropriate section for output of DECL. + DECL is either a `VAR_DECL' node or a constant of some sort. + RELOC indicates whether forming the initial value of DECL requires + link-time relocations. */ + +ATTRIBUTE_UNUSED static section * +x86_64_elf_select_section (tree decl, int reloc, + unsigned HOST_WIDE_INT align) +{ + if (ix86_in_large_data_p (decl)) + { + const char *sname = NULL; + unsigned int flags = SECTION_WRITE; + switch (categorize_decl_for_section (decl, reloc)) + { + case SECCAT_DATA: + sname = ".ldata"; + break; + case SECCAT_DATA_REL: + sname = ".ldata.rel"; + break; + case SECCAT_DATA_REL_LOCAL: + sname = ".ldata.rel.local"; + break; + case SECCAT_DATA_REL_RO: + sname = ".ldata.rel.ro"; + break; + case SECCAT_DATA_REL_RO_LOCAL: + sname = ".ldata.rel.ro.local"; + break; + case SECCAT_BSS: + sname = ".lbss"; + flags |= SECTION_BSS; + break; + case SECCAT_RODATA: + case SECCAT_RODATA_MERGE_STR: + case SECCAT_RODATA_MERGE_STR_INIT: + case SECCAT_RODATA_MERGE_CONST: + sname = ".lrodata"; + flags = 0; + break; + case SECCAT_SRODATA: + case SECCAT_SDATA: + case SECCAT_SBSS: + gcc_unreachable (); + case SECCAT_TEXT: + case SECCAT_TDATA: + case SECCAT_TBSS: + /* We don't split these for medium model. Place them into + default sections and hope for best. */ + break; + } + if (sname) + { + /* We might get called with string constants, but get_named_section + doesn't like them as they are not DECLs. Also, we need to set + flags in that case. */ + if (!DECL_P (decl)) + return get_section (sname, flags, NULL); + return get_named_section (decl, sname, reloc); + } + } + return default_elf_select_section (decl, reloc, align); +} + +/* Select a set of attributes for section NAME based on the properties + of DECL and whether or not RELOC indicates that DECL's initializer + might contain runtime relocations. */ + +static unsigned int ATTRIBUTE_UNUSED +x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) +{ + unsigned int flags = default_section_type_flags (decl, name, reloc); + + if (decl == NULL_TREE + && (strcmp (name, ".ldata.rel.ro") == 0 + || strcmp (name, ".ldata.rel.ro.local") == 0)) + flags |= SECTION_RELRO; + + if (strcmp (name, ".lbss") == 0 + || strncmp (name, ".lbss.", 5) == 0 + || strncmp (name, ".gnu.linkonce.lb.", 16) == 0) + flags |= SECTION_BSS; + + return flags; +} + +/* Build up a unique section name, expressed as a + STRING_CST node, and assign it to DECL_SECTION_NAME (decl). + RELOC indicates whether the initial value of EXP requires + link-time relocations. 
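+     As an assumed example, a one-only variable "bar" that lives in large
+     data gets the section name ".gnu.linkonce.ld.bar", while an ordinary
+     large-data variable gets ".ldata.bar".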
*/ + +static void ATTRIBUTE_UNUSED +x86_64_elf_unique_section (tree decl, int reloc) +{ + if (ix86_in_large_data_p (decl)) + { + const char *prefix = NULL; + /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */ + bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP; + + switch (categorize_decl_for_section (decl, reloc)) + { + case SECCAT_DATA: + case SECCAT_DATA_REL: + case SECCAT_DATA_REL_LOCAL: + case SECCAT_DATA_REL_RO: + case SECCAT_DATA_REL_RO_LOCAL: + prefix = one_only ? ".ld" : ".ldata"; + break; + case SECCAT_BSS: + prefix = one_only ? ".lb" : ".lbss"; + break; + case SECCAT_RODATA: + case SECCAT_RODATA_MERGE_STR: + case SECCAT_RODATA_MERGE_STR_INIT: + case SECCAT_RODATA_MERGE_CONST: + prefix = one_only ? ".lr" : ".lrodata"; + break; + case SECCAT_SRODATA: + case SECCAT_SDATA: + case SECCAT_SBSS: + gcc_unreachable (); + case SECCAT_TEXT: + case SECCAT_TDATA: + case SECCAT_TBSS: + /* We don't split these for medium model. Place them into + default sections and hope for best. */ + break; + } + if (prefix) + { + const char *name, *linkonce; + char *string; + + name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); + name = targetm.strip_name_encoding (name); + + /* If we're using one_only, then there needs to be a .gnu.linkonce + prefix to the section name. */ + linkonce = one_only ? ".gnu.linkonce" : ""; + + string = ACONCAT ((linkonce, prefix, ".", name, NULL)); + + set_decl_section_name (decl, string); + return; + } + } + default_unique_section (decl, reloc); +} + +#ifdef COMMON_ASM_OP +/* This says how to output assembler code to declare an + uninitialized external linkage data object. + + For medium model x86-64 we need to use .largecomm opcode for + large objects. */ +void +x86_elf_aligned_common (FILE *file, + const char *name, unsigned HOST_WIDE_INT size, + int align) +{ + if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) + && size > (unsigned int)ix86_section_threshold) + fputs ("\t.largecomm\t", file); + else + fputs (COMMON_ASM_OP, file); + assemble_name (file, name); + fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n", + size, align / BITS_PER_UNIT); +} +#endif + +/* Utility function for targets to use in implementing + ASM_OUTPUT_ALIGNED_BSS. */ + +void +x86_output_aligned_bss (FILE *file, tree decl, const char *name, + unsigned HOST_WIDE_INT size, int align) +{ + if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) + && size > (unsigned int)ix86_section_threshold) + switch_to_section (get_named_section (decl, ".lbss", 0)); + else + switch_to_section (bss_section); + ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); +#ifdef ASM_DECLARE_OBJECT_NAME + last_assemble_variable_decl = decl; + ASM_DECLARE_OBJECT_NAME (file, name, decl); +#else + /* Standard thing is just output label for the object. */ + ASM_OUTPUT_LABEL (file, name); +#endif /* ASM_DECLARE_OBJECT_NAME */ + ASM_OUTPUT_SKIP (file, size ? size : 1); +} + +/* Decide whether we must probe the stack before any space allocation + on this target. It's essentially TARGET_STACK_PROBE except when + -fstack-check causes the stack to be already probed differently. */ + +bool +ix86_target_stack_probe (void) +{ + /* Do not probe the stack twice if static stack checking is enabled. */ + if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) + return false; + + return TARGET_STACK_PROBE; +} + +/* Decide whether we can make a sibling call to a function. 
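+     (A sibcall reuses the caller's frame for the tail call, so the checks
+     below reject cases where stack alignment, PLT register use or the
+     return-value location would differ between caller and callee.)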
DECL is the + declaration of the function being targeted by the call and EXP is the + CALL_EXPR representing the call. */ + +static bool +ix86_function_ok_for_sibcall (tree decl, tree exp) +{ + tree type, decl_or_type; + rtx a, b; + bool bind_global = decl && !targetm.binds_local_p (decl); + + /* If we are generating position-independent code, we cannot sibcall + optimize direct calls to global functions, as the PLT requires + %ebx be live. (Darwin does not have a PLT.) */ + if (!TARGET_MACHO + && !TARGET_64BIT + && flag_pic + && flag_plt + && bind_global) + return false; + + /* If we need to align the outgoing stack, then sibcalling would + unalign the stack, which may break the called function. */ + if (ix86_minimum_incoming_stack_boundary (true) + < PREFERRED_STACK_BOUNDARY) + return false; + + if (decl) + { + decl_or_type = decl; + type = TREE_TYPE (decl); + } + else + { + /* We're looking at the CALL_EXPR, we need the type of the function. */ + type = CALL_EXPR_FN (exp); /* pointer expression */ + type = TREE_TYPE (type); /* pointer type */ + type = TREE_TYPE (type); /* function type */ + decl_or_type = type; + } + + /* Check that the return value locations are the same. Like + if we are returning floats on the 80387 register stack, we cannot + make a sibcall from a function that doesn't return a float to a + function that does or, conversely, from a function that does return + a float to a function that doesn't; the necessary stack adjustment + would not be executed. This is also the place we notice + differences in the return value ABI. Note that it is ok for one + of the functions to have void return type as long as the return + value of the other is passed in a register. */ + a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false); + b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), + cfun->decl, false); + if (STACK_REG_P (a) || STACK_REG_P (b)) + { + if (!rtx_equal_p (a, b)) + return false; + } + else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) + ; + else if (!rtx_equal_p (a, b)) + return false; + + if (TARGET_64BIT) + { + /* The SYSV ABI has more call-clobbered registers; + disallow sibcalls from MS to SYSV. */ + if (cfun->machine->call_abi == MS_ABI + && ix86_function_type_abi (type) == SYSV_ABI) + return false; + } + else + { + /* If this call is indirect, we'll need to be able to use a + call-clobbered register for the address of the target function. + Make sure that all such registers are not used for passing + parameters. Note that DLLIMPORT functions and call to global + function via GOT slot are indirect. */ + if (!decl + || (bind_global && flag_pic && !flag_plt) + || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))) + { + /* Check if regparm >= 3 since arg_reg_available is set to + false if regparm == 0. If regparm is 1 or 2, there is + always a call-clobbered register available. + + ??? The symbol indirect call doesn't need a call-clobbered + register. But we don't know if this is a symbol indirect + call or not here. */ + if (ix86_function_regparm (type, NULL) >= 3 + && !cfun->machine->arg_reg_available) + return false; + } + } + + /* Otherwise okay. That also includes certain types of indirect calls. */ + return true; +} + +/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall", + and "sseregparm" calling convention attributes; + arguments as in struct attribute_spec.handler. 
*/ + +static tree +ix86_handle_cconv_attribute (tree *node, tree name, + tree args, + int, + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_TYPE + && TREE_CODE (*node) != METHOD_TYPE + && TREE_CODE (*node) != FIELD_DECL + && TREE_CODE (*node) != TYPE_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + return NULL_TREE; + } + + /* Can combine regparm with all attributes but fastcall, and thiscall. */ + if (is_attribute_p ("regparm", name)) + { + tree cst; + + if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) + { + error ("fastcall and regparm attributes are not compatible"); + } + + if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) + { + error ("regparam and thiscall attributes are not compatible"); + } + + cst = TREE_VALUE (args); + if (TREE_CODE (cst) != INTEGER_CST) + { + warning (OPT_Wattributes, + "%qE attribute requires an integer constant argument", + name); + *no_add_attrs = true; + } + else if (compare_tree_int (cst, REGPARM_MAX) > 0) + { + warning (OPT_Wattributes, "argument to %qE attribute larger than %d", + name, REGPARM_MAX); + *no_add_attrs = true; + } + + return NULL_TREE; + } + + if (TARGET_64BIT) + { + /* Do not warn when emulating the MS ABI. */ + if ((TREE_CODE (*node) != FUNCTION_TYPE + && TREE_CODE (*node) != METHOD_TYPE) + || ix86_function_type_abi (*node) != MS_ABI) + warning (OPT_Wattributes, "%qE attribute ignored", + name); + *no_add_attrs = true; + return NULL_TREE; + } + + /* Can combine fastcall with stdcall (redundant) and sseregparm. */ + if (is_attribute_p ("fastcall", name)) + { + if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) + { + error ("fastcall and cdecl attributes are not compatible"); + } + if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) + { + error ("fastcall and stdcall attributes are not compatible"); + } + if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node))) + { + error ("fastcall and regparm attributes are not compatible"); + } + if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) + { + error ("fastcall and thiscall attributes are not compatible"); + } + } + + /* Can combine stdcall with fastcall (redundant), regparm and + sseregparm. */ + else if (is_attribute_p ("stdcall", name)) + { + if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) + { + error ("stdcall and cdecl attributes are not compatible"); + } + if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) + { + error ("stdcall and fastcall attributes are not compatible"); + } + if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) + { + error ("stdcall and thiscall attributes are not compatible"); + } + } + + /* Can combine cdecl with regparm and sseregparm. 
*/ + else if (is_attribute_p ("cdecl", name)) + { + if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) + { + error ("stdcall and cdecl attributes are not compatible"); + } + if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) + { + error ("fastcall and cdecl attributes are not compatible"); + } + if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) + { + error ("cdecl and thiscall attributes are not compatible"); + } + } + else if (is_attribute_p ("thiscall", name)) + { + if (TREE_CODE (*node) != METHOD_TYPE && pedantic) + warning (OPT_Wattributes, "%qE attribute is used for non-class method", + name); + if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) + { + error ("stdcall and thiscall attributes are not compatible"); + } + if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) + { + error ("fastcall and thiscall attributes are not compatible"); + } + if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) + { + error ("cdecl and thiscall attributes are not compatible"); + } + } + + /* Can combine sseregparm with all attributes. */ + + return NULL_TREE; +} + +/* The transactional memory builtins are implicitly regparm or fastcall + depending on the ABI. Override the generic do-nothing attribute that + these builtins were declared with, and replace it with one of the two + attributes that we expect elsewhere. */ + +static tree +ix86_handle_tm_regparm_attribute (tree *node, tree, tree, + int flags, bool *no_add_attrs) +{ + tree alt; + + /* In no case do we want to add the placeholder attribute. */ + *no_add_attrs = true; + + /* The 64-bit ABI is unchanged for transactional memory. */ + if (TARGET_64BIT) + return NULL_TREE; + + /* ??? Is there a better way to validate 32-bit windows? We have + cfun->machine->call_abi, but that seems to be set only for 64-bit. */ + if (CHECK_STACK_LIMIT > 0) + alt = tree_cons (get_identifier ("fastcall"), NULL, NULL); + else + { + alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL); + alt = tree_cons (get_identifier ("regparm"), alt, NULL); + } + decl_attributes (node, alt, flags); + + return NULL_TREE; +} + +/* This function determines from TYPE the calling-convention. */ + +unsigned int +ix86_get_callcvt (const_tree type) +{ + unsigned int ret = 0; + bool is_stdarg; + tree attrs; + + if (TARGET_64BIT) + return IX86_CALLCVT_CDECL; + + attrs = TYPE_ATTRIBUTES (type); + if (attrs != NULL_TREE) + { + if (lookup_attribute ("cdecl", attrs)) + ret |= IX86_CALLCVT_CDECL; + else if (lookup_attribute ("stdcall", attrs)) + ret |= IX86_CALLCVT_STDCALL; + else if (lookup_attribute ("fastcall", attrs)) + ret |= IX86_CALLCVT_FASTCALL; + else if (lookup_attribute ("thiscall", attrs)) + ret |= IX86_CALLCVT_THISCALL; + + /* Regparam isn't allowed for thiscall and fastcall. */ + if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0) + { + if (lookup_attribute ("regparm", attrs)) + ret |= IX86_CALLCVT_REGPARM; + if (lookup_attribute ("sseregparm", attrs)) + ret |= IX86_CALLCVT_SSEREGPARM; + } + + if (IX86_BASE_CALLCVT(ret) != 0) + return ret; + } + + is_stdarg = stdarg_p (type); + if (TARGET_RTD && !is_stdarg) + return IX86_CALLCVT_STDCALL | ret; + + if (ret != 0 + || is_stdarg + || TREE_CODE (type) != METHOD_TYPE + || ix86_function_type_abi (type) != MS_ABI) + return IX86_CALLCVT_CDECL | ret; + + return IX86_CALLCVT_THISCALL; +} + +/* Return 0 if the attributes for two types are incompatible, 1 if they + are compatible, and 2 if they are nearly compatible (which causes a + warning to be generated). 
*/ + +static int +ix86_comp_type_attributes (const_tree type1, const_tree type2) +{ + unsigned int ccvt1, ccvt2; + + if (TREE_CODE (type1) != FUNCTION_TYPE + && TREE_CODE (type1) != METHOD_TYPE) + return 1; + + ccvt1 = ix86_get_callcvt (type1); + ccvt2 = ix86_get_callcvt (type2); + if (ccvt1 != ccvt2) + return 0; + if (ix86_function_regparm (type1, NULL) + != ix86_function_regparm (type2, NULL)) + return 0; + + return 1; +} + +/* Return the regparm value for a function with the indicated TYPE and DECL. + DECL may be NULL when calling function indirectly + or considering a libcall. */ + +static int +ix86_function_regparm (const_tree type, const_tree decl) +{ + tree attr; + int regparm; + unsigned int ccvt; + + if (TARGET_64BIT) + return (ix86_function_type_abi (type) == SYSV_ABI + ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX); + ccvt = ix86_get_callcvt (type); + regparm = ix86_regparm; + + if ((ccvt & IX86_CALLCVT_REGPARM) != 0) + { + attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); + if (attr) + { + regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); + return regparm; + } + } + else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) + return 2; + else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) + return 1; + + /* Use register calling convention for local functions when possible. */ + if (decl + && TREE_CODE (decl) == FUNCTION_DECL) + { + cgraph_node *target = cgraph_node::get (decl); + if (target) + target = target->function_symbol (); + + /* Caller and callee must agree on the calling convention, so + checking here just optimize means that with + __attribute__((optimize (...))) caller could use regparm convention + and callee not, or vice versa. Instead look at whether the callee + is optimized or not. */ + if (target && opt_for_fn (target->decl, optimize) + && !(profile_flag && !flag_fentry)) + { + cgraph_local_info *i = &target->local; + if (i && i->local && i->can_change_signature) + { + int local_regparm, globals = 0, regno; + + /* Make sure no regparm register is taken by a + fixed register variable. */ + for (local_regparm = 0; local_regparm < REGPARM_MAX; + local_regparm++) + if (fixed_regs[local_regparm]) + break; + + /* We don't want to use regparm(3) for nested functions as + these use a static chain pointer in the third argument. */ + if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl)) + local_regparm = 2; + + /* Save a register for the split stack. */ + if (local_regparm == 3 && flag_split_stack) + local_regparm = 2; + + /* Each fixed register usage increases register pressure, + so less registers should be used for argument passing. + This functionality can be overriden by an explicit + regparm value. */ + for (regno = AX_REG; regno <= DI_REG; regno++) + if (fixed_regs[regno]) + globals++; + + local_regparm + = globals < local_regparm ? local_regparm - globals : 0; + + if (local_regparm > regparm) + regparm = local_regparm; + } + } + } + + return regparm; +} + +/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and + DFmode (2) arguments in SSE registers for a function with the + indicated TYPE and DECL. DECL may be NULL when calling function + indirectly or considering a libcall. Return -1 if any FP parameter + should be rejected by error. This is used in siutation we imply SSE + calling convetion but the function is called from another function with + SSE disabled. Otherwise return 0. 
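+     As an assumed illustration, a 32-bit function declared with
+     __attribute__ ((sseregparm)) and compiled with -msse returns 2 here,
+     so its SFmode and DFmode arguments are passed in %xmm registers.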
*/ + +static int +ix86_function_sseregparm (const_tree type, const_tree decl, bool warn) +{ + gcc_assert (!TARGET_64BIT); + + /* Use SSE registers to pass SFmode and DFmode arguments if requested + by the sseregparm attribute. */ + if (TARGET_SSEREGPARM + || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type)))) + { + if (!TARGET_SSE) + { + if (warn) + { + if (decl) + error ("calling %qD with attribute sseregparm without " + "SSE/SSE2 enabled", decl); + else + error ("calling %qT with attribute sseregparm without " + "SSE/SSE2 enabled", type); + } + return 0; + } + + return 2; + } + + if (!decl) + return 0; + + cgraph_node *target = cgraph_node::get (decl); + if (target) + target = target->function_symbol (); + + /* For local functions, pass up to SSE_REGPARM_MAX SFmode + (and DFmode for SSE2) arguments in SSE registers. */ + if (target + /* TARGET_SSE_MATH */ + && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE) + && opt_for_fn (target->decl, optimize) + && !(profile_flag && !flag_fentry)) + { + cgraph_local_info *i = &target->local; + if (i && i->local && i->can_change_signature) + { + /* Refuse to produce wrong code when local function with SSE enabled + is called from SSE disabled function. + FIXME: We need a way to detect these cases cross-ltrans partition + and avoid using SSE calling conventions on local functions called + from function with SSE disabled. For now at least delay the + warning until we know we are going to produce wrong code. + See PR66047 */ + if (!TARGET_SSE && warn) + return -1; + return TARGET_SSE2_P (target_opts_for_fn (target->decl) + ->x_ix86_isa_flags) ? 2 : 1; + } + } + + return 0; +} + +/* Return true if EAX is live at the start of the function. Used by + ix86_expand_prologue to determine if we need special help before + calling allocate_stack_worker. */ + +static bool +ix86_eax_live_at_start_p (void) +{ + /* Cheat. Don't bother working forward from ix86_function_regparm + to the function type to whether an actual argument is located in + eax. Instead just look at cfg info, which is still close enough + to correct at this point. This gives false positives for broken + functions that might use uninitialized data that happens to be + allocated in eax, but who cares? */ + return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0); +} + +static bool +ix86_keep_aggregate_return_pointer (tree fntype) +{ + tree attr; + + if (!TARGET_64BIT) + { + attr = lookup_attribute ("callee_pop_aggregate_return", + TYPE_ATTRIBUTES (fntype)); + if (attr) + return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0); + + /* For 32-bit MS-ABI the default is to keep aggregate + return pointer. */ + if (ix86_function_type_abi (fntype) == MS_ABI) + return true; + } + return KEEP_AGGREGATE_RETURN_POINTER != 0; +} + +/* Value is the number of bytes of arguments automatically + popped when returning from a subroutine call. + FUNDECL is the declaration node of the function (as a tree), + FUNTYPE is the data type of the function (as a tree), + or for a library call it is an identifier node for the subroutine name. + SIZE is the number of bytes of arguments passed on the stack. + + On the 80386, the RTD insn may be used to pop them if the number + of args is fixed, but if the number is variable then the caller + must pop them all. RTD can't be used for library calls now + because the library is compiled with the Unix compiler. + Use of RTD is a selectable option, since it is incompatible with + standard Unix calling sequences. 
If the option is not selected, + the caller must always pop the args. + + The attribute stdcall is equivalent to RTD on a per module basis. */ + +static int +ix86_return_pops_args (tree fundecl, tree funtype, int size) +{ + unsigned int ccvt; + + /* None of the 64-bit ABIs pop arguments. */ + if (TARGET_64BIT) + return 0; + + ccvt = ix86_get_callcvt (funtype); + + if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL + | IX86_CALLCVT_THISCALL)) != 0 + && ! stdarg_p (funtype)) + return size; + + /* Lose any fake structure return argument if it is passed on the stack. */ + if (aggregate_value_p (TREE_TYPE (funtype), fundecl) + && !ix86_keep_aggregate_return_pointer (funtype)) + { + int nregs = ix86_function_regparm (funtype, fundecl); + if (nregs == 0) + return GET_MODE_SIZE (Pmode); + } + + return 0; +} + +/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */ + +static bool +ix86_legitimate_combined_insn (rtx_insn *insn) +{ + /* Check operand constraints in case hard registers were propagated + into insn pattern. This check prevents combine pass from + generating insn patterns with invalid hard register operands. + These invalid insns can eventually confuse reload to error out + with a spill failure. See also PRs 46829 and 46843. */ + if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0) + { + int i; + + extract_insn (insn); + preprocess_constraints (insn); + + int n_operands = recog_data.n_operands; + int n_alternatives = recog_data.n_alternatives; + for (i = 0; i < n_operands; i++) + { + rtx op = recog_data.operand[i]; + machine_mode mode = GET_MODE (op); + const operand_alternative *op_alt; + int offset = 0; + bool win; + int j; + + /* For pre-AVX disallow unaligned loads/stores where the + instructions don't support it. */ + if (!TARGET_AVX + && VECTOR_MODE_P (mode) + && misaligned_operand (op, mode)) + { + unsigned int min_align = get_attr_ssememalign (insn); + if (min_align == 0 + || MEM_ALIGN (op) < min_align) + return false; + } + + /* A unary operator may be accepted by the predicate, but it + is irrelevant for matching constraints. */ + if (UNARY_P (op)) + op = XEXP (op, 0); + + if (SUBREG_P (op)) + { + if (REG_P (SUBREG_REG (op)) + && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER) + offset = subreg_regno_offset (REGNO (SUBREG_REG (op)), + GET_MODE (SUBREG_REG (op)), + SUBREG_BYTE (op), + GET_MODE (op)); + op = SUBREG_REG (op); + } + + if (!(REG_P (op) && HARD_REGISTER_P (op))) + continue; + + op_alt = recog_op_alt; + + /* Operand has no constraints, anything is OK. */ + win = !n_alternatives; + + alternative_mask preferred = get_preferred_alternatives (insn); + for (j = 0; j < n_alternatives; j++, op_alt += n_operands) + { + if (!TEST_BIT (preferred, j)) + continue; + if (op_alt[i].anything_ok + || (op_alt[i].matches != -1 + && operands_match_p + (recog_data.operand[i], + recog_data.operand[op_alt[i].matches])) + || reg_fits_class_p (op, op_alt[i].cl, offset, mode)) + { + win = true; + break; + } + } + + if (!win) + return false; + } + } + + return true; +} + +/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ + +static unsigned HOST_WIDE_INT +ix86_asan_shadow_offset (void) +{ + return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44) + : HOST_WIDE_INT_C (0x7fff8000)) + : (HOST_WIDE_INT_1 << 29); +} + +/* Argument support functions. */ + +/* Return true when register may be used to pass function parameters. 
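+     As an assumed summary, under the 64-bit SysV ABI this holds for
+     %rdi, %rsi, %rdx, %rcx, %r8, %r9, %rax (hidden varargs argument)
+     and the first eight %xmm registers; the MS ABI uses a smaller set.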
*/ +bool +ix86_function_arg_regno_p (int regno) +{ + int i; + enum calling_abi call_abi; + const int *parm_regs; + + if (TARGET_MPX && BND_REGNO_P (regno)) + return true; + + if (!TARGET_64BIT) + { + if (TARGET_MACHO) + return (regno < REGPARM_MAX + || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); + else + return (regno < REGPARM_MAX + || (TARGET_MMX && MMX_REGNO_P (regno) + && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX)) + || (TARGET_SSE && SSE_REGNO_P (regno) + && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))); + } + + if (TARGET_SSE && SSE_REGNO_P (regno) + && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)) + return true; + + /* TODO: The function should depend on current function ABI but + builtins.c would need updating then. Therefore we use the + default ABI. */ + call_abi = ix86_cfun_abi (); + + /* RAX is used as hidden argument to va_arg functions. */ + if (call_abi == SYSV_ABI && regno == AX_REG) + return true; + + if (call_abi == MS_ABI) + parm_regs = x86_64_ms_abi_int_parameter_registers; + else + parm_regs = x86_64_int_parameter_registers; + + for (i = 0; i < (call_abi == MS_ABI + ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++) + if (regno == parm_regs[i]) + return true; + return false; +} + +/* Return if we do not know how to pass TYPE solely in registers. */ + +static bool +ix86_must_pass_in_stack (machine_mode mode, const_tree type) +{ + if (must_pass_in_stack_var_size_or_pad (mode, type)) + return true; + + /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! + The layout_type routine is crafty and tries to trick us into passing + currently unsupported vector types on the stack by using TImode. */ + return (!TARGET_64BIT && mode == TImode + && type && TREE_CODE (type) != VECTOR_TYPE); +} + +/* It returns the size, in bytes, of the area reserved for arguments passed + in registers for the function represented by fndecl dependent to the used + abi format. */ +int +ix86_reg_parm_stack_space (const_tree fndecl) +{ + enum calling_abi call_abi = SYSV_ABI; + if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL) + call_abi = ix86_function_abi (fndecl); + else + call_abi = ix86_function_type_abi (fndecl); + if (TARGET_64BIT && call_abi == MS_ABI) + return 32; + return 0; +} + +/* We add this as a workaround in order to use libc_has_function + hook in i386.md. */ +bool +ix86_libc_has_function (enum function_class fn_class) +{ + return targetm.libc_has_function (fn_class); +} + +/* Returns value SYSV_ABI, MS_ABI dependent on fntype, + specifying the call abi used. */ +enum calling_abi +ix86_function_type_abi (const_tree fntype) +{ + enum calling_abi abi = ix86_abi; + + if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE) + return abi; + + if (abi == SYSV_ABI + && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype))) + { + if (TARGET_X32) + error ("X32 does not support ms_abi attribute"); + + abi = MS_ABI; + } + else if (abi == MS_ABI + && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype))) + abi = SYSV_ABI; + + return abi; +} + +static enum calling_abi +ix86_function_abi (const_tree fndecl) +{ + return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi; +} + +/* Returns value SYSV_ABI, MS_ABI dependent on cfun, + specifying the call abi used. */ +enum calling_abi +ix86_cfun_abi (void) +{ + return cfun ? 
cfun->machine->call_abi : ix86_abi; +} + +static bool +ix86_function_ms_hook_prologue (const_tree fn) +{ + if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn))) + { + if (decl_function_context (fn) != NULL_TREE) + error_at (DECL_SOURCE_LOCATION (fn), + "ms_hook_prologue is not compatible with nested function"); + else + return true; + } + return false; +} + +/* Write the extra assembler code needed to declare a function properly. */ + +void +ix86_asm_output_function_label (FILE *asm_out_file, const char *fname, + tree decl) +{ + bool is_ms_hook = ix86_function_ms_hook_prologue (decl); + + if (is_ms_hook) + { + int i, filler_count = (TARGET_64BIT ? 32 : 16); + unsigned int filler_cc = 0xcccccccc; + + for (i = 0; i < filler_count; i += 4) + fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc); + } + +#ifdef SUBTARGET_ASM_UNWIND_INIT + SUBTARGET_ASM_UNWIND_INIT (asm_out_file); +#endif + + ASM_OUTPUT_LABEL (asm_out_file, fname); + + /* Output magic byte marker, if hot-patch attribute is set. */ + if (is_ms_hook) + { + if (TARGET_64BIT) + { + /* leaq [%rsp + 0], %rsp */ + asm_fprintf (asm_out_file, ASM_BYTE + "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n"); + } + else + { + /* movl.s %edi, %edi + push %ebp + movl.s %esp, %ebp */ + asm_fprintf (asm_out_file, ASM_BYTE + "0x8b, 0xff, 0x55, 0x8b, 0xec\n"); + } + } +} + +/* regclass.c */ +extern void init_regs (void); + +/* Implementation of call abi switching target hook. Specific to FNDECL + the specific call register sets are set. See also + ix86_conditional_register_usage for more details. */ +void +ix86_call_abi_override (const_tree fndecl) +{ + cfun->machine->call_abi = ix86_function_abi (fndecl); +} + +/* Return 1 if pseudo register should be created and used to hold + GOT address for PIC code. */ +bool +ix86_use_pseudo_pic_reg (void) +{ + if ((TARGET_64BIT + && (ix86_cmodel == CM_SMALL_PIC + || TARGET_PECOFF)) + || !flag_pic) + return false; + return true; +} + +/* Initialize large model PIC register. */ + +static void +ix86_init_large_pic_reg (unsigned int tmp_regno) +{ + rtx_code_label *label; + rtx tmp_reg; + + gcc_assert (Pmode == DImode); + label = gen_label_rtx (); + emit_label (label); + LABEL_PRESERVE_P (label) = 1; + tmp_reg = gen_rtx_REG (Pmode, tmp_regno); + gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno); + emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, + label)); + emit_insn (gen_set_got_offset_rex64 (tmp_reg, label)); + emit_insn (ix86_gen_add3 (pic_offset_table_rtx, + pic_offset_table_rtx, tmp_reg)); +} + +/* Create and initialize PIC register if required. */ +static void +ix86_init_pic_reg (void) +{ + edge entry_edge; + rtx_insn *seq; + + if (!ix86_use_pseudo_pic_reg ()) + return; + + start_sequence (); + + if (TARGET_64BIT) + { + if (ix86_cmodel == CM_LARGE_PIC) + ix86_init_large_pic_reg (R11_REG); + else + emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); + } + else + { + /* If there is future mcount call in the function it is more profitable + to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */ + rtx reg = crtl->profile + ? 
gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM) + : pic_offset_table_rtx; + rtx_insn *insn = emit_insn (gen_set_got (reg)); + RTX_FRAME_RELATED_P (insn) = 1; + if (crtl->profile) + emit_move_insn (pic_offset_table_rtx, reg); + add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); + } + + seq = get_insns (); + end_sequence (); + + entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); + insert_insn_on_edge (seq, entry_edge); + commit_one_edge_insertion (entry_edge); +} + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ + +void +init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ + tree fntype, /* tree ptr for function decl */ + rtx libname, /* SYMBOL_REF of library name or 0 */ + tree fndecl, + int caller) +{ + struct cgraph_local_info *i = NULL; + struct cgraph_node *target = NULL; + + memset (cum, 0, sizeof (*cum)); + + if (fndecl) + { + target = cgraph_node::get (fndecl); + if (target) + { + target = target->function_symbol (); + i = cgraph_node::local_info (target->decl); + cum->call_abi = ix86_function_abi (target->decl); + } + else + cum->call_abi = ix86_function_abi (fndecl); + } + else + cum->call_abi = ix86_function_type_abi (fntype); + + cum->caller = caller; + + /* Set up the number of registers to use for passing arguments. */ + cum->nregs = ix86_regparm; + if (TARGET_64BIT) + { + cum->nregs = (cum->call_abi == SYSV_ABI + ? X86_64_REGPARM_MAX + : X86_64_MS_REGPARM_MAX); + } + if (TARGET_SSE) + { + cum->sse_nregs = SSE_REGPARM_MAX; + if (TARGET_64BIT) + { + cum->sse_nregs = (cum->call_abi == SYSV_ABI + ? X86_64_SSE_REGPARM_MAX + : X86_64_MS_SSE_REGPARM_MAX); + } + } + if (TARGET_MMX) + cum->mmx_nregs = MMX_REGPARM_MAX; + cum->warn_avx512f = true; + cum->warn_avx = true; + cum->warn_sse = true; + cum->warn_mmx = true; + + /* Because type might mismatch in between caller and callee, we need to + use actual type of function for local calls. + FIXME: cgraph_analyze can be told to actually record if function uses + va_start so for local functions maybe_vaarg can be made aggressive + helping K&R code. + FIXME: once typesytem is fixed, we won't need this code anymore. */ + if (i && i->local && i->can_change_signature) + fntype = TREE_TYPE (target->decl); + cum->stdarg = stdarg_p (fntype); + cum->maybe_vaarg = (fntype + ? (!prototype_p (fntype) || stdarg_p (fntype)) + : !libname); + + cum->bnd_regno = FIRST_BND_REG; + cum->bnds_in_bt = 0; + cum->force_bnd_pass = 0; + cum->decl = fndecl; + + if (!TARGET_64BIT) + { + /* If there are variable arguments, then we won't pass anything + in registers in 32-bit mode. */ + if (stdarg_p (fntype)) + { + cum->nregs = 0; + /* Since in 32-bit, variable arguments are always passed on + stack, there is scratch register available for indirect + sibcall. */ + cfun->machine->arg_reg_available = true; + cum->sse_nregs = 0; + cum->mmx_nregs = 0; + cum->warn_avx512f = false; + cum->warn_avx = false; + cum->warn_sse = false; + cum->warn_mmx = false; + return; + } + + /* Use ecx and edx registers if function has fastcall attribute, + else look for regparm information. */ + if (fntype) + { + unsigned int ccvt = ix86_get_callcvt (fntype); + if ((ccvt & IX86_CALLCVT_THISCALL) != 0) + { + cum->nregs = 1; + cum->fastcall = 1; /* Same first register as in fastcall. 
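+ For thiscall that single register argument, normally the C++
+ this pointer, is passed in ECX.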
*/ + } + else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) + { + cum->nregs = 2; + cum->fastcall = 1; + } + else + cum->nregs = ix86_function_regparm (fntype, fndecl); + } + + /* Set up the number of SSE registers used for passing SFmode + and DFmode arguments. Warn for mismatching ABI. */ + cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true); + } + + cfun->machine->arg_reg_available = (cum->nregs > 0); +} + +/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. + But in the case of vector types, it is some vector mode. + + When we have only some of our vector isa extensions enabled, then there + are some modes for which vector_mode_supported_p is false. For these + modes, the generic vector support in gcc will choose some non-vector mode + in order to implement the type. By computing the natural mode, we'll + select the proper ABI location for the operand and not depend on whatever + the middle-end decides to do with these vector types. + + The midde-end can't deal with the vector types > 16 bytes. In this + case, we return the original mode and warn ABI change if CUM isn't + NULL. + + If INT_RETURN is true, warn ABI change if the vector mode isn't + available for function return value. */ + +static machine_mode +type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum, + bool in_return) +{ + machine_mode mode = TYPE_MODE (type); + + if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode)) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + if ((size == 8 || size == 16 || size == 32 || size == 64) + /* ??? Generic code allows us to create width 1 vectors. Ignore. */ + && TYPE_VECTOR_SUBPARTS (type) > 1) + { + machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); + + /* There are no XFmode vector modes. */ + if (innermode == XFmode) + return mode; + + if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) + mode = MIN_MODE_VECTOR_FLOAT; + else + mode = MIN_MODE_VECTOR_INT; + + /* Get the mode which has this inner mode and number of units. 
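+ E.g. an inner mode of SFmode with four units gives V4SFmode, and
+ DFmode with four units gives V4DFmode.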
*/ + for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) + if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) + && GET_MODE_INNER (mode) == innermode) + { + if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU) + { + static bool warnedavx512f; + static bool warnedavx512f_ret; + + if (cum && cum->warn_avx512f && !warnedavx512f) + { + if (warning (OPT_Wpsabi, "AVX512F vector argument " + "without AVX512F enabled changes the ABI")) + warnedavx512f = true; + } + else if (in_return && !warnedavx512f_ret) + { + if (warning (OPT_Wpsabi, "AVX512F vector return " + "without AVX512F enabled changes the ABI")) + warnedavx512f_ret = true; + } + + return TYPE_MODE (type); + } + else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU) + { + static bool warnedavx; + static bool warnedavx_ret; + + if (cum && cum->warn_avx && !warnedavx) + { + if (warning (OPT_Wpsabi, "AVX vector argument " + "without AVX enabled changes the ABI")) + warnedavx = true; + } + else if (in_return && !warnedavx_ret) + { + if (warning (OPT_Wpsabi, "AVX vector return " + "without AVX enabled changes the ABI")) + warnedavx_ret = true; + } + + return TYPE_MODE (type); + } + else if (((size == 8 && TARGET_64BIT) || size == 16) + && !TARGET_SSE + && !TARGET_IAMCU) + { + static bool warnedsse; + static bool warnedsse_ret; + + if (cum && cum->warn_sse && !warnedsse) + { + if (warning (OPT_Wpsabi, "SSE vector argument " + "without SSE enabled changes the ABI")) + warnedsse = true; + } + else if (!TARGET_64BIT && in_return && !warnedsse_ret) + { + if (warning (OPT_Wpsabi, "SSE vector return " + "without SSE enabled changes the ABI")) + warnedsse_ret = true; + } + } + else if ((size == 8 && !TARGET_64BIT) + && !TARGET_MMX + && !TARGET_IAMCU) + { + static bool warnedmmx; + static bool warnedmmx_ret; + + if (cum && cum->warn_mmx && !warnedmmx) + { + if (warning (OPT_Wpsabi, "MMX vector argument " + "without MMX enabled changes the ABI")) + warnedmmx = true; + } + else if (in_return && !warnedmmx_ret) + { + if (warning (OPT_Wpsabi, "MMX vector return " + "without MMX enabled changes the ABI")) + warnedmmx_ret = true; + } + } + return mode; + } + + gcc_unreachable (); + } + } + + return mode; +} + +/* We want to pass a value in REGNO whose "natural" mode is MODE. However, + this may not agree with the mode that the type system has chosen for the + register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can + go ahead and use it. Otherwise we have to build a PARALLEL instead. */ + +static rtx +gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode, + unsigned int regno) +{ + rtx tmp; + + if (orig_mode != BLKmode) + tmp = gen_rtx_REG (orig_mode, regno); + else + { + tmp = gen_rtx_REG (mode, regno); + tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); + } + + return tmp; +} + +/* x86-64 register passing implementation. See x86-64 ABI for details. Goal + of this code is to classify each 8bytes of incoming argument by the register + class and assign registers accordingly. */ + +/* Return the union class of CLASS1 and CLASS2. + See the x86-64 PS ABI for details. */ + +static enum x86_64_reg_class +merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) +{ + /* Rule #1: If both classes are equal, this is the resulting class. */ + if (class1 == class2) + return class1; + + /* Rule #2: If one of the classes is NO_CLASS, the resulting class is + the other class. 
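+ (e.g. NO_CLASS merged with SSE gives SSE, and NO_CLASS merged with
+ INTEGER gives INTEGER).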
*/ + if (class1 == X86_64_NO_CLASS) + return class2; + if (class2 == X86_64_NO_CLASS) + return class1; + + /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ + if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) + return X86_64_MEMORY_CLASS; + + /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ + if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) + || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) + return X86_64_INTEGERSI_CLASS; + if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS + || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) + return X86_64_INTEGER_CLASS; + + /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, + MEMORY is used. */ + if (class1 == X86_64_X87_CLASS + || class1 == X86_64_X87UP_CLASS + || class1 == X86_64_COMPLEX_X87_CLASS + || class2 == X86_64_X87_CLASS + || class2 == X86_64_X87UP_CLASS + || class2 == X86_64_COMPLEX_X87_CLASS) + return X86_64_MEMORY_CLASS; + + /* Rule #6: Otherwise class SSE is used. */ + return X86_64_SSE_CLASS; +} + +/* Classify the argument of type TYPE and mode MODE. + CLASSES will be filled by the register class used to pass each word + of the operand. The number of words is returned. In case the parameter + should be passed in memory, 0 is returned. As a special case for zero + sized containers, classes[0] will be NO_CLASS and 1 is returned. + + BIT_OFFSET is used internally for handling records and specifies offset + of the offset in bits modulo 512 to avoid overflow cases. + + See the x86-64 PS ABI for details. +*/ + +static int +classify_argument (machine_mode mode, const_tree type, + enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) +{ + HOST_WIDE_INT bytes = + (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); + int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD); + + /* Variable sized entities are always passed/returned in memory. */ + if (bytes < 0) + return 0; + + if (mode != VOIDmode + && targetm.calls.must_pass_in_stack (mode, type)) + return 0; + + if (type && AGGREGATE_TYPE_P (type)) + { + int i; + tree field; + enum x86_64_reg_class subclasses[MAX_CLASSES]; + + /* On x86-64 we pass structures larger than 64 bytes on the stack. */ + if (bytes > 64) + return 0; + + for (i = 0; i < words; i++) + classes[i] = X86_64_NO_CLASS; + + /* Zero sized arrays or structures are NO_CLASS. We return 0 to + signalize memory class, so handle it as special case. */ + if (!words) + { + classes[0] = X86_64_NO_CLASS; + return 1; + } + + /* Classify each field of record and merge classes. */ + switch (TREE_CODE (type)) + { + case RECORD_TYPE: + /* And now merge the fields of structure. */ + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) == FIELD_DECL) + { + int num; + + if (TREE_TYPE (field) == error_mark_node) + continue; + + /* Bitfields are always classified as integer. Handle them + early, since later code would consider them to be + misaligned integers. */ + if (DECL_BIT_FIELD (field)) + { + for (i = (int_bit_position (field) + + (bit_offset % 64)) / 8 / 8; + i < ((int_bit_position (field) + (bit_offset % 64)) + + tree_to_shwi (DECL_SIZE (field)) + + 63) / 8 / 8; i++) + classes[i] = + merge_classes (X86_64_INTEGER_CLASS, + classes[i]); + } + else + { + int pos; + + type = TREE_TYPE (field); + + /* Flexible array member is ignored. 
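+ For instance the trailing data[] member of
+ struct s { int len; double data[]; };
+ is skipped when classifying s.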
*/ + if (TYPE_MODE (type) == BLKmode + && TREE_CODE (type) == ARRAY_TYPE + && TYPE_SIZE (type) == NULL_TREE + && TYPE_DOMAIN (type) != NULL_TREE + && (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) + == NULL_TREE)) + { + static bool warned; + + if (!warned && warn_psabi) + { + warned = true; + inform (input_location, + "the ABI of passing struct with" + " a flexible array member has" + " changed in GCC 4.4"); + } + continue; + } + num = classify_argument (TYPE_MODE (type), type, + subclasses, + (int_bit_position (field) + + bit_offset) % 512); + if (!num) + return 0; + pos = (int_bit_position (field) + + (bit_offset % 64)) / 8 / 8; + for (i = 0; i < num && (i + pos) < words; i++) + classes[i + pos] = + merge_classes (subclasses[i], classes[i + pos]); + } + } + } + break; + + case ARRAY_TYPE: + /* Arrays are handled as small records. */ + { + int num; + num = classify_argument (TYPE_MODE (TREE_TYPE (type)), + TREE_TYPE (type), subclasses, bit_offset); + if (!num) + return 0; + + /* The partial classes are now full classes. */ + if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) + subclasses[0] = X86_64_SSE_CLASS; + if (subclasses[0] == X86_64_INTEGERSI_CLASS + && !((bit_offset % 64) == 0 && bytes == 4)) + subclasses[0] = X86_64_INTEGER_CLASS; + + for (i = 0; i < words; i++) + classes[i] = subclasses[i % num]; + + break; + } + case UNION_TYPE: + case QUAL_UNION_TYPE: + /* Unions are similar to RECORD_TYPE but offset is always 0. + */ + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) == FIELD_DECL) + { + int num; + + if (TREE_TYPE (field) == error_mark_node) + continue; + + num = classify_argument (TYPE_MODE (TREE_TYPE (field)), + TREE_TYPE (field), subclasses, + bit_offset); + if (!num) + return 0; + for (i = 0; i < num && i < words; i++) + classes[i] = merge_classes (subclasses[i], classes[i]); + } + } + break; + + default: + gcc_unreachable (); + } + + if (words > 2) + { + /* When size > 16 bytes, if the first one isn't + X86_64_SSE_CLASS or any other ones aren't + X86_64_SSEUP_CLASS, everything should be passed in + memory. */ + if (classes[0] != X86_64_SSE_CLASS) + return 0; + + for (i = 1; i < words; i++) + if (classes[i] != X86_64_SSEUP_CLASS) + return 0; + } + + /* Final merger cleanup. */ + for (i = 0; i < words; i++) + { + /* If one class is MEMORY, everything should be passed in + memory. */ + if (classes[i] == X86_64_MEMORY_CLASS) + return 0; + + /* The X86_64_SSEUP_CLASS should be always preceded by + X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ + if (classes[i] == X86_64_SSEUP_CLASS + && classes[i - 1] != X86_64_SSE_CLASS + && classes[i - 1] != X86_64_SSEUP_CLASS) + { + /* The first one should never be X86_64_SSEUP_CLASS. */ + gcc_assert (i != 0); + classes[i] = X86_64_SSE_CLASS; + } + + /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, + everything should be passed in memory. */ + if (classes[i] == X86_64_X87UP_CLASS + && (classes[i - 1] != X86_64_X87_CLASS)) + { + static bool warned; + + /* The first one should never be X86_64_X87UP_CLASS. */ + gcc_assert (i != 0); + if (!warned && warn_psabi) + { + warned = true; + inform (input_location, + "the ABI of passing union with long double" + " has changed in GCC 4.4"); + } + return 0; + } + } + return words; + } + + /* Compute alignment needed. We align all types to natural boundaries with + exception of XFmode that is aligned to 64bits. 
*/ + if (mode != VOIDmode && mode != BLKmode) + { + int mode_alignment = GET_MODE_BITSIZE (mode); + + if (mode == XFmode) + mode_alignment = 128; + else if (mode == XCmode) + mode_alignment = 256; + if (COMPLEX_MODE_P (mode)) + mode_alignment /= 2; + /* Misaligned fields are always returned in memory. */ + if (bit_offset % mode_alignment) + return 0; + } + + /* for V1xx modes, just use the base mode */ + if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode + && GET_MODE_UNIT_SIZE (mode) == bytes) + mode = GET_MODE_INNER (mode); + + /* Classification of atomic types. */ + switch (mode) + { + case SDmode: + case DDmode: + classes[0] = X86_64_SSE_CLASS; + return 1; + case TDmode: + classes[0] = X86_64_SSE_CLASS; + classes[1] = X86_64_SSEUP_CLASS; + return 2; + case DImode: + case SImode: + case HImode: + case QImode: + case CSImode: + case CHImode: + case CQImode: + { + int size = bit_offset + (int) GET_MODE_BITSIZE (mode); + + /* Analyze last 128 bits only. */ + size = (size - 1) & 0x7f; + + if (size < 32) + { + classes[0] = X86_64_INTEGERSI_CLASS; + return 1; + } + else if (size < 64) + { + classes[0] = X86_64_INTEGER_CLASS; + return 1; + } + else if (size < 64+32) + { + classes[0] = X86_64_INTEGER_CLASS; + classes[1] = X86_64_INTEGERSI_CLASS; + return 2; + } + else if (size < 64+64) + { + classes[0] = classes[1] = X86_64_INTEGER_CLASS; + return 2; + } + else + gcc_unreachable (); + } + case CDImode: + case TImode: + classes[0] = classes[1] = X86_64_INTEGER_CLASS; + return 2; + case COImode: + case OImode: + /* OImode shouldn't be used directly. */ + gcc_unreachable (); + case CTImode: + return 0; + case SFmode: + if (!(bit_offset % 64)) + classes[0] = X86_64_SSESF_CLASS; + else + classes[0] = X86_64_SSE_CLASS; + return 1; + case DFmode: + classes[0] = X86_64_SSEDF_CLASS; + return 1; + case XFmode: + classes[0] = X86_64_X87_CLASS; + classes[1] = X86_64_X87UP_CLASS; + return 2; + case TFmode: + classes[0] = X86_64_SSE_CLASS; + classes[1] = X86_64_SSEUP_CLASS; + return 2; + case SCmode: + classes[0] = X86_64_SSE_CLASS; + if (!(bit_offset % 64)) + return 1; + else + { + static bool warned; + + if (!warned && warn_psabi) + { + warned = true; + inform (input_location, + "the ABI of passing structure with complex float" + " member has changed in GCC 4.4"); + } + classes[1] = X86_64_SSESF_CLASS; + return 2; + } + case DCmode: + classes[0] = X86_64_SSEDF_CLASS; + classes[1] = X86_64_SSEDF_CLASS; + return 2; + case XCmode: + classes[0] = X86_64_COMPLEX_X87_CLASS; + return 1; + case TCmode: + /* This modes is larger than 16 bytes. 
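+ (TCmode is a complex __float128, 32 bytes in all, so it is
+ passed in memory.)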
*/ + return 0; + case V8SFmode: + case V8SImode: + case V32QImode: + case V16HImode: + case V4DFmode: + case V4DImode: + classes[0] = X86_64_SSE_CLASS; + classes[1] = X86_64_SSEUP_CLASS; + classes[2] = X86_64_SSEUP_CLASS; + classes[3] = X86_64_SSEUP_CLASS; + return 4; + case V8DFmode: + case V16SFmode: + case V8DImode: + case V16SImode: + case V32HImode: + case V64QImode: + classes[0] = X86_64_SSE_CLASS; + classes[1] = X86_64_SSEUP_CLASS; + classes[2] = X86_64_SSEUP_CLASS; + classes[3] = X86_64_SSEUP_CLASS; + classes[4] = X86_64_SSEUP_CLASS; + classes[5] = X86_64_SSEUP_CLASS; + classes[6] = X86_64_SSEUP_CLASS; + classes[7] = X86_64_SSEUP_CLASS; + return 8; + case V4SFmode: + case V4SImode: + case V16QImode: + case V8HImode: + case V2DFmode: + case V2DImode: + classes[0] = X86_64_SSE_CLASS; + classes[1] = X86_64_SSEUP_CLASS; + return 2; + case V1TImode: + case V1DImode: + case V2SFmode: + case V2SImode: + case V4HImode: + case V8QImode: + classes[0] = X86_64_SSE_CLASS; + return 1; + case BLKmode: + case VOIDmode: + return 0; + default: + gcc_assert (VECTOR_MODE_P (mode)); + + if (bytes > 16) + return 0; + + gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT); + + if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) + classes[0] = X86_64_INTEGERSI_CLASS; + else + classes[0] = X86_64_INTEGER_CLASS; + classes[1] = X86_64_INTEGER_CLASS; + return 1 + (bytes > 8); + } +} + +/* Examine the argument and return set number of register required in each + class. Return true iff parameter should be passed in memory. */ + +static bool +examine_argument (machine_mode mode, const_tree type, int in_return, + int *int_nregs, int *sse_nregs) +{ + enum x86_64_reg_class regclass[MAX_CLASSES]; + int n = classify_argument (mode, type, regclass, 0); + + *int_nregs = 0; + *sse_nregs = 0; + + if (!n) + return true; + for (n--; n >= 0; n--) + switch (regclass[n]) + { + case X86_64_INTEGER_CLASS: + case X86_64_INTEGERSI_CLASS: + (*int_nregs)++; + break; + case X86_64_SSE_CLASS: + case X86_64_SSESF_CLASS: + case X86_64_SSEDF_CLASS: + (*sse_nregs)++; + break; + case X86_64_NO_CLASS: + case X86_64_SSEUP_CLASS: + break; + case X86_64_X87_CLASS: + case X86_64_X87UP_CLASS: + case X86_64_COMPLEX_X87_CLASS: + if (!in_return) + return true; + break; + case X86_64_MEMORY_CLASS: + gcc_unreachable (); + } + + return false; +} + +/* Construct container for the argument used by GCC interface. See + FUNCTION_ARG for the detailed description. */ + +static rtx +construct_container (machine_mode mode, machine_mode orig_mode, + const_tree type, int in_return, int nintregs, int nsseregs, + const int *intreg, int sse_regno) +{ + /* The following variables hold the static issued_error state. */ + static bool issued_sse_arg_error; + static bool issued_sse_ret_error; + static bool issued_x87_ret_error; + + machine_mode tmpmode; + int bytes = + (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); + enum x86_64_reg_class regclass[MAX_CLASSES]; + int n; + int i; + int nexps = 0; + int needed_sseregs, needed_intregs; + rtx exp[MAX_CLASSES]; + rtx ret; + + n = classify_argument (mode, type, regclass, 0); + if (!n) + return NULL; + if (examine_argument (mode, type, in_return, &needed_intregs, + &needed_sseregs)) + return NULL; + if (needed_intregs > nintregs || needed_sseregs > nsseregs) + return NULL; + + /* We allowed the user to turn off SSE for kernel mode. Don't crash if + some less clueful developer tries to use floating-point anyway. 
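+ Emit one diagnostic per kind of misuse, tracked by the static flags
+ below, and bail out with NULL instead of ICEing.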
*/ + if (needed_sseregs && !TARGET_SSE) + { + if (in_return) + { + if (!issued_sse_ret_error) + { + error ("SSE register return with SSE disabled"); + issued_sse_ret_error = true; + } + } + else if (!issued_sse_arg_error) + { + error ("SSE register argument with SSE disabled"); + issued_sse_arg_error = true; + } + return NULL; + } + + /* Likewise, error if the ABI requires us to return values in the + x87 registers and the user specified -mno-80387. */ + if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return) + for (i = 0; i < n; i++) + if (regclass[i] == X86_64_X87_CLASS + || regclass[i] == X86_64_X87UP_CLASS + || regclass[i] == X86_64_COMPLEX_X87_CLASS) + { + if (!issued_x87_ret_error) + { + error ("x87 register return with x87 disabled"); + issued_x87_ret_error = true; + } + return NULL; + } + + /* First construct simple cases. Avoid SCmode, since we want to use + single register to pass this type. */ + if (n == 1 && mode != SCmode) + switch (regclass[0]) + { + case X86_64_INTEGER_CLASS: + case X86_64_INTEGERSI_CLASS: + return gen_rtx_REG (mode, intreg[0]); + case X86_64_SSE_CLASS: + case X86_64_SSESF_CLASS: + case X86_64_SSEDF_CLASS: + if (mode != BLKmode) + return gen_reg_or_parallel (mode, orig_mode, + SSE_REGNO (sse_regno)); + break; + case X86_64_X87_CLASS: + case X86_64_COMPLEX_X87_CLASS: + return gen_rtx_REG (mode, FIRST_STACK_REG); + case X86_64_NO_CLASS: + /* Zero sized array, struct or class. */ + return NULL; + default: + gcc_unreachable (); + } + if (n == 2 + && regclass[0] == X86_64_SSE_CLASS + && regclass[1] == X86_64_SSEUP_CLASS + && mode != BLKmode) + return gen_reg_or_parallel (mode, orig_mode, + SSE_REGNO (sse_regno)); + if (n == 4 + && regclass[0] == X86_64_SSE_CLASS + && regclass[1] == X86_64_SSEUP_CLASS + && regclass[2] == X86_64_SSEUP_CLASS + && regclass[3] == X86_64_SSEUP_CLASS + && mode != BLKmode) + return gen_reg_or_parallel (mode, orig_mode, + SSE_REGNO (sse_regno)); + if (n == 8 + && regclass[0] == X86_64_SSE_CLASS + && regclass[1] == X86_64_SSEUP_CLASS + && regclass[2] == X86_64_SSEUP_CLASS + && regclass[3] == X86_64_SSEUP_CLASS + && regclass[4] == X86_64_SSEUP_CLASS + && regclass[5] == X86_64_SSEUP_CLASS + && regclass[6] == X86_64_SSEUP_CLASS + && regclass[7] == X86_64_SSEUP_CLASS + && mode != BLKmode) + return gen_reg_or_parallel (mode, orig_mode, + SSE_REGNO (sse_regno)); + if (n == 2 + && regclass[0] == X86_64_X87_CLASS + && regclass[1] == X86_64_X87UP_CLASS) + return gen_rtx_REG (XFmode, FIRST_STACK_REG); + + if (n == 2 + && regclass[0] == X86_64_INTEGER_CLASS + && regclass[1] == X86_64_INTEGER_CLASS + && (mode == CDImode || mode == TImode) + && intreg[0] + 1 == intreg[1]) + return gen_rtx_REG (mode, intreg[0]); + + /* Otherwise figure out the entries of the PARALLEL. */ + for (i = 0; i < n; i++) + { + int pos; + + switch (regclass[i]) + { + case X86_64_NO_CLASS: + break; + case X86_64_INTEGER_CLASS: + case X86_64_INTEGERSI_CLASS: + /* Merge TImodes on aligned occasions here too. */ + if (i * 8 + 8 > bytes) + tmpmode + = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0); + else if (regclass[i] == X86_64_INTEGERSI_CLASS) + tmpmode = SImode; + else + tmpmode = DImode; + /* We've requested 24 bytes we + don't have mode for. Use DImode. 
*/ + if (tmpmode == BLKmode) + tmpmode = DImode; + exp [nexps++] + = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (tmpmode, *intreg), + GEN_INT (i*8)); + intreg++; + break; + case X86_64_SSESF_CLASS: + exp [nexps++] + = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (SFmode, + SSE_REGNO (sse_regno)), + GEN_INT (i*8)); + sse_regno++; + break; + case X86_64_SSEDF_CLASS: + exp [nexps++] + = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (DFmode, + SSE_REGNO (sse_regno)), + GEN_INT (i*8)); + sse_regno++; + break; + case X86_64_SSE_CLASS: + pos = i; + switch (n) + { + case 1: + tmpmode = DImode; + break; + case 2: + if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS) + { + tmpmode = TImode; + i++; + } + else + tmpmode = DImode; + break; + case 4: + gcc_assert (i == 0 + && regclass[1] == X86_64_SSEUP_CLASS + && regclass[2] == X86_64_SSEUP_CLASS + && regclass[3] == X86_64_SSEUP_CLASS); + tmpmode = OImode; + i += 3; + break; + case 8: + gcc_assert (i == 0 + && regclass[1] == X86_64_SSEUP_CLASS + && regclass[2] == X86_64_SSEUP_CLASS + && regclass[3] == X86_64_SSEUP_CLASS + && regclass[4] == X86_64_SSEUP_CLASS + && regclass[5] == X86_64_SSEUP_CLASS + && regclass[6] == X86_64_SSEUP_CLASS + && regclass[7] == X86_64_SSEUP_CLASS); + tmpmode = XImode; + i += 7; + break; + default: + gcc_unreachable (); + } + exp [nexps++] + = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (tmpmode, + SSE_REGNO (sse_regno)), + GEN_INT (pos*8)); + sse_regno++; + break; + default: + gcc_unreachable (); + } + } + + /* Empty aligned struct, union or class. */ + if (nexps == 0) + return NULL; + + ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); + for (i = 0; i < nexps; i++) + XVECEXP (ret, 0, i) = exp [i]; + return ret; +} + +/* Update the data in CUM to advance over an argument of mode MODE + and data type TYPE. (TYPE is null for libcalls where that information + may not be available.) + + Return a number of integer regsiters advanced over. */ + +static int +function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode, + const_tree type, HOST_WIDE_INT bytes, + HOST_WIDE_INT words) +{ + int res = 0; + bool error_p = NULL; + + if (TARGET_IAMCU) + { + /* Intel MCU psABI passes scalars and aggregates no larger than 8 + bytes in registers. */ + if (!VECTOR_MODE_P (mode) && bytes <= 8) + goto pass_in_reg; + return res; + } + + switch (mode) + { + default: + break; + + case BLKmode: + if (bytes < 0) + break; + /* FALLTHRU */ + + case DImode: + case SImode: + case HImode: + case QImode: +pass_in_reg: + cum->words += words; + cum->nregs -= words; + cum->regno += words; + if (cum->nregs >= 0) + res = words; + if (cum->nregs <= 0) + { + cum->nregs = 0; + cfun->machine->arg_reg_available = false; + cum->regno = 0; + } + break; + + case OImode: + /* OImode shouldn't be used directly. 
*/ + gcc_unreachable (); + + case DFmode: + if (cum->float_in_sse == -1) + error_p = 1; + if (cum->float_in_sse < 2) + break; + case SFmode: + if (cum->float_in_sse == -1) + error_p = 1; + if (cum->float_in_sse < 1) + break; + /* FALLTHRU */ + + case V8SFmode: + case V8SImode: + case V64QImode: + case V32HImode: + case V16SImode: + case V8DImode: + case V16SFmode: + case V8DFmode: + case V32QImode: + case V16HImode: + case V4DFmode: + case V4DImode: + case TImode: + case V16QImode: + case V8HImode: + case V4SImode: + case V2DImode: + case V4SFmode: + case V2DFmode: + if (!type || !AGGREGATE_TYPE_P (type)) + { + cum->sse_words += words; + cum->sse_nregs -= 1; + cum->sse_regno += 1; + if (cum->sse_nregs <= 0) + { + cum->sse_nregs = 0; + cum->sse_regno = 0; + } + } + break; + + case V8QImode: + case V4HImode: + case V2SImode: + case V2SFmode: + case V1TImode: + case V1DImode: + if (!type || !AGGREGATE_TYPE_P (type)) + { + cum->mmx_words += words; + cum->mmx_nregs -= 1; + cum->mmx_regno += 1; + if (cum->mmx_nregs <= 0) + { + cum->mmx_nregs = 0; + cum->mmx_regno = 0; + } + } + break; + } + if (error_p) + { + cum->float_in_sse = 0; + error ("calling %qD with SSE calling convention without " + "SSE/SSE2 enabled", cum->decl); + sorry ("this is a GCC bug that can be worked around by adding " + "attribute used to function called"); + } + + return res; +} + +static int +function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode, + const_tree type, HOST_WIDE_INT words, bool named) +{ + int int_nregs, sse_nregs; + + /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */ + if (!named && (VALID_AVX512F_REG_MODE (mode) + || VALID_AVX256_REG_MODE (mode))) + return 0; + + if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs) + && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) + { + cum->nregs -= int_nregs; + cum->sse_nregs -= sse_nregs; + cum->regno += int_nregs; + cum->sse_regno += sse_nregs; + return int_nregs; + } + else + { + int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD; + cum->words = ROUND_UP (cum->words, align); + cum->words += words; + return 0; + } +} + +static int +function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, + HOST_WIDE_INT words) +{ + /* Otherwise, this should be passed indirect. */ + gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8); + + cum->words += words; + if (cum->nregs > 0) + { + cum->nregs -= 1; + cum->regno += 1; + return 1; + } + return 0; +} + +/* Update the data in CUM to advance over an argument of mode MODE and + data type TYPE. (TYPE is null for libcalls where that information + may not be available.) */ + +static void +ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, + const_tree type, bool named) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + HOST_WIDE_INT bytes, words; + int nregs; + + if (mode == BLKmode) + bytes = int_size_in_bytes (type); + else + bytes = GET_MODE_SIZE (mode); + words = CEIL (bytes, UNITS_PER_WORD); + + if (type) + mode = type_natural_mode (type, NULL, false); + + if ((type && POINTER_BOUNDS_TYPE_P (type)) + || POINTER_BOUNDS_MODE_P (mode)) + { + /* If we pass bounds in BT then just update remained bounds count. */ + if (cum->bnds_in_bt) + { + cum->bnds_in_bt--; + return; + } + + /* Update remained number of bounds to force. */ + if (cum->force_bnd_pass) + cum->force_bnd_pass--; + + cum->bnd_regno++; + + return; + } + + /* The first arg not going to Bounds Tables resets this counter. 
*/ + cum->bnds_in_bt = 0; + /* For unnamed args we always pass bounds to avoid bounds mess when + passed and received types do not match. If bounds do not follow + unnamed arg, still pretend required number of bounds were passed. */ + if (cum->force_bnd_pass) + { + cum->bnd_regno += cum->force_bnd_pass; + cum->force_bnd_pass = 0; + } + + if (TARGET_64BIT) + { + enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; + + if (call_abi == MS_ABI) + nregs = function_arg_advance_ms_64 (cum, bytes, words); + else + nregs = function_arg_advance_64 (cum, mode, type, words, named); + } + else + nregs = function_arg_advance_32 (cum, mode, type, bytes, words); + + /* For stdarg we expect bounds to be passed for each value passed + in register. */ + if (cum->stdarg) + cum->force_bnd_pass = nregs; + /* For pointers passed in memory we expect bounds passed in Bounds + Table. */ + if (!nregs) + cum->bnds_in_bt = chkp_type_bounds_count (type); +} + +/* Define where to put the arguments to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). */ + +static rtx +function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode, + machine_mode orig_mode, const_tree type, + HOST_WIDE_INT bytes, HOST_WIDE_INT words) +{ + bool error_p = false; + /* Avoid the AL settings for the Unix64 ABI. */ + if (mode == VOIDmode) + return constm1_rtx; + + if (TARGET_IAMCU) + { + /* Intel MCU psABI passes scalars and aggregates no larger than 8 + bytes in registers. */ + if (!VECTOR_MODE_P (mode) && bytes <= 8) + goto pass_in_reg; + return NULL_RTX; + } + + switch (mode) + { + default: + break; + + case BLKmode: + if (bytes < 0) + break; + /* FALLTHRU */ + case DImode: + case SImode: + case HImode: + case QImode: +pass_in_reg: + if (words <= cum->nregs) + { + int regno = cum->regno; + + /* Fastcall allocates the first two DWORD (SImode) or + smaller arguments to ECX and EDX if it isn't an + aggregate type . */ + if (cum->fastcall) + { + if (mode == BLKmode + || mode == DImode + || (type && AGGREGATE_TYPE_P (type))) + break; + + /* ECX not EAX is the first allocated register. */ + if (regno == AX_REG) + regno = CX_REG; + } + return gen_rtx_REG (mode, regno); + } + break; + + case DFmode: + if (cum->float_in_sse == -1) + error_p = 1; + if (cum->float_in_sse < 2) + break; + case SFmode: + if (cum->float_in_sse == -1) + error_p = 1; + if (cum->float_in_sse < 1) + break; + /* FALLTHRU */ + case TImode: + /* In 32bit, we pass TImode in xmm registers. */ + case V16QImode: + case V8HImode: + case V4SImode: + case V2DImode: + case V4SFmode: + case V2DFmode: + if (!type || !AGGREGATE_TYPE_P (type)) + { + if (cum->sse_nregs) + return gen_reg_or_parallel (mode, orig_mode, + cum->sse_regno + FIRST_SSE_REG); + } + break; + + case OImode: + case XImode: + /* OImode and XImode shouldn't be used directly. 
*/ + gcc_unreachable (); + + case V64QImode: + case V32HImode: + case V16SImode: + case V8DImode: + case V16SFmode: + case V8DFmode: + case V8SFmode: + case V8SImode: + case V32QImode: + case V16HImode: + case V4DFmode: + case V4DImode: + if (!type || !AGGREGATE_TYPE_P (type)) + { + if (cum->sse_nregs) + return gen_reg_or_parallel (mode, orig_mode, + cum->sse_regno + FIRST_SSE_REG); + } + break; + + case V8QImode: + case V4HImode: + case V2SImode: + case V2SFmode: + case V1TImode: + case V1DImode: + if (!type || !AGGREGATE_TYPE_P (type)) + { + if (cum->mmx_nregs) + return gen_reg_or_parallel (mode, orig_mode, + cum->mmx_regno + FIRST_MMX_REG); + } + break; + } + if (error_p) + { + cum->float_in_sse = 0; + error ("calling %qD with SSE calling convention without " + "SSE/SSE2 enabled", cum->decl); + sorry ("this is a GCC bug that can be worked around by adding " + "attribute used to function called"); + } + + return NULL_RTX; +} + +static rtx +function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, + machine_mode orig_mode, const_tree type, bool named) +{ + /* Handle a hidden AL argument containing number of registers + for varargs x86-64 functions. */ + if (mode == VOIDmode) + return GEN_INT (cum->maybe_vaarg + ? (cum->sse_nregs < 0 + ? X86_64_SSE_REGPARM_MAX + : cum->sse_regno) + : -1); + + switch (mode) + { + default: + break; + + case V8SFmode: + case V8SImode: + case V32QImode: + case V16HImode: + case V4DFmode: + case V4DImode: + case V16SFmode: + case V16SImode: + case V64QImode: + case V32HImode: + case V8DFmode: + case V8DImode: + /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */ + if (!named) + return NULL; + break; + } + + return construct_container (mode, orig_mode, type, 0, cum->nregs, + cum->sse_nregs, + &x86_64_int_parameter_registers [cum->regno], + cum->sse_regno); +} + +static rtx +function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, + machine_mode orig_mode, bool named, + HOST_WIDE_INT bytes) +{ + unsigned int regno; + + /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call. + We use value of -2 to specify that current function call is MSABI. */ + if (mode == VOIDmode) + return GEN_INT (-2); + + /* If we've run out of registers, it goes on the stack. */ + if (cum->nregs == 0) + return NULL_RTX; + + regno = x86_64_ms_abi_int_parameter_registers[cum->regno]; + + /* Only floating point modes are passed in anything but integer regs. */ + if (TARGET_SSE && (mode == SFmode || mode == DFmode)) + { + if (named) + regno = cum->regno + FIRST_SSE_REG; + else + { + rtx t1, t2; + + /* Unnamed floating parameters are passed in both the + SSE and integer registers. */ + t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG); + t2 = gen_rtx_REG (mode, regno); + t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx); + t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx); + return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2)); + } + } + /* Handle aggregated types passed in register. */ + if (orig_mode == BLKmode) + { + if (bytes > 0 && bytes <= 8) + mode = (bytes > 4 ? DImode : SImode); + if (mode == BLKmode) + mode = DImode; + } + + return gen_reg_or_parallel (mode, orig_mode, regno); +} + +/* Return where to put the arguments to a function. + Return zero to push the argument on the stack, or a hard register in which to store the argument. + + MODE is the argument's machine mode. TYPE is the data type of the + argument. It is null for libcalls where that information may not be + available. 
CUM gives information about the preceding args and about + the function being called. NAMED is nonzero if this argument is a + named parameter (otherwise it is an extra parameter matching an + ellipsis). */ + +static rtx +ix86_function_arg (cumulative_args_t cum_v, machine_mode omode, + const_tree type, bool named) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + machine_mode mode = omode; + HOST_WIDE_INT bytes, words; + rtx arg; + + /* All pointer bounds arguments are handled separately here. */ + if ((type && POINTER_BOUNDS_TYPE_P (type)) + || POINTER_BOUNDS_MODE_P (mode)) + { + /* Return NULL if bounds are forced to go in Bounds Table. */ + if (cum->bnds_in_bt) + arg = NULL; + /* Return the next available bound reg if any. */ + else if (cum->bnd_regno <= LAST_BND_REG) + arg = gen_rtx_REG (BNDmode, cum->bnd_regno); + /* Return the next special slot number otherwise. */ + else + arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1); + + return arg; + } + + if (mode == BLKmode) + bytes = int_size_in_bytes (type); + else + bytes = GET_MODE_SIZE (mode); + words = CEIL (bytes, UNITS_PER_WORD); + + /* To simplify the code below, represent vector types with a vector mode + even if MMX/SSE are not active. */ + if (type && TREE_CODE (type) == VECTOR_TYPE) + mode = type_natural_mode (type, cum, false); + + if (TARGET_64BIT) + { + enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; + + if (call_abi == MS_ABI) + arg = function_arg_ms_64 (cum, mode, omode, named, bytes); + else + arg = function_arg_64 (cum, mode, omode, type, named); + } + else + arg = function_arg_32 (cum, mode, omode, type, bytes, words); + + return arg; +} + +/* A C expression that indicates when an argument must be passed by + reference. If nonzero for an argument, a copy of that argument is + made in memory and a pointer to the argument is passed instead of + the argument itself. The pointer is passed in whatever way is + appropriate for passing a pointer to that type. */ + +static bool +ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode, + const_tree type, bool) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + + /* Bounds are never passed by reference. */ + if ((type && POINTER_BOUNDS_TYPE_P (type)) + || POINTER_BOUNDS_MODE_P (mode)) + return false; + + if (TARGET_64BIT) + { + enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; + + /* See Windows x64 Software Convention. */ + if (call_abi == MS_ABI) + { + HOST_WIDE_INT msize = GET_MODE_SIZE (mode); + + if (type) + { + /* Arrays are passed by reference. */ + if (TREE_CODE (type) == ARRAY_TYPE) + return true; + + if (RECORD_OR_UNION_TYPE_P (type)) + { + /* Structs/unions of sizes other than 8, 16, 32, or 64 bits + are passed by reference. */ + msize = int_size_in_bytes (type); + } + } + + /* __m128 is passed by reference. */ + return msize != 1 && msize != 2 && msize != 4 && msize != 8; + } + else if (type && int_size_in_bytes (type) == -1) + return true; + } + + return false; +} + +/* Return true when TYPE should be 128bit aligned for 32bit argument + passing ABI. XXX: This function is obsolete and is only used for + checking psABI compatibility with previous versions of GCC. 
*/ + +static bool +ix86_compat_aligned_value_p (const_tree type) +{ + machine_mode mode = TYPE_MODE (type); + if (((TARGET_SSE && SSE_REG_MODE_P (mode)) + || mode == TDmode + || mode == TFmode + || mode == TCmode) + && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) + return true; + if (TYPE_ALIGN (type) < 128) + return false; + + if (AGGREGATE_TYPE_P (type)) + { + /* Walk the aggregates recursively. */ + switch (TREE_CODE (type)) + { + case RECORD_TYPE: + case UNION_TYPE: + case QUAL_UNION_TYPE: + { + tree field; + + /* Walk all the structure fields. */ + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) == FIELD_DECL + && ix86_compat_aligned_value_p (TREE_TYPE (field))) + return true; + } + break; + } + + case ARRAY_TYPE: + /* Just for use if some languages passes arrays by value. */ + if (ix86_compat_aligned_value_p (TREE_TYPE (type))) + return true; + break; + + default: + gcc_unreachable (); + } + } + return false; +} + +/* Return the alignment boundary for MODE and TYPE with alignment ALIGN. + XXX: This function is obsolete and is only used for checking psABI + compatibility with previous versions of GCC. */ + +static unsigned int +ix86_compat_function_arg_boundary (machine_mode mode, + const_tree type, unsigned int align) +{ + /* In 32bit, only _Decimal128 and __float128 are aligned to their + natural boundaries. */ + if (!TARGET_64BIT && mode != TDmode && mode != TFmode) + { + /* i386 ABI defines all arguments to be 4 byte aligned. We have to + make an exception for SSE modes since these require 128bit + alignment. + + The handling here differs from field_alignment. ICC aligns MMX + arguments to 4 byte boundaries, while structure fields are aligned + to 8 byte boundaries. */ + if (!type) + { + if (!(TARGET_SSE && SSE_REG_MODE_P (mode))) + align = PARM_BOUNDARY; + } + else + { + if (!ix86_compat_aligned_value_p (type)) + align = PARM_BOUNDARY; + } + } + if (align > BIGGEST_ALIGNMENT) + align = BIGGEST_ALIGNMENT; + return align; +} + +/* Return true when TYPE should be 128bit aligned for 32bit argument + passing ABI. */ + +static bool +ix86_contains_aligned_value_p (const_tree type) +{ + machine_mode mode = TYPE_MODE (type); + + if (mode == XFmode || mode == XCmode) + return false; + + if (TYPE_ALIGN (type) < 128) + return false; + + if (AGGREGATE_TYPE_P (type)) + { + /* Walk the aggregates recursively. */ + switch (TREE_CODE (type)) + { + case RECORD_TYPE: + case UNION_TYPE: + case QUAL_UNION_TYPE: + { + tree field; + + /* Walk all the structure fields. */ + for (field = TYPE_FIELDS (type); + field; + field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) == FIELD_DECL + && ix86_contains_aligned_value_p (TREE_TYPE (field))) + return true; + } + break; + } + + case ARRAY_TYPE: + /* Just for use if some languages passes arrays by value. */ + if (ix86_contains_aligned_value_p (TREE_TYPE (type))) + return true; + break; + + default: + gcc_unreachable (); + } + } + else + return TYPE_ALIGN (type) >= 128; + + return false; +} + +/* Gives the alignment boundary, in bits, of an argument with the + specified mode and type. */ + +static unsigned int +ix86_function_arg_boundary (machine_mode mode, const_tree type) +{ + unsigned int align; + if (type) + { + /* Since the main variant type is used for call, we convert it to + the main variant type. 
*/ + type = TYPE_MAIN_VARIANT (type); + align = TYPE_ALIGN (type); + } + else + align = GET_MODE_ALIGNMENT (mode); + if (align < PARM_BOUNDARY) + align = PARM_BOUNDARY; + else + { + static bool warned; + unsigned int saved_align = align; + + if (!TARGET_64BIT) + { + /* i386 ABI defines XFmode arguments to be 4 byte aligned. */ + if (!type) + { + if (mode == XFmode || mode == XCmode) + align = PARM_BOUNDARY; + } + else if (!ix86_contains_aligned_value_p (type)) + align = PARM_BOUNDARY; + + if (align < 128) + align = PARM_BOUNDARY; + } + + if (warn_psabi + && !warned + && align != ix86_compat_function_arg_boundary (mode, type, + saved_align)) + { + warned = true; + inform (input_location, + "The ABI for passing parameters with %d-byte" + " alignment has changed in GCC 4.6", + align / BITS_PER_UNIT); + } + } + + return align; +} + +/* Return true if N is a possible register number of function value. */ + +static bool +ix86_function_value_regno_p (const unsigned int regno) +{ + switch (regno) + { + case AX_REG: + return true; + case DX_REG: + return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI); + case DI_REG: + case SI_REG: + return TARGET_64BIT && ix86_cfun_abi () != MS_ABI; + + case BND0_REG: + case BND1_REG: + return chkp_function_instrumented_p (current_function_decl); + + /* Complex values are returned in %st(0)/%st(1) pair. */ + case ST0_REG: + case ST1_REG: + /* TODO: The function should depend on current function ABI but + builtins.c would need updating then. Therefore we use the + default ABI. */ + if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI) + return false; + return TARGET_FLOAT_RETURNS_IN_80387; + + /* Complex values are returned in %xmm0/%xmm1 pair. */ + case XMM0_REG: + case XMM1_REG: + return TARGET_SSE; + + case MM0_REG: + if (TARGET_MACHO || TARGET_64BIT) + return false; + return TARGET_MMX; + } + + return false; +} + +/* Define how to find the value returned by a function. + VALTYPE is the data type of the value (as a tree). + If the precise function being called is known, FUNC is its FUNCTION_DECL; + otherwise, FUNC is 0. */ + +static rtx +function_value_32 (machine_mode orig_mode, machine_mode mode, + const_tree fntype, const_tree fn) +{ + unsigned int regno; + + /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where + we normally prevent this case when mmx is not available. However + some ABIs may require the result to be returned like DImode. */ + if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) + regno = FIRST_MMX_REG; + + /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where + we prevent this case when sse is not available. However some ABIs + may require the result to be returned like integer TImode. */ + else if (mode == TImode + || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) + regno = FIRST_SSE_REG; + + /* 32-byte vector modes in %ymm0. */ + else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32) + regno = FIRST_SSE_REG; + + /* 64-byte vector modes in %zmm0. */ + else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64) + regno = FIRST_SSE_REG; + + /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */ + else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387) + regno = FIRST_FLOAT_REG; + else + /* Most things go in %eax. */ + regno = AX_REG; + + /* Override FP return register with %xmm0 for local functions when + SSE math is enabled or for functions with sseregparm attribute. 
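+ E.g. with -mfpmath=sse a local function returning float can hand its
+ result back in %xmm0 rather than %st(0).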
*/ + if ((fn || fntype) && (mode == SFmode || mode == DFmode)) + { + int sse_level = ix86_function_sseregparm (fntype, fn, false); + if (sse_level == -1) + { + error ("calling %qD with SSE caling convention without " + "SSE/SSE2 enabled", fn); + sorry ("this is a GCC bug that can be worked around by adding " + "attribute used to function called"); + } + else if ((sse_level >= 1 && mode == SFmode) + || (sse_level == 2 && mode == DFmode)) + regno = FIRST_SSE_REG; + } + + /* OImode shouldn't be used directly. */ + gcc_assert (mode != OImode); + + return gen_rtx_REG (orig_mode, regno); +} + +static rtx +function_value_64 (machine_mode orig_mode, machine_mode mode, + const_tree valtype) +{ + rtx ret; + + /* Handle libcalls, which don't provide a type node. */ + if (valtype == NULL) + { + unsigned int regno; + + switch (mode) + { + case SFmode: + case SCmode: + case DFmode: + case DCmode: + case TFmode: + case SDmode: + case DDmode: + case TDmode: + regno = FIRST_SSE_REG; + break; + case XFmode: + case XCmode: + regno = FIRST_FLOAT_REG; + break; + case TCmode: + return NULL; + default: + regno = AX_REG; + } + + return gen_rtx_REG (mode, regno); + } + else if (POINTER_TYPE_P (valtype)) + { + /* Pointers are always returned in word_mode. */ + mode = word_mode; + } + + ret = construct_container (mode, orig_mode, valtype, 1, + X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX, + x86_64_int_return_registers, 0); + + /* For zero sized structures, construct_container returns NULL, but we + need to keep rest of compiler happy by returning meaningful value. */ + if (!ret) + ret = gen_rtx_REG (orig_mode, AX_REG); + + return ret; +} + +static rtx +function_value_ms_64 (machine_mode orig_mode, machine_mode mode, + const_tree valtype) +{ + unsigned int regno = AX_REG; + + if (TARGET_SSE) + { + switch (GET_MODE_SIZE (mode)) + { + case 16: + if (valtype != NULL_TREE + && !VECTOR_INTEGER_TYPE_P (valtype) + && !VECTOR_INTEGER_TYPE_P (valtype) + && !INTEGRAL_TYPE_P (valtype) + && !VECTOR_FLOAT_TYPE_P (valtype)) + break; + if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) + && !COMPLEX_MODE_P (mode)) + regno = FIRST_SSE_REG; + break; + case 8: + case 4: + if (mode == SFmode || mode == DFmode) + regno = FIRST_SSE_REG; + break; + default: + break; + } + } + return gen_rtx_REG (orig_mode, regno); +} + +static rtx +ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, + machine_mode orig_mode, machine_mode mode) +{ + const_tree fn, fntype; + + fn = NULL_TREE; + if (fntype_or_decl && DECL_P (fntype_or_decl)) + fn = fntype_or_decl; + fntype = fn ? TREE_TYPE (fn) : fntype_or_decl; + + if ((valtype && POINTER_BOUNDS_TYPE_P (valtype)) + || POINTER_BOUNDS_MODE_P (mode)) + return gen_rtx_REG (BNDmode, FIRST_BND_REG); + else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI) + return function_value_ms_64 (orig_mode, mode, valtype); + else if (TARGET_64BIT) + return function_value_64 (orig_mode, mode, valtype); + else + return function_value_32 (orig_mode, mode, fntype, fn); +} + +static rtx +ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool) +{ + machine_mode mode, orig_mode; + + orig_mode = TYPE_MODE (valtype); + mode = type_natural_mode (valtype, NULL, true); + return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode); +} + +/* Return an RTX representing a place where a function returns + or recieves pointer bounds or NULL if no bounds are returned. + + VALTYPE is a data type of a value returned by the function. 
+ + FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL + or FUNCTION_TYPE of the function. + + If OUTGOING is false, return a place in which the caller will + see the return value. Otherwise, return a place where a + function returns a value. */ + +static rtx +ix86_function_value_bounds (const_tree valtype, + const_tree fntype_or_decl ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + rtx res = NULL_RTX; + + if (BOUNDED_TYPE_P (valtype)) + res = gen_rtx_REG (BNDmode, FIRST_BND_REG); + else if (chkp_type_has_pointer (valtype)) + { + bitmap slots; + rtx bounds[2]; + bitmap_iterator bi; + unsigned i, bnd_no = 0; + + bitmap_obstack_initialize (NULL); + slots = BITMAP_ALLOC (NULL); + chkp_find_bound_slots (valtype, slots); + + EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi) + { + rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no); + rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT); + gcc_assert (bnd_no < 2); + bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs); + } + + res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds)); + + BITMAP_FREE (slots); + bitmap_obstack_release (NULL); + } + else + res = NULL_RTX; + + return res; +} + +/* Pointer function arguments and return values are promoted to + word_mode. */ + +static machine_mode +ix86_promote_function_mode (const_tree type, machine_mode mode, + int *punsignedp, const_tree fntype, + int for_return) +{ + if (type != NULL_TREE && POINTER_TYPE_P (type)) + { + *punsignedp = POINTERS_EXTEND_UNSIGNED; + return word_mode; + } + return default_promote_function_mode (type, mode, punsignedp, fntype, + for_return); +} + +/* Return true if a structure, union or array with MODE containing FIELD + should be accessed using BLKmode. */ + +static bool +ix86_member_type_forces_blk (const_tree field, machine_mode mode) +{ + /* Union with XFmode must be in BLKmode. */ + return (mode == XFmode + && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE + || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE)); +} + +rtx +ix86_libcall_value (machine_mode mode) +{ + return ix86_function_value_1 (NULL, NULL, mode, mode); +} + +/* Return true iff type is returned in memory. */ + +static bool +ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ +#ifdef SUBTARGET_RETURN_IN_MEMORY + return SUBTARGET_RETURN_IN_MEMORY (type, fntype); +#else + const machine_mode mode = type_natural_mode (type, NULL, true); + HOST_WIDE_INT size; + + if (POINTER_BOUNDS_TYPE_P (type)) + return false; + + if (TARGET_64BIT) + { + if (ix86_function_type_abi (fntype) == MS_ABI) + { + size = int_size_in_bytes (type); + + /* __m128 is returned in xmm0. */ + if ((!type || VECTOR_INTEGER_TYPE_P (type) + || INTEGRAL_TYPE_P (type) + || VECTOR_FLOAT_TYPE_P (type)) + && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) + && !COMPLEX_MODE_P (mode) + && (GET_MODE_SIZE (mode) == 16 || size == 16)) + return false; + + /* Otherwise, the size must be exactly in [1248]. */ + return size != 1 && size != 2 && size != 4 && size != 8; + } + else + { + int needed_intregs, needed_sseregs; + + return examine_argument (mode, type, 1, + &needed_intregs, &needed_sseregs); + } + } + else + { + size = int_size_in_bytes (type); + + /* Intel MCU psABI returns scalars and aggregates no larger than 8 + bytes in registers. 
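The MS-ABI branch of ix86_return_in_memory above boils down to: a value comes back in a register only if it is a 16-byte vector or its size is exactly 1, 2, 4 or 8 bytes; everything else is returned through a hidden memory pointer. A simplified standalone sketch of that predicate (illustrative, not compiler code):

#include <stdio.h>
#include <stdbool.h>

/* Simplified MS x64 rule: return in memory unless the value is a
   16-byte vector or has size exactly 1, 2, 4 or 8 bytes.  */
static bool
ms64_return_in_memory (bool is_vector, long long size)
{
  if (is_vector && size == 16)
    return false;
  return size != 1 && size != 2 && size != 4 && size != 8;
}

int
main (void)
{
  struct s3 { char c[3]; };
  struct s8 { long long x; };
  printf ("3-byte struct in memory?  %d\n",
          ms64_return_in_memory (false, sizeof (struct s3)));
  printf ("8-byte struct in memory?  %d\n",
          ms64_return_in_memory (false, sizeof (struct s8)));
  printf ("__m128 in memory?         %d\n",
          ms64_return_in_memory (true, 16));
  return 0;
}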
*/ + if (TARGET_IAMCU) + return VECTOR_MODE_P (mode) || size < 0 || size > 8; + + if (mode == BLKmode) + return true; + + if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8) + return false; + + if (VECTOR_MODE_P (mode) || mode == TImode) + { + /* User-created vectors small enough to fit in EAX. */ + if (size < 8) + return false; + + /* Unless ABI prescibes otherwise, + MMX/3dNow values are returned in MM0 if available. */ + + if (size == 8) + return TARGET_VECT8_RETURNS || !TARGET_MMX; + + /* SSE values are returned in XMM0 if available. */ + if (size == 16) + return !TARGET_SSE; + + /* AVX values are returned in YMM0 if available. */ + if (size == 32) + return !TARGET_AVX; + + /* AVX512F values are returned in ZMM0 if available. */ + if (size == 64) + return !TARGET_AVX512F; + } + + if (mode == XFmode) + return false; + + if (size > 12) + return true; + + /* OImode shouldn't be used directly. */ + gcc_assert (mode != OImode); + + return false; + } +#endif +} + + +/* Create the va_list data type. */ + +static tree +ix86_build_builtin_va_list_64 (void) +{ + tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; + + record = lang_hooks.types.make_type (RECORD_TYPE); + type_decl = build_decl (BUILTINS_LOCATION, + TYPE_DECL, get_identifier ("__va_list_tag"), record); + + f_gpr = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("gp_offset"), + unsigned_type_node); + f_fpr = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("fp_offset"), + unsigned_type_node); + f_ovf = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("overflow_arg_area"), + ptr_type_node); + f_sav = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("reg_save_area"), + ptr_type_node); + + va_list_gpr_counter_field = f_gpr; + va_list_fpr_counter_field = f_fpr; + + DECL_FIELD_CONTEXT (f_gpr) = record; + DECL_FIELD_CONTEXT (f_fpr) = record; + DECL_FIELD_CONTEXT (f_ovf) = record; + DECL_FIELD_CONTEXT (f_sav) = record; + + TYPE_STUB_DECL (record) = type_decl; + TYPE_NAME (record) = type_decl; + TYPE_FIELDS (record) = f_gpr; + DECL_CHAIN (f_gpr) = f_fpr; + DECL_CHAIN (f_fpr) = f_ovf; + DECL_CHAIN (f_ovf) = f_sav; + + layout_type (record); + + TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"), + NULL_TREE, TYPE_ATTRIBUTES (record)); + + /* The correct type is an array type of one element. */ + return build_array_type (record, build_index_type (size_zero_node)); +} + +/* Setup the builtin va_list data type and for 64-bit the additional + calling convention specific va_list data types. */ + +static tree +ix86_build_builtin_va_list (void) +{ + if (TARGET_64BIT) + { + /* Initialize ABI specific va_list builtin types. + + In lto1, we can encounter two va_list types: + - one as a result of the type-merge across TUs, and + - the one constructed here. + These two types will not have the same TYPE_MAIN_VARIANT, and therefore + a type identity check in canonical_va_list_type based on + TYPE_MAIN_VARIANT (which we used to have) will not work. + Instead, we tag each va_list_type_node with its unique attribute, and + look for the attribute in the type identity check in + canonical_va_list_type. + + Tagging sysv_va_list_type_node directly with the attribute is + problematic since it's a array of one record, which will degrade into a + pointer to record when used as parameter (see build_va_arg comments for + an example), dropping the attribute in the process. So we tag the + record instead. */ + + /* For SYSV_ABI we use an array of one record. 
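The record built by ix86_build_builtin_va_list_64 above corresponds to the __va_list_tag of the SysV AMD64 psABI. A standalone C picture of the same layout (field names follow the code; the struct here is illustrative and is of course not the compiler-built type):

#include <stdio.h>
#include <stddef.h>

/* C-level picture of the record built above: two 32-bit offsets into the
   register save area followed by two pointers.  va_list is an array of
   one such record.  */
struct va_list_tag
{
  unsigned int gp_offset;        /* next general-purpose register slot */
  unsigned int fp_offset;        /* next SSE register slot             */
  void *overflow_arg_area;       /* stack arguments                    */
  void *reg_save_area;           /* spilled incoming registers         */
};
typedef struct va_list_tag my_va_list[1];

int
main (void)
{
  printf ("sizeof = %zu\n", sizeof (struct va_list_tag));   /* 24 on LP64 */
  printf ("gp_offset at %zu, fp_offset at %zu\n",
          offsetof (struct va_list_tag, gp_offset),
          offsetof (struct va_list_tag, fp_offset));
  printf ("overflow_arg_area at %zu, reg_save_area at %zu\n",
          offsetof (struct va_list_tag, overflow_arg_area),
          offsetof (struct va_list_tag, reg_save_area));
  return 0;
}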
*/ + sysv_va_list_type_node = ix86_build_builtin_va_list_64 (); + + /* For MS_ABI we use plain pointer to argument area. */ + tree char_ptr_type = build_pointer_type (char_type_node); + tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE, + TYPE_ATTRIBUTES (char_ptr_type)); + ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr); + + return ((ix86_abi == MS_ABI) + ? ms_va_list_type_node + : sysv_va_list_type_node); + } + else + { + /* For i386 we use plain pointer to argument area. */ + return build_pointer_type (char_type_node); + } +} + +/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */ + +static void +setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) +{ + rtx save_area, mem; + alias_set_type set; + int i, max; + + /* GPR size of varargs save area. */ + if (cfun->va_list_gpr_size) + ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD; + else + ix86_varargs_gpr_size = 0; + + /* FPR size of varargs save area. We don't need it if we don't pass + anything in SSE registers. */ + if (TARGET_SSE && cfun->va_list_fpr_size) + ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16; + else + ix86_varargs_fpr_size = 0; + + if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size) + return; + + save_area = frame_pointer_rtx; + set = get_varargs_alias_set (); + + max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD; + if (max > X86_64_REGPARM_MAX) + max = X86_64_REGPARM_MAX; + + for (i = cum->regno; i < max; i++) + { + mem = gen_rtx_MEM (word_mode, + plus_constant (Pmode, save_area, i * UNITS_PER_WORD)); + MEM_NOTRAP_P (mem) = 1; + set_mem_alias_set (mem, set); + emit_move_insn (mem, + gen_rtx_REG (word_mode, + x86_64_int_parameter_registers[i])); + } + + if (ix86_varargs_fpr_size) + { + machine_mode smode; + rtx_code_label *label; + rtx test; + + /* Now emit code to save SSE registers. The AX parameter contains number + of SSE parameter registers used to call this function, though all we + actually check here is the zero/non-zero status. */ + + label = gen_label_rtx (); + test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx); + emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1), + label)); + + /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if + we used movdqa (i.e. TImode) instead? Perhaps even better would + be if we could determine the real mode of the data, via a hook + into pass_stdarg. Ignore all that for now. */ + smode = V4SFmode; + if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode)) + crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode); + + max = cum->sse_regno + cfun->va_list_fpr_size / 16; + if (max > X86_64_SSE_REGPARM_MAX) + max = X86_64_SSE_REGPARM_MAX; + + for (i = cum->sse_regno; i < max; ++i) + { + mem = plus_constant (Pmode, save_area, + i * 16 + ix86_varargs_gpr_size); + mem = gen_rtx_MEM (smode, mem); + MEM_NOTRAP_P (mem) = 1; + set_mem_alias_set (mem, set); + set_mem_align (mem, GET_MODE_ALIGNMENT (smode)); + + emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i))); + } + + emit_label (label); + } +} + +static void +setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) +{ + alias_set_type set = get_varargs_alias_set (); + int i; + + /* Reset to zero, as there might be a sysv vaarg used + before. 
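The save-area arithmetic in setup_incoming_varargs_64 above works out, with the SysV values X86_64_REGPARM_MAX = 6, X86_64_SSE_REGPARM_MAX = 8 and UNITS_PER_WORD = 8, to 48 bytes of spilled GPRs followed by 128 bytes of XMM registers, each at a fixed offset. A plain-C illustration of those numbers (not compiler code):

#include <stdio.h>

#define REGPARM_MAX      6   /* rdi, rsi, rdx, rcx, r8, r9 */
#define SSE_REGPARM_MAX  8   /* xmm0 .. xmm7               */
#define UNITS_PER_WORD   8

int
main (void)
{
  int gpr_size = REGPARM_MAX * UNITS_PER_WORD;   /* 48  */
  int fpr_size = SSE_REGPARM_MAX * 16;           /* 128 */

  printf ("register save area: %d bytes\n", gpr_size + fpr_size);
  for (int i = 0; i < REGPARM_MAX; i++)
    printf ("  GPR arg %d saved at offset %d\n", i, i * UNITS_PER_WORD);
  for (int i = 0; i < SSE_REGPARM_MAX; i++)
    printf ("  XMM arg %d saved at offset %d\n", i, gpr_size + i * 16);
  return 0;
}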
*/ + ix86_varargs_gpr_size = 0; + ix86_varargs_fpr_size = 0; + + for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++) + { + rtx reg, mem; + + mem = gen_rtx_MEM (Pmode, + plus_constant (Pmode, virtual_incoming_args_rtx, + i * UNITS_PER_WORD)); + MEM_NOTRAP_P (mem) = 1; + set_mem_alias_set (mem, set); + + reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]); + emit_move_insn (mem, reg); + } +} + +static void +ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, + tree type, int *, int no_rtl) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + CUMULATIVE_ARGS next_cum; + tree fntype; + + /* This argument doesn't appear to be used anymore. Which is good, + because the old code here didn't suppress rtl generation. */ + gcc_assert (!no_rtl); + + if (!TARGET_64BIT) + return; + + fntype = TREE_TYPE (current_function_decl); + + /* For varargs, we do not want to skip the dummy va_dcl argument. + For stdargs, we do want to skip the last named argument. */ + next_cum = *cum; + if (stdarg_p (fntype)) + ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, + true); + + if (cum->call_abi == MS_ABI) + setup_incoming_varargs_ms_64 (&next_cum); + else + setup_incoming_varargs_64 (&next_cum); +} + +static void +ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v, + enum machine_mode mode, + tree type, + int *pretend_size ATTRIBUTE_UNUSED, + int no_rtl) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + CUMULATIVE_ARGS next_cum; + tree fntype; + rtx save_area; + int bnd_reg, i, max; + + gcc_assert (!no_rtl); + + /* Do nothing if we use plain pointer to argument area. */ + if (!TARGET_64BIT || cum->call_abi == MS_ABI) + return; + + fntype = TREE_TYPE (current_function_decl); + + /* For varargs, we do not want to skip the dummy va_dcl argument. + For stdargs, we do want to skip the last named argument. */ + next_cum = *cum; + if (stdarg_p (fntype)) + ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, + true); + save_area = frame_pointer_rtx; + + max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD; + if (max > X86_64_REGPARM_MAX) + max = X86_64_REGPARM_MAX; + + bnd_reg = cum->bnd_regno + cum->force_bnd_pass; + if (chkp_function_instrumented_p (current_function_decl)) + for (i = cum->regno; i < max; i++) + { + rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD); + rtx ptr = gen_rtx_REG (Pmode, + x86_64_int_parameter_registers[i]); + rtx bounds; + + if (bnd_reg <= LAST_BND_REG) + bounds = gen_rtx_REG (BNDmode, bnd_reg); + else + { + rtx ldx_addr = + plus_constant (Pmode, arg_pointer_rtx, + (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode)); + bounds = gen_reg_rtx (BNDmode); + emit_insn (BNDmode == BND64mode + ? gen_bnd64_ldx (bounds, ldx_addr, ptr) + : gen_bnd32_ldx (bounds, ldx_addr, ptr)); + } + + emit_insn (BNDmode == BND64mode + ? gen_bnd64_stx (addr, ptr, bounds) + : gen_bnd32_stx (addr, ptr, bounds)); + + bnd_reg++; + } +} + + +/* Checks if TYPE is of kind va_list char *. */ + +static bool +is_va_list_char_pointer (tree type) +{ + tree canonic; + + /* For 32-bit it is always true. */ + if (!TARGET_64BIT) + return true; + canonic = ix86_canonical_va_list_type (type); + return (canonic == ms_va_list_type_node + || (ix86_abi == MS_ABI && canonic == va_list_type_node)); +} + +/* Implement va_start. 
*/ + +static void +ix86_va_start (tree valist, rtx nextarg) +{ + HOST_WIDE_INT words, n_gpr, n_fpr; + tree f_gpr, f_fpr, f_ovf, f_sav; + tree gpr, fpr, ovf, sav, t; + tree type; + rtx ovf_rtx; + + if (flag_split_stack + && cfun->machine->split_stack_varargs_pointer == NULL_RTX) + { + unsigned int scratch_regno; + + /* When we are splitting the stack, we can't refer to the stack + arguments using internal_arg_pointer, because they may be on + the old stack. The split stack prologue will arrange to + leave a pointer to the old stack arguments in a scratch + register, which we here copy to a pseudo-register. The split + stack prologue can't set the pseudo-register directly because + it (the prologue) runs before any registers have been saved. */ + + scratch_regno = split_stack_prologue_scratch_regno (); + if (scratch_regno != INVALID_REGNUM) + { + rtx reg; + rtx_insn *seq; + + reg = gen_reg_rtx (Pmode); + cfun->machine->split_stack_varargs_pointer = reg; + + start_sequence (); + emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno)); + seq = get_insns (); + end_sequence (); + + push_topmost_sequence (); + emit_insn_after (seq, entry_of_function ()); + pop_topmost_sequence (); + } + } + + /* Only 64bit target needs something special. */ + if (is_va_list_char_pointer (TREE_TYPE (valist))) + { + if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) + std_expand_builtin_va_start (valist, nextarg); + else + { + rtx va_r, next; + + va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE); + next = expand_binop (ptr_mode, add_optab, + cfun->machine->split_stack_varargs_pointer, + crtl->args.arg_offset_rtx, + NULL_RTX, 0, OPTAB_LIB_WIDEN); + convert_move (va_r, next, 0); + + /* Store zero bounds for va_list. */ + if (chkp_function_instrumented_p (current_function_decl)) + chkp_expand_bounds_reset_for_mem (valist, + make_tree (TREE_TYPE (valist), + next)); + + } + return; + } + + f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); + f_fpr = DECL_CHAIN (f_gpr); + f_ovf = DECL_CHAIN (f_fpr); + f_sav = DECL_CHAIN (f_ovf); + + valist = build_simple_mem_ref (valist); + TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node); + /* The following should be folded into the MEM_REF offset. */ + gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist), + f_gpr, NULL_TREE); + fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist), + f_fpr, NULL_TREE); + ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist), + f_ovf, NULL_TREE); + sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist), + f_sav, NULL_TREE); + + /* Count number of gp and fp argument registers used. */ + words = crtl->args.info.words; + n_gpr = crtl->args.info.regno; + n_fpr = crtl->args.info.sse_regno; + + if (cfun->va_list_gpr_size) + { + type = TREE_TYPE (gpr); + t = build2 (MODIFY_EXPR, type, + gpr, build_int_cst (type, n_gpr * 8)); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } + + if (TARGET_SSE && cfun->va_list_fpr_size) + { + type = TREE_TYPE (fpr); + t = build2 (MODIFY_EXPR, type, fpr, + build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX)); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } + + /* Find the overflow area. 
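The two MODIFY_EXPRs built by ix86_va_start above initialize the counters so that the first va_arg picks up the next unused save-area slot: gp_offset starts at n_gpr * 8 and fp_offset at 48 + n_fpr * 16. A sketch of that arithmetic (illustrative only):

#include <stdio.h>

#define REGPARM_MAX 6

/* Mirror of the two assignments built above, as plain arithmetic.  */
static void
va_start_offsets (int n_named_gpr, int n_named_sse,
                  unsigned *gp_offset, unsigned *fp_offset)
{
  *gp_offset = n_named_gpr * 8;                    /* 8 bytes per GPR slot  */
  *fp_offset = REGPARM_MAX * 8 + n_named_sse * 16; /* 16 bytes per XMM slot */
}

int
main (void)
{
  unsigned gp, fp;
  /* e.g. printf (const char *fmt, ...): one named pointer argument.  */
  va_start_offsets (1, 0, &gp, &fp);
  printf ("gp_offset = %u, fp_offset = %u\n", gp, fp);   /* 8, 48 */
  return 0;
}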
*/ + type = TREE_TYPE (ovf); + if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) + ovf_rtx = crtl->args.internal_arg_pointer; + else + ovf_rtx = cfun->machine->split_stack_varargs_pointer; + t = make_tree (type, ovf_rtx); + if (words != 0) + t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD); + + /* Store zero bounds for overflow area pointer. */ + if (chkp_function_instrumented_p (current_function_decl)) + chkp_expand_bounds_reset_for_mem (ovf, t); + + t = build2 (MODIFY_EXPR, type, ovf, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + if (ix86_varargs_gpr_size || ix86_varargs_fpr_size) + { + /* Find the register save area. + Prologue of the function save it right above stack frame. */ + type = TREE_TYPE (sav); + t = make_tree (type, frame_pointer_rtx); + if (!ix86_varargs_gpr_size) + t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX); + + /* Store zero bounds for save area pointer. */ + if (chkp_function_instrumented_p (current_function_decl)) + chkp_expand_bounds_reset_for_mem (sav, t); + + t = build2 (MODIFY_EXPR, type, sav, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } +} + +/* Implement va_arg. */ + +static tree +ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) +{ + static const int intreg[6] = { 0, 1, 2, 3, 4, 5 }; + tree f_gpr, f_fpr, f_ovf, f_sav; + tree gpr, fpr, ovf, sav, t; + int size, rsize; + tree lab_false, lab_over = NULL_TREE; + tree addr, t2; + rtx container; + int indirect_p = 0; + tree ptrtype; + machine_mode nat_mode; + unsigned int arg_boundary; + + /* Only 64bit target needs something special. */ + if (is_va_list_char_pointer (TREE_TYPE (valist))) + return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); + + f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); + f_fpr = DECL_CHAIN (f_gpr); + f_ovf = DECL_CHAIN (f_fpr); + f_sav = DECL_CHAIN (f_ovf); + + gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), + valist, f_gpr, NULL_TREE); + + fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); + ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); + sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); + + indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); + if (indirect_p) + type = build_pointer_type (type); + size = int_size_in_bytes (type); + rsize = CEIL (size, UNITS_PER_WORD); + + nat_mode = type_natural_mode (type, NULL, false); + switch (nat_mode) + { + case V8SFmode: + case V8SImode: + case V32QImode: + case V16HImode: + case V4DFmode: + case V4DImode: + case V16SFmode: + case V16SImode: + case V64QImode: + case V32HImode: + case V8DFmode: + case V8DImode: + /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */ + if (!TARGET_64BIT_MS_ABI) + { + container = NULL; + break; + } + + default: + container = construct_container (nat_mode, TYPE_MODE (type), + type, 0, X86_64_REGPARM_MAX, + X86_64_SSE_REGPARM_MAX, intreg, + 0); + break; + } + + /* Pull the value out of the saved registers. 
*/ + + addr = create_tmp_var (ptr_type_node, "addr"); + + if (container) + { + int needed_intregs, needed_sseregs; + bool need_temp; + tree int_addr, sse_addr; + + lab_false = create_artificial_label (UNKNOWN_LOCATION); + lab_over = create_artificial_label (UNKNOWN_LOCATION); + + examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs); + + need_temp = (!REG_P (container) + && ((needed_intregs && TYPE_ALIGN (type) > 64) + || TYPE_ALIGN (type) > 128)); + + /* In case we are passing structure, verify that it is consecutive block + on the register save area. If not we need to do moves. */ + if (!need_temp && !REG_P (container)) + { + /* Verify that all registers are strictly consecutive */ + if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) + { + int i; + + for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) + { + rtx slot = XVECEXP (container, 0, i); + if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i + || INTVAL (XEXP (slot, 1)) != i * 16) + need_temp = true; + } + } + else + { + int i; + + for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) + { + rtx slot = XVECEXP (container, 0, i); + if (REGNO (XEXP (slot, 0)) != (unsigned int) i + || INTVAL (XEXP (slot, 1)) != i * 8) + need_temp = true; + } + } + } + if (!need_temp) + { + int_addr = addr; + sse_addr = addr; + } + else + { + int_addr = create_tmp_var (ptr_type_node, "int_addr"); + sse_addr = create_tmp_var (ptr_type_node, "sse_addr"); + } + + /* First ensure that we fit completely in registers. */ + if (needed_intregs) + { + t = build_int_cst (TREE_TYPE (gpr), + (X86_64_REGPARM_MAX - needed_intregs + 1) * 8); + t = build2 (GE_EXPR, boolean_type_node, gpr, t); + t2 = build1 (GOTO_EXPR, void_type_node, lab_false); + t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); + gimplify_and_add (t, pre_p); + } + if (needed_sseregs) + { + t = build_int_cst (TREE_TYPE (fpr), + (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16 + + X86_64_REGPARM_MAX * 8); + t = build2 (GE_EXPR, boolean_type_node, fpr, t); + t2 = build1 (GOTO_EXPR, void_type_node, lab_false); + t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); + gimplify_and_add (t, pre_p); + } + + /* Compute index to start of area used for integer regs. 
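The register-fit tests built just above jump to the overflow label once gp_offset >= (6 - needed_intregs + 1) * 8, and analogously for the SSE counter with 16-byte slots biased by the 48-byte GPR area. A standalone restatement of the same inequalities (illustrative only):

#include <stdio.h>
#include <stdbool.h>

#define REGPARM_MAX      6
#define SSE_REGPARM_MAX  8

/* True if the next argument can still be fetched from the save area.  */
static bool
fits_in_gprs (unsigned gp_offset, int needed_intregs)
{
  return gp_offset < (unsigned) (REGPARM_MAX - needed_intregs + 1) * 8;
}

static bool
fits_in_sseregs (unsigned fp_offset, int needed_sseregs)
{
  return fp_offset < (unsigned) ((SSE_REGPARM_MAX - needed_sseregs + 1) * 16
                                 + REGPARM_MAX * 8);
}

int
main (void)
{
  /* A two-word struct with gp_offset already at 40: only one slot left.  */
  printf ("2 GPRs at gp_offset 40:   %s\n",
          fits_in_gprs (40, 2) ? "registers" : "overflow area");
  printf ("1 SSE reg at fp_offset 160: %s\n",
          fits_in_sseregs (160, 1) ? "registers" : "overflow area");
  return 0;
}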
*/ + if (needed_intregs) + { + /* int_addr = gpr + sav; */ + t = fold_build_pointer_plus (sav, gpr); + gimplify_assign (int_addr, t, pre_p); + } + if (needed_sseregs) + { + /* sse_addr = fpr + sav; */ + t = fold_build_pointer_plus (sav, fpr); + gimplify_assign (sse_addr, t, pre_p); + } + if (need_temp) + { + int i, prev_size = 0; + tree temp = create_tmp_var (type, "va_arg_tmp"); + + /* addr = &temp; */ + t = build1 (ADDR_EXPR, build_pointer_type (type), temp); + gimplify_assign (addr, t, pre_p); + + for (i = 0; i < XVECLEN (container, 0); i++) + { + rtx slot = XVECEXP (container, 0, i); + rtx reg = XEXP (slot, 0); + machine_mode mode = GET_MODE (reg); + tree piece_type; + tree addr_type; + tree daddr_type; + tree src_addr, src; + int src_offset; + tree dest_addr, dest; + int cur_size = GET_MODE_SIZE (mode); + + gcc_assert (prev_size <= INTVAL (XEXP (slot, 1))); + prev_size = INTVAL (XEXP (slot, 1)); + if (prev_size + cur_size > size) + { + cur_size = size - prev_size; + mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1); + if (mode == BLKmode) + mode = QImode; + } + piece_type = lang_hooks.types.type_for_mode (mode, 1); + if (mode == GET_MODE (reg)) + addr_type = build_pointer_type (piece_type); + else + addr_type = build_pointer_type_for_mode (piece_type, ptr_mode, + true); + daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode, + true); + + if (SSE_REGNO_P (REGNO (reg))) + { + src_addr = sse_addr; + src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16; + } + else + { + src_addr = int_addr; + src_offset = REGNO (reg) * 8; + } + src_addr = fold_convert (addr_type, src_addr); + src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset); + + dest_addr = fold_convert (daddr_type, addr); + dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size); + if (cur_size == GET_MODE_SIZE (mode)) + { + src = build_va_arg_indirect_ref (src_addr); + dest = build_va_arg_indirect_ref (dest_addr); + + gimplify_assign (dest, src, pre_p); + } + else + { + tree copy + = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY), + 3, dest_addr, src_addr, + size_int (cur_size)); + gimplify_and_add (copy, pre_p); + } + prev_size += cur_size; + } + } + + if (needed_intregs) + { + t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr, + build_int_cst (TREE_TYPE (gpr), needed_intregs * 8)); + gimplify_assign (gpr, t, pre_p); + } + + if (needed_sseregs) + { + t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr, + build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16)); + gimplify_assign (unshare_expr (fpr), t, pre_p); + } + + gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); + + gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); + } + + /* ... otherwise out of the overflow area. */ + + /* When we align parameter on stack for caller, if the parameter + alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be + aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee + here with caller. */ + arg_boundary = ix86_function_arg_boundary (VOIDmode, type); + if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT) + arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT; + + /* Care for on-stack alignment if needed. 
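The on-stack alignment handled just below (add align - 1 to the overflow pointer, then mask with -align) is the standard align-up idiom for power-of-two boundaries. A minimal standalone sketch of the same computation:

#include <stdio.h>
#include <stdint.h>

/* Round ADDR up to the next multiple of ALIGN (a power of two), exactly
   as the pointer-plus and BIT_AND_EXPR built below do.  */
static uintptr_t
align_up (uintptr_t addr, uintptr_t align)
{
  return (addr + align - 1) & -align;
}

int
main (void)
{
  printf ("align_up (0x1004, 16) = 0x%lx\n",
          (unsigned long) align_up (0x1004, 16));   /* 0x1010 */
  printf ("align_up (0x1010, 16) = 0x%lx\n",
          (unsigned long) align_up (0x1010, 16));   /* 0x1010 */
  return 0;
}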
*/ + if (arg_boundary <= 64 || size == 0) + t = ovf; + else + { + HOST_WIDE_INT align = arg_boundary / 8; + t = fold_build_pointer_plus_hwi (ovf, align - 1); + t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, + build_int_cst (TREE_TYPE (t), -align)); + } + + gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); + gimplify_assign (addr, t, pre_p); + + t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD); + gimplify_assign (unshare_expr (ovf), t, pre_p); + + if (container) + gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); + + ptrtype = build_pointer_type_for_mode (type, ptr_mode, true); + addr = fold_convert (ptrtype, addr); + + if (indirect_p) + addr = build_va_arg_indirect_ref (addr); + return build_va_arg_indirect_ref (addr); +} + +/* Return true if OPNUM's MEM should be matched + in movabs* patterns. */ + +bool +ix86_check_movabs (rtx insn, int opnum) +{ + rtx set, mem; + + set = PATTERN (insn); + if (GET_CODE (set) == PARALLEL) + set = XVECEXP (set, 0, 0); + gcc_assert (GET_CODE (set) == SET); + mem = XEXP (set, opnum); + while (SUBREG_P (mem)) + mem = SUBREG_REG (mem); + gcc_assert (MEM_P (mem)); + return volatile_ok || !MEM_VOLATILE_P (mem); +} + +/* Return false if INSN contains a MEM with a non-default address space. */ +bool +ix86_check_no_addr_space (rtx insn) +{ + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL) + { + rtx x = *iter; + if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))) + return false; + } + return true; +} + +/* Initialize the table of extra 80387 mathematical constants. */ + +static void +init_ext_80387_constants (void) +{ + static const char * cst[5] = + { + "0.3010299956639811952256464283594894482", /* 0: fldlg2 */ + "0.6931471805599453094286904741849753009", /* 1: fldln2 */ + "1.4426950408889634073876517827983434472", /* 2: fldl2e */ + "3.3219280948873623478083405569094566090", /* 3: fldl2t */ + "3.1415926535897932385128089594061862044", /* 4: fldpi */ + }; + int i; + + for (i = 0; i < 5; i++) + { + real_from_string (&ext_80387_constants_table[i], cst[i]); + /* Ensure each constant is rounded to XFmode precision. */ + real_convert (&ext_80387_constants_table[i], + XFmode, &ext_80387_constants_table[i]); + } + + ext_80387_constants_init = 1; +} + +/* Return non-zero if the constant is something that + can be loaded with a special instruction. */ + +int +standard_80387_constant_p (rtx x) +{ + machine_mode mode = GET_MODE (x); + + const REAL_VALUE_TYPE *r; + + if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode))) + return -1; + + if (x == CONST0_RTX (mode)) + return 1; + if (x == CONST1_RTX (mode)) + return 2; + + r = CONST_DOUBLE_REAL_VALUE (x); + + /* For XFmode constants, try to find a special 80387 instruction when + optimizing for size or on those CPUs that benefit from them. */ + if (mode == XFmode + && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)) + { + int i; + + if (! ext_80387_constants_init) + init_ext_80387_constants (); + + for (i = 0; i < 5; i++) + if (real_identical (r, &ext_80387_constants_table[i])) + return i + 3; + } + + /* Load of the constant -0.0 or -1.0 will be split as + fldz;fchs or fld1;fchs sequence. */ + if (real_isnegzero (r)) + return 8; + if (real_identical (r, &dconstm1)) + return 9; + + return 0; +} + +/* Return the opcode of the special instruction to be used to load + the constant X. 
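The five strings in init_ext_80387_constants above are the values produced by the x87 load-constant instructions. As a hedged illustration, the standalone program below recomputes them with the host libm (link with -lm); GCC of course uses its own real.c arithmetic, so only the leading digits are meaningful here:

#include <stdio.h>
#include <math.h>

int
main (void)
{
  /* Constants with dedicated x87 load instructions.  */
  printf ("fldlg2: log10(2) = %.19Lf\n", log10l (2.0L));
  printf ("fldln2: ln(2)    = %.19Lf\n", logl (2.0L));
  printf ("fldl2e: log2(e)  = %.19Lf\n", log2l (expl (1.0L)));
  printf ("fldl2t: log2(10) = %.19Lf\n", log2l (10.0L));
  printf ("fldpi:  pi       = %.19Lf\n", acosl (-1.0L));
  return 0;
}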
*/ + +const char * +standard_80387_constant_opcode (rtx x) +{ + switch (standard_80387_constant_p (x)) + { + case 1: + return "fldz"; + case 2: + return "fld1"; + case 3: + return "fldlg2"; + case 4: + return "fldln2"; + case 5: + return "fldl2e"; + case 6: + return "fldl2t"; + case 7: + return "fldpi"; + case 8: + case 9: + return "#"; + default: + gcc_unreachable (); + } +} + +/* Return the CONST_DOUBLE representing the 80387 constant that is + loaded by the specified special instruction. The argument IDX + matches the return value from standard_80387_constant_p. */ + +rtx +standard_80387_constant_rtx (int idx) +{ + int i; + + if (! ext_80387_constants_init) + init_ext_80387_constants (); + + switch (idx) + { + case 3: + case 4: + case 5: + case 6: + case 7: + i = idx - 3; + break; + + default: + gcc_unreachable (); + } + + return const_double_from_real_value (ext_80387_constants_table[i], + XFmode); +} + +/* Return 1 if X is all 0s and 2 if x is all 1s + in supported SSE/AVX vector mode. */ + +int +standard_sse_constant_p (rtx x) +{ + machine_mode mode; + + if (!TARGET_SSE) + return 0; + + mode = GET_MODE (x); + + if (x == const0_rtx || x == CONST0_RTX (mode)) + return 1; + if (vector_all_ones_operand (x, mode)) + switch (mode) + { + case V16QImode: + case V8HImode: + case V4SImode: + case V2DImode: + if (TARGET_SSE2) + return 2; + case V32QImode: + case V16HImode: + case V8SImode: + case V4DImode: + if (TARGET_AVX2) + return 2; + case V64QImode: + case V32HImode: + case V16SImode: + case V8DImode: + if (TARGET_AVX512F) + return 2; + default: + break; + } + + return 0; +} + +/* Return the opcode of the special instruction to be used to load + the constant X. */ + +const char * +standard_sse_constant_opcode (rtx_insn *insn, rtx x) +{ + switch (standard_sse_constant_p (x)) + { + case 1: + switch (get_attr_mode (insn)) + { + case MODE_XI: + return "vpxord\t%g0, %g0, %g0"; + case MODE_V16SF: + return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0" + : "vpxord\t%g0, %g0, %g0"; + case MODE_V8DF: + return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0" + : "vpxorq\t%g0, %g0, %g0"; + case MODE_TI: + return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0" + : "%vpxor\t%0, %d0"; + case MODE_V2DF: + return "%vxorpd\t%0, %d0"; + case MODE_V4SF: + return "%vxorps\t%0, %d0"; + + case MODE_OI: + return TARGET_AVX512VL ? 
"vpxord\t%x0, %x0, %x0" + : "vpxor\t%x0, %x0, %x0"; + case MODE_V4DF: + return "vxorpd\t%x0, %x0, %x0"; + case MODE_V8SF: + return "vxorps\t%x0, %x0, %x0"; + + default: + break; + } + + case 2: + if (TARGET_AVX512VL + || get_attr_mode (insn) == MODE_XI + || get_attr_mode (insn) == MODE_V8DF + || get_attr_mode (insn) == MODE_V16SF) + return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; + if (TARGET_AVX) + return "vpcmpeqd\t%0, %0, %0"; + else + return "pcmpeqd\t%0, %0"; + + default: + break; + } + gcc_unreachable (); +} + +/* Returns true if OP contains a symbol reference */ + +bool +symbolic_reference_mentioned_p (rtx op) +{ + const char *fmt; + int i; + + if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) + return true; + + fmt = GET_RTX_FORMAT (GET_CODE (op)); + for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + int j; + + for (j = XVECLEN (op, i) - 1; j >= 0; j--) + if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) + return true; + } + + else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) + return true; + } + + return false; +} + +/* Return true if it is appropriate to emit `ret' instructions in the + body of a function. Do this only if the epilogue is simple, needing a + couple of insns. Prior to reloading, we can't tell how many registers + must be saved, so return false then. Return false if there is no frame + marker to de-allocate. */ + +bool +ix86_can_use_return_insn_p (void) +{ + struct ix86_frame frame; + + if (! reload_completed || frame_pointer_needed) + return 0; + + /* Don't allow more than 32k pop, since that's all we can do + with one instruction. */ + if (crtl->args.pops_args && crtl->args.size >= 32768) + return 0; + + ix86_compute_frame_layout (&frame); + return (frame.stack_pointer_offset == UNITS_PER_WORD + && (frame.nregs + frame.nsseregs) == 0); +} + +/* Value should be nonzero if functions must have frame pointers. + Zero means the frame pointer need not be set up (and parms may + be accessed via the stack pointer) in functions that seem suitable. */ + +static bool +ix86_frame_pointer_required (void) +{ + /* If we accessed previous frames, then the generated code expects + to be able to access the saved ebp value in our frame. */ + if (cfun->machine->accesses_prev_frame) + return true; + + /* Several x86 os'es need a frame pointer for other reasons, + usually pertaining to setjmp. */ + if (SUBTARGET_FRAME_POINTER_REQUIRED) + return true; + + /* For older 32-bit runtimes setjmp requires valid frame-pointer. */ + if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp) + return true; + + /* Win64 SEH, very large frames need a frame-pointer as maximum stack + allocation is 4GB. */ + if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE) + return true; + + /* SSE saves require frame-pointer when stack is misaligned. */ + if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128) + return true; + + /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER + turns off the frame pointer by default. Turn it back on now if + we've not got a leaf function. */ + if (TARGET_OMIT_LEAF_FRAME_POINTER + && (!crtl->is_leaf + || ix86_current_function_calls_tls_descriptor)) + return true; + + if (crtl->profile && !flag_fentry) + return true; + + return false; +} + +/* Record that the current function accesses previous call frames. 
*/ + +void +ix86_setup_frame_addresses (void) +{ + cfun->machine->accesses_prev_frame = 1; +} + +#ifndef USE_HIDDEN_LINKONCE +# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0) +# define USE_HIDDEN_LINKONCE 1 +# else +# define USE_HIDDEN_LINKONCE 0 +# endif +#endif + +static int pic_labels_used; + +/* Fills in the label name that should be used for a pc thunk for + the given register. */ + +static void +get_pc_thunk_name (char name[32], unsigned int regno) +{ + gcc_assert (!TARGET_64BIT); + + if (USE_HIDDEN_LINKONCE) + sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]); + else + ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); +} + + +/* This function generates code for -fpic that loads %ebx with + the return address of the caller and then returns. */ + +static void +ix86_code_end (void) +{ + rtx xops[2]; + int regno; + + for (regno = AX_REG; regno <= SP_REG; regno++) + { + char name[32]; + tree decl; + + if (!(pic_labels_used & (1 << regno))) + continue; + + get_pc_thunk_name (name, regno); + + decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, + get_identifier (name), + build_function_type_list (void_type_node, NULL_TREE)); + DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, + NULL_TREE, void_type_node); + TREE_PUBLIC (decl) = 1; + TREE_STATIC (decl) = 1; + DECL_IGNORED_P (decl) = 1; + +#if TARGET_MACHO + if (TARGET_MACHO) + { + switch_to_section (darwin_sections[picbase_thunk_section]); + fputs ("\t.weak_definition\t", asm_out_file); + assemble_name (asm_out_file, name); + fputs ("\n\t.private_extern\t", asm_out_file); + assemble_name (asm_out_file, name); + putc ('\n', asm_out_file); + ASM_OUTPUT_LABEL (asm_out_file, name); + DECL_WEAK (decl) = 1; + } + else +#endif + if (USE_HIDDEN_LINKONCE) + { + cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); + + targetm.asm_out.unique_section (decl, 0); + switch_to_section (get_named_section (decl, NULL, 0)); + + targetm.asm_out.globalize_label (asm_out_file, name); + fputs ("\t.hidden\t", asm_out_file); + assemble_name (asm_out_file, name); + putc ('\n', asm_out_file); + ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); + } + else + { + switch_to_section (text_section); + ASM_OUTPUT_LABEL (asm_out_file, name); + } + + DECL_INITIAL (decl) = make_node (BLOCK); + current_function_decl = decl; + allocate_struct_function (decl, false); + init_function_start (decl); + /* We're about to hide the function body from callees of final_* by + emitting it directly; tell them we're a thunk, if they care. */ + cfun->is_thunk = true; + first_function_block_is_cold = false; + /* Make sure unwind info is emitted for the thunk if needed. */ + final_start_function (emit_barrier (), asm_out_file, 1); + + /* Pad stack IP move with 4 instructions (two NOPs count + as one instruction). */ + if (TARGET_PAD_SHORT_FUNCTION) + { + int i = 8; + + while (i--) + fputs ("\tnop\n", asm_out_file); + } + + xops[0] = gen_rtx_REG (Pmode, regno); + xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); + output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); + output_asm_insn ("%!ret", NULL); + final_end_function (); + init_insn_lengths (); + free_after_compilation (cfun); + set_cfun (NULL); + current_function_decl = NULL; + } + + if (flag_split_stack) + file_end_indicate_split_stack (); +} + +/* Emit code for the SET_GOT patterns. */ + +const char * +output_set_got (rtx dest, rtx label) +{ + rtx xops[3]; + + xops[0] = dest; + + if (TARGET_VXWORKS_RTP && flag_pic) + { + /* Load (*VXWORKS_GOTT_BASE) into the PIC register. 
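The thunks emitted by ix86_code_end above exist because 32-bit x86 has no pc-relative addressing: calling a one-instruction function that copies the return address from the stack into a register is the conventional way to obtain the current PC. A sketch of the naming scheme used by get_pc_thunk_name (the register-name list here is the editor's abbreviation, not GCC's reg_names table):

#include <stdio.h>

int
main (void)
{
  /* The thunk body is simply:  mov (%esp), %REG ; ret  */
  static const char *const regs[] = { "ax", "cx", "dx", "bx",
                                      "si", "di", "bp", "sp" };
  char name[32];

  for (unsigned i = 0; i < sizeof regs / sizeof *regs; i++)
    {
      snprintf (name, sizeof name, "__x86.get_pc_thunk.%s", regs[i]);
      printf ("%s:\n\tmovl\t(%%esp), %%e%s\n\tret\n", name, regs[i]);
    }
  return 0;
}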
*/ + xops[2] = gen_rtx_MEM (Pmode, + gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE)); + output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); + + /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register. + Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as + an unadorned address. */ + xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); + SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL; + output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops); + return ""; + } + + xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); + + if (flag_pic) + { + char name[32]; + get_pc_thunk_name (name, REGNO (dest)); + pic_labels_used |= 1 << REGNO (dest); + + xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); + xops[2] = gen_rtx_MEM (QImode, xops[2]); + output_asm_insn ("%!call\t%X2", xops); + +#if TARGET_MACHO + /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here. + This is what will be referenced by the Mach-O PIC subsystem. */ + if (machopic_should_output_picbase_label () || !label) + ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME); + + /* When we are restoring the pic base at the site of a nonlocal label, + and we decided to emit the pic base above, we will still output a + local label used for calculating the correction offset (even though + the offset will be 0 in that case). */ + if (label) + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (label)); +#endif + } + else + { + if (TARGET_MACHO) + /* We don't need a pic base, we're not producing pic. */ + gcc_unreachable (); + + xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ()); + output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (XEXP (xops[2], 0))); + } + + if (!TARGET_MACHO) + output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops); + + return ""; +} + +/* Generate an "push" pattern for input ARG. */ + +static rtx +gen_push (rtx arg) +{ + struct machine_function *m = cfun->machine; + + if (m->fs.cfa_reg == stack_pointer_rtx) + m->fs.cfa_offset += UNITS_PER_WORD; + m->fs.sp_offset += UNITS_PER_WORD; + + if (REG_P (arg) && GET_MODE (arg) != word_mode) + arg = gen_rtx_REG (word_mode, REGNO (arg)); + + return gen_rtx_SET (gen_rtx_MEM (word_mode, + gen_rtx_PRE_DEC (Pmode, + stack_pointer_rtx)), + arg); +} + +/* Generate an "pop" pattern for input ARG. */ + +static rtx +gen_pop (rtx arg) +{ + if (REG_P (arg) && GET_MODE (arg) != word_mode) + arg = gen_rtx_REG (word_mode, REGNO (arg)); + + return gen_rtx_SET (arg, + gen_rtx_MEM (word_mode, + gen_rtx_POST_INC (Pmode, + stack_pointer_rtx))); +} + +/* Return >= 0 if there is an unused call-clobbered register available + for the entire function. */ + +static unsigned int +ix86_select_alt_pic_regnum (void) +{ + if (ix86_use_pseudo_pic_reg ()) + return INVALID_REGNUM; + + if (crtl->is_leaf + && !crtl->profile + && !ix86_current_function_calls_tls_descriptor) + { + int i, drap; + /* Can't use the same register for both PIC and DRAP. */ + if (crtl->drap_reg) + drap = REGNO (crtl->drap_reg); + else + drap = -1; + for (i = 2; i >= 0; --i) + if (i != drap && !df_regs_ever_live_p (i)) + return i; + } + + return INVALID_REGNUM; +} + +/* Return TRUE if we need to save REGNO. */ + +static bool +ix86_save_reg (unsigned int regno, bool maybe_eh_return) +{ + if (regno == REAL_PIC_OFFSET_TABLE_REGNUM + && pic_offset_table_rtx) + { + if (ix86_use_pseudo_pic_reg ()) + { + /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to + _mcount in prologue. 
*/ + if (!TARGET_64BIT && flag_pic && crtl->profile) + return true; + } + else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) + || crtl->profile + || crtl->calls_eh_return + || crtl->uses_const_pool + || cfun->has_nonlocal_label) + return ix86_select_alt_pic_regnum () == INVALID_REGNUM; + } + + if (crtl->calls_eh_return && maybe_eh_return) + { + unsigned i; + for (i = 0; ; i++) + { + unsigned test = EH_RETURN_DATA_REGNO (i); + if (test == INVALID_REGNUM) + break; + if (test == regno) + return true; + } + } + + if (crtl->drap_reg + && regno == REGNO (crtl->drap_reg) + && !cfun->machine->no_drap_save_restore) + return true; + + return (df_regs_ever_live_p (regno) + && !call_used_regs[regno] + && !fixed_regs[regno] + && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); +} + +/* Return number of saved general prupose registers. */ + +static int +ix86_nsaved_regs (void) +{ + int nregs = 0; + int regno; + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true)) + nregs ++; + return nregs; +} + +/* Return number of saved SSE registers. */ + +static int +ix86_nsaved_sseregs (void) +{ + int nregs = 0; + int regno; + + if (!TARGET_64BIT_MS_ABI) + return 0; + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) + nregs ++; + return nregs; +} + +/* Given FROM and TO register numbers, say whether this elimination is + allowed. If stack alignment is needed, we can only replace argument + pointer with hard frame pointer, or replace frame pointer with stack + pointer. Otherwise, frame pointer elimination is automatically + handled and all other eliminations are valid. */ + +static bool +ix86_can_eliminate (const int from, const int to) +{ + if (stack_realign_fp) + return ((from == ARG_POINTER_REGNUM + && to == HARD_FRAME_POINTER_REGNUM) + || (from == FRAME_POINTER_REGNUM + && to == STACK_POINTER_REGNUM)); + else + return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true; +} + +/* Return the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +HOST_WIDE_INT +ix86_initial_elimination_offset (int from, int to) +{ + struct ix86_frame frame; + ix86_compute_frame_layout (&frame); + + if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + return frame.hard_frame_pointer_offset; + else if (from == FRAME_POINTER_REGNUM + && to == HARD_FRAME_POINTER_REGNUM) + return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; + else + { + gcc_assert (to == STACK_POINTER_REGNUM); + + if (from == ARG_POINTER_REGNUM) + return frame.stack_pointer_offset; + + gcc_assert (from == FRAME_POINTER_REGNUM); + return frame.stack_pointer_offset - frame.frame_pointer_offset; + } +} + +/* In a dynamically-aligned function, we can't know the offset from + stack pointer to frame pointer, so we must ensure that setjmp + eliminates fp against the hard fp (%ebp) rather than trying to + index from %esp up to the top of the frame across a gap that is + of unknown (at compile-time) size. */ +static rtx +ix86_builtin_setjmp_frame_value (void) +{ + return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx; +} + +/* When using -fsplit-stack, the allocation routines set a field in + the TCB to the bottom of the stack plus this much space, measured + in bytes. */ + +#define SPLIT_STACK_AVAILABLE 256 + +/* Fill structure ix86_frame about frame of currently computed function. 
*/ + +static void +ix86_compute_frame_layout (struct ix86_frame *frame) +{ + unsigned HOST_WIDE_INT stack_alignment_needed; + HOST_WIDE_INT offset; + unsigned HOST_WIDE_INT preferred_alignment; + HOST_WIDE_INT size = get_frame_size (); + HOST_WIDE_INT to_allocate; + + frame->nregs = ix86_nsaved_regs (); + frame->nsseregs = ix86_nsaved_sseregs (); + + /* 64-bit MS ABI seem to require stack alignment to be always 16, + except for function prologues, leaf functions and when the defult + incoming stack boundary is overriden at command line or via + force_align_arg_pointer attribute. */ + if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128) + && (!crtl->is_leaf || cfun->calls_alloca != 0 + || ix86_current_function_calls_tls_descriptor + || ix86_incoming_stack_boundary < 128)) + { + crtl->preferred_stack_boundary = 128; + crtl->stack_alignment_needed = 128; + } + + stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; + preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; + + gcc_assert (!size || stack_alignment_needed); + gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); + gcc_assert (preferred_alignment <= stack_alignment_needed); + + /* For SEH we have to limit the amount of code movement into the prologue. + At present we do this via a BLOCKAGE, at which point there's very little + scheduling that can be done, which means that there's very little point + in doing anything except PUSHs. */ + if (TARGET_SEH) + cfun->machine->use_fast_prologue_epilogue = false; + + /* During reload iteration the amount of registers saved can change. + Recompute the value as needed. Do not recompute when amount of registers + didn't change as reload does multiple calls to the function and does not + expect the decision to change within single iteration. */ + else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)) + && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs) + { + int count = frame->nregs; + struct cgraph_node *node = cgraph_node::get (current_function_decl); + + cfun->machine->use_fast_prologue_epilogue_nregs = count; + + /* The fast prologue uses move instead of push to save registers. This + is significantly longer, but also executes faster as modern hardware + can execute the moves in parallel, but can't do that for push/pop. + + Be careful about choosing what prologue to emit: When function takes + many instructions to execute we may use slow version as well as in + case function is known to be outside hot spot (this is known with + feedback only). Weight the size of function by number of registers + to save as it is cheap to use one or two push instructions but very + slow to use many of them. */ + if (count) + count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; + if (node->frequency < NODE_FREQUENCY_NORMAL + || (flag_branch_probabilities + && node->frequency < NODE_FREQUENCY_HOT)) + cfun->machine->use_fast_prologue_epilogue = false; + else + cfun->machine->use_fast_prologue_epilogue + = !expensive_function_p (count); + } + + frame->save_regs_using_mov + = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue + /* If static stack checking is enabled and done with probes, + the registers need to be saved before allocating the frame. */ + && flag_stack_check != STATIC_BUILTIN_STACK_CHECK); + + /* Skip return address. */ + offset = UNITS_PER_WORD; + + /* Skip pushed static chain. */ + if (ix86_static_chain_on_stack) + offset += UNITS_PER_WORD; + + /* Skip saved base pointer. 
*/ + if (frame_pointer_needed) + offset += UNITS_PER_WORD; + frame->hfp_save_offset = offset; + + /* The traditional frame pointer location is at the top of the frame. */ + frame->hard_frame_pointer_offset = offset; + + /* Register save area */ + offset += frame->nregs * UNITS_PER_WORD; + frame->reg_save_offset = offset; + + /* On SEH target, registers are pushed just before the frame pointer + location. */ + if (TARGET_SEH) + frame->hard_frame_pointer_offset = offset; + + /* Align and set SSE register save area. */ + if (frame->nsseregs) + { + /* The only ABI that has saved SSE registers (Win64) also has a + 16-byte aligned default stack, and thus we don't need to be + within the re-aligned local stack frame to save them. In case + incoming stack boundary is aligned to less than 16 bytes, + unaligned move of SSE register will be emitted, so there is + no point to round up the SSE register save area outside the + re-aligned local stack frame to 16 bytes. */ + if (ix86_incoming_stack_boundary >= 128) + offset = ROUND_UP (offset, 16); + offset += frame->nsseregs * 16; + } + frame->sse_reg_save_offset = offset; + + /* The re-aligned stack starts here. Values before this point are not + directly comparable with values below this point. In order to make + sure that no value happens to be the same before and after, force + the alignment computation below to add a non-zero value. */ + if (stack_realign_fp) + offset = ROUND_UP (offset, stack_alignment_needed); + + /* Va-arg area */ + frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; + offset += frame->va_arg_size; + + /* Align start of frame for local function. */ + if (stack_realign_fp + || offset != frame->sse_reg_save_offset + || size != 0 + || !crtl->is_leaf + || cfun->calls_alloca + || ix86_current_function_calls_tls_descriptor) + offset = ROUND_UP (offset, stack_alignment_needed); + + /* Frame pointer points here. */ + frame->frame_pointer_offset = offset; + + offset += size; + + /* Add outgoing arguments area. Can be skipped if we eliminated + all the function calls as dead code. + Skipping is however impossible when function calls alloca. Alloca + expander assumes that last crtl->outgoing_args_size + of stack frame are unused. */ + if (ACCUMULATE_OUTGOING_ARGS + && (!crtl->is_leaf || cfun->calls_alloca + || ix86_current_function_calls_tls_descriptor)) + { + offset += crtl->outgoing_args_size; + frame->outgoing_arguments_size = crtl->outgoing_args_size; + } + else + frame->outgoing_arguments_size = 0; + + /* Align stack boundary. Only needed if we're calling another function + or using alloca. */ + if (!crtl->is_leaf || cfun->calls_alloca + || ix86_current_function_calls_tls_descriptor) + offset = ROUND_UP (offset, preferred_alignment); + + /* We've reached end of stack frame. */ + frame->stack_pointer_offset = offset; + + /* Size prologue needs to allocate. 
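The offset bookkeeping in ix86_compute_frame_layout above can be reduced, for the common 64-bit case, to plain arithmetic: return address, optional saved %rbp, GPR save area, 16-byte-rounded SSE save area, locals, outgoing arguments. The sketch below is a hedged simplification with made-up numbers; the real function tracks many more cases (red zone, SEH, DRAP, realignment):

#include <stdio.h>

#define UNITS_PER_WORD 8
#define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int
main (void)
{
  int nregs = 3, nsseregs = 0;           /* saved GPR / SSE registers */
  long locals = 40, outgoing = 32;       /* frame size, outgoing args */
  int frame_pointer_needed = 1;

  long offset = UNITS_PER_WORD;                    /* return address   */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;                      /* saved %rbp       */
  long hard_frame_pointer_offset = offset;

  offset += (long) nregs * UNITS_PER_WORD;         /* GPR save area    */
  if (nsseregs)
    {
      offset = ROUND_UP (offset, 16);              /* align SSE area   */
      offset += nsseregs * 16;
    }

  offset = ROUND_UP (offset, 16);                  /* start of locals  */
  long frame_pointer_offset = offset;
  offset += locals;
  offset += outgoing;                              /* outgoing args    */
  long stack_pointer_offset = ROUND_UP (offset, 16);

  printf ("hard frame pointer at CFA - %ld\n", hard_frame_pointer_offset);
  printf ("frame pointer at CFA - %ld\n", frame_pointer_offset);
  printf ("stack pointer at CFA - %ld\n", stack_pointer_offset);
  return 0;
}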
*/ + to_allocate = offset - frame->sse_reg_save_offset; + + if ((!to_allocate && frame->nregs <= 1) + || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000)) + frame->save_regs_using_mov = false; + + if (ix86_using_red_zone () + && crtl->sp_is_unchanging + && crtl->is_leaf + && !ix86_pc_thunk_call_expanded + && !ix86_current_function_calls_tls_descriptor) + { + frame->red_zone_size = to_allocate; + if (frame->save_regs_using_mov) + frame->red_zone_size += frame->nregs * UNITS_PER_WORD; + if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) + frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; + } + else + frame->red_zone_size = 0; + frame->stack_pointer_offset -= frame->red_zone_size; + + /* The SEH frame pointer location is near the bottom of the frame. + This is enforced by the fact that the difference between the + stack pointer and the frame pointer is limited to 240 bytes in + the unwind data structure. */ + if (TARGET_SEH) + { + HOST_WIDE_INT diff; + + /* If we can leave the frame pointer where it is, do so. Also, returns + the establisher frame for __builtin_frame_address (0). */ + diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset; + if (diff <= SEH_MAX_FRAME_SIZE + && (diff > 240 || (diff & 15) != 0) + && !crtl->accesses_prior_frames) + { + /* Ideally we'd determine what portion of the local stack frame + (within the constraint of the lowest 240) is most heavily used. + But without that complication, simply bias the frame pointer + by 128 bytes so as to maximize the amount of the local stack + frame that is addressable with 8-bit offsets. */ + frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128; + } + } +} + +/* This is semi-inlined memory_address_length, but simplified + since we know that we're always dealing with reg+offset, and + to avoid having to create and discard all that rtl. */ + +static inline int +choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset) +{ + int len = 4; + + if (offset == 0) + { + /* EBP and R13 cannot be encoded without an offset. */ + len = (regno == BP_REG || regno == R13_REG); + } + else if (IN_RANGE (offset, -128, 127)) + len = 1; + + /* ESP and R12 must be encoded with a SIB byte. */ + if (regno == SP_REG || regno == R12_REG) + len++; + + return len; +} + +/* Return an RTX that points to CFA_OFFSET within the stack frame. + The valid base registers are taken from CFUN->MACHINE->FS. */ + +static rtx +choose_baseaddr (HOST_WIDE_INT cfa_offset) +{ + const struct machine_function *m = cfun->machine; + rtx base_reg = NULL; + HOST_WIDE_INT base_offset = 0; + + if (m->use_fast_prologue_epilogue) + { + /* Choose the base register most likely to allow the most scheduling + opportunities. Generally FP is valid throughout the function, + while DRAP must be reloaded within the epilogue. But choose either + over the SP due to increased encoding size. */ + + if (m->fs.fp_valid) + { + base_reg = hard_frame_pointer_rtx; + base_offset = m->fs.fp_offset - cfa_offset; + } + else if (m->fs.drap_valid) + { + base_reg = crtl->drap_reg; + base_offset = 0 - cfa_offset; + } + else if (m->fs.sp_valid) + { + base_reg = stack_pointer_rtx; + base_offset = m->fs.sp_offset - cfa_offset; + } + } + else + { + HOST_WIDE_INT toffset; + int len = 16, tlen; + + /* Choose the base register with the smallest address encoding. + With a tie, choose FP > DRAP > SP. 
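The encoding-size estimate in choose_baseaddr_len above follows the x86 ModRM rules: a zero displacement usually needs no extra bytes (except for %ebp/%r13, which cannot be encoded without one), displacements in [-128, 127] need one byte, anything else four, and %esp/%r12 always cost one extra SIB byte. A standalone restatement (register identities passed as flags, since the hard register numbers are not meaningful outside the compiler):

#include <stdio.h>
#include <stdbool.h>

/* Illustrative mirror of choose_baseaddr_len: bytes needed to encode
   offset(reg) beyond the ModRM byte itself.  */
static int
baseaddr_len (bool is_bp_or_r13, bool is_sp_or_r12, long offset)
{
  int len = 4;                         /* disp32                    */

  if (offset == 0)
    len = is_bp_or_r13 ? 1 : 0;        /* bp/r13 still need a disp8 */
  else if (offset >= -128 && offset <= 127)
    len = 1;                           /* disp8                     */

  if (is_sp_or_r12)
    len++;                             /* SIB byte                  */

  return len;
}

int
main (void)
{
  printf ("0(%%rax)   -> %d extra bytes\n", baseaddr_len (false, false, 0));
  printf ("0(%%rbp)   -> %d extra bytes\n", baseaddr_len (true, false, 0));
  printf ("-64(%%rsp) -> %d extra bytes\n", baseaddr_len (false, true, -64));
  printf ("512(%%rbx) -> %d extra bytes\n", baseaddr_len (false, false, 512));
  return 0;
}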
*/ + if (m->fs.sp_valid) + { + base_reg = stack_pointer_rtx; + base_offset = m->fs.sp_offset - cfa_offset; + len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset); + } + if (m->fs.drap_valid) + { + toffset = 0 - cfa_offset; + tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset); + if (tlen <= len) + { + base_reg = crtl->drap_reg; + base_offset = toffset; + len = tlen; + } + } + if (m->fs.fp_valid) + { + toffset = m->fs.fp_offset - cfa_offset; + tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset); + if (tlen <= len) + { + base_reg = hard_frame_pointer_rtx; + base_offset = toffset; + len = tlen; + } + } + } + gcc_assert (base_reg != NULL); + + return plus_constant (Pmode, base_reg, base_offset); +} + +/* Emit code to save registers in the prologue. */ + +static void +ix86_emit_save_regs (void) +{ + unsigned int regno; + rtx_insn *insn; + + for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; ) + if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true)) + { + insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); + RTX_FRAME_RELATED_P (insn) = 1; + } +} + +/* Emit a single register save at CFA - CFA_OFFSET. */ + +static void +ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno, + HOST_WIDE_INT cfa_offset) +{ + struct machine_function *m = cfun->machine; + rtx reg = gen_rtx_REG (mode, regno); + rtx unspec = NULL_RTX; + rtx mem, addr, base, insn; + unsigned int align; + + addr = choose_baseaddr (cfa_offset); + mem = gen_frame_mem (mode, addr); + + /* The location is aligned up to INCOMING_STACK_BOUNDARY. */ + align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY); + set_mem_align (mem, align); + + /* SSE saves are not within re-aligned local stack frame. + In case INCOMING_STACK_BOUNDARY is misaligned, we have + to emit unaligned store. */ + if (mode == V4SFmode && align < 128) + unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU); + + insn = emit_insn (gen_rtx_SET (mem, unspec ? unspec : reg)); + RTX_FRAME_RELATED_P (insn) = 1; + + base = addr; + if (GET_CODE (base) == PLUS) + base = XEXP (base, 0); + gcc_checking_assert (REG_P (base)); + + /* When saving registers into a re-aligned local stack frame, avoid + any tricky guessing by dwarf2out. */ + if (m->fs.realigned) + { + gcc_checking_assert (stack_realign_drap); + + if (regno == REGNO (crtl->drap_reg)) + { + /* A bit of a hack. We force the DRAP register to be saved in + the re-aligned stack frame, which provides us with a copy + of the CFA that will last past the prologue. Install it. */ + gcc_checking_assert (cfun->machine->fs.fp_valid); + addr = plus_constant (Pmode, hard_frame_pointer_rtx, + cfun->machine->fs.fp_offset - cfa_offset); + mem = gen_rtx_MEM (mode, addr); + add_reg_note (insn, REG_CFA_DEF_CFA, mem); + } + else + { + /* The frame pointer is a stable reference within the + aligned frame. Use it. */ + gcc_checking_assert (cfun->machine->fs.fp_valid); + addr = plus_constant (Pmode, hard_frame_pointer_rtx, + cfun->machine->fs.fp_offset - cfa_offset); + mem = gen_rtx_MEM (mode, addr); + add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); + } + } + + /* The memory may not be relative to the current CFA register, + which means that we may need to generate a new pattern for + use by the unwind info. 
*/ + else if (base != m->fs.cfa_reg) + { + addr = plus_constant (Pmode, m->fs.cfa_reg, + m->fs.cfa_offset - cfa_offset); + mem = gen_rtx_MEM (mode, addr); + add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg)); + } + else if (unspec) + add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); +} + +/* Emit code to save registers using MOV insns. + First register is stored at CFA - CFA_OFFSET. */ +static void +ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset) +{ + unsigned int regno; + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true)) + { + ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); + cfa_offset -= UNITS_PER_WORD; + } +} + +/* Emit code to save SSE registers using MOV insns. + First register is stored at CFA - CFA_OFFSET. */ +static void +ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset) +{ + unsigned int regno; + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) + { + ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset); + cfa_offset -= GET_MODE_SIZE (V4SFmode); + } +} + +static GTY(()) rtx queued_cfa_restores; + +/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack + manipulation insn. The value is on the stack at CFA - CFA_OFFSET. + Don't add the note if the previously saved value will be left untouched + within stack red-zone till return, as unwinders can find the same value + in the register and on the stack. */ + +static void +ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset) +{ + if (!crtl->shrink_wrapped + && cfa_offset <= cfun->machine->fs.red_zone_offset) + return; + + if (insn) + { + add_reg_note (insn, REG_CFA_RESTORE, reg); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + queued_cfa_restores + = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores); +} + +/* Add queued REG_CFA_RESTORE notes if any to INSN. */ + +static void +ix86_add_queued_cfa_restore_notes (rtx insn) +{ + rtx last; + if (!queued_cfa_restores) + return; + for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1)) + ; + XEXP (last, 1) = REG_NOTES (insn); + REG_NOTES (insn) = queued_cfa_restores; + queued_cfa_restores = NULL_RTX; + RTX_FRAME_RELATED_P (insn) = 1; +} + +/* Expand prologue or epilogue stack adjustment. + The pattern exist to put a dependency on all ebp-based memory accesses. + STYLE should be negative if instructions should be marked as frame related, + zero if %r11 register is live and cannot be freely used and positive + otherwise. */ + +static void +pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, + int style, bool set_cfa) +{ + struct machine_function *m = cfun->machine; + rtx insn; + bool add_frame_related_expr = false; + + if (Pmode == SImode) + insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset); + else if (x86_64_immediate_operand (offset, DImode)) + insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset); + else + { + rtx tmp; + /* r11 is used by indirect sibcall return as well, set before the + epilogue and used after the epilogue. 
*/ + if (style) + tmp = gen_rtx_REG (DImode, R11_REG); + else + { + gcc_assert (src != hard_frame_pointer_rtx + && dest != hard_frame_pointer_rtx); + tmp = hard_frame_pointer_rtx; + } + insn = emit_insn (gen_rtx_SET (tmp, offset)); + if (style < 0) + add_frame_related_expr = true; + + insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp); + } + + insn = emit_insn (insn); + if (style >= 0) + ix86_add_queued_cfa_restore_notes (insn); + + if (set_cfa) + { + rtx r; + + gcc_assert (m->fs.cfa_reg == src); + m->fs.cfa_offset += INTVAL (offset); + m->fs.cfa_reg = dest; + + r = gen_rtx_PLUS (Pmode, src, offset); + r = gen_rtx_SET (dest, r); + add_reg_note (insn, REG_CFA_ADJUST_CFA, r); + RTX_FRAME_RELATED_P (insn) = 1; + } + else if (style < 0) + { + RTX_FRAME_RELATED_P (insn) = 1; + if (add_frame_related_expr) + { + rtx r = gen_rtx_PLUS (Pmode, src, offset); + r = gen_rtx_SET (dest, r); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, r); + } + } + + if (dest == stack_pointer_rtx) + { + HOST_WIDE_INT ooffset = m->fs.sp_offset; + bool valid = m->fs.sp_valid; + + if (src == hard_frame_pointer_rtx) + { + valid = m->fs.fp_valid; + ooffset = m->fs.fp_offset; + } + else if (src == crtl->drap_reg) + { + valid = m->fs.drap_valid; + ooffset = 0; + } + else + { + /* Else there are two possibilities: SP itself, which we set + up as the default above. Or EH_RETURN_STACKADJ_RTX, which is + taken care of this by hand along the eh_return path. */ + gcc_checking_assert (src == stack_pointer_rtx + || offset == const0_rtx); + } + + m->fs.sp_offset = ooffset - INTVAL (offset); + m->fs.sp_valid = valid; + } +} + +/* Find an available register to be used as dynamic realign argument + pointer regsiter. Such a register will be written in prologue and + used in begin of body, so it must not be + 1. parameter passing register. + 2. GOT pointer. + We reuse static-chain register if it is available. Otherwise, we + use DI for i386 and R13 for x86-64. We chose R13 since it has + shorter encoding. + + Return: the regno of chosen register. */ + +static unsigned int +find_drap_reg (void) +{ + tree decl = cfun->decl; + + if (TARGET_64BIT) + { + /* Use R13 for nested function or function need static chain. + Since function with tail call may use any caller-saved + registers in epilogue, DRAP must not use caller-saved + register in such case. */ + if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit) + return R13_REG; + + return R10_REG; + } + else + { + /* Use DI for nested function or function need static chain. + Since function with tail call may use any caller-saved + registers in epilogue, DRAP must not use caller-saved + register in such case. */ + if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit) + return DI_REG; + + /* Reuse static chain register if it isn't used for parameter + passing. */ + if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2) + { + unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl)); + if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0) + return CX_REG; + } + return DI_REG; + } +} + +/* Handle a "force_align_arg_pointer" attribute. 
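+
+ For illustration (a user-level example, not part of the handler
+ itself), the attribute is typically written as
+
+   void callback (void) __attribute__ ((force_align_arg_pointer));
+
+ and asks the prologue to realign the incoming stack, which helps when
+ the function can be called from code that only guarantees the legacy
+ 4-byte stack alignment.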
*/ + +static tree +ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name, + tree, int, bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_TYPE + && TREE_CODE (*node) != METHOD_TYPE + && TREE_CODE (*node) != FIELD_DECL + && TREE_CODE (*node) != TYPE_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +/* Return minimum incoming stack alignment. */ + +static unsigned int +ix86_minimum_incoming_stack_boundary (bool sibcall) +{ + unsigned int incoming_stack_boundary; + + /* Prefer the one specified at command line. */ + if (ix86_user_incoming_stack_boundary) + incoming_stack_boundary = ix86_user_incoming_stack_boundary; + /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary + if -mstackrealign is used, it isn't used for sibcall check and + estimated stack alignment is 128bit. */ + else if (!sibcall + && ix86_force_align_arg_pointer + && crtl->stack_alignment_estimated == 128) + incoming_stack_boundary = MIN_STACK_BOUNDARY; + else + incoming_stack_boundary = ix86_default_incoming_stack_boundary; + + /* Incoming stack alignment can be changed on individual functions + via force_align_arg_pointer attribute. We use the smallest + incoming stack boundary. */ + if (incoming_stack_boundary > MIN_STACK_BOUNDARY + && lookup_attribute (ix86_force_align_arg_pointer_string, + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) + incoming_stack_boundary = MIN_STACK_BOUNDARY; + + /* The incoming stack frame has to be aligned at least at + parm_stack_boundary. */ + if (incoming_stack_boundary < crtl->parm_stack_boundary) + incoming_stack_boundary = crtl->parm_stack_boundary; + + /* Stack at entrance of main is aligned by runtime. We use the + smallest incoming stack boundary. */ + if (incoming_stack_boundary > MAIN_STACK_BOUNDARY + && DECL_NAME (current_function_decl) + && MAIN_NAME_P (DECL_NAME (current_function_decl)) + && DECL_FILE_SCOPE_P (current_function_decl)) + incoming_stack_boundary = MAIN_STACK_BOUNDARY; + + return incoming_stack_boundary; +} + +/* Update incoming stack boundary and estimated stack alignment. */ + +static void +ix86_update_stack_boundary (void) +{ + ix86_incoming_stack_boundary + = ix86_minimum_incoming_stack_boundary (false); + + /* x86_64 vararg needs 16byte stack alignment for register save + area. */ + if (TARGET_64BIT + && cfun->stdarg + && crtl->stack_alignment_estimated < 128) + crtl->stack_alignment_estimated = 128; + + /* __tls_get_addr needs to be called with 16-byte aligned stack. */ + if (ix86_tls_descriptor_calls_expanded_in_cfun + && crtl->preferred_stack_boundary < 128) + crtl->preferred_stack_boundary = 128; +} + +/* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is + needed or an rtx for DRAP otherwise. 
*/ + +static rtx +ix86_get_drap_rtx (void) +{ + if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS) + crtl->need_drap = true; + + if (stack_realign_drap) + { + /* Assign DRAP to vDRAP and returns vDRAP */ + unsigned int regno = find_drap_reg (); + rtx drap_vreg; + rtx arg_ptr; + rtx_insn *seq, *insn; + + arg_ptr = gen_rtx_REG (Pmode, regno); + crtl->drap_reg = arg_ptr; + + start_sequence (); + drap_vreg = copy_to_reg (arg_ptr); + seq = get_insns (); + end_sequence (); + + insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); + if (!optimize) + { + add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg); + RTX_FRAME_RELATED_P (insn) = 1; + } + return drap_vreg; + } + else + return NULL; +} + +/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */ + +static rtx +ix86_internal_arg_pointer (void) +{ + return virtual_incoming_args_rtx; +} + +struct scratch_reg { + rtx reg; + bool saved; +}; + +/* Return a short-lived scratch register for use on function entry. + In 32-bit mode, it is valid only after the registers are saved + in the prologue. This register must be released by means of + release_scratch_register_on_entry once it is dead. */ + +static void +get_scratch_register_on_entry (struct scratch_reg *sr) +{ + int regno; + + sr->saved = false; + + if (TARGET_64BIT) + { + /* We always use R11 in 64-bit mode. */ + regno = R11_REG; + } + else + { + tree decl = current_function_decl, fntype = TREE_TYPE (decl); + bool fastcall_p + = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE; + bool thiscall_p + = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE; + bool static_chain_p = DECL_STATIC_CHAIN (decl); + int regparm = ix86_function_regparm (fntype, decl); + int drap_regno + = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM; + + /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax + for the static chain register. */ + if ((regparm < 1 || (fastcall_p && !static_chain_p)) + && drap_regno != AX_REG) + regno = AX_REG; + /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx + for the static chain register. */ + else if (thiscall_p && !static_chain_p && drap_regno != AX_REG) + regno = AX_REG; + else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG) + regno = DX_REG; + /* ecx is the static chain register. */ + else if (regparm < 3 && !fastcall_p && !thiscall_p + && !static_chain_p + && drap_regno != CX_REG) + regno = CX_REG; + else if (ix86_save_reg (BX_REG, true)) + regno = BX_REG; + /* esi is the static chain register. */ + else if (!(regparm == 3 && static_chain_p) + && ix86_save_reg (SI_REG, true)) + regno = SI_REG; + else if (ix86_save_reg (DI_REG, true)) + regno = DI_REG; + else + { + regno = (drap_regno == AX_REG ? DX_REG : AX_REG); + sr->saved = true; + } + } + + sr->reg = gen_rtx_REG (Pmode, regno); + if (sr->saved) + { + rtx_insn *insn = emit_insn (gen_push (sr->reg)); + RTX_FRAME_RELATED_P (insn) = 1; + } +} + +/* Release a scratch register obtained from the preceding function. */ + +static void +release_scratch_register_on_entry (struct scratch_reg *sr) +{ + if (sr->saved) + { + struct machine_function *m = cfun->machine; + rtx x, insn = emit_insn (gen_pop (sr->reg)); + + /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. 
*/ + RTX_FRAME_RELATED_P (insn) = 1; + x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD)); + x = gen_rtx_SET (stack_pointer_rtx, x); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, x); + m->fs.sp_offset -= UNITS_PER_WORD; + } +} + +#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) + +/* Emit code to adjust the stack pointer by SIZE bytes while probing it. */ + +static void +ix86_adjust_stack_and_probe (const HOST_WIDE_INT size) +{ + /* We skip the probe for the first interval + a small dope of 4 words and + probe that many bytes past the specified size to maintain a protection + area at the botton of the stack. */ + const int dope = 4 * UNITS_PER_WORD; + rtx size_rtx = GEN_INT (size), last; + + /* See if we have a constant small number of probes to generate. If so, + that's the easy case. The run-time loop is made up of 9 insns in the + generic case while the compile-time loop is made up of 3+2*(n-1) insns + for n # of intervals. */ + if (size <= 4 * PROBE_INTERVAL) + { + HOST_WIDE_INT i, adjust; + bool first_probe = true; + + /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for + values of N from 1 until it exceeds SIZE. If only one probe is + needed, this will not generate any code. Then adjust and probe + to PROBE_INTERVAL + SIZE. */ + for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) + { + if (first_probe) + { + adjust = 2 * PROBE_INTERVAL + dope; + first_probe = false; + } + else + adjust = PROBE_INTERVAL; + + emit_insn (gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -adjust))); + emit_stack_probe (stack_pointer_rtx); + } + + if (first_probe) + adjust = size + PROBE_INTERVAL + dope; + else + adjust = size + PROBE_INTERVAL - i; + + emit_insn (gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -adjust))); + emit_stack_probe (stack_pointer_rtx); + + /* Adjust back to account for the additional first interval. */ + last = emit_insn (gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + PROBE_INTERVAL + dope))); + } + + /* Otherwise, do the same as above, but in a loop. Note that we must be + extra careful with variables wrapping around because we might be at + the very top (or the very bottom) of the address space and we have + to be able to handle this case properly; in particular, we use an + equality test for the loop condition. */ + else + { + HOST_WIDE_INT rounded_size; + struct scratch_reg sr; + + get_scratch_register_on_entry (&sr); + + + /* Step 1: round SIZE to the previous multiple of the interval. */ + + rounded_size = ROUND_DOWN (size, PROBE_INTERVAL); + + + /* Step 2: compute initial and final value of the loop counter. */ + + /* SP = SP_0 + PROBE_INTERVAL. */ + emit_insn (gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + - (PROBE_INTERVAL + dope)))); + + /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */ + if (rounded_size <= (HOST_WIDE_INT_1 << 31)) + emit_insn (gen_rtx_SET (sr.reg, + plus_constant (Pmode, stack_pointer_rtx, + -rounded_size))); + else + { + emit_move_insn (sr.reg, GEN_INT (-rounded_size)); + emit_insn (gen_rtx_SET (sr.reg, + gen_rtx_PLUS (Pmode, sr.reg, + stack_pointer_rtx))); + } + + + /* Step 3: the loop + + do + { + SP = SP + PROBE_INTERVAL + probe at SP + } + while (SP != LAST_ADDR) + + adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for + values of N from 1 until it is equal to ROUNDED_SIZE. 
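+
+ As a worked example, assuming the default probe interval of 4096
+ bytes (STACK_CHECK_PROBE_INTERVAL_EXP == 12): a request of
+ size == 20000 takes this loop branch (20000 > 4 * 4096), rounds down
+ to rounded_size == 16384, runs four probe iterations, and leaves the
+ remaining 20000 - 16384 == 3616 bytes to the final adjust-and-probe
+ in step 4 below.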
*/ + + emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx)); + + + /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot + assert at compile-time that SIZE is equal to ROUNDED_SIZE. */ + + if (size != rounded_size) + { + emit_insn (gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + rounded_size - size))); + emit_stack_probe (stack_pointer_rtx); + } + + /* Adjust back to account for the additional first interval. */ + last = emit_insn (gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + PROBE_INTERVAL + dope))); + + release_scratch_register_on_entry (&sr); + } + + /* Even if the stack pointer isn't the CFA register, we need to correctly + describe the adjustments made to it, in particular differentiate the + frame-related ones from the frame-unrelated ones. */ + if (size > 0) + { + rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2)); + XVECEXP (expr, 0, 0) + = gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, -size)); + XVECEXP (expr, 0, 1) + = gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + PROBE_INTERVAL + dope + size)); + add_reg_note (last, REG_FRAME_RELATED_EXPR, expr); + RTX_FRAME_RELATED_P (last) = 1; + + cfun->machine->fs.sp_offset += size; + } + + /* Make sure nothing is scheduled before we are done. */ + emit_insn (gen_blockage ()); +} + +/* Adjust the stack pointer up to REG while probing it. */ + +const char * +output_adjust_stack_and_probe (rtx reg) +{ + static int labelno = 0; + char loop_lab[32]; + rtx xops[2]; + + ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); + + /* Loop. */ + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); + + /* SP = SP + PROBE_INTERVAL. */ + xops[0] = stack_pointer_rtx; + xops[1] = GEN_INT (PROBE_INTERVAL); + output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); + + /* Probe at SP. */ + xops[1] = const0_rtx; + output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops); + + /* Test if SP == LAST_ADDR. */ + xops[0] = stack_pointer_rtx; + xops[1] = reg; + output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops); + + /* Branch. */ + fputs ("\tjne\t", asm_out_file); + assemble_name_raw (asm_out_file, loop_lab); + fputc ('\n', asm_out_file); + + return ""; +} + +/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, + inclusive. These are offsets from the current stack pointer. */ + +static void +ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) +{ + /* See if we have a constant small number of probes to generate. If so, + that's the easy case. The run-time loop is made up of 6 insns in the + generic case while the compile-time loop is made up of n insns for n # + of intervals. */ + if (size <= 6 * PROBE_INTERVAL) + { + HOST_WIDE_INT i; + + /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until + it exceeds SIZE. If only one probe is needed, this will not + generate any code. Then probe at FIRST + SIZE. */ + for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) + emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, + -(first + i))); + + emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, + -(first + size))); + } + + /* Otherwise, do the same as above, but in a loop. Note that we must be + extra careful with variables wrapping around because we might be at + the very top (or the very bottom) of the address space and we have + to be able to handle this case properly; in particular, we use an + equality test for the loop condition. 
*/ + else + { + HOST_WIDE_INT rounded_size, last; + struct scratch_reg sr; + + get_scratch_register_on_entry (&sr); + + + /* Step 1: round SIZE to the previous multiple of the interval. */ + + rounded_size = ROUND_DOWN (size, PROBE_INTERVAL); + + + /* Step 2: compute initial and final value of the loop counter. */ + + /* TEST_OFFSET = FIRST. */ + emit_move_insn (sr.reg, GEN_INT (-first)); + + /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */ + last = first + rounded_size; + + + /* Step 3: the loop + + do + { + TEST_ADDR = TEST_ADDR + PROBE_INTERVAL + probe at TEST_ADDR + } + while (TEST_ADDR != LAST_ADDR) + + probes at FIRST + N * PROBE_INTERVAL for values of N from 1 + until it is equal to ROUNDED_SIZE. */ + + emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last))); + + + /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time + that SIZE is equal to ROUNDED_SIZE. */ + + if (size != rounded_size) + emit_stack_probe (plus_constant (Pmode, + gen_rtx_PLUS (Pmode, + stack_pointer_rtx, + sr.reg), + rounded_size - size)); + + release_scratch_register_on_entry (&sr); + } + + /* Make sure nothing is scheduled before we are done. */ + emit_insn (gen_blockage ()); +} + +/* Probe a range of stack addresses from REG to END, inclusive. These are + offsets from the current stack pointer. */ + +const char * +output_probe_stack_range (rtx reg, rtx end) +{ + static int labelno = 0; + char loop_lab[32]; + rtx xops[3]; + + ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); + + /* Loop. */ + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); + + /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ + xops[0] = reg; + xops[1] = GEN_INT (PROBE_INTERVAL); + output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); + + /* Probe at TEST_ADDR. */ + xops[0] = stack_pointer_rtx; + xops[1] = reg; + xops[2] = const0_rtx; + output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops); + + /* Test if TEST_ADDR == LAST_ADDR. */ + xops[0] = reg; + xops[1] = end; + output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops); + + /* Branch. */ + fputs ("\tjne\t", asm_out_file); + assemble_name_raw (asm_out_file, loop_lab); + fputc ('\n', asm_out_file); + + return ""; +} + +/* Finalize stack_realign_needed flag, which will guide prologue/epilogue + to be generated in correct form. */ +static void +ix86_finalize_stack_realign_flags (void) +{ + /* Check if stack realign is really needed after reload, and + stores result in cfun */ + unsigned int incoming_stack_boundary + = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary + ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary); + unsigned int stack_realign + = (incoming_stack_boundary + < (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor + ? crtl->max_used_stack_slot_alignment + : crtl->stack_alignment_needed)); + + if (crtl->stack_realign_finalized) + { + /* After stack_realign_needed is finalized, we can't no longer + change it. */ + gcc_assert (crtl->stack_realign_needed == stack_realign); + return; + } + + /* If the only reason for frame_pointer_needed is that we conservatively + assumed stack realignment might be needed, but in the end nothing that + needed the stack alignment had been spilled, clear frame_pointer_needed + and say we don't need stack realignment. 
*/ + if (stack_realign + && frame_pointer_needed + && crtl->is_leaf + && flag_omit_frame_pointer + && crtl->sp_is_unchanging + && !ix86_current_function_calls_tls_descriptor + && !crtl->accesses_prior_frames + && !cfun->calls_alloca + && !crtl->calls_eh_return + /* See ira_setup_eliminable_regset for the rationale. */ + && !(STACK_CHECK_MOVING_SP + && flag_stack_check + && flag_exceptions + && cfun->can_throw_non_call_exceptions) + && !ix86_frame_pointer_required () + && get_frame_size () == 0 + && ix86_nsaved_sseregs () == 0 + && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0) + { + HARD_REG_SET set_up_by_prologue, prologue_used; + basic_block bb; + + CLEAR_HARD_REG_SET (prologue_used); + CLEAR_HARD_REG_SET (set_up_by_prologue); + add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM); + add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM); + add_to_hard_reg_set (&set_up_by_prologue, Pmode, + HARD_FRAME_POINTER_REGNUM); + FOR_EACH_BB_FN (bb, cfun) + { + rtx_insn *insn; + FOR_BB_INSNS (bb, insn) + if (NONDEBUG_INSN_P (insn) + && requires_stack_frame_p (insn, prologue_used, + set_up_by_prologue)) + { + crtl->stack_realign_needed = stack_realign; + crtl->stack_realign_finalized = true; + return; + } + } + + /* If drap has been set, but it actually isn't live at the start + of the function, there is no reason to set it up. */ + if (crtl->drap_reg) + { + basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; + if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg))) + { + crtl->drap_reg = NULL_RTX; + crtl->need_drap = false; + } + } + else + cfun->machine->no_drap_save_restore = true; + + frame_pointer_needed = false; + stack_realign = false; + crtl->max_used_stack_slot_alignment = incoming_stack_boundary; + crtl->stack_alignment_needed = incoming_stack_boundary; + crtl->stack_alignment_estimated = incoming_stack_boundary; + if (crtl->preferred_stack_boundary > incoming_stack_boundary) + crtl->preferred_stack_boundary = incoming_stack_boundary; + df_finish_pass (true); + df_scan_alloc (NULL); + df_scan_blocks (); + df_compute_regs_ever_live (true); + df_analyze (); + } + + crtl->stack_realign_needed = stack_realign; + crtl->stack_realign_finalized = true; +} + +/* Delete SET_GOT right after entry block if it is allocated to reg. */ + +static void +ix86_elim_entry_set_got (rtx reg) +{ + basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; + rtx_insn *c_insn = BB_HEAD (bb); + if (!NONDEBUG_INSN_P (c_insn)) + c_insn = next_nonnote_nondebug_insn (c_insn); + if (c_insn && NONJUMP_INSN_P (c_insn)) + { + rtx pat = PATTERN (c_insn); + if (GET_CODE (pat) == PARALLEL) + { + rtx vec = XVECEXP (pat, 0, 0); + if (GET_CODE (vec) == SET + && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT + && REGNO (XEXP (vec, 0)) == REGNO (reg)) + delete_insn (c_insn); + } + } +} + +/* Expand the prologue into a bunch of separate insns. */ + +void +ix86_expand_prologue (void) +{ + struct machine_function *m = cfun->machine; + rtx insn, t; + struct ix86_frame frame; + HOST_WIDE_INT allocate; + bool int_registers_saved; + bool sse_registers_saved; + rtx static_chain = NULL_RTX; + + ix86_finalize_stack_realign_flags (); + + /* DRAP should not coexist with stack_realign_fp */ + gcc_assert (!(crtl->drap_reg && stack_realign_fp)); + + memset (&m->fs, 0, sizeof (m->fs)); + + /* Initialize CFA state for before the prologue. */ + m->fs.cfa_reg = stack_pointer_rtx; + m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET; + + /* Track SP offset to the CFA. 
We continue tracking this after we've + swapped the CFA register away from SP. In the case of re-alignment + this is fudged; we're interested to offsets within the local frame. */ + m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; + m->fs.sp_valid = true; + + ix86_compute_frame_layout (&frame); + + if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl)) + { + /* We should have already generated an error for any use of + ms_hook on a nested function. */ + gcc_checking_assert (!ix86_static_chain_on_stack); + + /* Check if profiling is active and we shall use profiling before + prologue variant. If so sorry. */ + if (crtl->profile && flag_fentry != 0) + sorry ("ms_hook_prologue attribute isn%'t compatible " + "with -mfentry for 32-bit"); + + /* In ix86_asm_output_function_label we emitted: + 8b ff movl.s %edi,%edi + 55 push %ebp + 8b ec movl.s %esp,%ebp + + This matches the hookable function prologue in Win32 API + functions in Microsoft Windows XP Service Pack 2 and newer. + Wine uses this to enable Windows apps to hook the Win32 API + functions provided by Wine. + + What that means is that we've already set up the frame pointer. */ + + if (frame_pointer_needed + && !(crtl->drap_reg && crtl->stack_realign_needed)) + { + rtx push, mov; + + /* We've decided to use the frame pointer already set up. + Describe this to the unwinder by pretending that both + push and mov insns happen right here. + + Putting the unwind info here at the end of the ms_hook + is done so that we can make absolutely certain we get + the required byte sequence at the start of the function, + rather than relying on an assembler that can produce + the exact encoding required. + + However it does mean (in the unpatched case) that we have + a 1 insn window where the asynchronous unwind info is + incorrect. However, if we placed the unwind info at + its correct location we would have incorrect unwind info + in the patched case. Which is probably all moot since + I don't expect Wine generates dwarf2 unwind info for the + system libraries that use this feature. */ + + insn = emit_insn (gen_blockage ()); + + push = gen_push (hard_frame_pointer_rtx); + mov = gen_rtx_SET (hard_frame_pointer_rtx, + stack_pointer_rtx); + RTX_FRAME_RELATED_P (push) = 1; + RTX_FRAME_RELATED_P (mov) = 1; + + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov))); + + /* Note that gen_push incremented m->fs.cfa_offset, even + though we didn't emit the push insn here. */ + m->fs.cfa_reg = hard_frame_pointer_rtx; + m->fs.fp_offset = m->fs.cfa_offset; + m->fs.fp_valid = true; + } + else + { + /* The frame pointer is not needed so pop %ebp again. + This leaves us with a pristine state. */ + emit_insn (gen_pop (hard_frame_pointer_rtx)); + } + } + + /* The first insn of a function that accepts its static chain on the + stack is to push the register that would be filled in by a direct + call. This insn will be skipped by the trampoline. */ + else if (ix86_static_chain_on_stack) + { + static_chain = ix86_static_chain (cfun->decl, false); + insn = emit_insn (gen_push (static_chain)); + emit_insn (gen_blockage ()); + + /* We don't want to interpret this push insn as a register save, + only as a stack adjustment. The real copy of the register as + a save will be done later, if needed. 
*/ + t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); + t = gen_rtx_SET (stack_pointer_rtx, t); + add_reg_note (insn, REG_CFA_ADJUST_CFA, t); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* Emit prologue code to adjust stack alignment and setup DRAP, in case + of DRAP is needed and stack realignment is really needed after reload */ + if (stack_realign_drap) + { + int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; + + /* Only need to push parameter pointer reg if it is caller saved. */ + if (!call_used_regs[REGNO (crtl->drap_reg)]) + { + /* Push arg pointer reg */ + insn = emit_insn (gen_push (crtl->drap_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* Grab the argument pointer. */ + t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset); + insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t)); + RTX_FRAME_RELATED_P (insn) = 1; + m->fs.cfa_reg = crtl->drap_reg; + m->fs.cfa_offset = 0; + + /* Align the stack. */ + insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (-align_bytes))); + RTX_FRAME_RELATED_P (insn) = 1; + + /* Replicate the return address on the stack so that return + address can be reached via (argp - 1) slot. This is needed + to implement macro RETURN_ADDR_RTX and intrinsic function + expand_builtin_return_addr etc. */ + t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD); + t = gen_frame_mem (word_mode, t); + insn = emit_insn (gen_push (t)); + RTX_FRAME_RELATED_P (insn) = 1; + + /* For the purposes of frame and register save area addressing, + we've started over with a new frame. */ + m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; + m->fs.realigned = true; + + if (static_chain) + { + /* Replicate static chain on the stack so that static chain + can be reached via (argp - 2) slot. This is needed for + nested function with stack realignment. */ + insn = emit_insn (gen_push (static_chain)); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + int_registers_saved = (frame.nregs == 0); + sse_registers_saved = (frame.nsseregs == 0); + + if (frame_pointer_needed && !m->fs.fp_valid) + { + /* Note: AT&T enter does NOT have reversed args. Enter is probably + slower on all targets. Also sdb doesn't like it. */ + insn = emit_insn (gen_push (hard_frame_pointer_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + + /* Push registers now, before setting the frame pointer + on SEH target. */ + if (!int_registers_saved + && TARGET_SEH + && !frame.save_regs_using_mov) + { + ix86_emit_save_regs (); + int_registers_saved = true; + gcc_assert (m->fs.sp_offset == frame.reg_save_offset); + } + + if (m->fs.sp_offset == frame.hard_frame_pointer_offset) + { + insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + + if (m->fs.cfa_reg == stack_pointer_rtx) + m->fs.cfa_reg = hard_frame_pointer_rtx; + m->fs.fp_offset = m->fs.sp_offset; + m->fs.fp_valid = true; + } + } + + if (!int_registers_saved) + { + /* If saving registers via PUSH, do so now. */ + if (!frame.save_regs_using_mov) + { + ix86_emit_save_regs (); + int_registers_saved = true; + gcc_assert (m->fs.sp_offset == frame.reg_save_offset); + } + + /* When using red zone we may start register saving before allocating + the stack frame saving one cycle of the prologue. However, avoid + doing this if we have to probe the stack; at least on x86_64 the + stack probe can turn into a call that clobbers a red zone location. */ + else if (ix86_using_red_zone () + && (! 
TARGET_STACK_PROBE + || frame.stack_pointer_offset < CHECK_STACK_LIMIT)) + { + ix86_emit_save_regs_using_mov (frame.reg_save_offset); + int_registers_saved = true; + } + } + + if (stack_realign_fp) + { + int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; + gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT); + + /* The computation of the size of the re-aligned stack frame means + that we must allocate the size of the register save area before + performing the actual alignment. Otherwise we cannot guarantee + that there's enough storage above the realignment point. */ + if (m->fs.sp_offset != frame.sse_reg_save_offset) + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (m->fs.sp_offset + - frame.sse_reg_save_offset), + -1, false); + + /* Align the stack. */ + insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (-align_bytes))); + + /* For the purposes of register save area addressing, the stack + pointer is no longer valid. As for the value of sp_offset, + see ix86_compute_frame_layout, which we need to match in order + to pass verification of stack_pointer_offset at the end. */ + m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes); + m->fs.sp_valid = false; + } + + allocate = frame.stack_pointer_offset - m->fs.sp_offset; + + if (flag_stack_usage_info) + { + /* We start to count from ARG_POINTER. */ + HOST_WIDE_INT stack_size = frame.stack_pointer_offset; + + /* If it was realigned, take into account the fake frame. */ + if (stack_realign_drap) + { + if (ix86_static_chain_on_stack) + stack_size += UNITS_PER_WORD; + + if (!call_used_regs[REGNO (crtl->drap_reg)]) + stack_size += UNITS_PER_WORD; + + /* This over-estimates by 1 minimal-stack-alignment-unit but + mitigates that by counting in the new return address slot. */ + current_function_dynamic_stack_size + += crtl->stack_alignment_needed / BITS_PER_UNIT; + } + + current_function_static_stack_size = stack_size; + } + + /* On SEH target with very large frame size, allocate an area to save + SSE registers (as the very large allocation won't be described). */ + if (TARGET_SEH + && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE + && !sse_registers_saved) + { + HOST_WIDE_INT sse_size = + frame.sse_reg_save_offset - frame.reg_save_offset; + + gcc_assert (int_registers_saved); + + /* No need to do stack checking as the area will be immediately + written. */ + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-sse_size), -1, + m->fs.cfa_reg == stack_pointer_rtx); + allocate -= sse_size; + ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); + sse_registers_saved = true; + } + + /* The stack has already been decremented by the instruction calling us + so probe if the size is non-negative to preserve the protection area. */ + if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK) + { + /* We expect the registers to be saved when probes are used. 
*/ + gcc_assert (int_registers_saved); + + if (STACK_CHECK_MOVING_SP) + { + if (!(crtl->is_leaf && !cfun->calls_alloca + && allocate <= PROBE_INTERVAL)) + { + ix86_adjust_stack_and_probe (allocate); + allocate = 0; + } + } + else + { + HOST_WIDE_INT size = allocate; + + if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000) + size = 0x80000000 - STACK_CHECK_PROTECT - 1; + + if (TARGET_STACK_PROBE) + { + if (crtl->is_leaf && !cfun->calls_alloca) + { + if (size > PROBE_INTERVAL) + ix86_emit_probe_stack_range (0, size); + } + else + ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT); + } + else + { + if (crtl->is_leaf && !cfun->calls_alloca) + { + if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) + ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, + size - STACK_CHECK_PROTECT); + } + else + ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size); + } + } + } + + if (allocate == 0) + ; + else if (!ix86_target_stack_probe () + || frame.stack_pointer_offset < CHECK_STACK_LIMIT) + { + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-allocate), -1, + m->fs.cfa_reg == stack_pointer_rtx); + } + else + { + rtx eax = gen_rtx_REG (Pmode, AX_REG); + rtx r10 = NULL; + rtx (*adjust_stack_insn)(rtx, rtx, rtx); + const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx); + bool eax_live = ix86_eax_live_at_start_p (); + bool r10_live = false; + + if (TARGET_64BIT) + r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0); + + if (eax_live) + { + insn = emit_insn (gen_push (eax)); + allocate -= UNITS_PER_WORD; + /* Note that SEH directives need to continue tracking the stack + pointer even after the frame pointer has been set up. */ + if (sp_is_cfa_reg || TARGET_SEH) + { + if (sp_is_cfa_reg) + m->fs.cfa_offset += UNITS_PER_WORD; + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -UNITS_PER_WORD))); + } + } + + if (r10_live) + { + r10 = gen_rtx_REG (Pmode, R10_REG); + insn = emit_insn (gen_push (r10)); + allocate -= UNITS_PER_WORD; + if (sp_is_cfa_reg || TARGET_SEH) + { + if (sp_is_cfa_reg) + m->fs.cfa_offset += UNITS_PER_WORD; + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -UNITS_PER_WORD))); + } + } + + emit_move_insn (eax, GEN_INT (allocate)); + emit_insn (ix86_gen_allocate_stack_worker (eax, eax)); + + /* Use the fact that AX still contains ALLOCATE. */ + adjust_stack_insn = (Pmode == DImode + ? gen_pro_epilogue_adjust_stack_di_sub + : gen_pro_epilogue_adjust_stack_si_sub); + + insn = emit_insn (adjust_stack_insn (stack_pointer_rtx, + stack_pointer_rtx, eax)); + + if (sp_is_cfa_reg || TARGET_SEH) + { + if (sp_is_cfa_reg) + m->fs.cfa_offset += allocate; + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -allocate))); + } + m->fs.sp_offset += allocate; + + /* Use stack_pointer_rtx for relative addressing so that code + works for realigned stack, too. 
*/ + if (r10_live && eax_live) + { + t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); + emit_move_insn (gen_rtx_REG (word_mode, R10_REG), + gen_frame_mem (word_mode, t)); + t = plus_constant (Pmode, t, UNITS_PER_WORD); + emit_move_insn (gen_rtx_REG (word_mode, AX_REG), + gen_frame_mem (word_mode, t)); + } + else if (eax_live || r10_live) + { + t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); + emit_move_insn (gen_rtx_REG (word_mode, + (eax_live ? AX_REG : R10_REG)), + gen_frame_mem (word_mode, t)); + } + } + gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset); + + /* If we havn't already set up the frame pointer, do so now. */ + if (frame_pointer_needed && !m->fs.fp_valid) + { + insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx, + GEN_INT (frame.stack_pointer_offset + - frame.hard_frame_pointer_offset)); + insn = emit_insn (insn); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL); + + if (m->fs.cfa_reg == stack_pointer_rtx) + m->fs.cfa_reg = hard_frame_pointer_rtx; + m->fs.fp_offset = frame.hard_frame_pointer_offset; + m->fs.fp_valid = true; + } + + if (!int_registers_saved) + ix86_emit_save_regs_using_mov (frame.reg_save_offset); + if (!sse_registers_saved) + ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); + + /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT + in PROLOGUE. */ + if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry) + { + rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM); + insn = emit_insn (gen_set_got (pic)); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); + emit_insn (gen_prologue_use (pic)); + /* Deleting already emmitted SET_GOT if exist and allocated to + REAL_PIC_OFFSET_TABLE_REGNUM. */ + ix86_elim_entry_set_got (pic); + } + + if (crtl->drap_reg && !crtl->stack_realign_needed) + { + /* vDRAP is setup but after reload it turns out stack realign + isn't necessary, here we will emit prologue to setup DRAP + without stack realign adjustment */ + t = choose_baseaddr (0); + emit_insn (gen_rtx_SET (crtl->drap_reg, t)); + } + + /* Prevent instructions from being scheduled into register save push + sequence when access to the redzone area is done through frame pointer. + The offset between the frame pointer and the stack pointer is calculated + relative to the value of the stack pointer at the end of the function + prologue, and moving instructions that access redzone area via frame + pointer inside push sequence violates this assumption. */ + if (frame_pointer_needed && frame.red_zone_size) + emit_insn (gen_memory_blockage ()); + + /* Emit cld instruction if stringops are used in the function. */ + if (TARGET_CLD && ix86_current_function_needs_cld) + emit_insn (gen_cld ()); + + /* SEH requires that the prologue end within 256 bytes of the start of + the function. Prevent instruction schedules that would extend that. + Further, prevent alloca modifications to the stack pointer from being + combined with prologue modifications. */ + if (TARGET_SEH) + emit_insn (gen_prologue_use (stack_pointer_rtx)); +} + +/* Emit code to restore REG using a POP insn. 
*/ + +static void +ix86_emit_restore_reg_using_pop (rtx reg) +{ + struct machine_function *m = cfun->machine; + rtx_insn *insn = emit_insn (gen_pop (reg)); + + ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset); + m->fs.sp_offset -= UNITS_PER_WORD; + + if (m->fs.cfa_reg == crtl->drap_reg + && REGNO (reg) == REGNO (crtl->drap_reg)) + { + /* Previously we'd represented the CFA as an expression + like *(%ebp - 8). We've just popped that value from + the stack, which means we need to reset the CFA to + the drap register. This will remain until we restore + the stack pointer. */ + add_reg_note (insn, REG_CFA_DEF_CFA, reg); + RTX_FRAME_RELATED_P (insn) = 1; + + /* This means that the DRAP register is valid for addressing too. */ + m->fs.drap_valid = true; + return; + } + + if (m->fs.cfa_reg == stack_pointer_rtx) + { + rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); + x = gen_rtx_SET (stack_pointer_rtx, x); + add_reg_note (insn, REG_CFA_ADJUST_CFA, x); + RTX_FRAME_RELATED_P (insn) = 1; + + m->fs.cfa_offset -= UNITS_PER_WORD; + } + + /* When the frame pointer is the CFA, and we pop it, we are + swapping back to the stack pointer as the CFA. This happens + for stack frames that don't allocate other data, so we assume + the stack pointer is now pointing at the return address, i.e. + the function entry state, which makes the offset be 1 word. */ + if (reg == hard_frame_pointer_rtx) + { + m->fs.fp_valid = false; + if (m->fs.cfa_reg == hard_frame_pointer_rtx) + { + m->fs.cfa_reg = stack_pointer_rtx; + m->fs.cfa_offset -= UNITS_PER_WORD; + + add_reg_note (insn, REG_CFA_DEF_CFA, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (m->fs.cfa_offset))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } +} + +/* Emit code to restore saved registers using POP insns. */ + +static void +ix86_emit_restore_regs_using_pop (void) +{ + unsigned int regno; + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false)) + ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno)); +} + +/* Emit code and notes for the LEAVE instruction. */ + +static void +ix86_emit_leave (void) +{ + struct machine_function *m = cfun->machine; + rtx_insn *insn = emit_insn (ix86_gen_leave ()); + + ix86_add_queued_cfa_restore_notes (insn); + + gcc_assert (m->fs.fp_valid); + m->fs.sp_valid = true; + m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD; + m->fs.fp_valid = false; + + if (m->fs.cfa_reg == hard_frame_pointer_rtx) + { + m->fs.cfa_reg = stack_pointer_rtx; + m->fs.cfa_offset = m->fs.sp_offset; + + add_reg_note (insn, REG_CFA_DEF_CFA, + plus_constant (Pmode, stack_pointer_rtx, + m->fs.sp_offset)); + RTX_FRAME_RELATED_P (insn) = 1; + } + ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, + m->fs.fp_offset); +} + +/* Emit code to restore saved registers using MOV insns. + First register is restored from CFA - CFA_OFFSET. 
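+
+ For instance, with a hypothetical frame whose reg_save_offset is 24
+ on x86-64, three call-saved registers are reloaded from CFA - 24,
+ CFA - 16 and CFA - 8, the offset dropping by UNITS_PER_WORD after
+ each move so that the order mirrors ix86_emit_save_regs_using_mov.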
*/ +static void +ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset, + bool maybe_eh_return) +{ + struct machine_function *m = cfun->machine; + unsigned int regno; + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return)) + { + rtx reg = gen_rtx_REG (word_mode, regno); + rtx mem; + rtx_insn *insn; + + mem = choose_baseaddr (cfa_offset); + mem = gen_frame_mem (word_mode, mem); + insn = emit_move_insn (reg, mem); + + if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg)) + { + /* Previously we'd represented the CFA as an expression + like *(%ebp - 8). We've just popped that value from + the stack, which means we need to reset the CFA to + the drap register. This will remain until we restore + the stack pointer. */ + add_reg_note (insn, REG_CFA_DEF_CFA, reg); + RTX_FRAME_RELATED_P (insn) = 1; + + /* This means that the DRAP register is valid for addressing. */ + m->fs.drap_valid = true; + } + else + ix86_add_cfa_restore_note (NULL, reg, cfa_offset); + + cfa_offset -= UNITS_PER_WORD; + } +} + +/* Emit code to restore saved registers using MOV insns. + First register is restored from CFA - CFA_OFFSET. */ +static void +ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset, + bool maybe_eh_return) +{ + unsigned int regno; + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return)) + { + rtx reg = gen_rtx_REG (V4SFmode, regno); + rtx mem; + unsigned int align; + + mem = choose_baseaddr (cfa_offset); + mem = gen_rtx_MEM (V4SFmode, mem); + + /* The location is aligned up to INCOMING_STACK_BOUNDARY. */ + align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY); + set_mem_align (mem, align); + + /* SSE saves are not within re-aligned local stack frame. + In case INCOMING_STACK_BOUNDARY is misaligned, we have + to emit unaligned load. */ + if (align < 128) + { + rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem), + UNSPEC_LOADU); + emit_insn (gen_rtx_SET (reg, unspec)); + } + else + emit_insn (gen_rtx_SET (reg, mem)); + + ix86_add_cfa_restore_note (NULL, reg, cfa_offset); + + cfa_offset -= GET_MODE_SIZE (V4SFmode); + } +} + +/* Restore function stack, frame, and registers. */ + +void +ix86_expand_epilogue (int style) +{ + struct machine_function *m = cfun->machine; + struct machine_frame_state frame_state_save = m->fs; + struct ix86_frame frame; + bool restore_regs_via_mov; + bool using_drap; + + ix86_finalize_stack_realign_flags (); + ix86_compute_frame_layout (&frame); + + m->fs.sp_valid = (!frame_pointer_needed + || (crtl->sp_is_unchanging + && !stack_realign_fp)); + gcc_assert (!m->fs.sp_valid + || m->fs.sp_offset == frame.stack_pointer_offset); + + /* The FP must be valid if the frame pointer is present. */ + gcc_assert (frame_pointer_needed == m->fs.fp_valid); + gcc_assert (!m->fs.fp_valid + || m->fs.fp_offset == frame.hard_frame_pointer_offset); + + /* We must have *some* valid pointer to the stack frame. */ + gcc_assert (m->fs.sp_valid || m->fs.fp_valid); + + /* The DRAP is never valid at this point. */ + gcc_assert (!m->fs.drap_valid); + + /* See the comment about red zone and frame + pointer usage in ix86_expand_prologue. */ + if (frame_pointer_needed && frame.red_zone_size) + emit_insn (gen_memory_blockage ()); + + using_drap = crtl->drap_reg && crtl->stack_realign_needed; + gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg); + + /* Determine the CFA offset of the end of the red-zone. 
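+
+ On x86-64, where the psABI red zone is 128 bytes and UNITS_PER_WORD
+ is 8, this puts the end of the red zone at CFA - 136
+ (RED_ZONE_SIZE + UNITS_PER_WORD = 128 + 8) whenever the red zone is
+ in use.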
*/ + m->fs.red_zone_offset = 0; + if (ix86_using_red_zone () && crtl->args.pops_args < 65536) + { + /* The red-zone begins below the return address. */ + m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD; + + /* When the register save area is in the aligned portion of + the stack, determine the maximum runtime displacement that + matches up with the aligned frame. */ + if (stack_realign_drap) + m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT + + UNITS_PER_WORD); + } + + /* Special care must be taken for the normal return case of a function + using eh_return: the eax and edx registers are marked as saved, but + not restored along this path. Adjust the save location to match. */ + if (crtl->calls_eh_return && style != 2) + frame.reg_save_offset -= 2 * UNITS_PER_WORD; + + /* EH_RETURN requires the use of moves to function properly. */ + if (crtl->calls_eh_return) + restore_regs_via_mov = true; + /* SEH requires the use of pops to identify the epilogue. */ + else if (TARGET_SEH) + restore_regs_via_mov = false; + /* If we're only restoring one register and sp is not valid then + using a move instruction to restore the register since it's + less work than reloading sp and popping the register. */ + else if (!m->fs.sp_valid && frame.nregs <= 1) + restore_regs_via_mov = true; + else if (TARGET_EPILOGUE_USING_MOVE + && cfun->machine->use_fast_prologue_epilogue + && (frame.nregs > 1 + || m->fs.sp_offset != frame.reg_save_offset)) + restore_regs_via_mov = true; + else if (frame_pointer_needed + && !frame.nregs + && m->fs.sp_offset != frame.reg_save_offset) + restore_regs_via_mov = true; + else if (frame_pointer_needed + && TARGET_USE_LEAVE + && cfun->machine->use_fast_prologue_epilogue + && frame.nregs == 1) + restore_regs_via_mov = true; + else + restore_regs_via_mov = false; + + if (restore_regs_via_mov || frame.nsseregs) + { + /* Ensure that the entire register save area is addressable via + the stack pointer, if we will restore via sp. */ + if (TARGET_64BIT + && m->fs.sp_offset > 0x7fffffff + && !(m->fs.fp_valid || m->fs.drap_valid) + && (frame.nsseregs + frame.nregs) != 0) + { + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (m->fs.sp_offset + - frame.sse_reg_save_offset), + style, + m->fs.cfa_reg == stack_pointer_rtx); + } + } + + /* If there are any SSE registers to restore, then we have to do it + via moves, since there's obviously no pop for SSE regs. */ + if (frame.nsseregs) + ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset, + style == 2); + + if (restore_regs_via_mov) + { + rtx t; + + if (frame.nregs) + ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2); + + /* eh_return epilogues need %ecx added to the stack pointer. */ + if (style == 2) + { + rtx sa = EH_RETURN_STACKADJ_RTX; + rtx_insn *insn; + + /* %ecx can't be used for both DRAP register and eh_return. */ + if (crtl->drap_reg) + gcc_assert (REGNO (crtl->drap_reg) != CX_REG); + + /* regparm nested functions don't work with eh_return. */ + gcc_assert (!ix86_static_chain_on_stack); + + if (frame_pointer_needed) + { + t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); + t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD); + emit_insn (gen_rtx_SET (sa, t)); + + t = gen_frame_mem (Pmode, hard_frame_pointer_rtx); + insn = emit_move_insn (hard_frame_pointer_rtx, t); + + /* Note that we use SA as a temporary CFA, as the return + address is at the proper place relative to it. 
We + pretend this happens at the FP restore insn because + prior to this insn the FP would be stored at the wrong + offset relative to SA, and after this insn we have no + other reasonable register to use for the CFA. We don't + bother resetting the CFA to the SP for the duration of + the return insn. */ + add_reg_note (insn, REG_CFA_DEF_CFA, + plus_constant (Pmode, sa, UNITS_PER_WORD)); + ix86_add_queued_cfa_restore_notes (insn); + add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + + m->fs.cfa_reg = sa; + m->fs.cfa_offset = UNITS_PER_WORD; + m->fs.fp_valid = false; + + pro_epilogue_adjust_stack (stack_pointer_rtx, sa, + const0_rtx, style, false); + } + else + { + t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); + t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD); + insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t)); + ix86_add_queued_cfa_restore_notes (insn); + + gcc_assert (m->fs.cfa_reg == stack_pointer_rtx); + if (m->fs.cfa_offset != UNITS_PER_WORD) + { + m->fs.cfa_offset = UNITS_PER_WORD; + add_reg_note (insn, REG_CFA_DEF_CFA, + plus_constant (Pmode, stack_pointer_rtx, + UNITS_PER_WORD)); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + m->fs.sp_offset = UNITS_PER_WORD; + m->fs.sp_valid = true; + } + } + else + { + /* SEH requires that the function end with (1) a stack adjustment + if necessary, (2) a sequence of pops, and (3) a return or + jump instruction. Prevent insns from the function body from + being scheduled into this sequence. */ + if (TARGET_SEH) + { + /* Prevent a catch region from being adjacent to the standard + epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor + several other flags that would be interesting to test are + not yet set up. */ + if (flag_non_call_exceptions) + emit_insn (gen_nops (const1_rtx)); + else + emit_insn (gen_blockage ()); + } + + /* First step is to deallocate the stack frame so that we can + pop the registers. Also do it on SEH target for very large + frame as the emitted instructions aren't allowed by the ABI in + epilogues. */ + if (!m->fs.sp_valid + || (TARGET_SEH + && (m->fs.sp_offset - frame.reg_save_offset + >= SEH_MAX_FRAME_SIZE))) + { + pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx, + GEN_INT (m->fs.fp_offset + - frame.reg_save_offset), + style, false); + } + else if (m->fs.sp_offset != frame.reg_save_offset) + { + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (m->fs.sp_offset + - frame.reg_save_offset), + style, + m->fs.cfa_reg == stack_pointer_rtx); + } + + ix86_emit_restore_regs_using_pop (); + } + + /* If we used a stack pointer and haven't already got rid of it, + then do so now. */ + if (m->fs.fp_valid) + { + /* If the stack pointer is valid and pointing at the frame + pointer store address, then we only need a pop. */ + if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset) + ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); + /* Leave results in shorter dependency chains on CPUs that are + able to grok it fast. 
*/ + else if (TARGET_USE_LEAVE + || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun)) + || !cfun->machine->use_fast_prologue_epilogue) + ix86_emit_leave (); + else + { + pro_epilogue_adjust_stack (stack_pointer_rtx, + hard_frame_pointer_rtx, + const0_rtx, style, !using_drap); + ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); + } + } + + if (using_drap) + { + int param_ptr_offset = UNITS_PER_WORD; + rtx_insn *insn; + + gcc_assert (stack_realign_drap); + + if (ix86_static_chain_on_stack) + param_ptr_offset += UNITS_PER_WORD; + if (!call_used_regs[REGNO (crtl->drap_reg)]) + param_ptr_offset += UNITS_PER_WORD; + + insn = emit_insn (gen_rtx_SET + (stack_pointer_rtx, + gen_rtx_PLUS (Pmode, + crtl->drap_reg, + GEN_INT (-param_ptr_offset)))); + m->fs.cfa_reg = stack_pointer_rtx; + m->fs.cfa_offset = param_ptr_offset; + m->fs.sp_offset = param_ptr_offset; + m->fs.realigned = false; + + add_reg_note (insn, REG_CFA_DEF_CFA, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (param_ptr_offset))); + RTX_FRAME_RELATED_P (insn) = 1; + + if (!call_used_regs[REGNO (crtl->drap_reg)]) + ix86_emit_restore_reg_using_pop (crtl->drap_reg); + } + + /* At this point the stack pointer must be valid, and we must have + restored all of the registers. We may not have deallocated the + entire stack frame. We've delayed this until now because it may + be possible to merge the local stack deallocation with the + deallocation forced by ix86_static_chain_on_stack. */ + gcc_assert (m->fs.sp_valid); + gcc_assert (!m->fs.fp_valid); + gcc_assert (!m->fs.realigned); + if (m->fs.sp_offset != UNITS_PER_WORD) + { + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (m->fs.sp_offset - UNITS_PER_WORD), + style, true); + } + else + ix86_add_queued_cfa_restore_notes (get_last_insn ()); + + /* Sibcall epilogues don't want a return instruction. */ + if (style == 0) + { + m->fs = frame_state_save; + return; + } + + if (crtl->args.pops_args && crtl->args.size) + { + rtx popc = GEN_INT (crtl->args.pops_args); + + /* i386 can only pop 64K bytes. If asked to pop more, pop return + address, do explicit add, and jump indirectly to the caller. */ + + if (crtl->args.pops_args >= 65536) + { + rtx ecx = gen_rtx_REG (SImode, CX_REG); + rtx_insn *insn; + + /* There is no "pascal" calling convention in any 64bit ABI. */ + gcc_assert (!TARGET_64BIT); + + insn = emit_insn (gen_pop (ecx)); + m->fs.cfa_offset -= UNITS_PER_WORD; + m->fs.sp_offset -= UNITS_PER_WORD; + + rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); + x = gen_rtx_SET (stack_pointer_rtx, x); + add_reg_note (insn, REG_CFA_ADJUST_CFA, x); + add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + popc, -1, true); + emit_jump_insn (gen_simple_return_indirect_internal (ecx)); + } + else + emit_jump_insn (gen_simple_return_pop_internal (popc)); + } + else + emit_jump_insn (gen_simple_return_internal ()); + + /* Restore the state back to the state from the prologue, + so that it's correct for the next epilogue. */ + m->fs = frame_state_save; +} + +/* Reset from the function's potential modifications. 
*/ + +static void +ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT) +{ + if (pic_offset_table_rtx + && !ix86_use_pseudo_pic_reg ()) + SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM); + + if (TARGET_MACHO) + { + rtx_insn *insn = get_last_insn (); + rtx_insn *deleted_debug_label = NULL; + + /* Mach-O doesn't support labels at the end of objects, so if + it looks like we might want one, take special action. + First, collect any sequence of deleted debug labels. */ + while (insn + && NOTE_P (insn) + && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL) + { + /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL + notes only, instead set their CODE_LABEL_NUMBER to -1, + otherwise there would be code generation differences + in between -g and -g0. */ + if (NOTE_P (insn) && NOTE_KIND (insn) + == NOTE_INSN_DELETED_DEBUG_LABEL) + deleted_debug_label = insn; + insn = PREV_INSN (insn); + } + + /* If we have: + label: + barrier + then this needs to be detected, so skip past the barrier. */ + + if (insn && BARRIER_P (insn)) + insn = PREV_INSN (insn); + + /* Up to now we've only seen notes or barriers. */ + if (insn) + { + if (LABEL_P (insn) + || (NOTE_P (insn) + && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)) + /* Trailing label. */ + fputs ("\tnop\n", file); + else if (cfun && ! cfun->is_thunk) + { + /* See if we have a completely empty function body, skipping + the special case of the picbase thunk emitted as asm. */ + while (insn && ! INSN_P (insn)) + insn = PREV_INSN (insn); + /* If we don't find any insns, we've got an empty function body; + I.e. completely empty - without a return or branch. This is + taken as the case where a function body has been removed + because it contains an inline __builtin_unreachable(). GCC + declares that reaching __builtin_unreachable() means UB so + we're not obliged to do anything special; however, we want + non-zero-sized function bodies. To meet this, and help the + user out, let's trap the case. */ + if (insn == NULL) + fputs ("\tud2\n", file); + } + } + else if (deleted_debug_label) + for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn)) + if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL) + CODE_LABEL_NUMBER (insn) = -1; + } +} + +/* Return a scratch register to use in the split stack prologue. The + split stack prologue is used for -fsplit-stack. It is the first + instructions in the function, even before the regular prologue. + The scratch register can be any caller-saved register which is not + used for parameters or for the static chain. 
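As a condensed, illustrative view of the selection made below (the code is
authoritative, this table is only a summary):

     64-bit                  -> %r11
     32-bit fastcall         -> %eax  (unsupported if a static chain is needed)
     32-bit thiscall         -> %edx, or %eax when a static chain is live
     32-bit regparm 0 or 1   -> %ecx, or %edx when a static chain is live
     32-bit regparm 2        -> %ecx; unsupported when a static chain is live
     32-bit regparm 3        -> unsupported (no caller-saved register is free)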
*/ + +static unsigned int +split_stack_prologue_scratch_regno (void) +{ + if (TARGET_64BIT) + return R11_REG; + else + { + bool is_fastcall, is_thiscall; + int regparm; + + is_fastcall = (lookup_attribute ("fastcall", + TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) + != NULL); + is_thiscall = (lookup_attribute ("thiscall", + TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) + != NULL); + regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl); + + if (is_fastcall) + { + if (DECL_STATIC_CHAIN (cfun->decl)) + { + sorry ("-fsplit-stack does not support fastcall with " + "nested function"); + return INVALID_REGNUM; + } + return AX_REG; + } + else if (is_thiscall) + { + if (!DECL_STATIC_CHAIN (cfun->decl)) + return DX_REG; + return AX_REG; + } + else if (regparm < 3) + { + if (!DECL_STATIC_CHAIN (cfun->decl)) + return CX_REG; + else + { + if (regparm >= 2) + { + sorry ("-fsplit-stack does not support 2 register " + "parameters for a nested function"); + return INVALID_REGNUM; + } + return DX_REG; + } + } + else + { + /* FIXME: We could make this work by pushing a register + around the addition and comparison. */ + sorry ("-fsplit-stack does not support 3 register parameters"); + return INVALID_REGNUM; + } + } +} + +/* A SYMBOL_REF for the function which allocates new stackspace for + -fsplit-stack. */ + +static GTY(()) rtx split_stack_fn; + +/* A SYMBOL_REF for the more stack function when using the large + model. */ + +static GTY(()) rtx split_stack_fn_large; + +/* Handle -fsplit-stack. These are the first instructions in the + function, even before the regular prologue. */ + +void +ix86_expand_split_stack_prologue (void) +{ + struct ix86_frame frame; + HOST_WIDE_INT allocate; + unsigned HOST_WIDE_INT args_size; + rtx_code_label *label; + rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage; + rtx scratch_reg = NULL_RTX; + rtx_code_label *varargs_label = NULL; + rtx fn; + + gcc_assert (flag_split_stack && reload_completed); + + ix86_finalize_stack_realign_flags (); + ix86_compute_frame_layout (&frame); + allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET; + + /* This is the label we will branch to if we have enough stack + space. We expect the basic block reordering pass to reverse this + branch if optimizing, so that we branch in the unlikely case. */ + label = gen_label_rtx (); + + /* We need to compare the stack pointer minus the frame size with + the stack boundary in the TCB. The stack boundary always gives + us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we + can compare directly. Otherwise we need to do an addition. */ + + limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_STACK_CHECK); + limit = gen_rtx_CONST (Pmode, limit); + limit = gen_rtx_MEM (Pmode, limit); + if (allocate < SPLIT_STACK_AVAILABLE) + current = stack_pointer_rtx; + else + { + unsigned int scratch_regno; + rtx offset; + + /* We need a scratch register to hold the stack pointer minus + the required frame size. Since this is the very start of the + function, the scratch register can be any caller-saved + register which is not used for parameters. */ + offset = GEN_INT (- allocate); + scratch_regno = split_stack_prologue_scratch_regno (); + if (scratch_regno == INVALID_REGNUM) + return; + scratch_reg = gen_rtx_REG (Pmode, scratch_regno); + if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode)) + { + /* We don't use ix86_gen_add3 in this case because it will + want to split to lea, but when not optimizing the insn + will not be split after this point. 
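This path is taken on 32-bit targets, or when the (negated) allocation amount is
directly encodable as an x86-64 immediate. For a plain constant that boils down
to fitting in a sign-extended 32-bit field; a minimal standalone check, with a
hypothetical name (not the GCC predicate itself):

     #include <stdbool.h>
     #include <stdint.h>

     static bool fits_sign_extended_imm32 (int64_t v)
     {
       return v == (int64_t) (int32_t) v;   // ALU immediates are imm32, sign-extended
     }

Otherwise the constant is moved into the scratch register first and then added
to the stack pointer, as in the else arm below.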
*/ + emit_insn (gen_rtx_SET (scratch_reg, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + offset))); + } + else + { + emit_move_insn (scratch_reg, offset); + emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg, + stack_pointer_rtx)); + } + current = scratch_reg; + } + + ix86_expand_branch (GEU, current, limit, label); + jump_insn = get_last_insn (); + JUMP_LABEL (jump_insn) = label; + + /* Mark the jump as very likely to be taken. */ + add_int_reg_note (jump_insn, REG_BR_PROB, + REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100); + + if (split_stack_fn == NULL_RTX) + { + split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack"); + SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL; + } + fn = split_stack_fn; + + /* Get more stack space. We pass in the desired stack space and the + size of the arguments to copy to the new stack. In 32-bit mode + we push the parameters; __morestack will return on a new stack + anyhow. In 64-bit mode we pass the parameters in r10 and + r11. */ + allocate_rtx = GEN_INT (allocate); + args_size = crtl->args.size >= 0 ? crtl->args.size : 0; + call_fusage = NULL_RTX; + if (TARGET_64BIT) + { + rtx reg10, reg11; + + reg10 = gen_rtx_REG (Pmode, R10_REG); + reg11 = gen_rtx_REG (Pmode, R11_REG); + + /* If this function uses a static chain, it will be in %r10. + Preserve it across the call to __morestack. */ + if (DECL_STATIC_CHAIN (cfun->decl)) + { + rtx rax; + + rax = gen_rtx_REG (word_mode, AX_REG); + emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG)); + use_reg (&call_fusage, rax); + } + + if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) + && !TARGET_PECOFF) + { + HOST_WIDE_INT argval; + + gcc_assert (Pmode == DImode); + /* When using the large model we need to load the address + into a register, and we've run out of registers. So we + switch to a different calling convention, and we call a + different function: __morestack_large. We pass the + argument size in the upper 32 bits of r10 and pass the + frame size in the lower 32 bits. 
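As a standalone sketch of that packing (hypothetical helper names, plain C, not
the GCC internals):

     #include <stdint.h>

     static uint64_t pack_morestack_r10 (uint32_t args_size, uint32_t frame_size)
     {
       return ((uint64_t) args_size << 32) | frame_size;
     }

     static uint32_t morestack_args_size (uint64_t r10)  { return (uint32_t) (r10 >> 32); }
     static uint32_t morestack_frame_size (uint64_t r10) { return (uint32_t) r10; }

The generator below writes the shift as ((args_size << 16) << 16), presumably so
the shift count never reaches the width of a 32-bit host type.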
*/ + gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate); + gcc_assert ((args_size & 0xffffffff) == args_size); + + if (split_stack_fn_large == NULL_RTX) + { + split_stack_fn_large = + gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model"); + SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL; + } + if (ix86_cmodel == CM_LARGE_PIC) + { + rtx_code_label *label; + rtx x; + + label = gen_label_rtx (); + emit_label (label); + LABEL_PRESERVE_P (label) = 1; + emit_insn (gen_set_rip_rex64 (reg10, label)); + emit_insn (gen_set_got_offset_rex64 (reg11, label)); + emit_insn (ix86_gen_add3 (reg10, reg10, reg11)); + x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large), + UNSPEC_GOT); + x = gen_rtx_CONST (Pmode, x); + emit_move_insn (reg11, x); + x = gen_rtx_PLUS (Pmode, reg10, reg11); + x = gen_const_mem (Pmode, x); + emit_move_insn (reg11, x); + } + else + emit_move_insn (reg11, split_stack_fn_large); + + fn = reg11; + + argval = ((args_size << 16) << 16) + allocate; + emit_move_insn (reg10, GEN_INT (argval)); + } + else + { + emit_move_insn (reg10, allocate_rtx); + emit_move_insn (reg11, GEN_INT (args_size)); + use_reg (&call_fusage, reg11); + } + + use_reg (&call_fusage, reg10); + } + else + { + emit_insn (gen_push (GEN_INT (args_size))); + emit_insn (gen_push (allocate_rtx)); + } + call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn), + GEN_INT (UNITS_PER_WORD), constm1_rtx, + NULL_RTX, false); + add_function_usage_to (call_insn, call_fusage); + + /* In order to make call/return prediction work right, we now need + to execute a return instruction. See + libgcc/config/i386/morestack.S for the details on how this works. + + For flow purposes gcc must not see this as a return + instruction--we need control flow to continue at the subsequent + label. Therefore, we use an unspec. */ + gcc_assert (crtl->args.pops_args < 65536); + emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args))); + + /* If we are in 64-bit mode and this function uses a static chain, + we saved %r10 in %rax before calling _morestack. */ + if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl)) + emit_move_insn (gen_rtx_REG (word_mode, R10_REG), + gen_rtx_REG (word_mode, AX_REG)); + + /* If this function calls va_start, we need to store a pointer to + the arguments on the old stack, because they may not have been + all copied to the new stack. At this point the old stack can be + found at the frame pointer value used by __morestack, because + __morestack has set that up before calling back to us. Here we + store that pointer in a scratch register, and in + ix86_expand_prologue we store the scratch register in a stack + slot. */ + if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) + { + unsigned int scratch_regno; + rtx frame_reg; + int words; + + scratch_regno = split_stack_prologue_scratch_regno (); + scratch_reg = gen_rtx_REG (Pmode, scratch_regno); + frame_reg = gen_rtx_REG (Pmode, BP_REG); + + /* 64-bit: + fp -> old fp value + return address within this function + return address of caller of this function + stack arguments + So we add three words to get to the stack arguments. + + 32-bit: + fp -> old fp value + return address within this function + first argument to __morestack + second argument to __morestack + return address of caller of this function + stack arguments + So we add five words to get to the stack arguments. + */ + words = TARGET_64BIT ? 
3 : 5; + emit_insn (gen_rtx_SET (scratch_reg, + gen_rtx_PLUS (Pmode, frame_reg, + GEN_INT (words * UNITS_PER_WORD)))); + + varargs_label = gen_label_rtx (); + emit_jump_insn (gen_jump (varargs_label)); + JUMP_LABEL (get_last_insn ()) = varargs_label; + + emit_barrier (); + } + + emit_label (label); + LABEL_NUSES (label) = 1; + + /* If this function calls va_start, we now have to set the scratch + register for the case where we do not call __morestack. In this + case we need to set it based on the stack pointer. */ + if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) + { + emit_insn (gen_rtx_SET (scratch_reg, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (UNITS_PER_WORD)))); + + emit_label (varargs_label); + LABEL_NUSES (varargs_label) = 1; + } +} + +/* We may have to tell the dataflow pass that the split stack prologue + is initializing a scratch register. */ + +static void +ix86_live_on_entry (bitmap regs) +{ + if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) + { + gcc_assert (flag_split_stack); + bitmap_set_bit (regs, split_stack_prologue_scratch_regno ()); + } +} + +/* Extract the parts of an RTL expression that is a valid memory address + for an instruction. Return 0 if the structure of the address is + grossly off. Return -1 if the address contains ASHIFT, so it is not + strictly valid, but still used for computing length of lea instruction. */ + +int +ix86_decompose_address (rtx addr, struct ix86_address *out) +{ + rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; + rtx base_reg, index_reg; + HOST_WIDE_INT scale = 1; + rtx scale_rtx = NULL_RTX; + rtx tmp; + int retval = 1; + addr_space_t seg = ADDR_SPACE_GENERIC; + + /* Allow zero-extended SImode addresses, + they will be emitted with addr32 prefix. */ + if (TARGET_64BIT && GET_MODE (addr) == DImode) + { + if (GET_CODE (addr) == ZERO_EXTEND + && GET_MODE (XEXP (addr, 0)) == SImode) + { + addr = XEXP (addr, 0); + if (CONST_INT_P (addr)) + return 0; + } + else if (GET_CODE (addr) == AND + && const_32bit_mask (XEXP (addr, 1), DImode)) + { + addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0); + if (addr == NULL_RTX) + return 0; + + if (CONST_INT_P (addr)) + return 0; + } + } + + /* Allow SImode subregs of DImode addresses, + they will be emitted with addr32 prefix. 
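With the 0x67 (addr32) prefix the effective address is computed with 32-bit
arithmetic and then zero-extended, so only the low 32 bits of the wider value
matter; roughly (hypothetical helper, not GCC code):

     #include <stdint.h>

     static uint64_t addr32_effective_address (uint64_t base, uint64_t index,
                                               uint32_t scale, int32_t disp)
     {
       uint32_t ea = (uint32_t) (base + index * scale + (uint32_t) disp);
       return ea;                           // zero-extended to 64 bits
     }

That is why the ZERO_EXTEND, the 32-bit AND mask and the SImode subreg forms
above can all be treated alike.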
*/ + if (TARGET_64BIT && GET_MODE (addr) == SImode) + { + if (SUBREG_P (addr) + && GET_MODE (SUBREG_REG (addr)) == DImode) + { + addr = SUBREG_REG (addr); + if (CONST_INT_P (addr)) + return 0; + } + } + + if (REG_P (addr)) + base = addr; + else if (SUBREG_P (addr)) + { + if (REG_P (SUBREG_REG (addr))) + base = addr; + else + return 0; + } + else if (GET_CODE (addr) == PLUS) + { + rtx addends[4], op; + int n = 0, i; + + op = addr; + do + { + if (n >= 4) + return 0; + addends[n++] = XEXP (op, 1); + op = XEXP (op, 0); + } + while (GET_CODE (op) == PLUS); + if (n >= 4) + return 0; + addends[n] = op; + + for (i = n; i >= 0; --i) + { + op = addends[i]; + switch (GET_CODE (op)) + { + case MULT: + if (index) + return 0; + index = XEXP (op, 0); + scale_rtx = XEXP (op, 1); + break; + + case ASHIFT: + if (index) + return 0; + index = XEXP (op, 0); + tmp = XEXP (op, 1); + if (!CONST_INT_P (tmp)) + return 0; + scale = INTVAL (tmp); + if ((unsigned HOST_WIDE_INT) scale > 3) + return 0; + scale = 1 << scale; + break; + + case ZERO_EXTEND: + op = XEXP (op, 0); + if (GET_CODE (op) != UNSPEC) + return 0; + /* FALLTHRU */ + + case UNSPEC: + if (XINT (op, 1) == UNSPEC_TP + && TARGET_TLS_DIRECT_SEG_REFS + && seg == ADDR_SPACE_GENERIC) + seg = DEFAULT_TLS_SEG_REG; + else + return 0; + break; + + case SUBREG: + if (!REG_P (SUBREG_REG (op))) + return 0; + /* FALLTHRU */ + + case REG: + if (!base) + base = op; + else if (!index) + index = op; + else + return 0; + break; + + case CONST: + case CONST_INT: + case SYMBOL_REF: + case LABEL_REF: + if (disp) + return 0; + disp = op; + break; + + default: + return 0; + } + } + } + else if (GET_CODE (addr) == MULT) + { + index = XEXP (addr, 0); /* index*scale */ + scale_rtx = XEXP (addr, 1); + } + else if (GET_CODE (addr) == ASHIFT) + { + /* We're called for lea too, which implements ashift on occasion. */ + index = XEXP (addr, 0); + tmp = XEXP (addr, 1); + if (!CONST_INT_P (tmp)) + return 0; + scale = INTVAL (tmp); + if ((unsigned HOST_WIDE_INT) scale > 3) + return 0; + scale = 1 << scale; + retval = -1; + } + else + disp = addr; /* displacement */ + + if (index) + { + if (REG_P (index)) + ; + else if (SUBREG_P (index) + && REG_P (SUBREG_REG (index))) + ; + else + return 0; + } + + /* Extract the integral value of scale. */ + if (scale_rtx) + { + if (!CONST_INT_P (scale_rtx)) + return 0; + scale = INTVAL (scale_rtx); + } + + base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base; + index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index; + + /* Avoid useless 0 displacement. */ + if (disp == const0_rtx && (base || index)) + disp = NULL_RTX; + + /* Allow arg pointer and stack pointer as index if there is not scaling. */ + if (base_reg && index_reg && scale == 1 + && (index_reg == arg_pointer_rtx + || index_reg == frame_pointer_rtx + || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM))) + { + std::swap (base, index); + std::swap (base_reg, index_reg); + } + + /* Special case: %ebp cannot be encoded as a base without a displacement. + Similarly %r13. */ + if (!disp + && base_reg + && (base_reg == hard_frame_pointer_rtx + || base_reg == frame_pointer_rtx + || base_reg == arg_pointer_rtx + || (REG_P (base_reg) + && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM + || REGNO (base_reg) == R13_REG)))) + disp = const0_rtx; + + /* Special case: on K6, [%esi] makes the instruction vector decoded. + Avoid this by transforming to [%esi+0]. + Reload calls address legitimization without cfun defined, so we need + to test cfun for being non-NULL. 
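The remaining special cases just below are likewise dictated by instruction
encoding: a base of %ebp or %r13 needs an explicit zero displacement because
that base encoding without a displacement means something else, a lone index
scaled by 2 is cheaper as base+index, and a scaled index with neither base nor
displacement cannot be encoded at all. A rough standalone sketch of the latter
two rewrites (hypothetical struct, illustrative only):

     #include <stdbool.h>

     struct ea { int base; int index; int scale; bool has_disp; };   // -1 means no register

     static void fixup_for_encoding (struct ea *a)
     {
       // [reg*2] with no base is better written [reg+reg]: same value,
       // without the 32-bit displacement a scaled-index-only form requires.
       if (a->base < 0 && a->index >= 0 && a->scale == 2)
         {
           a->base = a->index;
           a->scale = 1;
         }

       // A scaled index with neither base nor displacement is not encodable,
       // so a zero displacement has to be materialized.
       if (a->base < 0 && a->index >= 0 && a->scale != 1 && !a->has_disp)
         a->has_disp = true;
     }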
*/ + if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun) + && base_reg && !index_reg && !disp + && REG_P (base_reg) && REGNO (base_reg) == SI_REG) + disp = const0_rtx; + + /* Special case: encode reg+reg instead of reg*2. */ + if (!base && index && scale == 2) + base = index, base_reg = index_reg, scale = 1; + + /* Special case: scaling cannot be encoded without base or displacement. */ + if (!base && !disp && index && scale != 1) + disp = const0_rtx; + + out->base = base; + out->index = index; + out->disp = disp; + out->scale = scale; + out->seg = seg; + + return retval; +} + +/* Return cost of the memory address x. + For i386, it is better to use a complex address than let gcc copy + the address into a reg and make a new pseudo. But not if the address + requires to two regs - that would mean more pseudos with longer + lifetimes. */ +static int +ix86_address_cost (rtx x, machine_mode, addr_space_t, bool) +{ + struct ix86_address parts; + int cost = 1; + int ok = ix86_decompose_address (x, &parts); + + gcc_assert (ok); + + if (parts.base && SUBREG_P (parts.base)) + parts.base = SUBREG_REG (parts.base); + if (parts.index && SUBREG_P (parts.index)) + parts.index = SUBREG_REG (parts.index); + + /* Attempt to minimize number of registers in the address by increasing + address cost for each used register. We don't increase address cost + for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx" + is not invariant itself it most likely means that base or index is not + invariant. Therefore only "pic_offset_table_rtx" could be hoisted out, + which is not profitable for x86. */ + if (parts.base + && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) + && (current_pass->type == GIMPLE_PASS + || !pic_offset_table_rtx + || !REG_P (parts.base) + || REGNO (pic_offset_table_rtx) != REGNO (parts.base))) + cost++; + + if (parts.index + && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) + && (current_pass->type == GIMPLE_PASS + || !pic_offset_table_rtx + || !REG_P (parts.index) + || REGNO (pic_offset_table_rtx) != REGNO (parts.index))) + cost++; + + /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b, + since it's predecode logic can't detect the length of instructions + and it degenerates to vector decoded. Increase cost of such + addresses here. The penalty is minimally 2 cycles. It may be worthwhile + to split such addresses or even refuse such addresses at all. + + Following addressing modes are affected: + [base+scale*index] + [scale*index+disp] + [base+index] + + The first and last case may be avoidable by explicitly coding the zero in + memory address, but I don't have AMD-K6 machine handy to check this + theory. */ + + if (TARGET_K6 + && ((!parts.disp && parts.base && parts.index && parts.scale != 1) + || (parts.disp && !parts.base && parts.index && parts.scale != 1) + || (!parts.disp && parts.base && parts.index && parts.scale == 1))) + cost += 10; + + return cost; +} + +/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as + this is used for to form addresses to local data when -fPIC is in + use. */ + +static bool +darwin_local_data_pic (rtx disp) +{ + return (GET_CODE (disp) == UNSPEC + && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET); +} + +/* Determine if a given RTX is a valid constant. We already know this + satisfies CONSTANT_P. */ + +static bool +ix86_legitimate_constant_p (machine_mode, rtx x) +{ + /* Pointer bounds constants are not valid. 
*/ + if (POINTER_BOUNDS_MODE_P (GET_MODE (x))) + return false; + + switch (GET_CODE (x)) + { + case CONST: + x = XEXP (x, 0); + + if (GET_CODE (x) == PLUS) + { + if (!CONST_INT_P (XEXP (x, 1))) + return false; + x = XEXP (x, 0); + } + + if (TARGET_MACHO && darwin_local_data_pic (x)) + return true; + + /* Only some unspecs are valid as "constants". */ + if (GET_CODE (x) == UNSPEC) + switch (XINT (x, 1)) + { + case UNSPEC_GOT: + case UNSPEC_GOTOFF: + case UNSPEC_PLTOFF: + return TARGET_64BIT; + case UNSPEC_TPOFF: + case UNSPEC_NTPOFF: + x = XVECEXP (x, 0, 0); + return (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); + case UNSPEC_DTPOFF: + x = XVECEXP (x, 0, 0); + return (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC); + default: + return false; + } + + /* We must have drilled down to a symbol. */ + if (GET_CODE (x) == LABEL_REF) + return true; + if (GET_CODE (x) != SYMBOL_REF) + return false; + /* FALLTHRU */ + + case SYMBOL_REF: + /* TLS symbols are never valid. */ + if (SYMBOL_REF_TLS_MODEL (x)) + return false; + + /* DLLIMPORT symbols are never valid. */ + if (TARGET_DLLIMPORT_DECL_ATTRIBUTES + && SYMBOL_REF_DLLIMPORT_P (x)) + return false; + +#if TARGET_MACHO + /* mdynamic-no-pic */ + if (MACHO_DYNAMIC_NO_PIC_P) + return machopic_symbol_defined_p (x); +#endif + break; + + case CONST_WIDE_INT: + if (!TARGET_64BIT && !standard_sse_constant_p (x)) + return false; + break; + + case CONST_VECTOR: + if (!standard_sse_constant_p (x)) + return false; + + default: + break; + } + + /* Otherwise we handle everything else in the move patterns. */ + return true; +} + +/* Determine if it's legal to put X into the constant pool. This + is not possible for the address of thread-local symbols, which + is checked above. */ + +static bool +ix86_cannot_force_const_mem (machine_mode mode, rtx x) +{ + /* We can always put integral constants and vectors in memory. */ + switch (GET_CODE (x)) + { + case CONST_INT: + case CONST_WIDE_INT: + case CONST_DOUBLE: + case CONST_VECTOR: + return false; + + default: + break; + } + return !ix86_legitimate_constant_p (mode, x); +} + +/* Nonzero if the symbol is marked as dllimport, or as stub-variable, + otherwise zero. */ + +static bool +is_imported_p (rtx x) +{ + if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES + || GET_CODE (x) != SYMBOL_REF) + return false; + + return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x); +} + + +/* Nonzero if the constant value X is a legitimate general operand + when generating PIC code. It is given that flag_pic is on and + that X satisfies CONSTANT_P. */ + +bool +legitimate_pic_operand_p (rtx x) +{ + rtx inner; + + switch (GET_CODE (x)) + { + case CONST: + inner = XEXP (x, 0); + if (GET_CODE (inner) == PLUS + && CONST_INT_P (XEXP (inner, 1))) + inner = XEXP (inner, 0); + + /* Only some unspecs are valid as "constants". */ + if (GET_CODE (inner) == UNSPEC) + switch (XINT (inner, 1)) + { + case UNSPEC_GOT: + case UNSPEC_GOTOFF: + case UNSPEC_PLTOFF: + return TARGET_64BIT; + case UNSPEC_TPOFF: + x = XVECEXP (inner, 0, 0); + return (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); + case UNSPEC_MACHOPIC_OFFSET: + return legitimate_pic_address_disp_p (x); + default: + return false; + } + /* FALLTHRU */ + + case SYMBOL_REF: + case LABEL_REF: + return legitimate_pic_address_disp_p (x); + + default: + return true; + } +} + +/* Determine if a given CONST RTX is a valid memory displacement + in PIC mode. 
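For the 64-bit symbol+offset case handled below, the constant part must stay
inside a 16 MiB window on either side of the symbol; a minimal version of that
range test (hypothetical name, not GCC code):

     #include <stdbool.h>
     #include <stdint.h>

     static bool pic_symbol_offset_ok (int64_t offset)
     {
       return offset >= -16 * 1024 * 1024 && offset < 16 * 1024 * 1024;
     }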
*/ + +bool +legitimate_pic_address_disp_p (rtx disp) +{ + bool saw_plus; + + /* In 64bit mode we can allow direct addresses of symbols and labels + when they are not dynamic symbols. */ + if (TARGET_64BIT) + { + rtx op0 = disp, op1; + + switch (GET_CODE (disp)) + { + case LABEL_REF: + return true; + + case CONST: + if (GET_CODE (XEXP (disp, 0)) != PLUS) + break; + op0 = XEXP (XEXP (disp, 0), 0); + op1 = XEXP (XEXP (disp, 0), 1); + if (!CONST_INT_P (op1) + || INTVAL (op1) >= 16*1024*1024 + || INTVAL (op1) < -16*1024*1024) + break; + if (GET_CODE (op0) == LABEL_REF) + return true; + if (GET_CODE (op0) == CONST + && GET_CODE (XEXP (op0, 0)) == UNSPEC + && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL) + return true; + if (GET_CODE (op0) == UNSPEC + && XINT (op0, 1) == UNSPEC_PCREL) + return true; + if (GET_CODE (op0) != SYMBOL_REF) + break; + /* FALLTHRU */ + + case SYMBOL_REF: + /* TLS references should always be enclosed in UNSPEC. + The dllimported symbol needs always to be resolved. */ + if (SYMBOL_REF_TLS_MODEL (op0) + || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0))) + return false; + + if (TARGET_PECOFF) + { + if (is_imported_p (op0)) + return true; + + if (SYMBOL_REF_FAR_ADDR_P (op0) + || !SYMBOL_REF_LOCAL_P (op0)) + break; + + /* Function-symbols need to be resolved only for + large-model. + For the small-model we don't need to resolve anything + here. */ + if ((ix86_cmodel != CM_LARGE_PIC + && SYMBOL_REF_FUNCTION_P (op0)) + || ix86_cmodel == CM_SMALL_PIC) + return true; + /* Non-external symbols don't need to be resolved for + large, and medium-model. */ + if ((ix86_cmodel == CM_LARGE_PIC + || ix86_cmodel == CM_MEDIUM_PIC) + && !SYMBOL_REF_EXTERNAL_P (op0)) + return true; + } + else if (!SYMBOL_REF_FAR_ADDR_P (op0) + && (SYMBOL_REF_LOCAL_P (op0) + || (HAVE_LD_PIE_COPYRELOC + && flag_pie + && !SYMBOL_REF_WEAK (op0) + && !SYMBOL_REF_FUNCTION_P (op0))) + && ix86_cmodel != CM_LARGE_PIC) + return true; + break; + + default: + break; + } + } + if (GET_CODE (disp) != CONST) + return false; + disp = XEXP (disp, 0); + + if (TARGET_64BIT) + { + /* We are unsafe to allow PLUS expressions. This limit allowed distance + of GOT tables. We should not need these anyway. */ + if (GET_CODE (disp) != UNSPEC + || (XINT (disp, 1) != UNSPEC_GOTPCREL + && XINT (disp, 1) != UNSPEC_GOTOFF + && XINT (disp, 1) != UNSPEC_PCREL + && XINT (disp, 1) != UNSPEC_PLTOFF)) + return false; + + if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF + && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF) + return false; + return true; + } + + saw_plus = false; + if (GET_CODE (disp) == PLUS) + { + if (!CONST_INT_P (XEXP (disp, 1))) + return false; + disp = XEXP (disp, 0); + saw_plus = true; + } + + if (TARGET_MACHO && darwin_local_data_pic (disp)) + return true; + + if (GET_CODE (disp) != UNSPEC) + return false; + + switch (XINT (disp, 1)) + { + case UNSPEC_GOT: + if (saw_plus) + return false; + /* We need to check for both symbols and labels because VxWorks loads + text labels with @GOT rather than @GOTOFF. See gotoff_operand for + details. */ + return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF + || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF); + case UNSPEC_GOTOFF: + /* Refuse GOTOFF in 64bit mode since it is always 64bit when used. + While ABI specify also 32bit relocation but we don't produce it in + small PIC model at all. 
*/ + if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF + || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF) + && !TARGET_64BIT) + return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode); + return false; + case UNSPEC_GOTTPOFF: + case UNSPEC_GOTNTPOFF: + case UNSPEC_INDNTPOFF: + if (saw_plus) + return false; + disp = XVECEXP (disp, 0, 0); + return (GET_CODE (disp) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC); + case UNSPEC_NTPOFF: + disp = XVECEXP (disp, 0, 0); + return (GET_CODE (disp) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC); + case UNSPEC_DTPOFF: + disp = XVECEXP (disp, 0, 0); + return (GET_CODE (disp) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC); + } + + return false; +} + +/* Determine if op is suitable RTX for an address register. + Return naked register if a register or a register subreg is + found, otherwise return NULL_RTX. */ + +static rtx +ix86_validate_address_register (rtx op) +{ + machine_mode mode = GET_MODE (op); + + /* Only SImode or DImode registers can form the address. */ + if (mode != SImode && mode != DImode) + return NULL_RTX; + + if (REG_P (op)) + return op; + else if (SUBREG_P (op)) + { + rtx reg = SUBREG_REG (op); + + if (!REG_P (reg)) + return NULL_RTX; + + mode = GET_MODE (reg); + + /* Don't allow SUBREGs that span more than a word. It can + lead to spill failures when the register is one word out + of a two word structure. */ + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) + return NULL_RTX; + + /* Allow only SUBREGs of non-eliminable hard registers. */ + if (register_no_elim_operand (reg, mode)) + return reg; + } + + /* Op is not a register. */ + return NULL_RTX; +} + +/* Recognizes RTL expressions that are valid memory addresses for an + instruction. The MODE argument is the machine mode for the MEM + expression that wants to use this address. + + It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should + convert common non-canonical forms to canonical form so that they will + be recognized. */ + +static bool +ix86_legitimate_address_p (machine_mode, rtx addr, bool strict) +{ + struct ix86_address parts; + rtx base, index, disp; + HOST_WIDE_INT scale; + addr_space_t seg; + + if (ix86_decompose_address (addr, &parts) <= 0) + /* Decomposition failed. */ + return false; + + base = parts.base; + index = parts.index; + disp = parts.disp; + scale = parts.scale; + seg = parts.seg; + + /* Validate base register. */ + if (base) + { + rtx reg = ix86_validate_address_register (base); + + if (reg == NULL_RTX) + return false; + + if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) + || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg))) + /* Base is not valid. */ + return false; + } + + /* Validate index register. */ + if (index) + { + rtx reg = ix86_validate_address_register (index); + + if (reg == NULL_RTX) + return false; + + if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) + || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg))) + /* Index is not valid. */ + return false; + } + + /* Index and base should have the same mode. */ + if (base && index + && GET_MODE (base) != GET_MODE (index)) + return false; + + /* Address override works only on the (%reg) part of %fs:(%reg). */ + if (seg != ADDR_SPACE_GENERIC + && ((base && GET_MODE (base) != word_mode) + || (index && GET_MODE (index) != word_mode))) + return false; + + /* Validate scale factor. */ + if (scale != 1) + { + if (!index) + /* Scale without index. 
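More generally, the SIB byte only expresses scale factors 1, 2, 4 and 8, and a
scale other than 1 is meaningless without an index register; the two checks here
amount to (illustrative helper, not GCC code):

     #include <stdbool.h>

     static bool scale_ok (int scale, bool have_index)
     {
       if (scale == 1)
         return true;
       return have_index && (scale == 2 || scale == 4 || scale == 8);
     }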
*/ + return false; + + if (scale != 2 && scale != 4 && scale != 8) + /* Scale is not a valid multiplier. */ + return false; + } + + /* Validate displacement. */ + if (disp) + { + if (GET_CODE (disp) == CONST + && GET_CODE (XEXP (disp, 0)) == UNSPEC + && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET) + switch (XINT (XEXP (disp, 0), 1)) + { + /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when + used. While ABI specify also 32bit relocations, we don't produce + them at all and use IP relative instead. */ + case UNSPEC_GOT: + case UNSPEC_GOTOFF: + gcc_assert (flag_pic); + if (!TARGET_64BIT) + goto is_legitimate_pic; + + /* 64bit address unspec. */ + return false; + + case UNSPEC_GOTPCREL: + case UNSPEC_PCREL: + gcc_assert (flag_pic); + goto is_legitimate_pic; + + case UNSPEC_GOTTPOFF: + case UNSPEC_GOTNTPOFF: + case UNSPEC_INDNTPOFF: + case UNSPEC_NTPOFF: + case UNSPEC_DTPOFF: + break; + + case UNSPEC_STACK_CHECK: + gcc_assert (flag_split_stack); + break; + + default: + /* Invalid address unspec. */ + return false; + } + + else if (SYMBOLIC_CONST (disp) + && (flag_pic + || (TARGET_MACHO +#if TARGET_MACHO + && MACHOPIC_INDIRECT + && !machopic_operand_p (disp) +#endif + ))) + { + + is_legitimate_pic: + if (TARGET_64BIT && (index || base)) + { + /* foo@dtpoff(%rX) is ok. */ + if (GET_CODE (disp) != CONST + || GET_CODE (XEXP (disp, 0)) != PLUS + || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC + || !CONST_INT_P (XEXP (XEXP (disp, 0), 1)) + || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF + && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) + /* Non-constant pic memory reference. */ + return false; + } + else if ((!TARGET_MACHO || flag_pic) + && ! legitimate_pic_address_disp_p (disp)) + /* Displacement is an invalid pic construct. */ + return false; +#if TARGET_MACHO + else if (MACHO_DYNAMIC_NO_PIC_P + && !ix86_legitimate_constant_p (Pmode, disp)) + /* displacment must be referenced via non_lazy_pointer */ + return false; +#endif + + /* This code used to verify that a symbolic pic displacement + includes the pic_offset_table_rtx register. + + While this is good idea, unfortunately these constructs may + be created by "adds using lea" optimization for incorrect + code like: + + int a; + int foo(int i) + { + return *(&a+i); + } + + This code is nonsensical, but results in addressing + GOT table with pic_offset_table_rtx base. We can't + just refuse it easily, since it gets matched by + "addsi3" pattern, that later gets split to lea in the + case output register differs from input. While this + can be handled by separate addsi pattern for this case + that never results in lea, this seems to be easier and + correct fix for crash to disable this test. */ + } + else if (GET_CODE (disp) != LABEL_REF + && !CONST_INT_P (disp) + && (GET_CODE (disp) != CONST + || !ix86_legitimate_constant_p (Pmode, disp)) + && (GET_CODE (disp) != SYMBOL_REF + || !ix86_legitimate_constant_p (Pmode, disp))) + /* Displacement is not constant. */ + return false; + else if (TARGET_64BIT + && !x86_64_immediate_operand (disp, VOIDmode)) + /* Displacement is out of range. */ + return false; + /* In x32 mode, constant addresses are sign extended to 64bit, so + we have to prevent addresses from 0x80000000 to 0xffffffff. */ + else if (TARGET_X32 && !(index || base) + && CONST_INT_P (disp) + && val_signbit_known_set_p (SImode, INTVAL (disp))) + return false; + } + + /* Everything looks valid. */ + return true; +} + +/* Determine if a given RTX is a valid constant address. 
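Note also the x32 restriction checked above: constant addresses are sign-extended
to 64 bits, so a displacement with bit 31 set would land far outside the intended
low 4 GiB. A minimal form of that test (hypothetical name):

     #include <stdbool.h>
     #include <stdint.h>

     static bool x32_constant_address_ok (uint32_t disp)
     {
       return (disp & 0x80000000u) == 0;    // reject 0x80000000 .. 0xffffffff
     }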
*/ + +bool +constant_address_p (rtx x) +{ + return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1); +} + +/* Return a unique alias set for the GOT. */ + +static alias_set_type +ix86_GOT_alias_set (void) +{ + static alias_set_type set = -1; + if (set == -1) + set = new_alias_set (); + return set; +} + +/* Return a legitimate reference for ORIG (an address) using the + register REG. If REG is 0, a new pseudo is generated. + + There are two types of references that must be handled: + + 1. Global data references must load the address from the GOT, via + the PIC reg. An insn is emitted to do this load, and the reg is + returned. + + 2. Static data references, constant pool addresses, and code labels + compute the address as an offset from the GOT, whose base is in + the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to + differentiate them from global data objects. The returned + address is the PIC reg + an unspec constant. + + TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC + reg also appears in the address. */ + +static rtx +legitimize_pic_address (rtx orig, rtx reg) +{ + rtx addr = orig; + rtx new_rtx = orig; + +#if TARGET_MACHO + if (TARGET_MACHO && !TARGET_64BIT) + { + if (reg == 0) + reg = gen_reg_rtx (Pmode); + /* Use the generic Mach-O PIC machinery. */ + return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg); + } +#endif + + if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES) + { + rtx tmp = legitimize_pe_coff_symbol (addr, true); + if (tmp) + return tmp; + } + + if (TARGET_64BIT && legitimate_pic_address_disp_p (addr)) + new_rtx = addr; + else if (TARGET_64BIT && !TARGET_PECOFF + && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode)) + { + rtx tmpreg; + /* This symbol may be referenced via a displacement from the PIC + base address (@GOTOFF). */ + + if (GET_CODE (addr) == CONST) + addr = XEXP (addr, 0); + if (GET_CODE (addr) == PLUS) + { + new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), + UNSPEC_GOTOFF); + new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1)); + } + else + new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); + new_rtx = gen_rtx_CONST (Pmode, new_rtx); + if (!reg) + tmpreg = gen_reg_rtx (Pmode); + else + tmpreg = reg; + emit_move_insn (tmpreg, new_rtx); + + if (reg != 0) + { + new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx, + tmpreg, 1, OPTAB_DIRECT); + new_rtx = reg; + } + else + new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg); + } + else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode)) + { + /* This symbol may be referenced via a displacement from the PIC + base address (@GOTOFF). */ + + if (GET_CODE (addr) == CONST) + addr = XEXP (addr, 0); + if (GET_CODE (addr) == PLUS) + { + new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), + UNSPEC_GOTOFF); + new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1)); + } + else + new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); + new_rtx = gen_rtx_CONST (Pmode, new_rtx); + new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); + + if (reg != 0) + { + emit_move_insn (reg, new_rtx); + new_rtx = reg; + } + } + else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0) + /* We can't use @GOTOFF for text labels on VxWorks; + see gotoff_operand. 
*/ + || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)) + { + rtx tmp = legitimize_pe_coff_symbol (addr, true); + if (tmp) + return tmp; + + /* For x64 PE-COFF there is no GOT table. So we use address + directly. */ + if (TARGET_64BIT && TARGET_PECOFF) + { + new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL); + new_rtx = gen_rtx_CONST (Pmode, new_rtx); + + if (reg == 0) + reg = gen_reg_rtx (Pmode); + emit_move_insn (reg, new_rtx); + new_rtx = reg; + } + else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC) + { + new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL); + new_rtx = gen_rtx_CONST (Pmode, new_rtx); + new_rtx = gen_const_mem (Pmode, new_rtx); + set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); + + if (reg == 0) + reg = gen_reg_rtx (Pmode); + /* Use directly gen_movsi, otherwise the address is loaded + into register for CSE. We don't want to CSE this addresses, + instead we CSE addresses from the GOT table, so skip this. */ + emit_insn (gen_movsi (reg, new_rtx)); + new_rtx = reg; + } + else + { + /* This symbol must be referenced via a load from the + Global Offset Table (@GOT). */ + + new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); + new_rtx = gen_rtx_CONST (Pmode, new_rtx); + if (TARGET_64BIT) + new_rtx = force_reg (Pmode, new_rtx); + new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); + new_rtx = gen_const_mem (Pmode, new_rtx); + set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); + + if (reg == 0) + reg = gen_reg_rtx (Pmode); + emit_move_insn (reg, new_rtx); + new_rtx = reg; + } + } + else + { + if (CONST_INT_P (addr) + && !x86_64_immediate_operand (addr, VOIDmode)) + { + if (reg) + { + emit_move_insn (reg, addr); + new_rtx = reg; + } + else + new_rtx = force_reg (Pmode, addr); + } + else if (GET_CODE (addr) == CONST) + { + addr = XEXP (addr, 0); + + /* We must match stuff we generate before. Assume the only + unspecs that can get here are ours. Not that we could do + anything with them anyway.... */ + if (GET_CODE (addr) == UNSPEC + || (GET_CODE (addr) == PLUS + && GET_CODE (XEXP (addr, 0)) == UNSPEC)) + return orig; + gcc_assert (GET_CODE (addr) == PLUS); + } + if (GET_CODE (addr) == PLUS) + { + rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); + + /* Check first to see if this is a constant offset from a @GOTOFF + symbol reference. */ + if (!TARGET_PECOFF && gotoff_operand (op0, Pmode) + && CONST_INT_P (op1)) + { + if (!TARGET_64BIT) + { + new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), + UNSPEC_GOTOFF); + new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1); + new_rtx = gen_rtx_CONST (Pmode, new_rtx); + new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); + + if (reg != 0) + { + emit_move_insn (reg, new_rtx); + new_rtx = reg; + } + } + else + { + if (INTVAL (op1) < -16*1024*1024 + || INTVAL (op1) >= 16*1024*1024) + { + if (!x86_64_immediate_operand (op1, Pmode)) + op1 = force_reg (Pmode, op1); + new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1); + } + } + } + else + { + rtx base = legitimize_pic_address (op0, reg); + machine_mode mode = GET_MODE (base); + new_rtx + = legitimize_pic_address (op1, base == reg ? 
NULL_RTX : reg); + + if (CONST_INT_P (new_rtx)) + { + if (INTVAL (new_rtx) < -16*1024*1024 + || INTVAL (new_rtx) >= 16*1024*1024) + { + if (!x86_64_immediate_operand (new_rtx, mode)) + new_rtx = force_reg (mode, new_rtx); + new_rtx + = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx); + } + else + new_rtx = plus_constant (mode, base, INTVAL (new_rtx)); + } + else + { + /* For %rip addressing, we have to use just disp32, not + base nor index. */ + if (TARGET_64BIT + && (GET_CODE (base) == SYMBOL_REF + || GET_CODE (base) == LABEL_REF)) + base = force_reg (mode, base); + if (GET_CODE (new_rtx) == PLUS + && CONSTANT_P (XEXP (new_rtx, 1))) + { + base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0)); + new_rtx = XEXP (new_rtx, 1); + } + new_rtx = gen_rtx_PLUS (mode, base, new_rtx); + } + } + } + } + return new_rtx; +} + +/* Load the thread pointer. If TO_REG is true, force it into a register. */ + +static rtx +get_thread_pointer (machine_mode tp_mode, bool to_reg) +{ + rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); + + if (GET_MODE (tp) != tp_mode) + { + gcc_assert (GET_MODE (tp) == SImode); + gcc_assert (tp_mode == DImode); + + tp = gen_rtx_ZERO_EXTEND (tp_mode, tp); + } + + if (to_reg) + tp = copy_to_mode_reg (tp_mode, tp); + + return tp; +} + +/* Construct the SYMBOL_REF for the tls_get_addr function. */ + +static GTY(()) rtx ix86_tls_symbol; + +static rtx +ix86_tls_get_addr (void) +{ + if (!ix86_tls_symbol) + { + const char *sym + = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT) + ? "___tls_get_addr" : "__tls_get_addr"); + + ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym); + } + + if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF) + { + rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol), + UNSPEC_PLTOFF); + return gen_rtx_PLUS (Pmode, pic_offset_table_rtx, + gen_rtx_CONST (Pmode, unspec)); + } + + return ix86_tls_symbol; +} + +/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ + +static GTY(()) rtx ix86_tls_module_base_symbol; + +rtx +ix86_tls_module_base (void) +{ + if (!ix86_tls_module_base_symbol) + { + ix86_tls_module_base_symbol + = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_"); + + SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol) + |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; + } + + return ix86_tls_module_base_symbol; +} + +/* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is + false if we expect this to be used for a memory address and true if + we expect to load the address into a register. */ + +static rtx +legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) +{ + rtx dest, base, off; + rtx pic = NULL_RTX, tp = NULL_RTX; + machine_mode tp_mode = Pmode; + int type; + + /* Fall back to global dynamic model if tool chain cannot support local + dynamic. 
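The models below differ mainly in how the final address is formed. Roughly, for
the GNU/ELF flavour used here (hypothetical variables; tp is the thread pointer
held in %fs or %gs): local-exec adds a link-time constant to tp, initial-exec
adds an offset loaded from the GOT, and the two dynamic models obtain the
address from __tls_get_addr. A sketch of the two static models:

     #include <stdint.h>

     static void *tls_local_exec (char *tp, intptr_t tpoff)
     {
       return tp + tpoff;               // offset is a link-time constant
     }

     static void *tls_initial_exec (char *tp, const intptr_t *got_tpoff_slot)
     {
       return tp + *got_tpoff_slot;     // offset loaded from the GOT at run time
     }

The non-GNU @tpoff variant handled further down subtracts the offset from tp
instead of adding it.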
*/ + if (TARGET_SUN_TLS && !TARGET_64BIT + && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM + && model == TLS_MODEL_LOCAL_DYNAMIC) + model = TLS_MODEL_GLOBAL_DYNAMIC; + + switch (model) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + dest = gen_reg_rtx (Pmode); + + if (!TARGET_64BIT) + { + if (flag_pic && !TARGET_PECOFF) + pic = pic_offset_table_rtx; + else + { + pic = gen_reg_rtx (Pmode); + emit_insn (gen_set_got (pic)); + } + } + + if (TARGET_GNU2_TLS) + { + if (TARGET_64BIT) + emit_insn (gen_tls_dynamic_gnu2_64 (dest, x)); + else + emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic)); + + tp = get_thread_pointer (Pmode, true); + dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest)); + + if (GET_MODE (x) != Pmode) + x = gen_rtx_ZERO_EXTEND (Pmode, x); + + set_unique_reg_note (get_last_insn (), REG_EQUAL, x); + } + else + { + rtx caddr = ix86_tls_get_addr (); + + if (TARGET_64BIT) + { + rtx rax = gen_rtx_REG (Pmode, AX_REG); + rtx_insn *insns; + + start_sequence (); + emit_call_insn + (ix86_gen_tls_global_dynamic_64 (rax, x, caddr)); + insns = get_insns (); + end_sequence (); + + if (GET_MODE (x) != Pmode) + x = gen_rtx_ZERO_EXTEND (Pmode, x); + + RTL_CONST_CALL_P (insns) = 1; + emit_libcall_block (insns, dest, rax, x); + } + else + emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr)); + } + break; + + case TLS_MODEL_LOCAL_DYNAMIC: + base = gen_reg_rtx (Pmode); + + if (!TARGET_64BIT) + { + if (flag_pic) + pic = pic_offset_table_rtx; + else + { + pic = gen_reg_rtx (Pmode); + emit_insn (gen_set_got (pic)); + } + } + + if (TARGET_GNU2_TLS) + { + rtx tmp = ix86_tls_module_base (); + + if (TARGET_64BIT) + emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp)); + else + emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic)); + + tp = get_thread_pointer (Pmode, true); + set_unique_reg_note (get_last_insn (), REG_EQUAL, + gen_rtx_MINUS (Pmode, tmp, tp)); + } + else + { + rtx caddr = ix86_tls_get_addr (); + + if (TARGET_64BIT) + { + rtx rax = gen_rtx_REG (Pmode, AX_REG); + rtx_insn *insns; + rtx eqv; + + start_sequence (); + emit_call_insn + (ix86_gen_tls_local_dynamic_base_64 (rax, caddr)); + insns = get_insns (); + end_sequence (); + + /* Attach a unique REG_EQUAL, to allow the RTL optimizers to + share the LD_BASE result with other LD model accesses. */ + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_TLS_LD_BASE); + + RTL_CONST_CALL_P (insns) = 1; + emit_libcall_block (insns, base, rax, eqv); + } + else + emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr)); + } + + off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF); + off = gen_rtx_CONST (Pmode, off); + + dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off)); + + if (TARGET_GNU2_TLS) + { + dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp)); + + if (GET_MODE (x) != Pmode) + x = gen_rtx_ZERO_EXTEND (Pmode, x); + + set_unique_reg_note (get_last_insn (), REG_EQUAL, x); + } + break; + + case TLS_MODEL_INITIAL_EXEC: + if (TARGET_64BIT) + { + if (TARGET_SUN_TLS && !TARGET_X32) + { + /* The Sun linker took the AMD64 TLS spec literally + and can only handle %rax as destination of the + initial executable code sequence. */ + + dest = gen_reg_rtx (DImode); + emit_insn (gen_tls_initial_exec_64_sun (dest, x)); + return dest; + } + + /* Generate DImode references to avoid %fs:(%reg32) + problems and linker IE->LE relaxation bug. */ + tp_mode = DImode; + pic = NULL; + type = UNSPEC_GOTNTPOFF; + } + else if (flag_pic) + { + pic = pic_offset_table_rtx; + type = TARGET_ANY_GNU_TLS ? 
UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF; + } + else if (!TARGET_ANY_GNU_TLS) + { + pic = gen_reg_rtx (Pmode); + emit_insn (gen_set_got (pic)); + type = UNSPEC_GOTTPOFF; + } + else + { + pic = NULL; + type = UNSPEC_INDNTPOFF; + } + + off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type); + off = gen_rtx_CONST (tp_mode, off); + if (pic) + off = gen_rtx_PLUS (tp_mode, pic, off); + off = gen_const_mem (tp_mode, off); + set_mem_alias_set (off, ix86_GOT_alias_set ()); + + if (TARGET_64BIT || TARGET_ANY_GNU_TLS) + { + base = get_thread_pointer (tp_mode, + for_mov || !TARGET_TLS_DIRECT_SEG_REFS); + off = force_reg (tp_mode, off); + dest = gen_rtx_PLUS (tp_mode, base, off); + if (tp_mode != Pmode) + dest = convert_to_mode (Pmode, dest, 1); + } + else + { + base = get_thread_pointer (Pmode, true); + dest = gen_reg_rtx (Pmode); + emit_insn (ix86_gen_sub3 (dest, base, off)); + } + break; + + case TLS_MODEL_LOCAL_EXEC: + off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), + (TARGET_64BIT || TARGET_ANY_GNU_TLS) + ? UNSPEC_NTPOFF : UNSPEC_TPOFF); + off = gen_rtx_CONST (Pmode, off); + + if (TARGET_64BIT || TARGET_ANY_GNU_TLS) + { + base = get_thread_pointer (Pmode, + for_mov || !TARGET_TLS_DIRECT_SEG_REFS); + return gen_rtx_PLUS (Pmode, base, off); + } + else + { + base = get_thread_pointer (Pmode, true); + dest = gen_reg_rtx (Pmode); + emit_insn (ix86_gen_sub3 (dest, base, off)); + } + break; + + default: + gcc_unreachable (); + } + + return dest; +} + +/* Create or return the unique __imp_DECL dllimport symbol corresponding + to symbol DECL if BEIMPORT is true. Otherwise create or return the + unique refptr-DECL symbol corresponding to symbol DECL. */ + +struct dllimport_hasher : ggc_cache_ptr_hash +{ + static inline hashval_t hash (tree_map *m) { return m->hash; } + static inline bool + equal (tree_map *a, tree_map *b) + { + return a->base.from == b->base.from; + } + + static int + keep_cache_entry (tree_map *&m) + { + return ggc_marked_p (m->base.from); + } +}; + +static GTY((cache)) hash_table *dllimport_map; + +static tree +get_dllimport_decl (tree decl, bool beimport) +{ + struct tree_map *h, in; + const char *name; + const char *prefix; + size_t namelen, prefixlen; + char *imp_name; + tree to; + rtx rtl; + + if (!dllimport_map) + dllimport_map = hash_table::create_ggc (512); + + in.hash = htab_hash_pointer (decl); + in.base.from = decl; + tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT); + h = *loc; + if (h) + return h->to; + + *loc = h = ggc_alloc (); + h->hash = in.hash; + h->base.from = decl; + h->to = to = build_decl (DECL_SOURCE_LOCATION (decl), + VAR_DECL, NULL, ptr_type_node); + DECL_ARTIFICIAL (to) = 1; + DECL_IGNORED_P (to) = 1; + DECL_EXTERNAL (to) = 1; + TREE_READONLY (to) = 1; + + name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); + name = targetm.strip_name_encoding (name); + if (beimport) + prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0 + ? "*__imp_" : "*__imp__"; + else + prefix = user_label_prefix[0] == 0 ? "*.refptr." 
: "*refptr."; + namelen = strlen (name); + prefixlen = strlen (prefix); + imp_name = (char *) alloca (namelen + prefixlen + 1); + memcpy (imp_name, prefix, prefixlen); + memcpy (imp_name + prefixlen, name, namelen + 1); + + name = ggc_alloc_string (imp_name, namelen + prefixlen); + rtl = gen_rtx_SYMBOL_REF (Pmode, name); + SET_SYMBOL_REF_DECL (rtl, to); + SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR; + if (!beimport) + { + SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL; +#ifdef SUB_TARGET_RECORD_STUB + SUB_TARGET_RECORD_STUB (name); +#endif + } + + rtl = gen_const_mem (Pmode, rtl); + set_mem_alias_set (rtl, ix86_GOT_alias_set ()); + + SET_DECL_RTL (to, rtl); + SET_DECL_ASSEMBLER_NAME (to, get_identifier (name)); + + return to; +} + +/* Expand SYMBOL into its corresponding far-addresse symbol. + WANT_REG is true if we require the result be a register. */ + +static rtx +legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg) +{ + tree imp_decl; + rtx x; + + gcc_assert (SYMBOL_REF_DECL (symbol)); + imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false); + + x = DECL_RTL (imp_decl); + if (want_reg) + x = force_reg (Pmode, x); + return x; +} + +/* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is + true if we require the result be a register. */ + +static rtx +legitimize_dllimport_symbol (rtx symbol, bool want_reg) +{ + tree imp_decl; + rtx x; + + gcc_assert (SYMBOL_REF_DECL (symbol)); + imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true); + + x = DECL_RTL (imp_decl); + if (want_reg) + x = force_reg (Pmode, x); + return x; +} + +/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG + is true if we require the result be a register. */ + +static rtx +legitimize_pe_coff_symbol (rtx addr, bool inreg) +{ + if (!TARGET_PECOFF) + return NULL_RTX; + + if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) + { + if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr)) + return legitimize_dllimport_symbol (addr, inreg); + if (GET_CODE (addr) == CONST + && GET_CODE (XEXP (addr, 0)) == PLUS + && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF + && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0))) + { + rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg); + return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); + } + } + + if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC) + return NULL_RTX; + if (GET_CODE (addr) == SYMBOL_REF + && !is_imported_p (addr) + && SYMBOL_REF_EXTERNAL_P (addr) + && SYMBOL_REF_DECL (addr)) + return legitimize_pe_coff_extern_decl (addr, inreg); + + if (GET_CODE (addr) == CONST + && GET_CODE (XEXP (addr, 0)) == PLUS + && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF + && !is_imported_p (XEXP (XEXP (addr, 0), 0)) + && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0)) + && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0))) + { + rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg); + return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); + } + return NULL_RTX; +} + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. If we find one, return the new, valid address. + This macro is used in only one place: `memory_address' in explow.c. + + OLDX is the address as it was before break_out_memory_refs was called. + In some cases it is useful to look at this to decide what needs to be done. + + It is always safe for this macro to do nothing. It exists to recognize + opportunities to optimize the output. 
+ + For the 80386, we handle X+REG by loading X into a register R and + using R+REG. R will go in a general reg and indexing will be used. + However, if REG is a broken-out memory address or multiplication, + nothing needs to be done because REG can certainly go in a general reg. + + When -fpic is used, special handling is needed for symbolic references. + See comments by legitimize_pic_address in i386.c for details. */ + +static rtx +ix86_legitimize_address (rtx x, rtx, machine_mode mode) +{ + bool changed = false; + unsigned log; + + log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0; + if (log) + return legitimize_tls_address (x, (enum tls_model) log, false); + if (GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF + && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)))) + { + rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), + (enum tls_model) log, false); + return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1)); + } + + if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) + { + rtx tmp = legitimize_pe_coff_symbol (x, true); + if (tmp) + return tmp; + } + + if (flag_pic && SYMBOLIC_CONST (x)) + return legitimize_pic_address (x, 0); + +#if TARGET_MACHO + if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x)) + return machopic_indirect_data_reference (x, 0); +#endif + + /* Canonicalize shifts by 0, 1, 2, 3 into multiply */ + if (GET_CODE (x) == ASHIFT + && CONST_INT_P (XEXP (x, 1)) + && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4) + { + changed = true; + log = INTVAL (XEXP (x, 1)); + x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), + GEN_INT (1 << log)); + } + + if (GET_CODE (x) == PLUS) + { + /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */ + + if (GET_CODE (XEXP (x, 0)) == ASHIFT + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4) + { + changed = true; + log = INTVAL (XEXP (XEXP (x, 0), 1)); + XEXP (x, 0) = gen_rtx_MULT (Pmode, + force_reg (Pmode, XEXP (XEXP (x, 0), 0)), + GEN_INT (1 << log)); + } + + if (GET_CODE (XEXP (x, 1)) == ASHIFT + && CONST_INT_P (XEXP (XEXP (x, 1), 1)) + && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4) + { + changed = true; + log = INTVAL (XEXP (XEXP (x, 1), 1)); + XEXP (x, 1) = gen_rtx_MULT (Pmode, + force_reg (Pmode, XEXP (XEXP (x, 1), 0)), + GEN_INT (1 << log)); + } + + /* Put multiply first if it isn't already. */ + if (GET_CODE (XEXP (x, 1)) == MULT) + { + std::swap (XEXP (x, 0), XEXP (x, 1)); + changed = true; + } + + /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) + into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be + created by virtual register instantiation, register elimination, and + similar optimizations. */ + if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) + { + changed = true; + x = gen_rtx_PLUS (Pmode, + gen_rtx_PLUS (Pmode, XEXP (x, 0), + XEXP (XEXP (x, 1), 0)), + XEXP (XEXP (x, 1), 1)); + } + + /* Canonicalize + (plus (plus (mult (reg) (const)) (plus (reg) (const))) const) + into (plus (plus (mult (reg) (const)) (reg)) (const)). 
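Both association orders denote the same value; the point of the rewrite is only
that the second shape maps directly onto the index*scale + base + disp
addressing form with a single top-level constant. A standalone illustration
(plain integer arithmetic, hypothetical names):

     #include <stdint.h>

     static uintptr_t before_reassoc (uintptr_t r1, uintptr_t r2)
     {
       return (r1 * 4 + (r2 + 8)) + 16;   // constants buried one level down
     }

     static uintptr_t after_reassoc (uintptr_t r1, uintptr_t r2)
     {
       return (r1 * 4 + r2) + 24;         // same value, canonical shape
     }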
*/ + else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS + && CONSTANT_P (XEXP (x, 1))) + { + rtx constant; + rtx other = NULL_RTX; + + if (CONST_INT_P (XEXP (x, 1))) + { + constant = XEXP (x, 1); + other = XEXP (XEXP (XEXP (x, 0), 1), 1); + } + else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1))) + { + constant = XEXP (XEXP (XEXP (x, 0), 1), 1); + other = XEXP (x, 1); + } + else + constant = 0; + + if (constant) + { + changed = true; + x = gen_rtx_PLUS (Pmode, + gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), + XEXP (XEXP (XEXP (x, 0), 1), 0)), + plus_constant (Pmode, other, + INTVAL (constant))); + } + } + + if (changed && ix86_legitimate_address_p (mode, x, false)) + return x; + + if (GET_CODE (XEXP (x, 0)) == MULT) + { + changed = true; + XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0)); + } + + if (GET_CODE (XEXP (x, 1)) == MULT) + { + changed = true; + XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1)); + } + + if (changed + && REG_P (XEXP (x, 1)) + && REG_P (XEXP (x, 0))) + return x; + + if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1))) + { + changed = true; + x = legitimize_pic_address (x, 0); + } + + if (changed && ix86_legitimate_address_p (mode, x, false)) + return x; + + if (REG_P (XEXP (x, 0))) + { + rtx temp = gen_reg_rtx (Pmode); + rtx val = force_operand (XEXP (x, 1), temp); + if (val != temp) + { + val = convert_to_mode (Pmode, val, 1); + emit_move_insn (temp, val); + } + + XEXP (x, 1) = temp; + return x; + } + + else if (REG_P (XEXP (x, 1))) + { + rtx temp = gen_reg_rtx (Pmode); + rtx val = force_operand (XEXP (x, 0), temp); + if (val != temp) + { + val = convert_to_mode (Pmode, val, 1); + emit_move_insn (temp, val); + } + + XEXP (x, 0) = temp; + return x; + } + } + + return x; +} + +/* Print an integer constant expression in assembler syntax. Addition + and subtraction are the only arithmetic that may appear in these + expressions. FILE is the stdio stream to write to, X is the rtx, and + CODE is the operand print code from the output string. */ + +static void +output_pic_addr_const (FILE *file, rtx x, int code) +{ + char buf[256]; + + switch (GET_CODE (x)) + { + case PC: + gcc_assert (flag_pic); + putc ('.', file); + break; + + case SYMBOL_REF: + if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS) + output_addr_const (file, x); + else + { + const char *name = XSTR (x, 0); + + /* Mark the decl as referenced so that cgraph will + output the function. */ + if (SYMBOL_REF_DECL (x)) + mark_decl_referenced (SYMBOL_REF_DECL (x)); + +#if TARGET_MACHO + if (MACHOPIC_INDIRECT + && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) + name = machopic_indirection_name (x, /*stub_p=*/true); +#endif + assemble_name (file, name); + } + if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF) + && code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) + fputs ("@PLT", file); + break; + + case LABEL_REF: + x = XEXP (x, 0); + /* FALLTHRU */ + case CODE_LABEL: + ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); + assemble_name (asm_out_file, buf); + break; + + case CONST_INT: + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + break; + + case CONST: + /* This used to output parentheses around the expression, + but that does not work on the 386 (either ATT or BSD assembler). */ + output_pic_addr_const (file, XEXP (x, 0), code); + break; + + case CONST_DOUBLE: + /* We can't handle floating point constants; + TARGET_PRINT_OPERAND must handle them. 
*/ + output_operand_lossage ("floating constant misused"); + break; + + case PLUS: + /* Some assemblers need integer constants to appear first. */ + if (CONST_INT_P (XEXP (x, 0))) + { + output_pic_addr_const (file, XEXP (x, 0), code); + putc ('+', file); + output_pic_addr_const (file, XEXP (x, 1), code); + } + else + { + gcc_assert (CONST_INT_P (XEXP (x, 1))); + output_pic_addr_const (file, XEXP (x, 1), code); + putc ('+', file); + output_pic_addr_const (file, XEXP (x, 0), code); + } + break; + + case MINUS: + if (!TARGET_MACHO) + putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file); + output_pic_addr_const (file, XEXP (x, 0), code); + putc ('-', file); + output_pic_addr_const (file, XEXP (x, 1), code); + if (!TARGET_MACHO) + putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file); + break; + + case UNSPEC: + if (XINT (x, 1) == UNSPEC_STACK_CHECK) + { + bool f = i386_asm_output_addr_const_extra (file, x); + gcc_assert (f); + break; + } + + gcc_assert (XVECLEN (x, 0) == 1); + output_pic_addr_const (file, XVECEXP (x, 0, 0), code); + switch (XINT (x, 1)) + { + case UNSPEC_GOT: + fputs ("@GOT", file); + break; + case UNSPEC_GOTOFF: + fputs ("@GOTOFF", file); + break; + case UNSPEC_PLTOFF: + fputs ("@PLTOFF", file); + break; + case UNSPEC_PCREL: + fputs (ASSEMBLER_DIALECT == ASM_ATT ? + "(%rip)" : "[rip]", file); + break; + case UNSPEC_GOTPCREL: + fputs (ASSEMBLER_DIALECT == ASM_ATT ? + "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file); + break; + case UNSPEC_GOTTPOFF: + /* FIXME: This might be @TPOFF in Sun ld too. */ + fputs ("@gottpoff", file); + break; + case UNSPEC_TPOFF: + fputs ("@tpoff", file); + break; + case UNSPEC_NTPOFF: + if (TARGET_64BIT) + fputs ("@tpoff", file); + else + fputs ("@ntpoff", file); + break; + case UNSPEC_DTPOFF: + fputs ("@dtpoff", file); + break; + case UNSPEC_GOTNTPOFF: + if (TARGET_64BIT) + fputs (ASSEMBLER_DIALECT == ASM_ATT ? + "@gottpoff(%rip)": "@gottpoff[rip]", file); + else + fputs ("@gotntpoff", file); + break; + case UNSPEC_INDNTPOFF: + fputs ("@indntpoff", file); + break; +#if TARGET_MACHO + case UNSPEC_MACHOPIC_OFFSET: + putc ('-', file); + machopic_output_function_base_name (file); + break; +#endif + default: + output_operand_lossage ("invalid UNSPEC as operand"); + break; + } + break; + + default: + output_operand_lossage ("invalid expression as operand"); + } +} + +/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. + We need to emit DTP-relative relocations. */ + +static void ATTRIBUTE_UNUSED +i386_output_dwarf_dtprel (FILE *file, int size, rtx x) +{ + fputs (ASM_LONG, file); + output_addr_const (file, x); + fputs ("@dtpoff", file); + switch (size) + { + case 4: + break; + case 8: + fputs (", 0", file); + break; + default: + gcc_unreachable (); + } +} + +/* Return true if X is a representation of the PIC register. This copes + with calls from ix86_find_base_term, where the register might have + been replaced by a cselib value. */ + +static bool +ix86_pic_register_p (rtx x) +{ + if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x)) + return (pic_offset_table_rtx + && rtx_equal_for_cselib_p (x, pic_offset_table_rtx)); + else if (!REG_P (x)) + return false; + else if (pic_offset_table_rtx) + { + if (REGNO (x) == REGNO (pic_offset_table_rtx)) + return true; + if (HARD_REGISTER_P (x) + && !HARD_REGISTER_P (pic_offset_table_rtx) + && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx)) + return true; + return false; + } + else + return REGNO (x) == PIC_OFFSET_TABLE_REGNUM; +} + +/* Helper function for ix86_delegitimize_address. 
+ Attempt to delegitimize TLS local-exec accesses. */ + +static rtx +ix86_delegitimize_tls_address (rtx orig_x) +{ + rtx x = orig_x, unspec; + struct ix86_address addr; + + if (!TARGET_TLS_DIRECT_SEG_REFS) + return orig_x; + if (MEM_P (x)) + x = XEXP (x, 0); + if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode) + return orig_x; + if (ix86_decompose_address (x, &addr) == 0 + || addr.seg != DEFAULT_TLS_SEG_REG + || addr.disp == NULL_RTX + || GET_CODE (addr.disp) != CONST) + return orig_x; + unspec = XEXP (addr.disp, 0); + if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1))) + unspec = XEXP (unspec, 0); + if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF) + return orig_x; + x = XVECEXP (unspec, 0, 0); + gcc_assert (GET_CODE (x) == SYMBOL_REF); + if (unspec != XEXP (addr.disp, 0)) + x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1)); + if (addr.index) + { + rtx idx = addr.index; + if (addr.scale != 1) + idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale)); + x = gen_rtx_PLUS (Pmode, idx, x); + } + if (addr.base) + x = gen_rtx_PLUS (Pmode, addr.base, x); + if (MEM_P (orig_x)) + x = replace_equiv_address_nv (orig_x, x); + return x; +} + +/* In the name of slightly smaller debug output, and to cater to + general assembler lossage, recognize PIC+GOTOFF and turn it back + into a direct symbol reference. + + On Darwin, this is necessary to avoid a crash, because Darwin + has a different PIC label for each routine but the DWARF debugging + information is not associated with any particular routine, so it's + necessary to remove references to the PIC label from RTL stored by + the DWARF output code. */ + +static rtx +ix86_delegitimize_address (rtx x) +{ + rtx orig_x = delegitimize_mem_from_attrs (x); + /* addend is NULL or some rtx if x is something+GOTOFF where + something doesn't include the PIC register. */ + rtx addend = NULL_RTX; + /* reg_addend is NULL or a multiple of some register. */ + rtx reg_addend = NULL_RTX; + /* const_addend is NULL or a const_int. */ + rtx const_addend = NULL_RTX; + /* This is the result, or NULL. */ + rtx result = NULL_RTX; + + x = orig_x; + + if (MEM_P (x)) + x = XEXP (x, 0); + + if (TARGET_64BIT) + { + if (GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_MODE (XEXP (x, 0)) == Pmode + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC + && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL) + { + rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0); + x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2); + if (MEM_P (orig_x)) + x = replace_equiv_address_nv (orig_x, x); + return x; + } + + if (GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == UNSPEC + && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL + || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL) + && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)) + { + x = XVECEXP (XEXP (x, 0), 0, 0); + if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x)) + { + x = simplify_gen_subreg (GET_MODE (orig_x), x, + GET_MODE (x), 0); + if (x == NULL_RTX) + return orig_x; + } + return x; + } + + if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC) + return ix86_delegitimize_tls_address (orig_x); + + /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic + and -mcmodel=medium -fpic. 
*/ + } + + if (GET_CODE (x) != PLUS + || GET_CODE (XEXP (x, 1)) != CONST) + return ix86_delegitimize_tls_address (orig_x); + + if (ix86_pic_register_p (XEXP (x, 0))) + /* %ebx + GOT/GOTOFF */ + ; + else if (GET_CODE (XEXP (x, 0)) == PLUS) + { + /* %ebx + %reg * scale + GOT/GOTOFF */ + reg_addend = XEXP (x, 0); + if (ix86_pic_register_p (XEXP (reg_addend, 0))) + reg_addend = XEXP (reg_addend, 1); + else if (ix86_pic_register_p (XEXP (reg_addend, 1))) + reg_addend = XEXP (reg_addend, 0); + else + { + reg_addend = NULL_RTX; + addend = XEXP (x, 0); + } + } + else + addend = XEXP (x, 0); + + x = XEXP (XEXP (x, 1), 0); + if (GET_CODE (x) == PLUS + && CONST_INT_P (XEXP (x, 1))) + { + const_addend = XEXP (x, 1); + x = XEXP (x, 0); + } + + if (GET_CODE (x) == UNSPEC + && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend) + || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x)) + || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC + && !MEM_P (orig_x) && !addend))) + result = XVECEXP (x, 0, 0); + + if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x) + && !MEM_P (orig_x)) + result = XVECEXP (x, 0, 0); + + if (! result) + return ix86_delegitimize_tls_address (orig_x); + + if (const_addend) + result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend)); + if (reg_addend) + result = gen_rtx_PLUS (Pmode, reg_addend, result); + if (addend) + { + /* If the rest of original X doesn't involve the PIC register, add + addend and subtract pic_offset_table_rtx. This can happen e.g. + for code like: + leal (%ebx, %ecx, 4), %ecx + ... + movl foo@GOTOFF(%ecx), %edx + in which case we return (%ecx - %ebx) + foo + or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg + and reload has completed. */ + if (pic_offset_table_rtx + && (!reload_completed || !ix86_use_pseudo_pic_reg ())) + result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend), + pic_offset_table_rtx), + result); + else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP) + { + rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); + tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp); + result = gen_rtx_PLUS (Pmode, tmp, result); + } + else + return orig_x; + } + if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x)) + { + result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0); + if (result == NULL_RTX) + return orig_x; + } + return result; +} + +/* If X is a machine specific address (i.e. a symbol or label being + referenced as a displacement from the GOT implemented using an + UNSPEC), then return the base term. Otherwise return X. 
*/ + +rtx +ix86_find_base_term (rtx x) +{ + rtx term; + + if (TARGET_64BIT) + { + if (GET_CODE (x) != CONST) + return x; + term = XEXP (x, 0); + if (GET_CODE (term) == PLUS + && CONST_INT_P (XEXP (term, 1))) + term = XEXP (term, 0); + if (GET_CODE (term) != UNSPEC + || (XINT (term, 1) != UNSPEC_GOTPCREL + && XINT (term, 1) != UNSPEC_PCREL)) + return x; + + return XVECEXP (term, 0, 0); + } + + return ix86_delegitimize_address (x); +} + +static void +put_condition_code (enum rtx_code code, machine_mode mode, bool reverse, + bool fp, FILE *file) +{ + const char *suffix; + + if (mode == CCFPmode || mode == CCFPUmode) + { + code = ix86_fp_compare_code_to_integer (code); + mode = CCmode; + } + if (reverse) + code = reverse_condition (code); + + switch (code) + { + case EQ: + switch (mode) + { + case CCAmode: + suffix = "a"; + break; + case CCCmode: + suffix = "c"; + break; + case CCOmode: + suffix = "o"; + break; + case CCPmode: + suffix = "p"; + break; + case CCSmode: + suffix = "s"; + break; + default: + suffix = "e"; + break; + } + break; + case NE: + switch (mode) + { + case CCAmode: + suffix = "na"; + break; + case CCCmode: + suffix = "nc"; + break; + case CCOmode: + suffix = "no"; + break; + case CCPmode: + suffix = "np"; + break; + case CCSmode: + suffix = "ns"; + break; + default: + suffix = "ne"; + break; + } + break; + case GT: + gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode); + suffix = "g"; + break; + case GTU: + /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers. + Those same assemblers have the same but opposite lossage on cmov. */ + if (mode == CCmode) + suffix = fp ? "nbe" : "a"; + else + gcc_unreachable (); + break; + case LT: + switch (mode) + { + case CCNOmode: + case CCGOCmode: + suffix = "s"; + break; + + case CCmode: + case CCGCmode: + suffix = "l"; + break; + + default: + gcc_unreachable (); + } + break; + case LTU: + if (mode == CCmode) + suffix = "b"; + else if (mode == CCCmode) + suffix = fp ? "b" : "c"; + else + gcc_unreachable (); + break; + case GE: + switch (mode) + { + case CCNOmode: + case CCGOCmode: + suffix = "ns"; + break; + + case CCmode: + case CCGCmode: + suffix = "ge"; + break; + + default: + gcc_unreachable (); + } + break; + case GEU: + if (mode == CCmode) + suffix = "nb"; + else if (mode == CCCmode) + suffix = fp ? "nb" : "nc"; + else + gcc_unreachable (); + break; + case LE: + gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode); + suffix = "le"; + break; + case LEU: + if (mode == CCmode) + suffix = "be"; + else + gcc_unreachable (); + break; + case UNORDERED: + suffix = fp ? "u" : "p"; + break; + case ORDERED: + suffix = fp ? "nu" : "np"; + break; + default: + gcc_unreachable (); + } + fputs (suffix, file); +} + +/* Print the name of register X to FILE based on its machine mode and number. + If CODE is 'w', pretend the mode is HImode. + If CODE is 'b', pretend the mode is QImode. + If CODE is 'k', pretend the mode is SImode. + If CODE is 'q', pretend the mode is DImode. + If CODE is 'x', pretend the mode is V4SFmode. + If CODE is 't', pretend the mode is V8SFmode. + If CODE is 'g', pretend the mode is V16SFmode. + If CODE is 'h', pretend the reg is the 'high' byte register. + If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. + If CODE is 'd', duplicate the operand for AVX instruction. 
+ */ + +void +print_reg (rtx x, int code, FILE *file) +{ + const char *reg; + int msize; + unsigned int regno; + bool duplicated; + + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('%', file); + + if (x == pc_rtx) + { + gcc_assert (TARGET_64BIT); + fputs ("rip", file); + return; + } + + if (code == 'y' && STACK_TOP_P (x)) + { + fputs ("st(0)", file); + return; + } + + if (code == 'w') + msize = 2; + else if (code == 'b') + msize = 1; + else if (code == 'k') + msize = 4; + else if (code == 'q') + msize = 8; + else if (code == 'h') + msize = 0; + else if (code == 'x') + msize = 16; + else if (code == 't') + msize = 32; + else if (code == 'g') + msize = 64; + else + msize = GET_MODE_SIZE (GET_MODE (x)); + + regno = true_regnum (x); + + gcc_assert (regno != ARG_POINTER_REGNUM + && regno != FRAME_POINTER_REGNUM + && regno != FLAGS_REG + && regno != FPSR_REG + && regno != FPCR_REG); + + duplicated = code == 'd' && TARGET_AVX; + + switch (msize) + { + case 8: + case 4: + if (LEGACY_INT_REGNO_P (regno)) + putc (msize == 8 && TARGET_64BIT ? 'r' : 'e', file); + case 16: + case 12: + case 2: + normal: + reg = hi_reg_name[regno]; + break; + case 1: + if (regno >= ARRAY_SIZE (qi_reg_name)) + goto normal; + reg = qi_reg_name[regno]; + break; + case 0: + if (regno >= ARRAY_SIZE (qi_high_reg_name)) + goto normal; + reg = qi_high_reg_name[regno]; + break; + case 32: + case 64: + if (SSE_REGNO_P (regno)) + { + gcc_assert (!duplicated); + putc (msize == 32 ? 'y' : 'z', file); + reg = hi_reg_name[regno] + 1; + break; + } + goto normal; + default: + gcc_unreachable (); + } + + fputs (reg, file); + + /* Irritatingly, AMD extended registers use + different naming convention: "r%d[bwd]" */ + if (REX_INT_REGNO_P (regno)) + { + gcc_assert (TARGET_64BIT); + switch (msize) + { + case 0: + error ("extended registers have no high halves"); + break; + case 1: + putc ('b', file); + break; + case 2: + putc ('w', file); + break; + case 4: + putc ('d', file); + break; + case 8: + /* no suffix */ + break; + default: + error ("unsupported operand size for extended register"); + break; + } + return; + } + + if (duplicated) + { + if (ASSEMBLER_DIALECT == ASM_ATT) + fprintf (file, ", %%%s", reg); + else + fprintf (file, ", %s", reg); + } +} + +/* Meaning of CODE: + L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. + C -- print opcode suffix for set/cmov insn. + c -- like C, but print reversed condition + F,f -- likewise, but for floating-point. + O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", + otherwise nothing + R -- print embeded rounding and sae. + r -- print only sae. + z -- print the opcode suffix for the size of the current operand. + Z -- likewise, with special suffixes for x87 instructions. + * -- print a star (in certain assembler syntax) + A -- print an absolute memory reference. + E -- print address with DImode register names if TARGET_64BIT. + w -- print the operand as if it's a "word" (HImode) even if it isn't. + s -- print a shift double count, followed by the assemblers argument + delimiter. + b -- print the QImode name of the register for the indicated operand. + %b0 would print %al if operands[0] is reg 0. + w -- likewise, print the HImode name of the register. + k -- likewise, print the SImode name of the register. + q -- likewise, print the DImode name of the register. + x -- likewise, print the V4SFmode name of the register. + t -- likewise, print the V8SFmode name of the register. + g -- likewise, print the V16SFmode name of the register. 
+ h -- print the QImode name for a "high" register, either ah, bh, ch or dh. + y -- print "st(0)" instead of "st" as a register. + d -- print duplicated register operand for AVX instruction. + D -- print condition for SSE cmp instruction. + P -- if PIC, print an @PLT suffix. + p -- print raw symbol name. + X -- don't print any sort of PIC '@' suffix for a symbol. + & -- print some in-use local-dynamic symbol name. + H -- print a memory address offset by 8; used for sse high-parts + Y -- print condition for XOP pcom* instruction. + + -- print a branch hint as 'cs' or 'ds' prefix + ; -- print a semicolon (after prefixes due to bug in older gas). + ~ -- print "i" if TARGET_AVX2, "f" otherwise. + @ -- print a segment register of thread base pointer load + ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode + ! -- print MPX prefix for jxx/call/ret instructions if required. + */ + +void +ix86_print_operand (FILE *file, rtx x, int code) +{ + if (code) + { + switch (code) + { + case 'A': + switch (ASSEMBLER_DIALECT) + { + case ASM_ATT: + putc ('*', file); + break; + + case ASM_INTEL: + /* Intel syntax. For absolute addresses, registers should not + be surrounded by braces. */ + if (!REG_P (x)) + { + putc ('[', file); + ix86_print_operand (file, x, 0); + putc (']', file); + return; + } + break; + + default: + gcc_unreachable (); + } + + ix86_print_operand (file, x, 0); + return; + + case 'E': + /* Wrap address in an UNSPEC to declare special handling. */ + if (TARGET_64BIT) + x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR); + + output_address (VOIDmode, x); + return; + + case 'L': + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('l', file); + return; + + case 'W': + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('w', file); + return; + + case 'B': + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('b', file); + return; + + case 'Q': + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('l', file); + return; + + case 'S': + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('s', file); + return; + + case 'T': + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('t', file); + return; + + case 'O': +#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX + if (ASSEMBLER_DIALECT != ASM_ATT) + return; + + switch (GET_MODE_SIZE (GET_MODE (x))) + { + case 2: + putc ('w', file); + break; + + case 4: + putc ('l', file); + break; + + case 8: + putc ('q', file); + break; + + default: + output_operand_lossage + ("invalid operand size for operand code 'O'"); + return; + } + + putc ('.', file); +#endif + return; + + case 'z': + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + { + /* Opcodes don't get size suffixes if using Intel opcodes. */ + if (ASSEMBLER_DIALECT == ASM_INTEL) + return; + + switch (GET_MODE_SIZE (GET_MODE (x))) + { + case 1: + putc ('b', file); + return; + + case 2: + putc ('w', file); + return; + + case 4: + putc ('l', file); + return; + + case 8: + putc ('q', file); + return; + + default: + output_operand_lossage + ("invalid operand size for operand code 'z'"); + return; + } + } + + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + warning + (0, "non-integer operand used with operand code 'z'"); + /* FALLTHRU */ + + case 'Z': + /* 387 opcodes don't get size suffixes if using Intel opcodes. 
*/ + if (ASSEMBLER_DIALECT == ASM_INTEL) + return; + + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + { + switch (GET_MODE_SIZE (GET_MODE (x))) + { + case 2: +#ifdef HAVE_AS_IX86_FILDS + putc ('s', file); +#endif + return; + + case 4: + putc ('l', file); + return; + + case 8: +#ifdef HAVE_AS_IX86_FILDQ + putc ('q', file); +#else + fputs ("ll", file); +#endif + return; + + default: + break; + } + } + else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + { + /* 387 opcodes don't get size suffixes + if the operands are registers. */ + if (STACK_REG_P (x)) + return; + + switch (GET_MODE_SIZE (GET_MODE (x))) + { + case 4: + putc ('s', file); + return; + + case 8: + putc ('l', file); + return; + + case 12: + case 16: + putc ('t', file); + return; + + default: + break; + } + } + else + { + output_operand_lossage + ("invalid operand type used with operand code 'Z'"); + return; + } + + output_operand_lossage + ("invalid operand size for operand code 'Z'"); + return; + + case 'd': + case 'b': + case 'w': + case 'k': + case 'q': + case 'h': + case 't': + case 'g': + case 'y': + case 'x': + case 'X': + case 'P': + case 'p': + break; + + case 's': + if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT) + { + ix86_print_operand (file, x, 0); + fputs (", ", file); + } + return; + + case 'Y': + switch (GET_CODE (x)) + { + case NE: + fputs ("neq", file); + break; + case EQ: + fputs ("eq", file); + break; + case GE: + case GEU: + fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file); + break; + case GT: + case GTU: + fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file); + break; + case LE: + case LEU: + fputs ("le", file); + break; + case LT: + case LTU: + fputs ("lt", file); + break; + case UNORDERED: + fputs ("unord", file); + break; + case ORDERED: + fputs ("ord", file); + break; + case UNEQ: + fputs ("ueq", file); + break; + case UNGE: + fputs ("nlt", file); + break; + case UNGT: + fputs ("nle", file); + break; + case UNLE: + fputs ("ule", file); + break; + case UNLT: + fputs ("ult", file); + break; + case LTGT: + fputs ("une", file); + break; + default: + output_operand_lossage ("operand is not a condition code, " + "invalid operand code 'Y'"); + return; + } + return; + + case 'D': + /* Little bit of braindamage here. The SSE compare instructions + does use completely different names for the comparisons that the + fp conditional moves. 
*/ + switch (GET_CODE (x)) + { + case UNEQ: + if (TARGET_AVX) + { + fputs ("eq_us", file); + break; + } + case EQ: + fputs ("eq", file); + break; + case UNLT: + if (TARGET_AVX) + { + fputs ("nge", file); + break; + } + case LT: + fputs ("lt", file); + break; + case UNLE: + if (TARGET_AVX) + { + fputs ("ngt", file); + break; + } + case LE: + fputs ("le", file); + break; + case UNORDERED: + fputs ("unord", file); + break; + case LTGT: + if (TARGET_AVX) + { + fputs ("neq_oq", file); + break; + } + case NE: + fputs ("neq", file); + break; + case GE: + if (TARGET_AVX) + { + fputs ("ge", file); + break; + } + case UNGE: + fputs ("nlt", file); + break; + case GT: + if (TARGET_AVX) + { + fputs ("gt", file); + break; + } + case UNGT: + fputs ("nle", file); + break; + case ORDERED: + fputs ("ord", file); + break; + default: + output_operand_lossage ("operand is not a condition code, " + "invalid operand code 'D'"); + return; + } + return; + + case 'F': + case 'f': +#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('.', file); +#endif + + case 'C': + case 'c': + if (!COMPARISON_P (x)) + { + output_operand_lossage ("operand is not a condition code, " + "invalid operand code '%c'", code); + return; + } + put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), + code == 'c' || code == 'f', + code == 'F' || code == 'f', + file); + return; + + case 'H': + if (!offsettable_memref_p (x)) + { + output_operand_lossage ("operand is not an offsettable memory " + "reference, invalid operand code 'H'"); + return; + } + /* It doesn't actually matter what mode we use here, as we're + only going to use this for printing. */ + x = adjust_address_nv (x, DImode, 8); + /* Output 'qword ptr' for intel assembler dialect. */ + if (ASSEMBLER_DIALECT == ASM_INTEL) + code = 'q'; + break; + + case 'K': + gcc_assert (CONST_INT_P (x)); + + if (INTVAL (x) & IX86_HLE_ACQUIRE) +#ifdef HAVE_AS_IX86_HLE + fputs ("xacquire ", file); +#else + fputs ("\n" ASM_BYTE "0xf2\n\t", file); +#endif + else if (INTVAL (x) & IX86_HLE_RELEASE) +#ifdef HAVE_AS_IX86_HLE + fputs ("xrelease ", file); +#else + fputs ("\n" ASM_BYTE "0xf3\n\t", file); +#endif + /* We do not want to print value of the operand. 
*/ + return; + + case 'N': + if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) + fputs ("{z}", file); + return; + + case 'r': + gcc_assert (CONST_INT_P (x)); + gcc_assert (INTVAL (x) == ROUND_SAE); + + if (ASSEMBLER_DIALECT == ASM_INTEL) + fputs (", ", file); + + fputs ("{sae}", file); + + if (ASSEMBLER_DIALECT == ASM_ATT) + fputs (", ", file); + + return; + + case 'R': + gcc_assert (CONST_INT_P (x)); + + if (ASSEMBLER_DIALECT == ASM_INTEL) + fputs (", ", file); + + switch (INTVAL (x)) + { + case ROUND_NEAREST_INT | ROUND_SAE: + fputs ("{rn-sae}", file); + break; + case ROUND_NEG_INF | ROUND_SAE: + fputs ("{rd-sae}", file); + break; + case ROUND_POS_INF | ROUND_SAE: + fputs ("{ru-sae}", file); + break; + case ROUND_ZERO | ROUND_SAE: + fputs ("{rz-sae}", file); + break; + default: + gcc_unreachable (); + } + + if (ASSEMBLER_DIALECT == ASM_ATT) + fputs (", ", file); + + return; + + case '*': + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('*', file); + return; + + case '&': + { + const char *name = get_some_local_dynamic_name (); + if (name == NULL) + output_operand_lossage ("'%%&' used without any " + "local dynamic TLS references"); + else + assemble_name (file, name); + return; + } + + case '+': + { + rtx x; + + if (!optimize + || optimize_function_for_size_p (cfun) + || !TARGET_BRANCH_PREDICTION_HINTS) + return; + + x = find_reg_note (current_output_insn, REG_BR_PROB, 0); + if (x) + { + int pred_val = XINT (x, 0); + + if (pred_val < REG_BR_PROB_BASE * 45 / 100 + || pred_val > REG_BR_PROB_BASE * 55 / 100) + { + bool taken = pred_val > REG_BR_PROB_BASE / 2; + bool cputaken + = final_forward_branch_p (current_output_insn) == 0; + + /* Emit hints only in the case default branch prediction + heuristics would fail. */ + if (taken != cputaken) + { + /* We use 3e (DS) prefix for taken branches and + 2e (CS) prefix for not taken branches. */ + if (taken) + fputs ("ds ; ", file); + else + fputs ("cs ; ", file); + } + } + } + return; + } + + case ';': +#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX + putc (';', file); +#endif + return; + + case '@': + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('%', file); + + /* The kernel uses a different segment register for performance + reasons; a system call would not have to trash the userspace + segment register, which would be expensive. */ + if (TARGET_64BIT && ix86_cmodel != CM_KERNEL) + fputs ("fs", file); + else + fputs ("gs", file); + return; + + case '~': + putc (TARGET_AVX2 ? 'i' : 'f', file); + return; + + case '^': + if (TARGET_64BIT && Pmode != word_mode) + fputs ("addr32 ", file); + return; + + case '!': + if (ix86_bnd_prefixed_insn_p (current_output_insn)) + fputs ("bnd ", file); + return; + + default: + output_operand_lossage ("invalid operand code '%c'", code); + } + } + + if (REG_P (x)) + print_reg (x, code, file); + + else if (MEM_P (x)) + { + rtx addr = XEXP (x, 0); + + /* No `byte ptr' prefix for call instructions ... */ + if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') + { + machine_mode mode = GET_MODE (x); + const char *size; + + /* Check for explicit size override codes. */ + if (code == 'b') + size = "BYTE"; + else if (code == 'w') + size = "WORD"; + else if (code == 'k') + size = "DWORD"; + else if (code == 'q') + size = "QWORD"; + else if (code == 'x') + size = "XMMWORD"; + else if (mode == BLKmode) + /* ... or BLKmode operands, when not overridden. 
*/ + size = NULL; + else + switch (GET_MODE_SIZE (mode)) + { + case 1: size = "BYTE"; break; + case 2: size = "WORD"; break; + case 4: size = "DWORD"; break; + case 8: size = "QWORD"; break; + case 12: size = "TBYTE"; break; + case 16: + if (mode == XFmode) + size = "TBYTE"; + else + size = "XMMWORD"; + break; + case 32: size = "YMMWORD"; break; + case 64: size = "ZMMWORD"; break; + default: + gcc_unreachable (); + } + if (size) + { + fputs (size, file); + fputs (" PTR ", file); + } + } + + if (this_is_asm_operands && ! address_operand (addr, VOIDmode)) + output_operand_lossage ("invalid constraints for operand"); + else + ix86_print_operand_address_as + (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P'); + } + + else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode) + { + long l; + + REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); + + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('$', file); + /* Sign extend 32bit SFmode immediate to 8 bytes. */ + if (code == 'q') + fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x", + (unsigned long long) (int) l); + else + fprintf (file, "0x%08x", (unsigned int) l); + } + + else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode) + { + long l[2]; + + REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); + + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('$', file); + fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff); + } + + /* These float cases don't actually occur as immediate operands. */ + else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode) + { + char dstr[30]; + + real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); + fputs (dstr, file); + } + + else + { + /* We have patterns that allow zero sets of memory, for instance. + In 64-bit mode, we should probably support all 8-byte vectors, + since we can in fact encode that into an immediate. */ + if (GET_CODE (x) == CONST_VECTOR) + { + gcc_assert (x == CONST0_RTX (GET_MODE (x))); + x = const0_rtx; + } + + if (code != 'P' && code != 'p') + { + if (CONST_INT_P (x)) + { + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('$', file); + } + else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF + || GET_CODE (x) == LABEL_REF) + { + if (ASSEMBLER_DIALECT == ASM_ATT) + putc ('$', file); + else + fputs ("OFFSET FLAT:", file); + } + } + if (CONST_INT_P (x)) + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + else if (flag_pic || MACHOPIC_INDIRECT) + output_pic_addr_const (file, x, code); + else + output_addr_const (file, x); + } +} + +static bool +ix86_print_operand_punct_valid_p (unsigned char code) +{ + return (code == '@' || code == '*' || code == '+' || code == '&' + || code == ';' || code == '~' || code == '^' || code == '!'); +} + +/* Print a memory operand whose address is ADDR. 
*/ + +static void +ix86_print_operand_address_as (FILE *file, rtx addr, + addr_space_t as, bool no_rip) +{ + struct ix86_address parts; + rtx base, index, disp; + int scale; + int ok; + bool vsib = false; + int code = 0; + + if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR) + { + ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts); + gcc_assert (parts.index == NULL_RTX); + parts.index = XVECEXP (addr, 0, 1); + parts.scale = INTVAL (XVECEXP (addr, 0, 2)); + addr = XVECEXP (addr, 0, 0); + vsib = true; + } + else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR) + { + gcc_assert (TARGET_64BIT); + ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts); + code = 'q'; + } + else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR) + { + ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts); + gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX); + if (parts.base != NULL_RTX) + { + parts.index = parts.base; + parts.scale = 1; + } + parts.base = XVECEXP (addr, 0, 0); + addr = XVECEXP (addr, 0, 0); + } + else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR) + { + ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts); + gcc_assert (parts.index == NULL_RTX); + parts.index = XVECEXP (addr, 0, 1); + addr = XVECEXP (addr, 0, 0); + } + else + ok = ix86_decompose_address (addr, &parts); + + gcc_assert (ok); + + base = parts.base; + index = parts.index; + disp = parts.disp; + scale = parts.scale; + + if (ADDR_SPACE_GENERIC_P (as)) + as = parts.seg; + else + gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg)); + + if (!ADDR_SPACE_GENERIC_P (as)) + { + const char *string; + + if (as == ADDR_SPACE_SEG_FS) + string = (ASSEMBLER_DIALECT == ASM_ATT ? "%fs:" : "fs:"); + else if (as == ADDR_SPACE_SEG_GS) + string = (ASSEMBLER_DIALECT == ASM_ATT ? "%gs:" : "gs:"); + else + gcc_unreachable (); + fputs (string, file); + } + + /* Use one byte shorter RIP relative addressing for 64bit mode. */ + if (TARGET_64BIT && !base && !index && !no_rip) + { + rtx symbol = disp; + + if (GET_CODE (disp) == CONST + && GET_CODE (XEXP (disp, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) + symbol = XEXP (XEXP (disp, 0), 0); + + if (GET_CODE (symbol) == LABEL_REF + || (GET_CODE (symbol) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (symbol) == 0)) + base = pc_rtx; + } + + if (!base && !index) + { + /* Displacement only requires special attention. */ + if (CONST_INT_P (disp)) + { + if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == ADDR_SPACE_GENERIC) + fputs ("ds:", file); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp)); + } + else if (flag_pic) + output_pic_addr_const (file, disp, 0); + else + output_addr_const (file, disp); + } + else + { + /* Print SImode register names to force addr32 prefix. */ + if (SImode_address_operand (addr, VOIDmode)) + { + if (flag_checking) + { + gcc_assert (TARGET_64BIT); + switch (GET_CODE (addr)) + { + case SUBREG: + gcc_assert (GET_MODE (addr) == SImode); + gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode); + break; + case ZERO_EXTEND: + case AND: + gcc_assert (GET_MODE (addr) == DImode); + break; + default: + gcc_unreachable (); + } + } + gcc_assert (!code); + code = 'k'; + } + else if (code == 0 + && TARGET_X32 + && disp + && CONST_INT_P (disp) + && INTVAL (disp) < -16*1024*1024) + { + /* X32 runs in 64-bit mode, where displacement, DISP, in + address DISP(%r64), is encoded as 32-bit immediate sign- + extended from 32-bit to 64-bit. 
For -0x40000300(%r64), + address is %r64 + 0xffffffffbffffd00. When %r64 < + 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64, + which is invalid for x32. The correct address is %r64 + - 0x40000300 == 0xf7ffdd64. To properly encode + -0x40000300(%r64) for x32, we zero-extend negative + displacement by forcing addr32 prefix which truncates + 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should + zero-extend all negative displacements, including -1(%rsp). + However, for small negative displacements, sign-extension + won't cause overflow. We only zero-extend negative + displacements if they < -16*1024*1024, which is also used + to check legitimate address displacements for PIC. */ + code = 'k'; + } + + if (ASSEMBLER_DIALECT == ASM_ATT) + { + if (disp) + { + if (flag_pic) + output_pic_addr_const (file, disp, 0); + else if (GET_CODE (disp) == LABEL_REF) + output_asm_label (disp); + else + output_addr_const (file, disp); + } + + putc ('(', file); + if (base) + print_reg (base, code, file); + if (index) + { + putc (',', file); + print_reg (index, vsib ? 0 : code, file); + if (scale != 1 || vsib) + fprintf (file, ",%d", scale); + } + putc (')', file); + } + else + { + rtx offset = NULL_RTX; + + if (disp) + { + /* Pull out the offset of a symbol; print any symbol itself. */ + if (GET_CODE (disp) == CONST + && GET_CODE (XEXP (disp, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) + { + offset = XEXP (XEXP (disp, 0), 1); + disp = gen_rtx_CONST (VOIDmode, + XEXP (XEXP (disp, 0), 0)); + } + + if (flag_pic) + output_pic_addr_const (file, disp, 0); + else if (GET_CODE (disp) == LABEL_REF) + output_asm_label (disp); + else if (CONST_INT_P (disp)) + offset = disp; + else + output_addr_const (file, disp); + } + + putc ('[', file); + if (base) + { + print_reg (base, code, file); + if (offset) + { + if (INTVAL (offset) >= 0) + putc ('+', file); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); + } + } + else if (offset) + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); + else + putc ('0', file); + + if (index) + { + putc ('+', file); + print_reg (index, vsib ? 0 : code, file); + if (scale != 1 || vsib) + fprintf (file, "*%d", scale); + } + putc (']', file); + } + } +} + +static void +ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr) +{ + ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false); +} + +/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */ + +static bool +i386_asm_output_addr_const_extra (FILE *file, rtx x) +{ + rtx op; + + if (GET_CODE (x) != UNSPEC) + return false; + + op = XVECEXP (x, 0, 0); + switch (XINT (x, 1)) + { + case UNSPEC_GOTTPOFF: + output_addr_const (file, op); + /* FIXME: This might be @TPOFF in Sun ld. */ + fputs ("@gottpoff", file); + break; + case UNSPEC_TPOFF: + output_addr_const (file, op); + fputs ("@tpoff", file); + break; + case UNSPEC_NTPOFF: + output_addr_const (file, op); + if (TARGET_64BIT) + fputs ("@tpoff", file); + else + fputs ("@ntpoff", file); + break; + case UNSPEC_DTPOFF: + output_addr_const (file, op); + fputs ("@dtpoff", file); + break; + case UNSPEC_GOTNTPOFF: + output_addr_const (file, op); + if (TARGET_64BIT) + fputs (ASSEMBLER_DIALECT == ASM_ATT ? 
+ "@gottpoff(%rip)" : "@gottpoff[rip]", file); + else + fputs ("@gotntpoff", file); + break; + case UNSPEC_INDNTPOFF: + output_addr_const (file, op); + fputs ("@indntpoff", file); + break; +#if TARGET_MACHO + case UNSPEC_MACHOPIC_OFFSET: + output_addr_const (file, op); + putc ('-', file); + machopic_output_function_base_name (file); + break; +#endif + + case UNSPEC_STACK_CHECK: + { + int offset; + + gcc_assert (flag_split_stack); + +#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET + offset = TARGET_THREAD_SPLIT_STACK_OFFSET; +#else + gcc_unreachable (); +#endif + + fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset); + } + break; + + default: + return false; + } + + return true; +} + +/* Split one or more double-mode RTL references into pairs of half-mode + references. The RTL can be REG, offsettable MEM, integer constant, or + CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to + split and "num" is its length. lo_half and hi_half are output arrays + that parallel "operands". */ + +void +split_double_mode (machine_mode mode, rtx operands[], + int num, rtx lo_half[], rtx hi_half[]) +{ + machine_mode half_mode; + unsigned int byte; + + switch (mode) + { + case TImode: + half_mode = DImode; + break; + case DImode: + half_mode = SImode; + break; + default: + gcc_unreachable (); + } + + byte = GET_MODE_SIZE (half_mode); + + while (num--) + { + rtx op = operands[num]; + + /* simplify_subreg refuse to split volatile memory addresses, + but we still have to handle it. */ + if (MEM_P (op)) + { + lo_half[num] = adjust_address (op, half_mode, 0); + hi_half[num] = adjust_address (op, half_mode, byte); + } + else + { + lo_half[num] = simplify_gen_subreg (half_mode, op, + GET_MODE (op) == VOIDmode + ? mode : GET_MODE (op), 0); + hi_half[num] = simplify_gen_subreg (half_mode, op, + GET_MODE (op) == VOIDmode + ? mode : GET_MODE (op), byte); + } + } +} + +/* Output code to perform a 387 binary operation in INSN, one of PLUS, + MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] + is the expression of the binary operation. The output may either be + emitted here, or returned to the caller, like all output_* functions. + + There is no guarantee that the operands are the same mode, as they + might be within FLOAT or FLOAT_EXTEND expressions. */ + +#ifndef SYSV386_COMPAT +/* Set to 1 for compatibility with brain-damaged assemblers. No-one + wants to fix the assemblers because that causes incompatibility + with gcc. No-one wants to fix gcc because that causes + incompatibility with assemblers... You can use the option of + -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */ +#define SYSV386_COMPAT 1 +#endif + +const char * +output_387_binary_op (rtx insn, rtx *operands) +{ + static char buf[40]; + const char *p; + const char *ssep; + int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]); + + /* Even if we do not want to check the inputs, this documents input + constraints. Which helps in understanding the following code. 
*/ + if (flag_checking) + { + if (STACK_REG_P (operands[0]) + && ((REG_P (operands[1]) + && REGNO (operands[0]) == REGNO (operands[1]) + && (STACK_REG_P (operands[2]) || MEM_P (operands[2]))) + || (REG_P (operands[2]) + && REGNO (operands[0]) == REGNO (operands[2]) + && (STACK_REG_P (operands[1]) || MEM_P (operands[1])))) + && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) + ; /* ok */ + else + gcc_assert (is_sse); + } + + switch (GET_CODE (operands[3])) + { + case PLUS: + if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT + || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) + p = "fiadd"; + else + p = "fadd"; + ssep = "vadd"; + break; + + case MINUS: + if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT + || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) + p = "fisub"; + else + p = "fsub"; + ssep = "vsub"; + break; + + case MULT: + if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT + || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) + p = "fimul"; + else + p = "fmul"; + ssep = "vmul"; + break; + + case DIV: + if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT + || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) + p = "fidiv"; + else + p = "fdiv"; + ssep = "vdiv"; + break; + + default: + gcc_unreachable (); + } + + if (is_sse) + { + if (TARGET_AVX) + { + strcpy (buf, ssep); + if (GET_MODE (operands[0]) == SFmode) + strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}"); + else + strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}"); + } + else + { + strcpy (buf, ssep + 1); + if (GET_MODE (operands[0]) == SFmode) + strcat (buf, "ss\t{%2, %0|%0, %2}"); + else + strcat (buf, "sd\t{%2, %0|%0, %2}"); + } + return buf; + } + strcpy (buf, p); + + switch (GET_CODE (operands[3])) + { + case MULT: + case PLUS: + if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) + std::swap (operands[1], operands[2]); + + /* know operands[0] == operands[1]. */ + + if (MEM_P (operands[2])) + { + p = "%Z2\t%2"; + break; + } + + if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) + { + if (STACK_TOP_P (operands[0])) + /* How is it that we are storing to a dead operand[2]? + Well, presumably operands[1] is dead too. We can't + store the result to st(0) as st(0) gets popped on this + instruction. Instead store to operands[2] (which I + think has to be st(1)). st(1) will be popped later. + gcc <= 2.8.1 didn't have this check and generated + assembly code that the Unixware assembler rejected. */ + p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ + else + p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ + break; + } + + if (STACK_TOP_P (operands[0])) + p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ + else + p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ + break; + + case MINUS: + case DIV: + if (MEM_P (operands[1])) + { + p = "r%Z1\t%1"; + break; + } + + if (MEM_P (operands[2])) + { + p = "%Z2\t%2"; + break; + } + + if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) + { +#if SYSV386_COMPAT + /* The SystemV/386 SVR3.2 assembler, and probably all AT&T + derived assemblers, confusingly reverse the direction of + the operation for fsub{r} and fdiv{r} when the + destination register is not st(0). The Intel assembler + doesn't have this brain damage. Read !SYSV386_COMPAT to + figure out what the hardware really does. */ + if (STACK_TOP_P (operands[0])) + p = "{p\t%0, %2|rp\t%2, %0}"; + else + p = "{rp\t%2, %0|p\t%0, %2}"; +#else + if (STACK_TOP_P (operands[0])) + /* As above for fmul/fadd, we can't store to st(0). 
*/ + p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ + else + p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ +#endif + break; + } + + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + { +#if SYSV386_COMPAT + if (STACK_TOP_P (operands[0])) + p = "{rp\t%0, %1|p\t%1, %0}"; + else + p = "{p\t%1, %0|rp\t%0, %1}"; +#else + if (STACK_TOP_P (operands[0])) + p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ + else + p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ +#endif + break; + } + + if (STACK_TOP_P (operands[0])) + { + if (STACK_TOP_P (operands[1])) + p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ + else + p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ + break; + } + else if (STACK_TOP_P (operands[1])) + { +#if SYSV386_COMPAT + p = "{\t%1, %0|r\t%0, %1}"; +#else + p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ +#endif + } + else + { +#if SYSV386_COMPAT + p = "{r\t%2, %0|\t%0, %2}"; +#else + p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ +#endif + } + break; + + default: + gcc_unreachable (); + } + + strcat (buf, p); + return buf; +} + +/* Check if a 256bit AVX register is referenced inside of EXP. */ + +static bool +ix86_check_avx256_register (const_rtx exp) +{ + if (SUBREG_P (exp)) + exp = SUBREG_REG (exp); + + return (REG_P (exp) + && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp))); +} + +/* Return needed mode for entity in optimize_mode_switching pass. */ + +static int +ix86_avx_u128_mode_needed (rtx_insn *insn) +{ + if (CALL_P (insn)) + { + rtx link; + + /* Needed mode is set to AVX_U128_CLEAN if there are + no 256bit modes used in function arguments. */ + for (link = CALL_INSN_FUNCTION_USAGE (insn); + link; + link = XEXP (link, 1)) + { + if (GET_CODE (XEXP (link, 0)) == USE) + { + rtx arg = XEXP (XEXP (link, 0), 0); + + if (ix86_check_avx256_register (arg)) + return AVX_U128_DIRTY; + } + } + + return AVX_U128_CLEAN; + } + + /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware + changes state only when a 256bit register is written to, but we need + to prevent the compiler from moving optimal insertion point above + eventual read from 256bit register. */ + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) + if (ix86_check_avx256_register (*iter)) + return AVX_U128_DIRTY; + + return AVX_U128_ANY; +} + +/* Return mode that i387 must be switched into + prior to the execution of insn. */ + +static int +ix86_i387_mode_needed (int entity, rtx_insn *insn) +{ + enum attr_i387_cw mode; + + /* The mode UNINITIALIZED is used to store control word after a + function call or ASM pattern. The mode ANY specify that function + has no requirements on the control word and make no changes in the + bits we are interested in. */ + + if (CALL_P (insn) + || (NONJUMP_INSN_P (insn) + && (asm_noperands (PATTERN (insn)) >= 0 + || GET_CODE (PATTERN (insn)) == ASM_INPUT))) + return I387_CW_UNINITIALIZED; + + if (recog_memoized (insn) < 0) + return I387_CW_ANY; + + mode = get_attr_i387_cw (insn); + + switch (entity) + { + case I387_TRUNC: + if (mode == I387_CW_TRUNC) + return mode; + break; + + case I387_FLOOR: + if (mode == I387_CW_FLOOR) + return mode; + break; + + case I387_CEIL: + if (mode == I387_CW_CEIL) + return mode; + break; + + case I387_MASK_PM: + if (mode == I387_CW_MASK_PM) + return mode; + break; + + default: + gcc_unreachable (); + } + + return I387_CW_ANY; +} + +/* Return mode that entity must be switched into + prior to the execution of insn. 
*/ + +static int +ix86_mode_needed (int entity, rtx_insn *insn) +{ + switch (entity) + { + case AVX_U128: + return ix86_avx_u128_mode_needed (insn); + case I387_TRUNC: + case I387_FLOOR: + case I387_CEIL: + case I387_MASK_PM: + return ix86_i387_mode_needed (entity, insn); + default: + gcc_unreachable (); + } + return 0; +} + +/* Check if a 256bit AVX register is referenced in stores. */ + +static void +ix86_check_avx256_stores (rtx dest, const_rtx, void *data) + { + if (ix86_check_avx256_register (dest)) + { + bool *used = (bool *) data; + *used = true; + } + } + +/* Calculate mode of upper 128bit AVX registers after the insn. */ + +static int +ix86_avx_u128_mode_after (int mode, rtx_insn *insn) +{ + rtx pat = PATTERN (insn); + + if (vzeroupper_operation (pat, VOIDmode) + || vzeroall_operation (pat, VOIDmode)) + return AVX_U128_CLEAN; + + /* We know that state is clean after CALL insn if there are no + 256bit registers used in the function return register. */ + if (CALL_P (insn)) + { + bool avx_reg256_found = false; + note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found); + + return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; + } + + /* Otherwise, return current mode. Remember that if insn + references AVX 256bit registers, the mode was already changed + to DIRTY from MODE_NEEDED. */ + return mode; +} + +/* Return the mode that an insn results in. */ + +static int +ix86_mode_after (int entity, int mode, rtx_insn *insn) +{ + switch (entity) + { + case AVX_U128: + return ix86_avx_u128_mode_after (mode, insn); + case I387_TRUNC: + case I387_FLOOR: + case I387_CEIL: + case I387_MASK_PM: + return mode; + default: + gcc_unreachable (); + } +} + +static int +ix86_avx_u128_mode_entry (void) +{ + tree arg; + + /* Entry mode is set to AVX_U128_DIRTY if there are + 256bit modes used in function arguments. */ + for (arg = DECL_ARGUMENTS (current_function_decl); arg; + arg = TREE_CHAIN (arg)) + { + rtx incoming = DECL_INCOMING_RTL (arg); + + if (incoming && ix86_check_avx256_register (incoming)) + return AVX_U128_DIRTY; + } + + return AVX_U128_CLEAN; +} + +/* Return a mode that ENTITY is assumed to be + switched to at function entry. */ + +static int +ix86_mode_entry (int entity) +{ + switch (entity) + { + case AVX_U128: + return ix86_avx_u128_mode_entry (); + case I387_TRUNC: + case I387_FLOOR: + case I387_CEIL: + case I387_MASK_PM: + return I387_CW_ANY; + default: + gcc_unreachable (); + } +} + +static int +ix86_avx_u128_mode_exit (void) +{ + rtx reg = crtl->return_rtx; + + /* Exit mode is set to AVX_U128_DIRTY if there are + 256bit modes used in the function return register. */ + if (reg && ix86_check_avx256_register (reg)) + return AVX_U128_DIRTY; + + return AVX_U128_CLEAN; +} + +/* Return a mode that ENTITY is assumed to be + switched to at function exit. */ + +static int +ix86_mode_exit (int entity) +{ + switch (entity) + { + case AVX_U128: + return ix86_avx_u128_mode_exit (); + case I387_TRUNC: + case I387_FLOOR: + case I387_CEIL: + case I387_MASK_PM: + return I387_CW_ANY; + default: + gcc_unreachable (); + } +} + +static int +ix86_mode_priority (int, int n) +{ + return n; +} + +/* Output code to initialize control word copies used by trunc?f?i and + rounding patterns. CURRENT_MODE is set to current control word, + while NEW_MODE is set to new control word. 
*/ + +static void +emit_i387_cw_initialization (int mode) +{ + rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); + rtx new_mode; + + enum ix86_stack_slot slot; + + rtx reg = gen_reg_rtx (HImode); + + emit_insn (gen_x86_fnstcw_1 (stored_mode)); + emit_move_insn (reg, copy_rtx (stored_mode)); + + if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL + || optimize_insn_for_size_p ()) + { + switch (mode) + { + case I387_CW_TRUNC: + /* round toward zero (truncate) */ + emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00))); + slot = SLOT_CW_TRUNC; + break; + + case I387_CW_FLOOR: + /* round down toward -oo */ + emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); + emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400))); + slot = SLOT_CW_FLOOR; + break; + + case I387_CW_CEIL: + /* round up toward +oo */ + emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); + emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800))); + slot = SLOT_CW_CEIL; + break; + + case I387_CW_MASK_PM: + /* mask precision exception for nearbyint() */ + emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020))); + slot = SLOT_CW_MASK_PM; + break; + + default: + gcc_unreachable (); + } + } + else + { + switch (mode) + { + case I387_CW_TRUNC: + /* round toward zero (truncate) */ + emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc))); + slot = SLOT_CW_TRUNC; + break; + + case I387_CW_FLOOR: + /* round down toward -oo */ + emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4))); + slot = SLOT_CW_FLOOR; + break; + + case I387_CW_CEIL: + /* round up toward +oo */ + emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8))); + slot = SLOT_CW_CEIL; + break; + + case I387_CW_MASK_PM: + /* mask precision exception for nearbyint() */ + emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020))); + slot = SLOT_CW_MASK_PM; + break; + + default: + gcc_unreachable (); + } + } + + gcc_assert (slot < MAX_386_STACK_LOCALS); + + new_mode = assign_386_stack_local (HImode, slot); + emit_move_insn (new_mode, reg); +} + +/* Emit vzeroupper. */ + +void +ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live) +{ + int i; + + /* Cancel automatic vzeroupper insertion if there are + live call-saved SSE registers at the insertion point. */ + + for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) + if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i]) + return; + + if (TARGET_64BIT) + for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) + if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i]) + return; + + emit_insn (gen_avx_vzeroupper ()); +} + +/* Generate one or more insns to set ENTITY to MODE. */ + +/* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE + is the set of hard registers live at the point where the insn(s) + are to be inserted. */ + +static void +ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED, + HARD_REG_SET regs_live) +{ + switch (entity) + { + case AVX_U128: + if (mode == AVX_U128_CLEAN) + ix86_avx_emit_vzeroupper (regs_live); + break; + case I387_TRUNC: + case I387_FLOOR: + case I387_CEIL: + case I387_MASK_PM: + if (mode != I387_CW_ANY + && mode != I387_CW_UNINITIALIZED) + emit_i387_cw_initialization (mode); + break; + default: + gcc_unreachable (); + } +} + +/* Output code for INSN to convert a float to a signed int. OPERANDS + are the insn operands. The output may be [HSD]Imode and the input + operand may be [SDX]Fmode. 
*/ + +const char * +output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp) +{ + int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; + int dimode_p = GET_MODE (operands[0]) == DImode; + int round_mode = get_attr_i387_cw (insn); + + /* Jump through a hoop or two for DImode, since the hardware has no + non-popping instruction. We used to do this a different way, but + that was somewhat fragile and broke with post-reload splitters. */ + if ((dimode_p || fisttp) && !stack_top_dies) + output_asm_insn ("fld\t%y1", operands); + + gcc_assert (STACK_TOP_P (operands[1])); + gcc_assert (MEM_P (operands[0])); + gcc_assert (GET_MODE (operands[1]) != TFmode); + + if (fisttp) + output_asm_insn ("fisttp%Z0\t%0", operands); + else + { + if (round_mode != I387_CW_ANY) + output_asm_insn ("fldcw\t%3", operands); + if (stack_top_dies || dimode_p) + output_asm_insn ("fistp%Z0\t%0", operands); + else + output_asm_insn ("fist%Z0\t%0", operands); + if (round_mode != I387_CW_ANY) + output_asm_insn ("fldcw\t%2", operands); + } + + return ""; +} + +/* Output code for x87 ffreep insn. The OPNO argument, which may only + have the values zero or one, indicates the ffreep insn's operand + from the OPERANDS array. */ + +static const char * +output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno) +{ + if (TARGET_USE_FFREEP) +#ifdef HAVE_AS_IX86_FFREEP + return opno ? "ffreep\t%y1" : "ffreep\t%y0"; +#else + { + static char retval[32]; + int regno = REGNO (operands[opno]); + + gcc_assert (STACK_REGNO_P (regno)); + + regno -= FIRST_STACK_REG; + + snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno); + return retval; + } +#endif + + return opno ? "fstp\t%y1" : "fstp\t%y0"; +} + + +/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi + should be used. UNORDERED_P is true when fucom should be used. */ + +const char * +output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p) +{ + int stack_top_dies; + rtx cmp_op0, cmp_op1; + int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]); + + if (eflags_p) + { + cmp_op0 = operands[0]; + cmp_op1 = operands[1]; + } + else + { + cmp_op0 = operands[1]; + cmp_op1 = operands[2]; + } + + if (is_sse) + { + if (GET_MODE (operands[0]) == SFmode) + if (unordered_p) + return "%vucomiss\t{%1, %0|%0, %1}"; + else + return "%vcomiss\t{%1, %0|%0, %1}"; + else + if (unordered_p) + return "%vucomisd\t{%1, %0|%0, %1}"; + else + return "%vcomisd\t{%1, %0|%0, %1}"; + } + + gcc_assert (STACK_TOP_P (cmp_op0)); + + stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; + + if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1))) + { + if (stack_top_dies) + { + output_asm_insn ("ftst\n\tfnstsw\t%0", operands); + return output_387_ffreep (operands, 1); + } + else + return "ftst\n\tfnstsw\t%0"; + } + + if (STACK_REG_P (cmp_op1) + && stack_top_dies + && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1)) + && REGNO (cmp_op1) != FIRST_STACK_REG) + { + /* If both the top of the 387 stack dies, and the other operand + is also a stack register that dies, then this must be a + `fcompp' float compare */ + + if (eflags_p) + { + /* There is no double popping fcomi variant. Fortunately, + eflags is immune from the fstp's cc clobbering. 
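+   The fcomip/fucomip emitted here pops only st(0); the second
+   operand is popped by the ffreep (or the fstp fallback) produced
+   by output_387_ffreep below.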
*/ + if (unordered_p) + output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands); + else + output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands); + return output_387_ffreep (operands, 0); + } + else + { + if (unordered_p) + return "fucompp\n\tfnstsw\t%0"; + else + return "fcompp\n\tfnstsw\t%0"; + } + } + else + { + /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */ + + static const char * const alt[16] = + { + "fcom%Z2\t%y2\n\tfnstsw\t%0", + "fcomp%Z2\t%y2\n\tfnstsw\t%0", + "fucom%Z2\t%y2\n\tfnstsw\t%0", + "fucomp%Z2\t%y2\n\tfnstsw\t%0", + + "ficom%Z2\t%y2\n\tfnstsw\t%0", + "ficomp%Z2\t%y2\n\tfnstsw\t%0", + NULL, + NULL, + + "fcomi\t{%y1, %0|%0, %y1}", + "fcomip\t{%y1, %0|%0, %y1}", + "fucomi\t{%y1, %0|%0, %y1}", + "fucomip\t{%y1, %0|%0, %y1}", + + NULL, + NULL, + NULL, + NULL + }; + + int mask; + const char *ret; + + mask = eflags_p << 3; + mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2; + mask |= unordered_p << 1; + mask |= stack_top_dies; + + gcc_assert (mask < 16); + ret = alt[mask]; + gcc_assert (ret); + + return ret; + } +} + +void +ix86_output_addr_vec_elt (FILE *file, int value) +{ + const char *directive = ASM_LONG; + +#ifdef ASM_QUAD + if (TARGET_LP64) + directive = ASM_QUAD; +#else + gcc_assert (!TARGET_64BIT); +#endif + + fprintf (file, "%s%s%d\n", directive, LPREFIX, value); +} + +void +ix86_output_addr_diff_elt (FILE *file, int value, int rel) +{ + const char *directive = ASM_LONG; + +#ifdef ASM_QUAD + if (TARGET_64BIT && CASE_VECTOR_MODE == DImode) + directive = ASM_QUAD; +#else + gcc_assert (!TARGET_64BIT); +#endif + /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */ + if (TARGET_64BIT || TARGET_VXWORKS_RTP) + fprintf (file, "%s%s%d-%s%d\n", + directive, LPREFIX, value, LPREFIX, rel); + else if (HAVE_AS_GOTOFF_IN_DATA) + fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value); +#if TARGET_MACHO + else if (TARGET_MACHO) + { + fprintf (file, ASM_LONG "%s%d-", LPREFIX, value); + machopic_output_function_base_name (file); + putc ('\n', file); + } +#endif + else + asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n", + GOT_SYMBOL_NAME, LPREFIX, value); +} + +/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate + for the target. */ + +void +ix86_expand_clear (rtx dest) +{ + rtx tmp; + + /* We play register width games, which are only valid after reload. */ + gcc_assert (reload_completed); + + /* Avoid HImode and its attendant prefix byte. */ + if (GET_MODE_SIZE (GET_MODE (dest)) < 4) + dest = gen_rtx_REG (SImode, REGNO (dest)); + tmp = gen_rtx_SET (dest, const0_rtx); + + if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ()) + { + rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); + } + + emit_insn (tmp); +} + +/* X is an unchanging MEM. If it is a constant pool reference, return + the constant pool rtx, else NULL. 
*/ + +rtx +maybe_get_pool_constant (rtx x) +{ + x = ix86_delegitimize_address (XEXP (x, 0)); + + if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) + return get_pool_constant (x); + + return NULL_RTX; +} + +void +ix86_expand_move (machine_mode mode, rtx operands[]) +{ + rtx op0, op1; + enum tls_model model; + + op0 = operands[0]; + op1 = operands[1]; + + if (GET_CODE (op1) == SYMBOL_REF) + { + rtx tmp; + + model = SYMBOL_REF_TLS_MODEL (op1); + if (model) + { + op1 = legitimize_tls_address (op1, model, true); + op1 = force_operand (op1, op0); + if (op1 == op0) + return; + op1 = convert_to_mode (mode, op1, 1); + } + else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX) + op1 = tmp; + } + else if (GET_CODE (op1) == CONST + && GET_CODE (XEXP (op1, 0)) == PLUS + && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF) + { + rtx addend = XEXP (XEXP (op1, 0), 1); + rtx symbol = XEXP (XEXP (op1, 0), 0); + rtx tmp; + + model = SYMBOL_REF_TLS_MODEL (symbol); + if (model) + tmp = legitimize_tls_address (symbol, model, true); + else + tmp = legitimize_pe_coff_symbol (symbol, true); + + if (tmp) + { + tmp = force_operand (tmp, NULL); + tmp = expand_simple_binop (Pmode, PLUS, tmp, addend, + op0, 1, OPTAB_DIRECT); + if (tmp == op0) + return; + op1 = convert_to_mode (mode, tmp, 1); + } + } + + if ((flag_pic || MACHOPIC_INDIRECT) + && symbolic_operand (op1, mode)) + { + if (TARGET_MACHO && !TARGET_64BIT) + { +#if TARGET_MACHO + /* dynamic-no-pic */ + if (MACHOPIC_INDIRECT) + { + rtx temp = (op0 && REG_P (op0) && mode == Pmode) + ? op0 : gen_reg_rtx (Pmode); + op1 = machopic_indirect_data_reference (op1, temp); + if (MACHOPIC_PURE) + op1 = machopic_legitimize_pic_address (op1, mode, + temp == op1 ? 0 : temp); + } + if (op0 != op1 && GET_CODE (op0) != MEM) + { + rtx insn = gen_rtx_SET (op0, op1); + emit_insn (insn); + return; + } + if (GET_CODE (op0) == MEM) + op1 = force_reg (Pmode, op1); + else + { + rtx temp = op0; + if (GET_CODE (temp) != REG) + temp = gen_reg_rtx (Pmode); + temp = legitimize_pic_address (op1, temp); + if (temp == op0) + return; + op1 = temp; + } + /* dynamic-no-pic */ +#endif + } + else + { + if (MEM_P (op0)) + op1 = force_reg (mode, op1); + else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode))) + { + rtx reg = can_create_pseudo_p () ? NULL_RTX : op0; + op1 = legitimize_pic_address (op1, reg); + if (op0 == op1) + return; + op1 = convert_to_mode (mode, op1, 1); + } + } + } + else + { + if (MEM_P (op0) + && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode) + || !push_operand (op0, mode)) + && MEM_P (op1)) + op1 = force_reg (mode, op1); + + if (push_operand (op0, mode) + && ! general_no_elim_operand (op1, mode)) + op1 = copy_to_mode_reg (mode, op1); + + /* Force large constants in 64bit compilation into register + to get them CSEed. */ + if (can_create_pseudo_p () + && (mode == DImode) && TARGET_64BIT + && immediate_operand (op1, mode) + && !x86_64_zext_immediate_operand (op1, VOIDmode) + && !register_operand (op0, mode) + && optimize) + op1 = copy_to_mode_reg (mode, op1); + + if (can_create_pseudo_p () + && CONST_DOUBLE_P (op1)) + { + /* If we are loading a floating point constant to a register, + force the value to memory now, since we'll get better code + out the back end. 
*/ + + op1 = validize_mem (force_const_mem (mode, op1)); + if (!register_operand (op0, mode)) + { + rtx temp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (temp, op1)); + emit_move_insn (op0, temp); + return; + } + } + } + + emit_insn (gen_rtx_SET (op0, op1)); +} + +void +ix86_expand_vector_move (machine_mode mode, rtx operands[]) +{ + rtx op0 = operands[0], op1 = operands[1]; + /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU + psABI since the biggest alignment is 4 byte for IA MCU psABI. */ + unsigned int align = (TARGET_IAMCU + ? GET_MODE_BITSIZE (mode) + : GET_MODE_ALIGNMENT (mode)); + + if (push_operand (op0, VOIDmode)) + op0 = emit_move_resolve_push (mode, op0); + + /* Force constants other than zero into memory. We do not know how + the instructions used to build constants modify the upper 64 bits + of the register, once we have that information we may be able + to handle some of them more efficiently. */ + if (can_create_pseudo_p () + && (CONSTANT_P (op1) + || (SUBREG_P (op1) + && CONSTANT_P (SUBREG_REG (op1)))) + && ((register_operand (op0, mode) + && !standard_sse_constant_p (op1)) + /* ix86_expand_vector_move_misalign() does not like constants. */ + || (SSE_REG_MODE_P (mode) + && MEM_P (op0) + && MEM_ALIGN (op0) < align))) + { + if (SUBREG_P (op1)) + { + machine_mode imode = GET_MODE (SUBREG_REG (op1)); + rtx r = force_const_mem (imode, SUBREG_REG (op1)); + if (r) + r = validize_mem (r); + else + r = force_reg (imode, SUBREG_REG (op1)); + op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1)); + } + else + op1 = validize_mem (force_const_mem (mode, op1)); + } + + /* We need to check memory alignment for SSE mode since attribute + can make operands unaligned. */ + if (can_create_pseudo_p () + && SSE_REG_MODE_P (mode) + && ((MEM_P (op0) && (MEM_ALIGN (op0) < align)) + || (MEM_P (op1) && (MEM_ALIGN (op1) < align)))) + { + rtx tmp[2]; + + /* ix86_expand_vector_move_misalign() does not like both + arguments in memory. */ + if (!register_operand (op0, mode) + && !register_operand (op1, mode)) + op1 = force_reg (mode, op1); + + tmp[0] = op0; tmp[1] = op1; + ix86_expand_vector_move_misalign (mode, tmp); + return; + } + + /* Make operand1 a register if it isn't already. */ + if (can_create_pseudo_p () + && !register_operand (op0, mode) + && !register_operand (op1, mode)) + { + emit_move_insn (op0, force_reg (GET_MODE (op0), op1)); + return; + } + + emit_insn (gen_rtx_SET (op0, op1)); +} + +/* Split 32-byte AVX unaligned load and store if needed. 
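+
+   As a rough illustration (the addressing is a placeholder), with
+   -mavx256-split-unaligned-load a V8SFmode load comes out as
+
+     vmovups     (%rax), %xmm0                # low 128 bits
+     vinsertf128 $1, 16(%rax), %ymm0, %ymm0   # high 128 bits
+
+   and with -mavx256-split-unaligned-store the store side is done
+   with two vextractf128 stores of 16 bytes each.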
*/ + +static void +ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) +{ + rtx m; + rtx (*extract) (rtx, rtx, rtx); + rtx (*load_unaligned) (rtx, rtx); + rtx (*store_unaligned) (rtx, rtx); + machine_mode mode; + + switch (GET_MODE (op0)) + { + default: + gcc_unreachable (); + case V32QImode: + extract = gen_avx_vextractf128v32qi; + load_unaligned = gen_avx_loaddquv32qi; + store_unaligned = gen_avx_storedquv32qi; + mode = V16QImode; + break; + case V8SFmode: + extract = gen_avx_vextractf128v8sf; + load_unaligned = gen_avx_loadups256; + store_unaligned = gen_avx_storeups256; + mode = V4SFmode; + break; + case V4DFmode: + extract = gen_avx_vextractf128v4df; + load_unaligned = gen_avx_loadupd256; + store_unaligned = gen_avx_storeupd256; + mode = V2DFmode; + break; + } + + if (MEM_P (op1)) + { + if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD + && optimize_insn_for_speed_p ()) + { + rtx r = gen_reg_rtx (mode); + m = adjust_address (op1, mode, 0); + emit_move_insn (r, m); + m = adjust_address (op1, mode, 16); + r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m); + emit_move_insn (op0, r); + } + /* Normal *mov_internal pattern will handle + unaligned loads just fine if misaligned_operand + is true, and without the UNSPEC it can be combined + with arithmetic instructions. */ + else if (misaligned_operand (op1, GET_MODE (op1))) + emit_insn (gen_rtx_SET (op0, op1)); + else + emit_insn (load_unaligned (op0, op1)); + } + else if (MEM_P (op0)) + { + if (TARGET_AVX256_SPLIT_UNALIGNED_STORE + && optimize_insn_for_speed_p ()) + { + m = adjust_address (op0, mode, 0); + emit_insn (extract (m, op1, const0_rtx)); + m = adjust_address (op0, mode, 16); + emit_insn (extract (m, copy_rtx (op1), const1_rtx)); + } + else + emit_insn (store_unaligned (op0, op1)); + } + else + gcc_unreachable (); +} + +/* Implement the movmisalign patterns for SSE. Non-SSE modes go + straight to ix86_expand_vector_move. 
*/ +/* Code generation for scalar reg-reg moves of single and double precision data: + if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true) + movaps reg, reg + else + movss reg, reg + if (x86_sse_partial_reg_dependency == true) + movapd reg, reg + else + movsd reg, reg + + Code generation for scalar loads of double precision data: + if (x86_sse_split_regs == true) + movlpd mem, reg (gas syntax) + else + movsd mem, reg + + Code generation for unaligned packed loads of single precision data + (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency): + if (x86_sse_unaligned_move_optimal) + movups mem, reg + + if (x86_sse_partial_reg_dependency == true) + { + xorps reg, reg + movlps mem, reg + movhps mem+8, reg + } + else + { + movlps mem, reg + movhps mem+8, reg + } + + Code generation for unaligned packed loads of double precision data + (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs): + if (x86_sse_unaligned_move_optimal) + movupd mem, reg + + if (x86_sse_split_regs == true) + { + movlpd mem, reg + movhpd mem+8, reg + } + else + { + movsd mem, reg + movhpd mem+8, reg + } + */ + +void +ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) +{ + rtx op0, op1, orig_op0 = NULL_RTX, m; + rtx (*load_unaligned) (rtx, rtx); + rtx (*store_unaligned) (rtx, rtx); + + op0 = operands[0]; + op1 = operands[1]; + + if (GET_MODE_SIZE (mode) == 64) + { + switch (GET_MODE_CLASS (mode)) + { + case MODE_VECTOR_INT: + case MODE_INT: + if (GET_MODE (op0) != V16SImode) + { + if (!MEM_P (op0)) + { + orig_op0 = op0; + op0 = gen_reg_rtx (V16SImode); + } + else + op0 = gen_lowpart (V16SImode, op0); + } + op1 = gen_lowpart (V16SImode, op1); + /* FALLTHRU */ + + case MODE_VECTOR_FLOAT: + switch (GET_MODE (op0)) + { + default: + gcc_unreachable (); + case V16SImode: + load_unaligned = gen_avx512f_loaddquv16si; + store_unaligned = gen_avx512f_storedquv16si; + break; + case V16SFmode: + load_unaligned = gen_avx512f_loadups512; + store_unaligned = gen_avx512f_storeups512; + break; + case V8DFmode: + load_unaligned = gen_avx512f_loadupd512; + store_unaligned = gen_avx512f_storeupd512; + break; + } + + if (MEM_P (op1)) + emit_insn (load_unaligned (op0, op1)); + else if (MEM_P (op0)) + emit_insn (store_unaligned (op0, op1)); + else + gcc_unreachable (); + if (orig_op0) + emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0)); + break; + + default: + gcc_unreachable (); + } + + return; + } + + if (TARGET_AVX + && GET_MODE_SIZE (mode) == 32) + { + switch (GET_MODE_CLASS (mode)) + { + case MODE_VECTOR_INT: + case MODE_INT: + if (GET_MODE (op0) != V32QImode) + { + if (!MEM_P (op0)) + { + orig_op0 = op0; + op0 = gen_reg_rtx (V32QImode); + } + else + op0 = gen_lowpart (V32QImode, op0); + } + op1 = gen_lowpart (V32QImode, op1); + /* FALLTHRU */ + + case MODE_VECTOR_FLOAT: + ix86_avx256_split_vector_move_misalign (op0, op1); + if (orig_op0) + emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0)); + break; + + default: + gcc_unreachable (); + } + + return; + } + + if (MEM_P (op1)) + { + /* Normal *mov_internal pattern will handle + unaligned loads just fine if misaligned_operand + is true, and without the UNSPEC it can be combined + with arithmetic instructions. */ + if (TARGET_AVX + && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + && misaligned_operand (op1, GET_MODE (op1))) + emit_insn (gen_rtx_SET (op0, op1)); + /* ??? 
If we have typed data, then it would appear that using + movdqu is the only way to get unaligned data loaded with + integer type. */ + else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { + if (GET_MODE (op0) != V16QImode) + { + orig_op0 = op0; + op0 = gen_reg_rtx (V16QImode); + } + op1 = gen_lowpart (V16QImode, op1); + /* We will eventually emit movups based on insn attributes. */ + emit_insn (gen_sse2_loaddquv16qi (op0, op1)); + if (orig_op0) + emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0)); + } + else if (TARGET_SSE2 && mode == V2DFmode) + { + rtx zero; + + if (TARGET_AVX + || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL + || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL + || optimize_insn_for_size_p ()) + { + /* We will eventually emit movups based on insn attributes. */ + emit_insn (gen_sse2_loadupd (op0, op1)); + return; + } + + /* When SSE registers are split into halves, we can avoid + writing to the top half twice. */ + if (TARGET_SSE_SPLIT_REGS) + { + emit_clobber (op0); + zero = op0; + } + else + { + /* ??? Not sure about the best option for the Intel chips. + The following would seem to satisfy; the register is + entirely cleared, breaking the dependency chain. We + then store to the upper half, with a dependency depth + of one. A rumor has it that Intel recommends two movsd + followed by an unpacklpd, but this is unconfirmed. And + given that the dependency depth of the unpacklpd would + still be one, I'm not sure why this would be better. */ + zero = CONST0_RTX (V2DFmode); + } + + m = adjust_address (op1, DFmode, 0); + emit_insn (gen_sse2_loadlpd (op0, zero, m)); + m = adjust_address (op1, DFmode, 8); + emit_insn (gen_sse2_loadhpd (op0, op0, m)); + } + else + { + rtx t; + + if (TARGET_AVX + || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL + || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL + || optimize_insn_for_size_p ()) + { + if (GET_MODE (op0) != V4SFmode) + { + orig_op0 = op0; + op0 = gen_reg_rtx (V4SFmode); + } + op1 = gen_lowpart (V4SFmode, op1); + emit_insn (gen_sse_loadups (op0, op1)); + if (orig_op0) + emit_move_insn (orig_op0, + gen_lowpart (GET_MODE (orig_op0), op0)); + return; + } + + if (mode != V4SFmode) + t = gen_reg_rtx (V4SFmode); + else + t = op0; + + if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) + emit_move_insn (t, CONST0_RTX (V4SFmode)); + else + emit_clobber (t); + + m = adjust_address (op1, V2SFmode, 0); + emit_insn (gen_sse_loadlps (t, t, m)); + m = adjust_address (op1, V2SFmode, 8); + emit_insn (gen_sse_loadhps (t, t, m)); + if (mode != V4SFmode) + emit_move_insn (op0, gen_lowpart (mode, t)); + } + } + else if (MEM_P (op0)) + { + if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { + op0 = gen_lowpart (V16QImode, op0); + op1 = gen_lowpart (V16QImode, op1); + /* We will eventually emit movups based on insn attributes. */ + emit_insn (gen_sse2_storedquv16qi (op0, op1)); + } + else if (TARGET_SSE2 && mode == V2DFmode) + { + if (TARGET_AVX + || TARGET_SSE_UNALIGNED_STORE_OPTIMAL + || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL + || optimize_insn_for_size_p ()) + /* We will eventually emit movups based on insn attributes. 
*/ + emit_insn (gen_sse2_storeupd (op0, op1)); + else + { + m = adjust_address (op0, DFmode, 0); + emit_insn (gen_sse2_storelpd (m, op1)); + m = adjust_address (op0, DFmode, 8); + emit_insn (gen_sse2_storehpd (m, op1)); + } + } + else + { + if (mode != V4SFmode) + op1 = gen_lowpart (V4SFmode, op1); + + if (TARGET_AVX + || TARGET_SSE_UNALIGNED_STORE_OPTIMAL + || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL + || optimize_insn_for_size_p ()) + { + op0 = gen_lowpart (V4SFmode, op0); + emit_insn (gen_sse_storeups (op0, op1)); + } + else + { + m = adjust_address (op0, V2SFmode, 0); + emit_insn (gen_sse_storelps (m, op1)); + m = adjust_address (op0, V2SFmode, 8); + emit_insn (gen_sse_storehps (m, copy_rtx (op1))); + } + } + } + else + gcc_unreachable (); +} + +/* Helper function of ix86_fixup_binary_operands to canonicalize + operand order. Returns true if the operands should be swapped. */ + +static bool +ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode, + rtx operands[]) +{ + rtx dst = operands[0]; + rtx src1 = operands[1]; + rtx src2 = operands[2]; + + /* If the operation is not commutative, we can't do anything. */ + if (GET_RTX_CLASS (code) != RTX_COMM_ARITH) + return false; + + /* Highest priority is that src1 should match dst. */ + if (rtx_equal_p (dst, src1)) + return false; + if (rtx_equal_p (dst, src2)) + return true; + + /* Next highest priority is that immediate constants come second. */ + if (immediate_operand (src2, mode)) + return false; + if (immediate_operand (src1, mode)) + return true; + + /* Lowest priority is that memory references should come second. */ + if (MEM_P (src2)) + return false; + if (MEM_P (src1)) + return true; + + return false; +} + + +/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the + destination to use for the operation. If different from the true + destination in operands[0], a copy operation will be required. */ + +rtx +ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode, + rtx operands[]) +{ + rtx dst = operands[0]; + rtx src1 = operands[1]; + rtx src2 = operands[2]; + + /* Canonicalize operand order. */ + if (ix86_swap_binary_operands_p (code, mode, operands)) + { + /* It is invalid to swap operands of different modes. */ + gcc_assert (GET_MODE (src1) == GET_MODE (src2)); + + std::swap (src1, src2); + } + + /* Both source operands cannot be in memory. */ + if (MEM_P (src1) && MEM_P (src2)) + { + /* Optimization: Only read from memory once. */ + if (rtx_equal_p (src1, src2)) + { + src2 = force_reg (mode, src2); + src1 = src2; + } + else if (rtx_equal_p (dst, src1)) + src2 = force_reg (mode, src2); + else + src1 = force_reg (mode, src1); + } + + /* If the destination is memory, and we do not have matching source + operands, do things in registers. */ + if (MEM_P (dst) && !rtx_equal_p (dst, src1)) + dst = gen_reg_rtx (mode); + + /* Source 1 cannot be a constant. */ + if (CONSTANT_P (src1)) + src1 = force_reg (mode, src1); + + /* Source 1 cannot be a non-matching memory. */ + if (MEM_P (src1) && !rtx_equal_p (dst, src1)) + src1 = force_reg (mode, src1); + + /* Improve address combine. */ + if (code == PLUS + && GET_MODE_CLASS (mode) == MODE_INT + && MEM_P (src2)) + src2 = force_reg (mode, src2); + + operands[1] = src1; + operands[2] = src2; + return dst; +} + +/* Similarly, but assume that the destination has already been + set up properly. 
*/ + +void +ix86_fixup_binary_operands_no_copy (enum rtx_code code, + machine_mode mode, rtx operands[]) +{ + rtx dst = ix86_fixup_binary_operands (code, mode, operands); + gcc_assert (dst == operands[0]); +} + +/* Attempt to expand a binary operator. Make the expansion closer to the + actual machine, then just general_operand, which will allow 3 separate + memory references (one output, two input) in a single insn. */ + +void +ix86_expand_binary_operator (enum rtx_code code, machine_mode mode, + rtx operands[]) +{ + rtx src1, src2, dst, op, clob; + + dst = ix86_fixup_binary_operands (code, mode, operands); + src1 = operands[1]; + src2 = operands[2]; + + /* Emit the instruction. */ + + op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2)); + + if (reload_completed + && code == PLUS + && !rtx_equal_p (dst, src1)) + { + /* This is going to be an LEA; avoid splitting it later. */ + emit_insn (op); + } + else + { + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); + } + + /* Fix up the destination if needed. */ + if (dst != operands[0]) + emit_move_insn (operands[0], dst); +} + +/* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with + the given OPERANDS. */ + +void +ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode, + rtx operands[]) +{ + rtx op1 = NULL_RTX, op2 = NULL_RTX; + if (SUBREG_P (operands[1])) + { + op1 = operands[1]; + op2 = operands[2]; + } + else if (SUBREG_P (operands[2])) + { + op1 = operands[2]; + op2 = operands[1]; + } + /* Optimize (__m128i) d | (__m128i) e and similar code + when d and e are float vectors into float vector logical + insn. In C/C++ without using intrinsics there is no other way + to express vector logical operation on float vectors than + to cast them temporarily to integer vectors. 
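+
+   For example, with the GNU vector extensions:
+
+     __m128 a, b;
+     __m128i r = (__m128i) a | (__m128i) b;
+
+   Without this transformation the IOR is emitted in the integer
+   domain (por); with it the operation stays in the float domain
+   (orps/orpd), which can avoid a bypass penalty on several
+   microarchitectures.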
*/ + if (op1 + && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL + && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR) + && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT + && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode) + && SUBREG_BYTE (op1) == 0 + && (GET_CODE (op2) == CONST_VECTOR + || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2)) + && SUBREG_BYTE (op2) == 0)) + && can_create_pseudo_p ()) + { + rtx dst; + switch (GET_MODE (SUBREG_REG (op1))) + { + case V4SFmode: + case V8SFmode: + case V16SFmode: + case V2DFmode: + case V4DFmode: + case V8DFmode: + dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1))); + if (GET_CODE (op2) == CONST_VECTOR) + { + op2 = gen_lowpart (GET_MODE (dst), op2); + op2 = force_reg (GET_MODE (dst), op2); + } + else + { + op1 = operands[1]; + op2 = SUBREG_REG (operands[2]); + if (!vector_operand (op2, GET_MODE (dst))) + op2 = force_reg (GET_MODE (dst), op2); + } + op1 = SUBREG_REG (op1); + if (!vector_operand (op1, GET_MODE (dst))) + op1 = force_reg (GET_MODE (dst), op1); + emit_insn (gen_rtx_SET (dst, + gen_rtx_fmt_ee (code, GET_MODE (dst), + op1, op2))); + emit_move_insn (operands[0], gen_lowpart (mode, dst)); + return; + default: + break; + } + } + if (!vector_operand (operands[1], mode)) + operands[1] = force_reg (mode, operands[1]); + if (!vector_operand (operands[2], mode)) + operands[2] = force_reg (mode, operands[2]); + ix86_fixup_binary_operands_no_copy (code, mode, operands); + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_fmt_ee (code, mode, operands[1], + operands[2]))); +} + +/* Return TRUE or FALSE depending on whether the binary operator meets the + appropriate constraints. */ + +bool +ix86_binary_operator_ok (enum rtx_code code, machine_mode mode, + rtx operands[3]) +{ + rtx dst = operands[0]; + rtx src1 = operands[1]; + rtx src2 = operands[2]; + + /* Both source operands cannot be in memory. */ + if (MEM_P (src1) && MEM_P (src2)) + return false; + + /* Canonicalize operand order for commutative operators. */ + if (ix86_swap_binary_operands_p (code, mode, operands)) + std::swap (src1, src2); + + /* If the destination is memory, we must have a matching source operand. */ + if (MEM_P (dst) && !rtx_equal_p (dst, src1)) + return false; + + /* Source 1 cannot be a constant. */ + if (CONSTANT_P (src1)) + return false; + + /* Source 1 cannot be a non-matching memory. */ + if (MEM_P (src1) && !rtx_equal_p (dst, src1)) + /* Support "andhi/andsi/anddi" as a zero-extending move. */ + return (code == AND + && (mode == HImode + || mode == SImode + || (TARGET_64BIT && mode == DImode)) + && satisfies_constraint_L (src2)); + + return true; +} + +/* Attempt to expand a unary operator. Make the expansion closer to the + actual machine, then just general_operand, which will allow 2 separate + memory references (one output, one input) in a single insn. */ + +void +ix86_expand_unary_operator (enum rtx_code code, machine_mode mode, + rtx operands[]) +{ + bool matching_memory = false; + rtx src, dst, op, clob; + + dst = operands[0]; + src = operands[1]; + + /* If the destination is memory, and we do not have matching source + operands, do things in registers. */ + if (MEM_P (dst)) + { + if (rtx_equal_p (dst, src)) + matching_memory = true; + else + dst = gen_reg_rtx (mode); + } + + /* When source operand is memory, destination must match. */ + if (MEM_P (src) && !matching_memory) + src = force_reg (mode, src); + + /* Emit the instruction. 
*/ + + op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src)); + + if (code == NOT) + emit_insn (op); + else + { + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); + } + + /* Fix up the destination if needed. */ + if (dst != operands[0]) + emit_move_insn (operands[0], dst); +} + +/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and + divisor are within the range [0-255]. */ + +void +ix86_split_idivmod (machine_mode mode, rtx operands[], + bool signed_p) +{ + rtx_code_label *end_label, *qimode_label; + rtx insn, div, mod; + rtx scratch, tmp0, tmp1, tmp2; + rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx); + rtx (*gen_zero_extend) (rtx, rtx); + rtx (*gen_test_ccno_1) (rtx, rtx); + + switch (mode) + { + case SImode: + gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1; + gen_test_ccno_1 = gen_testsi_ccno_1; + gen_zero_extend = gen_zero_extendqisi2; + break; + case DImode: + gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1; + gen_test_ccno_1 = gen_testdi_ccno_1; + gen_zero_extend = gen_zero_extendqidi2; + break; + default: + gcc_unreachable (); + } + + end_label = gen_label_rtx (); + qimode_label = gen_label_rtx (); + + scratch = gen_reg_rtx (mode); + + /* Use 8bit unsigned divimod if dividend and divisor are within + the range [0-255]. */ + emit_move_insn (scratch, operands[2]); + scratch = expand_simple_binop (mode, IOR, scratch, operands[3], + scratch, 1, OPTAB_DIRECT); + emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100))); + tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG); + tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx); + tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0, + gen_rtx_LABEL_REF (VOIDmode, qimode_label), + pc_rtx); + insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0)); + predict_jump (REG_BR_PROB_BASE * 50 / 100); + JUMP_LABEL (insn) = qimode_label; + + /* Generate original signed/unsigned divimod. */ + div = gen_divmod4_1 (operands[0], operands[1], + operands[2], operands[3]); + emit_insn (div); + + /* Branch to the end. */ + emit_jump_insn (gen_jump (end_label)); + emit_barrier (); + + /* Generate 8bit unsigned divide. */ + emit_label (qimode_label); + /* Don't use operands[0] for result of 8bit divide since not all + registers support QImode ZERO_EXTRACT. */ + tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0); + tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0); + tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0); + emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2)); + + if (signed_p) + { + div = gen_rtx_DIV (SImode, operands[2], operands[3]); + mod = gen_rtx_MOD (SImode, operands[2], operands[3]); + } + else + { + div = gen_rtx_UDIV (SImode, operands[2], operands[3]); + mod = gen_rtx_UMOD (SImode, operands[2], operands[3]); + } + + /* Extract remainder from AH. */ + tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8)); + if (REG_P (operands[1])) + insn = emit_move_insn (operands[1], tmp1); + else + { + /* Need a new scratch register since the old one has result + of 8bit divide. */ + scratch = gen_reg_rtx (mode); + emit_move_insn (scratch, tmp1); + insn = emit_move_insn (operands[1], scratch); + } + set_unique_reg_note (insn, REG_EQUAL, mod); + + /* Zero extend quotient from AL. 
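+   This relies on the x86 8-bit divide semantics: "div %cl" divides
+   AX by CL, leaving the quotient in AL and the remainder in AH,
+   which is why the remainder was taken from bits 8..15 above and
+   the quotient is zero extended from the low byte here.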
*/ + tmp1 = gen_lowpart (QImode, tmp0); + insn = emit_insn (gen_zero_extend (operands[0], tmp1)); + set_unique_reg_note (insn, REG_EQUAL, div); + + emit_label (end_label); +} + +#define LEA_MAX_STALL (3) +#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1) + +/* Increase given DISTANCE in half-cycles according to + dependencies between PREV and NEXT instructions. + Add 1 half-cycle if there is no dependency and + go to next cycle if there is some dependecy. */ + +static unsigned int +increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance) +{ + df_ref def, use; + + if (!prev || !next) + return distance + (distance & 1) + 2; + + if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev)) + return distance + 1; + + FOR_EACH_INSN_USE (use, next) + FOR_EACH_INSN_DEF (def, prev) + if (!DF_REF_IS_ARTIFICIAL (def) + && DF_REF_REGNO (use) == DF_REF_REGNO (def)) + return distance + (distance & 1) + 2; + + return distance + 1; +} + +/* Function checks if instruction INSN defines register number + REGNO1 or REGNO2. */ + +static bool +insn_defines_reg (unsigned int regno1, unsigned int regno2, + rtx_insn *insn) +{ + df_ref def; + + FOR_EACH_INSN_DEF (def, insn) + if (DF_REF_REG_DEF_P (def) + && !DF_REF_IS_ARTIFICIAL (def) + && (regno1 == DF_REF_REGNO (def) + || regno2 == DF_REF_REGNO (def))) + return true; + + return false; +} + +/* Function checks if instruction INSN uses register number + REGNO as a part of address expression. */ + +static bool +insn_uses_reg_mem (unsigned int regno, rtx insn) +{ + df_ref use; + + FOR_EACH_INSN_USE (use, insn) + if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use)) + return true; + + return false; +} + +/* Search backward for non-agu definition of register number REGNO1 + or register number REGNO2 in basic block starting from instruction + START up to head of basic block or instruction INSN. + + Function puts true value into *FOUND var if definition was found + and false otherwise. + + Distance in half-cycles between START and found instruction or head + of BB is added to DISTANCE and returned. */ + +static int +distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2, + rtx_insn *insn, int distance, + rtx_insn *start, bool *found) +{ + basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL; + rtx_insn *prev = start; + rtx_insn *next = NULL; + + *found = false; + + while (prev + && prev != insn + && distance < LEA_SEARCH_THRESHOLD) + { + if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev)) + { + distance = increase_distance (prev, next, distance); + if (insn_defines_reg (regno1, regno2, prev)) + { + if (recog_memoized (prev) < 0 + || get_attr_type (prev) != TYPE_LEA) + { + *found = true; + return distance; + } + } + + next = prev; + } + if (prev == BB_HEAD (bb)) + break; + + prev = PREV_INSN (prev); + } + + return distance; +} + +/* Search backward for non-agu definition of register number REGNO1 + or register number REGNO2 in INSN's basic block until + 1. Pass LEA_SEARCH_THRESHOLD instructions, or + 2. Reach neighbor BBs boundary, or + 3. Reach agu definition. + Returns the distance between the non-agu definition point and INSN. + If no definition point, returns -1. 
*/ + +static int +distance_non_agu_define (unsigned int regno1, unsigned int regno2, + rtx_insn *insn) +{ + basic_block bb = BLOCK_FOR_INSN (insn); + int distance = 0; + bool found = false; + + if (insn != BB_HEAD (bb)) + distance = distance_non_agu_define_in_bb (regno1, regno2, insn, + distance, PREV_INSN (insn), + &found); + + if (!found && distance < LEA_SEARCH_THRESHOLD) + { + edge e; + edge_iterator ei; + bool simple_loop = false; + + FOR_EACH_EDGE (e, ei, bb->preds) + if (e->src == bb) + { + simple_loop = true; + break; + } + + if (simple_loop) + distance = distance_non_agu_define_in_bb (regno1, regno2, + insn, distance, + BB_END (bb), &found); + else + { + int shortest_dist = -1; + bool found_in_bb = false; + + FOR_EACH_EDGE (e, ei, bb->preds) + { + int bb_dist + = distance_non_agu_define_in_bb (regno1, regno2, + insn, distance, + BB_END (e->src), + &found_in_bb); + if (found_in_bb) + { + if (shortest_dist < 0) + shortest_dist = bb_dist; + else if (bb_dist > 0) + shortest_dist = MIN (bb_dist, shortest_dist); + + found = true; + } + } + + distance = shortest_dist; + } + } + + /* get_attr_type may modify recog data. We want to make sure + that recog data is valid for instruction INSN, on which + distance_non_agu_define is called. INSN is unchanged here. */ + extract_insn_cached (insn); + + if (!found) + return -1; + + return distance >> 1; +} + +/* Return the distance in half-cycles between INSN and the next + insn that uses register number REGNO in memory address added + to DISTANCE. Return -1 if REGNO0 is set. + + Put true value into *FOUND if register usage was found and + false otherwise. + Put true value into *REDEFINED if register redefinition was + found and false otherwise. */ + +static int +distance_agu_use_in_bb (unsigned int regno, + rtx_insn *insn, int distance, rtx_insn *start, + bool *found, bool *redefined) +{ + basic_block bb = NULL; + rtx_insn *next = start; + rtx_insn *prev = NULL; + + *found = false; + *redefined = false; + + if (start != NULL_RTX) + { + bb = BLOCK_FOR_INSN (start); + if (start != BB_HEAD (bb)) + /* If insn and start belong to the same bb, set prev to insn, + so the call to increase_distance will increase the distance + between insns by 1. */ + prev = insn; + } + + while (next + && next != insn + && distance < LEA_SEARCH_THRESHOLD) + { + if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next)) + { + distance = increase_distance(prev, next, distance); + if (insn_uses_reg_mem (regno, next)) + { + /* Return DISTANCE if OP0 is used in memory + address in NEXT. */ + *found = true; + return distance; + } + + if (insn_defines_reg (regno, INVALID_REGNUM, next)) + { + /* Return -1 if OP0 is set in NEXT. */ + *redefined = true; + return -1; + } + + prev = next; + } + + if (next == BB_END (bb)) + break; + + next = NEXT_INSN (next); + } + + return distance; +} + +/* Return the distance between INSN and the next insn that uses + register number REGNO0 in memory address. Return -1 if no such + a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. 
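+
+   Together with distance_non_agu_define above, this feeds
+   ix86_lea_outperforms: on cores tuned with TARGET_OPT_AGU the lea
+   instruction executes on the address-generation unit, so an lea
+   whose inputs were just produced by ALU instructions, or whose
+   result is not consumed as an address soon afterwards, tends to be
+   slower than an equivalent mov/add/shift sequence; the distances
+   measured here are what that heuristic compares against
+   LEA_MAX_STALL.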
*/ + +static int +distance_agu_use (unsigned int regno0, rtx_insn *insn) +{ + basic_block bb = BLOCK_FOR_INSN (insn); + int distance = 0; + bool found = false; + bool redefined = false; + + if (insn != BB_END (bb)) + distance = distance_agu_use_in_bb (regno0, insn, distance, + NEXT_INSN (insn), + &found, &redefined); + + if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD) + { + edge e; + edge_iterator ei; + bool simple_loop = false; + + FOR_EACH_EDGE (e, ei, bb->succs) + if (e->dest == bb) + { + simple_loop = true; + break; + } + + if (simple_loop) + distance = distance_agu_use_in_bb (regno0, insn, + distance, BB_HEAD (bb), + &found, &redefined); + else + { + int shortest_dist = -1; + bool found_in_bb = false; + bool redefined_in_bb = false; + + FOR_EACH_EDGE (e, ei, bb->succs) + { + int bb_dist + = distance_agu_use_in_bb (regno0, insn, + distance, BB_HEAD (e->dest), + &found_in_bb, &redefined_in_bb); + if (found_in_bb) + { + if (shortest_dist < 0) + shortest_dist = bb_dist; + else if (bb_dist > 0) + shortest_dist = MIN (bb_dist, shortest_dist); + + found = true; + } + } + + distance = shortest_dist; + } + } + + if (!found || redefined) + return -1; + + return distance >> 1; +} + +/* Define this macro to tune LEA priority vs ADD, it take effect when + there is a dilemma of choicing LEA or ADD + Negative value: ADD is more preferred than LEA + Zero: Netrual + Positive value: LEA is more preferred than ADD*/ +#define IX86_LEA_PRIORITY 0 + +/* Return true if usage of lea INSN has performance advantage + over a sequence of instructions. Instructions sequence has + SPLIT_COST cycles higher latency than lea latency. */ + +static bool +ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1, + unsigned int regno2, int split_cost, bool has_scale) +{ + int dist_define, dist_use; + + /* For Silvermont if using a 2-source or 3-source LEA for + non-destructive destination purposes, or due to wanting + ability to use SCALE, the use of LEA is justified. */ + if (TARGET_SILVERMONT || TARGET_INTEL) + { + if (has_scale) + return true; + if (split_cost < 1) + return false; + if (regno0 == regno1 || regno0 == regno2) + return false; + return true; + } + + dist_define = distance_non_agu_define (regno1, regno2, insn); + dist_use = distance_agu_use (regno0, insn); + + if (dist_define < 0 || dist_define >= LEA_MAX_STALL) + { + /* If there is no non AGU operand definition, no AGU + operand usage and split cost is 0 then both lea + and non lea variants have same priority. Currently + we prefer lea for 64 bit code and non lea on 32 bit + code. */ + if (dist_use < 0 && split_cost == 0) + return TARGET_64BIT || IX86_LEA_PRIORITY; + else + return true; + } + + /* With longer definitions distance lea is more preferable. + Here we change it to take into account splitting cost and + lea priority. */ + dist_define += split_cost + IX86_LEA_PRIORITY; + + /* If there is no use in memory addess then we just check + that split cost exceeds AGU stall. */ + if (dist_use < 0) + return dist_define > LEA_MAX_STALL; + + /* If this insn has both backward non-agu dependence and forward + agu dependence, the one with short distance takes effect. */ + return dist_define >= dist_use; +} + +/* Return true if it is legal to clobber flags by INSN and + false otherwise. 
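+
+   This matters because the lea itself does not touch EFLAGS, while
+   the mov/add/shift sequence it is split into does; for instance
+   "leal 4(%eax,%ebx), %ecx" would become "movl %eax, %ecx; addl
+   %ebx, %ecx; addl $4, %ecx", so the split is only allowed when the
+   flags are provably dead at this point.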
*/ + +static bool +ix86_ok_to_clobber_flags (rtx_insn *insn) +{ + basic_block bb = BLOCK_FOR_INSN (insn); + df_ref use; + bitmap live; + + while (insn) + { + if (NONDEBUG_INSN_P (insn)) + { + FOR_EACH_INSN_USE (use, insn) + if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG) + return false; + + if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn)) + return true; + } + + if (insn == BB_END (bb)) + break; + + insn = NEXT_INSN (insn); + } + + live = df_get_live_out(bb); + return !REGNO_REG_SET_P (live, FLAGS_REG); +} + +/* Return true if we need to split op0 = op1 + op2 into a sequence of + move and add to avoid AGU stalls. */ + +bool +ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[]) +{ + unsigned int regno0, regno1, regno2; + + /* Check if we need to optimize. */ + if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) + return false; + + /* Check it is correct to split here. */ + if (!ix86_ok_to_clobber_flags(insn)) + return false; + + regno0 = true_regnum (operands[0]); + regno1 = true_regnum (operands[1]); + regno2 = true_regnum (operands[2]); + + /* We need to split only adds with non destructive + destination operand. */ + if (regno0 == regno1 || regno0 == regno2) + return false; + else + return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false); +} + +/* Return true if we should emit lea instruction instead of mov + instruction. */ + +bool +ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[]) +{ + unsigned int regno0, regno1; + + /* Check if we need to optimize. */ + if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) + return false; + + /* Use lea for reg to reg moves only. */ + if (!REG_P (operands[0]) || !REG_P (operands[1])) + return false; + + regno0 = true_regnum (operands[0]); + regno1 = true_regnum (operands[1]); + + return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false); +} + +/* Return true if we need to split lea into a sequence of + instructions to avoid AGU stalls. */ + +bool +ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[]) +{ + unsigned int regno0, regno1, regno2; + int split_cost; + struct ix86_address parts; + int ok; + + /* Check we need to optimize. */ + if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun)) + return false; + + /* The "at least two components" test below might not catch simple + move or zero extension insns if parts.base is non-NULL and parts.disp + is const0_rtx as the only components in the address, e.g. if the + register is %rbp or %r13. As this test is much cheaper and moves or + zero extensions are the common case, do this check first. */ + if (REG_P (operands[1]) + || (SImode_address_operand (operands[1], VOIDmode) + && REG_P (XEXP (operands[1], 0)))) + return false; + + /* Check if it is OK to split here. */ + if (!ix86_ok_to_clobber_flags (insn)) + return false; + + ok = ix86_decompose_address (operands[1], &parts); + gcc_assert (ok); + + /* There should be at least two components in the address. */ + if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX) + + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2) + return false; + + /* We should not split into add if non legitimate pic + operand is used as displacement. 
*/ + if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp)) + return false; + + regno0 = true_regnum (operands[0]) ; + regno1 = INVALID_REGNUM; + regno2 = INVALID_REGNUM; + + if (parts.base) + regno1 = true_regnum (parts.base); + if (parts.index) + regno2 = true_regnum (parts.index); + + split_cost = 0; + + /* Compute how many cycles we will add to execution time + if split lea into a sequence of instructions. */ + if (parts.base || parts.index) + { + /* Have to use mov instruction if non desctructive + destination form is used. */ + if (regno1 != regno0 && regno2 != regno0) + split_cost += 1; + + /* Have to add index to base if both exist. */ + if (parts.base && parts.index) + split_cost += 1; + + /* Have to use shift and adds if scale is 2 or greater. */ + if (parts.scale > 1) + { + if (regno0 != regno1) + split_cost += 1; + else if (regno2 == regno0) + split_cost += 4; + else + split_cost += parts.scale; + } + + /* Have to use add instruction with immediate if + disp is non zero. */ + if (parts.disp && parts.disp != const0_rtx) + split_cost += 1; + + /* Subtract the price of lea. */ + split_cost -= 1; + } + + return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost, + parts.scale > 1); +} + +/* Emit x86 binary operand CODE in mode MODE, where the first operand + matches destination. RTX includes clobber of FLAGS_REG. */ + +static void +ix86_emit_binop (enum rtx_code code, machine_mode mode, + rtx dst, rtx src) +{ + rtx op, clob; + + op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src)); + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); +} + +/* Return true if regno1 def is nearest to the insn. */ + +static bool +find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2) +{ + rtx_insn *prev = insn; + rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn)); + + if (insn == start) + return false; + while (prev && prev != start) + { + if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev)) + { + prev = PREV_INSN (prev); + continue; + } + if (insn_defines_reg (regno1, INVALID_REGNUM, prev)) + return true; + else if (insn_defines_reg (regno2, INVALID_REGNUM, prev)) + return false; + prev = PREV_INSN (prev); + } + + /* None of the regs is defined in the bb. */ + return false; +} + +/* Split lea instructions into a sequence of instructions + which are executed on ALU to avoid AGU stalls. + It is assumed that it is allowed to clobber flags register + at lea position. */ + +void +ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode) +{ + unsigned int regno0, regno1, regno2; + struct ix86_address parts; + rtx target, tmp; + int ok, adds; + + ok = ix86_decompose_address (operands[1], &parts); + gcc_assert (ok); + + target = gen_lowpart (mode, operands[0]); + + regno0 = true_regnum (target); + regno1 = INVALID_REGNUM; + regno2 = INVALID_REGNUM; + + if (parts.base) + { + parts.base = gen_lowpart (mode, parts.base); + regno1 = true_regnum (parts.base); + } + + if (parts.index) + { + parts.index = gen_lowpart (mode, parts.index); + regno2 = true_regnum (parts.index); + } + + if (parts.disp) + parts.disp = gen_lowpart (mode, parts.disp); + + if (parts.scale > 1) + { + /* Case r1 = r1 + ... */ + if (regno1 == regno0) + { + /* If we have a case r1 = r1 + C * r2 then we + should use multiplication which is very + expensive. Assume cost model is wrong if we + have such case here. 
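+   For instance, with parts.scale == 2 the loop below rewrites
+   "leal (%eax,%ebx,2), %eax" as two "addl %ebx, %eax" instructions
+   rather than introducing a multiply.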
*/ + gcc_assert (regno2 != regno0); + + for (adds = parts.scale; adds > 0; adds--) + ix86_emit_binop (PLUS, mode, target, parts.index); + } + else + { + /* r1 = r2 + r3 * C case. Need to move r3 into r1. */ + if (regno0 != regno2) + emit_insn (gen_rtx_SET (target, parts.index)); + + /* Use shift for scaling. */ + ix86_emit_binop (ASHIFT, mode, target, + GEN_INT (exact_log2 (parts.scale))); + + if (parts.base) + ix86_emit_binop (PLUS, mode, target, parts.base); + + if (parts.disp && parts.disp != const0_rtx) + ix86_emit_binop (PLUS, mode, target, parts.disp); + } + } + else if (!parts.base && !parts.index) + { + gcc_assert(parts.disp); + emit_insn (gen_rtx_SET (target, parts.disp)); + } + else + { + if (!parts.base) + { + if (regno0 != regno2) + emit_insn (gen_rtx_SET (target, parts.index)); + } + else if (!parts.index) + { + if (regno0 != regno1) + emit_insn (gen_rtx_SET (target, parts.base)); + } + else + { + if (regno0 == regno1) + tmp = parts.index; + else if (regno0 == regno2) + tmp = parts.base; + else + { + rtx tmp1; + + /* Find better operand for SET instruction, depending + on which definition is farther from the insn. */ + if (find_nearest_reg_def (insn, regno1, regno2)) + tmp = parts.index, tmp1 = parts.base; + else + tmp = parts.base, tmp1 = parts.index; + + emit_insn (gen_rtx_SET (target, tmp)); + + if (parts.disp && parts.disp != const0_rtx) + ix86_emit_binop (PLUS, mode, target, parts.disp); + + ix86_emit_binop (PLUS, mode, target, tmp1); + return; + } + + ix86_emit_binop (PLUS, mode, target, tmp); + } + + if (parts.disp && parts.disp != const0_rtx) + ix86_emit_binop (PLUS, mode, target, parts.disp); + } +} + +/* Return true if it is ok to optimize an ADD operation to LEA + operation to avoid flag register consumation. For most processors, + ADD is faster than LEA. For the processors like BONNELL, if the + destination register of LEA holds an actual address which will be + used soon, LEA is better and otherwise ADD is better. */ + +bool +ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[]) +{ + unsigned int regno0 = true_regnum (operands[0]); + unsigned int regno1 = true_regnum (operands[1]); + unsigned int regno2 = true_regnum (operands[2]); + + /* If a = b + c, (a!=b && a!=c), must use lea form. */ + if (regno0 != regno1 && regno0 != regno2) + return true; + + if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) + return false; + + return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false); +} + +/* Return true if destination reg of SET_BODY is shift count of + USE_BODY. */ + +static bool +ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body) +{ + rtx set_dest; + rtx shift_rtx; + int i; + + /* Retrieve destination of SET_BODY. */ + switch (GET_CODE (set_body)) + { + case SET: + set_dest = SET_DEST (set_body); + if (!set_dest || !REG_P (set_dest)) + return false; + break; + case PARALLEL: + for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--) + if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i), + use_body)) + return true; + default: + return false; + break; + } + + /* Retrieve shift count of USE_BODY. 
*/ + switch (GET_CODE (use_body)) + { + case SET: + shift_rtx = XEXP (use_body, 1); + break; + case PARALLEL: + for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--) + if (ix86_dep_by_shift_count_body (set_body, + XVECEXP (use_body, 0, i))) + return true; + default: + return false; + break; + } + + if (shift_rtx + && (GET_CODE (shift_rtx) == ASHIFT + || GET_CODE (shift_rtx) == LSHIFTRT + || GET_CODE (shift_rtx) == ASHIFTRT + || GET_CODE (shift_rtx) == ROTATE + || GET_CODE (shift_rtx) == ROTATERT)) + { + rtx shift_count = XEXP (shift_rtx, 1); + + /* Return true if shift count is dest of SET_BODY. */ + if (REG_P (shift_count)) + { + /* Add check since it can be invoked before register + allocation in pre-reload schedule. */ + if (reload_completed + && true_regnum (set_dest) == true_regnum (shift_count)) + return true; + else if (REGNO(set_dest) == REGNO(shift_count)) + return true; + } + } + + return false; +} + +/* Return true if destination reg of SET_INSN is shift count of + USE_INSN. */ + +bool +ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn) +{ + return ix86_dep_by_shift_count_body (PATTERN (set_insn), + PATTERN (use_insn)); +} + +/* Return TRUE or FALSE depending on whether the unary operator meets the + appropriate constraints. */ + +bool +ix86_unary_operator_ok (enum rtx_code, + machine_mode, + rtx operands[2]) +{ + /* If one of operands is memory, source and destination must match. */ + if ((MEM_P (operands[0]) + || MEM_P (operands[1])) + && ! rtx_equal_p (operands[0], operands[1])) + return false; + return true; +} + +/* Return TRUE if the operands to a vec_interleave_{high,low}v2df + are ok, keeping in mind the possible movddup alternative. */ + +bool +ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high) +{ + if (MEM_P (operands[0])) + return rtx_equal_p (operands[0], operands[1 + high]); + if (MEM_P (operands[1]) && MEM_P (operands[2])) + return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]); + return true; +} + +/* Post-reload splitter for converting an SF or DFmode value in an + SSE register into an unsigned SImode. */ + +void +ix86_split_convert_uns_si_sse (rtx operands[]) +{ + machine_mode vecmode; + rtx value, large, zero_or_two31, input, two31, x; + + large = operands[1]; + zero_or_two31 = operands[2]; + input = operands[3]; + two31 = operands[4]; + vecmode = GET_MODE (large); + value = gen_rtx_REG (vecmode, REGNO (operands[0])); + + /* Load up the value into the low element. We must ensure that the other + elements are valid floats -- zero is the easiest such value. */ + if (MEM_P (input)) + { + if (vecmode == V4SFmode) + emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input)); + else + emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input)); + } + else + { + input = gen_rtx_REG (vecmode, REGNO (input)); + emit_move_insn (value, CONST0_RTX (vecmode)); + if (vecmode == V4SFmode) + emit_insn (gen_sse_movss (value, value, input)); + else + emit_insn (gen_sse2_movsd (value, value, input)); + } + + emit_move_insn (large, two31); + emit_move_insn (zero_or_two31, MEM_P (two31) ? 
large : two31); + + x = gen_rtx_fmt_ee (LE, vecmode, large, value); + emit_insn (gen_rtx_SET (large, x)); + + x = gen_rtx_AND (vecmode, zero_or_two31, large); + emit_insn (gen_rtx_SET (zero_or_two31, x)); + + x = gen_rtx_MINUS (vecmode, value, zero_or_two31); + emit_insn (gen_rtx_SET (value, x)); + + large = gen_rtx_REG (V4SImode, REGNO (large)); + emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31))); + + x = gen_rtx_REG (V4SImode, REGNO (value)); + if (vecmode == V4SFmode) + emit_insn (gen_fix_truncv4sfv4si2 (x, value)); + else + emit_insn (gen_sse2_cvttpd2dq (x, value)); + value = x; + + emit_insn (gen_xorv4si3 (value, value, large)); +} + +/* Convert an unsigned DImode value into a DFmode, using only SSE. + Expects the 64-bit DImode to be supplied in a pair of integral + registers. Requires SSE2; will use SSE3 if available. For x86_32, + -mfpmath=sse, !optimize_size only. */ + +void +ix86_expand_convert_uns_didf_sse (rtx target, rtx input) +{ + REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt; + rtx int_xmm, fp_xmm; + rtx biases, exponents; + rtx x; + + int_xmm = gen_reg_rtx (V4SImode); + if (TARGET_INTER_UNIT_MOVES_TO_VEC) + emit_insn (gen_movdi_to_sse (int_xmm, input)); + else if (TARGET_SSE_SPLIT_REGS) + { + emit_clobber (int_xmm); + emit_move_insn (gen_lowpart (DImode, int_xmm), input); + } + else + { + x = gen_reg_rtx (V2DImode); + ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0); + emit_move_insn (int_xmm, gen_lowpart (V4SImode, x)); + } + + x = gen_rtx_CONST_VECTOR (V4SImode, + gen_rtvec (4, GEN_INT (0x43300000UL), + GEN_INT (0x45300000UL), + const0_rtx, const0_rtx)); + exponents = validize_mem (force_const_mem (V4SImode, x)); + + /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */ + emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents)); + + /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm) + yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)). + Similarly (0x45300000UL ## fp_value_hi_xmm) yields + (0x1.0p84 + double(fp_value_hi_xmm)). + Note these exponents differ by 32. */ + + fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm)); + + /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values + in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */ + real_ldexp (&bias_lo_rvt, &dconst1, 52); + real_ldexp (&bias_hi_rvt, &dconst1, 84); + biases = const_double_from_real_value (bias_lo_rvt, DFmode); + x = const_double_from_real_value (bias_hi_rvt, DFmode); + biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x)); + biases = validize_mem (force_const_mem (V2DFmode, biases)); + emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases)); + + /* Add the upper and lower DFmode values together. */ + if (TARGET_SSE3) + emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm)); + else + { + x = copy_to_mode_reg (V2DFmode, fp_xmm); + emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm)); + emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x)); + } + + ix86_expand_vector_extract (false, target, fp_xmm, 0); +} + +/* Not used, but eases macroization of patterns. */ +void +ix86_expand_convert_uns_sixf_sse (rtx, rtx) +{ + gcc_unreachable (); +} + +/* Convert an unsigned SImode value into a DFmode. Only currently used + for SSE, but applicable anywhere. 
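+
+   The expansion relies on the identity
+
+     (double) (int32_t) (u - 0x80000000) + 0x1p31 == (double) u
+
+   for every 32-bit unsigned u: the initial PLUS of -2147483648
+   shifts the value into the signed range, floatsidf2 then performs
+   an exact signed conversion, and the final PLUS of 0x1p31 undoes
+   the bias.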
*/ + +void +ix86_expand_convert_uns_sidf_sse (rtx target, rtx input) +{ + REAL_VALUE_TYPE TWO31r; + rtx x, fp; + + x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1), + NULL, 1, OPTAB_DIRECT); + + fp = gen_reg_rtx (DFmode); + emit_insn (gen_floatsidf2 (fp, x)); + + real_ldexp (&TWO31r, &dconst1, 31); + x = const_double_from_real_value (TWO31r, DFmode); + + x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT); + if (x != target) + emit_move_insn (target, x); +} + +/* Convert a signed DImode value into a DFmode. Only used for SSE in + 32-bit mode; otherwise we have a direct convert instruction. */ + +void +ix86_expand_convert_sign_didf_sse (rtx target, rtx input) +{ + REAL_VALUE_TYPE TWO32r; + rtx fp_lo, fp_hi, x; + + fp_lo = gen_reg_rtx (DFmode); + fp_hi = gen_reg_rtx (DFmode); + + emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input))); + + real_ldexp (&TWO32r, &dconst1, 32); + x = const_double_from_real_value (TWO32r, DFmode); + fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT); + + ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input)); + + x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target, + 0, OPTAB_DIRECT); + if (x != target) + emit_move_insn (target, x); +} + +/* Convert an unsigned SImode value into a SFmode, using only SSE. + For x86_32, -mfpmath=sse, !optimize_size only. */ +void +ix86_expand_convert_uns_sisf_sse (rtx target, rtx input) +{ + REAL_VALUE_TYPE ONE16r; + rtx fp_hi, fp_lo, int_hi, int_lo, x; + + real_ldexp (&ONE16r, &dconst1, 16); + x = const_double_from_real_value (ONE16r, SFmode); + int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff), + NULL, 0, OPTAB_DIRECT); + int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16), + NULL, 0, OPTAB_DIRECT); + fp_hi = gen_reg_rtx (SFmode); + fp_lo = gen_reg_rtx (SFmode); + emit_insn (gen_floatsisf2 (fp_hi, int_hi)); + emit_insn (gen_floatsisf2 (fp_lo, int_lo)); + fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi, + 0, OPTAB_DIRECT); + fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target, + 0, OPTAB_DIRECT); + if (!rtx_equal_p (target, fp_hi)) + emit_move_insn (target, fp_hi); +} + +/* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert + a vector of unsigned ints VAL to vector of floats TARGET. 
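+   Each element is split as VAL = HI * 0x10000 + LO, with HI = VAL >> 16
+   and LO = VAL & 0xffff.  Both halves fit in 16 bits, so the two signed
+   int->float conversions are exact; HI is then scaled by 2^16 (also
+   exact) and added to LO, so only the final addition rounds.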
*/ + +void +ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val) +{ + rtx tmp[8]; + REAL_VALUE_TYPE TWO16r; + machine_mode intmode = GET_MODE (val); + machine_mode fltmode = GET_MODE (target); + rtx (*cvt) (rtx, rtx); + + if (intmode == V4SImode) + cvt = gen_floatv4siv4sf2; + else + cvt = gen_floatv8siv8sf2; + tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff)); + tmp[0] = force_reg (intmode, tmp[0]); + tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1, + OPTAB_DIRECT); + tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16), + NULL_RTX, 1, OPTAB_DIRECT); + tmp[3] = gen_reg_rtx (fltmode); + emit_insn (cvt (tmp[3], tmp[1])); + tmp[4] = gen_reg_rtx (fltmode); + emit_insn (cvt (tmp[4], tmp[2])); + real_ldexp (&TWO16r, &dconst1, 16); + tmp[5] = const_double_from_real_value (TWO16r, SFmode); + tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5])); + tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1, + OPTAB_DIRECT); + tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1, + OPTAB_DIRECT); + if (tmp[7] != target) + emit_move_insn (target, tmp[7]); +} + +/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc* + pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*. + This is done by doing just signed conversion if < 0x1p31, and otherwise by + subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */ + +rtx +ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp) +{ + REAL_VALUE_TYPE TWO31r; + rtx two31r, tmp[4]; + machine_mode mode = GET_MODE (val); + machine_mode scalarmode = GET_MODE_INNER (mode); + machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode; + rtx (*cmp) (rtx, rtx, rtx, rtx); + int i; + + for (i = 0; i < 3; i++) + tmp[i] = gen_reg_rtx (mode); + real_ldexp (&TWO31r, &dconst1, 31); + two31r = const_double_from_real_value (TWO31r, scalarmode); + two31r = ix86_build_const_vector (mode, 1, two31r); + two31r = force_reg (mode, two31r); + switch (mode) + { + case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break; + case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break; + case V4DFmode: cmp = gen_avx_maskcmpv4df3; break; + case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break; + default: gcc_unreachable (); + } + tmp[3] = gen_rtx_LE (mode, two31r, val); + emit_insn (cmp (tmp[0], two31r, val, tmp[3])); + tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1], + 0, OPTAB_DIRECT); + if (intmode == V4SImode || TARGET_AVX2) + *xorp = expand_simple_binop (intmode, ASHIFT, + gen_lowpart (intmode, tmp[0]), + GEN_INT (31), NULL_RTX, 0, + OPTAB_DIRECT); + else + { + rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31); + two31 = ix86_build_const_vector (intmode, 1, two31); + *xorp = expand_simple_binop (intmode, AND, + gen_lowpart (intmode, tmp[0]), + two31, NULL_RTX, 0, + OPTAB_DIRECT); + } + return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2], + 0, OPTAB_DIRECT); +} + +/* A subroutine of ix86_build_signbit_mask. If VECT is true, + then replicate the value for all elements of the vector + register. 
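+   If VECT is false, only element 0 is set to VALUE and the remaining
+   elements are zeroed, giving a scalar-in-vector constant.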
*/ + +rtx +ix86_build_const_vector (machine_mode mode, bool vect, rtx value) +{ + int i, n_elt; + rtvec v; + machine_mode scalar_mode; + + switch (mode) + { + case V64QImode: + case V32QImode: + case V16QImode: + case V32HImode: + case V16HImode: + case V8HImode: + case V16SImode: + case V8SImode: + case V4SImode: + case V8DImode: + case V4DImode: + case V2DImode: + gcc_assert (vect); + case V16SFmode: + case V8SFmode: + case V4SFmode: + case V8DFmode: + case V4DFmode: + case V2DFmode: + n_elt = GET_MODE_NUNITS (mode); + v = rtvec_alloc (n_elt); + scalar_mode = GET_MODE_INNER (mode); + + RTVEC_ELT (v, 0) = value; + + for (i = 1; i < n_elt; ++i) + RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode); + + return gen_rtx_CONST_VECTOR (mode, v); + + default: + gcc_unreachable (); + } +} + +/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders + and ix86_expand_int_vcond. Create a mask for the sign bit in MODE + for an SSE register. If VECT is true, then replicate the mask for + all elements of the vector register. If INVERT is true, then create + a mask excluding the sign bit. */ + +rtx +ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert) +{ + machine_mode vec_mode, imode; + wide_int w; + rtx mask, v; + + switch (mode) + { + case V16SImode: + case V16SFmode: + case V8SImode: + case V4SImode: + case V8SFmode: + case V4SFmode: + vec_mode = mode; + imode = SImode; + break; + + case V8DImode: + case V4DImode: + case V2DImode: + case V8DFmode: + case V4DFmode: + case V2DFmode: + vec_mode = mode; + imode = DImode; + break; + + case TImode: + case TFmode: + vec_mode = VOIDmode; + imode = TImode; + break; + + default: + gcc_unreachable (); + } + + machine_mode inner_mode = GET_MODE_INNER (mode); + w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1, + GET_MODE_BITSIZE (inner_mode)); + if (invert) + w = wi::bit_not (w); + + /* Force this value into the low part of a fp vector constant. */ + mask = immed_wide_int_const (w, imode); + mask = gen_lowpart (inner_mode, mask); + + if (vec_mode == VOIDmode) + return force_reg (inner_mode, mask); + + v = ix86_build_const_vector (vec_mode, vect, mask); + return force_reg (vec_mode, v); +} + +/* Generate code for floating point ABS or NEG. */ + +void +ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode, + rtx operands[]) +{ + rtx mask, set, dst, src; + bool use_sse = false; + bool vector_mode = VECTOR_MODE_P (mode); + machine_mode vmode = mode; + + if (vector_mode) + use_sse = true; + else if (mode == TFmode) + use_sse = true; + else if (TARGET_SSE_MATH) + { + use_sse = SSE_FLOAT_MODE_P (mode); + if (mode == SFmode) + vmode = V4SFmode; + else if (mode == DFmode) + vmode = V2DFmode; + } + + /* NEG and ABS performed with SSE use bitwise mask operations. + Create the appropriate mask now. */ + if (use_sse) + mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS); + else + mask = NULL_RTX; + + dst = operands[0]; + src = operands[1]; + + set = gen_rtx_fmt_e (code, mode, src); + set = gen_rtx_SET (dst, set); + + if (mask) + { + rtx use, clob; + rtvec par; + + use = gen_rtx_USE (VOIDmode, mask); + if (vector_mode) + par = gen_rtvec (2, set, use); + else + { + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + par = gen_rtvec (3, set, use, clob); + } + emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); + } + else + emit_insn (set); +} + +/* Expand a copysign operation. Special case operand 0 being a constant. 
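+   The expansion is mask based: in effect the result is computed as
+   (magnitude & ~signmask) | (sign_source & signmask), using masks from
+   ix86_build_signbit_mask.  When the magnitude operand is a constant its
+   absolute value is folded up front and the copysign*3_const patterns
+   are used; otherwise the copysign*3_var patterns, which need a scratch
+   register and both masks, are emitted.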
*/ + +void +ix86_expand_copysign (rtx operands[]) +{ + machine_mode mode, vmode; + rtx dest, op0, op1, mask, nmask; + + dest = operands[0]; + op0 = operands[1]; + op1 = operands[2]; + + mode = GET_MODE (dest); + + if (mode == SFmode) + vmode = V4SFmode; + else if (mode == DFmode) + vmode = V2DFmode; + else + vmode = mode; + + if (CONST_DOUBLE_P (op0)) + { + rtx (*copysign_insn)(rtx, rtx, rtx, rtx); + + if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0))) + op0 = simplify_unary_operation (ABS, mode, op0, mode); + + if (mode == SFmode || mode == DFmode) + { + if (op0 == CONST0_RTX (mode)) + op0 = CONST0_RTX (vmode); + else + { + rtx v = ix86_build_const_vector (vmode, false, op0); + + op0 = force_reg (vmode, v); + } + } + else if (op0 != CONST0_RTX (mode)) + op0 = force_reg (mode, op0); + + mask = ix86_build_signbit_mask (vmode, 0, 0); + + if (mode == SFmode) + copysign_insn = gen_copysignsf3_const; + else if (mode == DFmode) + copysign_insn = gen_copysigndf3_const; + else + copysign_insn = gen_copysigntf3_const; + + emit_insn (copysign_insn (dest, op0, op1, mask)); + } + else + { + rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx); + + nmask = ix86_build_signbit_mask (vmode, 0, 1); + mask = ix86_build_signbit_mask (vmode, 0, 0); + + if (mode == SFmode) + copysign_insn = gen_copysignsf3_var; + else if (mode == DFmode) + copysign_insn = gen_copysigndf3_var; + else + copysign_insn = gen_copysigntf3_var; + + emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask)); + } +} + +/* Deconstruct a copysign operation into bit masks. Operand 0 is known to + be a constant, and so has already been expanded into a vector constant. */ + +void +ix86_split_copysign_const (rtx operands[]) +{ + machine_mode mode, vmode; + rtx dest, op0, mask, x; + + dest = operands[0]; + op0 = operands[1]; + mask = operands[3]; + + mode = GET_MODE (dest); + vmode = GET_MODE (mask); + + dest = simplify_gen_subreg (vmode, dest, mode, 0); + x = gen_rtx_AND (vmode, dest, mask); + emit_insn (gen_rtx_SET (dest, x)); + + if (op0 != CONST0_RTX (vmode)) + { + x = gen_rtx_IOR (vmode, dest, op0); + emit_insn (gen_rtx_SET (dest, x)); + } +} + +/* Deconstruct a copysign operation into bit masks. Operand 0 is variable, + so we have to do two masks. */ + +void +ix86_split_copysign_var (rtx operands[]) +{ + machine_mode mode, vmode; + rtx dest, scratch, op0, op1, mask, nmask, x; + + dest = operands[0]; + scratch = operands[1]; + op0 = operands[2]; + op1 = operands[3]; + nmask = operands[4]; + mask = operands[5]; + + mode = GET_MODE (dest); + vmode = GET_MODE (mask); + + if (rtx_equal_p (op0, op1)) + { + /* Shouldn't happen often (it's useless, obviously), but when it does + we'd generate incorrect code if we continue below. 
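+      Since copysign (X, X) is simply X, a plain move of OP0 is all that
+      is needed in that case.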
*/ + emit_move_insn (dest, op0); + return; + } + + if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */ + { + gcc_assert (REGNO (op1) == REGNO (scratch)); + + x = gen_rtx_AND (vmode, scratch, mask); + emit_insn (gen_rtx_SET (scratch, x)); + + dest = mask; + op0 = simplify_gen_subreg (vmode, op0, mode, 0); + x = gen_rtx_NOT (vmode, dest); + x = gen_rtx_AND (vmode, x, op0); + emit_insn (gen_rtx_SET (dest, x)); + } + else + { + if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */ + { + x = gen_rtx_AND (vmode, scratch, mask); + } + else /* alternative 2,4 */ + { + gcc_assert (REGNO (mask) == REGNO (scratch)); + op1 = simplify_gen_subreg (vmode, op1, mode, 0); + x = gen_rtx_AND (vmode, scratch, op1); + } + emit_insn (gen_rtx_SET (scratch, x)); + + if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */ + { + dest = simplify_gen_subreg (vmode, op0, mode, 0); + x = gen_rtx_AND (vmode, dest, nmask); + } + else /* alternative 3,4 */ + { + gcc_assert (REGNO (nmask) == REGNO (dest)); + dest = nmask; + op0 = simplify_gen_subreg (vmode, op0, mode, 0); + x = gen_rtx_AND (vmode, dest, op0); + } + emit_insn (gen_rtx_SET (dest, x)); + } + + x = gen_rtx_IOR (vmode, dest, scratch); + emit_insn (gen_rtx_SET (dest, x)); +} + +/* Return TRUE or FALSE depending on whether the first SET in INSN + has source and destination with matching CC modes, and that the + CC mode is at least as constrained as REQ_MODE. */ + +bool +ix86_match_ccmode (rtx insn, machine_mode req_mode) +{ + rtx set; + machine_mode set_mode; + + set = PATTERN (insn); + if (GET_CODE (set) == PARALLEL) + set = XVECEXP (set, 0, 0); + gcc_assert (GET_CODE (set) == SET); + gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE); + + set_mode = GET_MODE (SET_DEST (set)); + switch (set_mode) + { + case CCNOmode: + if (req_mode != CCNOmode + && (req_mode != CCmode + || XEXP (SET_SRC (set), 1) != const0_rtx)) + return false; + break; + case CCmode: + if (req_mode == CCGCmode) + return false; + /* FALLTHRU */ + case CCGCmode: + if (req_mode == CCGOCmode || req_mode == CCNOmode) + return false; + /* FALLTHRU */ + case CCGOCmode: + if (req_mode == CCZmode) + return false; + /* FALLTHRU */ + case CCZmode: + break; + + case CCAmode: + case CCCmode: + case CCOmode: + case CCPmode: + case CCSmode: + if (set_mode != req_mode) + return false; + break; + + default: + gcc_unreachable (); + } + + return GET_MODE (SET_SRC (set)) == set_mode; +} + +/* Generate insn patterns to do an integer compare of OPERANDS. */ + +static rtx +ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1) +{ + machine_mode cmpmode; + rtx tmp, flags; + + cmpmode = SELECT_CC_MODE (code, op0, op1); + flags = gen_rtx_REG (cmpmode, FLAGS_REG); + + /* This is very simple, but making the interface the same as in the + FP case makes the rest of the code easier. */ + tmp = gen_rtx_COMPARE (cmpmode, op0, op1); + emit_insn (gen_rtx_SET (flags, tmp)); + + /* Return the test that should be put into the flags user, i.e. + the bcc, scc, or cmov instruction. */ + return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx); +} + +/* Figure out whether to use ordered or unordered fp comparisons. + Return the appropriate mode to use. */ + +machine_mode +ix86_fp_compare_mode (enum rtx_code) +{ + /* ??? In order to make all comparisons reversible, we do all comparisons + non-trapping when compiling for IEEE. 
Once gcc is able to distinguish + all forms trapping and nontrapping comparisons, we can make inequality + comparisons trapping again, since it results in better code when using + FCOM based compares. */ + return TARGET_IEEE_FP ? CCFPUmode : CCFPmode; +} + +machine_mode +ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1) +{ + machine_mode mode = GET_MODE (op0); + + if (SCALAR_FLOAT_MODE_P (mode)) + { + gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); + return ix86_fp_compare_mode (code); + } + + switch (code) + { + /* Only zero flag is needed. */ + case EQ: /* ZF=0 */ + case NE: /* ZF!=0 */ + return CCZmode; + /* Codes needing carry flag. */ + case GEU: /* CF=0 */ + case LTU: /* CF=1 */ + /* Detect overflow checks. They need just the carry flag. */ + if (GET_CODE (op0) == PLUS + && (rtx_equal_p (op1, XEXP (op0, 0)) + || rtx_equal_p (op1, XEXP (op0, 1)))) + return CCCmode; + else + return CCmode; + case GTU: /* CF=0 & ZF=0 */ + case LEU: /* CF=1 | ZF=1 */ + return CCmode; + /* Codes possibly doable only with sign flag when + comparing against zero. */ + case GE: /* SF=OF or SF=0 */ + case LT: /* SF<>OF or SF=1 */ + if (op1 == const0_rtx) + return CCGOCmode; + else + /* For other cases Carry flag is not required. */ + return CCGCmode; + /* Codes doable only with sign flag when comparing + against zero, but we miss jump instruction for it + so we need to use relational tests against overflow + that thus needs to be zero. */ + case GT: /* ZF=0 & SF=OF */ + case LE: /* ZF=1 | SF<>OF */ + if (op1 == const0_rtx) + return CCNOmode; + else + return CCGCmode; + /* strcmp pattern do (use flags) and combine may ask us for proper + mode. */ + case USE: + return CCmode; + default: + gcc_unreachable (); + } +} + +/* Return the fixed registers used for condition codes. */ + +static bool +ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) +{ + *p1 = FLAGS_REG; + *p2 = FPSR_REG; + return true; +} + +/* If two condition code modes are compatible, return a condition code + mode which is compatible with both. Otherwise, return + VOIDmode. */ + +static machine_mode +ix86_cc_modes_compatible (machine_mode m1, machine_mode m2) +{ + if (m1 == m2) + return m1; + + if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC) + return VOIDmode; + + if ((m1 == CCGCmode && m2 == CCGOCmode) + || (m1 == CCGOCmode && m2 == CCGCmode)) + return CCGCmode; + + if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode)) + return m2; + else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode)) + return m1; + + switch (m1) + { + default: + gcc_unreachable (); + + case CCmode: + case CCGCmode: + case CCGOCmode: + case CCNOmode: + case CCAmode: + case CCCmode: + case CCOmode: + case CCPmode: + case CCSmode: + case CCZmode: + switch (m2) + { + default: + return VOIDmode; + + case CCmode: + case CCGCmode: + case CCGOCmode: + case CCNOmode: + case CCAmode: + case CCCmode: + case CCOmode: + case CCPmode: + case CCSmode: + case CCZmode: + return CCmode; + } + + case CCFPmode: + case CCFPUmode: + /* These are only compatible with themselves, which we already + checked above. */ + return VOIDmode; + } +} + + +/* Return a comparison we can do and that it is equivalent to + swap_condition (code) apart possibly from orderedness. + But, never change orderedness if TARGET_IEEE_FP, returning + UNKNOWN in that case if necessary. */ + +static enum rtx_code +ix86_fp_swap_condition (enum rtx_code code) +{ + switch (code) + { + case GT: /* GTU - CF=0 & ZF=0 */ + return TARGET_IEEE_FP ? 
UNKNOWN : UNLT; + case GE: /* GEU - CF=0 */ + return TARGET_IEEE_FP ? UNKNOWN : UNLE; + case UNLT: /* LTU - CF=1 */ + return TARGET_IEEE_FP ? UNKNOWN : GT; + case UNLE: /* LEU - CF=1 | ZF=1 */ + return TARGET_IEEE_FP ? UNKNOWN : GE; + default: + return swap_condition (code); + } +} + +/* Return cost of comparison CODE using the best strategy for performance. + All following functions do use number of instructions as a cost metrics. + In future this should be tweaked to compute bytes for optimize_size and + take into account performance of various instructions on various CPUs. */ + +static int +ix86_fp_comparison_cost (enum rtx_code code) +{ + int arith_cost; + + /* The cost of code using bit-twiddling on %ah. */ + switch (code) + { + case UNLE: + case UNLT: + case LTGT: + case GT: + case GE: + case UNORDERED: + case ORDERED: + case UNEQ: + arith_cost = 4; + break; + case LT: + case NE: + case EQ: + case UNGE: + arith_cost = TARGET_IEEE_FP ? 5 : 4; + break; + case LE: + case UNGT: + arith_cost = TARGET_IEEE_FP ? 6 : 4; + break; + default: + gcc_unreachable (); + } + + switch (ix86_fp_comparison_strategy (code)) + { + case IX86_FPCMP_COMI: + return arith_cost > 4 ? 3 : 2; + case IX86_FPCMP_SAHF: + return arith_cost > 4 ? 4 : 3; + default: + return arith_cost; + } +} + +/* Return strategy to use for floating-point. We assume that fcomi is always + preferrable where available, since that is also true when looking at size + (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */ + +enum ix86_fpcmp_strategy +ix86_fp_comparison_strategy (enum rtx_code) +{ + /* Do fcomi/sahf based test when profitable. */ + + if (TARGET_CMOVE) + return IX86_FPCMP_COMI; + + if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())) + return IX86_FPCMP_SAHF; + + return IX86_FPCMP_ARITH; +} + +/* Swap, force into registers, or otherwise massage the two operands + to a fp comparison. The operands are updated in place; the new + comparison code is returned. */ + +static enum rtx_code +ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) +{ + machine_mode fpcmp_mode = ix86_fp_compare_mode (code); + rtx op0 = *pop0, op1 = *pop1; + machine_mode op_mode = GET_MODE (op0); + int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode); + + /* All of the unordered compare instructions only work on registers. + The same is true of the fcomi compare instructions. The XFmode + compare instructions require registers except when comparing + against zero or when converting operand 1 from fixed point to + floating point. */ + + if (!is_sse + && (fpcmp_mode == CCFPUmode + || (op_mode == XFmode + && ! (standard_80387_constant_p (op0) == 1 + || standard_80387_constant_p (op1) == 1) + && GET_CODE (op1) != FLOAT) + || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI)) + { + op0 = force_reg (op_mode, op0); + op1 = force_reg (op_mode, op1); + } + else + { + /* %%% We only allow op1 in memory; op0 must be st(0). So swap + things around if they appear profitable, otherwise force op0 + into a register. */ + + if (standard_80387_constant_p (op0) == 0 + || (MEM_P (op0) + && ! 
(standard_80387_constant_p (op1) == 0 + || MEM_P (op1)))) + { + enum rtx_code new_code = ix86_fp_swap_condition (code); + if (new_code != UNKNOWN) + { + std::swap (op0, op1); + code = new_code; + } + } + + if (!REG_P (op0)) + op0 = force_reg (op_mode, op0); + + if (CONSTANT_P (op1)) + { + int tmp = standard_80387_constant_p (op1); + if (tmp == 0) + op1 = validize_mem (force_const_mem (op_mode, op1)); + else if (tmp == 1) + { + if (TARGET_CMOVE) + op1 = force_reg (op_mode, op1); + } + else + op1 = force_reg (op_mode, op1); + } + } + + /* Try to rearrange the comparison to make it cheaper. */ + if (ix86_fp_comparison_cost (code) + > ix86_fp_comparison_cost (swap_condition (code)) + && (REG_P (op1) || can_create_pseudo_p ())) + { + std::swap (op0, op1); + code = swap_condition (code); + if (!REG_P (op0)) + op0 = force_reg (op_mode, op0); + } + + *pop0 = op0; + *pop1 = op1; + return code; +} + +/* Convert comparison codes we use to represent FP comparison to integer + code that will result in proper branch. Return UNKNOWN if no such code + is available. */ + +enum rtx_code +ix86_fp_compare_code_to_integer (enum rtx_code code) +{ + switch (code) + { + case GT: + return GTU; + case GE: + return GEU; + case ORDERED: + case UNORDERED: + return code; + break; + case UNEQ: + return EQ; + break; + case UNLT: + return LTU; + break; + case UNLE: + return LEU; + break; + case LTGT: + return NE; + break; + default: + return UNKNOWN; + } +} + +/* Generate insn patterns to do a floating point compare of OPERANDS. */ + +static rtx +ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch) +{ + machine_mode fpcmp_mode, intcmp_mode; + rtx tmp, tmp2; + + fpcmp_mode = ix86_fp_compare_mode (code); + code = ix86_prepare_fp_compare_args (code, &op0, &op1); + + /* Do fcomi/sahf based test when profitable. */ + switch (ix86_fp_comparison_strategy (code)) + { + case IX86_FPCMP_COMI: + intcmp_mode = fpcmp_mode; + tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); + tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp); + emit_insn (tmp); + break; + + case IX86_FPCMP_SAHF: + intcmp_mode = fpcmp_mode; + tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); + tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp); + + if (!scratch) + scratch = gen_reg_rtx (HImode); + tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2))); + break; + + case IX86_FPCMP_ARITH: + /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */ + tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); + tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); + if (!scratch) + scratch = gen_reg_rtx (HImode); + emit_insn (gen_rtx_SET (scratch, tmp2)); + + /* In the unordered case, we have to check C2 for NaN's, which + doesn't happen to work out to anything nice combination-wise. + So do some bit twiddling on the value we've got in AH to come + up with an appropriate set of condition codes. 
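+	 After the fnstsw the relevant status-word bits sit in AH as
+	 C0 = 0x01, C2 = 0x04 and C3 = 0x40, so the 0x45 masks below
+	 select C3|C2|C0, and a set C2 indicates an unordered result
+	 (a NaN was involved).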
*/ + + intcmp_mode = CCNOmode; + switch (code) + { + case GT: + case UNGT: + if (code == GT || !TARGET_IEEE_FP) + { + emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); + code = EQ; + } + else + { + emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); + emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); + emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44))); + intcmp_mode = CCmode; + code = GEU; + } + break; + case LT: + case UNLT: + if (code == LT && TARGET_IEEE_FP) + { + emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); + emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx)); + intcmp_mode = CCmode; + code = EQ; + } + else + { + emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx)); + code = NE; + } + break; + case GE: + case UNGE: + if (code == GE || !TARGET_IEEE_FP) + { + emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05))); + code = EQ; + } + else + { + emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); + emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx)); + code = NE; + } + break; + case LE: + case UNLE: + if (code == LE && TARGET_IEEE_FP) + { + emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); + emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); + emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); + intcmp_mode = CCmode; + code = LTU; + } + else + { + emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); + code = NE; + } + break; + case EQ: + case UNEQ: + if (code == EQ && TARGET_IEEE_FP) + { + emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); + emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); + intcmp_mode = CCmode; + code = EQ; + } + else + { + emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); + code = NE; + } + break; + case NE: + case LTGT: + if (code == NE && TARGET_IEEE_FP) + { + emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); + emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, + GEN_INT (0x40))); + code = NE; + } + else + { + emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); + code = EQ; + } + break; + + case UNORDERED: + emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); + code = NE; + break; + case ORDERED: + emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); + code = EQ; + break; + + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable(); + } + + /* Return the test that should be put into the flags user, i.e. + the bcc, scc, or cmov instruction. */ + return gen_rtx_fmt_ee (code, VOIDmode, + gen_rtx_REG (intcmp_mode, FLAGS_REG), + const0_rtx); +} + +static rtx +ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1) +{ + rtx ret; + + if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC) + ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); + + else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0))) + { + gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0))); + ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX); + } + else + ret = ix86_expand_int_compare (code, op0, op1); + + return ret; +} + +void +ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label) +{ + machine_mode mode = GET_MODE (op0); + rtx tmp; + + /* Handle special case - vector comparsion with boolean result, transform + it using ptest instruction. */ + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { + rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG); + machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? 
V4DImode : V2DImode; + + gcc_assert (code == EQ || code == NE); + /* Generate XOR since we can't check that one operand is zero vector. */ + tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1))); + tmp = gen_lowpart (p_mode, tmp); + emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG), + gen_rtx_UNSPEC (CCmode, + gen_rtvec (2, tmp, tmp), + UNSPEC_PTEST))); + tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); + return; + } + + switch (mode) + { + case SFmode: + case DFmode: + case XFmode: + case QImode: + case HImode: + case SImode: + simple: + tmp = ix86_expand_compare (code, op0, op1); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); + return; + + case DImode: + if (TARGET_64BIT) + goto simple; + /* For 32-bit target DI comparison may be performed on + SSE registers. To allow this we should avoid split + to SI mode which is achieved by doing xor in DI mode + and then comparing with zero (which is recognized by + STV pass). We don't compare using xor when optimizing + for size. */ + if (!optimize_insn_for_size_p () + && TARGET_STV + && (code == EQ || code == NE)) + { + op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1)); + op1 = const0_rtx; + } + case TImode: + /* Expand DImode branch into multiple compare+branch. */ + { + rtx lo[2], hi[2]; + rtx_code_label *label2; + enum rtx_code code1, code2, code3; + machine_mode submode; + + if (CONSTANT_P (op0) && !CONSTANT_P (op1)) + { + std::swap (op0, op1); + code = swap_condition (code); + } + + split_double_mode (mode, &op0, 1, lo+0, hi+0); + split_double_mode (mode, &op1, 1, lo+1, hi+1); + + submode = mode == DImode ? SImode : DImode; + + /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to + avoid two branches. This costs one extra insn, so disable when + optimizing for size. */ + + if ((code == EQ || code == NE) + && (!optimize_insn_for_size_p () + || hi[1] == const0_rtx || lo[1] == const0_rtx)) + { + rtx xor0, xor1; + + xor1 = hi[0]; + if (hi[1] != const0_rtx) + xor1 = expand_binop (submode, xor_optab, xor1, hi[1], + NULL_RTX, 0, OPTAB_WIDEN); + + xor0 = lo[0]; + if (lo[1] != const0_rtx) + xor0 = expand_binop (submode, xor_optab, xor0, lo[1], + NULL_RTX, 0, OPTAB_WIDEN); + + tmp = expand_binop (submode, ior_optab, xor1, xor0, + NULL_RTX, 0, OPTAB_WIDEN); + + ix86_expand_branch (code, tmp, const0_rtx, label); + return; + } + + /* Otherwise, if we are doing less-than or greater-or-equal-than, + op1 is a constant and the low word is zero, then we can just + examine the high word. Similarly for low word -1 and + less-or-equal-than or greater-than. */ + + if (CONST_INT_P (hi[1])) + switch (code) + { + case LT: case LTU: case GE: case GEU: + if (lo[1] == const0_rtx) + { + ix86_expand_branch (code, hi[0], hi[1], label); + return; + } + break; + case LE: case LEU: case GT: case GTU: + if (lo[1] == constm1_rtx) + { + ix86_expand_branch (code, hi[0], hi[1], label); + return; + } + break; + default: + break; + } + + /* Otherwise, we need two or three jumps. 
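+	   The general orderings need all three; for EQ and NE one of the
+	   two high-part branches drops out (code1 or code2 becomes
+	   UNKNOWN below), so only two jumps are emitted.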
*/ + + label2 = gen_label_rtx (); + + code1 = code; + code2 = swap_condition (code); + code3 = unsigned_condition (code); + + switch (code) + { + case LT: case GT: case LTU: case GTU: + break; + + case LE: code1 = LT; code2 = GT; break; + case GE: code1 = GT; code2 = LT; break; + case LEU: code1 = LTU; code2 = GTU; break; + case GEU: code1 = GTU; code2 = LTU; break; + + case EQ: code1 = UNKNOWN; code2 = NE; break; + case NE: code2 = UNKNOWN; break; + + default: + gcc_unreachable (); + } + + /* + * a < b => + * if (hi(a) < hi(b)) goto true; + * if (hi(a) > hi(b)) goto false; + * if (lo(a) < lo(b)) goto true; + * false: + */ + + if (code1 != UNKNOWN) + ix86_expand_branch (code1, hi[0], hi[1], label); + if (code2 != UNKNOWN) + ix86_expand_branch (code2, hi[0], hi[1], label2); + + ix86_expand_branch (code3, lo[0], lo[1], label); + + if (code2 != UNKNOWN) + emit_label (label2); + return; + } + + default: + gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC); + goto simple; + } +} + +/* Split branch based on floating point condition. */ +void +ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2, + rtx target1, rtx target2, rtx tmp) +{ + rtx condition; + rtx i; + + if (target2 != pc_rtx) + { + std::swap (target1, target2); + code = reverse_condition_maybe_unordered (code); + } + + condition = ix86_expand_fp_compare (code, op1, op2, + tmp); + + i = emit_jump_insn (gen_rtx_SET + (pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, + condition, target1, target2))); + if (split_branch_probability >= 0) + add_int_reg_note (i, REG_BR_PROB, split_branch_probability); +} + +void +ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1) +{ + rtx ret; + + gcc_assert (GET_MODE (dest) == QImode); + + ret = ix86_expand_compare (code, op0, op1); + PUT_MODE (ret, QImode); + emit_insn (gen_rtx_SET (dest, ret)); +} + +/* Expand comparison setting or clearing carry flag. Return true when + successful and set pop for the operation. */ +static bool +ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) +{ + machine_mode mode = + GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); + + /* Do not handle double-mode compares that go through special path. */ + if (mode == (TARGET_64BIT ? TImode : DImode)) + return false; + + if (SCALAR_FLOAT_MODE_P (mode)) + { + rtx compare_op; + rtx_insn *compare_seq; + + gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); + + /* Shortcut: following common codes never translate + into carry flag compares. */ + if (code == EQ || code == NE || code == UNEQ || code == LTGT + || code == ORDERED || code == UNORDERED) + return false; + + /* These comparisons require zero flag; swap operands so they won't. */ + if ((code == GT || code == UNLE || code == LE || code == UNGT) + && !TARGET_IEEE_FP) + { + std::swap (op0, op1); + code = swap_condition (code); + } + + /* Try to expand the comparison and verify that we end up with + carry flag based comparison. This fails to be true only when + we decide to expand comparison using arithmetic that is not + too common scenario. 
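+	 Concretely, the trial sequence is kept only when the resulting
+	 integer condition is LTU or GEU, the two codes that test nothing
+	 but the carry flag.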
*/ + start_sequence (); + compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX); + compare_seq = get_insns (); + end_sequence (); + + if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode + || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) + code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); + else + code = GET_CODE (compare_op); + + if (code != LTU && code != GEU) + return false; + + emit_insn (compare_seq); + *pop = compare_op; + return true; + } + + if (!INTEGRAL_MODE_P (mode)) + return false; + + switch (code) + { + case LTU: + case GEU: + break; + + /* Convert a==0 into (unsigned)a<1. */ + case EQ: + case NE: + if (op1 != const0_rtx) + return false; + op1 = const1_rtx; + code = (code == EQ ? LTU : GEU); + break; + + /* Convert a>b into b=b-1. */ + case GTU: + case LEU: + if (CONST_INT_P (op1)) + { + op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0)); + /* Bail out on overflow. We still can swap operands but that + would force loading of the constant into register. */ + if (op1 == const0_rtx + || !x86_64_immediate_operand (op1, GET_MODE (op1))) + return false; + code = (code == GTU ? GEU : LTU); + } + else + { + std::swap (op0, op1); + code = (code == GTU ? LTU : GEU); + } + break; + + /* Convert a>=0 into (unsigned)a<0x80000000. */ + case LT: + case GE: + if (mode == DImode || op1 != const0_rtx) + return false; + op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); + code = (code == LT ? GEU : LTU); + break; + case LE: + case GT: + if (mode == DImode || op1 != constm1_rtx) + return false; + op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); + code = (code == LE ? GEU : LTU); + break; + + default: + return false; + } + /* Swapping operands may cause constant to appear as first operand. */ + if (!nonimmediate_operand (op0, VOIDmode)) + { + if (!can_create_pseudo_p ()) + return false; + op0 = force_reg (mode, op0); + } + *pop = ix86_expand_compare (code, op0, op1); + gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU); + return true; +} + +bool +ix86_expand_int_movcc (rtx operands[]) +{ + enum rtx_code code = GET_CODE (operands[1]), compare_code; + rtx_insn *compare_seq; + rtx compare_op; + machine_mode mode = GET_MODE (operands[0]); + bool sign_bit_compare_p = false; + rtx op0 = XEXP (operands[1], 0); + rtx op1 = XEXP (operands[1], 1); + + if (GET_MODE (op0) == TImode + || (GET_MODE (op0) == DImode + && !TARGET_64BIT)) + return false; + + start_sequence (); + compare_op = ix86_expand_compare (code, op0, op1); + compare_seq = get_insns (); + end_sequence (); + + compare_code = GET_CODE (compare_op); + + if ((op1 == const0_rtx && (code == GE || code == LT)) + || (op1 == constm1_rtx && (code == GT || code == LE))) + sign_bit_compare_p = true; + + /* Don't attempt mode expansion here -- if we had to expand 5 or 6 + HImode insns, we'd be swallowed in word prefix ops. */ + + if ((mode != HImode || TARGET_FAST_PREFIX) + && (mode != (TARGET_64BIT ? TImode : DImode)) + && CONST_INT_P (operands[2]) + && CONST_INT_P (operands[3])) + { + rtx out = operands[0]; + HOST_WIDE_INT ct = INTVAL (operands[2]); + HOST_WIDE_INT cf = INTVAL (operands[3]); + HOST_WIDE_INT diff; + + diff = ct - cf; + /* Sign bit compares are better done using shifts than we do by using + sbb. */ + if (sign_bit_compare_p + || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op)) + { + /* Detect overlap between destination and compare sources. 
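+	     If the output register also appears in the comparison
+	     operands, the result is built in a fresh temporary and only
+	     copied into the destination at the end.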
*/ + rtx tmp = out; + + if (!sign_bit_compare_p) + { + rtx flags; + bool fpcmp = false; + + compare_code = GET_CODE (compare_op); + + flags = XEXP (compare_op, 0); + + if (GET_MODE (flags) == CCFPmode + || GET_MODE (flags) == CCFPUmode) + { + fpcmp = true; + compare_code + = ix86_fp_compare_code_to_integer (compare_code); + } + + /* To simplify rest of code, restrict to the GEU case. */ + if (compare_code == LTU) + { + std::swap (ct, cf); + compare_code = reverse_condition (compare_code); + code = reverse_condition (code); + } + else + { + if (fpcmp) + PUT_CODE (compare_op, + reverse_condition_maybe_unordered + (GET_CODE (compare_op))); + else + PUT_CODE (compare_op, + reverse_condition (GET_CODE (compare_op))); + } + diff = ct - cf; + + if (reg_overlap_mentioned_p (out, op0) + || reg_overlap_mentioned_p (out, op1)) + tmp = gen_reg_rtx (mode); + + if (mode == DImode) + emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op)); + else + emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), + flags, compare_op)); + } + else + { + if (code == GT || code == GE) + code = reverse_condition (code); + else + { + std::swap (ct, cf); + diff = ct - cf; + } + tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1); + } + + if (diff == 1) + { + /* + * cmpl op0,op1 + * sbbl dest,dest + * [addl dest, ct] + * + * Size 5 - 8. + */ + if (ct) + tmp = expand_simple_binop (mode, PLUS, + tmp, GEN_INT (ct), + copy_rtx (tmp), 1, OPTAB_DIRECT); + } + else if (cf == -1) + { + /* + * cmpl op0,op1 + * sbbl dest,dest + * orl $ct, dest + * + * Size 8. + */ + tmp = expand_simple_binop (mode, IOR, + tmp, GEN_INT (ct), + copy_rtx (tmp), 1, OPTAB_DIRECT); + } + else if (diff == -1 && ct) + { + /* + * cmpl op0,op1 + * sbbl dest,dest + * notl dest + * [addl dest, cf] + * + * Size 8 - 11. + */ + tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); + if (cf) + tmp = expand_simple_binop (mode, PLUS, + copy_rtx (tmp), GEN_INT (cf), + copy_rtx (tmp), 1, OPTAB_DIRECT); + } + else + { + /* + * cmpl op0,op1 + * sbbl dest,dest + * [notl dest] + * andl cf - ct, dest + * [addl dest, ct] + * + * Size 8 - 11. + */ + + if (cf == 0) + { + cf = ct; + ct = 0; + tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); + } + + tmp = expand_simple_binop (mode, AND, + copy_rtx (tmp), + gen_int_mode (cf - ct, mode), + copy_rtx (tmp), 1, OPTAB_DIRECT); + if (ct) + tmp = expand_simple_binop (mode, PLUS, + copy_rtx (tmp), GEN_INT (ct), + copy_rtx (tmp), 1, OPTAB_DIRECT); + } + + if (!rtx_equal_p (tmp, out)) + emit_move_insn (copy_rtx (out), copy_rtx (tmp)); + + return true; + } + + if (diff < 0) + { + machine_mode cmp_mode = GET_MODE (op0); + enum rtx_code new_code; + + if (SCALAR_FLOAT_MODE_P (cmp_mode)) + { + gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); + + /* We may be reversing unordered compare to normal compare, that + is not valid in general (we may convert non-trapping condition + to trapping one), however on i386 we currently emit all + comparisons unordered. */ + new_code = reverse_condition_maybe_unordered (code); + } + else + new_code = ix86_reverse_condition (code, cmp_mode); + if (new_code != UNKNOWN) + { + std::swap (ct, cf); + diff = -diff; + code = new_code; + } + } + + compare_code = UNKNOWN; + if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT + && CONST_INT_P (op1)) + { + if (op1 == const0_rtx + && (code == LT || code == GE)) + compare_code = code; + else if (op1 == constm1_rtx) + { + if (code == LE) + compare_code = LT; + else if (code == GT) + compare_code = GE; + } + } + + /* Optimize dest = (op0 < 0) ? 
-1 : cf. */ + if (compare_code != UNKNOWN + && GET_MODE (op0) == GET_MODE (out) + && (cf == -1 || ct == -1)) + { + /* If lea code below could be used, only optimize + if it results in a 2 insn sequence. */ + + if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8 + || diff == 3 || diff == 5 || diff == 9) + || (compare_code == LT && ct == -1) + || (compare_code == GE && cf == -1)) + { + /* + * notl op1 (if necessary) + * sarl $31, op1 + * orl cf, op1 + */ + if (ct != -1) + { + cf = ct; + ct = -1; + code = reverse_condition (code); + } + + out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1); + + out = expand_simple_binop (mode, IOR, + out, GEN_INT (cf), + out, 1, OPTAB_DIRECT); + if (out != operands[0]) + emit_move_insn (operands[0], out); + + return true; + } + } + + + if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 + || diff == 3 || diff == 5 || diff == 9) + && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL) + && (mode != DImode + || x86_64_immediate_operand (GEN_INT (cf), VOIDmode))) + { + /* + * xorl dest,dest + * cmpl op1,op2 + * setcc dest + * lea cf(dest*(ct-cf)),dest + * + * Size 14. + * + * This also catches the degenerate setcc-only case. + */ + + rtx tmp; + int nops; + + out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1); + + nops = 0; + /* On x86_64 the lea instruction operates on Pmode, so we need + to get arithmetics done in proper mode to match. */ + if (diff == 1) + tmp = copy_rtx (out); + else + { + rtx out1; + out1 = copy_rtx (out); + tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); + nops++; + if (diff & 1) + { + tmp = gen_rtx_PLUS (mode, tmp, out1); + nops++; + } + } + if (cf != 0) + { + tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf)); + nops++; + } + if (!rtx_equal_p (tmp, out)) + { + if (nops == 1) + out = force_operand (tmp, copy_rtx (out)); + else + emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp))); + } + if (!rtx_equal_p (out, operands[0])) + emit_move_insn (operands[0], copy_rtx (out)); + + return true; + } + + /* + * General case: Jumpful: + * xorl dest,dest cmpl op1, op2 + * cmpl op1, op2 movl ct, dest + * setcc dest jcc 1f + * decl dest movl cf, dest + * andl (cf-ct),dest 1: + * addl ct,dest + * + * Size 20. Size 14. + * + * This is reasonably steep, but branch mispredict costs are + * high on modern cpus, so consider failing only if optimizing + * for space. + */ + + if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) + && BRANCH_COST (optimize_insn_for_speed_p (), + false) >= 2) + { + if (cf == 0) + { + machine_mode cmp_mode = GET_MODE (op0); + enum rtx_code new_code; + + if (SCALAR_FLOAT_MODE_P (cmp_mode)) + { + gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); + + /* We may be reversing unordered compare to normal compare, + that is not valid in general (we may convert non-trapping + condition to trapping one), however on i386 we currently + emit all comparisons unordered. */ + new_code = reverse_condition_maybe_unordered (code); + } + else + { + new_code = ix86_reverse_condition (code, cmp_mode); + if (compare_code != UNKNOWN && new_code != UNKNOWN) + compare_code = reverse_condition (compare_code); + } + + if (new_code != UNKNOWN) + { + cf = ct; + ct = 0; + code = new_code; + } + } + + if (compare_code != UNKNOWN) + { + /* notl op1 (if needed) + sarl $31, op1 + andl (cf-ct), op1 + addl ct, op1 + + For x < 0 (resp. x <= -1) there will be no notl, + so if possible swap the constants to get rid of the + complement. 
+ True/false will be -1/0 while code below (store flag + followed by decrement) is 0/-1, so the constants need + to be exchanged once more. */ + + if (compare_code == GE || !cf) + { + code = reverse_condition (code); + compare_code = LT; + } + else + std::swap (ct, cf); + + out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1); + } + else + { + out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1); + + out = expand_simple_binop (mode, PLUS, copy_rtx (out), + constm1_rtx, + copy_rtx (out), 1, OPTAB_DIRECT); + } + + out = expand_simple_binop (mode, AND, copy_rtx (out), + gen_int_mode (cf - ct, mode), + copy_rtx (out), 1, OPTAB_DIRECT); + if (ct) + out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), + copy_rtx (out), 1, OPTAB_DIRECT); + if (!rtx_equal_p (out, operands[0])) + emit_move_insn (operands[0], copy_rtx (out)); + + return true; + } + } + + if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) + { + /* Try a few things more with specific constants and a variable. */ + + optab op; + rtx var, orig_out, out, tmp; + + if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2) + return false; + + /* If one of the two operands is an interesting constant, load a + constant with the above and mask it in with a logical operation. */ + + if (CONST_INT_P (operands[2])) + { + var = operands[3]; + if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) + operands[3] = constm1_rtx, op = and_optab; + else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) + operands[3] = const0_rtx, op = ior_optab; + else + return false; + } + else if (CONST_INT_P (operands[3])) + { + var = operands[2]; + if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) + operands[2] = constm1_rtx, op = and_optab; + else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx) + operands[2] = const0_rtx, op = ior_optab; + else + return false; + } + else + return false; + + orig_out = operands[0]; + tmp = gen_reg_rtx (mode); + operands[0] = tmp; + + /* Recurse to get the constant loaded. */ + if (!ix86_expand_int_movcc (operands)) + return false; + + /* Mask in the interesting variable. */ + out = expand_binop (mode, op, var, tmp, orig_out, 0, + OPTAB_WIDEN); + if (!rtx_equal_p (out, orig_out)) + emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); + + return true; + } + + /* + * For comparison with above, + * + * movl cf,dest + * movl ct,tmp + * cmpl op1,op2 + * cmovcc tmp,dest + * + * Size 15. + */ + + if (! nonimmediate_operand (operands[2], mode)) + operands[2] = force_reg (mode, operands[2]); + if (! nonimmediate_operand (operands[3], mode)) + operands[3] = force_reg (mode, operands[3]); + + if (! register_operand (operands[2], VOIDmode) + && (mode == QImode + || ! register_operand (operands[3], VOIDmode))) + operands[2] = force_reg (mode, operands[2]); + + if (mode == QImode + && ! register_operand (operands[3], VOIDmode)) + operands[3] = force_reg (mode, operands[3]); + + emit_insn (compare_seq); + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_IF_THEN_ELSE (mode, + compare_op, operands[2], + operands[3]))); + return true; +} + +/* Swap, force into registers, or otherwise massage the two operands + to an sse comparison with a mask result. Thus we differ a bit from + ix86_prepare_fp_compare_args which expects to produce a flags result. + + The DEST operand exists to help determine whether to commute commutative + operators. The POP0/POP1 operands are updated in place. The new + comparison code is returned, or UNKNOWN if not implementable. 
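+   Before AVX only LT/LE/UNGT/UNGE (and the EQ/NE/ORDERED/UNORDERED
+   tests) can be emitted directly, so GE/GT/UNLE/UNLT are handled by
+   swapping both the operands and the condition, while LTGT and UNEQ
+   have no single-instruction form and yield UNKNOWN.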
*/ + +static enum rtx_code +ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, + rtx *pop0, rtx *pop1) +{ + switch (code) + { + case LTGT: + case UNEQ: + /* AVX supports all the needed comparisons. */ + if (TARGET_AVX) + break; + /* We have no LTGT as an operator. We could implement it with + NE & ORDERED, but this requires an extra temporary. It's + not clear that it's worth it. */ + return UNKNOWN; + + case LT: + case LE: + case UNGT: + case UNGE: + /* These are supported directly. */ + break; + + case EQ: + case NE: + case UNORDERED: + case ORDERED: + /* AVX has 3 operand comparisons, no need to swap anything. */ + if (TARGET_AVX) + break; + /* For commutative operators, try to canonicalize the destination + operand to be first in the comparison - this helps reload to + avoid extra moves. */ + if (!dest || !rtx_equal_p (dest, *pop1)) + break; + /* FALLTHRU */ + + case GE: + case GT: + case UNLE: + case UNLT: + /* These are not supported directly before AVX, and furthermore + ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the + comparison operands to transform into something that is + supported. */ + std::swap (*pop0, *pop1); + code = swap_condition (code); + break; + + default: + gcc_unreachable (); + } + + return code; +} + +/* Detect conditional moves that exactly match min/max operational + semantics. Note that this is IEEE safe, as long as we don't + interchange the operands. + + Returns FALSE if this conditional move doesn't match a MIN/MAX, + and TRUE if the operation is successful and instructions are emitted. */ + +static bool +ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, + rtx cmp_op1, rtx if_true, rtx if_false) +{ + machine_mode mode; + bool is_min; + rtx tmp; + + if (code == LT) + ; + else if (code == UNGE) + std::swap (if_true, if_false); + else + return false; + + if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false)) + is_min = true; + else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false)) + is_min = false; + else + return false; + + mode = GET_MODE (dest); + + /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, + but MODE may be a vector mode and thus not appropriate. */ + if (!flag_finite_math_only || flag_signed_zeros) + { + int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX; + rtvec v; + + if_true = force_reg (mode, if_true); + v = gen_rtvec (2, if_true, if_false); + tmp = gen_rtx_UNSPEC (mode, v, u); + } + else + { + code = is_min ? SMIN : SMAX; + tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false); + } + + emit_insn (gen_rtx_SET (dest, tmp)); + return true; +} + +/* Expand an sse vector comparison. Return the register with the result. */ + +static rtx +ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, + rtx op_true, rtx op_false) +{ + machine_mode mode = GET_MODE (dest); + machine_mode cmp_ops_mode = GET_MODE (cmp_op0); + + /* In general case result of comparison can differ from operands' type. */ + machine_mode cmp_mode; + + /* In AVX512F the result of comparison is an integer mask. 
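+     For 512-bit operands the result therefore uses the integer mode
+     with one bit per element (GET_MODE_NUNITS bits wide) rather than a
+     vector mode.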
*/ + bool maskcmp = false; + rtx x; + + if (GET_MODE_SIZE (cmp_ops_mode) == 64) + { + cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0); + gcc_assert (cmp_mode != BLKmode); + + maskcmp = true; + } + else + cmp_mode = cmp_ops_mode; + + + cmp_op0 = force_reg (cmp_ops_mode, cmp_op0); + if (!nonimmediate_operand (cmp_op1, cmp_ops_mode)) + cmp_op1 = force_reg (cmp_ops_mode, cmp_op1); + + if (optimize + || (maskcmp && cmp_mode != mode) + || (op_true && reg_overlap_mentioned_p (dest, op_true)) + || (op_false && reg_overlap_mentioned_p (dest, op_false))) + dest = gen_reg_rtx (maskcmp ? cmp_mode : mode); + + /* Compare patterns for int modes are unspec in AVX512F only. */ + if (maskcmp && (code == GT || code == EQ)) + { + rtx (*gen)(rtx, rtx, rtx); + + switch (cmp_ops_mode) + { + case V64QImode: + gcc_assert (TARGET_AVX512BW); + gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1; + break; + case V32HImode: + gcc_assert (TARGET_AVX512BW); + gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1; + break; + case V16SImode: + gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1; + break; + case V8DImode: + gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1; + break; + default: + gen = NULL; + } + + if (gen) + { + emit_insn (gen (dest, cmp_op0, cmp_op1)); + return dest; + } + } + x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1); + + if (cmp_mode != mode && !maskcmp) + { + x = force_reg (cmp_ops_mode, x); + convert_move (dest, x, false); + } + else + emit_insn (gen_rtx_SET (dest, x)); + + return dest; +} + +/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical + operations. This is used for both scalar and vector conditional moves. */ + +void +ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) +{ + machine_mode mode = GET_MODE (dest); + machine_mode cmpmode = GET_MODE (cmp); + + /* In AVX512F the result of comparison is an integer mask. */ + bool maskcmp = (mode != cmpmode && TARGET_AVX512F); + + rtx t2, t3, x; + + /* If we have an integer mask and FP value then we need + to cast mask to FP mode. 
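+     The cast is done with a zero-offset SUBREG, i.e. a bit-for-bit
+     reinterpretation of the mask register rather than a value
+     conversion.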
*/ + if (mode != cmpmode && VECTOR_MODE_P (cmpmode)) + { + cmp = force_reg (cmpmode, cmp); + cmp = gen_rtx_SUBREG (mode, cmp, 0); + } + + if (vector_all_ones_operand (op_true, mode) + && rtx_equal_p (op_false, CONST0_RTX (mode)) + && !maskcmp) + { + emit_insn (gen_rtx_SET (dest, cmp)); + } + else if (op_false == CONST0_RTX (mode) + && !maskcmp) + { + op_true = force_reg (mode, op_true); + x = gen_rtx_AND (mode, cmp, op_true); + emit_insn (gen_rtx_SET (dest, x)); + } + else if (op_true == CONST0_RTX (mode) + && !maskcmp) + { + op_false = force_reg (mode, op_false); + x = gen_rtx_NOT (mode, cmp); + x = gen_rtx_AND (mode, x, op_false); + emit_insn (gen_rtx_SET (dest, x)); + } + else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode) + && !maskcmp) + { + op_false = force_reg (mode, op_false); + x = gen_rtx_IOR (mode, cmp, op_false); + emit_insn (gen_rtx_SET (dest, x)); + } + else if (TARGET_XOP + && !maskcmp) + { + op_true = force_reg (mode, op_true); + + if (!nonimmediate_operand (op_false, mode)) + op_false = force_reg (mode, op_false); + + emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp, + op_true, + op_false))); + } + else + { + rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; + rtx d = dest; + + if (!nonimmediate_operand (op_true, mode)) + op_true = force_reg (mode, op_true); + + op_false = force_reg (mode, op_false); + + switch (mode) + { + case V4SFmode: + if (TARGET_SSE4_1) + gen = gen_sse4_1_blendvps; + break; + case V2DFmode: + if (TARGET_SSE4_1) + gen = gen_sse4_1_blendvpd; + break; + case V16QImode: + case V8HImode: + case V4SImode: + case V2DImode: + if (TARGET_SSE4_1) + { + gen = gen_sse4_1_pblendvb; + if (mode != V16QImode) + d = gen_reg_rtx (V16QImode); + op_false = gen_lowpart (V16QImode, op_false); + op_true = gen_lowpart (V16QImode, op_true); + cmp = gen_lowpart (V16QImode, cmp); + } + break; + case V8SFmode: + if (TARGET_AVX) + gen = gen_avx_blendvps256; + break; + case V4DFmode: + if (TARGET_AVX) + gen = gen_avx_blendvpd256; + break; + case V32QImode: + case V16HImode: + case V8SImode: + case V4DImode: + if (TARGET_AVX2) + { + gen = gen_avx2_pblendvb; + if (mode != V32QImode) + d = gen_reg_rtx (V32QImode); + op_false = gen_lowpart (V32QImode, op_false); + op_true = gen_lowpart (V32QImode, op_true); + cmp = gen_lowpart (V32QImode, cmp); + } + break; + + case V64QImode: + gen = gen_avx512bw_blendmv64qi; + break; + case V32HImode: + gen = gen_avx512bw_blendmv32hi; + break; + case V16SImode: + gen = gen_avx512f_blendmv16si; + break; + case V8DImode: + gen = gen_avx512f_blendmv8di; + break; + case V8DFmode: + gen = gen_avx512f_blendmv8df; + break; + case V16SFmode: + gen = gen_avx512f_blendmv16sf; + break; + + default: + break; + } + + if (gen != NULL) + { + emit_insn (gen (d, op_false, op_true, cmp)); + if (d != dest) + emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d)); + } + else + { + op_true = force_reg (mode, op_true); + + t2 = gen_reg_rtx (mode); + if (optimize) + t3 = gen_reg_rtx (mode); + else + t3 = dest; + + x = gen_rtx_AND (mode, op_true, cmp); + emit_insn (gen_rtx_SET (t2, x)); + + x = gen_rtx_NOT (mode, cmp); + x = gen_rtx_AND (mode, x, op_false); + emit_insn (gen_rtx_SET (t3, x)); + + x = gen_rtx_IOR (mode, t3, t2); + emit_insn (gen_rtx_SET (dest, x)); + } + } +} + +/* Expand a floating-point conditional move. Return true if successful. 
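+   For SSE math the condition is materialized as a full compare mask and
+   handled by ix86_expand_sse_movcc (or by the min/max shortcut);
+   otherwise an fcmov-style conditional move is emitted, falling back to
+   a setcc into a QImode temporary when the comparison is not directly
+   usable by fcmov.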
*/ + +bool +ix86_expand_fp_movcc (rtx operands[]) +{ + machine_mode mode = GET_MODE (operands[0]); + enum rtx_code code = GET_CODE (operands[1]); + rtx tmp, compare_op; + rtx op0 = XEXP (operands[1], 0); + rtx op1 = XEXP (operands[1], 1); + + if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) + { + machine_mode cmode; + + /* Since we've no cmove for sse registers, don't force bad register + allocation just to gain access to it. Deny movcc when the + comparison mode doesn't match the move mode. */ + cmode = GET_MODE (op0); + if (cmode == VOIDmode) + cmode = GET_MODE (op1); + if (cmode != mode) + return false; + + code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1); + if (code == UNKNOWN) + return false; + + if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1, + operands[2], operands[3])) + return true; + + tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1, + operands[2], operands[3]); + ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]); + return true; + } + + if (GET_MODE (op0) == TImode + || (GET_MODE (op0) == DImode + && !TARGET_64BIT)) + return false; + + /* The floating point conditional move instructions don't directly + support conditions resulting from a signed integer comparison. */ + + compare_op = ix86_expand_compare (code, op0, op1); + if (!fcmov_comparison_operator (compare_op, VOIDmode)) + { + tmp = gen_reg_rtx (QImode); + ix86_expand_setcc (tmp, code, op0, op1); + + compare_op = ix86_expand_compare (NE, tmp, const0_rtx); + } + + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_IF_THEN_ELSE (mode, compare_op, + operands[2], operands[3]))); + + return true; +} + +/* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */ + +static int +ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code) +{ + switch (code) + { + case EQ: + return 0; + case LT: + case LTU: + return 1; + case LE: + case LEU: + return 2; + case NE: + return 4; + case GE: + case GEU: + return 5; + case GT: + case GTU: + return 6; + default: + gcc_unreachable (); + } +} + +/* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */ + +static int +ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code) +{ + switch (code) + { + case EQ: + return 0x00; + case NE: + return 0x04; + case GT: + return 0x0e; + case LE: + return 0x02; + case GE: + return 0x0d; + case LT: + return 0x01; + case UNLE: + return 0x0a; + case UNLT: + return 0x09; + case UNGE: + return 0x05; + case UNGT: + return 0x06; + case UNEQ: + return 0x18; + case LTGT: + return 0x0c; + case ORDERED: + return 0x07; + case UNORDERED: + return 0x03; + default: + gcc_unreachable (); + } +} + +/* Return immediate value to be used in UNSPEC_PCMP + for comparison CODE in MODE. */ + +static int +ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode) +{ + if (FLOAT_MODE_P (mode)) + return ix86_fp_cmp_code_to_pcmp_immediate (code); + return ix86_int_cmp_code_to_pcmp_immediate (code); +} + +/* Expand AVX-512 vector comparison. 
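+ The comparison is emitted as a mask-mode UNSPEC_PCMP (or
+ UNSPEC_UNSIGNED_PCMP for the unsigned codes) whose third operand is the
+ immediate produced by ix86_cmp_code_to_pcmp_immediate above.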
*/ + +bool +ix86_expand_mask_vec_cmp (rtx operands[]) +{ + machine_mode mask_mode = GET_MODE (operands[0]); + machine_mode cmp_mode = GET_MODE (operands[2]); + enum rtx_code code = GET_CODE (operands[1]); + rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode)); + int unspec_code; + rtx unspec; + + switch (code) + { + case LEU: + case GTU: + case GEU: + case LTU: + unspec_code = UNSPEC_UNSIGNED_PCMP; + break; + + default: + unspec_code = UNSPEC_PCMP; + } + + unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2], + operands[3], imm), + unspec_code); + emit_insn (gen_rtx_SET (operands[0], unspec)); + + return true; +} + +/* Expand fp vector comparison. */ + +bool +ix86_expand_fp_vec_cmp (rtx operands[]) +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx cmp; + + code = ix86_prepare_sse_fp_compare_args (operands[0], code, + &operands[2], &operands[3]); + if (code == UNKNOWN) + { + rtx temp; + switch (GET_CODE (operands[1])) + { + case LTGT: + temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2], + operands[3], NULL, NULL); + cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2], + operands[3], NULL, NULL); + code = AND; + break; + case UNEQ: + temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2], + operands[3], NULL, NULL); + cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2], + operands[3], NULL, NULL); + code = IOR; + break; + default: + gcc_unreachable (); + } + cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, + OPTAB_DIRECT); + } + else + cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3], + operands[1], operands[2]); + + if (operands[0] != cmp) + emit_move_insn (operands[0], cmp); + + return true; +} + +static rtx +ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, + rtx op_true, rtx op_false, bool *negate) +{ + machine_mode data_mode = GET_MODE (dest); + machine_mode mode = GET_MODE (cop0); + rtx x; + + *negate = false; + + /* XOP supports all of the comparisons on all 128-bit vector int types. */ + if (TARGET_XOP + && (mode == V16QImode || mode == V8HImode + || mode == V4SImode || mode == V2DImode)) + ; + else + { + /* Canonicalize the comparison to EQ, GT, GTU. */ + switch (code) + { + case EQ: + case GT: + case GTU: + break; + + case NE: + case LE: + case LEU: + code = reverse_condition (code); + *negate = true; + break; + + case GE: + case GEU: + code = reverse_condition (code); + *negate = true; + /* FALLTHRU */ + + case LT: + case LTU: + std::swap (cop0, cop1); + code = swap_condition (code); + break; + + default: + gcc_unreachable (); + } + + /* Only SSE4.1/SSE4.2 supports V2DImode. */ + if (mode == V2DImode) + { + switch (code) + { + case EQ: + /* SSE4.1 supports EQ. */ + if (!TARGET_SSE4_1) + return NULL; + break; + + case GT: + case GTU: + /* SSE4.2 supports GT/GTU. */ + if (!TARGET_SSE4_2) + return NULL; + break; + + default: + gcc_unreachable (); + } + } + + /* Unsigned parallel compare is not supported by the hardware. + Play some tricks to turn this into a signed comparison + against 0. 
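+ For the SI/DI element modes both operands are biased by the sign-bit
+ mask (i.e. INT_MIN is subtracted) so that unsigned GTU becomes signed
+ GT; for the QI/HI element modes an unsigned saturating subtraction is
+ used instead and the result is tested against zero, since the
+ difference is zero exactly when the first operand is not greater.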
*/ + if (code == GTU) + { + cop0 = force_reg (mode, cop0); + + switch (mode) + { + case V16SImode: + case V8DImode: + case V8SImode: + case V4DImode: + case V4SImode: + case V2DImode: + { + rtx t1, t2, mask; + rtx (*gen_sub3) (rtx, rtx, rtx); + + switch (mode) + { + case V16SImode: gen_sub3 = gen_subv16si3; break; + case V8DImode: gen_sub3 = gen_subv8di3; break; + case V8SImode: gen_sub3 = gen_subv8si3; break; + case V4DImode: gen_sub3 = gen_subv4di3; break; + case V4SImode: gen_sub3 = gen_subv4si3; break; + case V2DImode: gen_sub3 = gen_subv2di3; break; + default: + gcc_unreachable (); + } + /* Subtract (-(INT MAX) - 1) from both operands to make + them signed. */ + mask = ix86_build_signbit_mask (mode, true, false); + t1 = gen_reg_rtx (mode); + emit_insn (gen_sub3 (t1, cop0, mask)); + + t2 = gen_reg_rtx (mode); + emit_insn (gen_sub3 (t2, cop1, mask)); + + cop0 = t1; + cop1 = t2; + code = GT; + } + break; + + case V64QImode: + case V32HImode: + case V32QImode: + case V16HImode: + case V16QImode: + case V8HImode: + /* Perform a parallel unsigned saturating subtraction. */ + x = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, + cop1))); + + cop0 = x; + cop1 = CONST0_RTX (mode); + code = EQ; + *negate = !*negate; + break; + + default: + gcc_unreachable (); + } + } + } + + if (*negate) + std::swap (op_true, op_false); + + /* Allow the comparison to be done in one mode, but the movcc to + happen in another mode. */ + if (data_mode == mode) + { + x = ix86_expand_sse_cmp (dest, code, cop0, cop1, + op_true, op_false); + } + else + { + gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode)); + x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1, + op_true, op_false); + if (GET_MODE (x) == mode) + x = gen_lowpart (data_mode, x); + } + + return x; +} + +/* Expand integer vector comparison. */ + +bool +ix86_expand_int_vec_cmp (rtx operands[]) +{ + rtx_code code = GET_CODE (operands[1]); + bool negate = false; + rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2], + operands[3], NULL, NULL, &negate); + + if (!cmp) + return false; + + if (negate) + cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp, + CONST0_RTX (GET_MODE (cmp)), + NULL, NULL, &negate); + + gcc_assert (!negate); + + if (operands[0] != cmp) + emit_move_insn (operands[0], cmp); + + return true; +} + +/* Expand a floating-point vector conditional move; a vcond operation + rather than a movcc operation. 
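+ LTGT and UNEQ have no direct SSE compare, so they are built from an
+ ORDERED respectively UNORDERED test combined with an NE/EQ compare
+ before the mask is handed to ix86_expand_sse_movcc.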
*/ + +bool +ix86_expand_fp_vcond (rtx operands[]) +{ + enum rtx_code code = GET_CODE (operands[3]); + rtx cmp; + + code = ix86_prepare_sse_fp_compare_args (operands[0], code, + &operands[4], &operands[5]); + if (code == UNKNOWN) + { + rtx temp; + switch (GET_CODE (operands[3])) + { + case LTGT: + temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4], + operands[5], operands[0], operands[0]); + cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4], + operands[5], operands[1], operands[2]); + code = AND; + break; + case UNEQ: + temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4], + operands[5], operands[0], operands[0]); + cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4], + operands[5], operands[1], operands[2]); + code = IOR; + break; + default: + gcc_unreachable (); + } + cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, + OPTAB_DIRECT); + ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); + return true; + } + + if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], + operands[5], operands[1], operands[2])) + return true; + + cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], + operands[1], operands[2]); + ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); + return true; +} + +/* Expand a signed/unsigned integral vector conditional move. */ + +bool +ix86_expand_int_vcond (rtx operands[]) +{ + machine_mode data_mode = GET_MODE (operands[0]); + machine_mode mode = GET_MODE (operands[4]); + enum rtx_code code = GET_CODE (operands[3]); + bool negate = false; + rtx x, cop0, cop1; + + cop0 = operands[4]; + cop1 = operands[5]; + + /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31 + and x < 0 ? 1 : 0 into (unsigned) x >> 31. */ + if ((code == LT || code == GE) + && data_mode == mode + && cop1 == CONST0_RTX (mode) + && operands[1 + (code == LT)] == CONST0_RTX (data_mode) + && GET_MODE_UNIT_SIZE (data_mode) > 1 + && GET_MODE_UNIT_SIZE (data_mode) <= 8 + && (GET_MODE_SIZE (data_mode) == 16 + || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32))) + { + rtx negop = operands[2 - (code == LT)]; + int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1; + if (negop == CONST1_RTX (data_mode)) + { + rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift), + operands[0], 1, OPTAB_DIRECT); + if (res != operands[0]) + emit_move_insn (operands[0], res); + return true; + } + else if (GET_MODE_INNER (data_mode) != DImode + && vector_all_ones_operand (negop, data_mode)) + { + rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift), + operands[0], 0, OPTAB_DIRECT); + if (res != operands[0]) + emit_move_insn (operands[0], res); + return true; + } + } + + if (!nonimmediate_operand (cop1, mode)) + cop1 = force_reg (mode, cop1); + if (!general_operand (operands[1], data_mode)) + operands[1] = force_reg (data_mode, operands[1]); + if (!general_operand (operands[2], data_mode)) + operands[2] = force_reg (data_mode, operands[2]); + + x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1, + operands[1], operands[2], &negate); + + if (!x) + return false; + + ix86_expand_sse_movcc (operands[0], x, operands[1+negate], + operands[2-negate]); + return true; +} + +/* AVX512F does support 64-byte integer vector operations, + thus the longest vector we are faced with is V64QImode. 
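+ MAX_VECT_LEN below is therefore 64, the element count of that widest
+ mode.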
*/ +#define MAX_VECT_LEN 64 + +struct expand_vec_perm_d +{ + rtx target, op0, op1; + unsigned char perm[MAX_VECT_LEN]; + machine_mode vmode; + unsigned char nelt; + bool one_operand_p; + bool testing_p; +}; + +static bool +ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1, + struct expand_vec_perm_d *d) +{ + /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const + expander, so args are either in d, or in op0, op1 etc. */ + machine_mode mode = GET_MODE (d ? d->op0 : op0); + machine_mode maskmode = mode; + rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; + + switch (mode) + { + case V8HImode: + if (TARGET_AVX512VL && TARGET_AVX512BW) + gen = gen_avx512vl_vpermi2varv8hi3; + break; + case V16HImode: + if (TARGET_AVX512VL && TARGET_AVX512BW) + gen = gen_avx512vl_vpermi2varv16hi3; + break; + case V64QImode: + if (TARGET_AVX512VBMI) + gen = gen_avx512bw_vpermi2varv64qi3; + break; + case V32HImode: + if (TARGET_AVX512BW) + gen = gen_avx512bw_vpermi2varv32hi3; + break; + case V4SImode: + if (TARGET_AVX512VL) + gen = gen_avx512vl_vpermi2varv4si3; + break; + case V8SImode: + if (TARGET_AVX512VL) + gen = gen_avx512vl_vpermi2varv8si3; + break; + case V16SImode: + if (TARGET_AVX512F) + gen = gen_avx512f_vpermi2varv16si3; + break; + case V4SFmode: + if (TARGET_AVX512VL) + { + gen = gen_avx512vl_vpermi2varv4sf3; + maskmode = V4SImode; + } + break; + case V8SFmode: + if (TARGET_AVX512VL) + { + gen = gen_avx512vl_vpermi2varv8sf3; + maskmode = V8SImode; + } + break; + case V16SFmode: + if (TARGET_AVX512F) + { + gen = gen_avx512f_vpermi2varv16sf3; + maskmode = V16SImode; + } + break; + case V2DImode: + if (TARGET_AVX512VL) + gen = gen_avx512vl_vpermi2varv2di3; + break; + case V4DImode: + if (TARGET_AVX512VL) + gen = gen_avx512vl_vpermi2varv4di3; + break; + case V8DImode: + if (TARGET_AVX512F) + gen = gen_avx512f_vpermi2varv8di3; + break; + case V2DFmode: + if (TARGET_AVX512VL) + { + gen = gen_avx512vl_vpermi2varv2df3; + maskmode = V2DImode; + } + break; + case V4DFmode: + if (TARGET_AVX512VL) + { + gen = gen_avx512vl_vpermi2varv4df3; + maskmode = V4DImode; + } + break; + case V8DFmode: + if (TARGET_AVX512F) + { + gen = gen_avx512f_vpermi2varv8df3; + maskmode = V8DImode; + } + break; + default: + break; + } + + if (gen == NULL) + return false; + + /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const + expander, so args are either in d, or in op0, op1 etc. */ + if (d) + { + rtx vec[64]; + target = d->target; + op0 = d->op0; + op1 = d->op1; + for (int i = 0; i < d->nelt; ++i) + vec[i] = GEN_INT (d->perm[i]); + mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec)); + } + + emit_insn (gen (target, op0, force_reg (maskmode, mask), op1)); + return true; +} + +/* Expand a variable vector permutation. */ + +void +ix86_expand_vec_perm (rtx operands[]) +{ + rtx target = operands[0]; + rtx op0 = operands[1]; + rtx op1 = operands[2]; + rtx mask = operands[3]; + rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32]; + machine_mode mode = GET_MODE (op0); + machine_mode maskmode = GET_MODE (mask); + int w, e, i; + bool one_operand_shuffle = rtx_equal_p (op0, op1); + + /* Number of elements in the vector. */ + w = GET_MODE_NUNITS (mode); + e = GET_MODE_UNIT_SIZE (mode); + gcc_assert (w <= 64); + + if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL)) + return; + + if (TARGET_AVX2) + { + if (mode == V4DImode || mode == V4DFmode || mode == V16HImode) + { + /* Unfortunately, the VPERMQ and VPERMPD instructions only support + an constant shuffle operand. 
With a tiny bit of effort we can + use VPERMD instead. A re-interpretation stall for V4DFmode is + unfortunate but there's no avoiding it. + Similarly for V16HImode we don't have instructions for variable + shuffling, while for V32QImode we can use after preparing suitable + masks vpshufb; vpshufb; vpermq; vpor. */ + + if (mode == V16HImode) + { + maskmode = mode = V32QImode; + w = 32; + e = 1; + } + else + { + maskmode = mode = V8SImode; + w = 8; + e = 4; + } + t1 = gen_reg_rtx (maskmode); + + /* Replicate the low bits of the V4DImode mask into V8SImode: + mask = { A B C D } + t1 = { A A B B C C D D }. */ + for (i = 0; i < w / 2; ++i) + vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2); + vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); + vt = force_reg (maskmode, vt); + mask = gen_lowpart (maskmode, mask); + if (maskmode == V8SImode) + emit_insn (gen_avx2_permvarv8si (t1, mask, vt)); + else + emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt)); + + /* Multiply the shuffle indicies by two. */ + t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1, + OPTAB_DIRECT); + + /* Add one to the odd shuffle indicies: + t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */ + for (i = 0; i < w / 2; ++i) + { + vec[i * 2] = const0_rtx; + vec[i * 2 + 1] = const1_rtx; + } + vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); + vt = validize_mem (force_const_mem (maskmode, vt)); + t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1, + OPTAB_DIRECT); + + /* Continue as if V8SImode (resp. V32QImode) was used initially. */ + operands[3] = mask = t1; + target = gen_reg_rtx (mode); + op0 = gen_lowpart (mode, op0); + op1 = gen_lowpart (mode, op1); + } + + switch (mode) + { + case V8SImode: + /* The VPERMD and VPERMPS instructions already properly ignore + the high bits of the shuffle elements. No need for us to + perform an AND ourselves. */ + if (one_operand_shuffle) + { + emit_insn (gen_avx2_permvarv8si (target, op0, mask)); + if (target != operands[0]) + emit_move_insn (operands[0], + gen_lowpart (GET_MODE (operands[0]), target)); + } + else + { + t1 = gen_reg_rtx (V8SImode); + t2 = gen_reg_rtx (V8SImode); + emit_insn (gen_avx2_permvarv8si (t1, op0, mask)); + emit_insn (gen_avx2_permvarv8si (t2, op1, mask)); + goto merge_two; + } + return; + + case V8SFmode: + mask = gen_lowpart (V8SImode, mask); + if (one_operand_shuffle) + emit_insn (gen_avx2_permvarv8sf (target, op0, mask)); + else + { + t1 = gen_reg_rtx (V8SFmode); + t2 = gen_reg_rtx (V8SFmode); + emit_insn (gen_avx2_permvarv8sf (t1, op0, mask)); + emit_insn (gen_avx2_permvarv8sf (t2, op1, mask)); + goto merge_two; + } + return; + + case V4SImode: + /* By combining the two 128-bit input vectors into one 256-bit + input vector, we can use VPERMD and VPERMPS for the full + two-operand shuffle. 
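+ The two V4SImode inputs are concatenated into one V8SImode register,
+ the mask is duplicated into both halves, a single VPERMD selects the
+ elements, and the low 128 bits are extracted as the result.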
*/ + t1 = gen_reg_rtx (V8SImode); + t2 = gen_reg_rtx (V8SImode); + emit_insn (gen_avx_vec_concatv8si (t1, op0, op1)); + emit_insn (gen_avx_vec_concatv8si (t2, mask, mask)); + emit_insn (gen_avx2_permvarv8si (t1, t1, t2)); + emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx)); + return; + + case V4SFmode: + t1 = gen_reg_rtx (V8SFmode); + t2 = gen_reg_rtx (V8SImode); + mask = gen_lowpart (V4SImode, mask); + emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1)); + emit_insn (gen_avx_vec_concatv8si (t2, mask, mask)); + emit_insn (gen_avx2_permvarv8sf (t1, t1, t2)); + emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx)); + return; + + case V32QImode: + t1 = gen_reg_rtx (V32QImode); + t2 = gen_reg_rtx (V32QImode); + t3 = gen_reg_rtx (V32QImode); + vt2 = GEN_INT (-128); + for (i = 0; i < 32; i++) + vec[i] = vt2; + vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec)); + vt = force_reg (V32QImode, vt); + for (i = 0; i < 32; i++) + vec[i] = i < 16 ? vt2 : const0_rtx; + vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec)); + vt2 = force_reg (V32QImode, vt2); + /* From mask create two adjusted masks, which contain the same + bits as mask in the low 7 bits of each vector element. + The first mask will have the most significant bit clear + if it requests element from the same 128-bit lane + and MSB set if it requests element from the other 128-bit lane. + The second mask will have the opposite values of the MSB, + and additionally will have its 128-bit lanes swapped. + E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have + t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and + t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ... + stands for other 12 bytes. */ + /* The bit whether element is from the same lane or the other + lane is bit 4, so shift it up by 3 to the MSB position. */ + t5 = gen_reg_rtx (V4DImode); + emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask), + GEN_INT (3))); + /* Clear MSB bits from the mask just in case it had them set. */ + emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask)); + /* After this t1 will have MSB set for elements from other lane. */ + emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2)); + /* Clear bits other than MSB. */ + emit_insn (gen_andv32qi3 (t1, t1, vt)); + /* Or in the lower bits from mask into t3. */ + emit_insn (gen_iorv32qi3 (t3, t1, t2)); + /* And invert MSB bits in t1, so MSB is set for elements from the same + lane. */ + emit_insn (gen_xorv32qi3 (t1, t1, vt)); + /* Swap 128-bit lanes in t3. */ + t6 = gen_reg_rtx (V4DImode); + emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3), + const2_rtx, GEN_INT (3), + const0_rtx, const1_rtx)); + /* And or in the lower bits from mask into t1. */ + emit_insn (gen_iorv32qi3 (t1, t1, t2)); + if (one_operand_shuffle) + { + /* Each of these shuffles will put 0s in places where + element from the other 128-bit lane is needed, otherwise + will shuffle in the requested value. */ + emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, + gen_lowpart (V32QImode, t6))); + emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1)); + /* For t3 the 128-bit lanes are swapped again. */ + t7 = gen_reg_rtx (V4DImode); + emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3), + const2_rtx, GEN_INT (3), + const0_rtx, const1_rtx)); + /* And oring both together leads to the result. 
*/ + emit_insn (gen_iorv32qi3 (target, t1, + gen_lowpart (V32QImode, t7))); + if (target != operands[0]) + emit_move_insn (operands[0], + gen_lowpart (GET_MODE (operands[0]), target)); + return; + } + + t4 = gen_reg_rtx (V32QImode); + /* Similarly to the above one_operand_shuffle code, + just for repeated twice for each operand. merge_two: + code will merge the two results together. */ + emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, + gen_lowpart (V32QImode, t6))); + emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, + gen_lowpart (V32QImode, t6))); + emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1)); + emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1)); + t7 = gen_reg_rtx (V4DImode); + emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4), + const2_rtx, GEN_INT (3), + const0_rtx, const1_rtx)); + t8 = gen_reg_rtx (V4DImode); + emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3), + const2_rtx, GEN_INT (3), + const0_rtx, const1_rtx)); + emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7))); + emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8))); + t1 = t4; + t2 = t3; + goto merge_two; + + default: + gcc_assert (GET_MODE_SIZE (mode) <= 16); + break; + } + } + + if (TARGET_XOP) + { + /* The XOP VPPERM insn supports three inputs. By ignoring the + one_operand_shuffle special case, we avoid creating another + set of constant vectors in memory. */ + one_operand_shuffle = false; + + /* mask = mask & {2*w-1, ...} */ + vt = GEN_INT (2*w - 1); + } + else + { + /* mask = mask & {w-1, ...} */ + vt = GEN_INT (w - 1); + } + + for (i = 0; i < w; i++) + vec[i] = vt; + vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); + mask = expand_simple_binop (maskmode, AND, mask, vt, + NULL_RTX, 0, OPTAB_DIRECT); + + /* For non-QImode operations, convert the word permutation control + into a byte permutation control. */ + if (mode != V16QImode) + { + mask = expand_simple_binop (maskmode, ASHIFT, mask, + GEN_INT (exact_log2 (e)), + NULL_RTX, 0, OPTAB_DIRECT); + + /* Convert mask to vector of chars. */ + mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask)); + + /* Replicate each of the input bytes into byte positions: + (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8} + (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12} + (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */ + for (i = 0; i < 16; ++i) + vec[i] = GEN_INT (i/e * e); + vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec)); + vt = validize_mem (force_const_mem (V16QImode, vt)); + if (TARGET_XOP) + emit_insn (gen_xop_pperm (mask, mask, mask, vt)); + else + emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt)); + + /* Convert it into the byte positions by doing + mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */ + for (i = 0; i < 16; ++i) + vec[i] = GEN_INT (i % e); + vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec)); + vt = validize_mem (force_const_mem (V16QImode, vt)); + emit_insn (gen_addv16qi3 (mask, mask, vt)); + } + + /* The actual shuffle operations all operate on V16QImode. 
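+ Both inputs and the mask are therefore viewed as byte vectors via
+ gen_lowpart; in the two-operand case the two pshufb results are merged
+ at merge_two using ix86_expand_int_vcond, and the result is converted
+ back to the original mode at the end.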
*/ + op0 = gen_lowpart (V16QImode, op0); + op1 = gen_lowpart (V16QImode, op1); + + if (TARGET_XOP) + { + if (GET_MODE (target) != V16QImode) + target = gen_reg_rtx (V16QImode); + emit_insn (gen_xop_pperm (target, op0, op1, mask)); + if (target != operands[0]) + emit_move_insn (operands[0], + gen_lowpart (GET_MODE (operands[0]), target)); + } + else if (one_operand_shuffle) + { + if (GET_MODE (target) != V16QImode) + target = gen_reg_rtx (V16QImode); + emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask)); + if (target != operands[0]) + emit_move_insn (operands[0], + gen_lowpart (GET_MODE (operands[0]), target)); + } + else + { + rtx xops[6]; + bool ok; + + /* Shuffle the two input vectors independently. */ + t1 = gen_reg_rtx (V16QImode); + t2 = gen_reg_rtx (V16QImode); + emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask)); + emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask)); + + merge_two: + /* Then merge them together. The key is whether any given control + element contained a bit set that indicates the second word. */ + mask = operands[3]; + vt = GEN_INT (w); + if (maskmode == V2DImode && !TARGET_SSE4_1) + { + /* Without SSE4.1, we don't have V2DImode EQ. Perform one + more shuffle to convert the V2DI input mask into a V4SI + input mask. At which point the masking that expand_int_vcond + will work as desired. */ + rtx t3 = gen_reg_rtx (V4SImode); + emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask), + const0_rtx, const0_rtx, + const2_rtx, const2_rtx)); + mask = t3; + maskmode = V4SImode; + e = w = 4; + } + + for (i = 0; i < w; i++) + vec[i] = vt; + vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); + vt = force_reg (maskmode, vt); + mask = expand_simple_binop (maskmode, AND, mask, vt, + NULL_RTX, 0, OPTAB_DIRECT); + + if (GET_MODE (target) != mode) + target = gen_reg_rtx (mode); + xops[0] = target; + xops[1] = gen_lowpart (mode, t2); + xops[2] = gen_lowpart (mode, t1); + xops[3] = gen_rtx_EQ (maskmode, mask, vt); + xops[4] = mask; + xops[5] = vt; + ok = ix86_expand_int_vcond (xops); + gcc_assert (ok); + if (target != operands[0]) + emit_move_insn (operands[0], + gen_lowpart (GET_MODE (operands[0]), target)); + } +} + +/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is + true if we should do zero extension, else sign extension. HIGH_P is + true if we want the N/2 high elements, else the low elements. */ + +void +ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p) +{ + machine_mode imode = GET_MODE (src); + rtx tmp; + + if (TARGET_SSE4_1) + { + rtx (*unpack)(rtx, rtx); + rtx (*extract)(rtx, rtx) = NULL; + machine_mode halfmode = BLKmode; + + switch (imode) + { + case V64QImode: + if (unsigned_p) + unpack = gen_avx512bw_zero_extendv32qiv32hi2; + else + unpack = gen_avx512bw_sign_extendv32qiv32hi2; + halfmode = V32QImode; + extract + = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi; + break; + case V32QImode: + if (unsigned_p) + unpack = gen_avx2_zero_extendv16qiv16hi2; + else + unpack = gen_avx2_sign_extendv16qiv16hi2; + halfmode = V16QImode; + extract + = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi; + break; + case V32HImode: + if (unsigned_p) + unpack = gen_avx512f_zero_extendv16hiv16si2; + else + unpack = gen_avx512f_sign_extendv16hiv16si2; + halfmode = V16HImode; + extract + = high_p ? 
gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi; + break; + case V16HImode: + if (unsigned_p) + unpack = gen_avx2_zero_extendv8hiv8si2; + else + unpack = gen_avx2_sign_extendv8hiv8si2; + halfmode = V8HImode; + extract + = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi; + break; + case V16SImode: + if (unsigned_p) + unpack = gen_avx512f_zero_extendv8siv8di2; + else + unpack = gen_avx512f_sign_extendv8siv8di2; + halfmode = V8SImode; + extract + = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si; + break; + case V8SImode: + if (unsigned_p) + unpack = gen_avx2_zero_extendv4siv4di2; + else + unpack = gen_avx2_sign_extendv4siv4di2; + halfmode = V4SImode; + extract + = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si; + break; + case V16QImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv8qiv8hi2; + else + unpack = gen_sse4_1_sign_extendv8qiv8hi2; + break; + case V8HImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv4hiv4si2; + else + unpack = gen_sse4_1_sign_extendv4hiv4si2; + break; + case V4SImode: + if (unsigned_p) + unpack = gen_sse4_1_zero_extendv2siv2di2; + else + unpack = gen_sse4_1_sign_extendv2siv2di2; + break; + default: + gcc_unreachable (); + } + + if (GET_MODE_SIZE (imode) >= 32) + { + tmp = gen_reg_rtx (halfmode); + emit_insn (extract (tmp, src)); + } + else if (high_p) + { + /* Shift higher 8 bytes to lower 8 bytes. */ + tmp = gen_reg_rtx (V1TImode); + emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src), + GEN_INT (64))); + tmp = gen_lowpart (imode, tmp); + } + else + tmp = src; + + emit_insn (unpack (dest, tmp)); + } + else + { + rtx (*unpack)(rtx, rtx, rtx); + + switch (imode) + { + case V16QImode: + if (high_p) + unpack = gen_vec_interleave_highv16qi; + else + unpack = gen_vec_interleave_lowv16qi; + break; + case V8HImode: + if (high_p) + unpack = gen_vec_interleave_highv8hi; + else + unpack = gen_vec_interleave_lowv8hi; + break; + case V4SImode: + if (high_p) + unpack = gen_vec_interleave_highv4si; + else + unpack = gen_vec_interleave_lowv4si; + break; + default: + gcc_unreachable (); + } + + if (unsigned_p) + tmp = force_reg (imode, CONST0_RTX (imode)); + else + tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode), + src, pc_rtx, pc_rtx); + + rtx tmp2 = gen_reg_rtx (imode); + emit_insn (unpack (tmp2, src, tmp)); + emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2)); + } +} + +/* Expand conditional increment or decrement using adb/sbb instructions. + The default case using setcc followed by the conditional move can be + done by generic code. 
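+ E.g. x = x + (a < b) with unsigned operands becomes a compare that
+ leaves the condition in the carry flag followed by a single adc (or
+ sbb for the decrement direction) of a 0 or -1 constant.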
*/ +bool +ix86_expand_int_addcc (rtx operands[]) +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx flags; + rtx (*insn)(rtx, rtx, rtx, rtx, rtx); + rtx compare_op; + rtx val = const0_rtx; + bool fpcmp = false; + machine_mode mode; + rtx op0 = XEXP (operands[1], 0); + rtx op1 = XEXP (operands[1], 1); + + if (operands[3] != const1_rtx + && operands[3] != constm1_rtx) + return false; + if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op)) + return false; + code = GET_CODE (compare_op); + + flags = XEXP (compare_op, 0); + + if (GET_MODE (flags) == CCFPmode + || GET_MODE (flags) == CCFPUmode) + { + fpcmp = true; + code = ix86_fp_compare_code_to_integer (code); + } + + if (code != LTU) + { + val = constm1_rtx; + if (fpcmp) + PUT_CODE (compare_op, + reverse_condition_maybe_unordered + (GET_CODE (compare_op))); + else + PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); + } + + mode = GET_MODE (operands[0]); + + /* Construct either adc or sbb insn. */ + if ((code == LTU) == (operands[3] == constm1_rtx)) + { + switch (mode) + { + case QImode: + insn = gen_subqi3_carry; + break; + case HImode: + insn = gen_subhi3_carry; + break; + case SImode: + insn = gen_subsi3_carry; + break; + case DImode: + insn = gen_subdi3_carry; + break; + default: + gcc_unreachable (); + } + } + else + { + switch (mode) + { + case QImode: + insn = gen_addqi3_carry; + break; + case HImode: + insn = gen_addhi3_carry; + break; + case SImode: + insn = gen_addsi3_carry; + break; + case DImode: + insn = gen_adddi3_carry; + break; + default: + gcc_unreachable (); + } + } + emit_insn (insn (operands[0], operands[2], val, flags, compare_op)); + + return true; +} + + +/* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode, + but works for floating pointer parameters and nonoffsetable memories. + For pushes, it returns just stack offsets; the values will be saved + in the right order. Maximally three parts are generated. */ + +static int +ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode) +{ + int size; + + if (!TARGET_64BIT) + size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4; + else + size = (GET_MODE_SIZE (mode) + 4) / 8; + + gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand))); + gcc_assert (size >= 2 && size <= 4); + + /* Optimize constant pool reference to immediates. This is used by fp + moves, that force all constants to memory to allow combining. */ + if (MEM_P (operand) && MEM_READONLY_P (operand)) + { + rtx tmp = maybe_get_pool_constant (operand); + if (tmp) + operand = tmp; + } + + if (MEM_P (operand) && !offsettable_memref_p (operand)) + { + /* The only non-offsetable memories we handle are pushes. */ + int ok = push_operand (operand, VOIDmode); + + gcc_assert (ok); + + operand = copy_rtx (operand); + PUT_MODE (operand, word_mode); + parts[0] = parts[1] = parts[2] = parts[3] = operand; + return size; + } + + if (GET_CODE (operand) == CONST_VECTOR) + { + machine_mode imode = int_mode_for_mode (mode); + /* Caution: if we looked through a constant pool memory above, + the operand may actually have a different mode now. That's + ok, since we want to pun this all the way back to an integer. 
*/ + operand = simplify_subreg (imode, operand, GET_MODE (operand), 0); + gcc_assert (operand != NULL); + mode = imode; + } + + if (!TARGET_64BIT) + { + if (mode == DImode) + split_double_mode (mode, &operand, 1, &parts[0], &parts[1]); + else + { + int i; + + if (REG_P (operand)) + { + gcc_assert (reload_completed); + for (i = 0; i < size; i++) + parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i); + } + else if (offsettable_memref_p (operand)) + { + operand = adjust_address (operand, SImode, 0); + parts[0] = operand; + for (i = 1; i < size; i++) + parts[i] = adjust_address (operand, SImode, 4 * i); + } + else if (CONST_DOUBLE_P (operand)) + { + const REAL_VALUE_TYPE *r; + long l[4]; + + r = CONST_DOUBLE_REAL_VALUE (operand); + switch (mode) + { + case TFmode: + real_to_target (l, r, mode); + parts[3] = gen_int_mode (l[3], SImode); + parts[2] = gen_int_mode (l[2], SImode); + break; + case XFmode: + /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since + long double may not be 80-bit. */ + real_to_target (l, r, mode); + parts[2] = gen_int_mode (l[2], SImode); + break; + case DFmode: + REAL_VALUE_TO_TARGET_DOUBLE (*r, l); + break; + default: + gcc_unreachable (); + } + parts[1] = gen_int_mode (l[1], SImode); + parts[0] = gen_int_mode (l[0], SImode); + } + else + gcc_unreachable (); + } + } + else + { + if (mode == TImode) + split_double_mode (mode, &operand, 1, &parts[0], &parts[1]); + if (mode == XFmode || mode == TFmode) + { + machine_mode upper_mode = mode==XFmode ? SImode : DImode; + if (REG_P (operand)) + { + gcc_assert (reload_completed); + parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0); + parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1); + } + else if (offsettable_memref_p (operand)) + { + operand = adjust_address (operand, DImode, 0); + parts[0] = operand; + parts[1] = adjust_address (operand, upper_mode, 8); + } + else if (CONST_DOUBLE_P (operand)) + { + long l[4]; + + real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode); + + /* real_to_target puts 32-bit pieces in each long. */ + parts[0] = + gen_int_mode + ((l[0] & (HOST_WIDE_INT) 0xffffffff) + | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32), + DImode); + + if (upper_mode == SImode) + parts[1] = gen_int_mode (l[2], SImode); + else + parts[1] = + gen_int_mode + ((l[2] & (HOST_WIDE_INT) 0xffffffff) + | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32), + DImode); + } + else + gcc_unreachable (); + } + } + + return size; +} + +/* Emit insns to perform a move or push of DI, DF, XF, and TF values. + Return false when normal moves are needed; true when all required + insns have been emitted. Operands 2-4 contain the input values + int the correct order; operands 5-7 contain the output values. */ + +void +ix86_split_long_move (rtx operands[]) +{ + rtx part[2][4]; + int nparts, i, j; + int push = 0; + int collisions = 0; + machine_mode mode = GET_MODE (operands[0]); + bool collisionparts[4]; + + /* The DFmode expanders may ask us to move double. + For 64bit target this is single move. By hiding the fact + here we simplify i386.md splitters. */ + if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8) + { + /* Optimize constant pool reference to immediates. This is used by + fp moves, that force all constants to memory to allow combining. 
*/ + + if (MEM_P (operands[1]) + && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))) + operands[1] = get_pool_constant (XEXP (operands[1], 0)); + if (push_operand (operands[0], VOIDmode)) + { + operands[0] = copy_rtx (operands[0]); + PUT_MODE (operands[0], word_mode); + } + else + operands[0] = gen_lowpart (DImode, operands[0]); + operands[1] = gen_lowpart (DImode, operands[1]); + emit_move_insn (operands[0], operands[1]); + return; + } + + /* The only non-offsettable memory we handle is push. */ + if (push_operand (operands[0], VOIDmode)) + push = 1; + else + gcc_assert (!MEM_P (operands[0]) + || offsettable_memref_p (operands[0])); + + nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0])); + ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0])); + + /* When emitting push, take care for source operands on the stack. */ + if (push && MEM_P (operands[1]) + && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1])) + { + rtx src_base = XEXP (part[1][nparts - 1], 0); + + /* Compensate for the stack decrement by 4. */ + if (!TARGET_64BIT && nparts == 3 + && mode == XFmode && TARGET_128BIT_LONG_DOUBLE) + src_base = plus_constant (Pmode, src_base, 4); + + /* src_base refers to the stack pointer and is + automatically decreased by emitted push. */ + for (i = 0; i < nparts; i++) + part[1][i] = change_address (part[1][i], + GET_MODE (part[1][i]), src_base); + } + + /* We need to do copy in the right order in case an address register + of the source overlaps the destination. */ + if (REG_P (part[0][0]) && MEM_P (part[1][0])) + { + rtx tmp; + + for (i = 0; i < nparts; i++) + { + collisionparts[i] + = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0)); + if (collisionparts[i]) + collisions++; + } + + /* Collision in the middle part can be handled by reordering. */ + if (collisions == 1 && nparts == 3 && collisionparts [1]) + { + std::swap (part[0][1], part[0][2]); + std::swap (part[1][1], part[1][2]); + } + else if (collisions == 1 + && nparts == 4 + && (collisionparts [1] || collisionparts [2])) + { + if (collisionparts [1]) + { + std::swap (part[0][1], part[0][2]); + std::swap (part[1][1], part[1][2]); + } + else + { + std::swap (part[0][2], part[0][3]); + std::swap (part[1][2], part[1][3]); + } + } + + /* If there are more collisions, we can't handle it by reordering. + Do an lea to the last part and use only one colliding move. */ + else if (collisions > 1) + { + rtx base, addr, tls_base = NULL_RTX; + + collisions = 1; + + base = part[0][nparts - 1]; + + /* Handle the case when the last part isn't valid for lea. + Happens in 64-bit mode storing the 12-byte XFmode. */ + if (GET_MODE (base) != Pmode) + base = gen_rtx_REG (Pmode, REGNO (base)); + + addr = XEXP (part[1][0], 0); + if (TARGET_TLS_DIRECT_SEG_REFS) + { + struct ix86_address parts; + int ok = ix86_decompose_address (addr, &parts); + gcc_assert (ok); + if (parts.seg == DEFAULT_TLS_SEG_REG) + { + /* It is not valid to use %gs: or %fs: in + lea though, so we need to remove it from the + address used for lea and add it to each individual + memory loads instead. 
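+ The UNSPEC_TP term is therefore stripped out of the address, the lea
+ is emitted on what remains, and the thread-pointer term is added back
+ as part of the base when each part's memory reference is rewritten.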
*/ + addr = copy_rtx (addr); + rtx *x = &addr; + while (GET_CODE (*x) == PLUS) + { + for (i = 0; i < 2; i++) + { + rtx u = XEXP (*x, i); + if (GET_CODE (u) == ZERO_EXTEND) + u = XEXP (u, 0); + if (GET_CODE (u) == UNSPEC + && XINT (u, 1) == UNSPEC_TP) + { + tls_base = XEXP (*x, i); + *x = XEXP (*x, 1 - i); + break; + } + } + if (tls_base) + break; + x = &XEXP (*x, 0); + } + gcc_assert (tls_base); + } + } + emit_insn (gen_rtx_SET (base, addr)); + if (tls_base) + base = gen_rtx_PLUS (GET_MODE (base), base, tls_base); + part[1][0] = replace_equiv_address (part[1][0], base); + for (i = 1; i < nparts; i++) + { + if (tls_base) + base = copy_rtx (base); + tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i); + part[1][i] = replace_equiv_address (part[1][i], tmp); + } + } + } + + if (push) + { + if (!TARGET_64BIT) + { + if (nparts == 3) + { + if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode) + emit_insn (ix86_gen_add3 (stack_pointer_rtx, + stack_pointer_rtx, GEN_INT (-4))); + emit_move_insn (part[0][2], part[1][2]); + } + else if (nparts == 4) + { + emit_move_insn (part[0][3], part[1][3]); + emit_move_insn (part[0][2], part[1][2]); + } + } + else + { + /* In 64bit mode we don't have 32bit push available. In case this is + register, it is OK - we will just use larger counterpart. We also + retype memory - these comes from attempt to avoid REX prefix on + moving of second half of TFmode value. */ + if (GET_MODE (part[1][1]) == SImode) + { + switch (GET_CODE (part[1][1])) + { + case MEM: + part[1][1] = adjust_address (part[1][1], DImode, 0); + break; + + case REG: + part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1])); + break; + + default: + gcc_unreachable (); + } + + if (GET_MODE (part[1][0]) == SImode) + part[1][0] = part[1][1]; + } + } + emit_move_insn (part[0][1], part[1][1]); + emit_move_insn (part[0][0], part[1][0]); + return; + } + + /* Choose correct order to not overwrite the source before it is copied. */ + if ((REG_P (part[0][0]) + && REG_P (part[1][1]) + && (REGNO (part[0][0]) == REGNO (part[1][1]) + || (nparts == 3 + && REGNO (part[0][0]) == REGNO (part[1][2])) + || (nparts == 4 + && REGNO (part[0][0]) == REGNO (part[1][3])))) + || (collisions > 0 + && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))) + { + for (i = 0, j = nparts - 1; i < nparts; i++, j--) + { + operands[2 + i] = part[0][j]; + operands[6 + i] = part[1][j]; + } + } + else + { + for (i = 0; i < nparts; i++) + { + operands[2 + i] = part[0][i]; + operands[6 + i] = part[1][i]; + } + } + + /* If optimizing for size, attempt to locally unCSE nonzero constants. */ + if (optimize_insn_for_size_p ()) + { + for (j = 0; j < nparts - 1; j++) + if (CONST_INT_P (operands[6 + j]) + && operands[6 + j] != const0_rtx + && REG_P (operands[2 + j])) + for (i = j; i < nparts - 1; i++) + if (CONST_INT_P (operands[7 + i]) + && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j])) + operands[7 + i] = operands[2 + j]; + } + + for (i = 0; i < nparts; i++) + emit_move_insn (operands[2 + i], operands[6 + i]); + + return; +} + +/* Helper function of ix86_split_ashl used to generate an SImode/DImode + left shift by a constant, either using a single shift or + a sequence of add instructions. */ + +static void +ix86_expand_ashl_const (rtx operand, int count, machine_mode mode) +{ + rtx (*insn)(rtx, rtx, rtx); + + if (count == 1 + || (count * ix86_cost->add <= ix86_cost->shift_const + && !optimize_insn_for_size_p ())) + { + insn = mode == DImode ? 
gen_addsi3 : gen_adddi3; + while (count-- > 0) + emit_insn (insn (operand, operand, operand)); + } + else + { + insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3; + emit_insn (insn (operand, operand, GEN_INT (count))); + } +} + +void +ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode) +{ + rtx (*gen_ashl3)(rtx, rtx, rtx); + rtx (*gen_shld)(rtx, rtx, rtx); + int half_width = GET_MODE_BITSIZE (mode) >> 1; + + rtx low[2], high[2]; + int count; + + if (CONST_INT_P (operands[2])) + { + split_double_mode (mode, operands, 2, low, high); + count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); + + if (count >= half_width) + { + emit_move_insn (high[0], low[1]); + emit_move_insn (low[0], const0_rtx); + + if (count > half_width) + ix86_expand_ashl_const (high[0], count - half_width, mode); + } + else + { + gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld; + + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + emit_insn (gen_shld (high[0], low[0], GEN_INT (count))); + ix86_expand_ashl_const (low[0], count, mode); + } + return; + } + + split_double_mode (mode, operands, 1, low, high); + + gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3; + + if (operands[1] == const1_rtx) + { + /* Assuming we've chosen a QImode capable registers, then 1 << N + can be done with two 32/64-bit shifts, no branches, no cmoves. */ + if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0])) + { + rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG); + + ix86_expand_clear (low[0]); + ix86_expand_clear (high[0]); + emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width))); + + d = gen_lowpart (QImode, low[0]); + d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); + s = gen_rtx_EQ (QImode, flags, const0_rtx); + emit_insn (gen_rtx_SET (d, s)); + + d = gen_lowpart (QImode, high[0]); + d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); + s = gen_rtx_NE (QImode, flags, const0_rtx); + emit_insn (gen_rtx_SET (d, s)); + } + + /* Otherwise, we can get the same results by manually performing + a bit extract operation on bit 5/6, and then performing the two + shifts. The two methods of getting 0/1 into low/high are exactly + the same size. Avoiding the shift in the bit extract case helps + pentium4 a bit; no one else seems to care much either way. */ + else + { + machine_mode half_mode; + rtx (*gen_lshr3)(rtx, rtx, rtx); + rtx (*gen_and3)(rtx, rtx, rtx); + rtx (*gen_xor3)(rtx, rtx, rtx); + HOST_WIDE_INT bits; + rtx x; + + if (mode == DImode) + { + half_mode = SImode; + gen_lshr3 = gen_lshrsi3; + gen_and3 = gen_andsi3; + gen_xor3 = gen_xorsi3; + bits = 5; + } + else + { + half_mode = DImode; + gen_lshr3 = gen_lshrdi3; + gen_and3 = gen_anddi3; + gen_xor3 = gen_xordi3; + bits = 6; + } + + if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ()) + x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]); + else + x = gen_lowpart (half_mode, operands[2]); + emit_insn (gen_rtx_SET (high[0], x)); + + emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits))); + emit_insn (gen_and3 (high[0], high[0], const1_rtx)); + emit_move_insn (low[0], high[0]); + emit_insn (gen_xor3 (low[0], low[0], const1_rtx)); + } + + emit_insn (gen_ashl3 (low[0], low[0], operands[2])); + emit_insn (gen_ashl3 (high[0], high[0], operands[2])); + return; + } + + if (operands[1] == constm1_rtx) + { + /* For -1 << N, we can avoid the shld instruction, because we + know that we're shifting 0...31/63 ones into a -1. 
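+ Both halves are simply preset to all ones; the plain shift of the low
+ half and the trailing shift-count adjustment then produce the correct
+ result for any count.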
*/ + emit_move_insn (low[0], constm1_rtx); + if (optimize_insn_for_size_p ()) + emit_move_insn (high[0], low[0]); + else + emit_move_insn (high[0], constm1_rtx); + } + else + { + gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld; + + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + split_double_mode (mode, operands, 1, low, high); + emit_insn (gen_shld (high[0], low[0], operands[2])); + } + + emit_insn (gen_ashl3 (low[0], low[0], operands[2])); + + if (TARGET_CMOVE && scratch) + { + rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) + = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; + + ix86_expand_clear (scratch); + emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch)); + } + else + { + rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx) + = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2; + + emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2])); + } +} + +void +ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode) +{ + rtx (*gen_ashr3)(rtx, rtx, rtx) + = mode == DImode ? gen_ashrsi3 : gen_ashrdi3; + rtx (*gen_shrd)(rtx, rtx, rtx); + int half_width = GET_MODE_BITSIZE (mode) >> 1; + + rtx low[2], high[2]; + int count; + + if (CONST_INT_P (operands[2])) + { + split_double_mode (mode, operands, 2, low, high); + count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); + + if (count == GET_MODE_BITSIZE (mode) - 1) + { + emit_move_insn (high[0], high[1]); + emit_insn (gen_ashr3 (high[0], high[0], + GEN_INT (half_width - 1))); + emit_move_insn (low[0], high[0]); + + } + else if (count >= half_width) + { + emit_move_insn (low[0], high[1]); + emit_move_insn (high[0], low[0]); + emit_insn (gen_ashr3 (high[0], high[0], + GEN_INT (half_width - 1))); + + if (count > half_width) + emit_insn (gen_ashr3 (low[0], low[0], + GEN_INT (count - half_width))); + } + else + { + gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; + + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + emit_insn (gen_shrd (low[0], high[0], GEN_INT (count))); + emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count))); + } + } + else + { + gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; + + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + split_double_mode (mode, operands, 1, low, high); + + emit_insn (gen_shrd (low[0], high[0], operands[2])); + emit_insn (gen_ashr3 (high[0], high[0], operands[2])); + + if (TARGET_CMOVE && scratch) + { + rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) + = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; + + emit_move_insn (scratch, high[0]); + emit_insn (gen_ashr3 (scratch, scratch, + GEN_INT (half_width - 1))); + emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], + scratch)); + } + else + { + rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx) + = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3; + + emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2])); + } + } +} + +void +ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode) +{ + rtx (*gen_lshr3)(rtx, rtx, rtx) + = mode == DImode ? 
gen_lshrsi3 : gen_lshrdi3; + rtx (*gen_shrd)(rtx, rtx, rtx); + int half_width = GET_MODE_BITSIZE (mode) >> 1; + + rtx low[2], high[2]; + int count; + + if (CONST_INT_P (operands[2])) + { + split_double_mode (mode, operands, 2, low, high); + count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); + + if (count >= half_width) + { + emit_move_insn (low[0], high[1]); + ix86_expand_clear (high[0]); + + if (count > half_width) + emit_insn (gen_lshr3 (low[0], low[0], + GEN_INT (count - half_width))); + } + else + { + gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; + + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + emit_insn (gen_shrd (low[0], high[0], GEN_INT (count))); + emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count))); + } + } + else + { + gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; + + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + split_double_mode (mode, operands, 1, low, high); + + emit_insn (gen_shrd (low[0], high[0], operands[2])); + emit_insn (gen_lshr3 (high[0], high[0], operands[2])); + + if (TARGET_CMOVE && scratch) + { + rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) + = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; + + ix86_expand_clear (scratch); + emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], + scratch)); + } + else + { + rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx) + = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2; + + emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2])); + } + } +} + +/* Predict just emitted jump instruction to be taken with probability PROB. */ +static void +predict_jump (int prob) +{ + rtx insn = get_last_insn (); + gcc_assert (JUMP_P (insn)); + add_int_reg_note (insn, REG_BR_PROB, prob); +} + +/* Helper function for the string operations below. Dest VARIABLE whether + it is aligned to VALUE bytes. If true, jump to the label. */ +static rtx_code_label * +ix86_expand_aligntest (rtx variable, int value, bool epilogue) +{ + rtx_code_label *label = gen_label_rtx (); + rtx tmpcount = gen_reg_rtx (GET_MODE (variable)); + if (GET_MODE (variable) == DImode) + emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value))); + else + emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value))); + emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable), + 1, label); + if (epilogue) + predict_jump (REG_BR_PROB_BASE * 50 / 100); + else + predict_jump (REG_BR_PROB_BASE * 90 / 100); + return label; +} + +/* Adjust COUNTER by the VALUE. */ +static void +ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value) +{ + rtx (*gen_add)(rtx, rtx, rtx) + = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3; + + emit_insn (gen_add (countreg, countreg, GEN_INT (-value))); +} + +/* Zero extend possibly SImode EXP to Pmode register. */ +rtx +ix86_zero_extend_to_Pmode (rtx exp) +{ + return force_reg (Pmode, convert_to_mode (Pmode, exp, 1)); +} + +/* Divide COUNTREG by SCALE. */ +static rtx +scale_counter (rtx countreg, int scale) +{ + rtx sc; + + if (scale == 1) + return countreg; + if (CONST_INT_P (countreg)) + return GEN_INT (INTVAL (countreg) / scale); + gcc_assert (REG_P (countreg)); + + sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg, + GEN_INT (exact_log2 (scale)), + NULL, 1, OPTAB_DIRECT); + return sc; +} + +/* Return mode for the memcpy/memset loop counter. Prefer SImode over + DImode for constant loop counts. 
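+ A constant count that does not fit in 32 bits (e.g. 0x100000000 on a
+ 64-bit target) still gets DImode; smaller constants use SImode and
+ non-constant counts use Pmode.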
*/ + +static machine_mode +counter_mode (rtx count_exp) +{ + if (GET_MODE (count_exp) != VOIDmode) + return GET_MODE (count_exp); + if (!CONST_INT_P (count_exp)) + return Pmode; + if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff)) + return DImode; + return SImode; +} + +/* Copy the address to a Pmode register. This is used for x32 to + truncate DImode TLS address to a SImode register. */ + +static rtx +ix86_copy_addr_to_reg (rtx addr) +{ + rtx reg; + if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode) + { + reg = copy_addr_to_reg (addr); + REG_POINTER (reg) = 1; + return reg; + } + else + { + gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode); + reg = copy_to_mode_reg (DImode, addr); + REG_POINTER (reg) = 1; + return gen_rtx_SUBREG (SImode, reg, 0); + } +} + +/* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR + to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT + specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set + memory by VALUE (supposed to be in MODE). + + The size is rounded down to whole number of chunk size moved at once. + SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */ + + +static void +expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem, + rtx destptr, rtx srcptr, rtx value, + rtx count, machine_mode mode, int unroll, + int expected_size, bool issetmem) +{ + rtx_code_label *out_label, *top_label; + rtx iter, tmp; + machine_mode iter_mode = counter_mode (count); + int piece_size_n = GET_MODE_SIZE (mode) * unroll; + rtx piece_size = GEN_INT (piece_size_n); + rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1)); + rtx size; + int i; + + top_label = gen_label_rtx (); + out_label = gen_label_rtx (); + iter = gen_reg_rtx (iter_mode); + + size = expand_simple_binop (iter_mode, AND, count, piece_size_mask, + NULL, 1, OPTAB_DIRECT); + /* Those two should combine. */ + if (piece_size == const1_rtx) + { + emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode, + true, out_label); + predict_jump (REG_BR_PROB_BASE * 10 / 100); + } + emit_move_insn (iter, const0_rtx); + + emit_label (top_label); + + tmp = convert_modes (Pmode, iter_mode, iter, true); + + /* This assert could be relaxed - in this case we'll need to compute + smallest power of two, containing in PIECE_SIZE_N and pass it to + offset_address. */ + gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0); + destmem = offset_address (destmem, tmp, piece_size_n); + destmem = adjust_address (destmem, mode, 0); + + if (!issetmem) + { + srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n); + srcmem = adjust_address (srcmem, mode, 0); + + /* When unrolling for chips that reorder memory reads and writes, + we can save registers by using single temporary. + Also using 4 temporaries is overkill in 32bit mode. 
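+ Note the single-temporary variant below is currently disabled by the
+ '&& 0' condition; the path actually taken loads up to four temporaries
+ before storing them.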
*/ + if (!TARGET_64BIT && 0) + { + for (i = 0; i < unroll; i++) + { + if (i) + { + destmem = + adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode)); + srcmem = + adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode)); + } + emit_move_insn (destmem, srcmem); + } + } + else + { + rtx tmpreg[4]; + gcc_assert (unroll <= 4); + for (i = 0; i < unroll; i++) + { + tmpreg[i] = gen_reg_rtx (mode); + if (i) + { + srcmem = + adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode)); + } + emit_move_insn (tmpreg[i], srcmem); + } + for (i = 0; i < unroll; i++) + { + if (i) + { + destmem = + adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode)); + } + emit_move_insn (destmem, tmpreg[i]); + } + } + } + else + for (i = 0; i < unroll; i++) + { + if (i) + destmem = + adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode)); + emit_move_insn (destmem, value); + } + + tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter, + true, OPTAB_LIB_WIDEN); + if (tmp != iter) + emit_move_insn (iter, tmp); + + emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode, + true, top_label); + if (expected_size != -1) + { + expected_size /= GET_MODE_SIZE (mode) * unroll; + if (expected_size == 0) + predict_jump (0); + else if (expected_size > REG_BR_PROB_BASE) + predict_jump (REG_BR_PROB_BASE - 1); + else + predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size); + } + else + predict_jump (REG_BR_PROB_BASE * 80 / 100); + iter = ix86_zero_extend_to_Pmode (iter); + tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr, + true, OPTAB_LIB_WIDEN); + if (tmp != destptr) + emit_move_insn (destptr, tmp); + if (!issetmem) + { + tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr, + true, OPTAB_LIB_WIDEN); + if (tmp != srcptr) + emit_move_insn (srcptr, tmp); + } + emit_label (out_label); +} + +/* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument. + When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored. + When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored. + For setmem case, VALUE is a promoted to a wider size ORIG_VALUE. + ORIG_VALUE is the original value passed to memset to fill the memory with. + Other arguments have same meaning as for previous function. */ + +static void +expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem, + rtx destptr, rtx srcptr, rtx value, rtx orig_value, + rtx count, + machine_mode mode, bool issetmem) +{ + rtx destexp; + rtx srcexp; + rtx countreg; + HOST_WIDE_INT rounded_count; + + /* If possible, it is shorter to use rep movs. + TODO: Maybe it is better to move this logic to decide_alg. 
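+ In particular, a QImode copy (or a memset of zero) whose constant byte
+ count is a multiple of four is widened below to SImode, so every rep
+ iteration moves four bytes instead of one.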
*/ + if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3) + && (!issetmem || orig_value == const0_rtx)) + mode = SImode; + + if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode) + destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0); + + countreg = ix86_zero_extend_to_Pmode (scale_counter (count, + GET_MODE_SIZE (mode))); + if (mode != QImode) + { + destexp = gen_rtx_ASHIFT (Pmode, countreg, + GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); + destexp = gen_rtx_PLUS (Pmode, destexp, destptr); + } + else + destexp = gen_rtx_PLUS (Pmode, destptr, countreg); + if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count)) + { + rounded_count + = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode)); + destmem = shallow_copy_rtx (destmem); + set_mem_size (destmem, rounded_count); + } + else if (MEM_SIZE_KNOWN_P (destmem)) + clear_mem_size (destmem); + + if (issetmem) + { + value = force_reg (mode, gen_lowpart (mode, value)); + emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp)); + } + else + { + if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode) + srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0); + if (mode != QImode) + { + srcexp = gen_rtx_ASHIFT (Pmode, countreg, + GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); + srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr); + } + else + srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg); + if (CONST_INT_P (count)) + { + rounded_count + = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode)); + srcmem = shallow_copy_rtx (srcmem); + set_mem_size (srcmem, rounded_count); + } + else + { + if (MEM_SIZE_KNOWN_P (srcmem)) + clear_mem_size (srcmem); + } + emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg, + destexp, srcexp)); + } +} + +/* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to + DESTMEM. + SRC is passed by pointer to be updated on return. + Return value is updated DST. */ +static rtx +emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr, + HOST_WIDE_INT size_to_move) +{ + rtx dst = destmem, src = *srcmem, adjust, tempreg; + enum insn_code code; + machine_mode move_mode; + int piece_size, i; + + /* Find the widest mode in which we could perform moves. + Start with the biggest power of 2 less than SIZE_TO_MOVE and half + it until move of such size is supported. */ + piece_size = 1 << floor_log2 (size_to_move); + move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0); + code = optab_handler (mov_optab, move_mode); + while (code == CODE_FOR_nothing && piece_size > 1) + { + piece_size >>= 1; + move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0); + code = optab_handler (mov_optab, move_mode); + } + + /* Find the corresponding vector mode with the same size as MOVE_MODE. + MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */ + if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode)) + { + int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode); + move_mode = mode_for_vector (word_mode, nunits); + code = optab_handler (mov_optab, move_mode); + if (code == CODE_FOR_nothing) + { + move_mode = word_mode; + piece_size = GET_MODE_SIZE (move_mode); + code = optab_handler (mov_optab, move_mode); + } + } + gcc_assert (code != CODE_FOR_nothing); + + dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0); + src = adjust_automodify_address_nv (src, move_mode, srcptr, 0); + + /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. 
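+     Each iteration loads PIECE_SIZE bytes into a fresh temporary register,
+     stores it, and advances both DESTPTR and SRCPTR by PIECE_SIZE.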
*/ + gcc_assert (size_to_move % piece_size == 0); + adjust = GEN_INT (piece_size); + for (i = 0; i < size_to_move; i += piece_size) + { + /* We move from memory to memory, so we'll need to do it via + a temporary register. */ + tempreg = gen_reg_rtx (move_mode); + emit_insn (GEN_FCN (code) (tempreg, src)); + emit_insn (GEN_FCN (code) (dst, tempreg)); + + emit_move_insn (destptr, + gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust)); + emit_move_insn (srcptr, + gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust)); + + dst = adjust_automodify_address_nv (dst, move_mode, destptr, + piece_size); + src = adjust_automodify_address_nv (src, move_mode, srcptr, + piece_size); + } + + /* Update DST and SRC rtx. */ + *srcmem = src; + return dst; +} + +/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */ +static void +expand_movmem_epilogue (rtx destmem, rtx srcmem, + rtx destptr, rtx srcptr, rtx count, int max_size) +{ + rtx src, dest; + if (CONST_INT_P (count)) + { + HOST_WIDE_INT countval = INTVAL (count); + HOST_WIDE_INT epilogue_size = countval % max_size; + int i; + + /* For now MAX_SIZE should be a power of 2. This assert could be + relaxed, but it'll require a bit more complicated epilogue + expanding. */ + gcc_assert ((max_size & (max_size - 1)) == 0); + for (i = max_size; i >= 1; i >>= 1) + { + if (epilogue_size & i) + destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i); + } + return; + } + if (max_size > 8) + { + count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1), + count, 1, OPTAB_DIRECT); + expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL, + count, QImode, 1, 4, false); + return; + } + + /* When there are stringops, we can cheaply increase dest and src pointers. + Otherwise we save code size by maintaining offset (zero is readily + available from preceding rep operation) and using x86 addressing modes. 
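+     That is, with TARGET_SINGLE_STRINGOP each conditional tail copy uses a
+     pointer-updating string move, while otherwise a single OFFSET register
+     (started at zero) is advanced and folded into the addresses.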
+ */ + if (TARGET_SINGLE_STRINGOP) + { + if (max_size > 4) + { + rtx_code_label *label = ix86_expand_aligntest (count, 4, true); + src = change_address (srcmem, SImode, srcptr); + dest = change_address (destmem, SImode, destptr); + emit_insn (gen_strmov (destptr, dest, srcptr, src)); + emit_label (label); + LABEL_NUSES (label) = 1; + } + if (max_size > 2) + { + rtx_code_label *label = ix86_expand_aligntest (count, 2, true); + src = change_address (srcmem, HImode, srcptr); + dest = change_address (destmem, HImode, destptr); + emit_insn (gen_strmov (destptr, dest, srcptr, src)); + emit_label (label); + LABEL_NUSES (label) = 1; + } + if (max_size > 1) + { + rtx_code_label *label = ix86_expand_aligntest (count, 1, true); + src = change_address (srcmem, QImode, srcptr); + dest = change_address (destmem, QImode, destptr); + emit_insn (gen_strmov (destptr, dest, srcptr, src)); + emit_label (label); + LABEL_NUSES (label) = 1; + } + } + else + { + rtx offset = force_reg (Pmode, const0_rtx); + rtx tmp; + + if (max_size > 4) + { + rtx_code_label *label = ix86_expand_aligntest (count, 4, true); + src = change_address (srcmem, SImode, srcptr); + dest = change_address (destmem, SImode, destptr); + emit_move_insn (dest, src); + tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL, + true, OPTAB_LIB_WIDEN); + if (tmp != offset) + emit_move_insn (offset, tmp); + emit_label (label); + LABEL_NUSES (label) = 1; + } + if (max_size > 2) + { + rtx_code_label *label = ix86_expand_aligntest (count, 2, true); + tmp = gen_rtx_PLUS (Pmode, srcptr, offset); + src = change_address (srcmem, HImode, tmp); + tmp = gen_rtx_PLUS (Pmode, destptr, offset); + dest = change_address (destmem, HImode, tmp); + emit_move_insn (dest, src); + tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp, + true, OPTAB_LIB_WIDEN); + if (tmp != offset) + emit_move_insn (offset, tmp); + emit_label (label); + LABEL_NUSES (label) = 1; + } + if (max_size > 1) + { + rtx_code_label *label = ix86_expand_aligntest (count, 1, true); + tmp = gen_rtx_PLUS (Pmode, srcptr, offset); + src = change_address (srcmem, QImode, tmp); + tmp = gen_rtx_PLUS (Pmode, destptr, offset); + dest = change_address (destmem, QImode, tmp); + emit_move_insn (dest, src); + emit_label (label); + LABEL_NUSES (label) = 1; + } + } +} + +/* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM + with value PROMOTED_VAL. + SRC is passed by pointer to be updated on return. + Return value is updated DST. */ +static rtx +emit_memset (rtx destmem, rtx destptr, rtx promoted_val, + HOST_WIDE_INT size_to_move) +{ + rtx dst = destmem, adjust; + enum insn_code code; + machine_mode move_mode; + int piece_size, i; + + /* Find the widest mode in which we could perform moves. + Start with the biggest power of 2 less than SIZE_TO_MOVE and half + it until move of such size is supported. */ + move_mode = GET_MODE (promoted_val); + if (move_mode == VOIDmode) + move_mode = QImode; + if (size_to_move < GET_MODE_SIZE (move_mode)) + { + move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0); + promoted_val = gen_lowpart (move_mode, promoted_val); + } + piece_size = GET_MODE_SIZE (move_mode); + code = optab_handler (mov_optab, move_mode); + gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX); + + dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0); + + /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. 
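+     Pieces no wider than a word go through gen_strset, which updates DESTPTR
+     itself; wider (vector) pieces use a plain move followed by an explicit
+     pointer increment.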
*/ + gcc_assert (size_to_move % piece_size == 0); + adjust = GEN_INT (piece_size); + for (i = 0; i < size_to_move; i += piece_size) + { + if (piece_size <= GET_MODE_SIZE (word_mode)) + { + emit_insn (gen_strset (destptr, dst, promoted_val)); + dst = adjust_automodify_address_nv (dst, move_mode, destptr, + piece_size); + continue; + } + + emit_insn (GEN_FCN (code) (dst, promoted_val)); + + emit_move_insn (destptr, + gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust)); + + dst = adjust_automodify_address_nv (dst, move_mode, destptr, + piece_size); + } + + /* Update DST rtx. */ + return dst; +} +/* Output code to set at most count & (max_size - 1) bytes starting by DEST. */ +static void +expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value, + rtx count, int max_size) +{ + count = + expand_simple_binop (counter_mode (count), AND, count, + GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT); + expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL, + gen_lowpart (QImode, value), count, QImode, + 1, max_size / 2, true); +} + +/* Output code to set at most count & (max_size - 1) bytes starting by DEST. */ +static void +expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value, + rtx count, int max_size) +{ + rtx dest; + + if (CONST_INT_P (count)) + { + HOST_WIDE_INT countval = INTVAL (count); + HOST_WIDE_INT epilogue_size = countval % max_size; + int i; + + /* For now MAX_SIZE should be a power of 2. This assert could be + relaxed, but it'll require a bit more complicated epilogue + expanding. */ + gcc_assert ((max_size & (max_size - 1)) == 0); + for (i = max_size; i >= 1; i >>= 1) + { + if (epilogue_size & i) + { + if (vec_value && i > GET_MODE_SIZE (GET_MODE (value))) + destmem = emit_memset (destmem, destptr, vec_value, i); + else + destmem = emit_memset (destmem, destptr, value, i); + } + } + return; + } + if (max_size > 32) + { + expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size); + return; + } + if (max_size > 16) + { + rtx_code_label *label = ix86_expand_aligntest (count, 16, true); + if (TARGET_64BIT) + { + dest = change_address (destmem, DImode, destptr); + emit_insn (gen_strset (destptr, dest, value)); + dest = adjust_automodify_address_nv (dest, DImode, destptr, 8); + emit_insn (gen_strset (destptr, dest, value)); + } + else + { + dest = change_address (destmem, SImode, destptr); + emit_insn (gen_strset (destptr, dest, value)); + dest = adjust_automodify_address_nv (dest, SImode, destptr, 4); + emit_insn (gen_strset (destptr, dest, value)); + dest = adjust_automodify_address_nv (dest, SImode, destptr, 8); + emit_insn (gen_strset (destptr, dest, value)); + dest = adjust_automodify_address_nv (dest, SImode, destptr, 12); + emit_insn (gen_strset (destptr, dest, value)); + } + emit_label (label); + LABEL_NUSES (label) = 1; + } + if (max_size > 8) + { + rtx_code_label *label = ix86_expand_aligntest (count, 8, true); + if (TARGET_64BIT) + { + dest = change_address (destmem, DImode, destptr); + emit_insn (gen_strset (destptr, dest, value)); + } + else + { + dest = change_address (destmem, SImode, destptr); + emit_insn (gen_strset (destptr, dest, value)); + dest = adjust_automodify_address_nv (dest, SImode, destptr, 4); + emit_insn (gen_strset (destptr, dest, value)); + } + emit_label (label); + LABEL_NUSES (label) = 1; + } + if (max_size > 4) + { + rtx_code_label *label = ix86_expand_aligntest (count, 4, true); + dest = change_address (destmem, SImode, destptr); + emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value))); + 
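+      /* Only the low 32 bits of the promoted VALUE are stored here; the
+         label is the branch target used when the 4-byte bit of COUNT is
+         clear.  */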
emit_label (label); + LABEL_NUSES (label) = 1; + } + if (max_size > 2) + { + rtx_code_label *label = ix86_expand_aligntest (count, 2, true); + dest = change_address (destmem, HImode, destptr); + emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value))); + emit_label (label); + LABEL_NUSES (label) = 1; + } + if (max_size > 1) + { + rtx_code_label *label = ix86_expand_aligntest (count, 1, true); + dest = change_address (destmem, QImode, destptr); + emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value))); + emit_label (label); + LABEL_NUSES (label) = 1; + } +} + +/* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to + DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN. + Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are + ignored. + Return value is updated DESTMEM. */ +static rtx +expand_set_or_movmem_prologue (rtx destmem, rtx srcmem, + rtx destptr, rtx srcptr, rtx value, + rtx vec_value, rtx count, int align, + int desired_alignment, bool issetmem) +{ + int i; + for (i = 1; i < desired_alignment; i <<= 1) + { + if (align <= i) + { + rtx_code_label *label = ix86_expand_aligntest (destptr, i, false); + if (issetmem) + { + if (vec_value && i > GET_MODE_SIZE (GET_MODE (value))) + destmem = emit_memset (destmem, destptr, vec_value, i); + else + destmem = emit_memset (destmem, destptr, value, i); + } + else + destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i); + ix86_adjust_counter (count, i); + emit_label (label); + LABEL_NUSES (label) = 1; + set_mem_align (destmem, i * 2 * BITS_PER_UNIT); + } + } + return destmem; +} + +/* Test if COUNT&SIZE is nonzero and if so, expand movme + or setmem sequence that is valid for SIZE..2*SIZE-1 bytes + and jump to DONE_LABEL. */ +static void +expand_small_movmem_or_setmem (rtx destmem, rtx srcmem, + rtx destptr, rtx srcptr, + rtx value, rtx vec_value, + rtx count, int size, + rtx done_label, bool issetmem) +{ + rtx_code_label *label = ix86_expand_aligntest (count, size, false); + machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1); + rtx modesize; + int n; + + /* If we do not have vector value to copy, we must reduce size. */ + if (issetmem) + { + if (!vec_value) + { + if (GET_MODE (value) == VOIDmode && size > 8) + mode = Pmode; + else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value))) + mode = GET_MODE (value); + } + else + mode = GET_MODE (vec_value), value = vec_value; + } + else + { + /* Choose appropriate vector mode. */ + if (size >= 32) + mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode; + else if (size >= 16) + mode = TARGET_SSE ? 
V16QImode : DImode; + srcmem = change_address (srcmem, mode, srcptr); + } + destmem = change_address (destmem, mode, destptr); + modesize = GEN_INT (GET_MODE_SIZE (mode)); + gcc_assert (GET_MODE_SIZE (mode) <= size); + for (n = 0; n * GET_MODE_SIZE (mode) < size; n++) + { + if (issetmem) + emit_move_insn (destmem, gen_lowpart (mode, value)); + else + { + emit_move_insn (destmem, srcmem); + srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); + } + destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); + } + + destmem = offset_address (destmem, count, 1); + destmem = offset_address (destmem, GEN_INT (-2 * size), + GET_MODE_SIZE (mode)); + if (!issetmem) + { + srcmem = offset_address (srcmem, count, 1); + srcmem = offset_address (srcmem, GEN_INT (-2 * size), + GET_MODE_SIZE (mode)); + } + for (n = 0; n * GET_MODE_SIZE (mode) < size; n++) + { + if (issetmem) + emit_move_insn (destmem, gen_lowpart (mode, value)); + else + { + emit_move_insn (destmem, srcmem); + srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); + } + destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); + } + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + + emit_label (label); + LABEL_NUSES (label) = 1; +} + +/* Handle small memcpy (up to SIZE that is supposed to be small power of 2. + and get ready for the main memcpy loop by copying iniital DESIRED_ALIGN-ALIGN + bytes and last SIZE bytes adjusitng DESTPTR/SRCPTR/COUNT in a way we can + proceed with an loop copying SIZE bytes at once. Do moves in MODE. + DONE_LABEL is a label after the whole copying sequence. The label is created + on demand if *DONE_LABEL is NULL. + MIN_SIZE is minimal size of block copied. This value gets adjusted for new + bounds after the initial copies. + + DESTMEM/SRCMEM are memory expressions pointing to the copies block, + DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether + we will dispatch to a library call for large blocks. + + In pseudocode we do: + + if (COUNT < SIZE) + { + Assume that SIZE is 4. Bigger sizes are handled analogously + if (COUNT & 4) + { + copy 4 bytes from SRCPTR to DESTPTR + copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4 + goto done_label + } + if (!COUNT) + goto done_label; + copy 1 byte from SRCPTR to DESTPTR + if (COUNT & 2) + { + copy 2 bytes from SRCPTR to DESTPTR + copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2 + } + } + else + { + copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR + copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE + + OLD_DESPTR = DESTPTR; + Align DESTPTR up to DESIRED_ALIGN + SRCPTR += DESTPTR - OLD_DESTPTR + COUNT -= DEST_PTR - OLD_DESTPTR + if (DYNAMIC_CHECK) + Round COUNT down to multiple of SIZE + << optional caller supplied zero size guard is here >> + << optional caller supplied dynamic check is here >> + << caller supplied main copy loop is here >> + } + done_label: + */ +static void +expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem, + rtx *destptr, rtx *srcptr, + machine_mode mode, + rtx value, rtx vec_value, + rtx *count, + rtx_code_label **done_label, + int size, + int desired_align, + int align, + unsigned HOST_WIDE_INT *min_size, + bool dynamic_check, + bool issetmem) +{ + rtx_code_label *loop_label = NULL, *label; + int n; + rtx modesize; + int prolog_size = 0; + rtx mode_value; + + /* Chose proper value to copy. 
*/ + if (issetmem && VECTOR_MODE_P (mode)) + mode_value = vec_value; + else + mode_value = value; + gcc_assert (GET_MODE_SIZE (mode) <= size); + + /* See if block is big or small, handle small blocks. */ + if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size) + { + int size2 = size; + loop_label = gen_label_rtx (); + + if (!*done_label) + *done_label = gen_label_rtx (); + + emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count), + 1, loop_label); + size2 >>= 1; + + /* Handle sizes > 3. */ + for (;size2 > 2; size2 >>= 1) + expand_small_movmem_or_setmem (destmem, srcmem, + *destptr, *srcptr, + value, vec_value, + *count, + size2, *done_label, issetmem); + /* Nothing to copy? Jump to DONE_LABEL if so */ + emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count), + 1, *done_label); + + /* Do a byte copy. */ + destmem = change_address (destmem, QImode, *destptr); + if (issetmem) + emit_move_insn (destmem, gen_lowpart (QImode, value)); + else + { + srcmem = change_address (srcmem, QImode, *srcptr); + emit_move_insn (destmem, srcmem); + } + + /* Handle sizes 2 and 3. */ + label = ix86_expand_aligntest (*count, 2, false); + destmem = change_address (destmem, HImode, *destptr); + destmem = offset_address (destmem, *count, 1); + destmem = offset_address (destmem, GEN_INT (-2), 2); + if (issetmem) + emit_move_insn (destmem, gen_lowpart (HImode, value)); + else + { + srcmem = change_address (srcmem, HImode, *srcptr); + srcmem = offset_address (srcmem, *count, 1); + srcmem = offset_address (srcmem, GEN_INT (-2), 2); + emit_move_insn (destmem, srcmem); + } + + emit_label (label); + LABEL_NUSES (label) = 1; + emit_jump_insn (gen_jump (*done_label)); + emit_barrier (); + } + else + gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size + || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size); + + /* Start memcpy for COUNT >= SIZE. */ + if (loop_label) + { + emit_label (loop_label); + LABEL_NUSES (loop_label) = 1; + } + + /* Copy first desired_align bytes. */ + if (!issetmem) + srcmem = change_address (srcmem, mode, *srcptr); + destmem = change_address (destmem, mode, *destptr); + modesize = GEN_INT (GET_MODE_SIZE (mode)); + for (n = 0; prolog_size < desired_align - align; n++) + { + if (issetmem) + emit_move_insn (destmem, mode_value); + else + { + emit_move_insn (destmem, srcmem); + srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); + } + destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); + prolog_size += GET_MODE_SIZE (mode); + } + + + /* Copy last SIZE bytes. */ + destmem = offset_address (destmem, *count, 1); + destmem = offset_address (destmem, + GEN_INT (-size - prolog_size), + 1); + if (issetmem) + emit_move_insn (destmem, mode_value); + else + { + srcmem = offset_address (srcmem, *count, 1); + srcmem = offset_address (srcmem, + GEN_INT (-size - prolog_size), + 1); + emit_move_insn (destmem, srcmem); + } + for (n = 1; n * GET_MODE_SIZE (mode) < size; n++) + { + destmem = offset_address (destmem, modesize, 1); + if (issetmem) + emit_move_insn (destmem, mode_value); + else + { + srcmem = offset_address (srcmem, modesize, 1); + emit_move_insn (destmem, srcmem); + } + } + + /* Align destination. */ + if (desired_align > 1 && desired_align > align) + { + rtx saveddest = *destptr; + + gcc_assert (desired_align <= size); + /* Align destptr up, place it to new register. 
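+     In effect DESTPTR = (DESTPTR + PROLOG_SIZE) & -DESIRED_ALIGN; SRCPTR and
+     COUNT are then adjusted by however many bytes the destination advanced.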
*/ + *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr, + GEN_INT (prolog_size), + NULL_RTX, 1, OPTAB_DIRECT); + if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest)) + REG_POINTER (*destptr) = 1; + *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr, + GEN_INT (-desired_align), + *destptr, 1, OPTAB_DIRECT); + /* See how many bytes we skipped. */ + saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest, + *destptr, + saveddest, 1, OPTAB_DIRECT); + /* Adjust srcptr and count. */ + if (!issetmem) + *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, + saveddest, *srcptr, 1, OPTAB_DIRECT); + *count = expand_simple_binop (GET_MODE (*count), PLUS, *count, + saveddest, *count, 1, OPTAB_DIRECT); + /* We copied at most size + prolog_size. */ + if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size)) + *min_size + = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size); + else + *min_size = 0; + + /* Our loops always round down the block size, but for dispatch to + library we need precise value. */ + if (dynamic_check) + *count = expand_simple_binop (GET_MODE (*count), AND, *count, + GEN_INT (-size), *count, 1, OPTAB_DIRECT); + } + else + { + gcc_assert (prolog_size == 0); + /* Decrease count, so we won't end up copying last word twice. */ + if (!CONST_INT_P (*count)) + *count = expand_simple_binop (GET_MODE (*count), PLUS, *count, + constm1_rtx, *count, 1, OPTAB_DIRECT); + else + *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1, + (unsigned HOST_WIDE_INT)size)); + if (*min_size) + *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size); + } +} + + +/* This function is like the previous one, except here we know how many bytes + need to be copied. That allows us to update alignment not only of DST, which + is returned, but also of SRC, which is passed as a pointer for that + reason. 
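+   ALIGN_BYTES is the exact number of bytes needed to reach DESIRED_ALIGN,
+   so the loop below simply walks the set bits of ALIGN_BYTES from the
+   smallest piece size upwards.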
*/ +static rtx +expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg, + rtx srcreg, rtx value, rtx vec_value, + int desired_align, int align_bytes, + bool issetmem) +{ + rtx src = NULL; + rtx orig_dst = dst; + rtx orig_src = NULL; + int piece_size = 1; + int copied_bytes = 0; + + if (!issetmem) + { + gcc_assert (srcp != NULL); + src = *srcp; + orig_src = src; + } + + for (piece_size = 1; + piece_size <= desired_align && copied_bytes < align_bytes; + piece_size <<= 1) + { + if (align_bytes & piece_size) + { + if (issetmem) + { + if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value))) + dst = emit_memset (dst, destreg, vec_value, piece_size); + else + dst = emit_memset (dst, destreg, value, piece_size); + } + else + dst = emit_memmov (dst, &src, destreg, srcreg, piece_size); + copied_bytes += piece_size; + } + } + if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT) + set_mem_align (dst, desired_align * BITS_PER_UNIT); + if (MEM_SIZE_KNOWN_P (orig_dst)) + set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes); + + if (!issetmem) + { + int src_align_bytes = get_mem_align_offset (src, desired_align + * BITS_PER_UNIT); + if (src_align_bytes >= 0) + src_align_bytes = desired_align - src_align_bytes; + if (src_align_bytes >= 0) + { + unsigned int src_align; + for (src_align = desired_align; src_align >= 2; src_align >>= 1) + { + if ((src_align_bytes & (src_align - 1)) + == (align_bytes & (src_align - 1))) + break; + } + if (src_align > (unsigned int) desired_align) + src_align = desired_align; + if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT) + set_mem_align (src, src_align * BITS_PER_UNIT); + } + if (MEM_SIZE_KNOWN_P (orig_src)) + set_mem_size (src, MEM_SIZE (orig_src) - align_bytes); + *srcp = src; + } + + return dst; +} + +/* Return true if ALG can be used in current context. + Assume we expand memset if MEMSET is true. */ +static bool +alg_usable_p (enum stringop_alg alg, bool memset, bool have_as) +{ + if (alg == no_stringop) + return false; + if (alg == vector_loop) + return TARGET_SSE || TARGET_AVX; + /* Algorithms using the rep prefix want at least edi and ecx; + additionally, memset wants eax and memcpy wants esi. Don't + consider such algorithms if the user has appropriated those + registers for their own purposes, or if we have a non-default + address space, since some string insns cannot override the segment. */ + if (alg == rep_prefix_1_byte + || alg == rep_prefix_4_byte + || alg == rep_prefix_8_byte) + { + if (have_as) + return false; + if (fixed_regs[CX_REG] + || fixed_regs[DI_REG] + || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG])) + return false; + } + return true; +} + +/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */ +static enum stringop_alg +decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, + unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size, + bool memset, bool zero_memset, bool have_as, + int *dynamic_check, bool *noalign, bool recur) +{ + const struct stringop_algs *algs; + bool optimize_for_speed; + int max = 0; + const struct processor_costs *cost; + int i; + bool any_alg_usable_p = false; + + *noalign = false; + *dynamic_check = -1; + + /* Even if the string operation call is cold, we still might spend a lot + of time processing large blocks. 
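+     Hence we optimize for size only when the whole function is optimized
+     for size, or when this insn is cold and the block is known (or expected)
+     to stay under 256 bytes.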
*/ + if (optimize_function_for_size_p (cfun) + || (optimize_insn_for_size_p () + && (max_size < 256 + || (expected_size != -1 && expected_size < 256)))) + optimize_for_speed = false; + else + optimize_for_speed = true; + + cost = optimize_for_speed ? ix86_cost : &ix86_size_cost; + if (memset) + algs = &cost->memset[TARGET_64BIT != 0]; + else + algs = &cost->memcpy[TARGET_64BIT != 0]; + + /* See maximal size for user defined algorithm. */ + for (i = 0; i < MAX_STRINGOP_ALGS; i++) + { + enum stringop_alg candidate = algs->size[i].alg; + bool usable = alg_usable_p (candidate, memset, have_as); + any_alg_usable_p |= usable; + + if (candidate != libcall && candidate && usable) + max = algs->size[i].max; + } + + /* If expected size is not known but max size is small enough + so inline version is a win, set expected size into + the range. */ + if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1) + && expected_size == -1) + expected_size = min_size / 2 + max_size / 2; + + /* If user specified the algorithm, honor it if possible. */ + if (ix86_stringop_alg != no_stringop + && alg_usable_p (ix86_stringop_alg, memset, have_as)) + return ix86_stringop_alg; + /* rep; movq or rep; movl is the smallest variant. */ + else if (!optimize_for_speed) + { + *noalign = true; + if (!count || (count & 3) || (memset && !zero_memset)) + return alg_usable_p (rep_prefix_1_byte, memset, have_as) + ? rep_prefix_1_byte : loop_1_byte; + else + return alg_usable_p (rep_prefix_4_byte, memset, have_as) + ? rep_prefix_4_byte : loop; + } + /* Very tiny blocks are best handled via the loop, REP is expensive to + setup. */ + else if (expected_size != -1 && expected_size < 4) + return loop_1_byte; + else if (expected_size != -1) + { + enum stringop_alg alg = libcall; + bool alg_noalign = false; + for (i = 0; i < MAX_STRINGOP_ALGS; i++) + { + /* We get here if the algorithms that were not libcall-based + were rep-prefix based and we are unable to use rep prefixes + based on global register usage. Break out of the loop and + use the heuristic below. */ + if (algs->size[i].max == 0) + break; + if (algs->size[i].max >= expected_size || algs->size[i].max == -1) + { + enum stringop_alg candidate = algs->size[i].alg; + + if (candidate != libcall + && alg_usable_p (candidate, memset, have_as)) + { + alg = candidate; + alg_noalign = algs->size[i].noalign; + } + /* Honor TARGET_INLINE_ALL_STRINGOPS by picking + last non-libcall inline algorithm. */ + if (TARGET_INLINE_ALL_STRINGOPS) + { + /* When the current size is best to be copied by a libcall, + but we are still forced to inline, run the heuristic below + that will pick code for medium sized blocks. */ + if (alg != libcall) + { + *noalign = alg_noalign; + return alg; + } + else if (!any_alg_usable_p) + break; + } + else if (alg_usable_p (candidate, memset, have_as)) + { + *noalign = algs->size[i].noalign; + return candidate; + } + } + } + } + /* When asked to inline the call anyway, try to pick meaningful choice. + We look for maximal size of block that is faster to copy by hand and + take blocks of at most of that size guessing that average size will + be roughly half of the block. + + If this turns out to be bad, we might simply specify the preferred + choice in ix86_costs. */ + if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY) + && (algs->unknown_size == libcall + || !alg_usable_p (algs->unknown_size, memset, have_as))) + { + enum stringop_alg alg; + HOST_WIDE_INT new_expected_size = (max > 0 ? 
max : 4096) / 2; + + /* If there aren't any usable algorithms or if recursing already, + then recursing on smaller sizes or same size isn't going to + find anything. Just return the simple byte-at-a-time copy loop. */ + if (!any_alg_usable_p || recur) + { + /* Pick something reasonable. */ + if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur) + *dynamic_check = 128; + return loop_1_byte; + } + alg = decide_alg (count, new_expected_size, min_size, max_size, memset, + zero_memset, have_as, dynamic_check, noalign, true); + gcc_assert (*dynamic_check == -1); + if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) + *dynamic_check = max; + else + gcc_assert (alg != libcall); + return alg; + } + return (alg_usable_p (algs->unknown_size, memset, have_as) + ? algs->unknown_size : libcall); +} + +/* Decide on alignment. We know that the operand is already aligned to ALIGN + (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */ +static int +decide_alignment (int align, + enum stringop_alg alg, + int expected_size, + machine_mode move_mode) +{ + int desired_align = 0; + + gcc_assert (alg != no_stringop); + + if (alg == libcall) + return 0; + if (move_mode == VOIDmode) + return 0; + + desired_align = GET_MODE_SIZE (move_mode); + /* PentiumPro has special logic triggering for 8 byte aligned blocks. + copying whole cacheline at once. */ + if (TARGET_PENTIUMPRO + && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte)) + desired_align = 8; + + if (optimize_size) + desired_align = 1; + if (desired_align < align) + desired_align = align; + if (expected_size != -1 && expected_size < 4) + desired_align = align; + + return desired_align; +} + + +/* Helper function for memcpy. For QImode value 0xXY produce + 0xXYXYXYXY of wide specified by MODE. This is essentially + a * 0x10101010, but we can do slightly better than + synth_mult by unwinding the sequence by hand on CPUs with + slow multiply. */ +static rtx +promote_duplicated_reg (machine_mode mode, rtx val) +{ + machine_mode valmode = GET_MODE (val); + rtx tmp; + int nops = mode == DImode ? 3 : 2; + + gcc_assert (mode == SImode || mode == DImode || val == const0_rtx); + if (val == const0_rtx) + return copy_to_mode_reg (mode, CONST0_RTX (mode)); + if (CONST_INT_P (val)) + { + HOST_WIDE_INT v = INTVAL (val) & 255; + + v |= v << 8; + v |= v << 16; + if (mode == DImode) + v |= (v << 16) << 16; + return copy_to_mode_reg (mode, gen_int_mode (v, mode)); + } + + if (valmode == VOIDmode) + valmode = QImode; + if (valmode != QImode) + val = gen_lowpart (QImode, val); + if (mode == QImode) + return val; + if (!TARGET_PARTIAL_REG_STALL) + nops--; + if (ix86_cost->mult_init[mode == DImode ? 3 : 2] + + ix86_cost->mult_bit * (mode == DImode ? 
8 : 4) + <= (ix86_cost->shift_const + ix86_cost->add) * nops + + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0))) + { + rtx reg = convert_modes (mode, QImode, val, true); + tmp = promote_duplicated_reg (mode, const1_rtx); + return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1, + OPTAB_DIRECT); + } + else + { + rtx reg = convert_modes (mode, QImode, val, true); + + if (!TARGET_PARTIAL_REG_STALL) + if (mode == SImode) + emit_insn (gen_insvsi_1 (reg, reg)); + else + emit_insn (gen_insvdi_1 (reg, reg)); + else + { + tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8), + NULL, 1, OPTAB_DIRECT); + reg = + expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); + } + tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16), + NULL, 1, OPTAB_DIRECT); + reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); + if (mode == SImode) + return reg; + tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32), + NULL, 1, OPTAB_DIRECT); + reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); + return reg; + } +} + +/* Duplicate value VAL using promote_duplicated_reg into maximal size that will + be needed by main loop copying SIZE_NEEDED chunks and prologue getting + alignment from ALIGN to DESIRED_ALIGN. */ +static rtx +promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, + int align) +{ + rtx promoted_val; + + if (TARGET_64BIT + && (size_needed > 4 || (desired_align > align && desired_align > 4))) + promoted_val = promote_duplicated_reg (DImode, val); + else if (size_needed > 2 || (desired_align > align && desired_align > 2)) + promoted_val = promote_duplicated_reg (SImode, val); + else if (size_needed > 1 || (desired_align > align && desired_align > 1)) + promoted_val = promote_duplicated_reg (HImode, val); + else + promoted_val = val; + + return promoted_val; +} + +/* Expand string move (memcpy) ot store (memset) operation. Use i386 string + operations when profitable. The code depends upon architecture, block size + and alignment, but always has one of the following overall structures: + + Aligned move sequence: + + 1) Prologue guard: Conditional that jumps up to epilogues for small + blocks that can be handled by epilogue alone. This is faster + but also needed for correctness, since prologue assume the block + is larger than the desired alignment. + + Optional dynamic check for size and libcall for large + blocks is emitted here too, with -minline-stringops-dynamically. + + 2) Prologue: copy first few bytes in order to get destination + aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less + than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be + copied. We emit either a jump tree on power of two sized + blocks, or a byte loop. + + 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks + with specified algorithm. + + 4) Epilogue: code copying tail of the block that is too small to be + handled by main body (or up to size guarded by prologue guard). 
+ + Misaligned move sequence + + 1) missaligned move prologue/epilogue containing: + a) Prologue handling small memory blocks and jumping to done_label + (skipped if blocks are known to be large enough) + b) Signle move copying first DESIRED_ALIGN-ALIGN bytes if alignment is + needed by single possibly misaligned move + (skipped if alignment is not needed) + c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves + + 2) Zero size guard dispatching to done_label, if needed + + 3) dispatch to library call, if needed, + + 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks + with specified algorithm. */ +bool +ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp, + rtx align_exp, rtx expected_align_exp, + rtx expected_size_exp, rtx min_size_exp, + rtx max_size_exp, rtx probable_max_size_exp, + bool issetmem) +{ + rtx destreg; + rtx srcreg = NULL; + rtx_code_label *label = NULL; + rtx tmp; + rtx_code_label *jump_around_label = NULL; + HOST_WIDE_INT align = 1; + unsigned HOST_WIDE_INT count = 0; + HOST_WIDE_INT expected_size = -1; + int size_needed = 0, epilogue_size_needed; + int desired_align = 0, align_bytes = 0; + enum stringop_alg alg; + rtx promoted_val = NULL; + rtx vec_promoted_val = NULL; + bool force_loopy_epilogue = false; + int dynamic_check; + bool need_zero_guard = false; + bool noalign; + machine_mode move_mode = VOIDmode; + int unroll_factor = 1; + /* TODO: Once value ranges are available, fill in proper data. */ + unsigned HOST_WIDE_INT min_size = 0; + unsigned HOST_WIDE_INT max_size = -1; + unsigned HOST_WIDE_INT probable_max_size = -1; + bool misaligned_prologue_used = false; + bool have_as; + + if (CONST_INT_P (align_exp)) + align = INTVAL (align_exp); + /* i386 can do misaligned access on reasonably increased cost. */ + if (CONST_INT_P (expected_align_exp) + && INTVAL (expected_align_exp) > align) + align = INTVAL (expected_align_exp); + /* ALIGN is the minimum of destination and source alignment, but we care here + just about destination alignment. */ + else if (!issetmem + && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT) + align = MEM_ALIGN (dst) / BITS_PER_UNIT; + + if (CONST_INT_P (count_exp)) + { + min_size = max_size = probable_max_size = count = expected_size + = INTVAL (count_exp); + /* When COUNT is 0, there is nothing to do. */ + if (!count) + return true; + } + else + { + if (min_size_exp) + min_size = INTVAL (min_size_exp); + if (max_size_exp) + max_size = INTVAL (max_size_exp); + if (probable_max_size_exp) + probable_max_size = INTVAL (probable_max_size_exp); + if (CONST_INT_P (expected_size_exp)) + expected_size = INTVAL (expected_size_exp); + } + + /* Make sure we don't need to care about overflow later on. */ + if (count > (HOST_WIDE_INT_1U << 30)) + return false; + + have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst)); + if (!issetmem) + have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)); + + /* Step 0: Decide on preferred algorithm, desired alignment and + size of chunks to be copied by main loop. */ + alg = decide_alg (count, expected_size, min_size, probable_max_size, + issetmem, + issetmem && val_exp == const0_rtx, have_as, + &dynamic_check, &noalign, false); + if (alg == libcall) + return false; + gcc_assert (alg != no_stringop); + + /* For now vector-version of memset is generated only for memory zeroing, as + creating of promoted vector value is very cheap in this case. 
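+     A nonzero memset therefore falls back to the unrolled integer loop.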
*/ + if (issetmem && alg == vector_loop && val_exp != const0_rtx) + alg = unrolled_loop; + + if (!count) + count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp); + destreg = ix86_copy_addr_to_reg (XEXP (dst, 0)); + if (!issetmem) + srcreg = ix86_copy_addr_to_reg (XEXP (src, 0)); + + unroll_factor = 1; + move_mode = word_mode; + switch (alg) + { + case libcall: + case no_stringop: + case last_alg: + gcc_unreachable (); + case loop_1_byte: + need_zero_guard = true; + move_mode = QImode; + break; + case loop: + need_zero_guard = true; + break; + case unrolled_loop: + need_zero_guard = true; + unroll_factor = (TARGET_64BIT ? 4 : 2); + break; + case vector_loop: + need_zero_guard = true; + unroll_factor = 4; + /* Find the widest supported mode. */ + move_mode = word_mode; + while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode)) + != CODE_FOR_nothing) + move_mode = GET_MODE_WIDER_MODE (move_mode); + + /* Find the corresponding vector mode with the same size as MOVE_MODE. + MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */ + if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode)) + { + int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode); + move_mode = mode_for_vector (word_mode, nunits); + if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing) + move_mode = word_mode; + } + gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing); + break; + case rep_prefix_8_byte: + move_mode = DImode; + break; + case rep_prefix_4_byte: + move_mode = SImode; + break; + case rep_prefix_1_byte: + move_mode = QImode; + break; + } + size_needed = GET_MODE_SIZE (move_mode) * unroll_factor; + epilogue_size_needed = size_needed; + + /* If we are going to call any library calls conditionally, make sure any + pending stack adjustment happen before the first conditional branch, + otherwise they will be emitted before the library call only and won't + happen from the other branches. */ + if (dynamic_check != -1) + do_pending_stack_adjust (); + + desired_align = decide_alignment (align, alg, expected_size, move_mode); + if (!TARGET_ALIGN_STRINGOPS || noalign) + align = desired_align; + + /* Step 1: Prologue guard. */ + + /* Alignment code needs count to be in register. */ + if (CONST_INT_P (count_exp) && desired_align > align) + { + if (INTVAL (count_exp) > desired_align + && INTVAL (count_exp) > size_needed) + { + align_bytes + = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT); + if (align_bytes <= 0) + align_bytes = 0; + else + align_bytes = desired_align - align_bytes; + } + if (align_bytes == 0) + count_exp = force_reg (counter_mode (count_exp), count_exp); + } + gcc_assert (desired_align >= 1 && align >= 1); + + /* Misaligned move sequences handle both prologue and epilogue at once. + Default code generation results in a smaller code for large alignments + and also avoids redundant job when sizes are known precisely. */ + misaligned_prologue_used + = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES + && MAX (desired_align, epilogue_size_needed) <= 32 + && desired_align <= epilogue_size_needed + && ((desired_align > align && !align_bytes) + || (!count && epilogue_size_needed > 1))); + + /* Do the cheap promotion to allow better CSE across the + main loop and epilogue (ie one load of the big constant in the + front of all code. + For now the misaligned move sequences do not have fast path + without broadcasting. 
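+     So VAL_EXP is broadcast up front whenever it is a constant or the
+     misaligned prologue/epilogue sequence is going to be used.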
*/ + if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used))) + { + if (alg == vector_loop) + { + gcc_assert (val_exp == const0_rtx); + vec_promoted_val = promote_duplicated_reg (move_mode, val_exp); + promoted_val = promote_duplicated_reg_to_size (val_exp, + GET_MODE_SIZE (word_mode), + desired_align, align); + } + else + { + promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, + desired_align, align); + } + } + /* Misaligned move sequences handles both prologues and epilogues at once. + Default code generation results in smaller code for large alignments and + also avoids redundant job when sizes are known precisely. */ + if (misaligned_prologue_used) + { + /* Misaligned move prologue handled small blocks by itself. */ + expand_set_or_movmem_prologue_epilogue_by_misaligned_moves + (dst, src, &destreg, &srcreg, + move_mode, promoted_val, vec_promoted_val, + &count_exp, + &jump_around_label, + desired_align < align + ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed, + desired_align, align, &min_size, dynamic_check, issetmem); + if (!issetmem) + src = change_address (src, BLKmode, srcreg); + dst = change_address (dst, BLKmode, destreg); + set_mem_align (dst, desired_align * BITS_PER_UNIT); + epilogue_size_needed = 0; + if (need_zero_guard + && min_size < (unsigned HOST_WIDE_INT) size_needed) + { + /* It is possible that we copied enough so the main loop will not + execute. */ + gcc_assert (size_needed > 1); + if (jump_around_label == NULL_RTX) + jump_around_label = gen_label_rtx (); + emit_cmp_and_jump_insns (count_exp, + GEN_INT (size_needed), + LTU, 0, counter_mode (count_exp), 1, jump_around_label); + if (expected_size == -1 + || expected_size < (desired_align - align) / 2 + size_needed) + predict_jump (REG_BR_PROB_BASE * 20 / 100); + else + predict_jump (REG_BR_PROB_BASE * 60 / 100); + } + } + /* Ensure that alignment prologue won't copy past end of block. */ + else if (size_needed > 1 || (desired_align > 1 && desired_align > align)) + { + epilogue_size_needed = MAX (size_needed - 1, desired_align - align); + /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes. + Make sure it is power of 2. */ + epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1); + + /* To improve performance of small blocks, we jump around the VAL + promoting mode. This mean that if the promoted VAL is not constant, + we might not use it in the epilogue and have to use byte + loop variant. */ + if (issetmem && epilogue_size_needed > 2 && !promoted_val) + force_loopy_epilogue = true; + if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed) + || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed) + { + /* If main algorithm works on QImode, no epilogue is needed. + For small sizes just don't align anything. */ + if (size_needed == 1) + desired_align = align; + else + goto epilogue; + } + else if (!count + && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed) + { + label = gen_label_rtx (); + emit_cmp_and_jump_insns (count_exp, + GEN_INT (epilogue_size_needed), + LTU, 0, counter_mode (count_exp), 1, label); + if (expected_size == -1 || expected_size < epilogue_size_needed) + predict_jump (REG_BR_PROB_BASE * 60 / 100); + else + predict_jump (REG_BR_PROB_BASE * 20 / 100); + } + } + + /* Emit code to decide on runtime whether library call or inline should be + used. 
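+     With a constant count the decision is made here at compile time;
+     otherwise COUNT_EXP is compared against DYNAMIC_CHECK and the
+     large-block (cold) side dispatches to the memcpy/memset library call.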
*/ + if (dynamic_check != -1) + { + if (!issetmem && CONST_INT_P (count_exp)) + { + if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check) + { + emit_block_move_via_libcall (dst, src, count_exp, false); + count_exp = const0_rtx; + goto epilogue; + } + } + else + { + rtx_code_label *hot_label = gen_label_rtx (); + if (jump_around_label == NULL_RTX) + jump_around_label = gen_label_rtx (); + emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1), + LEU, 0, counter_mode (count_exp), + 1, hot_label); + predict_jump (REG_BR_PROB_BASE * 90 / 100); + if (issetmem) + set_storage_via_libcall (dst, count_exp, val_exp, false); + else + emit_block_move_via_libcall (dst, src, count_exp, false); + emit_jump (jump_around_label); + emit_label (hot_label); + } + } + + /* Step 2: Alignment prologue. */ + /* Do the expensive promotion once we branched off the small blocks. */ + if (issetmem && !promoted_val) + promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, + desired_align, align); + + if (desired_align > align && !misaligned_prologue_used) + { + if (align_bytes == 0) + { + /* Except for the first move in prologue, we no longer know + constant offset in aliasing info. It don't seems to worth + the pain to maintain it for the first move, so throw away + the info early. */ + dst = change_address (dst, BLKmode, destreg); + if (!issetmem) + src = change_address (src, BLKmode, srcreg); + dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg, + promoted_val, vec_promoted_val, + count_exp, align, desired_align, + issetmem); + /* At most desired_align - align bytes are copied. */ + if (min_size < (unsigned)(desired_align - align)) + min_size = 0; + else + min_size -= desired_align - align; + } + else + { + /* If we know how many bytes need to be stored before dst is + sufficiently aligned, maintain aliasing info accurately. */ + dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg, + srcreg, + promoted_val, + vec_promoted_val, + desired_align, + align_bytes, + issetmem); + + count_exp = plus_constant (counter_mode (count_exp), + count_exp, -align_bytes); + count -= align_bytes; + min_size -= align_bytes; + max_size -= align_bytes; + } + if (need_zero_guard + && min_size < (unsigned HOST_WIDE_INT) size_needed + && (count < (unsigned HOST_WIDE_INT) size_needed + || (align_bytes == 0 + && count < ((unsigned HOST_WIDE_INT) size_needed + + desired_align - align)))) + { + /* It is possible that we copied enough so the main loop will not + execute. */ + gcc_assert (size_needed > 1); + if (label == NULL_RTX) + label = gen_label_rtx (); + emit_cmp_and_jump_insns (count_exp, + GEN_INT (size_needed), + LTU, 0, counter_mode (count_exp), 1, label); + if (expected_size == -1 + || expected_size < (desired_align - align) / 2 + size_needed) + predict_jump (REG_BR_PROB_BASE * 20 / 100); + else + predict_jump (REG_BR_PROB_BASE * 60 / 100); + } + } + if (label && size_needed == 1) + { + emit_label (label); + LABEL_NUSES (label) = 1; + label = NULL; + epilogue_size_needed = 1; + if (issetmem) + promoted_val = val_exp; + } + else if (label == NULL_RTX && !misaligned_prologue_used) + epilogue_size_needed = size_needed; + + /* Step 3: Main loop. 
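+     The loop algorithms dispatch to expand_set_or_movmem_via_loop and the
+     rep-prefixed ones to expand_set_or_movmem_via_rep; afterwards the MEMs
+     are re-offset so the epilogue keeps correct aliasing info.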
*/ + + switch (alg) + { + case libcall: + case no_stringop: + case last_alg: + gcc_unreachable (); + case loop_1_byte: + case loop: + case unrolled_loop: + expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val, + count_exp, move_mode, unroll_factor, + expected_size, issetmem); + break; + case vector_loop: + expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, + vec_promoted_val, count_exp, move_mode, + unroll_factor, expected_size, issetmem); + break; + case rep_prefix_8_byte: + case rep_prefix_4_byte: + case rep_prefix_1_byte: + expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val, + val_exp, count_exp, move_mode, issetmem); + break; + } + /* Adjust properly the offset of src and dest memory for aliasing. */ + if (CONST_INT_P (count_exp)) + { + if (!issetmem) + src = adjust_automodify_address_nv (src, BLKmode, srcreg, + (count / size_needed) * size_needed); + dst = adjust_automodify_address_nv (dst, BLKmode, destreg, + (count / size_needed) * size_needed); + } + else + { + if (!issetmem) + src = change_address (src, BLKmode, srcreg); + dst = change_address (dst, BLKmode, destreg); + } + + /* Step 4: Epilogue to copy the remaining bytes. */ + epilogue: + if (label) + { + /* When the main loop is done, COUNT_EXP might hold original count, + while we want to copy only COUNT_EXP & SIZE_NEEDED bytes. + Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED + bytes. Compensate if needed. */ + + if (size_needed < epilogue_size_needed) + { + tmp = + expand_simple_binop (counter_mode (count_exp), AND, count_exp, + GEN_INT (size_needed - 1), count_exp, 1, + OPTAB_DIRECT); + if (tmp != count_exp) + emit_move_insn (count_exp, tmp); + } + emit_label (label); + LABEL_NUSES (label) = 1; + } + + if (count_exp != const0_rtx && epilogue_size_needed > 1) + { + if (force_loopy_epilogue) + expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp, + epilogue_size_needed); + else + { + if (issetmem) + expand_setmem_epilogue (dst, destreg, promoted_val, + vec_promoted_val, count_exp, + epilogue_size_needed); + else + expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp, + epilogue_size_needed); + } + } + if (jump_around_label) + emit_label (jump_around_label); + return true; +} + + +/* Expand the appropriate insns for doing strlen if not just doing + repnz; scasb + + out = result, initialized with the start address + align_rtx = alignment of the address. + scratch = scratch register, initialized with the startaddress when + not aligned, otherwise undefined + + This is just the body. It needs the initializations mentioned above and + some address computing at the end. These things are done in i386.md. */ + +static void +ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) +{ + int align; + rtx tmp; + rtx_code_label *align_2_label = NULL; + rtx_code_label *align_3_label = NULL; + rtx_code_label *align_4_label = gen_label_rtx (); + rtx_code_label *end_0_label = gen_label_rtx (); + rtx mem; + rtx tmpreg = gen_reg_rtx (SImode); + rtx scratch = gen_reg_rtx (SImode); + rtx cmp; + + align = 0; + if (CONST_INT_P (align_rtx)) + align = INTVAL (align_rtx); + + /* Loop to check 1..3 bytes for null to get an aligned pointer. */ + + /* Is there a known alignment and is it less than 4? */ + if (align < 4) + { + rtx scratch1 = gen_reg_rtx (Pmode); + emit_move_insn (scratch1, out); + /* Is there a known alignment and is it not 2? 
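+     (For alignment 0 or 1 we may have to test as many as three bytes before
+     reaching a 4-byte boundary; for alignment 2, at most two.)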
*/ + if (align != 2) + { + align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */ + align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */ + + /* Leave just the 3 lower bits. */ + align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3), + NULL_RTX, 0, OPTAB_WIDEN); + + emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, + Pmode, 1, align_4_label); + emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL, + Pmode, 1, align_2_label); + emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL, + Pmode, 1, align_3_label); + } + else + { + /* Since the alignment is 2, we have to check 2 or 0 bytes; + check if is aligned to 4 - byte. */ + + align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx, + NULL_RTX, 0, OPTAB_WIDEN); + + emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, + Pmode, 1, align_4_label); + } + + mem = change_address (src, QImode, out); + + /* Now compare the bytes. */ + + /* Compare the first n unaligned byte on a byte per byte basis. */ + emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, + QImode, 1, end_0_label); + + /* Increment the address. */ + emit_insn (ix86_gen_add3 (out, out, const1_rtx)); + + /* Not needed with an alignment of 2 */ + if (align != 2) + { + emit_label (align_2_label); + + emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, + end_0_label); + + emit_insn (ix86_gen_add3 (out, out, const1_rtx)); + + emit_label (align_3_label); + } + + emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, + end_0_label); + + emit_insn (ix86_gen_add3 (out, out, const1_rtx)); + } + + /* Generate loop to check 4 bytes at a time. It is not a good idea to + align this loop. It gives only huge programs, but does not help to + speed up. */ + emit_label (align_4_label); + + mem = change_address (src, SImode, out); + emit_move_insn (scratch, mem); + emit_insn (ix86_gen_add3 (out, out, GEN_INT (4))); + + /* This formula yields a nonzero result iff one of the bytes is zero. + This saves three branches inside loop and many cycles. */ + + emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101))); + emit_insn (gen_one_cmplsi2 (scratch, scratch)); + emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); + emit_insn (gen_andsi3 (tmpreg, tmpreg, + gen_int_mode (0x80808080, SImode))); + emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, + align_4_label); + + if (TARGET_CMOVE) + { + rtx reg = gen_reg_rtx (SImode); + rtx reg2 = gen_reg_rtx (Pmode); + emit_move_insn (reg, tmpreg); + emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16))); + + /* If zero is not in the first two bytes, move two bytes forward. */ + emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); + tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + emit_insn (gen_rtx_SET (tmpreg, + gen_rtx_IF_THEN_ELSE (SImode, tmp, + reg, + tmpreg))); + /* Emit lea manually to avoid clobbering of flags. */ + emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx))); + + tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + emit_insn (gen_rtx_SET (out, + gen_rtx_IF_THEN_ELSE (Pmode, tmp, + reg2, + out))); + } + else + { + rtx_code_label *end_2_label = gen_label_rtx (); + /* Is zero in the first two bytes? 
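+	 Test the 0x80 marker bits of the two low bytes; if neither is set the
+	 zero byte is in the upper half, so shift the markers down by 16 and
+	 advance OUT by two.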
*/ + + emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); + tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); + tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, end_2_label), + pc_rtx); + tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); + JUMP_LABEL (tmp) = end_2_label; + + /* Not in the first two. Move two bytes forward. */ + emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16))); + emit_insn (ix86_gen_add3 (out, out, const2_rtx)); + + emit_label (end_2_label); + + } + + /* Avoid branch in fixing the byte. */ + tmpreg = gen_lowpart (QImode, tmpreg); + emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg)); + tmp = gen_rtx_REG (CCmode, FLAGS_REG); + cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx); + emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp)); + + emit_label (end_0_label); +} + +/* Expand strlen. */ + +bool +ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align) +{ + rtx addr, scratch1, scratch2, scratch3, scratch4; + + /* The generic case of strlen expander is long. Avoid it's + expanding unless TARGET_INLINE_ALL_STRINGOPS. */ + + if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 + && !TARGET_INLINE_ALL_STRINGOPS + && !optimize_insn_for_size_p () + && (!CONST_INT_P (align) || INTVAL (align) < 4)) + return false; + + addr = force_reg (Pmode, XEXP (src, 0)); + scratch1 = gen_reg_rtx (Pmode); + + if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 + && !optimize_insn_for_size_p ()) + { + /* Well it seems that some optimizer does not combine a call like + foo(strlen(bar), strlen(bar)); + when the move and the subtraction is done here. It does calculate + the length just once when these instructions are done inside of + output_strlen_unroll(). But I think since &bar[strlen(bar)] is + often used and I use one fewer register for the lifetime of + output_strlen_unroll() this is better. */ + + emit_move_insn (out, addr); + + ix86_expand_strlensi_unroll_1 (out, src, align); + + /* strlensi_unroll_1 returns the address of the zero at the end of + the string, like memchr(), so compute the length by subtracting + the start address. */ + emit_insn (ix86_gen_sub3 (out, out, addr)); + } + else + { + rtx unspec; + + /* Can't use this if the user has appropriated eax, ecx, or edi. */ + if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) + return false; + /* Can't use this for non-default address spaces. */ + if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src))) + return false; + + scratch2 = gen_reg_rtx (Pmode); + scratch3 = gen_reg_rtx (Pmode); + scratch4 = force_reg (Pmode, constm1_rtx); + + emit_move_insn (scratch3, addr); + eoschar = force_reg (QImode, eoschar); + + src = replace_equiv_address_nv (src, scratch3); + + /* If .md starts supporting :P, this can be done in .md. */ + unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align, + scratch4), UNSPEC_SCAS); + emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec)); + emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1)); + emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx)); + } + return true; +} + +/* For given symbol (function) construct code to compute address of it's PLT + entry in large x86-64 PIC model. 
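+   The result is pic_offset_table_rtx plus an UNSPEC_PLTOFF reference to the
+   symbol, materialized in a fresh Pmode register.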
*/ +static rtx +construct_plt_address (rtx symbol) +{ + rtx tmp, unspec; + + gcc_assert (GET_CODE (symbol) == SYMBOL_REF); + gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF); + gcc_assert (Pmode == DImode); + + tmp = gen_reg_rtx (Pmode); + unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF); + + emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec)); + emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx)); + return tmp; +} + +rtx +ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, + rtx callarg2, + rtx pop, bool sibcall) +{ + rtx vec[3]; + rtx use = NULL, call; + unsigned int vec_len = 0; + + if (pop == const0_rtx) + pop = NULL; + gcc_assert (!TARGET_64BIT || !pop); + + if (TARGET_MACHO && !TARGET_64BIT) + { +#if TARGET_MACHO + if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) + fnaddr = machopic_indirect_call_target (fnaddr); +#endif + } + else + { + /* Static functions and indirect calls don't need the pic register. Also, + check if PLT was explicitly avoided via no-plt or "noplt" attribute, making + it an indirect call. */ + rtx addr = XEXP (fnaddr, 0); + if (flag_pic + && GET_CODE (addr) == SYMBOL_REF + && !SYMBOL_REF_LOCAL_P (addr)) + { + if (flag_plt + && (SYMBOL_REF_DECL (addr) == NULL_TREE + || !lookup_attribute ("noplt", + DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr))))) + { + if (!TARGET_64BIT + || (ix86_cmodel == CM_LARGE_PIC + && DEFAULT_ABI != MS_ABI)) + { + use_reg (&use, gen_rtx_REG (Pmode, + REAL_PIC_OFFSET_TABLE_REGNUM)); + if (ix86_use_pseudo_pic_reg ()) + emit_move_insn (gen_rtx_REG (Pmode, + REAL_PIC_OFFSET_TABLE_REGNUM), + pic_offset_table_rtx); + } + } + else if (!TARGET_PECOFF && !TARGET_MACHO) + { + if (TARGET_64BIT) + { + fnaddr = gen_rtx_UNSPEC (Pmode, + gen_rtvec (1, addr), + UNSPEC_GOTPCREL); + fnaddr = gen_rtx_CONST (Pmode, fnaddr); + } + else + { + fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), + UNSPEC_GOT); + fnaddr = gen_rtx_CONST (Pmode, fnaddr); + fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, + fnaddr); + } + fnaddr = gen_const_mem (Pmode, fnaddr); + /* Pmode may not be the same as word_mode for x32, which + doesn't support indirect branch via 32-bit memory slot. + Since x32 GOT slot is 64 bit with zero upper 32 bits, + indirect branch via x32 GOT slot is OK. */ + if (GET_MODE (fnaddr) != word_mode) + fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr); + fnaddr = gen_rtx_MEM (QImode, fnaddr); + } + } + } + + /* Skip setting up RAX register for -mskip-rax-setup when there are no + parameters passed in vector registers. */ + if (TARGET_64BIT + && (INTVAL (callarg2) > 0 + || (INTVAL (callarg2) == 0 + && (TARGET_SSE || !flag_skip_rax_setup)))) + { + rtx al = gen_rtx_REG (QImode, AX_REG); + emit_move_insn (al, callarg2); + use_reg (&use, al); + } + + if (ix86_cmodel == CM_LARGE_PIC + && !TARGET_PECOFF + && MEM_P (fnaddr) + && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF + && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode)) + fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0))); + /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect + branch via x32 GOT slot is OK. */ + else if (!(TARGET_X32 + && MEM_P (fnaddr) + && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND + && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode)) + && (sibcall + ? 
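For context on the callarg2 handling above: the 64-bit SysV calling convention passes, in AL, the number of vector registers used by the arguments of a variadic call, which is why the expander moves callarg2 into the QImode AX register and records it as used (and why -mskip-rax-setup may omit the setup when no vector registers are involved). An ordinary source-level illustration:

#include <stdio.h>

int
main (void)
{
  /* No SSE registers carry arguments here, so on x86-64 SysV the caller
     zeroes AL before the call (typically "xorl %eax, %eax").  */
  printf ("%d\n", 42);

  /* One double travels in %xmm0, so the caller sets AL to 1.  */
  printf ("%f\n", 3.14);
  return 0;
}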
!sibcall_insn_operand (XEXP (fnaddr, 0), word_mode) + : !call_insn_operand (XEXP (fnaddr, 0), word_mode))) + { + fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1); + fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr)); + } + + call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); + + if (retval) + { + /* We should add bounds as destination register in case + pointer with bounds may be returned. */ + if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval))) + { + rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG); + rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1); + if (GET_CODE (retval) == PARALLEL) + { + b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx); + b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx); + rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1)); + retval = chkp_join_splitted_slot (retval, par); + } + else + { + retval = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (3, retval, b0, b1)); + chkp_put_regs_to_expr_list (retval); + } + } + + call = gen_rtx_SET (retval, call); + } + vec[vec_len++] = call; + + if (pop) + { + pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); + pop = gen_rtx_SET (stack_pointer_rtx, pop); + vec[vec_len++] = pop; + } + + if (TARGET_64BIT_MS_ABI + && (!callarg2 || INTVAL (callarg2) != -2)) + { + int const cregs_size + = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers); + int i; + + for (i = 0; i < cregs_size; i++) + { + int regno = x86_64_ms_sysv_extra_clobbered_registers[i]; + machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode; + + clobber_reg (&use, gen_rtx_REG (mode, regno)); + } + } + + if (vec_len > 1) + call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec)); + call = emit_call_insn (call); + if (use) + CALL_INSN_FUNCTION_USAGE (call) = use; + + return call; +} + +/* Return true if the function being called was marked with attribute "noplt" + or using -fno-plt and we are compiling for non-PIC and x86_64. We need to + handle the non-PIC case in the backend because there is no easy interface + for the front-end to force non-PLT calls to use the GOT. This is currently + used only with 64-bit ELF targets to call the function marked "noplt" + indirectly. */ + +static bool +ix86_nopic_noplt_attribute_p (rtx call_op) +{ + if (flag_pic || ix86_cmodel == CM_LARGE + || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF + || SYMBOL_REF_LOCAL_P (call_op)) + return false; + + tree symbol_decl = SYMBOL_REF_DECL (call_op); + + if (!flag_plt + || (symbol_decl != NULL_TREE + && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl)))) + return true; + + return false; +} + +/* Output the assembly for a call instruction. */ + +const char * +ix86_output_call_insn (rtx_insn *insn, rtx call_op) +{ + bool direct_p = constant_call_address_operand (call_op, VOIDmode); + bool seh_nop_p = false; + const char *xasm; + + if (SIBLING_CALL_P (insn)) + { + if (direct_p) + { + if (ix86_nopic_noplt_attribute_p (call_op)) + xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; + else + xasm = "%!jmp\t%P0"; + } + /* SEH epilogue detection requires the indirect branch case + to include REX.W. */ + else if (TARGET_SEH) + xasm = "%!rex.W jmp\t%A0"; + else + xasm = "%!jmp\t%A0"; + + output_asm_insn (xasm, &call_op); + return ""; + } + + /* SEH unwinding can require an extra nop to be emitted in several + circumstances. Determine if we have one of those. */ + if (TARGET_SEH) + { + rtx_insn *i; + + for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i)) + { + /* If we get to another real insn, we don't need the nop. 
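The GOTPCREL jmp/call templates selected above are used either globally under -fno-plt or per function through the attribute that ix86_nopic_noplt_attribute_p looks up. Minimal user-level usage (hypothetical function names):

/* Declaring a function "noplt" asks GCC to call it through the GOT
   rather than a PLT stub; with the templates above this becomes
   "call *foo@GOTPCREL(%rip)" on 64-bit ELF targets.  */
extern void foo (void) __attribute__ ((noplt));

void
bar (void)
{
  foo ();
}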
*/ + if (INSN_P (i)) + break; + + /* If we get to the epilogue note, prevent a catch region from + being adjacent to the standard epilogue sequence. If non- + call-exceptions, we'll have done this during epilogue emission. */ + if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG + && !flag_non_call_exceptions + && !can_throw_internal (insn)) + { + seh_nop_p = true; + break; + } + } + + /* If we didn't find a real insn following the call, prevent the + unwinder from looking into the next function. */ + if (i == NULL) + seh_nop_p = true; + } + + if (direct_p) + { + if (ix86_nopic_noplt_attribute_p (call_op)) + xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; + else + xasm = "%!call\t%P0"; + } + else + xasm = "%!call\t%A0"; + + output_asm_insn (xasm, &call_op); + + if (seh_nop_p) + return "nop"; + + return ""; +} + +/* Clear stack slot assignments remembered from previous functions. + This is called from INIT_EXPANDERS once before RTL is emitted for each + function. */ + +static struct machine_function * +ix86_init_machine_status (void) +{ + struct machine_function *f; + + f = ggc_cleared_alloc (); + f->use_fast_prologue_epilogue_nregs = -1; + f->call_abi = ix86_abi; + + return f; +} + +/* Return a MEM corresponding to a stack slot with mode MODE. + Allocate a new slot if necessary. + + The RTL for a function can have several slots available: N is + which slot to use. */ + +rtx +assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n) +{ + struct stack_local_entry *s; + + gcc_assert (n < MAX_386_STACK_LOCALS); + + for (s = ix86_stack_locals; s; s = s->next) + if (s->mode == mode && s->n == n) + return validize_mem (copy_rtx (s->rtl)); + + s = ggc_alloc (); + s->n = n; + s->mode = mode; + s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); + + s->next = ix86_stack_locals; + ix86_stack_locals = s; + return validize_mem (copy_rtx (s->rtl)); +} + +static void +ix86_instantiate_decls (void) +{ + struct stack_local_entry *s; + + for (s = ix86_stack_locals; s; s = s->next) + if (s->rtl != NULL_RTX) + instantiate_decl_rtl (s->rtl); +} + +/* Return the number used for encoding REG, in the range 0..7. */ + +static int +reg_encoded_number (rtx reg) +{ + unsigned regno = REGNO (reg); + switch (regno) + { + case AX_REG: + return 0; + case CX_REG: + return 1; + case DX_REG: + return 2; + case BX_REG: + return 3; + case SP_REG: + return 4; + case BP_REG: + return 5; + case SI_REG: + return 6; + case DI_REG: + return 7; + default: + break; + } + if (IN_RANGE (regno, FIRST_STACK_REG, LAST_STACK_REG)) + return regno - FIRST_STACK_REG; + if (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG)) + return regno - FIRST_SSE_REG; + if (IN_RANGE (regno, FIRST_MMX_REG, LAST_MMX_REG)) + return regno - FIRST_MMX_REG; + if (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG)) + return regno - FIRST_REX_SSE_REG; + if (IN_RANGE (regno, FIRST_REX_INT_REG, LAST_REX_INT_REG)) + return regno - FIRST_REX_INT_REG; + if (IN_RANGE (regno, FIRST_MASK_REG, LAST_MASK_REG)) + return regno - FIRST_MASK_REG; + if (IN_RANGE (regno, FIRST_BND_REG, LAST_BND_REG)) + return regno - FIRST_BND_REG; + return -1; +} + +/* Given an insn INSN with NOPERANDS OPERANDS, return the modr/m byte used + in its encoding if it could be relevant for ROP mitigation, otherwise + return -1. If POPNO0 and POPNO1 are nonnull, store the operand numbers + used for calculating it into them. 
*/ + +static int +ix86_get_modrm_for_rop (rtx_insn *insn, rtx *operands, int noperands, + int *popno0 = 0, int *popno1 = 0) +{ + if (asm_noperands (PATTERN (insn)) >= 0) + return -1; + int has_modrm = get_attr_modrm (insn); + if (!has_modrm) + return -1; + enum attr_modrm_class cls = get_attr_modrm_class (insn); + rtx op0, op1; + switch (cls) + { + case MODRM_CLASS_OP02: + gcc_assert (noperands >= 3); + if (popno0) + { + *popno0 = 0; + *popno1 = 2; + } + op0 = operands[0]; + op1 = operands[2]; + break; + case MODRM_CLASS_OP01: + gcc_assert (noperands >= 2); + if (popno0) + { + *popno0 = 0; + *popno1 = 1; + } + op0 = operands[0]; + op1 = operands[1]; + break; + default: + return -1; + } + if (REG_P (op0) && REG_P (op1)) + { + int enc0 = reg_encoded_number (op0); + int enc1 = reg_encoded_number (op1); + return 0xc0 + (enc1 << 3) + enc0; + } + return -1; +} + +/* Check whether x86 address PARTS is a pc-relative address. */ + +static bool +rip_relative_addr_p (struct ix86_address *parts) +{ + rtx base, index, disp; + + base = parts->base; + index = parts->index; + disp = parts->disp; + + if (disp && !base && !index) + { + if (TARGET_64BIT) + { + rtx symbol = disp; + + if (GET_CODE (disp) == CONST) + symbol = XEXP (disp, 0); + if (GET_CODE (symbol) == PLUS + && CONST_INT_P (XEXP (symbol, 1))) + symbol = XEXP (symbol, 0); + + if (GET_CODE (symbol) == LABEL_REF + || (GET_CODE (symbol) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (symbol) == 0) + || (GET_CODE (symbol) == UNSPEC + && (XINT (symbol, 1) == UNSPEC_GOTPCREL + || XINT (symbol, 1) == UNSPEC_PCREL + || XINT (symbol, 1) == UNSPEC_GOTNTPOFF))) + return true; + } + } + return false; +} + +/* Calculate the length of the memory address in the instruction encoding. + Includes addr32 prefix, does not include the one-byte modrm, opcode, + or other prefixes. We never generate addr32 prefix for LEA insn. */ + +int +memory_address_length (rtx addr, bool lea) +{ + struct ix86_address parts; + rtx base, index, disp; + int len; + int ok; + + if (GET_CODE (addr) == PRE_DEC + || GET_CODE (addr) == POST_INC + || GET_CODE (addr) == PRE_MODIFY + || GET_CODE (addr) == POST_MODIFY) + return 0; + + ok = ix86_decompose_address (addr, &parts); + gcc_assert (ok); + + len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1; + + /* If this is not LEA instruction, add the length of addr32 prefix. */ + if (TARGET_64BIT && !lea + && (SImode_address_operand (addr, VOIDmode) + || (parts.base && GET_MODE (parts.base) == SImode) + || (parts.index && GET_MODE (parts.index) == SImode))) + len++; + + base = parts.base; + index = parts.index; + disp = parts.disp; + + if (base && SUBREG_P (base)) + base = SUBREG_REG (base); + if (index && SUBREG_P (index)) + index = SUBREG_REG (index); + + gcc_assert (base == NULL_RTX || REG_P (base)); + gcc_assert (index == NULL_RTX || REG_P (index)); + + /* Rule of thumb: + - esp as the base always wants an index, + - ebp as the base always wants a displacement, + - r12 as the base always wants an index, + - r13 as the base always wants a displacement. */ + + /* Register Indirect. */ + if (base && !index && !disp) + { + /* esp (for its index) and ebp (for its displacement) need + the two-byte modrm form. Similarly for r12 and r13 in 64-bit + code. */ + if (base == arg_pointer_rtx + || base == frame_pointer_rtx + || REGNO (base) == SP_REG + || REGNO (base) == BP_REG + || REGNO (base) == R12_REG + || REGNO (base) == R13_REG) + len++; + } + + /* Direct Addressing. 
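ix86_get_modrm_for_rop above builds the register-register form of the ModRM byte directly: mod = 11b in the top two bits, the reg field in bits 5..3, and the r/m field in bits 2..0. A self-contained restatement, using the encoding numbers returned by reg_encoded_number:

#include <stdio.h>

/* Register-direct ModRM byte, the same 0xc0 + (enc1 << 3) + enc0
   computation performed above.  */
static int
modrm_reg_reg (int reg_field, int rm_field)
{
  return 0xc0 | (reg_field << 3) | rm_field;
}

int
main (void)
{
  /* EAX encodes as 0 and ECX as 1 (see reg_encoded_number), so e.g.
     "mov %eax, %ecx" (opcode 89 /r) uses ModRM byte 0xc1.  */
  printf ("0x%02x\n", modrm_reg_reg (0 /* eax */, 1 /* ecx */));
  return 0;
}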
In 64-bit mode mod 00 r/m 5 + is not disp32, but disp32(%rip), so for disp32 + SIB byte is needed, unless print_operand_address + optimizes it into disp32(%rip) or (%rip) is implied + by UNSPEC. */ + else if (disp && !base && !index) + { + len += 4; + if (rip_relative_addr_p (&parts)) + len++; + } + else + { + /* Find the length of the displacement constant. */ + if (disp) + { + if (base && satisfies_constraint_K (disp)) + len += 1; + else + len += 4; + } + /* ebp always wants a displacement. Similarly r13. */ + else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG)) + len++; + + /* An index requires the two-byte modrm form.... */ + if (index + /* ...like esp (or r12), which always wants an index. */ + || base == arg_pointer_rtx + || base == frame_pointer_rtx + || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG))) + len++; + } + + return len; +} + +/* Compute default value for "length_immediate" attribute. When SHORTFORM + is set, expect that insn have 8bit immediate alternative. */ +int +ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform) +{ + int len = 0; + int i; + extract_insn_cached (insn); + for (i = recog_data.n_operands - 1; i >= 0; --i) + if (CONSTANT_P (recog_data.operand[i])) + { + enum attr_mode mode = get_attr_mode (insn); + + gcc_assert (!len); + if (shortform && CONST_INT_P (recog_data.operand[i])) + { + HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]); + switch (mode) + { + case MODE_QI: + len = 1; + continue; + case MODE_HI: + ival = trunc_int_for_mode (ival, HImode); + break; + case MODE_SI: + ival = trunc_int_for_mode (ival, SImode); + break; + default: + break; + } + if (IN_RANGE (ival, -128, 127)) + { + len = 1; + continue; + } + } + switch (mode) + { + case MODE_QI: + len = 1; + break; + case MODE_HI: + len = 2; + break; + case MODE_SI: + len = 4; + break; + /* Immediates for DImode instructions are encoded + as 32bit sign extended values. */ + case MODE_DI: + len = 4; + break; + default: + fatal_insn ("unknown insn mode", insn); + } + } + return len; +} + +/* Compute default value for "length_address" attribute. */ +int +ix86_attr_length_address_default (rtx_insn *insn) +{ + int i; + + if (get_attr_type (insn) == TYPE_LEA) + { + rtx set = PATTERN (insn), addr; + + if (GET_CODE (set) == PARALLEL) + set = XVECEXP (set, 0, 0); + + gcc_assert (GET_CODE (set) == SET); + + addr = SET_SRC (set); + + return memory_address_length (addr, true); + } + + extract_insn_cached (insn); + for (i = recog_data.n_operands - 1; i >= 0; --i) + { + rtx op = recog_data.operand[i]; + if (MEM_P (op)) + { + constrain_operands_cached (insn, reload_completed); + if (which_alternative != -1) + { + const char *constraints = recog_data.constraints[i]; + int alt = which_alternative; + + while (*constraints == '=' || *constraints == '+') + constraints++; + while (alt-- > 0) + while (*constraints++ != ',') + ; + /* Skip ignored operands. */ + if (*constraints == 'X') + continue; + } + + int len = memory_address_length (XEXP (op, 0), false); + + /* Account for segment prefix for non-default addr spaces. */ + if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op))) + len++; + + return len; + } + } + return 0; +} + +/* Compute default value for "length_vex" attribute. It includes + 2 or 3 byte VEX prefix and 1 opcode byte. */ + +int +ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode, + bool has_vex_w) +{ + int i; + + /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3 + byte VEX prefix. 
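ix86_attr_length_immediate_default above follows the usual x86 encoding rule: when the insn has an 8-bit-immediate alternative and the (mode-truncated) constant fits in [-128, 127], the immediate occupies one byte; otherwise it takes its operand width, with 64-bit immediates limited to a sign-extended 32-bit field. A compact approximation of that rule, with hypothetical parameter names:

/* Approximate size in bytes of an immediate operand, mirroring the
   decision above: short form + signed-byte value -> 1 byte, otherwise
   1/2/4 bytes by operand size, and DImode immediates are encoded as
   sign-extended 32-bit values.  */
static int
imm_len (long long value, int op_size, int has_short_form)
{
  if (has_short_form && value >= -128 && value <= 127)
    return 1;
  switch (op_size)
    {
    case 1:  return 1;
    case 2:  return 2;
    case 8:  return 4;   /* sign-extended imm32 */
    default: return 4;
    }
}

/* imm_len (1, 4, 1) == 1    (e.g. "addl $1, %eax" uses the imm8 form)
   imm_len (1000, 4, 1) == 4 (the value no longer fits in a signed byte) */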
*/ + if (!has_0f_opcode || has_vex_w) + return 3 + 1; + + /* We can always use 2 byte VEX prefix in 32bit. */ + if (!TARGET_64BIT) + return 2 + 1; + + extract_insn_cached (insn); + + for (i = recog_data.n_operands - 1; i >= 0; --i) + if (REG_P (recog_data.operand[i])) + { + /* REX.W bit uses 3 byte VEX prefix. */ + if (GET_MODE (recog_data.operand[i]) == DImode + && GENERAL_REG_P (recog_data.operand[i])) + return 3 + 1; + } + else + { + /* REX.X or REX.B bits use 3 byte VEX prefix. */ + if (MEM_P (recog_data.operand[i]) + && x86_extended_reg_mentioned_p (recog_data.operand[i])) + return 3 + 1; + } + + return 2 + 1; +} + +/* Return the maximum number of instructions a cpu can issue. */ + +static int +ix86_issue_rate (void) +{ + switch (ix86_tune) + { + case PROCESSOR_PENTIUM: + case PROCESSOR_LAKEMONT: + case PROCESSOR_BONNELL: + case PROCESSOR_SILVERMONT: + case PROCESSOR_KNL: + case PROCESSOR_INTEL: + case PROCESSOR_K6: + case PROCESSOR_BTVER2: + case PROCESSOR_PENTIUM4: + case PROCESSOR_NOCONA: + return 2; + + case PROCESSOR_PENTIUMPRO: + case PROCESSOR_ATHLON: + case PROCESSOR_K8: + case PROCESSOR_AMDFAM10: + case PROCESSOR_GENERIC: + case PROCESSOR_BTVER1: + return 3; + + case PROCESSOR_BDVER1: + case PROCESSOR_BDVER2: + case PROCESSOR_BDVER3: + case PROCESSOR_BDVER4: + case PROCESSOR_ZNVER1: + case PROCESSOR_CORE2: + case PROCESSOR_NEHALEM: + case PROCESSOR_SANDYBRIDGE: + case PROCESSOR_HASWELL: + return 4; + + default: + return 1; + } +} + +/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set + by DEP_INSN and nothing set by DEP_INSN. */ + +static bool +ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type) +{ + rtx set, set2; + + /* Simplify the test for uninteresting insns. */ + if (insn_type != TYPE_SETCC + && insn_type != TYPE_ICMOV + && insn_type != TYPE_FCMOV + && insn_type != TYPE_IBR) + return false; + + if ((set = single_set (dep_insn)) != 0) + { + set = SET_DEST (set); + set2 = NULL_RTX; + } + else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL + && XVECLEN (PATTERN (dep_insn), 0) == 2 + && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET + && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) + { + set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); + set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); + } + else + return false; + + if (!REG_P (set) || REGNO (set) != FLAGS_REG) + return false; + + /* This test is true if the dependent insn reads the flags but + not any other potentially set register. */ + if (!reg_overlap_mentioned_p (set, PATTERN (insn))) + return false; + + if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn))) + return false; + + return true; +} + +/* Return true iff USE_INSN has a memory address with operands set by + SET_INSN. */ + +bool +ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn) +{ + int i; + extract_insn_cached (use_insn); + for (i = recog_data.n_operands - 1; i >= 0; --i) + if (MEM_P (recog_data.operand[i])) + { + rtx addr = XEXP (recog_data.operand[i], 0); + return modified_in_p (addr, set_insn) != 0; + } + return false; +} + +/* Helper function for exact_store_load_dependency. + Return true if addr is found in insn. 
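The VEX-length logic above reflects the encoding constraint that the two-byte (C5) VEX prefix can only express the 0F opcode map with VEX.W clear and without the X/B register-extension bits; anything else forces the three-byte (C4) form. A simplified predicate stating the same rule (not the insn-attribute machinery itself):

/* True when the short two-byte VEX prefix is usable: 0F opcode map,
   VEX.W clear, and no REX.X/REX.B-style extension needed for the
   operands -- the same conditions checked above.  */
static int
vex_2byte_ok (int map_is_0f, int vex_w, int needs_x_or_b_extension)
{
  return map_is_0f && !vex_w && !needs_x_or_b_extension;
}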
*/ +static bool +exact_dependency_1 (rtx addr, rtx insn) +{ + enum rtx_code code; + const char *format_ptr; + int i, j; + + code = GET_CODE (insn); + switch (code) + { + case MEM: + if (rtx_equal_p (addr, insn)) + return true; + break; + case REG: + CASE_CONST_ANY: + case SYMBOL_REF: + case CODE_LABEL: + case PC: + case CC0: + case EXPR_LIST: + return false; + default: + break; + } + + format_ptr = GET_RTX_FORMAT (code); + for (i = 0; i < GET_RTX_LENGTH (code); i++) + { + switch (*format_ptr++) + { + case 'e': + if (exact_dependency_1 (addr, XEXP (insn, i))) + return true; + break; + case 'E': + for (j = 0; j < XVECLEN (insn, i); j++) + if (exact_dependency_1 (addr, XVECEXP (insn, i, j))) + return true; + break; + } + } + return false; +} + +/* Return true if there exists exact dependency for store & load, i.e. + the same memory address is used in them. */ +static bool +exact_store_load_dependency (rtx_insn *store, rtx_insn *load) +{ + rtx set1, set2; + + set1 = single_set (store); + if (!set1) + return false; + if (!MEM_P (SET_DEST (set1))) + return false; + set2 = single_set (load); + if (!set2) + return false; + if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2))) + return true; + return false; +} + +static int +ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost) +{ + enum attr_type insn_type, dep_insn_type; + enum attr_memory memory; + rtx set, set2; + int dep_insn_code_number; + + /* Anti and output dependencies have zero cost on all CPUs. */ + if (REG_NOTE_KIND (link) != 0) + return 0; + + dep_insn_code_number = recog_memoized (dep_insn); + + /* If we can't recognize the insns, we can't really do anything. */ + if (dep_insn_code_number < 0 || recog_memoized (insn) < 0) + return cost; + + insn_type = get_attr_type (insn); + dep_insn_type = get_attr_type (dep_insn); + + switch (ix86_tune) + { + case PROCESSOR_PENTIUM: + case PROCESSOR_LAKEMONT: + /* Address Generation Interlock adds a cycle of latency. */ + if (insn_type == TYPE_LEA) + { + rtx addr = PATTERN (insn); + + if (GET_CODE (addr) == PARALLEL) + addr = XVECEXP (addr, 0, 0); + + gcc_assert (GET_CODE (addr) == SET); + + addr = SET_SRC (addr); + if (modified_in_p (addr, dep_insn)) + cost += 1; + } + else if (ix86_agi_dependent (dep_insn, insn)) + cost += 1; + + /* ??? Compares pair with jump/setcc. */ + if (ix86_flags_dependent (insn, dep_insn, insn_type)) + cost = 0; + + /* Floating point stores require value to be ready one cycle earlier. */ + if (insn_type == TYPE_FMOV + && get_attr_memory (insn) == MEMORY_STORE + && !ix86_agi_dependent (dep_insn, insn)) + cost += 1; + break; + + case PROCESSOR_PENTIUMPRO: + /* INT->FP conversion is expensive. */ + if (get_attr_fp_int_src (dep_insn)) + cost += 5; + + /* There is one cycle extra latency between an FP op and a store. */ + if (insn_type == TYPE_FMOV + && (set = single_set (dep_insn)) != NULL_RTX + && (set2 = single_set (insn)) != NULL_RTX + && rtx_equal_p (SET_DEST (set), SET_SRC (set2)) + && MEM_P (SET_DEST (set2))) + cost += 1; + + memory = get_attr_memory (insn); + + /* Show ability of reorder buffer to hide latency of load by executing + in parallel with previous instruction in case + previous instruction is not needed to compute the address. */ + if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) + && !ix86_agi_dependent (dep_insn, insn)) + { + /* Claim moves to take one cycle, as core can issue one load + at time and the next load can start cycle later. 
*/ + if (dep_insn_type == TYPE_IMOV + || dep_insn_type == TYPE_FMOV) + cost = 1; + else if (cost > 1) + cost--; + } + break; + + case PROCESSOR_K6: + /* The esp dependency is resolved before + the instruction is really finished. */ + if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) + && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) + return 1; + + /* INT->FP conversion is expensive. */ + if (get_attr_fp_int_src (dep_insn)) + cost += 5; + + memory = get_attr_memory (insn); + + /* Show ability of reorder buffer to hide latency of load by executing + in parallel with previous instruction in case + previous instruction is not needed to compute the address. */ + if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) + && !ix86_agi_dependent (dep_insn, insn)) + { + /* Claim moves to take one cycle, as core can issue one load + at time and the next load can start cycle later. */ + if (dep_insn_type == TYPE_IMOV + || dep_insn_type == TYPE_FMOV) + cost = 1; + else if (cost > 2) + cost -= 2; + else + cost = 1; + } + break; + + case PROCESSOR_AMDFAM10: + case PROCESSOR_BDVER1: + case PROCESSOR_BDVER2: + case PROCESSOR_BDVER3: + case PROCESSOR_BDVER4: + case PROCESSOR_ZNVER1: + case PROCESSOR_BTVER1: + case PROCESSOR_BTVER2: + case PROCESSOR_GENERIC: + /* Stack engine allows to execute push&pop instructions in parall. */ + if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) + && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) + return 0; + /* FALLTHRU */ + + case PROCESSOR_ATHLON: + case PROCESSOR_K8: + memory = get_attr_memory (insn); + + /* Show ability of reorder buffer to hide latency of load by executing + in parallel with previous instruction in case + previous instruction is not needed to compute the address. */ + if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) + && !ix86_agi_dependent (dep_insn, insn)) + { + enum attr_unit unit = get_attr_unit (insn); + int loadcost = 3; + + /* Because of the difference between the length of integer and + floating unit pipeline preparation stages, the memory operands + for floating point are cheaper. + + ??? For Athlon it the difference is most probably 2. */ + if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN) + loadcost = 3; + else + loadcost = TARGET_ATHLON ? 2 : 0; + + if (cost >= loadcost) + cost -= loadcost; + else + cost = 0; + } + break; + + case PROCESSOR_CORE2: + case PROCESSOR_NEHALEM: + case PROCESSOR_SANDYBRIDGE: + case PROCESSOR_HASWELL: + /* Stack engine allows to execute push&pop instructions in parall. */ + if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) + && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) + return 0; + + memory = get_attr_memory (insn); + + /* Show ability of reorder buffer to hide latency of load by executing + in parallel with previous instruction in case + previous instruction is not needed to compute the address. */ + if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) + && !ix86_agi_dependent (dep_insn, insn)) + { + if (cost >= 4) + cost -= 4; + else + cost = 0; + } + break; + + case PROCESSOR_SILVERMONT: + case PROCESSOR_KNL: + case PROCESSOR_INTEL: + if (!reload_completed) + return cost; + + /* Increase cost of integer loads. */ + memory = get_attr_memory (dep_insn); + if (memory == MEMORY_LOAD || memory == MEMORY_BOTH) + { + enum attr_unit unit = get_attr_unit (dep_insn); + if (unit == UNIT_INTEGER && cost == 1) + { + if (memory == MEMORY_LOAD) + cost = 3; + else + { + /* Increase cost of ld/st for short int types only + because of store forwarding issue. 
*/ + rtx set = single_set (dep_insn); + if (set && (GET_MODE (SET_DEST (set)) == QImode + || GET_MODE (SET_DEST (set)) == HImode)) + { + /* Increase cost of store/load insn if exact + dependence exists and it is load insn. */ + enum attr_memory insn_memory = get_attr_memory (insn); + if (insn_memory == MEMORY_LOAD + && exact_store_load_dependency (dep_insn, insn)) + cost = 3; + } + } + } + } + + default: + break; + } + + return cost; +} + +/* How many alternative schedules to try. This should be as wide as the + scheduling freedom in the DFA, but no wider. Making this value too + large results extra work for the scheduler. */ + +static int +ia32_multipass_dfa_lookahead (void) +{ + switch (ix86_tune) + { + case PROCESSOR_PENTIUM: + case PROCESSOR_LAKEMONT: + return 2; + + case PROCESSOR_PENTIUMPRO: + case PROCESSOR_K6: + return 1; + + case PROCESSOR_BDVER1: + case PROCESSOR_BDVER2: + case PROCESSOR_BDVER3: + case PROCESSOR_BDVER4: + /* We use lookahead value 4 for BD both before and after reload + schedules. Plan is to have value 8 included for O3. */ + return 4; + + case PROCESSOR_CORE2: + case PROCESSOR_NEHALEM: + case PROCESSOR_SANDYBRIDGE: + case PROCESSOR_HASWELL: + case PROCESSOR_BONNELL: + case PROCESSOR_SILVERMONT: + case PROCESSOR_KNL: + case PROCESSOR_INTEL: + /* Generally, we want haifa-sched:max_issue() to look ahead as far + as many instructions can be executed on a cycle, i.e., + issue_rate. I wonder why tuning for many CPUs does not do this. */ + if (reload_completed) + return ix86_issue_rate (); + /* Don't use lookahead for pre-reload schedule to save compile time. */ + return 0; + + default: + return 0; + } +} + +/* Return true if target platform supports macro-fusion. */ + +static bool +ix86_macro_fusion_p () +{ + return TARGET_FUSE_CMP_AND_BRANCH; +} + +/* Check whether current microarchitecture support macro fusion + for insn pair "CONDGEN + CONDJMP". Refer to + "Intel Architectures Optimization Reference Manual". */ + +static bool +ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp) +{ + rtx src, dest; + enum rtx_code ccode; + rtx compare_set = NULL_RTX, test_if, cond; + rtx alu_set = NULL_RTX, addr = NULL_RTX; + + if (!any_condjump_p (condjmp)) + return false; + + if (get_attr_type (condgen) != TYPE_TEST + && get_attr_type (condgen) != TYPE_ICMP + && get_attr_type (condgen) != TYPE_INCDEC + && get_attr_type (condgen) != TYPE_ALU) + return false; + + compare_set = single_set (condgen); + if (compare_set == NULL_RTX + && !TARGET_FUSE_ALU_AND_BRANCH) + return false; + + if (compare_set == NULL_RTX) + { + int i; + rtx pat = PATTERN (condgen); + for (i = 0; i < XVECLEN (pat, 0); i++) + if (GET_CODE (XVECEXP (pat, 0, i)) == SET) + { + rtx set_src = SET_SRC (XVECEXP (pat, 0, i)); + if (GET_CODE (set_src) == COMPARE) + compare_set = XVECEXP (pat, 0, i); + else + alu_set = XVECEXP (pat, 0, i); + } + } + if (compare_set == NULL_RTX) + return false; + src = SET_SRC (compare_set); + if (GET_CODE (src) != COMPARE) + return false; + + /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not + supported. */ + if ((MEM_P (XEXP (src, 0)) + && CONST_INT_P (XEXP (src, 1))) + || (MEM_P (XEXP (src, 1)) + && CONST_INT_P (XEXP (src, 0)))) + return false; + + /* No fusion for RIP-relative address. 
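The Silvermont/KNL cost adjustment above raises the latency of a load that exactly reuses the address of a preceding QImode or HImode store, since narrow store-to-load forwarding stalls on those cores. In source terms the penalized shape is simply:

/* A sub-word store immediately reloaded from the same address; the
   scheduler above bumps the cost of the dependent load to 3 to model
   the store-forwarding stall.  */
void
narrow_store_then_load (unsigned char *p, unsigned char v, unsigned char *out)
{
  *p = v;       /* narrow (QImode) store */
  *out = *p;    /* dependent load from the same location */
}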
*/ + if (MEM_P (XEXP (src, 0))) + addr = XEXP (XEXP (src, 0), 0); + else if (MEM_P (XEXP (src, 1))) + addr = XEXP (XEXP (src, 1), 0); + + if (addr) { + ix86_address parts; + int ok = ix86_decompose_address (addr, &parts); + gcc_assert (ok); + + if (rip_relative_addr_p (&parts)) + return false; + } + + test_if = SET_SRC (pc_set (condjmp)); + cond = XEXP (test_if, 0); + ccode = GET_CODE (cond); + /* Check whether conditional jump use Sign or Overflow Flags. */ + if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS + && (ccode == GE + || ccode == GT + || ccode == LE + || ccode == LT)) + return false; + + /* Return true for TYPE_TEST and TYPE_ICMP. */ + if (get_attr_type (condgen) == TYPE_TEST + || get_attr_type (condgen) == TYPE_ICMP) + return true; + + /* The following is the case that macro-fusion for alu + jmp. */ + if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set) + return false; + + /* No fusion for alu op with memory destination operand. */ + dest = SET_DEST (alu_set); + if (MEM_P (dest)) + return false; + + /* Macro-fusion for inc/dec + unsigned conditional jump is not + supported. */ + if (get_attr_type (condgen) == TYPE_INCDEC + && (ccode == GEU + || ccode == GTU + || ccode == LEU + || ccode == LTU)) + return false; + + return true; +} + +/* Try to reorder ready list to take advantage of Atom pipelined IMUL + execution. It is applied if + (1) IMUL instruction is on the top of list; + (2) There exists the only producer of independent IMUL instruction in + ready list. + Return index of IMUL producer if it was found and -1 otherwise. */ +static int +do_reorder_for_imul (rtx_insn **ready, int n_ready) +{ + rtx_insn *insn; + rtx set, insn1, insn2; + sd_iterator_def sd_it; + dep_t dep; + int index = -1; + int i; + + if (!TARGET_BONNELL) + return index; + + /* Check that IMUL instruction is on the top of ready list. */ + insn = ready[n_ready - 1]; + set = single_set (insn); + if (!set) + return index; + if (!(GET_CODE (SET_SRC (set)) == MULT + && GET_MODE (SET_SRC (set)) == SImode)) + return index; + + /* Search for producer of independent IMUL instruction. */ + for (i = n_ready - 2; i >= 0; i--) + { + insn = ready[i]; + if (!NONDEBUG_INSN_P (insn)) + continue; + /* Skip IMUL instruction. */ + insn2 = PATTERN (insn); + if (GET_CODE (insn2) == PARALLEL) + insn2 = XVECEXP (insn2, 0, 0); + if (GET_CODE (insn2) == SET + && GET_CODE (SET_SRC (insn2)) == MULT + && GET_MODE (SET_SRC (insn2)) == SImode) + continue; + + FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep) + { + rtx con; + con = DEP_CON (dep); + if (!NONDEBUG_INSN_P (con)) + continue; + insn1 = PATTERN (con); + if (GET_CODE (insn1) == PARALLEL) + insn1 = XVECEXP (insn1, 0, 0); + + if (GET_CODE (insn1) == SET + && GET_CODE (SET_SRC (insn1)) == MULT + && GET_MODE (SET_SRC (insn1)) == SImode) + { + sd_iterator_def sd_it1; + dep_t dep1; + /* Check if there is no other dependee for IMUL. */ + index = i; + FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1) + { + rtx pro; + pro = DEP_PRO (dep1); + if (!NONDEBUG_INSN_P (pro)) + continue; + if (pro != insn) + index = -1; + } + if (index >= 0) + break; + } + } + if (index >= 0) + break; + } + return index; +} + +/* Try to find the best candidate on the top of ready list if two insns + have the same priority - candidate is best if its dependees were + scheduled earlier. Applied for Silvermont only. + Return true if top 2 insns must be interchanged. 
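ix86_macro_fusion_pair_p above allows cmp/test (and, when TARGET_FUSE_ALU_AND_BRANCH is set, simple ALU ops) to pair with an adjacent conditional jump, while rejecting memory-immediate compares, RIP-relative memory operands, sign/overflow-flag jumps on CPUs that cannot fuse them, and inc/dec feeding unsigned conditions. A trivial source-level example of a shape that does fuse:

/* A register compare feeding a conditional branch -- typically emitted
   as "cmp %esi, %edi ; jne .L..." -- which the predicate above allows
   to macro-fuse into a single uop on Core-class CPUs.  */
int
compare_and_branch (int a, int b)
{
  if (a != b)
    return 1;
  return 0;
}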
*/ +static bool +swap_top_of_ready_list (rtx_insn **ready, int n_ready) +{ + rtx_insn *top = ready[n_ready - 1]; + rtx_insn *next = ready[n_ready - 2]; + rtx set; + sd_iterator_def sd_it; + dep_t dep; + int clock1 = -1; + int clock2 = -1; + #define INSN_TICK(INSN) (HID (INSN)->tick) + + if (!TARGET_SILVERMONT && !TARGET_INTEL) + return false; + + if (!NONDEBUG_INSN_P (top)) + return false; + if (!NONJUMP_INSN_P (top)) + return false; + if (!NONDEBUG_INSN_P (next)) + return false; + if (!NONJUMP_INSN_P (next)) + return false; + set = single_set (top); + if (!set) + return false; + set = single_set (next); + if (!set) + return false; + + if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next)) + { + if (INSN_PRIORITY (top) != INSN_PRIORITY (next)) + return false; + /* Determine winner more precise. */ + FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep) + { + rtx pro; + pro = DEP_PRO (dep); + if (!NONDEBUG_INSN_P (pro)) + continue; + if (INSN_TICK (pro) > clock1) + clock1 = INSN_TICK (pro); + } + FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep) + { + rtx pro; + pro = DEP_PRO (dep); + if (!NONDEBUG_INSN_P (pro)) + continue; + if (INSN_TICK (pro) > clock2) + clock2 = INSN_TICK (pro); + } + + if (clock1 == clock2) + { + /* Determine winner - load must win. */ + enum attr_memory memory1, memory2; + memory1 = get_attr_memory (top); + memory2 = get_attr_memory (next); + if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD) + return true; + } + return (bool) (clock2 < clock1); + } + return false; + #undef INSN_TICK +} + +/* Perform possible reodering of ready list for Atom/Silvermont only. + Return issue rate. */ +static int +ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready, + int *pn_ready, int clock_var) +{ + int issue_rate = -1; + int n_ready = *pn_ready; + int i; + rtx_insn *insn; + int index = -1; + + /* Set up issue rate. */ + issue_rate = ix86_issue_rate (); + + /* Do reodering for BONNELL/SILVERMONT only. */ + if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL) + return issue_rate; + + /* Nothing to do if ready list contains only 1 instruction. */ + if (n_ready <= 1) + return issue_rate; + + /* Do reodering for post-reload scheduler only. */ + if (!reload_completed) + return issue_rate; + + if ((index = do_reorder_for_imul (ready, n_ready)) >= 0) + { + if (sched_verbose > 1) + fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n", + INSN_UID (ready[index])); + + /* Put IMUL producer (ready[index]) at the top of ready list. */ + insn = ready[index]; + for (i = index; i < n_ready - 1; i++) + ready[i] = ready[i + 1]; + ready[n_ready - 1] = insn; + return issue_rate; + } + + /* Skip selective scheduling since HID is not populated in it. */ + if (clock_var != 0 + && !sel_sched_p () + && swap_top_of_ready_list (ready, n_ready)) + { + if (sched_verbose > 1) + fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n", + INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2])); + /* Swap 2 top elements of ready list. */ + insn = ready[n_ready - 1]; + ready[n_ready - 1] = ready[n_ready - 2]; + ready[n_ready - 2] = insn; + } + return issue_rate; +} + +static bool +ix86_class_likely_spilled_p (reg_class_t); + +/* Returns true if lhs of insn is HW function argument register and set up + is_spilled to true if it is likely spilled HW register. */ +static bool +insn_is_function_arg (rtx insn, bool* is_spilled) +{ + rtx dst; + + if (!NONDEBUG_INSN_P (insn)) + return false; + /* Call instructions are not movable, ignore it. 
*/ + if (CALL_P (insn)) + return false; + insn = PATTERN (insn); + if (GET_CODE (insn) == PARALLEL) + insn = XVECEXP (insn, 0, 0); + if (GET_CODE (insn) != SET) + return false; + dst = SET_DEST (insn); + if (REG_P (dst) && HARD_REGISTER_P (dst) + && ix86_function_arg_regno_p (REGNO (dst))) + { + /* Is it likely spilled HW register? */ + if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst)) + && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))) + *is_spilled = true; + return true; + } + return false; +} + +/* Add output dependencies for chain of function adjacent arguments if only + there is a move to likely spilled HW register. Return first argument + if at least one dependence was added or NULL otherwise. */ +static rtx_insn * +add_parameter_dependencies (rtx_insn *call, rtx_insn *head) +{ + rtx_insn *insn; + rtx_insn *last = call; + rtx_insn *first_arg = NULL; + bool is_spilled = false; + + head = PREV_INSN (head); + + /* Find nearest to call argument passing instruction. */ + while (true) + { + last = PREV_INSN (last); + if (last == head) + return NULL; + if (!NONDEBUG_INSN_P (last)) + continue; + if (insn_is_function_arg (last, &is_spilled)) + break; + return NULL; + } + + first_arg = last; + while (true) + { + insn = PREV_INSN (last); + if (!INSN_P (insn)) + break; + if (insn == head) + break; + if (!NONDEBUG_INSN_P (insn)) + { + last = insn; + continue; + } + if (insn_is_function_arg (insn, &is_spilled)) + { + /* Add output depdendence between two function arguments if chain + of output arguments contains likely spilled HW registers. */ + if (is_spilled) + add_dependence (first_arg, insn, REG_DEP_OUTPUT); + first_arg = last = insn; + } + else + break; + } + if (!is_spilled) + return NULL; + return first_arg; +} + +/* Add output or anti dependency from insn to first_arg to restrict its code + motion. */ +static void +avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn) +{ + rtx set; + rtx tmp; + + /* Add anti dependencies for bounds stores. */ + if (INSN_P (insn) + && GET_CODE (PATTERN (insn)) == PARALLEL + && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC + && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX) + { + add_dependence (first_arg, insn, REG_DEP_ANTI); + return; + } + + set = single_set (insn); + if (!set) + return; + tmp = SET_DEST (set); + if (REG_P (tmp)) + { + /* Add output dependency to the first function argument. */ + add_dependence (first_arg, insn, REG_DEP_OUTPUT); + return; + } + /* Add anti dependency. */ + add_dependence (first_arg, insn, REG_DEP_ANTI); +} + +/* Avoid cross block motion of function argument through adding dependency + from the first non-jump instruction in bb. */ +static void +add_dependee_for_func_arg (rtx_insn *arg, basic_block bb) +{ + rtx_insn *insn = BB_END (bb); + + while (insn) + { + if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn)) + { + rtx set = single_set (insn); + if (set) + { + avoid_func_arg_motion (arg, insn); + return; + } + } + if (insn == BB_HEAD (bb)) + return; + insn = PREV_INSN (insn); + } +} + +/* Hook for pre-reload schedule - avoid motion of function arguments + passed in likely spilled HW registers. 
*/ +static void +ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail) +{ + rtx_insn *insn; + rtx_insn *first_arg = NULL; + if (reload_completed) + return; + while (head != tail && DEBUG_INSN_P (head)) + head = NEXT_INSN (head); + for (insn = tail; insn != head; insn = PREV_INSN (insn)) + if (INSN_P (insn) && CALL_P (insn)) + { + first_arg = add_parameter_dependencies (insn, head); + if (first_arg) + { + /* Add dependee for first argument to predecessors if only + region contains more than one block. */ + basic_block bb = BLOCK_FOR_INSN (insn); + int rgn = CONTAINING_RGN (bb->index); + int nr_blks = RGN_NR_BLOCKS (rgn); + /* Skip trivial regions and region head blocks that can have + predecessors outside of region. */ + if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0) + { + edge e; + edge_iterator ei; + + /* Regions are SCCs with the exception of selective + scheduling with pipelining of outer blocks enabled. + So also check that immediate predecessors of a non-head + block are in the same region. */ + FOR_EACH_EDGE (e, ei, bb->preds) + { + /* Avoid creating of loop-carried dependencies through + using topological ordering in the region. */ + if (rgn == CONTAINING_RGN (e->src->index) + && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index)) + add_dependee_for_func_arg (first_arg, e->src); + } + } + insn = first_arg; + if (insn == head) + break; + } + } + else if (first_arg) + avoid_func_arg_motion (first_arg, insn); +} + +/* Hook for pre-reload schedule - set priority of moves from likely spilled + HW registers to maximum, to schedule them at soon as possible. These are + moves from function argument registers at the top of the function entry + and moves from function return value registers after call. */ +static int +ix86_adjust_priority (rtx_insn *insn, int priority) +{ + rtx set; + + if (reload_completed) + return priority; + + if (!NONDEBUG_INSN_P (insn)) + return priority; + + set = single_set (insn); + if (set) + { + rtx tmp = SET_SRC (set); + if (REG_P (tmp) + && HARD_REGISTER_P (tmp) + && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp)) + && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp)))) + return current_sched_info->sched_max_insns_priority; + } + + return priority; +} + +/* Model decoder of Core 2/i7. + Below hooks for multipass scheduling (see haifa-sched.c:max_issue) + track the instruction fetch block boundaries and make sure that long + (9+ bytes) instructions are assigned to D0. */ + +/* Maximum length of an insn that can be handled by + a secondary decoder unit. '8' for Core 2/i7. */ +static int core2i7_secondary_decoder_max_insn_size; + +/* Ifetch block size, i.e., number of bytes decoder reads per cycle. + '16' for Core 2/i7. */ +static int core2i7_ifetch_block_size; + +/* Maximum number of instructions decoder can handle per cycle. + '6' for Core 2/i7. */ +static int core2i7_ifetch_block_max_insns; + +typedef struct ix86_first_cycle_multipass_data_ * + ix86_first_cycle_multipass_data_t; +typedef const struct ix86_first_cycle_multipass_data_ * + const_ix86_first_cycle_multipass_data_t; + +/* A variable to store target state across calls to max_issue within + one cycle. */ +static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data, + *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data; + +/* Initialize DATA. 
*/ +static void +core2i7_first_cycle_multipass_init (void *_data) +{ + ix86_first_cycle_multipass_data_t data + = (ix86_first_cycle_multipass_data_t) _data; + + data->ifetch_block_len = 0; + data->ifetch_block_n_insns = 0; + data->ready_try_change = NULL; + data->ready_try_change_size = 0; +} + +/* Advancing the cycle; reset ifetch block counts. */ +static void +core2i7_dfa_post_advance_cycle (void) +{ + ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data; + + gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns); + + data->ifetch_block_len = 0; + data->ifetch_block_n_insns = 0; +} + +static int min_insn_size (rtx_insn *); + +/* Filter out insns from ready_try that the core will not be able to issue + on current cycle due to decoder. */ +static void +core2i7_first_cycle_multipass_filter_ready_try +(const_ix86_first_cycle_multipass_data_t data, + signed char *ready_try, int n_ready, bool first_cycle_insn_p) +{ + while (n_ready--) + { + rtx_insn *insn; + int insn_size; + + if (ready_try[n_ready]) + continue; + + insn = get_ready_element (n_ready); + insn_size = min_insn_size (insn); + + if (/* If this is a too long an insn for a secondary decoder ... */ + (!first_cycle_insn_p + && insn_size > core2i7_secondary_decoder_max_insn_size) + /* ... or it would not fit into the ifetch block ... */ + || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size + /* ... or the decoder is full already ... */ + || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns) + /* ... mask the insn out. */ + { + ready_try[n_ready] = 1; + + if (data->ready_try_change) + bitmap_set_bit (data->ready_try_change, n_ready); + } + } +} + +/* Prepare for a new round of multipass lookahead scheduling. */ +static void +core2i7_first_cycle_multipass_begin (void *_data, + signed char *ready_try, int n_ready, + bool first_cycle_insn_p) +{ + ix86_first_cycle_multipass_data_t data + = (ix86_first_cycle_multipass_data_t) _data; + const_ix86_first_cycle_multipass_data_t prev_data + = ix86_first_cycle_multipass_data; + + /* Restore the state from the end of the previous round. */ + data->ifetch_block_len = prev_data->ifetch_block_len; + data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns; + + /* Filter instructions that cannot be issued on current cycle due to + decoder restrictions. */ + core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready, + first_cycle_insn_p); +} + +/* INSN is being issued in current solution. Account for its impact on + the decoder model. */ +static void +core2i7_first_cycle_multipass_issue (void *_data, + signed char *ready_try, int n_ready, + rtx_insn *insn, const void *_prev_data) +{ + ix86_first_cycle_multipass_data_t data + = (ix86_first_cycle_multipass_data_t) _data; + const_ix86_first_cycle_multipass_data_t prev_data + = (const_ix86_first_cycle_multipass_data_t) _prev_data; + + int insn_size = min_insn_size (insn); + + data->ifetch_block_len = prev_data->ifetch_block_len + insn_size; + data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1; + gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size + && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns); + + /* Allocate or resize the bitmap for storing INSN's effect on ready_try. 
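The multipass filter above models the Core 2/i7 front end with the three parameters declared earlier and set in ix86_sched_init_global: a 16-byte instruction-fetch block, at most 6 instructions decoded per cycle, and secondary decoders that only handle instructions of up to 8 bytes. A simplified standalone restatement of the admission check (illustration only):

/* Can another instruction of the given size still be issued in the
   current fetch block?  Mirrors the three rejection tests in the
   filter above, with the Core 2/i7 parameters hard-coded.  */
static int
fits_in_fetch_block (int insn_size, int block_len_so_far,
                     int insns_so_far, int first_insn_of_cycle)
{
  if (!first_insn_of_cycle && insn_size > 8)
    return 0;   /* too long for a secondary decoder */
  if (block_len_so_far + insn_size > 16)
    return 0;   /* would overflow the 16-byte ifetch block */
  if (insns_so_far + 1 > 6)
    return 0;   /* all decoders used this cycle */
  return 1;
}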
*/ + if (!data->ready_try_change) + { + data->ready_try_change = sbitmap_alloc (n_ready); + data->ready_try_change_size = n_ready; + } + else if (data->ready_try_change_size < n_ready) + { + data->ready_try_change = sbitmap_resize (data->ready_try_change, + n_ready, 0); + data->ready_try_change_size = n_ready; + } + bitmap_clear (data->ready_try_change); + + /* Filter out insns from ready_try that the core will not be able to issue + on current cycle due to decoder. */ + core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready, + false); +} + +/* Revert the effect on ready_try. */ +static void +core2i7_first_cycle_multipass_backtrack (const void *_data, + signed char *ready_try, + int n_ready ATTRIBUTE_UNUSED) +{ + const_ix86_first_cycle_multipass_data_t data + = (const_ix86_first_cycle_multipass_data_t) _data; + unsigned int i = 0; + sbitmap_iterator sbi; + + gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready); + EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi) + { + ready_try[i] = 0; + } +} + +/* Save the result of multipass lookahead scheduling for the next round. */ +static void +core2i7_first_cycle_multipass_end (const void *_data) +{ + const_ix86_first_cycle_multipass_data_t data + = (const_ix86_first_cycle_multipass_data_t) _data; + ix86_first_cycle_multipass_data_t next_data + = ix86_first_cycle_multipass_data; + + if (data != NULL) + { + next_data->ifetch_block_len = data->ifetch_block_len; + next_data->ifetch_block_n_insns = data->ifetch_block_n_insns; + } +} + +/* Deallocate target data. */ +static void +core2i7_first_cycle_multipass_fini (void *_data) +{ + ix86_first_cycle_multipass_data_t data + = (ix86_first_cycle_multipass_data_t) _data; + + if (data->ready_try_change) + { + sbitmap_free (data->ready_try_change); + data->ready_try_change = NULL; + data->ready_try_change_size = 0; + } +} + +/* Prepare for scheduling pass. */ +static void +ix86_sched_init_global (FILE *, int, int) +{ + /* Install scheduling hooks for current CPU. Some of these hooks are used + in time-critical parts of the scheduler, so we only set them up when + they are actually used. */ + switch (ix86_tune) + { + case PROCESSOR_CORE2: + case PROCESSOR_NEHALEM: + case PROCESSOR_SANDYBRIDGE: + case PROCESSOR_HASWELL: + /* Do not perform multipass scheduling for pre-reload schedule + to save compile time. */ + if (reload_completed) + { + targetm.sched.dfa_post_advance_cycle + = core2i7_dfa_post_advance_cycle; + targetm.sched.first_cycle_multipass_init + = core2i7_first_cycle_multipass_init; + targetm.sched.first_cycle_multipass_begin + = core2i7_first_cycle_multipass_begin; + targetm.sched.first_cycle_multipass_issue + = core2i7_first_cycle_multipass_issue; + targetm.sched.first_cycle_multipass_backtrack + = core2i7_first_cycle_multipass_backtrack; + targetm.sched.first_cycle_multipass_end + = core2i7_first_cycle_multipass_end; + targetm.sched.first_cycle_multipass_fini + = core2i7_first_cycle_multipass_fini; + + /* Set decoder parameters. */ + core2i7_secondary_decoder_max_insn_size = 8; + core2i7_ifetch_block_size = 16; + core2i7_ifetch_block_max_insns = 6; + break; + } + /* ... Fall through ... 
*/ + default: + targetm.sched.dfa_post_advance_cycle = NULL; + targetm.sched.first_cycle_multipass_init = NULL; + targetm.sched.first_cycle_multipass_begin = NULL; + targetm.sched.first_cycle_multipass_issue = NULL; + targetm.sched.first_cycle_multipass_backtrack = NULL; + targetm.sched.first_cycle_multipass_end = NULL; + targetm.sched.first_cycle_multipass_fini = NULL; + break; + } +} + + +/* Compute the alignment given to a constant that is being placed in memory. + EXP is the constant and ALIGN is the alignment that the object would + ordinarily have. + The value of this function is used instead of that alignment to align + the object. */ + +int +ix86_constant_alignment (tree exp, int align) +{ + if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST + || TREE_CODE (exp) == INTEGER_CST) + { + if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64) + return 64; + else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128) + return 128; + } + else if (!optimize_size && TREE_CODE (exp) == STRING_CST + && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) + return BITS_PER_WORD; + + return align; +} + +/* Compute the alignment for a variable for Intel MCU psABI. TYPE is + the data type, and ALIGN is the alignment that the object would + ordinarily have. */ + +static int +iamcu_alignment (tree type, int align) +{ + enum machine_mode mode; + + if (align < 32 || TYPE_USER_ALIGN (type)) + return align; + + /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4 + bytes. */ + mode = TYPE_MODE (strip_array_types (type)); + switch (GET_MODE_CLASS (mode)) + { + case MODE_INT: + case MODE_COMPLEX_INT: + case MODE_COMPLEX_FLOAT: + case MODE_FLOAT: + case MODE_DECIMAL_FLOAT: + return 32; + default: + return align; + } +} + +/* Compute the alignment for a static variable. + TYPE is the data type, and ALIGN is the alignment that + the object would ordinarily have. The value of this function is used + instead of that alignment to align the object. */ + +int +ix86_data_alignment (tree type, int align, bool opt) +{ + /* GCC 4.8 and earlier used to incorrectly assume this alignment even + for symbols from other compilation units or symbols that don't need + to bind locally. In order to preserve some ABI compatibility with + those compilers, ensure we don't decrease alignment from what we + used to assume. */ + + int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT); + + /* A data structure, equal or greater than the size of a cache line + (64 bytes in the Pentium 4 and other recent Intel processors, including + processors based on Intel Core microarchitecture) should be aligned + so that its base address is a multiple of a cache line size. 
*/ + + int max_align + = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT); + + if (max_align < BITS_PER_WORD) + max_align = BITS_PER_WORD; + + switch (ix86_align_data_type) + { + case ix86_align_data_type_abi: opt = false; break; + case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break; + case ix86_align_data_type_cacheline: break; + } + + if (TARGET_IAMCU) + align = iamcu_alignment (type, align); + + if (opt + && AGGREGATE_TYPE_P (type) + && TYPE_SIZE (type) + && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST) + { + if (wi::geu_p (TYPE_SIZE (type), max_align_compat) + && align < max_align_compat) + align = max_align_compat; + if (wi::geu_p (TYPE_SIZE (type), max_align) + && align < max_align) + align = max_align; + } + + /* x86-64 ABI requires arrays greater than 16 bytes to be aligned + to 16byte boundary. */ + if (TARGET_64BIT) + { + if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE) + && TYPE_SIZE (type) + && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST + && wi::geu_p (TYPE_SIZE (type), 128) + && align < 128) + return 128; + } + + if (!opt) + return align; + + if (TREE_CODE (type) == ARRAY_TYPE) + { + if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) + return 64; + if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) + return 128; + } + else if (TREE_CODE (type) == COMPLEX_TYPE) + { + + if (TYPE_MODE (type) == DCmode && align < 64) + return 64; + if ((TYPE_MODE (type) == XCmode + || TYPE_MODE (type) == TCmode) && align < 128) + return 128; + } + else if ((TREE_CODE (type) == RECORD_TYPE + || TREE_CODE (type) == UNION_TYPE + || TREE_CODE (type) == QUAL_UNION_TYPE) + && TYPE_FIELDS (type)) + { + if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) + return 64; + if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) + return 128; + } + else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE + || TREE_CODE (type) == INTEGER_TYPE) + { + if (TYPE_MODE (type) == DFmode && align < 64) + return 64; + if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) + return 128; + } + + return align; +} + +/* Compute the alignment for a local variable or a stack slot. EXP is + the data type or decl itself, MODE is the widest mode available and + ALIGN is the alignment that the object would ordinarily have. The + value of this macro is used instead of that alignment to align the + object. */ + +unsigned int +ix86_local_alignment (tree exp, machine_mode mode, + unsigned int align) +{ + tree type, decl; + + if (exp && DECL_P (exp)) + { + type = TREE_TYPE (exp); + decl = exp; + } + else + { + type = exp; + decl = NULL; + } + + /* Don't do dynamic stack realignment for long long objects with + -mpreferred-stack-boundary=2. */ + if (!TARGET_64BIT + && align == 64 + && ix86_preferred_stack_boundary < 64 + && (mode == DImode || (type && TYPE_MODE (type) == DImode)) + && (!type || !TYPE_USER_ALIGN (type)) + && (!decl || !DECL_USER_ALIGN (decl))) + align = 32; + + /* If TYPE is NULL, we are allocating a stack slot for caller-save + register in MODE. We will return the largest alignment of XF + and DF. */ + if (!type) + { + if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode)) + align = GET_MODE_ALIGNMENT (DFmode); + return align; + } + + /* Don't increase alignment for Intel MCU psABI. */ + if (TARGET_IAMCU) + return align; + + /* x86-64 ABI requires arrays greater than 16 bytes to be aligned + to 16byte boundary. 
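Among other things, ix86_data_alignment above enforces the x86-64 psABI rule that arrays of 16 bytes or more get at least 16-byte alignment, and (when optimizing) may raise large aggregates toward the cache-line-derived maximum. In user-visible terms:

/* On x86-64, the rules above give a static array of at least 16 bytes
   an alignment of at least 16 (and larger objects may be aligned more
   aggressively when optimizing).  Illustration only; the exact value
   depends on target flags.  */
static char small_buf[8];   /* keeps its natural alignment */
static char big_buf[32];    /* aligned to at least 16 bytes */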
Exact wording is: + + An array uses the same alignment as its elements, except that a local or + global array variable of length at least 16 bytes or + a C99 variable-length array variable always has alignment of at least 16 bytes. + + This was added to allow use of aligned SSE instructions at arrays. This + rule is meant for static storage (where compiler can not do the analysis + by itself). We follow it for automatic variables only when convenient. + We fully control everything in the function compiled and functions from + other unit can not rely on the alignment. + + Exclude va_list type. It is the common case of local array where + we can not benefit from the alignment. + + TODO: Probably one should optimize for size only when var is not escaping. */ + if (TARGET_64BIT && optimize_function_for_speed_p (cfun) + && TARGET_SSE) + { + if (AGGREGATE_TYPE_P (type) + && (va_list_type_node == NULL_TREE + || (TYPE_MAIN_VARIANT (type) + != TYPE_MAIN_VARIANT (va_list_type_node))) + && TYPE_SIZE (type) + && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST + && wi::geu_p (TYPE_SIZE (type), 16) + && align < 128) + return 128; + } + if (TREE_CODE (type) == ARRAY_TYPE) + { + if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) + return 64; + if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) + return 128; + } + else if (TREE_CODE (type) == COMPLEX_TYPE) + { + if (TYPE_MODE (type) == DCmode && align < 64) + return 64; + if ((TYPE_MODE (type) == XCmode + || TYPE_MODE (type) == TCmode) && align < 128) + return 128; + } + else if ((TREE_CODE (type) == RECORD_TYPE + || TREE_CODE (type) == UNION_TYPE + || TREE_CODE (type) == QUAL_UNION_TYPE) + && TYPE_FIELDS (type)) + { + if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) + return 64; + if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) + return 128; + } + else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE + || TREE_CODE (type) == INTEGER_TYPE) + { + + if (TYPE_MODE (type) == DFmode && align < 64) + return 64; + if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) + return 128; + } + return align; +} + +/* Compute the minimum required alignment for dynamic stack realignment + purposes for a local variable, parameter or a stack slot. EXP is + the data type or decl itself, MODE is its mode and ALIGN is the + alignment that the object would ordinarily have. */ + +unsigned int +ix86_minimum_alignment (tree exp, machine_mode mode, + unsigned int align) +{ + tree type, decl; + + if (exp && DECL_P (exp)) + { + type = TREE_TYPE (exp); + decl = exp; + } + else + { + type = exp; + decl = NULL; + } + + if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64) + return align; + + /* Don't do dynamic stack realignment for long long objects with + -mpreferred-stack-boundary=2. */ + if ((mode == DImode || (type && TYPE_MODE (type) == DImode)) + && (!type || !TYPE_USER_ALIGN (type)) + && (!decl || !DECL_USER_ALIGN (decl))) + { + gcc_checking_assert (!TARGET_STV); + return 32; + } + + return align; +} + +/* Find a location for the static chain incoming to a nested function. + This is a register, unless all free registers are used by arguments. */ + +static rtx +ix86_static_chain (const_tree fndecl_or_type, bool incoming_p) +{ + unsigned regno; + + /* While this function won't be called by the middle-end when a static + chain isn't needed, it's also used throughout the backend so it's + easiest to keep this check centralized. 
*/ + if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type)) + return NULL; + + if (TARGET_64BIT) + { + /* We always use R10 in 64-bit mode. */ + regno = R10_REG; + } + else + { + const_tree fntype, fndecl; + unsigned int ccvt; + + /* By default in 32-bit mode we use ECX to pass the static chain. */ + regno = CX_REG; + + if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL) + { + fntype = TREE_TYPE (fndecl_or_type); + fndecl = fndecl_or_type; + } + else + { + fntype = fndecl_or_type; + fndecl = NULL; + } + + ccvt = ix86_get_callcvt (fntype); + if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) + { + /* Fastcall functions use ecx/edx for arguments, which leaves + us with EAX for the static chain. + Thiscall functions use ecx for arguments, which also + leaves us with EAX for the static chain. */ + regno = AX_REG; + } + else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) + { + /* Thiscall functions use ecx for arguments, which leaves + us with EAX and EDX for the static chain. + We are using for abi-compatibility EAX. */ + regno = AX_REG; + } + else if (ix86_function_regparm (fntype, fndecl) == 3) + { + /* For regparm 3, we have no free call-clobbered registers in + which to store the static chain. In order to implement this, + we have the trampoline push the static chain to the stack. + However, we can't push a value below the return address when + we call the nested function directly, so we have to use an + alternate entry point. For this we use ESI, and have the + alternate entry point push ESI, so that things appear the + same once we're executing the nested function. */ + if (incoming_p) + { + if (fndecl == current_function_decl) + ix86_static_chain_on_stack = true; + return gen_frame_mem (SImode, + plus_constant (Pmode, + arg_pointer_rtx, -8)); + } + regno = SI_REG; + } + } + + return gen_rtx_REG (Pmode, regno); +} + +/* Emit RTL insns to initialize the variable parts of a trampoline. + FNDECL is the decl of the target address; M_TRAMP is a MEM for + the trampoline, and CHAIN_VALUE is an RTX for the static chain + to be passed to the target function. */ + +static void +ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx mem, fnaddr; + int opcode; + int offset = 0; + + fnaddr = XEXP (DECL_RTL (fndecl), 0); + + if (TARGET_64BIT) + { + int size; + + /* Load the function address to r11. Try to load address using + the shorter movl instead of movabs. We may want to support + movq for kernel mode, but kernel does not use trampolines at + the moment. FNADDR is a 32bit address and may not be in + DImode when ptr_mode == SImode. Always use movl in this + case. */ + if (ptr_mode == SImode + || x86_64_zext_immediate_operand (fnaddr, VOIDmode)) + { + fnaddr = copy_addr_to_reg (fnaddr); + + mem = adjust_address (m_tramp, HImode, offset); + emit_move_insn (mem, gen_int_mode (0xbb41, HImode)); + + mem = adjust_address (m_tramp, SImode, offset + 2); + emit_move_insn (mem, gen_lowpart (SImode, fnaddr)); + offset += 6; + } + else + { + mem = adjust_address (m_tramp, HImode, offset); + emit_move_insn (mem, gen_int_mode (0xbb49, HImode)); + + mem = adjust_address (m_tramp, DImode, offset + 2); + emit_move_insn (mem, fnaddr); + offset += 10; + } + + /* Load static chain using movabs to r10. Use the shorter movl + instead of movabs when ptr_mode == SImode. 
*/ + if (ptr_mode == SImode) + { + opcode = 0xba41; + size = 6; + } + else + { + opcode = 0xba49; + size = 10; + } + + mem = adjust_address (m_tramp, HImode, offset); + emit_move_insn (mem, gen_int_mode (opcode, HImode)); + + mem = adjust_address (m_tramp, ptr_mode, offset + 2); + emit_move_insn (mem, chain_value); + offset += size; + + /* Jump to r11; the last (unused) byte is a nop, only there to + pad the write out to a single 32-bit store. */ + mem = adjust_address (m_tramp, SImode, offset); + emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode)); + offset += 4; + } + else + { + rtx disp, chain; + + /* Depending on the static chain location, either load a register + with a constant, or push the constant to the stack. All of the + instructions are the same size. */ + chain = ix86_static_chain (fndecl, true); + if (REG_P (chain)) + { + switch (REGNO (chain)) + { + case AX_REG: + opcode = 0xb8; break; + case CX_REG: + opcode = 0xb9; break; + default: + gcc_unreachable (); + } + } + else + opcode = 0x68; + + mem = adjust_address (m_tramp, QImode, offset); + emit_move_insn (mem, gen_int_mode (opcode, QImode)); + + mem = adjust_address (m_tramp, SImode, offset + 1); + emit_move_insn (mem, chain_value); + offset += 5; + + mem = adjust_address (m_tramp, QImode, offset); + emit_move_insn (mem, gen_int_mode (0xe9, QImode)); + + mem = adjust_address (m_tramp, SImode, offset + 1); + + /* Compute offset from the end of the jmp to the target function. + In the case in which the trampoline stores the static chain on + the stack, we need to skip the first insn which pushes the + (call-saved) register static chain; this push is 1 byte. */ + offset += 5; + disp = expand_binop (SImode, sub_optab, fnaddr, + plus_constant (Pmode, XEXP (m_tramp, 0), + offset - (MEM_P (chain) ? 1 : 0)), + NULL_RTX, 1, OPTAB_DIRECT); + emit_move_insn (mem, disp); + } + + gcc_assert (offset <= TRAMPOLINE_SIZE); + +#ifdef HAVE_ENABLE_EXECUTE_STACK +#ifdef CHECK_EXECUTE_STACK_ENABLED + if (CHECK_EXECUTE_STACK_ENABLED) +#endif + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), + LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode); +#endif +} + +/* The following file contains several enumerations and data structures + built from the definitions in i386-builtin-types.def. */ + +#include "i386-builtin-types.inc" + +/* Table for the ix86 builtin non-function types. */ +static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1]; + +/* Retrieve an element from the above table, building some of + the types lazily. 
*/ + +static tree +ix86_get_builtin_type (enum ix86_builtin_type tcode) +{ + unsigned int index; + tree type, itype; + + gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab)); + + type = ix86_builtin_type_tab[(int) tcode]; + if (type != NULL) + return type; + + gcc_assert (tcode > IX86_BT_LAST_PRIM); + if (tcode <= IX86_BT_LAST_VECT) + { + machine_mode mode; + + index = tcode - IX86_BT_LAST_PRIM - 1; + itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]); + mode = ix86_builtin_type_vect_mode[index]; + + type = build_vector_type_for_mode (itype, mode); + } + else + { + int quals; + + index = tcode - IX86_BT_LAST_VECT - 1; + if (tcode <= IX86_BT_LAST_PTR) + quals = TYPE_UNQUALIFIED; + else + quals = TYPE_QUAL_CONST; + + itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]); + if (quals != TYPE_UNQUALIFIED) + itype = build_qualified_type (itype, quals); + + type = build_pointer_type (itype); + } + + ix86_builtin_type_tab[(int) tcode] = type; + return type; +} + +/* Table for the ix86 builtin function types. */ +static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1]; + +/* Retrieve an element from the above table, building some of + the types lazily. */ + +static tree +ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode) +{ + tree type; + + gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab)); + + type = ix86_builtin_func_type_tab[(int) tcode]; + if (type != NULL) + return type; + + if (tcode <= IX86_BT_LAST_FUNC) + { + unsigned start = ix86_builtin_func_start[(int) tcode]; + unsigned after = ix86_builtin_func_start[(int) tcode + 1]; + tree rtype, atype, args = void_list_node; + unsigned i; + + rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]); + for (i = after - 1; i > start; --i) + { + atype = ix86_get_builtin_type (ix86_builtin_func_args[i]); + args = tree_cons (NULL, atype, args); + } + + type = build_function_type (rtype, args); + } + else + { + unsigned index = tcode - IX86_BT_LAST_FUNC - 1; + enum ix86_builtin_func_type icode; + + icode = ix86_builtin_func_alias_base[index]; + type = ix86_get_builtin_func_type (icode); + } + + ix86_builtin_func_type_tab[(int) tcode] = type; + return type; +} + + +/* Codes for all the SSE/MMX builtins. 
*/ +enum ix86_builtins +{ + IX86_BUILTIN_ADDPS, + IX86_BUILTIN_ADDSS, + IX86_BUILTIN_DIVPS, + IX86_BUILTIN_DIVSS, + IX86_BUILTIN_MULPS, + IX86_BUILTIN_MULSS, + IX86_BUILTIN_SUBPS, + IX86_BUILTIN_SUBSS, + + IX86_BUILTIN_CMPEQPS, + IX86_BUILTIN_CMPLTPS, + IX86_BUILTIN_CMPLEPS, + IX86_BUILTIN_CMPGTPS, + IX86_BUILTIN_CMPGEPS, + IX86_BUILTIN_CMPNEQPS, + IX86_BUILTIN_CMPNLTPS, + IX86_BUILTIN_CMPNLEPS, + IX86_BUILTIN_CMPNGTPS, + IX86_BUILTIN_CMPNGEPS, + IX86_BUILTIN_CMPORDPS, + IX86_BUILTIN_CMPUNORDPS, + IX86_BUILTIN_CMPEQSS, + IX86_BUILTIN_CMPLTSS, + IX86_BUILTIN_CMPLESS, + IX86_BUILTIN_CMPNEQSS, + IX86_BUILTIN_CMPNLTSS, + IX86_BUILTIN_CMPNLESS, + IX86_BUILTIN_CMPORDSS, + IX86_BUILTIN_CMPUNORDSS, + + IX86_BUILTIN_COMIEQSS, + IX86_BUILTIN_COMILTSS, + IX86_BUILTIN_COMILESS, + IX86_BUILTIN_COMIGTSS, + IX86_BUILTIN_COMIGESS, + IX86_BUILTIN_COMINEQSS, + IX86_BUILTIN_UCOMIEQSS, + IX86_BUILTIN_UCOMILTSS, + IX86_BUILTIN_UCOMILESS, + IX86_BUILTIN_UCOMIGTSS, + IX86_BUILTIN_UCOMIGESS, + IX86_BUILTIN_UCOMINEQSS, + + IX86_BUILTIN_CVTPI2PS, + IX86_BUILTIN_CVTPS2PI, + IX86_BUILTIN_CVTSI2SS, + IX86_BUILTIN_CVTSI642SS, + IX86_BUILTIN_CVTSS2SI, + IX86_BUILTIN_CVTSS2SI64, + IX86_BUILTIN_CVTTPS2PI, + IX86_BUILTIN_CVTTSS2SI, + IX86_BUILTIN_CVTTSS2SI64, + + IX86_BUILTIN_MAXPS, + IX86_BUILTIN_MAXSS, + IX86_BUILTIN_MINPS, + IX86_BUILTIN_MINSS, + + IX86_BUILTIN_LOADUPS, + IX86_BUILTIN_STOREUPS, + IX86_BUILTIN_MOVSS, + + IX86_BUILTIN_MOVHLPS, + IX86_BUILTIN_MOVLHPS, + IX86_BUILTIN_LOADHPS, + IX86_BUILTIN_LOADLPS, + IX86_BUILTIN_STOREHPS, + IX86_BUILTIN_STORELPS, + + IX86_BUILTIN_MASKMOVQ, + IX86_BUILTIN_MOVMSKPS, + IX86_BUILTIN_PMOVMSKB, + + IX86_BUILTIN_MOVNTPS, + IX86_BUILTIN_MOVNTQ, + + IX86_BUILTIN_LOADDQU, + IX86_BUILTIN_STOREDQU, + + IX86_BUILTIN_PACKSSWB, + IX86_BUILTIN_PACKSSDW, + IX86_BUILTIN_PACKUSWB, + + IX86_BUILTIN_PADDB, + IX86_BUILTIN_PADDW, + IX86_BUILTIN_PADDD, + IX86_BUILTIN_PADDQ, + IX86_BUILTIN_PADDSB, + IX86_BUILTIN_PADDSW, + IX86_BUILTIN_PADDUSB, + IX86_BUILTIN_PADDUSW, + IX86_BUILTIN_PSUBB, + IX86_BUILTIN_PSUBW, + IX86_BUILTIN_PSUBD, + IX86_BUILTIN_PSUBQ, + IX86_BUILTIN_PSUBSB, + IX86_BUILTIN_PSUBSW, + IX86_BUILTIN_PSUBUSB, + IX86_BUILTIN_PSUBUSW, + + IX86_BUILTIN_PAND, + IX86_BUILTIN_PANDN, + IX86_BUILTIN_POR, + IX86_BUILTIN_PXOR, + + IX86_BUILTIN_PAVGB, + IX86_BUILTIN_PAVGW, + + IX86_BUILTIN_PCMPEQB, + IX86_BUILTIN_PCMPEQW, + IX86_BUILTIN_PCMPEQD, + IX86_BUILTIN_PCMPGTB, + IX86_BUILTIN_PCMPGTW, + IX86_BUILTIN_PCMPGTD, + + IX86_BUILTIN_PMADDWD, + + IX86_BUILTIN_PMAXSW, + IX86_BUILTIN_PMAXUB, + IX86_BUILTIN_PMINSW, + IX86_BUILTIN_PMINUB, + + IX86_BUILTIN_PMULHUW, + IX86_BUILTIN_PMULHW, + IX86_BUILTIN_PMULLW, + + IX86_BUILTIN_PSADBW, + IX86_BUILTIN_PSHUFW, + + IX86_BUILTIN_PSLLW, + IX86_BUILTIN_PSLLD, + IX86_BUILTIN_PSLLQ, + IX86_BUILTIN_PSRAW, + IX86_BUILTIN_PSRAD, + IX86_BUILTIN_PSRLW, + IX86_BUILTIN_PSRLD, + IX86_BUILTIN_PSRLQ, + IX86_BUILTIN_PSLLWI, + IX86_BUILTIN_PSLLDI, + IX86_BUILTIN_PSLLQI, + IX86_BUILTIN_PSRAWI, + IX86_BUILTIN_PSRADI, + IX86_BUILTIN_PSRLWI, + IX86_BUILTIN_PSRLDI, + IX86_BUILTIN_PSRLQI, + + IX86_BUILTIN_PUNPCKHBW, + IX86_BUILTIN_PUNPCKHWD, + IX86_BUILTIN_PUNPCKHDQ, + IX86_BUILTIN_PUNPCKLBW, + IX86_BUILTIN_PUNPCKLWD, + IX86_BUILTIN_PUNPCKLDQ, + + IX86_BUILTIN_SHUFPS, + + IX86_BUILTIN_RCPPS, + IX86_BUILTIN_RCPSS, + IX86_BUILTIN_RSQRTPS, + IX86_BUILTIN_RSQRTPS_NR, + IX86_BUILTIN_RSQRTSS, + IX86_BUILTIN_RSQRTF, + IX86_BUILTIN_SQRTPS, + IX86_BUILTIN_SQRTPS_NR, + IX86_BUILTIN_SQRTSS, + + IX86_BUILTIN_UNPCKHPS, + IX86_BUILTIN_UNPCKLPS, + + IX86_BUILTIN_ANDPS, + IX86_BUILTIN_ANDNPS, 
+ IX86_BUILTIN_ORPS, + IX86_BUILTIN_XORPS, + + IX86_BUILTIN_EMMS, + IX86_BUILTIN_LDMXCSR, + IX86_BUILTIN_STMXCSR, + IX86_BUILTIN_SFENCE, + + IX86_BUILTIN_FXSAVE, + IX86_BUILTIN_FXRSTOR, + IX86_BUILTIN_FXSAVE64, + IX86_BUILTIN_FXRSTOR64, + + IX86_BUILTIN_XSAVE, + IX86_BUILTIN_XRSTOR, + IX86_BUILTIN_XSAVE64, + IX86_BUILTIN_XRSTOR64, + + IX86_BUILTIN_XSAVEOPT, + IX86_BUILTIN_XSAVEOPT64, + + IX86_BUILTIN_XSAVEC, + IX86_BUILTIN_XSAVEC64, + + IX86_BUILTIN_XSAVES, + IX86_BUILTIN_XRSTORS, + IX86_BUILTIN_XSAVES64, + IX86_BUILTIN_XRSTORS64, + + /* 3DNow! Original */ + IX86_BUILTIN_FEMMS, + IX86_BUILTIN_PAVGUSB, + IX86_BUILTIN_PF2ID, + IX86_BUILTIN_PFACC, + IX86_BUILTIN_PFADD, + IX86_BUILTIN_PFCMPEQ, + IX86_BUILTIN_PFCMPGE, + IX86_BUILTIN_PFCMPGT, + IX86_BUILTIN_PFMAX, + IX86_BUILTIN_PFMIN, + IX86_BUILTIN_PFMUL, + IX86_BUILTIN_PFRCP, + IX86_BUILTIN_PFRCPIT1, + IX86_BUILTIN_PFRCPIT2, + IX86_BUILTIN_PFRSQIT1, + IX86_BUILTIN_PFRSQRT, + IX86_BUILTIN_PFSUB, + IX86_BUILTIN_PFSUBR, + IX86_BUILTIN_PI2FD, + IX86_BUILTIN_PMULHRW, + + /* 3DNow! Athlon Extensions */ + IX86_BUILTIN_PF2IW, + IX86_BUILTIN_PFNACC, + IX86_BUILTIN_PFPNACC, + IX86_BUILTIN_PI2FW, + IX86_BUILTIN_PSWAPDSI, + IX86_BUILTIN_PSWAPDSF, + + /* SSE2 */ + IX86_BUILTIN_ADDPD, + IX86_BUILTIN_ADDSD, + IX86_BUILTIN_DIVPD, + IX86_BUILTIN_DIVSD, + IX86_BUILTIN_MULPD, + IX86_BUILTIN_MULSD, + IX86_BUILTIN_SUBPD, + IX86_BUILTIN_SUBSD, + + IX86_BUILTIN_CMPEQPD, + IX86_BUILTIN_CMPLTPD, + IX86_BUILTIN_CMPLEPD, + IX86_BUILTIN_CMPGTPD, + IX86_BUILTIN_CMPGEPD, + IX86_BUILTIN_CMPNEQPD, + IX86_BUILTIN_CMPNLTPD, + IX86_BUILTIN_CMPNLEPD, + IX86_BUILTIN_CMPNGTPD, + IX86_BUILTIN_CMPNGEPD, + IX86_BUILTIN_CMPORDPD, + IX86_BUILTIN_CMPUNORDPD, + IX86_BUILTIN_CMPEQSD, + IX86_BUILTIN_CMPLTSD, + IX86_BUILTIN_CMPLESD, + IX86_BUILTIN_CMPNEQSD, + IX86_BUILTIN_CMPNLTSD, + IX86_BUILTIN_CMPNLESD, + IX86_BUILTIN_CMPORDSD, + IX86_BUILTIN_CMPUNORDSD, + + IX86_BUILTIN_COMIEQSD, + IX86_BUILTIN_COMILTSD, + IX86_BUILTIN_COMILESD, + IX86_BUILTIN_COMIGTSD, + IX86_BUILTIN_COMIGESD, + IX86_BUILTIN_COMINEQSD, + IX86_BUILTIN_UCOMIEQSD, + IX86_BUILTIN_UCOMILTSD, + IX86_BUILTIN_UCOMILESD, + IX86_BUILTIN_UCOMIGTSD, + IX86_BUILTIN_UCOMIGESD, + IX86_BUILTIN_UCOMINEQSD, + + IX86_BUILTIN_MAXPD, + IX86_BUILTIN_MAXSD, + IX86_BUILTIN_MINPD, + IX86_BUILTIN_MINSD, + + IX86_BUILTIN_ANDPD, + IX86_BUILTIN_ANDNPD, + IX86_BUILTIN_ORPD, + IX86_BUILTIN_XORPD, + + IX86_BUILTIN_SQRTPD, + IX86_BUILTIN_SQRTSD, + + IX86_BUILTIN_UNPCKHPD, + IX86_BUILTIN_UNPCKLPD, + + IX86_BUILTIN_SHUFPD, + + IX86_BUILTIN_LOADUPD, + IX86_BUILTIN_STOREUPD, + IX86_BUILTIN_MOVSD, + + IX86_BUILTIN_LOADHPD, + IX86_BUILTIN_LOADLPD, + + IX86_BUILTIN_CVTDQ2PD, + IX86_BUILTIN_CVTDQ2PS, + + IX86_BUILTIN_CVTPD2DQ, + IX86_BUILTIN_CVTPD2PI, + IX86_BUILTIN_CVTPD2PS, + IX86_BUILTIN_CVTTPD2DQ, + IX86_BUILTIN_CVTTPD2PI, + + IX86_BUILTIN_CVTPI2PD, + IX86_BUILTIN_CVTSI2SD, + IX86_BUILTIN_CVTSI642SD, + + IX86_BUILTIN_CVTSD2SI, + IX86_BUILTIN_CVTSD2SI64, + IX86_BUILTIN_CVTSD2SS, + IX86_BUILTIN_CVTSS2SD, + IX86_BUILTIN_CVTTSD2SI, + IX86_BUILTIN_CVTTSD2SI64, + + IX86_BUILTIN_CVTPS2DQ, + IX86_BUILTIN_CVTPS2PD, + IX86_BUILTIN_CVTTPS2DQ, + + IX86_BUILTIN_MOVNTI, + IX86_BUILTIN_MOVNTI64, + IX86_BUILTIN_MOVNTPD, + IX86_BUILTIN_MOVNTDQ, + + IX86_BUILTIN_MOVQ128, + + /* SSE2 MMX */ + IX86_BUILTIN_MASKMOVDQU, + IX86_BUILTIN_MOVMSKPD, + IX86_BUILTIN_PMOVMSKB128, + + IX86_BUILTIN_PACKSSWB128, + IX86_BUILTIN_PACKSSDW128, + IX86_BUILTIN_PACKUSWB128, + + IX86_BUILTIN_PADDB128, + IX86_BUILTIN_PADDW128, + IX86_BUILTIN_PADDD128, + IX86_BUILTIN_PADDQ128, + 
IX86_BUILTIN_PADDSB128, + IX86_BUILTIN_PADDSW128, + IX86_BUILTIN_PADDUSB128, + IX86_BUILTIN_PADDUSW128, + IX86_BUILTIN_PSUBB128, + IX86_BUILTIN_PSUBW128, + IX86_BUILTIN_PSUBD128, + IX86_BUILTIN_PSUBQ128, + IX86_BUILTIN_PSUBSB128, + IX86_BUILTIN_PSUBSW128, + IX86_BUILTIN_PSUBUSB128, + IX86_BUILTIN_PSUBUSW128, + + IX86_BUILTIN_PAND128, + IX86_BUILTIN_PANDN128, + IX86_BUILTIN_POR128, + IX86_BUILTIN_PXOR128, + + IX86_BUILTIN_PAVGB128, + IX86_BUILTIN_PAVGW128, + + IX86_BUILTIN_PCMPEQB128, + IX86_BUILTIN_PCMPEQW128, + IX86_BUILTIN_PCMPEQD128, + IX86_BUILTIN_PCMPGTB128, + IX86_BUILTIN_PCMPGTW128, + IX86_BUILTIN_PCMPGTD128, + + IX86_BUILTIN_PMADDWD128, + + IX86_BUILTIN_PMAXSW128, + IX86_BUILTIN_PMAXUB128, + IX86_BUILTIN_PMINSW128, + IX86_BUILTIN_PMINUB128, + + IX86_BUILTIN_PMULUDQ, + IX86_BUILTIN_PMULUDQ128, + IX86_BUILTIN_PMULHUW128, + IX86_BUILTIN_PMULHW128, + IX86_BUILTIN_PMULLW128, + + IX86_BUILTIN_PSADBW128, + IX86_BUILTIN_PSHUFHW, + IX86_BUILTIN_PSHUFLW, + IX86_BUILTIN_PSHUFD, + + IX86_BUILTIN_PSLLDQI128, + IX86_BUILTIN_PSLLWI128, + IX86_BUILTIN_PSLLDI128, + IX86_BUILTIN_PSLLQI128, + IX86_BUILTIN_PSRAWI128, + IX86_BUILTIN_PSRADI128, + IX86_BUILTIN_PSRLDQI128, + IX86_BUILTIN_PSRLWI128, + IX86_BUILTIN_PSRLDI128, + IX86_BUILTIN_PSRLQI128, + + IX86_BUILTIN_PSLLDQ128, + IX86_BUILTIN_PSLLW128, + IX86_BUILTIN_PSLLD128, + IX86_BUILTIN_PSLLQ128, + IX86_BUILTIN_PSRAW128, + IX86_BUILTIN_PSRAD128, + IX86_BUILTIN_PSRLW128, + IX86_BUILTIN_PSRLD128, + IX86_BUILTIN_PSRLQ128, + + IX86_BUILTIN_PUNPCKHBW128, + IX86_BUILTIN_PUNPCKHWD128, + IX86_BUILTIN_PUNPCKHDQ128, + IX86_BUILTIN_PUNPCKHQDQ128, + IX86_BUILTIN_PUNPCKLBW128, + IX86_BUILTIN_PUNPCKLWD128, + IX86_BUILTIN_PUNPCKLDQ128, + IX86_BUILTIN_PUNPCKLQDQ128, + + IX86_BUILTIN_CLFLUSH, + IX86_BUILTIN_MFENCE, + IX86_BUILTIN_LFENCE, + IX86_BUILTIN_PAUSE, + + IX86_BUILTIN_FNSTENV, + IX86_BUILTIN_FLDENV, + IX86_BUILTIN_FNSTSW, + IX86_BUILTIN_FNCLEX, + + IX86_BUILTIN_BSRSI, + IX86_BUILTIN_BSRDI, + IX86_BUILTIN_RDPMC, + IX86_BUILTIN_RDTSC, + IX86_BUILTIN_RDTSCP, + IX86_BUILTIN_ROLQI, + IX86_BUILTIN_ROLHI, + IX86_BUILTIN_RORQI, + IX86_BUILTIN_RORHI, + + /* SSE3. */ + IX86_BUILTIN_ADDSUBPS, + IX86_BUILTIN_HADDPS, + IX86_BUILTIN_HSUBPS, + IX86_BUILTIN_MOVSHDUP, + IX86_BUILTIN_MOVSLDUP, + IX86_BUILTIN_ADDSUBPD, + IX86_BUILTIN_HADDPD, + IX86_BUILTIN_HSUBPD, + IX86_BUILTIN_LDDQU, + + IX86_BUILTIN_MONITOR, + IX86_BUILTIN_MWAIT, + IX86_BUILTIN_CLZERO, + + /* SSSE3. */ + IX86_BUILTIN_PHADDW, + IX86_BUILTIN_PHADDD, + IX86_BUILTIN_PHADDSW, + IX86_BUILTIN_PHSUBW, + IX86_BUILTIN_PHSUBD, + IX86_BUILTIN_PHSUBSW, + IX86_BUILTIN_PMADDUBSW, + IX86_BUILTIN_PMULHRSW, + IX86_BUILTIN_PSHUFB, + IX86_BUILTIN_PSIGNB, + IX86_BUILTIN_PSIGNW, + IX86_BUILTIN_PSIGND, + IX86_BUILTIN_PALIGNR, + IX86_BUILTIN_PABSB, + IX86_BUILTIN_PABSW, + IX86_BUILTIN_PABSD, + + IX86_BUILTIN_PHADDW128, + IX86_BUILTIN_PHADDD128, + IX86_BUILTIN_PHADDSW128, + IX86_BUILTIN_PHSUBW128, + IX86_BUILTIN_PHSUBD128, + IX86_BUILTIN_PHSUBSW128, + IX86_BUILTIN_PMADDUBSW128, + IX86_BUILTIN_PMULHRSW128, + IX86_BUILTIN_PSHUFB128, + IX86_BUILTIN_PSIGNB128, + IX86_BUILTIN_PSIGNW128, + IX86_BUILTIN_PSIGND128, + IX86_BUILTIN_PALIGNR128, + IX86_BUILTIN_PABSB128, + IX86_BUILTIN_PABSW128, + IX86_BUILTIN_PABSD128, + + /* AMDFAM10 - SSE4A New Instructions. */ + IX86_BUILTIN_MOVNTSD, + IX86_BUILTIN_MOVNTSS, + IX86_BUILTIN_EXTRQI, + IX86_BUILTIN_EXTRQ, + IX86_BUILTIN_INSERTQI, + IX86_BUILTIN_INSERTQ, + + /* SSE4.1. 
*/ + IX86_BUILTIN_BLENDPD, + IX86_BUILTIN_BLENDPS, + IX86_BUILTIN_BLENDVPD, + IX86_BUILTIN_BLENDVPS, + IX86_BUILTIN_PBLENDVB128, + IX86_BUILTIN_PBLENDW128, + + IX86_BUILTIN_DPPD, + IX86_BUILTIN_DPPS, + + IX86_BUILTIN_INSERTPS128, + + IX86_BUILTIN_MOVNTDQA, + IX86_BUILTIN_MPSADBW128, + IX86_BUILTIN_PACKUSDW128, + IX86_BUILTIN_PCMPEQQ, + IX86_BUILTIN_PHMINPOSUW128, + + IX86_BUILTIN_PMAXSB128, + IX86_BUILTIN_PMAXSD128, + IX86_BUILTIN_PMAXUD128, + IX86_BUILTIN_PMAXUW128, + + IX86_BUILTIN_PMINSB128, + IX86_BUILTIN_PMINSD128, + IX86_BUILTIN_PMINUD128, + IX86_BUILTIN_PMINUW128, + + IX86_BUILTIN_PMOVSXBW128, + IX86_BUILTIN_PMOVSXBD128, + IX86_BUILTIN_PMOVSXBQ128, + IX86_BUILTIN_PMOVSXWD128, + IX86_BUILTIN_PMOVSXWQ128, + IX86_BUILTIN_PMOVSXDQ128, + + IX86_BUILTIN_PMOVZXBW128, + IX86_BUILTIN_PMOVZXBD128, + IX86_BUILTIN_PMOVZXBQ128, + IX86_BUILTIN_PMOVZXWD128, + IX86_BUILTIN_PMOVZXWQ128, + IX86_BUILTIN_PMOVZXDQ128, + + IX86_BUILTIN_PMULDQ128, + IX86_BUILTIN_PMULLD128, + + IX86_BUILTIN_ROUNDSD, + IX86_BUILTIN_ROUNDSS, + + IX86_BUILTIN_ROUNDPD, + IX86_BUILTIN_ROUNDPS, + + IX86_BUILTIN_FLOORPD, + IX86_BUILTIN_CEILPD, + IX86_BUILTIN_TRUNCPD, + IX86_BUILTIN_RINTPD, + IX86_BUILTIN_ROUNDPD_AZ, + + IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, + IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, + IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, + + IX86_BUILTIN_FLOORPS, + IX86_BUILTIN_CEILPS, + IX86_BUILTIN_TRUNCPS, + IX86_BUILTIN_RINTPS, + IX86_BUILTIN_ROUNDPS_AZ, + + IX86_BUILTIN_FLOORPS_SFIX, + IX86_BUILTIN_CEILPS_SFIX, + IX86_BUILTIN_ROUNDPS_AZ_SFIX, + + IX86_BUILTIN_PTESTZ, + IX86_BUILTIN_PTESTC, + IX86_BUILTIN_PTESTNZC, + + IX86_BUILTIN_VEC_INIT_V2SI, + IX86_BUILTIN_VEC_INIT_V4HI, + IX86_BUILTIN_VEC_INIT_V8QI, + IX86_BUILTIN_VEC_EXT_V2DF, + IX86_BUILTIN_VEC_EXT_V2DI, + IX86_BUILTIN_VEC_EXT_V4SF, + IX86_BUILTIN_VEC_EXT_V4SI, + IX86_BUILTIN_VEC_EXT_V8HI, + IX86_BUILTIN_VEC_EXT_V2SI, + IX86_BUILTIN_VEC_EXT_V4HI, + IX86_BUILTIN_VEC_EXT_V16QI, + IX86_BUILTIN_VEC_SET_V2DI, + IX86_BUILTIN_VEC_SET_V4SF, + IX86_BUILTIN_VEC_SET_V4SI, + IX86_BUILTIN_VEC_SET_V8HI, + IX86_BUILTIN_VEC_SET_V4HI, + IX86_BUILTIN_VEC_SET_V16QI, + + IX86_BUILTIN_VEC_PACK_SFIX, + IX86_BUILTIN_VEC_PACK_SFIX256, + + /* SSE4.2. 
*/ + IX86_BUILTIN_CRC32QI, + IX86_BUILTIN_CRC32HI, + IX86_BUILTIN_CRC32SI, + IX86_BUILTIN_CRC32DI, + + IX86_BUILTIN_PCMPESTRI128, + IX86_BUILTIN_PCMPESTRM128, + IX86_BUILTIN_PCMPESTRA128, + IX86_BUILTIN_PCMPESTRC128, + IX86_BUILTIN_PCMPESTRO128, + IX86_BUILTIN_PCMPESTRS128, + IX86_BUILTIN_PCMPESTRZ128, + IX86_BUILTIN_PCMPISTRI128, + IX86_BUILTIN_PCMPISTRM128, + IX86_BUILTIN_PCMPISTRA128, + IX86_BUILTIN_PCMPISTRC128, + IX86_BUILTIN_PCMPISTRO128, + IX86_BUILTIN_PCMPISTRS128, + IX86_BUILTIN_PCMPISTRZ128, + + IX86_BUILTIN_PCMPGTQ, + + /* AES instructions */ + IX86_BUILTIN_AESENC128, + IX86_BUILTIN_AESENCLAST128, + IX86_BUILTIN_AESDEC128, + IX86_BUILTIN_AESDECLAST128, + IX86_BUILTIN_AESIMC128, + IX86_BUILTIN_AESKEYGENASSIST128, + + /* PCLMUL instruction */ + IX86_BUILTIN_PCLMULQDQ128, + + /* AVX */ + IX86_BUILTIN_ADDPD256, + IX86_BUILTIN_ADDPS256, + IX86_BUILTIN_ADDSUBPD256, + IX86_BUILTIN_ADDSUBPS256, + IX86_BUILTIN_ANDPD256, + IX86_BUILTIN_ANDPS256, + IX86_BUILTIN_ANDNPD256, + IX86_BUILTIN_ANDNPS256, + IX86_BUILTIN_BLENDPD256, + IX86_BUILTIN_BLENDPS256, + IX86_BUILTIN_BLENDVPD256, + IX86_BUILTIN_BLENDVPS256, + IX86_BUILTIN_DIVPD256, + IX86_BUILTIN_DIVPS256, + IX86_BUILTIN_DPPS256, + IX86_BUILTIN_HADDPD256, + IX86_BUILTIN_HADDPS256, + IX86_BUILTIN_HSUBPD256, + IX86_BUILTIN_HSUBPS256, + IX86_BUILTIN_MAXPD256, + IX86_BUILTIN_MAXPS256, + IX86_BUILTIN_MINPD256, + IX86_BUILTIN_MINPS256, + IX86_BUILTIN_MULPD256, + IX86_BUILTIN_MULPS256, + IX86_BUILTIN_ORPD256, + IX86_BUILTIN_ORPS256, + IX86_BUILTIN_SHUFPD256, + IX86_BUILTIN_SHUFPS256, + IX86_BUILTIN_SUBPD256, + IX86_BUILTIN_SUBPS256, + IX86_BUILTIN_XORPD256, + IX86_BUILTIN_XORPS256, + IX86_BUILTIN_CMPSD, + IX86_BUILTIN_CMPSS, + IX86_BUILTIN_CMPPD, + IX86_BUILTIN_CMPPS, + IX86_BUILTIN_CMPPD256, + IX86_BUILTIN_CMPPS256, + IX86_BUILTIN_CVTDQ2PD256, + IX86_BUILTIN_CVTDQ2PS256, + IX86_BUILTIN_CVTPD2PS256, + IX86_BUILTIN_CVTPS2DQ256, + IX86_BUILTIN_CVTPS2PD256, + IX86_BUILTIN_CVTTPD2DQ256, + IX86_BUILTIN_CVTPD2DQ256, + IX86_BUILTIN_CVTTPS2DQ256, + IX86_BUILTIN_EXTRACTF128PD256, + IX86_BUILTIN_EXTRACTF128PS256, + IX86_BUILTIN_EXTRACTF128SI256, + IX86_BUILTIN_VZEROALL, + IX86_BUILTIN_VZEROUPPER, + IX86_BUILTIN_VPERMILVARPD, + IX86_BUILTIN_VPERMILVARPS, + IX86_BUILTIN_VPERMILVARPD256, + IX86_BUILTIN_VPERMILVARPS256, + IX86_BUILTIN_VPERMILPD, + IX86_BUILTIN_VPERMILPS, + IX86_BUILTIN_VPERMILPD256, + IX86_BUILTIN_VPERMILPS256, + IX86_BUILTIN_VPERMIL2PD, + IX86_BUILTIN_VPERMIL2PS, + IX86_BUILTIN_VPERMIL2PD256, + IX86_BUILTIN_VPERMIL2PS256, + IX86_BUILTIN_VPERM2F128PD256, + IX86_BUILTIN_VPERM2F128PS256, + IX86_BUILTIN_VPERM2F128SI256, + IX86_BUILTIN_VBROADCASTSS, + IX86_BUILTIN_VBROADCASTSD256, + IX86_BUILTIN_VBROADCASTSS256, + IX86_BUILTIN_VBROADCASTPD256, + IX86_BUILTIN_VBROADCASTPS256, + IX86_BUILTIN_VINSERTF128PD256, + IX86_BUILTIN_VINSERTF128PS256, + IX86_BUILTIN_VINSERTF128SI256, + IX86_BUILTIN_LOADUPD256, + IX86_BUILTIN_LOADUPS256, + IX86_BUILTIN_STOREUPD256, + IX86_BUILTIN_STOREUPS256, + IX86_BUILTIN_LDDQU256, + IX86_BUILTIN_MOVNTDQ256, + IX86_BUILTIN_MOVNTPD256, + IX86_BUILTIN_MOVNTPS256, + IX86_BUILTIN_LOADDQU256, + IX86_BUILTIN_STOREDQU256, + IX86_BUILTIN_MASKLOADPD, + IX86_BUILTIN_MASKLOADPS, + IX86_BUILTIN_MASKSTOREPD, + IX86_BUILTIN_MASKSTOREPS, + IX86_BUILTIN_MASKLOADPD256, + IX86_BUILTIN_MASKLOADPS256, + IX86_BUILTIN_MASKSTOREPD256, + IX86_BUILTIN_MASKSTOREPS256, + IX86_BUILTIN_MOVSHDUP256, + IX86_BUILTIN_MOVSLDUP256, + IX86_BUILTIN_MOVDDUP256, + + IX86_BUILTIN_SQRTPD256, + IX86_BUILTIN_SQRTPS256, + IX86_BUILTIN_SQRTPS_NR256, + 
IX86_BUILTIN_RSQRTPS256, + IX86_BUILTIN_RSQRTPS_NR256, + + IX86_BUILTIN_RCPPS256, + + IX86_BUILTIN_ROUNDPD256, + IX86_BUILTIN_ROUNDPS256, + + IX86_BUILTIN_FLOORPD256, + IX86_BUILTIN_CEILPD256, + IX86_BUILTIN_TRUNCPD256, + IX86_BUILTIN_RINTPD256, + IX86_BUILTIN_ROUNDPD_AZ256, + + IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, + IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, + IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, + + IX86_BUILTIN_FLOORPS256, + IX86_BUILTIN_CEILPS256, + IX86_BUILTIN_TRUNCPS256, + IX86_BUILTIN_RINTPS256, + IX86_BUILTIN_ROUNDPS_AZ256, + + IX86_BUILTIN_FLOORPS_SFIX256, + IX86_BUILTIN_CEILPS_SFIX256, + IX86_BUILTIN_ROUNDPS_AZ_SFIX256, + + IX86_BUILTIN_UNPCKHPD256, + IX86_BUILTIN_UNPCKLPD256, + IX86_BUILTIN_UNPCKHPS256, + IX86_BUILTIN_UNPCKLPS256, + + IX86_BUILTIN_SI256_SI, + IX86_BUILTIN_PS256_PS, + IX86_BUILTIN_PD256_PD, + IX86_BUILTIN_SI_SI256, + IX86_BUILTIN_PS_PS256, + IX86_BUILTIN_PD_PD256, + + IX86_BUILTIN_VTESTZPD, + IX86_BUILTIN_VTESTCPD, + IX86_BUILTIN_VTESTNZCPD, + IX86_BUILTIN_VTESTZPS, + IX86_BUILTIN_VTESTCPS, + IX86_BUILTIN_VTESTNZCPS, + IX86_BUILTIN_VTESTZPD256, + IX86_BUILTIN_VTESTCPD256, + IX86_BUILTIN_VTESTNZCPD256, + IX86_BUILTIN_VTESTZPS256, + IX86_BUILTIN_VTESTCPS256, + IX86_BUILTIN_VTESTNZCPS256, + IX86_BUILTIN_PTESTZ256, + IX86_BUILTIN_PTESTC256, + IX86_BUILTIN_PTESTNZC256, + + IX86_BUILTIN_MOVMSKPD256, + IX86_BUILTIN_MOVMSKPS256, + + /* AVX2 */ + IX86_BUILTIN_MPSADBW256, + IX86_BUILTIN_PABSB256, + IX86_BUILTIN_PABSW256, + IX86_BUILTIN_PABSD256, + IX86_BUILTIN_PACKSSDW256, + IX86_BUILTIN_PACKSSWB256, + IX86_BUILTIN_PACKUSDW256, + IX86_BUILTIN_PACKUSWB256, + IX86_BUILTIN_PADDB256, + IX86_BUILTIN_PADDW256, + IX86_BUILTIN_PADDD256, + IX86_BUILTIN_PADDQ256, + IX86_BUILTIN_PADDSB256, + IX86_BUILTIN_PADDSW256, + IX86_BUILTIN_PADDUSB256, + IX86_BUILTIN_PADDUSW256, + IX86_BUILTIN_PALIGNR256, + IX86_BUILTIN_AND256I, + IX86_BUILTIN_ANDNOT256I, + IX86_BUILTIN_PAVGB256, + IX86_BUILTIN_PAVGW256, + IX86_BUILTIN_PBLENDVB256, + IX86_BUILTIN_PBLENDVW256, + IX86_BUILTIN_PCMPEQB256, + IX86_BUILTIN_PCMPEQW256, + IX86_BUILTIN_PCMPEQD256, + IX86_BUILTIN_PCMPEQQ256, + IX86_BUILTIN_PCMPGTB256, + IX86_BUILTIN_PCMPGTW256, + IX86_BUILTIN_PCMPGTD256, + IX86_BUILTIN_PCMPGTQ256, + IX86_BUILTIN_PHADDW256, + IX86_BUILTIN_PHADDD256, + IX86_BUILTIN_PHADDSW256, + IX86_BUILTIN_PHSUBW256, + IX86_BUILTIN_PHSUBD256, + IX86_BUILTIN_PHSUBSW256, + IX86_BUILTIN_PMADDUBSW256, + IX86_BUILTIN_PMADDWD256, + IX86_BUILTIN_PMAXSB256, + IX86_BUILTIN_PMAXSW256, + IX86_BUILTIN_PMAXSD256, + IX86_BUILTIN_PMAXUB256, + IX86_BUILTIN_PMAXUW256, + IX86_BUILTIN_PMAXUD256, + IX86_BUILTIN_PMINSB256, + IX86_BUILTIN_PMINSW256, + IX86_BUILTIN_PMINSD256, + IX86_BUILTIN_PMINUB256, + IX86_BUILTIN_PMINUW256, + IX86_BUILTIN_PMINUD256, + IX86_BUILTIN_PMOVMSKB256, + IX86_BUILTIN_PMOVSXBW256, + IX86_BUILTIN_PMOVSXBD256, + IX86_BUILTIN_PMOVSXBQ256, + IX86_BUILTIN_PMOVSXWD256, + IX86_BUILTIN_PMOVSXWQ256, + IX86_BUILTIN_PMOVSXDQ256, + IX86_BUILTIN_PMOVZXBW256, + IX86_BUILTIN_PMOVZXBD256, + IX86_BUILTIN_PMOVZXBQ256, + IX86_BUILTIN_PMOVZXWD256, + IX86_BUILTIN_PMOVZXWQ256, + IX86_BUILTIN_PMOVZXDQ256, + IX86_BUILTIN_PMULDQ256, + IX86_BUILTIN_PMULHRSW256, + IX86_BUILTIN_PMULHUW256, + IX86_BUILTIN_PMULHW256, + IX86_BUILTIN_PMULLW256, + IX86_BUILTIN_PMULLD256, + IX86_BUILTIN_PMULUDQ256, + IX86_BUILTIN_POR256, + IX86_BUILTIN_PSADBW256, + IX86_BUILTIN_PSHUFB256, + IX86_BUILTIN_PSHUFD256, + IX86_BUILTIN_PSHUFHW256, + IX86_BUILTIN_PSHUFLW256, + IX86_BUILTIN_PSIGNB256, + IX86_BUILTIN_PSIGNW256, + IX86_BUILTIN_PSIGND256, + IX86_BUILTIN_PSLLDQI256, + 
IX86_BUILTIN_PSLLWI256, + IX86_BUILTIN_PSLLW256, + IX86_BUILTIN_PSLLDI256, + IX86_BUILTIN_PSLLD256, + IX86_BUILTIN_PSLLQI256, + IX86_BUILTIN_PSLLQ256, + IX86_BUILTIN_PSRAWI256, + IX86_BUILTIN_PSRAW256, + IX86_BUILTIN_PSRADI256, + IX86_BUILTIN_PSRAD256, + IX86_BUILTIN_PSRLDQI256, + IX86_BUILTIN_PSRLWI256, + IX86_BUILTIN_PSRLW256, + IX86_BUILTIN_PSRLDI256, + IX86_BUILTIN_PSRLD256, + IX86_BUILTIN_PSRLQI256, + IX86_BUILTIN_PSRLQ256, + IX86_BUILTIN_PSUBB256, + IX86_BUILTIN_PSUBW256, + IX86_BUILTIN_PSUBD256, + IX86_BUILTIN_PSUBQ256, + IX86_BUILTIN_PSUBSB256, + IX86_BUILTIN_PSUBSW256, + IX86_BUILTIN_PSUBUSB256, + IX86_BUILTIN_PSUBUSW256, + IX86_BUILTIN_PUNPCKHBW256, + IX86_BUILTIN_PUNPCKHWD256, + IX86_BUILTIN_PUNPCKHDQ256, + IX86_BUILTIN_PUNPCKHQDQ256, + IX86_BUILTIN_PUNPCKLBW256, + IX86_BUILTIN_PUNPCKLWD256, + IX86_BUILTIN_PUNPCKLDQ256, + IX86_BUILTIN_PUNPCKLQDQ256, + IX86_BUILTIN_PXOR256, + IX86_BUILTIN_MOVNTDQA256, + IX86_BUILTIN_VBROADCASTSS_PS, + IX86_BUILTIN_VBROADCASTSS_PS256, + IX86_BUILTIN_VBROADCASTSD_PD256, + IX86_BUILTIN_VBROADCASTSI256, + IX86_BUILTIN_PBLENDD256, + IX86_BUILTIN_PBLENDD128, + IX86_BUILTIN_PBROADCASTB256, + IX86_BUILTIN_PBROADCASTW256, + IX86_BUILTIN_PBROADCASTD256, + IX86_BUILTIN_PBROADCASTQ256, + IX86_BUILTIN_PBROADCASTB128, + IX86_BUILTIN_PBROADCASTW128, + IX86_BUILTIN_PBROADCASTD128, + IX86_BUILTIN_PBROADCASTQ128, + IX86_BUILTIN_VPERMVARSI256, + IX86_BUILTIN_VPERMDF256, + IX86_BUILTIN_VPERMVARSF256, + IX86_BUILTIN_VPERMDI256, + IX86_BUILTIN_VPERMTI256, + IX86_BUILTIN_VEXTRACT128I256, + IX86_BUILTIN_VINSERT128I256, + IX86_BUILTIN_MASKLOADD, + IX86_BUILTIN_MASKLOADQ, + IX86_BUILTIN_MASKLOADD256, + IX86_BUILTIN_MASKLOADQ256, + IX86_BUILTIN_MASKSTORED, + IX86_BUILTIN_MASKSTOREQ, + IX86_BUILTIN_MASKSTORED256, + IX86_BUILTIN_MASKSTOREQ256, + IX86_BUILTIN_PSLLVV4DI, + IX86_BUILTIN_PSLLVV2DI, + IX86_BUILTIN_PSLLVV8SI, + IX86_BUILTIN_PSLLVV4SI, + IX86_BUILTIN_PSRAVV8SI, + IX86_BUILTIN_PSRAVV4SI, + IX86_BUILTIN_PSRLVV4DI, + IX86_BUILTIN_PSRLVV2DI, + IX86_BUILTIN_PSRLVV8SI, + IX86_BUILTIN_PSRLVV4SI, + + IX86_BUILTIN_GATHERSIV2DF, + IX86_BUILTIN_GATHERSIV4DF, + IX86_BUILTIN_GATHERDIV2DF, + IX86_BUILTIN_GATHERDIV4DF, + IX86_BUILTIN_GATHERSIV4SF, + IX86_BUILTIN_GATHERSIV8SF, + IX86_BUILTIN_GATHERDIV4SF, + IX86_BUILTIN_GATHERDIV8SF, + IX86_BUILTIN_GATHERSIV2DI, + IX86_BUILTIN_GATHERSIV4DI, + IX86_BUILTIN_GATHERDIV2DI, + IX86_BUILTIN_GATHERDIV4DI, + IX86_BUILTIN_GATHERSIV4SI, + IX86_BUILTIN_GATHERSIV8SI, + IX86_BUILTIN_GATHERDIV4SI, + IX86_BUILTIN_GATHERDIV8SI, + + /* AVX512F */ + IX86_BUILTIN_SI512_SI256, + IX86_BUILTIN_PD512_PD256, + IX86_BUILTIN_PS512_PS256, + IX86_BUILTIN_SI512_SI, + IX86_BUILTIN_PD512_PD, + IX86_BUILTIN_PS512_PS, + IX86_BUILTIN_ADDPD512, + IX86_BUILTIN_ADDPS512, + IX86_BUILTIN_ADDSD_ROUND, + IX86_BUILTIN_ADDSS_ROUND, + IX86_BUILTIN_ALIGND512, + IX86_BUILTIN_ALIGNQ512, + IX86_BUILTIN_BLENDMD512, + IX86_BUILTIN_BLENDMPD512, + IX86_BUILTIN_BLENDMPS512, + IX86_BUILTIN_BLENDMQ512, + IX86_BUILTIN_BROADCASTF32X4_512, + IX86_BUILTIN_BROADCASTF64X4_512, + IX86_BUILTIN_BROADCASTI32X4_512, + IX86_BUILTIN_BROADCASTI64X4_512, + IX86_BUILTIN_BROADCASTSD512, + IX86_BUILTIN_BROADCASTSS512, + IX86_BUILTIN_CMPD512, + IX86_BUILTIN_CMPPD512, + IX86_BUILTIN_CMPPS512, + IX86_BUILTIN_CMPQ512, + IX86_BUILTIN_CMPSD_MASK, + IX86_BUILTIN_CMPSS_MASK, + IX86_BUILTIN_COMIDF, + IX86_BUILTIN_COMISF, + IX86_BUILTIN_COMPRESSPD512, + IX86_BUILTIN_COMPRESSPDSTORE512, + IX86_BUILTIN_COMPRESSPS512, + IX86_BUILTIN_COMPRESSPSSTORE512, + IX86_BUILTIN_CVTDQ2PD512, + IX86_BUILTIN_CVTDQ2PS512, + 
IX86_BUILTIN_CVTPD2DQ512, + IX86_BUILTIN_CVTPD2PS512, + IX86_BUILTIN_CVTPD2UDQ512, + IX86_BUILTIN_CVTPH2PS512, + IX86_BUILTIN_CVTPS2DQ512_MASK, + IX86_BUILTIN_CVTPS2PD512, + IX86_BUILTIN_CVTPS2PH512, + IX86_BUILTIN_CVTPS2UDQ512, + IX86_BUILTIN_CVTSD2SS_ROUND, + IX86_BUILTIN_CVTSI2SD64, + IX86_BUILTIN_CVTSI2SS32, + IX86_BUILTIN_CVTSI2SS64, + IX86_BUILTIN_CVTSS2SD_ROUND, + IX86_BUILTIN_CVTTPD2DQ512, + IX86_BUILTIN_CVTTPD2UDQ512, + IX86_BUILTIN_CVTTPS2DQ512, + IX86_BUILTIN_CVTTPS2UDQ512, + IX86_BUILTIN_CVTUDQ2PD512, + IX86_BUILTIN_CVTUDQ2PS512, + IX86_BUILTIN_CVTUSI2SD32, + IX86_BUILTIN_CVTUSI2SD64, + IX86_BUILTIN_CVTUSI2SS32, + IX86_BUILTIN_CVTUSI2SS64, + IX86_BUILTIN_DIVPD512, + IX86_BUILTIN_DIVPS512, + IX86_BUILTIN_DIVSD_ROUND, + IX86_BUILTIN_DIVSS_ROUND, + IX86_BUILTIN_EXPANDPD512, + IX86_BUILTIN_EXPANDPD512Z, + IX86_BUILTIN_EXPANDPDLOAD512, + IX86_BUILTIN_EXPANDPDLOAD512Z, + IX86_BUILTIN_EXPANDPS512, + IX86_BUILTIN_EXPANDPS512Z, + IX86_BUILTIN_EXPANDPSLOAD512, + IX86_BUILTIN_EXPANDPSLOAD512Z, + IX86_BUILTIN_EXTRACTF32X4, + IX86_BUILTIN_EXTRACTF64X4, + IX86_BUILTIN_EXTRACTI32X4, + IX86_BUILTIN_EXTRACTI64X4, + IX86_BUILTIN_FIXUPIMMPD512_MASK, + IX86_BUILTIN_FIXUPIMMPD512_MASKZ, + IX86_BUILTIN_FIXUPIMMPS512_MASK, + IX86_BUILTIN_FIXUPIMMPS512_MASKZ, + IX86_BUILTIN_FIXUPIMMSD128_MASK, + IX86_BUILTIN_FIXUPIMMSD128_MASKZ, + IX86_BUILTIN_FIXUPIMMSS128_MASK, + IX86_BUILTIN_FIXUPIMMSS128_MASKZ, + IX86_BUILTIN_GETEXPPD512, + IX86_BUILTIN_GETEXPPS512, + IX86_BUILTIN_GETEXPSD128, + IX86_BUILTIN_GETEXPSS128, + IX86_BUILTIN_GETMANTPD512, + IX86_BUILTIN_GETMANTPS512, + IX86_BUILTIN_GETMANTSD128, + IX86_BUILTIN_GETMANTSS128, + IX86_BUILTIN_INSERTF32X4, + IX86_BUILTIN_INSERTF64X4, + IX86_BUILTIN_INSERTI32X4, + IX86_BUILTIN_INSERTI64X4, + IX86_BUILTIN_LOADAPD512, + IX86_BUILTIN_LOADAPS512, + IX86_BUILTIN_LOADDQUDI512, + IX86_BUILTIN_LOADDQUSI512, + IX86_BUILTIN_LOADUPD512, + IX86_BUILTIN_LOADUPS512, + IX86_BUILTIN_MAXPD512, + IX86_BUILTIN_MAXPS512, + IX86_BUILTIN_MAXSD_ROUND, + IX86_BUILTIN_MAXSS_ROUND, + IX86_BUILTIN_MINPD512, + IX86_BUILTIN_MINPS512, + IX86_BUILTIN_MINSD_ROUND, + IX86_BUILTIN_MINSS_ROUND, + IX86_BUILTIN_MOVAPD512, + IX86_BUILTIN_MOVAPS512, + IX86_BUILTIN_MOVDDUP512, + IX86_BUILTIN_MOVDQA32LOAD512, + IX86_BUILTIN_MOVDQA32STORE512, + IX86_BUILTIN_MOVDQA32_512, + IX86_BUILTIN_MOVDQA64LOAD512, + IX86_BUILTIN_MOVDQA64STORE512, + IX86_BUILTIN_MOVDQA64_512, + IX86_BUILTIN_MOVNTDQ512, + IX86_BUILTIN_MOVNTDQA512, + IX86_BUILTIN_MOVNTPD512, + IX86_BUILTIN_MOVNTPS512, + IX86_BUILTIN_MOVSHDUP512, + IX86_BUILTIN_MOVSLDUP512, + IX86_BUILTIN_MULPD512, + IX86_BUILTIN_MULPS512, + IX86_BUILTIN_MULSD_ROUND, + IX86_BUILTIN_MULSS_ROUND, + IX86_BUILTIN_PABSD512, + IX86_BUILTIN_PABSQ512, + IX86_BUILTIN_PADDD512, + IX86_BUILTIN_PADDQ512, + IX86_BUILTIN_PANDD512, + IX86_BUILTIN_PANDND512, + IX86_BUILTIN_PANDNQ512, + IX86_BUILTIN_PANDQ512, + IX86_BUILTIN_PBROADCASTD512, + IX86_BUILTIN_PBROADCASTD512_GPR, + IX86_BUILTIN_PBROADCASTMB512, + IX86_BUILTIN_PBROADCASTMW512, + IX86_BUILTIN_PBROADCASTQ512, + IX86_BUILTIN_PBROADCASTQ512_GPR, + IX86_BUILTIN_PCMPEQD512_MASK, + IX86_BUILTIN_PCMPEQQ512_MASK, + IX86_BUILTIN_PCMPGTD512_MASK, + IX86_BUILTIN_PCMPGTQ512_MASK, + IX86_BUILTIN_PCOMPRESSD512, + IX86_BUILTIN_PCOMPRESSDSTORE512, + IX86_BUILTIN_PCOMPRESSQ512, + IX86_BUILTIN_PCOMPRESSQSTORE512, + IX86_BUILTIN_PEXPANDD512, + IX86_BUILTIN_PEXPANDD512Z, + IX86_BUILTIN_PEXPANDDLOAD512, + IX86_BUILTIN_PEXPANDDLOAD512Z, + IX86_BUILTIN_PEXPANDQ512, + IX86_BUILTIN_PEXPANDQ512Z, + IX86_BUILTIN_PEXPANDQLOAD512, + 
IX86_BUILTIN_PEXPANDQLOAD512Z, + IX86_BUILTIN_PMAXSD512, + IX86_BUILTIN_PMAXSQ512, + IX86_BUILTIN_PMAXUD512, + IX86_BUILTIN_PMAXUQ512, + IX86_BUILTIN_PMINSD512, + IX86_BUILTIN_PMINSQ512, + IX86_BUILTIN_PMINUD512, + IX86_BUILTIN_PMINUQ512, + IX86_BUILTIN_PMOVDB512, + IX86_BUILTIN_PMOVDB512_MEM, + IX86_BUILTIN_PMOVDW512, + IX86_BUILTIN_PMOVDW512_MEM, + IX86_BUILTIN_PMOVQB512, + IX86_BUILTIN_PMOVQB512_MEM, + IX86_BUILTIN_PMOVQD512, + IX86_BUILTIN_PMOVQD512_MEM, + IX86_BUILTIN_PMOVQW512, + IX86_BUILTIN_PMOVQW512_MEM, + IX86_BUILTIN_PMOVSDB512, + IX86_BUILTIN_PMOVSDB512_MEM, + IX86_BUILTIN_PMOVSDW512, + IX86_BUILTIN_PMOVSDW512_MEM, + IX86_BUILTIN_PMOVSQB512, + IX86_BUILTIN_PMOVSQB512_MEM, + IX86_BUILTIN_PMOVSQD512, + IX86_BUILTIN_PMOVSQD512_MEM, + IX86_BUILTIN_PMOVSQW512, + IX86_BUILTIN_PMOVSQW512_MEM, + IX86_BUILTIN_PMOVSXBD512, + IX86_BUILTIN_PMOVSXBQ512, + IX86_BUILTIN_PMOVSXDQ512, + IX86_BUILTIN_PMOVSXWD512, + IX86_BUILTIN_PMOVSXWQ512, + IX86_BUILTIN_PMOVUSDB512, + IX86_BUILTIN_PMOVUSDB512_MEM, + IX86_BUILTIN_PMOVUSDW512, + IX86_BUILTIN_PMOVUSDW512_MEM, + IX86_BUILTIN_PMOVUSQB512, + IX86_BUILTIN_PMOVUSQB512_MEM, + IX86_BUILTIN_PMOVUSQD512, + IX86_BUILTIN_PMOVUSQD512_MEM, + IX86_BUILTIN_PMOVUSQW512, + IX86_BUILTIN_PMOVUSQW512_MEM, + IX86_BUILTIN_PMOVZXBD512, + IX86_BUILTIN_PMOVZXBQ512, + IX86_BUILTIN_PMOVZXDQ512, + IX86_BUILTIN_PMOVZXWD512, + IX86_BUILTIN_PMOVZXWQ512, + IX86_BUILTIN_PMULDQ512, + IX86_BUILTIN_PMULLD512, + IX86_BUILTIN_PMULUDQ512, + IX86_BUILTIN_PORD512, + IX86_BUILTIN_PORQ512, + IX86_BUILTIN_PROLD512, + IX86_BUILTIN_PROLQ512, + IX86_BUILTIN_PROLVD512, + IX86_BUILTIN_PROLVQ512, + IX86_BUILTIN_PRORD512, + IX86_BUILTIN_PRORQ512, + IX86_BUILTIN_PRORVD512, + IX86_BUILTIN_PRORVQ512, + IX86_BUILTIN_PSHUFD512, + IX86_BUILTIN_PSLLD512, + IX86_BUILTIN_PSLLDI512, + IX86_BUILTIN_PSLLQ512, + IX86_BUILTIN_PSLLQI512, + IX86_BUILTIN_PSLLVV16SI, + IX86_BUILTIN_PSLLVV8DI, + IX86_BUILTIN_PSRAD512, + IX86_BUILTIN_PSRADI512, + IX86_BUILTIN_PSRAQ512, + IX86_BUILTIN_PSRAQI512, + IX86_BUILTIN_PSRAVV16SI, + IX86_BUILTIN_PSRAVV8DI, + IX86_BUILTIN_PSRLD512, + IX86_BUILTIN_PSRLDI512, + IX86_BUILTIN_PSRLQ512, + IX86_BUILTIN_PSRLQI512, + IX86_BUILTIN_PSRLVV16SI, + IX86_BUILTIN_PSRLVV8DI, + IX86_BUILTIN_PSUBD512, + IX86_BUILTIN_PSUBQ512, + IX86_BUILTIN_PTESTMD512, + IX86_BUILTIN_PTESTMQ512, + IX86_BUILTIN_PTESTNMD512, + IX86_BUILTIN_PTESTNMQ512, + IX86_BUILTIN_PUNPCKHDQ512, + IX86_BUILTIN_PUNPCKHQDQ512, + IX86_BUILTIN_PUNPCKLDQ512, + IX86_BUILTIN_PUNPCKLQDQ512, + IX86_BUILTIN_PXORD512, + IX86_BUILTIN_PXORQ512, + IX86_BUILTIN_RCP14PD512, + IX86_BUILTIN_RCP14PS512, + IX86_BUILTIN_RCP14SD, + IX86_BUILTIN_RCP14SS, + IX86_BUILTIN_RNDSCALEPD, + IX86_BUILTIN_RNDSCALEPS, + IX86_BUILTIN_RNDSCALESD, + IX86_BUILTIN_RNDSCALESS, + IX86_BUILTIN_RSQRT14PD512, + IX86_BUILTIN_RSQRT14PS512, + IX86_BUILTIN_RSQRT14SD, + IX86_BUILTIN_RSQRT14SS, + IX86_BUILTIN_SCALEFPD512, + IX86_BUILTIN_SCALEFPS512, + IX86_BUILTIN_SCALEFSD, + IX86_BUILTIN_SCALEFSS, + IX86_BUILTIN_SHUFPD512, + IX86_BUILTIN_SHUFPS512, + IX86_BUILTIN_SHUF_F32x4, + IX86_BUILTIN_SHUF_F64x2, + IX86_BUILTIN_SHUF_I32x4, + IX86_BUILTIN_SHUF_I64x2, + IX86_BUILTIN_SQRTPD512, + IX86_BUILTIN_SQRTPD512_MASK, + IX86_BUILTIN_SQRTPS512_MASK, + IX86_BUILTIN_SQRTPS_NR512, + IX86_BUILTIN_SQRTSD_ROUND, + IX86_BUILTIN_SQRTSS_ROUND, + IX86_BUILTIN_STOREAPD512, + IX86_BUILTIN_STOREAPS512, + IX86_BUILTIN_STOREDQUDI512, + IX86_BUILTIN_STOREDQUSI512, + IX86_BUILTIN_STOREUPD512, + IX86_BUILTIN_STOREUPS512, + IX86_BUILTIN_SUBPD512, + IX86_BUILTIN_SUBPS512, + IX86_BUILTIN_SUBSD_ROUND, + 
IX86_BUILTIN_SUBSS_ROUND, + IX86_BUILTIN_UCMPD512, + IX86_BUILTIN_UCMPQ512, + IX86_BUILTIN_UNPCKHPD512, + IX86_BUILTIN_UNPCKHPS512, + IX86_BUILTIN_UNPCKLPD512, + IX86_BUILTIN_UNPCKLPS512, + IX86_BUILTIN_VCVTSD2SI32, + IX86_BUILTIN_VCVTSD2SI64, + IX86_BUILTIN_VCVTSD2USI32, + IX86_BUILTIN_VCVTSD2USI64, + IX86_BUILTIN_VCVTSS2SI32, + IX86_BUILTIN_VCVTSS2SI64, + IX86_BUILTIN_VCVTSS2USI32, + IX86_BUILTIN_VCVTSS2USI64, + IX86_BUILTIN_VCVTTSD2SI32, + IX86_BUILTIN_VCVTTSD2SI64, + IX86_BUILTIN_VCVTTSD2USI32, + IX86_BUILTIN_VCVTTSD2USI64, + IX86_BUILTIN_VCVTTSS2SI32, + IX86_BUILTIN_VCVTTSS2SI64, + IX86_BUILTIN_VCVTTSS2USI32, + IX86_BUILTIN_VCVTTSS2USI64, + IX86_BUILTIN_VFMADDPD512_MASK, + IX86_BUILTIN_VFMADDPD512_MASK3, + IX86_BUILTIN_VFMADDPD512_MASKZ, + IX86_BUILTIN_VFMADDPS512_MASK, + IX86_BUILTIN_VFMADDPS512_MASK3, + IX86_BUILTIN_VFMADDPS512_MASKZ, + IX86_BUILTIN_VFMADDSD3_ROUND, + IX86_BUILTIN_VFMADDSS3_ROUND, + IX86_BUILTIN_VFMADDSUBPD512_MASK, + IX86_BUILTIN_VFMADDSUBPD512_MASK3, + IX86_BUILTIN_VFMADDSUBPD512_MASKZ, + IX86_BUILTIN_VFMADDSUBPS512_MASK, + IX86_BUILTIN_VFMADDSUBPS512_MASK3, + IX86_BUILTIN_VFMADDSUBPS512_MASKZ, + IX86_BUILTIN_VFMSUBADDPD512_MASK3, + IX86_BUILTIN_VFMSUBADDPS512_MASK3, + IX86_BUILTIN_VFMSUBPD512_MASK3, + IX86_BUILTIN_VFMSUBPS512_MASK3, + IX86_BUILTIN_VFMSUBSD3_MASK3, + IX86_BUILTIN_VFMSUBSS3_MASK3, + IX86_BUILTIN_VFNMADDPD512_MASK, + IX86_BUILTIN_VFNMADDPS512_MASK, + IX86_BUILTIN_VFNMSUBPD512_MASK, + IX86_BUILTIN_VFNMSUBPD512_MASK3, + IX86_BUILTIN_VFNMSUBPS512_MASK, + IX86_BUILTIN_VFNMSUBPS512_MASK3, + IX86_BUILTIN_VPCLZCNTD512, + IX86_BUILTIN_VPCLZCNTQ512, + IX86_BUILTIN_VPCONFLICTD512, + IX86_BUILTIN_VPCONFLICTQ512, + IX86_BUILTIN_VPERMDF512, + IX86_BUILTIN_VPERMDI512, + IX86_BUILTIN_VPERMI2VARD512, + IX86_BUILTIN_VPERMI2VARPD512, + IX86_BUILTIN_VPERMI2VARPS512, + IX86_BUILTIN_VPERMI2VARQ512, + IX86_BUILTIN_VPERMILPD512, + IX86_BUILTIN_VPERMILPS512, + IX86_BUILTIN_VPERMILVARPD512, + IX86_BUILTIN_VPERMILVARPS512, + IX86_BUILTIN_VPERMT2VARD512, + IX86_BUILTIN_VPERMT2VARD512_MASKZ, + IX86_BUILTIN_VPERMT2VARPD512, + IX86_BUILTIN_VPERMT2VARPD512_MASKZ, + IX86_BUILTIN_VPERMT2VARPS512, + IX86_BUILTIN_VPERMT2VARPS512_MASKZ, + IX86_BUILTIN_VPERMT2VARQ512, + IX86_BUILTIN_VPERMT2VARQ512_MASKZ, + IX86_BUILTIN_VPERMVARDF512, + IX86_BUILTIN_VPERMVARDI512, + IX86_BUILTIN_VPERMVARSF512, + IX86_BUILTIN_VPERMVARSI512, + IX86_BUILTIN_VTERNLOGD512_MASK, + IX86_BUILTIN_VTERNLOGD512_MASKZ, + IX86_BUILTIN_VTERNLOGQ512_MASK, + IX86_BUILTIN_VTERNLOGQ512_MASKZ, + + /* Mask arithmetic operations */ + IX86_BUILTIN_KAND16, + IX86_BUILTIN_KANDN16, + IX86_BUILTIN_KNOT16, + IX86_BUILTIN_KOR16, + IX86_BUILTIN_KORTESTC16, + IX86_BUILTIN_KORTESTZ16, + IX86_BUILTIN_KUNPCKBW, + IX86_BUILTIN_KXNOR16, + IX86_BUILTIN_KXOR16, + IX86_BUILTIN_KMOV16, + + /* AVX512VL. 
*/ + IX86_BUILTIN_PMOVUSQD256_MEM, + IX86_BUILTIN_PMOVUSQD128_MEM, + IX86_BUILTIN_PMOVSQD256_MEM, + IX86_BUILTIN_PMOVSQD128_MEM, + IX86_BUILTIN_PMOVQD256_MEM, + IX86_BUILTIN_PMOVQD128_MEM, + IX86_BUILTIN_PMOVUSQW256_MEM, + IX86_BUILTIN_PMOVUSQW128_MEM, + IX86_BUILTIN_PMOVSQW256_MEM, + IX86_BUILTIN_PMOVSQW128_MEM, + IX86_BUILTIN_PMOVQW256_MEM, + IX86_BUILTIN_PMOVQW128_MEM, + IX86_BUILTIN_PMOVUSQB256_MEM, + IX86_BUILTIN_PMOVUSQB128_MEM, + IX86_BUILTIN_PMOVSQB256_MEM, + IX86_BUILTIN_PMOVSQB128_MEM, + IX86_BUILTIN_PMOVQB256_MEM, + IX86_BUILTIN_PMOVQB128_MEM, + IX86_BUILTIN_PMOVUSDW256_MEM, + IX86_BUILTIN_PMOVUSDW128_MEM, + IX86_BUILTIN_PMOVSDW256_MEM, + IX86_BUILTIN_PMOVSDW128_MEM, + IX86_BUILTIN_PMOVDW256_MEM, + IX86_BUILTIN_PMOVDW128_MEM, + IX86_BUILTIN_PMOVUSDB256_MEM, + IX86_BUILTIN_PMOVUSDB128_MEM, + IX86_BUILTIN_PMOVSDB256_MEM, + IX86_BUILTIN_PMOVSDB128_MEM, + IX86_BUILTIN_PMOVDB256_MEM, + IX86_BUILTIN_PMOVDB128_MEM, + IX86_BUILTIN_MOVDQA64LOAD256_MASK, + IX86_BUILTIN_MOVDQA64LOAD128_MASK, + IX86_BUILTIN_MOVDQA32LOAD256_MASK, + IX86_BUILTIN_MOVDQA32LOAD128_MASK, + IX86_BUILTIN_MOVDQA64STORE256_MASK, + IX86_BUILTIN_MOVDQA64STORE128_MASK, + IX86_BUILTIN_MOVDQA32STORE256_MASK, + IX86_BUILTIN_MOVDQA32STORE128_MASK, + IX86_BUILTIN_LOADAPD256_MASK, + IX86_BUILTIN_LOADAPD128_MASK, + IX86_BUILTIN_LOADAPS256_MASK, + IX86_BUILTIN_LOADAPS128_MASK, + IX86_BUILTIN_STOREAPD256_MASK, + IX86_BUILTIN_STOREAPD128_MASK, + IX86_BUILTIN_STOREAPS256_MASK, + IX86_BUILTIN_STOREAPS128_MASK, + IX86_BUILTIN_LOADUPD256_MASK, + IX86_BUILTIN_LOADUPD128_MASK, + IX86_BUILTIN_LOADUPS256_MASK, + IX86_BUILTIN_LOADUPS128_MASK, + IX86_BUILTIN_STOREUPD256_MASK, + IX86_BUILTIN_STOREUPD128_MASK, + IX86_BUILTIN_STOREUPS256_MASK, + IX86_BUILTIN_STOREUPS128_MASK, + IX86_BUILTIN_LOADDQUDI256_MASK, + IX86_BUILTIN_LOADDQUDI128_MASK, + IX86_BUILTIN_LOADDQUSI256_MASK, + IX86_BUILTIN_LOADDQUSI128_MASK, + IX86_BUILTIN_LOADDQUHI256_MASK, + IX86_BUILTIN_LOADDQUHI128_MASK, + IX86_BUILTIN_LOADDQUQI256_MASK, + IX86_BUILTIN_LOADDQUQI128_MASK, + IX86_BUILTIN_STOREDQUDI256_MASK, + IX86_BUILTIN_STOREDQUDI128_MASK, + IX86_BUILTIN_STOREDQUSI256_MASK, + IX86_BUILTIN_STOREDQUSI128_MASK, + IX86_BUILTIN_STOREDQUHI256_MASK, + IX86_BUILTIN_STOREDQUHI128_MASK, + IX86_BUILTIN_STOREDQUQI256_MASK, + IX86_BUILTIN_STOREDQUQI128_MASK, + IX86_BUILTIN_COMPRESSPDSTORE256, + IX86_BUILTIN_COMPRESSPDSTORE128, + IX86_BUILTIN_COMPRESSPSSTORE256, + IX86_BUILTIN_COMPRESSPSSTORE128, + IX86_BUILTIN_PCOMPRESSQSTORE256, + IX86_BUILTIN_PCOMPRESSQSTORE128, + IX86_BUILTIN_PCOMPRESSDSTORE256, + IX86_BUILTIN_PCOMPRESSDSTORE128, + IX86_BUILTIN_EXPANDPDLOAD256, + IX86_BUILTIN_EXPANDPDLOAD128, + IX86_BUILTIN_EXPANDPSLOAD256, + IX86_BUILTIN_EXPANDPSLOAD128, + IX86_BUILTIN_PEXPANDQLOAD256, + IX86_BUILTIN_PEXPANDQLOAD128, + IX86_BUILTIN_PEXPANDDLOAD256, + IX86_BUILTIN_PEXPANDDLOAD128, + IX86_BUILTIN_EXPANDPDLOAD256Z, + IX86_BUILTIN_EXPANDPDLOAD128Z, + IX86_BUILTIN_EXPANDPSLOAD256Z, + IX86_BUILTIN_EXPANDPSLOAD128Z, + IX86_BUILTIN_PEXPANDQLOAD256Z, + IX86_BUILTIN_PEXPANDQLOAD128Z, + IX86_BUILTIN_PEXPANDDLOAD256Z, + IX86_BUILTIN_PEXPANDDLOAD128Z, + IX86_BUILTIN_PALIGNR256_MASK, + IX86_BUILTIN_PALIGNR128_MASK, + IX86_BUILTIN_MOVDQA64_256_MASK, + IX86_BUILTIN_MOVDQA64_128_MASK, + IX86_BUILTIN_MOVDQA32_256_MASK, + IX86_BUILTIN_MOVDQA32_128_MASK, + IX86_BUILTIN_MOVAPD256_MASK, + IX86_BUILTIN_MOVAPD128_MASK, + IX86_BUILTIN_MOVAPS256_MASK, + IX86_BUILTIN_MOVAPS128_MASK, + IX86_BUILTIN_MOVDQUHI256_MASK, + IX86_BUILTIN_MOVDQUHI128_MASK, + IX86_BUILTIN_MOVDQUQI256_MASK, + 
IX86_BUILTIN_MOVDQUQI128_MASK, + IX86_BUILTIN_MINPS128_MASK, + IX86_BUILTIN_MAXPS128_MASK, + IX86_BUILTIN_MINPD128_MASK, + IX86_BUILTIN_MAXPD128_MASK, + IX86_BUILTIN_MAXPD256_MASK, + IX86_BUILTIN_MAXPS256_MASK, + IX86_BUILTIN_MINPD256_MASK, + IX86_BUILTIN_MINPS256_MASK, + IX86_BUILTIN_MULPS128_MASK, + IX86_BUILTIN_DIVPS128_MASK, + IX86_BUILTIN_MULPD128_MASK, + IX86_BUILTIN_DIVPD128_MASK, + IX86_BUILTIN_DIVPD256_MASK, + IX86_BUILTIN_DIVPS256_MASK, + IX86_BUILTIN_MULPD256_MASK, + IX86_BUILTIN_MULPS256_MASK, + IX86_BUILTIN_ADDPD128_MASK, + IX86_BUILTIN_ADDPD256_MASK, + IX86_BUILTIN_ADDPS128_MASK, + IX86_BUILTIN_ADDPS256_MASK, + IX86_BUILTIN_SUBPD128_MASK, + IX86_BUILTIN_SUBPD256_MASK, + IX86_BUILTIN_SUBPS128_MASK, + IX86_BUILTIN_SUBPS256_MASK, + IX86_BUILTIN_XORPD256_MASK, + IX86_BUILTIN_XORPD128_MASK, + IX86_BUILTIN_XORPS256_MASK, + IX86_BUILTIN_XORPS128_MASK, + IX86_BUILTIN_ORPD256_MASK, + IX86_BUILTIN_ORPD128_MASK, + IX86_BUILTIN_ORPS256_MASK, + IX86_BUILTIN_ORPS128_MASK, + IX86_BUILTIN_BROADCASTF32x2_256, + IX86_BUILTIN_BROADCASTI32x2_256, + IX86_BUILTIN_BROADCASTI32x2_128, + IX86_BUILTIN_BROADCASTF64X2_256, + IX86_BUILTIN_BROADCASTI64X2_256, + IX86_BUILTIN_BROADCASTF32X4_256, + IX86_BUILTIN_BROADCASTI32X4_256, + IX86_BUILTIN_EXTRACTF32X4_256, + IX86_BUILTIN_EXTRACTI32X4_256, + IX86_BUILTIN_DBPSADBW256, + IX86_BUILTIN_DBPSADBW128, + IX86_BUILTIN_CVTTPD2QQ256, + IX86_BUILTIN_CVTTPD2QQ128, + IX86_BUILTIN_CVTTPD2UQQ256, + IX86_BUILTIN_CVTTPD2UQQ128, + IX86_BUILTIN_CVTPD2QQ256, + IX86_BUILTIN_CVTPD2QQ128, + IX86_BUILTIN_CVTPD2UQQ256, + IX86_BUILTIN_CVTPD2UQQ128, + IX86_BUILTIN_CVTPD2UDQ256_MASK, + IX86_BUILTIN_CVTPD2UDQ128_MASK, + IX86_BUILTIN_CVTTPS2QQ256, + IX86_BUILTIN_CVTTPS2QQ128, + IX86_BUILTIN_CVTTPS2UQQ256, + IX86_BUILTIN_CVTTPS2UQQ128, + IX86_BUILTIN_CVTTPS2DQ256_MASK, + IX86_BUILTIN_CVTTPS2DQ128_MASK, + IX86_BUILTIN_CVTTPS2UDQ256, + IX86_BUILTIN_CVTTPS2UDQ128, + IX86_BUILTIN_CVTTPD2DQ256_MASK, + IX86_BUILTIN_CVTTPD2DQ128_MASK, + IX86_BUILTIN_CVTTPD2UDQ256_MASK, + IX86_BUILTIN_CVTTPD2UDQ128_MASK, + IX86_BUILTIN_CVTPD2DQ256_MASK, + IX86_BUILTIN_CVTPD2DQ128_MASK, + IX86_BUILTIN_CVTDQ2PD256_MASK, + IX86_BUILTIN_CVTDQ2PD128_MASK, + IX86_BUILTIN_CVTUDQ2PD256_MASK, + IX86_BUILTIN_CVTUDQ2PD128_MASK, + IX86_BUILTIN_CVTDQ2PS256_MASK, + IX86_BUILTIN_CVTDQ2PS128_MASK, + IX86_BUILTIN_CVTUDQ2PS256_MASK, + IX86_BUILTIN_CVTUDQ2PS128_MASK, + IX86_BUILTIN_CVTPS2PD256_MASK, + IX86_BUILTIN_CVTPS2PD128_MASK, + IX86_BUILTIN_PBROADCASTB256_MASK, + IX86_BUILTIN_PBROADCASTB256_GPR_MASK, + IX86_BUILTIN_PBROADCASTB128_MASK, + IX86_BUILTIN_PBROADCASTB128_GPR_MASK, + IX86_BUILTIN_PBROADCASTW256_MASK, + IX86_BUILTIN_PBROADCASTW256_GPR_MASK, + IX86_BUILTIN_PBROADCASTW128_MASK, + IX86_BUILTIN_PBROADCASTW128_GPR_MASK, + IX86_BUILTIN_PBROADCASTD256_MASK, + IX86_BUILTIN_PBROADCASTD256_GPR_MASK, + IX86_BUILTIN_PBROADCASTD128_MASK, + IX86_BUILTIN_PBROADCASTD128_GPR_MASK, + IX86_BUILTIN_PBROADCASTQ256_MASK, + IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, + IX86_BUILTIN_PBROADCASTQ128_MASK, + IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, + IX86_BUILTIN_BROADCASTSS256, + IX86_BUILTIN_BROADCASTSS128, + IX86_BUILTIN_BROADCASTSD256, + IX86_BUILTIN_EXTRACTF64X2_256, + IX86_BUILTIN_EXTRACTI64X2_256, + IX86_BUILTIN_INSERTF32X4_256, + IX86_BUILTIN_INSERTI32X4_256, + IX86_BUILTIN_PMOVSXBW256_MASK, + IX86_BUILTIN_PMOVSXBW128_MASK, + IX86_BUILTIN_PMOVSXBD256_MASK, + IX86_BUILTIN_PMOVSXBD128_MASK, + IX86_BUILTIN_PMOVSXBQ256_MASK, + IX86_BUILTIN_PMOVSXBQ128_MASK, + IX86_BUILTIN_PMOVSXWD256_MASK, + IX86_BUILTIN_PMOVSXWD128_MASK, + 
IX86_BUILTIN_PMOVSXWQ256_MASK, + IX86_BUILTIN_PMOVSXWQ128_MASK, + IX86_BUILTIN_PMOVSXDQ256_MASK, + IX86_BUILTIN_PMOVSXDQ128_MASK, + IX86_BUILTIN_PMOVZXBW256_MASK, + IX86_BUILTIN_PMOVZXBW128_MASK, + IX86_BUILTIN_PMOVZXBD256_MASK, + IX86_BUILTIN_PMOVZXBD128_MASK, + IX86_BUILTIN_PMOVZXBQ256_MASK, + IX86_BUILTIN_PMOVZXBQ128_MASK, + IX86_BUILTIN_PMOVZXWD256_MASK, + IX86_BUILTIN_PMOVZXWD128_MASK, + IX86_BUILTIN_PMOVZXWQ256_MASK, + IX86_BUILTIN_PMOVZXWQ128_MASK, + IX86_BUILTIN_PMOVZXDQ256_MASK, + IX86_BUILTIN_PMOVZXDQ128_MASK, + IX86_BUILTIN_REDUCEPD256_MASK, + IX86_BUILTIN_REDUCEPD128_MASK, + IX86_BUILTIN_REDUCEPS256_MASK, + IX86_BUILTIN_REDUCEPS128_MASK, + IX86_BUILTIN_REDUCESD_MASK, + IX86_BUILTIN_REDUCESS_MASK, + IX86_BUILTIN_VPERMVARHI256_MASK, + IX86_BUILTIN_VPERMVARHI128_MASK, + IX86_BUILTIN_VPERMT2VARHI256, + IX86_BUILTIN_VPERMT2VARHI256_MASKZ, + IX86_BUILTIN_VPERMT2VARHI128, + IX86_BUILTIN_VPERMT2VARHI128_MASKZ, + IX86_BUILTIN_VPERMI2VARHI256, + IX86_BUILTIN_VPERMI2VARHI128, + IX86_BUILTIN_RCP14PD256, + IX86_BUILTIN_RCP14PD128, + IX86_BUILTIN_RCP14PS256, + IX86_BUILTIN_RCP14PS128, + IX86_BUILTIN_RSQRT14PD256_MASK, + IX86_BUILTIN_RSQRT14PD128_MASK, + IX86_BUILTIN_RSQRT14PS256_MASK, + IX86_BUILTIN_RSQRT14PS128_MASK, + IX86_BUILTIN_SQRTPD256_MASK, + IX86_BUILTIN_SQRTPD128_MASK, + IX86_BUILTIN_SQRTPS256_MASK, + IX86_BUILTIN_SQRTPS128_MASK, + IX86_BUILTIN_PADDB128_MASK, + IX86_BUILTIN_PADDW128_MASK, + IX86_BUILTIN_PADDD128_MASK, + IX86_BUILTIN_PADDQ128_MASK, + IX86_BUILTIN_PSUBB128_MASK, + IX86_BUILTIN_PSUBW128_MASK, + IX86_BUILTIN_PSUBD128_MASK, + IX86_BUILTIN_PSUBQ128_MASK, + IX86_BUILTIN_PADDSB128_MASK, + IX86_BUILTIN_PADDSW128_MASK, + IX86_BUILTIN_PSUBSB128_MASK, + IX86_BUILTIN_PSUBSW128_MASK, + IX86_BUILTIN_PADDUSB128_MASK, + IX86_BUILTIN_PADDUSW128_MASK, + IX86_BUILTIN_PSUBUSB128_MASK, + IX86_BUILTIN_PSUBUSW128_MASK, + IX86_BUILTIN_PADDB256_MASK, + IX86_BUILTIN_PADDW256_MASK, + IX86_BUILTIN_PADDD256_MASK, + IX86_BUILTIN_PADDQ256_MASK, + IX86_BUILTIN_PADDSB256_MASK, + IX86_BUILTIN_PADDSW256_MASK, + IX86_BUILTIN_PADDUSB256_MASK, + IX86_BUILTIN_PADDUSW256_MASK, + IX86_BUILTIN_PSUBB256_MASK, + IX86_BUILTIN_PSUBW256_MASK, + IX86_BUILTIN_PSUBD256_MASK, + IX86_BUILTIN_PSUBQ256_MASK, + IX86_BUILTIN_PSUBSB256_MASK, + IX86_BUILTIN_PSUBSW256_MASK, + IX86_BUILTIN_PSUBUSB256_MASK, + IX86_BUILTIN_PSUBUSW256_MASK, + IX86_BUILTIN_SHUF_F64x2_256, + IX86_BUILTIN_SHUF_I64x2_256, + IX86_BUILTIN_SHUF_I32x4_256, + IX86_BUILTIN_SHUF_F32x4_256, + IX86_BUILTIN_PMOVWB128, + IX86_BUILTIN_PMOVWB256, + IX86_BUILTIN_PMOVSWB128, + IX86_BUILTIN_PMOVSWB256, + IX86_BUILTIN_PMOVUSWB128, + IX86_BUILTIN_PMOVUSWB256, + IX86_BUILTIN_PMOVDB128, + IX86_BUILTIN_PMOVDB256, + IX86_BUILTIN_PMOVSDB128, + IX86_BUILTIN_PMOVSDB256, + IX86_BUILTIN_PMOVUSDB128, + IX86_BUILTIN_PMOVUSDB256, + IX86_BUILTIN_PMOVDW128, + IX86_BUILTIN_PMOVDW256, + IX86_BUILTIN_PMOVSDW128, + IX86_BUILTIN_PMOVSDW256, + IX86_BUILTIN_PMOVUSDW128, + IX86_BUILTIN_PMOVUSDW256, + IX86_BUILTIN_PMOVQB128, + IX86_BUILTIN_PMOVQB256, + IX86_BUILTIN_PMOVSQB128, + IX86_BUILTIN_PMOVSQB256, + IX86_BUILTIN_PMOVUSQB128, + IX86_BUILTIN_PMOVUSQB256, + IX86_BUILTIN_PMOVQW128, + IX86_BUILTIN_PMOVQW256, + IX86_BUILTIN_PMOVSQW128, + IX86_BUILTIN_PMOVSQW256, + IX86_BUILTIN_PMOVUSQW128, + IX86_BUILTIN_PMOVUSQW256, + IX86_BUILTIN_PMOVQD128, + IX86_BUILTIN_PMOVQD256, + IX86_BUILTIN_PMOVSQD128, + IX86_BUILTIN_PMOVSQD256, + IX86_BUILTIN_PMOVUSQD128, + IX86_BUILTIN_PMOVUSQD256, + IX86_BUILTIN_RANGEPD256, + IX86_BUILTIN_RANGEPD128, + IX86_BUILTIN_RANGEPS256, + IX86_BUILTIN_RANGEPS128, + 
IX86_BUILTIN_GETEXPPS256, + IX86_BUILTIN_GETEXPPD256, + IX86_BUILTIN_GETEXPPS128, + IX86_BUILTIN_GETEXPPD128, + IX86_BUILTIN_FIXUPIMMPD256_MASK, + IX86_BUILTIN_FIXUPIMMPD256_MASKZ, + IX86_BUILTIN_FIXUPIMMPS256_MASK, + IX86_BUILTIN_FIXUPIMMPS256_MASKZ, + IX86_BUILTIN_FIXUPIMMPD128_MASK, + IX86_BUILTIN_FIXUPIMMPD128_MASKZ, + IX86_BUILTIN_FIXUPIMMPS128_MASK, + IX86_BUILTIN_FIXUPIMMPS128_MASKZ, + IX86_BUILTIN_PABSQ256, + IX86_BUILTIN_PABSQ128, + IX86_BUILTIN_PABSD256_MASK, + IX86_BUILTIN_PABSD128_MASK, + IX86_BUILTIN_PMULHRSW256_MASK, + IX86_BUILTIN_PMULHRSW128_MASK, + IX86_BUILTIN_PMULHUW128_MASK, + IX86_BUILTIN_PMULHUW256_MASK, + IX86_BUILTIN_PMULHW256_MASK, + IX86_BUILTIN_PMULHW128_MASK, + IX86_BUILTIN_PMULLW256_MASK, + IX86_BUILTIN_PMULLW128_MASK, + IX86_BUILTIN_PMULLQ256, + IX86_BUILTIN_PMULLQ128, + IX86_BUILTIN_ANDPD256_MASK, + IX86_BUILTIN_ANDPD128_MASK, + IX86_BUILTIN_ANDPS256_MASK, + IX86_BUILTIN_ANDPS128_MASK, + IX86_BUILTIN_ANDNPD256_MASK, + IX86_BUILTIN_ANDNPD128_MASK, + IX86_BUILTIN_ANDNPS256_MASK, + IX86_BUILTIN_ANDNPS128_MASK, + IX86_BUILTIN_PSLLWI128_MASK, + IX86_BUILTIN_PSLLDI128_MASK, + IX86_BUILTIN_PSLLQI128_MASK, + IX86_BUILTIN_PSLLW128_MASK, + IX86_BUILTIN_PSLLD128_MASK, + IX86_BUILTIN_PSLLQ128_MASK, + IX86_BUILTIN_PSLLWI256_MASK , + IX86_BUILTIN_PSLLW256_MASK, + IX86_BUILTIN_PSLLDI256_MASK, + IX86_BUILTIN_PSLLD256_MASK, + IX86_BUILTIN_PSLLQI256_MASK, + IX86_BUILTIN_PSLLQ256_MASK, + IX86_BUILTIN_PSRADI128_MASK, + IX86_BUILTIN_PSRAD128_MASK, + IX86_BUILTIN_PSRADI256_MASK, + IX86_BUILTIN_PSRAD256_MASK, + IX86_BUILTIN_PSRAQI128_MASK, + IX86_BUILTIN_PSRAQ128_MASK, + IX86_BUILTIN_PSRAQI256_MASK, + IX86_BUILTIN_PSRAQ256_MASK, + IX86_BUILTIN_PANDD256, + IX86_BUILTIN_PANDD128, + IX86_BUILTIN_PSRLDI128_MASK, + IX86_BUILTIN_PSRLD128_MASK, + IX86_BUILTIN_PSRLDI256_MASK, + IX86_BUILTIN_PSRLD256_MASK, + IX86_BUILTIN_PSRLQI128_MASK, + IX86_BUILTIN_PSRLQ128_MASK, + IX86_BUILTIN_PSRLQI256_MASK, + IX86_BUILTIN_PSRLQ256_MASK, + IX86_BUILTIN_PANDQ256, + IX86_BUILTIN_PANDQ128, + IX86_BUILTIN_PANDND256, + IX86_BUILTIN_PANDND128, + IX86_BUILTIN_PANDNQ256, + IX86_BUILTIN_PANDNQ128, + IX86_BUILTIN_PORD256, + IX86_BUILTIN_PORD128, + IX86_BUILTIN_PORQ256, + IX86_BUILTIN_PORQ128, + IX86_BUILTIN_PXORD256, + IX86_BUILTIN_PXORD128, + IX86_BUILTIN_PXORQ256, + IX86_BUILTIN_PXORQ128, + IX86_BUILTIN_PACKSSWB256_MASK, + IX86_BUILTIN_PACKSSWB128_MASK, + IX86_BUILTIN_PACKUSWB256_MASK, + IX86_BUILTIN_PACKUSWB128_MASK, + IX86_BUILTIN_RNDSCALEPS256, + IX86_BUILTIN_RNDSCALEPD256, + IX86_BUILTIN_RNDSCALEPS128, + IX86_BUILTIN_RNDSCALEPD128, + IX86_BUILTIN_VTERNLOGQ256_MASK, + IX86_BUILTIN_VTERNLOGQ256_MASKZ, + IX86_BUILTIN_VTERNLOGD256_MASK, + IX86_BUILTIN_VTERNLOGD256_MASKZ, + IX86_BUILTIN_VTERNLOGQ128_MASK, + IX86_BUILTIN_VTERNLOGQ128_MASKZ, + IX86_BUILTIN_VTERNLOGD128_MASK, + IX86_BUILTIN_VTERNLOGD128_MASKZ, + IX86_BUILTIN_SCALEFPD256, + IX86_BUILTIN_SCALEFPS256, + IX86_BUILTIN_SCALEFPD128, + IX86_BUILTIN_SCALEFPS128, + IX86_BUILTIN_VFMADDPD256_MASK, + IX86_BUILTIN_VFMADDPD256_MASK3, + IX86_BUILTIN_VFMADDPD256_MASKZ, + IX86_BUILTIN_VFMADDPD128_MASK, + IX86_BUILTIN_VFMADDPD128_MASK3, + IX86_BUILTIN_VFMADDPD128_MASKZ, + IX86_BUILTIN_VFMADDPS256_MASK, + IX86_BUILTIN_VFMADDPS256_MASK3, + IX86_BUILTIN_VFMADDPS256_MASKZ, + IX86_BUILTIN_VFMADDPS128_MASK, + IX86_BUILTIN_VFMADDPS128_MASK3, + IX86_BUILTIN_VFMADDPS128_MASKZ, + IX86_BUILTIN_VFMSUBPD256_MASK3, + IX86_BUILTIN_VFMSUBPD128_MASK3, + IX86_BUILTIN_VFMSUBPS256_MASK3, + IX86_BUILTIN_VFMSUBPS128_MASK3, + IX86_BUILTIN_VFNMADDPD256_MASK, + 
IX86_BUILTIN_VFNMADDPD128_MASK, + IX86_BUILTIN_VFNMADDPS256_MASK, + IX86_BUILTIN_VFNMADDPS128_MASK, + IX86_BUILTIN_VFNMSUBPD256_MASK, + IX86_BUILTIN_VFNMSUBPD256_MASK3, + IX86_BUILTIN_VFNMSUBPD128_MASK, + IX86_BUILTIN_VFNMSUBPD128_MASK3, + IX86_BUILTIN_VFNMSUBPS256_MASK, + IX86_BUILTIN_VFNMSUBPS256_MASK3, + IX86_BUILTIN_VFNMSUBPS128_MASK, + IX86_BUILTIN_VFNMSUBPS128_MASK3, + IX86_BUILTIN_VFMADDSUBPD256_MASK, + IX86_BUILTIN_VFMADDSUBPD256_MASK3, + IX86_BUILTIN_VFMADDSUBPD256_MASKZ, + IX86_BUILTIN_VFMADDSUBPD128_MASK, + IX86_BUILTIN_VFMADDSUBPD128_MASK3, + IX86_BUILTIN_VFMADDSUBPD128_MASKZ, + IX86_BUILTIN_VFMADDSUBPS256_MASK, + IX86_BUILTIN_VFMADDSUBPS256_MASK3, + IX86_BUILTIN_VFMADDSUBPS256_MASKZ, + IX86_BUILTIN_VFMADDSUBPS128_MASK, + IX86_BUILTIN_VFMADDSUBPS128_MASK3, + IX86_BUILTIN_VFMADDSUBPS128_MASKZ, + IX86_BUILTIN_VFMSUBADDPD256_MASK3, + IX86_BUILTIN_VFMSUBADDPD128_MASK3, + IX86_BUILTIN_VFMSUBADDPS256_MASK3, + IX86_BUILTIN_VFMSUBADDPS128_MASK3, + IX86_BUILTIN_INSERTF64X2_256, + IX86_BUILTIN_INSERTI64X2_256, + IX86_BUILTIN_PSRAVV16HI, + IX86_BUILTIN_PSRAVV8HI, + IX86_BUILTIN_PMADDUBSW256_MASK, + IX86_BUILTIN_PMADDUBSW128_MASK, + IX86_BUILTIN_PMADDWD256_MASK, + IX86_BUILTIN_PMADDWD128_MASK, + IX86_BUILTIN_PSRLVV16HI, + IX86_BUILTIN_PSRLVV8HI, + IX86_BUILTIN_CVTPS2DQ256_MASK, + IX86_BUILTIN_CVTPS2DQ128_MASK, + IX86_BUILTIN_CVTPS2UDQ256, + IX86_BUILTIN_CVTPS2UDQ128, + IX86_BUILTIN_CVTPS2QQ256, + IX86_BUILTIN_CVTPS2QQ128, + IX86_BUILTIN_CVTPS2UQQ256, + IX86_BUILTIN_CVTPS2UQQ128, + IX86_BUILTIN_GETMANTPS256, + IX86_BUILTIN_GETMANTPS128, + IX86_BUILTIN_GETMANTPD256, + IX86_BUILTIN_GETMANTPD128, + IX86_BUILTIN_MOVDDUP256_MASK, + IX86_BUILTIN_MOVDDUP128_MASK, + IX86_BUILTIN_MOVSHDUP256_MASK, + IX86_BUILTIN_MOVSHDUP128_MASK, + IX86_BUILTIN_MOVSLDUP256_MASK, + IX86_BUILTIN_MOVSLDUP128_MASK, + IX86_BUILTIN_CVTQQ2PS256, + IX86_BUILTIN_CVTQQ2PS128, + IX86_BUILTIN_CVTUQQ2PS256, + IX86_BUILTIN_CVTUQQ2PS128, + IX86_BUILTIN_CVTQQ2PD256, + IX86_BUILTIN_CVTQQ2PD128, + IX86_BUILTIN_CVTUQQ2PD256, + IX86_BUILTIN_CVTUQQ2PD128, + IX86_BUILTIN_VPERMT2VARQ256, + IX86_BUILTIN_VPERMT2VARQ256_MASKZ, + IX86_BUILTIN_VPERMT2VARD256, + IX86_BUILTIN_VPERMT2VARD256_MASKZ, + IX86_BUILTIN_VPERMI2VARQ256, + IX86_BUILTIN_VPERMI2VARD256, + IX86_BUILTIN_VPERMT2VARPD256, + IX86_BUILTIN_VPERMT2VARPD256_MASKZ, + IX86_BUILTIN_VPERMT2VARPS256, + IX86_BUILTIN_VPERMT2VARPS256_MASKZ, + IX86_BUILTIN_VPERMI2VARPD256, + IX86_BUILTIN_VPERMI2VARPS256, + IX86_BUILTIN_VPERMT2VARQ128, + IX86_BUILTIN_VPERMT2VARQ128_MASKZ, + IX86_BUILTIN_VPERMT2VARD128, + IX86_BUILTIN_VPERMT2VARD128_MASKZ, + IX86_BUILTIN_VPERMI2VARQ128, + IX86_BUILTIN_VPERMI2VARD128, + IX86_BUILTIN_VPERMT2VARPD128, + IX86_BUILTIN_VPERMT2VARPD128_MASKZ, + IX86_BUILTIN_VPERMT2VARPS128, + IX86_BUILTIN_VPERMT2VARPS128_MASKZ, + IX86_BUILTIN_VPERMI2VARPD128, + IX86_BUILTIN_VPERMI2VARPS128, + IX86_BUILTIN_PSHUFB256_MASK, + IX86_BUILTIN_PSHUFB128_MASK, + IX86_BUILTIN_PSHUFHW256_MASK, + IX86_BUILTIN_PSHUFHW128_MASK, + IX86_BUILTIN_PSHUFLW256_MASK, + IX86_BUILTIN_PSHUFLW128_MASK, + IX86_BUILTIN_PSHUFD256_MASK, + IX86_BUILTIN_PSHUFD128_MASK, + IX86_BUILTIN_SHUFPD256_MASK, + IX86_BUILTIN_SHUFPD128_MASK, + IX86_BUILTIN_SHUFPS256_MASK, + IX86_BUILTIN_SHUFPS128_MASK, + IX86_BUILTIN_PROLVQ256, + IX86_BUILTIN_PROLVQ128, + IX86_BUILTIN_PROLQ256, + IX86_BUILTIN_PROLQ128, + IX86_BUILTIN_PRORVQ256, + IX86_BUILTIN_PRORVQ128, + IX86_BUILTIN_PRORQ256, + IX86_BUILTIN_PRORQ128, + IX86_BUILTIN_PSRAVQ128, + IX86_BUILTIN_PSRAVQ256, + IX86_BUILTIN_PSLLVV4DI_MASK, + IX86_BUILTIN_PSLLVV2DI_MASK, + 
IX86_BUILTIN_PSLLVV8SI_MASK, + IX86_BUILTIN_PSLLVV4SI_MASK, + IX86_BUILTIN_PSRAVV8SI_MASK, + IX86_BUILTIN_PSRAVV4SI_MASK, + IX86_BUILTIN_PSRLVV4DI_MASK, + IX86_BUILTIN_PSRLVV2DI_MASK, + IX86_BUILTIN_PSRLVV8SI_MASK, + IX86_BUILTIN_PSRLVV4SI_MASK, + IX86_BUILTIN_PSRAWI256_MASK, + IX86_BUILTIN_PSRAW256_MASK, + IX86_BUILTIN_PSRAWI128_MASK, + IX86_BUILTIN_PSRAW128_MASK, + IX86_BUILTIN_PSRLWI256_MASK, + IX86_BUILTIN_PSRLW256_MASK, + IX86_BUILTIN_PSRLWI128_MASK, + IX86_BUILTIN_PSRLW128_MASK, + IX86_BUILTIN_PRORVD256, + IX86_BUILTIN_PROLVD256, + IX86_BUILTIN_PRORD256, + IX86_BUILTIN_PROLD256, + IX86_BUILTIN_PRORVD128, + IX86_BUILTIN_PROLVD128, + IX86_BUILTIN_PRORD128, + IX86_BUILTIN_PROLD128, + IX86_BUILTIN_FPCLASSPD256, + IX86_BUILTIN_FPCLASSPD128, + IX86_BUILTIN_FPCLASSSD, + IX86_BUILTIN_FPCLASSPS256, + IX86_BUILTIN_FPCLASSPS128, + IX86_BUILTIN_FPCLASSSS, + IX86_BUILTIN_CVTB2MASK128, + IX86_BUILTIN_CVTB2MASK256, + IX86_BUILTIN_CVTW2MASK128, + IX86_BUILTIN_CVTW2MASK256, + IX86_BUILTIN_CVTD2MASK128, + IX86_BUILTIN_CVTD2MASK256, + IX86_BUILTIN_CVTQ2MASK128, + IX86_BUILTIN_CVTQ2MASK256, + IX86_BUILTIN_CVTMASK2B128, + IX86_BUILTIN_CVTMASK2B256, + IX86_BUILTIN_CVTMASK2W128, + IX86_BUILTIN_CVTMASK2W256, + IX86_BUILTIN_CVTMASK2D128, + IX86_BUILTIN_CVTMASK2D256, + IX86_BUILTIN_CVTMASK2Q128, + IX86_BUILTIN_CVTMASK2Q256, + IX86_BUILTIN_PCMPEQB128_MASK, + IX86_BUILTIN_PCMPEQB256_MASK, + IX86_BUILTIN_PCMPEQW128_MASK, + IX86_BUILTIN_PCMPEQW256_MASK, + IX86_BUILTIN_PCMPEQD128_MASK, + IX86_BUILTIN_PCMPEQD256_MASK, + IX86_BUILTIN_PCMPEQQ128_MASK, + IX86_BUILTIN_PCMPEQQ256_MASK, + IX86_BUILTIN_PCMPGTB128_MASK, + IX86_BUILTIN_PCMPGTB256_MASK, + IX86_BUILTIN_PCMPGTW128_MASK, + IX86_BUILTIN_PCMPGTW256_MASK, + IX86_BUILTIN_PCMPGTD128_MASK, + IX86_BUILTIN_PCMPGTD256_MASK, + IX86_BUILTIN_PCMPGTQ128_MASK, + IX86_BUILTIN_PCMPGTQ256_MASK, + IX86_BUILTIN_PTESTMB128, + IX86_BUILTIN_PTESTMB256, + IX86_BUILTIN_PTESTMW128, + IX86_BUILTIN_PTESTMW256, + IX86_BUILTIN_PTESTMD128, + IX86_BUILTIN_PTESTMD256, + IX86_BUILTIN_PTESTMQ128, + IX86_BUILTIN_PTESTMQ256, + IX86_BUILTIN_PTESTNMB128, + IX86_BUILTIN_PTESTNMB256, + IX86_BUILTIN_PTESTNMW128, + IX86_BUILTIN_PTESTNMW256, + IX86_BUILTIN_PTESTNMD128, + IX86_BUILTIN_PTESTNMD256, + IX86_BUILTIN_PTESTNMQ128, + IX86_BUILTIN_PTESTNMQ256, + IX86_BUILTIN_PBROADCASTMB128, + IX86_BUILTIN_PBROADCASTMB256, + IX86_BUILTIN_PBROADCASTMW128, + IX86_BUILTIN_PBROADCASTMW256, + IX86_BUILTIN_COMPRESSPD256, + IX86_BUILTIN_COMPRESSPD128, + IX86_BUILTIN_COMPRESSPS256, + IX86_BUILTIN_COMPRESSPS128, + IX86_BUILTIN_PCOMPRESSQ256, + IX86_BUILTIN_PCOMPRESSQ128, + IX86_BUILTIN_PCOMPRESSD256, + IX86_BUILTIN_PCOMPRESSD128, + IX86_BUILTIN_EXPANDPD256, + IX86_BUILTIN_EXPANDPD128, + IX86_BUILTIN_EXPANDPS256, + IX86_BUILTIN_EXPANDPS128, + IX86_BUILTIN_PEXPANDQ256, + IX86_BUILTIN_PEXPANDQ128, + IX86_BUILTIN_PEXPANDD256, + IX86_BUILTIN_PEXPANDD128, + IX86_BUILTIN_EXPANDPD256Z, + IX86_BUILTIN_EXPANDPD128Z, + IX86_BUILTIN_EXPANDPS256Z, + IX86_BUILTIN_EXPANDPS128Z, + IX86_BUILTIN_PEXPANDQ256Z, + IX86_BUILTIN_PEXPANDQ128Z, + IX86_BUILTIN_PEXPANDD256Z, + IX86_BUILTIN_PEXPANDD128Z, + IX86_BUILTIN_PMAXSD256_MASK, + IX86_BUILTIN_PMINSD256_MASK, + IX86_BUILTIN_PMAXUD256_MASK, + IX86_BUILTIN_PMINUD256_MASK, + IX86_BUILTIN_PMAXSD128_MASK, + IX86_BUILTIN_PMINSD128_MASK, + IX86_BUILTIN_PMAXUD128_MASK, + IX86_BUILTIN_PMINUD128_MASK, + IX86_BUILTIN_PMAXSQ256_MASK, + IX86_BUILTIN_PMINSQ256_MASK, + IX86_BUILTIN_PMAXUQ256_MASK, + IX86_BUILTIN_PMINUQ256_MASK, + IX86_BUILTIN_PMAXSQ128_MASK, + IX86_BUILTIN_PMINSQ128_MASK, + 
IX86_BUILTIN_PMAXUQ128_MASK, + IX86_BUILTIN_PMINUQ128_MASK, + IX86_BUILTIN_PMINSB256_MASK, + IX86_BUILTIN_PMINUB256_MASK, + IX86_BUILTIN_PMAXSB256_MASK, + IX86_BUILTIN_PMAXUB256_MASK, + IX86_BUILTIN_PMINSB128_MASK, + IX86_BUILTIN_PMINUB128_MASK, + IX86_BUILTIN_PMAXSB128_MASK, + IX86_BUILTIN_PMAXUB128_MASK, + IX86_BUILTIN_PMINSW256_MASK, + IX86_BUILTIN_PMINUW256_MASK, + IX86_BUILTIN_PMAXSW256_MASK, + IX86_BUILTIN_PMAXUW256_MASK, + IX86_BUILTIN_PMINSW128_MASK, + IX86_BUILTIN_PMINUW128_MASK, + IX86_BUILTIN_PMAXSW128_MASK, + IX86_BUILTIN_PMAXUW128_MASK, + IX86_BUILTIN_VPCONFLICTQ256, + IX86_BUILTIN_VPCONFLICTD256, + IX86_BUILTIN_VPCLZCNTQ256, + IX86_BUILTIN_VPCLZCNTD256, + IX86_BUILTIN_UNPCKHPD256_MASK, + IX86_BUILTIN_UNPCKHPD128_MASK, + IX86_BUILTIN_UNPCKHPS256_MASK, + IX86_BUILTIN_UNPCKHPS128_MASK, + IX86_BUILTIN_UNPCKLPD256_MASK, + IX86_BUILTIN_UNPCKLPD128_MASK, + IX86_BUILTIN_UNPCKLPS256_MASK, + IX86_BUILTIN_VPCONFLICTQ128, + IX86_BUILTIN_VPCONFLICTD128, + IX86_BUILTIN_VPCLZCNTQ128, + IX86_BUILTIN_VPCLZCNTD128, + IX86_BUILTIN_UNPCKLPS128_MASK, + IX86_BUILTIN_ALIGND256, + IX86_BUILTIN_ALIGNQ256, + IX86_BUILTIN_ALIGND128, + IX86_BUILTIN_ALIGNQ128, + IX86_BUILTIN_CVTPS2PH256_MASK, + IX86_BUILTIN_CVTPS2PH_MASK, + IX86_BUILTIN_CVTPH2PS_MASK, + IX86_BUILTIN_CVTPH2PS256_MASK, + IX86_BUILTIN_PUNPCKHDQ128_MASK, + IX86_BUILTIN_PUNPCKHDQ256_MASK, + IX86_BUILTIN_PUNPCKHQDQ128_MASK, + IX86_BUILTIN_PUNPCKHQDQ256_MASK, + IX86_BUILTIN_PUNPCKLDQ128_MASK, + IX86_BUILTIN_PUNPCKLDQ256_MASK, + IX86_BUILTIN_PUNPCKLQDQ128_MASK, + IX86_BUILTIN_PUNPCKLQDQ256_MASK, + IX86_BUILTIN_PUNPCKHBW128_MASK, + IX86_BUILTIN_PUNPCKHBW256_MASK, + IX86_BUILTIN_PUNPCKHWD128_MASK, + IX86_BUILTIN_PUNPCKHWD256_MASK, + IX86_BUILTIN_PUNPCKLBW128_MASK, + IX86_BUILTIN_PUNPCKLBW256_MASK, + IX86_BUILTIN_PUNPCKLWD128_MASK, + IX86_BUILTIN_PUNPCKLWD256_MASK, + IX86_BUILTIN_PSLLVV16HI, + IX86_BUILTIN_PSLLVV8HI, + IX86_BUILTIN_PACKSSDW256_MASK, + IX86_BUILTIN_PACKSSDW128_MASK, + IX86_BUILTIN_PACKUSDW256_MASK, + IX86_BUILTIN_PACKUSDW128_MASK, + IX86_BUILTIN_PAVGB256_MASK, + IX86_BUILTIN_PAVGW256_MASK, + IX86_BUILTIN_PAVGB128_MASK, + IX86_BUILTIN_PAVGW128_MASK, + IX86_BUILTIN_VPERMVARSF256_MASK, + IX86_BUILTIN_VPERMVARDF256_MASK, + IX86_BUILTIN_VPERMDF256_MASK, + IX86_BUILTIN_PABSB256_MASK, + IX86_BUILTIN_PABSB128_MASK, + IX86_BUILTIN_PABSW256_MASK, + IX86_BUILTIN_PABSW128_MASK, + IX86_BUILTIN_VPERMILVARPD_MASK, + IX86_BUILTIN_VPERMILVARPS_MASK, + IX86_BUILTIN_VPERMILVARPD256_MASK, + IX86_BUILTIN_VPERMILVARPS256_MASK, + IX86_BUILTIN_VPERMILPD_MASK, + IX86_BUILTIN_VPERMILPS_MASK, + IX86_BUILTIN_VPERMILPD256_MASK, + IX86_BUILTIN_VPERMILPS256_MASK, + IX86_BUILTIN_BLENDMQ256, + IX86_BUILTIN_BLENDMD256, + IX86_BUILTIN_BLENDMPD256, + IX86_BUILTIN_BLENDMPS256, + IX86_BUILTIN_BLENDMQ128, + IX86_BUILTIN_BLENDMD128, + IX86_BUILTIN_BLENDMPD128, + IX86_BUILTIN_BLENDMPS128, + IX86_BUILTIN_BLENDMW256, + IX86_BUILTIN_BLENDMB256, + IX86_BUILTIN_BLENDMW128, + IX86_BUILTIN_BLENDMB128, + IX86_BUILTIN_PMULLD256_MASK, + IX86_BUILTIN_PMULLD128_MASK, + IX86_BUILTIN_PMULUDQ256_MASK, + IX86_BUILTIN_PMULDQ256_MASK, + IX86_BUILTIN_PMULDQ128_MASK, + IX86_BUILTIN_PMULUDQ128_MASK, + IX86_BUILTIN_CVTPD2PS256_MASK, + IX86_BUILTIN_CVTPD2PS_MASK, + IX86_BUILTIN_VPERMVARSI256_MASK, + IX86_BUILTIN_VPERMVARDI256_MASK, + IX86_BUILTIN_VPERMDI256_MASK, + IX86_BUILTIN_CMPQ256, + IX86_BUILTIN_CMPD256, + IX86_BUILTIN_UCMPQ256, + IX86_BUILTIN_UCMPD256, + IX86_BUILTIN_CMPB256, + IX86_BUILTIN_CMPW256, + IX86_BUILTIN_UCMPB256, + IX86_BUILTIN_UCMPW256, + IX86_BUILTIN_CMPPD256_MASK, + 
IX86_BUILTIN_CMPPS256_MASK, + IX86_BUILTIN_CMPQ128, + IX86_BUILTIN_CMPD128, + IX86_BUILTIN_UCMPQ128, + IX86_BUILTIN_UCMPD128, + IX86_BUILTIN_CMPB128, + IX86_BUILTIN_CMPW128, + IX86_BUILTIN_UCMPB128, + IX86_BUILTIN_UCMPW128, + IX86_BUILTIN_CMPPD128_MASK, + IX86_BUILTIN_CMPPS128_MASK, + + IX86_BUILTIN_GATHER3SIV8SF, + IX86_BUILTIN_GATHER3SIV4SF, + IX86_BUILTIN_GATHER3SIV4DF, + IX86_BUILTIN_GATHER3SIV2DF, + IX86_BUILTIN_GATHER3DIV8SF, + IX86_BUILTIN_GATHER3DIV4SF, + IX86_BUILTIN_GATHER3DIV4DF, + IX86_BUILTIN_GATHER3DIV2DF, + IX86_BUILTIN_GATHER3SIV8SI, + IX86_BUILTIN_GATHER3SIV4SI, + IX86_BUILTIN_GATHER3SIV4DI, + IX86_BUILTIN_GATHER3SIV2DI, + IX86_BUILTIN_GATHER3DIV8SI, + IX86_BUILTIN_GATHER3DIV4SI, + IX86_BUILTIN_GATHER3DIV4DI, + IX86_BUILTIN_GATHER3DIV2DI, + IX86_BUILTIN_SCATTERSIV8SF, + IX86_BUILTIN_SCATTERSIV4SF, + IX86_BUILTIN_SCATTERSIV4DF, + IX86_BUILTIN_SCATTERSIV2DF, + IX86_BUILTIN_SCATTERDIV8SF, + IX86_BUILTIN_SCATTERDIV4SF, + IX86_BUILTIN_SCATTERDIV4DF, + IX86_BUILTIN_SCATTERDIV2DF, + IX86_BUILTIN_SCATTERSIV8SI, + IX86_BUILTIN_SCATTERSIV4SI, + IX86_BUILTIN_SCATTERSIV4DI, + IX86_BUILTIN_SCATTERSIV2DI, + IX86_BUILTIN_SCATTERDIV8SI, + IX86_BUILTIN_SCATTERDIV4SI, + IX86_BUILTIN_SCATTERDIV4DI, + IX86_BUILTIN_SCATTERDIV2DI, + + /* AVX512DQ. */ + IX86_BUILTIN_RANGESD128, + IX86_BUILTIN_RANGESS128, + IX86_BUILTIN_KUNPCKWD, + IX86_BUILTIN_KUNPCKDQ, + IX86_BUILTIN_BROADCASTF32x2_512, + IX86_BUILTIN_BROADCASTI32x2_512, + IX86_BUILTIN_BROADCASTF64X2_512, + IX86_BUILTIN_BROADCASTI64X2_512, + IX86_BUILTIN_BROADCASTF32X8_512, + IX86_BUILTIN_BROADCASTI32X8_512, + IX86_BUILTIN_EXTRACTF64X2_512, + IX86_BUILTIN_EXTRACTF32X8, + IX86_BUILTIN_EXTRACTI64X2_512, + IX86_BUILTIN_EXTRACTI32X8, + IX86_BUILTIN_REDUCEPD512_MASK, + IX86_BUILTIN_REDUCEPS512_MASK, + IX86_BUILTIN_PMULLQ512, + IX86_BUILTIN_XORPD512, + IX86_BUILTIN_XORPS512, + IX86_BUILTIN_ORPD512, + IX86_BUILTIN_ORPS512, + IX86_BUILTIN_ANDPD512, + IX86_BUILTIN_ANDPS512, + IX86_BUILTIN_ANDNPD512, + IX86_BUILTIN_ANDNPS512, + IX86_BUILTIN_INSERTF32X8, + IX86_BUILTIN_INSERTI32X8, + IX86_BUILTIN_INSERTF64X2_512, + IX86_BUILTIN_INSERTI64X2_512, + IX86_BUILTIN_FPCLASSPD512, + IX86_BUILTIN_FPCLASSPS512, + IX86_BUILTIN_CVTD2MASK512, + IX86_BUILTIN_CVTQ2MASK512, + IX86_BUILTIN_CVTMASK2D512, + IX86_BUILTIN_CVTMASK2Q512, + IX86_BUILTIN_CVTPD2QQ512, + IX86_BUILTIN_CVTPS2QQ512, + IX86_BUILTIN_CVTPD2UQQ512, + IX86_BUILTIN_CVTPS2UQQ512, + IX86_BUILTIN_CVTQQ2PS512, + IX86_BUILTIN_CVTUQQ2PS512, + IX86_BUILTIN_CVTQQ2PD512, + IX86_BUILTIN_CVTUQQ2PD512, + IX86_BUILTIN_CVTTPS2QQ512, + IX86_BUILTIN_CVTTPS2UQQ512, + IX86_BUILTIN_CVTTPD2QQ512, + IX86_BUILTIN_CVTTPD2UQQ512, + IX86_BUILTIN_RANGEPS512, + IX86_BUILTIN_RANGEPD512, + + /* AVX512BW. 
*/ + IX86_BUILTIN_PACKUSDW512, + IX86_BUILTIN_PACKSSDW512, + IX86_BUILTIN_LOADDQUHI512_MASK, + IX86_BUILTIN_LOADDQUQI512_MASK, + IX86_BUILTIN_PSLLDQ512, + IX86_BUILTIN_PSRLDQ512, + IX86_BUILTIN_STOREDQUHI512_MASK, + IX86_BUILTIN_STOREDQUQI512_MASK, + IX86_BUILTIN_PALIGNR512, + IX86_BUILTIN_PALIGNR512_MASK, + IX86_BUILTIN_MOVDQUHI512_MASK, + IX86_BUILTIN_MOVDQUQI512_MASK, + IX86_BUILTIN_PSADBW512, + IX86_BUILTIN_DBPSADBW512, + IX86_BUILTIN_PBROADCASTB512, + IX86_BUILTIN_PBROADCASTB512_GPR, + IX86_BUILTIN_PBROADCASTW512, + IX86_BUILTIN_PBROADCASTW512_GPR, + IX86_BUILTIN_PMOVSXBW512_MASK, + IX86_BUILTIN_PMOVZXBW512_MASK, + IX86_BUILTIN_VPERMVARHI512_MASK, + IX86_BUILTIN_VPERMT2VARHI512, + IX86_BUILTIN_VPERMT2VARHI512_MASKZ, + IX86_BUILTIN_VPERMI2VARHI512, + IX86_BUILTIN_PAVGB512, + IX86_BUILTIN_PAVGW512, + IX86_BUILTIN_PADDB512, + IX86_BUILTIN_PSUBB512, + IX86_BUILTIN_PSUBSB512, + IX86_BUILTIN_PADDSB512, + IX86_BUILTIN_PSUBUSB512, + IX86_BUILTIN_PADDUSB512, + IX86_BUILTIN_PSUBW512, + IX86_BUILTIN_PADDW512, + IX86_BUILTIN_PSUBSW512, + IX86_BUILTIN_PADDSW512, + IX86_BUILTIN_PSUBUSW512, + IX86_BUILTIN_PADDUSW512, + IX86_BUILTIN_PMAXUW512, + IX86_BUILTIN_PMAXSW512, + IX86_BUILTIN_PMINUW512, + IX86_BUILTIN_PMINSW512, + IX86_BUILTIN_PMAXUB512, + IX86_BUILTIN_PMAXSB512, + IX86_BUILTIN_PMINUB512, + IX86_BUILTIN_PMINSB512, + IX86_BUILTIN_PMOVWB512, + IX86_BUILTIN_PMOVSWB512, + IX86_BUILTIN_PMOVUSWB512, + IX86_BUILTIN_PMULHRSW512_MASK, + IX86_BUILTIN_PMULHUW512_MASK, + IX86_BUILTIN_PMULHW512_MASK, + IX86_BUILTIN_PMULLW512_MASK, + IX86_BUILTIN_PSLLWI512_MASK, + IX86_BUILTIN_PSLLW512_MASK, + IX86_BUILTIN_PACKSSWB512, + IX86_BUILTIN_PACKUSWB512, + IX86_BUILTIN_PSRAVV32HI, + IX86_BUILTIN_PMADDUBSW512_MASK, + IX86_BUILTIN_PMADDWD512_MASK, + IX86_BUILTIN_PSRLVV32HI, + IX86_BUILTIN_PUNPCKHBW512, + IX86_BUILTIN_PUNPCKHWD512, + IX86_BUILTIN_PUNPCKLBW512, + IX86_BUILTIN_PUNPCKLWD512, + IX86_BUILTIN_PSHUFB512, + IX86_BUILTIN_PSHUFHW512, + IX86_BUILTIN_PSHUFLW512, + IX86_BUILTIN_PSRAWI512, + IX86_BUILTIN_PSRAW512, + IX86_BUILTIN_PSRLWI512, + IX86_BUILTIN_PSRLW512, + IX86_BUILTIN_CVTB2MASK512, + IX86_BUILTIN_CVTW2MASK512, + IX86_BUILTIN_CVTMASK2B512, + IX86_BUILTIN_CVTMASK2W512, + IX86_BUILTIN_PCMPEQB512_MASK, + IX86_BUILTIN_PCMPEQW512_MASK, + IX86_BUILTIN_PCMPGTB512_MASK, + IX86_BUILTIN_PCMPGTW512_MASK, + IX86_BUILTIN_PTESTMB512, + IX86_BUILTIN_PTESTMW512, + IX86_BUILTIN_PTESTNMB512, + IX86_BUILTIN_PTESTNMW512, + IX86_BUILTIN_PSLLVV32HI, + IX86_BUILTIN_PABSB512, + IX86_BUILTIN_PABSW512, + IX86_BUILTIN_BLENDMW512, + IX86_BUILTIN_BLENDMB512, + IX86_BUILTIN_CMPB512, + IX86_BUILTIN_CMPW512, + IX86_BUILTIN_UCMPB512, + IX86_BUILTIN_UCMPW512, + + /* Alternate 4 and 8 element gather/scatter for the vectorizer + where all operands are 32-byte or 64-byte wide respectively. 
*/ + IX86_BUILTIN_GATHERALTSIV4DF, + IX86_BUILTIN_GATHERALTDIV8SF, + IX86_BUILTIN_GATHERALTSIV4DI, + IX86_BUILTIN_GATHERALTDIV8SI, + IX86_BUILTIN_GATHER3ALTDIV16SF, + IX86_BUILTIN_GATHER3ALTDIV16SI, + IX86_BUILTIN_GATHER3ALTSIV4DF, + IX86_BUILTIN_GATHER3ALTDIV8SF, + IX86_BUILTIN_GATHER3ALTSIV4DI, + IX86_BUILTIN_GATHER3ALTDIV8SI, + IX86_BUILTIN_GATHER3ALTSIV8DF, + IX86_BUILTIN_GATHER3ALTSIV8DI, + IX86_BUILTIN_GATHER3DIV16SF, + IX86_BUILTIN_GATHER3DIV16SI, + IX86_BUILTIN_GATHER3DIV8DF, + IX86_BUILTIN_GATHER3DIV8DI, + IX86_BUILTIN_GATHER3SIV16SF, + IX86_BUILTIN_GATHER3SIV16SI, + IX86_BUILTIN_GATHER3SIV8DF, + IX86_BUILTIN_GATHER3SIV8DI, + IX86_BUILTIN_SCATTERALTSIV8DF, + IX86_BUILTIN_SCATTERALTDIV16SF, + IX86_BUILTIN_SCATTERALTSIV8DI, + IX86_BUILTIN_SCATTERALTDIV16SI, + IX86_BUILTIN_SCATTERDIV16SF, + IX86_BUILTIN_SCATTERDIV16SI, + IX86_BUILTIN_SCATTERDIV8DF, + IX86_BUILTIN_SCATTERDIV8DI, + IX86_BUILTIN_SCATTERSIV16SF, + IX86_BUILTIN_SCATTERSIV16SI, + IX86_BUILTIN_SCATTERSIV8DF, + IX86_BUILTIN_SCATTERSIV8DI, + + /* AVX512PF */ + IX86_BUILTIN_GATHERPFQPD, + IX86_BUILTIN_GATHERPFDPS, + IX86_BUILTIN_GATHERPFDPD, + IX86_BUILTIN_GATHERPFQPS, + IX86_BUILTIN_SCATTERPFDPD, + IX86_BUILTIN_SCATTERPFDPS, + IX86_BUILTIN_SCATTERPFQPD, + IX86_BUILTIN_SCATTERPFQPS, + + /* AVX-512ER */ + IX86_BUILTIN_EXP2PD_MASK, + IX86_BUILTIN_EXP2PS_MASK, + IX86_BUILTIN_EXP2PS, + IX86_BUILTIN_RCP28PD, + IX86_BUILTIN_RCP28PS, + IX86_BUILTIN_RCP28SD, + IX86_BUILTIN_RCP28SS, + IX86_BUILTIN_RSQRT28PD, + IX86_BUILTIN_RSQRT28PS, + IX86_BUILTIN_RSQRT28SD, + IX86_BUILTIN_RSQRT28SS, + + /* AVX-512IFMA */ + IX86_BUILTIN_VPMADD52LUQ512, + IX86_BUILTIN_VPMADD52HUQ512, + IX86_BUILTIN_VPMADD52LUQ256, + IX86_BUILTIN_VPMADD52HUQ256, + IX86_BUILTIN_VPMADD52LUQ128, + IX86_BUILTIN_VPMADD52HUQ128, + IX86_BUILTIN_VPMADD52LUQ512_MASKZ, + IX86_BUILTIN_VPMADD52HUQ512_MASKZ, + IX86_BUILTIN_VPMADD52LUQ256_MASKZ, + IX86_BUILTIN_VPMADD52HUQ256_MASKZ, + IX86_BUILTIN_VPMADD52LUQ128_MASKZ, + IX86_BUILTIN_VPMADD52HUQ128_MASKZ, + + /* AVX-512VBMI */ + IX86_BUILTIN_VPMULTISHIFTQB512, + IX86_BUILTIN_VPMULTISHIFTQB256, + IX86_BUILTIN_VPMULTISHIFTQB128, + IX86_BUILTIN_VPERMVARQI512_MASK, + IX86_BUILTIN_VPERMT2VARQI512, + IX86_BUILTIN_VPERMT2VARQI512_MASKZ, + IX86_BUILTIN_VPERMI2VARQI512, + IX86_BUILTIN_VPERMVARQI256_MASK, + IX86_BUILTIN_VPERMVARQI128_MASK, + IX86_BUILTIN_VPERMT2VARQI256, + IX86_BUILTIN_VPERMT2VARQI256_MASKZ, + IX86_BUILTIN_VPERMT2VARQI128, + IX86_BUILTIN_VPERMT2VARQI128_MASKZ, + IX86_BUILTIN_VPERMI2VARQI256, + IX86_BUILTIN_VPERMI2VARQI128, + + /* SHA builtins. */ + IX86_BUILTIN_SHA1MSG1, + IX86_BUILTIN_SHA1MSG2, + IX86_BUILTIN_SHA1NEXTE, + IX86_BUILTIN_SHA1RNDS4, + IX86_BUILTIN_SHA256MSG1, + IX86_BUILTIN_SHA256MSG2, + IX86_BUILTIN_SHA256RNDS2, + + /* CLWB instructions. */ + IX86_BUILTIN_CLWB, + + /* CLFLUSHOPT instructions. */ + IX86_BUILTIN_CLFLUSHOPT, + + /* TFmode support builtins. */ + IX86_BUILTIN_INFQ, + IX86_BUILTIN_HUGE_VALQ, + IX86_BUILTIN_FABSQ, + IX86_BUILTIN_COPYSIGNQ, + + /* Vectorizer support builtins. 
*/ + IX86_BUILTIN_CPYSGNPS, + IX86_BUILTIN_CPYSGNPD, + IX86_BUILTIN_CPYSGNPS256, + IX86_BUILTIN_CPYSGNPS512, + IX86_BUILTIN_CPYSGNPD256, + IX86_BUILTIN_CPYSGNPD512, + IX86_BUILTIN_FLOORPS512, + IX86_BUILTIN_FLOORPD512, + IX86_BUILTIN_CEILPS512, + IX86_BUILTIN_CEILPD512, + IX86_BUILTIN_TRUNCPS512, + IX86_BUILTIN_TRUNCPD512, + IX86_BUILTIN_CVTPS2DQ512, + IX86_BUILTIN_VEC_PACK_SFIX512, + IX86_BUILTIN_FLOORPS_SFIX512, + IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, + IX86_BUILTIN_CEILPS_SFIX512, + IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, + IX86_BUILTIN_ROUNDPS_AZ_SFIX512, + IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, + + + /* FMA4 instructions. */ + IX86_BUILTIN_VFMADDSS, + IX86_BUILTIN_VFMADDSD, + IX86_BUILTIN_VFMADDPS, + IX86_BUILTIN_VFMADDPD, + IX86_BUILTIN_VFMADDPS256, + IX86_BUILTIN_VFMADDPD256, + IX86_BUILTIN_VFMADDSUBPS, + IX86_BUILTIN_VFMADDSUBPD, + IX86_BUILTIN_VFMADDSUBPS256, + IX86_BUILTIN_VFMADDSUBPD256, + + /* FMA3 instructions. */ + IX86_BUILTIN_VFMADDSS3, + IX86_BUILTIN_VFMADDSD3, + + /* XOP instructions. */ + IX86_BUILTIN_VPCMOV, + IX86_BUILTIN_VPCMOV_V2DI, + IX86_BUILTIN_VPCMOV_V4SI, + IX86_BUILTIN_VPCMOV_V8HI, + IX86_BUILTIN_VPCMOV_V16QI, + IX86_BUILTIN_VPCMOV_V4SF, + IX86_BUILTIN_VPCMOV_V2DF, + IX86_BUILTIN_VPCMOV256, + IX86_BUILTIN_VPCMOV_V4DI256, + IX86_BUILTIN_VPCMOV_V8SI256, + IX86_BUILTIN_VPCMOV_V16HI256, + IX86_BUILTIN_VPCMOV_V32QI256, + IX86_BUILTIN_VPCMOV_V8SF256, + IX86_BUILTIN_VPCMOV_V4DF256, + + IX86_BUILTIN_VPPERM, + + IX86_BUILTIN_VPMACSSWW, + IX86_BUILTIN_VPMACSWW, + IX86_BUILTIN_VPMACSSWD, + IX86_BUILTIN_VPMACSWD, + IX86_BUILTIN_VPMACSSDD, + IX86_BUILTIN_VPMACSDD, + IX86_BUILTIN_VPMACSSDQL, + IX86_BUILTIN_VPMACSSDQH, + IX86_BUILTIN_VPMACSDQL, + IX86_BUILTIN_VPMACSDQH, + IX86_BUILTIN_VPMADCSSWD, + IX86_BUILTIN_VPMADCSWD, + + IX86_BUILTIN_VPHADDBW, + IX86_BUILTIN_VPHADDBD, + IX86_BUILTIN_VPHADDBQ, + IX86_BUILTIN_VPHADDWD, + IX86_BUILTIN_VPHADDWQ, + IX86_BUILTIN_VPHADDDQ, + IX86_BUILTIN_VPHADDUBW, + IX86_BUILTIN_VPHADDUBD, + IX86_BUILTIN_VPHADDUBQ, + IX86_BUILTIN_VPHADDUWD, + IX86_BUILTIN_VPHADDUWQ, + IX86_BUILTIN_VPHADDUDQ, + IX86_BUILTIN_VPHSUBBW, + IX86_BUILTIN_VPHSUBWD, + IX86_BUILTIN_VPHSUBDQ, + + IX86_BUILTIN_VPROTB, + IX86_BUILTIN_VPROTW, + IX86_BUILTIN_VPROTD, + IX86_BUILTIN_VPROTQ, + IX86_BUILTIN_VPROTB_IMM, + IX86_BUILTIN_VPROTW_IMM, + IX86_BUILTIN_VPROTD_IMM, + IX86_BUILTIN_VPROTQ_IMM, + + IX86_BUILTIN_VPSHLB, + IX86_BUILTIN_VPSHLW, + IX86_BUILTIN_VPSHLD, + IX86_BUILTIN_VPSHLQ, + IX86_BUILTIN_VPSHAB, + IX86_BUILTIN_VPSHAW, + IX86_BUILTIN_VPSHAD, + IX86_BUILTIN_VPSHAQ, + + IX86_BUILTIN_VFRCZSS, + IX86_BUILTIN_VFRCZSD, + IX86_BUILTIN_VFRCZPS, + IX86_BUILTIN_VFRCZPD, + IX86_BUILTIN_VFRCZPS256, + IX86_BUILTIN_VFRCZPD256, + + IX86_BUILTIN_VPCOMEQUB, + IX86_BUILTIN_VPCOMNEUB, + IX86_BUILTIN_VPCOMLTUB, + IX86_BUILTIN_VPCOMLEUB, + IX86_BUILTIN_VPCOMGTUB, + IX86_BUILTIN_VPCOMGEUB, + IX86_BUILTIN_VPCOMFALSEUB, + IX86_BUILTIN_VPCOMTRUEUB, + + IX86_BUILTIN_VPCOMEQUW, + IX86_BUILTIN_VPCOMNEUW, + IX86_BUILTIN_VPCOMLTUW, + IX86_BUILTIN_VPCOMLEUW, + IX86_BUILTIN_VPCOMGTUW, + IX86_BUILTIN_VPCOMGEUW, + IX86_BUILTIN_VPCOMFALSEUW, + IX86_BUILTIN_VPCOMTRUEUW, + + IX86_BUILTIN_VPCOMEQUD, + IX86_BUILTIN_VPCOMNEUD, + IX86_BUILTIN_VPCOMLTUD, + IX86_BUILTIN_VPCOMLEUD, + IX86_BUILTIN_VPCOMGTUD, + IX86_BUILTIN_VPCOMGEUD, + IX86_BUILTIN_VPCOMFALSEUD, + IX86_BUILTIN_VPCOMTRUEUD, + + IX86_BUILTIN_VPCOMEQUQ, + IX86_BUILTIN_VPCOMNEUQ, + IX86_BUILTIN_VPCOMLTUQ, + IX86_BUILTIN_VPCOMLEUQ, + IX86_BUILTIN_VPCOMGTUQ, + IX86_BUILTIN_VPCOMGEUQ, + IX86_BUILTIN_VPCOMFALSEUQ, + IX86_BUILTIN_VPCOMTRUEUQ, + + 
IX86_BUILTIN_VPCOMEQB, + IX86_BUILTIN_VPCOMNEB, + IX86_BUILTIN_VPCOMLTB, + IX86_BUILTIN_VPCOMLEB, + IX86_BUILTIN_VPCOMGTB, + IX86_BUILTIN_VPCOMGEB, + IX86_BUILTIN_VPCOMFALSEB, + IX86_BUILTIN_VPCOMTRUEB, + + IX86_BUILTIN_VPCOMEQW, + IX86_BUILTIN_VPCOMNEW, + IX86_BUILTIN_VPCOMLTW, + IX86_BUILTIN_VPCOMLEW, + IX86_BUILTIN_VPCOMGTW, + IX86_BUILTIN_VPCOMGEW, + IX86_BUILTIN_VPCOMFALSEW, + IX86_BUILTIN_VPCOMTRUEW, + + IX86_BUILTIN_VPCOMEQD, + IX86_BUILTIN_VPCOMNED, + IX86_BUILTIN_VPCOMLTD, + IX86_BUILTIN_VPCOMLED, + IX86_BUILTIN_VPCOMGTD, + IX86_BUILTIN_VPCOMGED, + IX86_BUILTIN_VPCOMFALSED, + IX86_BUILTIN_VPCOMTRUED, + + IX86_BUILTIN_VPCOMEQQ, + IX86_BUILTIN_VPCOMNEQ, + IX86_BUILTIN_VPCOMLTQ, + IX86_BUILTIN_VPCOMLEQ, + IX86_BUILTIN_VPCOMGTQ, + IX86_BUILTIN_VPCOMGEQ, + IX86_BUILTIN_VPCOMFALSEQ, + IX86_BUILTIN_VPCOMTRUEQ, + + /* LWP instructions. */ + IX86_BUILTIN_LLWPCB, + IX86_BUILTIN_SLWPCB, + IX86_BUILTIN_LWPVAL32, + IX86_BUILTIN_LWPVAL64, + IX86_BUILTIN_LWPINS32, + IX86_BUILTIN_LWPINS64, + + /* LZCNT */ + IX86_BUILTIN_LZCNT16, + IX86_BUILTIN_CLZS, + IX86_BUILTIN_LZCNT32, + IX86_BUILTIN_LZCNT64, + + /* RTM */ + IX86_BUILTIN_XBEGIN, + IX86_BUILTIN_XEND, + IX86_BUILTIN_XABORT, + IX86_BUILTIN_XTEST, + + /* MPX */ + IX86_BUILTIN_BNDMK, + IX86_BUILTIN_BNDSTX, + IX86_BUILTIN_BNDLDX, + IX86_BUILTIN_BNDCL, + IX86_BUILTIN_BNDCU, + IX86_BUILTIN_BNDRET, + IX86_BUILTIN_BNDNARROW, + IX86_BUILTIN_BNDINT, + IX86_BUILTIN_SIZEOF, + IX86_BUILTIN_BNDLOWER, + IX86_BUILTIN_BNDUPPER, + + /* BMI instructions. */ + IX86_BUILTIN_BEXTR32, + IX86_BUILTIN_BEXTR64, + IX86_BUILTIN_TZCNT16, + IX86_BUILTIN_CTZS, + IX86_BUILTIN_TZCNT32, + IX86_BUILTIN_TZCNT64, + + /* TBM instructions. */ + IX86_BUILTIN_BEXTRI32, + IX86_BUILTIN_BEXTRI64, + + /* BMI2 instructions. */ + IX86_BUILTIN_BZHI32, + IX86_BUILTIN_BZHI64, + IX86_BUILTIN_PDEP32, + IX86_BUILTIN_PDEP64, + IX86_BUILTIN_PEXT32, + IX86_BUILTIN_PEXT64, + + /* ADX instructions. */ + IX86_BUILTIN_ADDCARRYX32, + IX86_BUILTIN_ADDCARRYX64, + + /* SBB instructions. */ + IX86_BUILTIN_SBB32, + IX86_BUILTIN_SBB64, + + /* FSGSBASE instructions. */ + IX86_BUILTIN_RDFSBASE32, + IX86_BUILTIN_RDFSBASE64, + IX86_BUILTIN_RDGSBASE32, + IX86_BUILTIN_RDGSBASE64, + IX86_BUILTIN_WRFSBASE32, + IX86_BUILTIN_WRFSBASE64, + IX86_BUILTIN_WRGSBASE32, + IX86_BUILTIN_WRGSBASE64, + + /* RDRND instructions. */ + IX86_BUILTIN_RDRAND16_STEP, + IX86_BUILTIN_RDRAND32_STEP, + IX86_BUILTIN_RDRAND64_STEP, + + /* RDSEED instructions. */ + IX86_BUILTIN_RDSEED16_STEP, + IX86_BUILTIN_RDSEED32_STEP, + IX86_BUILTIN_RDSEED64_STEP, + + /* F16C instructions. */ + IX86_BUILTIN_CVTPH2PS, + IX86_BUILTIN_CVTPH2PS256, + IX86_BUILTIN_CVTPS2PH, + IX86_BUILTIN_CVTPS2PH256, + + /* MONITORX and MWAITX instrucions. */ + IX86_BUILTIN_MONITORX, + IX86_BUILTIN_MWAITX, + + /* CFString built-in for darwin */ + IX86_BUILTIN_CFSTRING, + + /* Builtins to get CPU type and supported features. */ + IX86_BUILTIN_CPU_INIT, + IX86_BUILTIN_CPU_IS, + IX86_BUILTIN_CPU_SUPPORTS, + + /* Read/write FLAGS register built-ins. */ + IX86_BUILTIN_READ_FLAGS, + IX86_BUILTIN_WRITE_FLAGS, + + /* PKU instructions. */ + IX86_BUILTIN_RDPKRU, + IX86_BUILTIN_WRPKRU, + + IX86_BUILTIN_MAX +}; + +/* Table for the ix86 builtin decls. */ +static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX]; + +/* Table of all of the builtin functions that are possible with different ISA's + but are waiting to be built until a function is declared to use that + ISA. 
*/
+struct builtin_isa {
+  const char *name;                  /* function name */
+  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
+  HOST_WIDE_INT isa;                 /* isa_flags this builtin is defined for */
+  bool const_p;                      /* true if the declaration is constant */
+  bool leaf_p;                       /* true if the declaration has leaf attribute */
+  bool nothrow_p;                    /* true if the declaration has nothrow attribute */
+  bool set_and_not_built_p;
+};
+
+static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
+
+/* Bits that can still enable any inclusion of a builtin.  */
+static HOST_WIDE_INT deferred_isa_values = 0;
+
+/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
+   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
+   function decl in the ix86_builtins array.  Returns the function decl or
+   NULL_TREE, if the builtin was not added.
+
+   If the front end has a special hook for builtin functions, delay adding
+   builtin functions that aren't in the current ISA until the ISA is changed
+   with function specific optimization.  Doing so can save about 300K for the
+   default compiler.  When the builtin is expanded, check at that time whether
+   it is valid.
+
+   If the front end doesn't have a special hook, record all builtins, even if
+   it isn't an instruction set in the current ISA in case the user uses
+   function specific options for a different ISA, so that we don't get scope
+   errors if a builtin is added in the middle of a function scope.  */
+
+static inline tree
+def_builtin (HOST_WIDE_INT mask, const char *name,
+             enum ix86_builtin_func_type tcode,
+             enum ix86_builtins code)
+{
+  tree decl = NULL_TREE;
+
+  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
+    {
+      ix86_builtins_isa[(int) code].isa = mask;
+
+      /* OPTION_MASK_ISA_AVX512VL has special meaning.  Unlike the generic
+         case, where any set bit means the built-in is enabled, this bit must
+         be *and-ed* with another one.  E.g.:
+         OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL means that *both*
+         cpuid bits must be set for the built-in to be available.  Handle this
+         here.  */
+      if (mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
+        mask &= ~OPTION_MASK_ISA_AVX512VL;
+
+      mask &= ~OPTION_MASK_ISA_64BIT;
+      if (mask == 0
+          || (mask & ix86_isa_flags) != 0
+          || (lang_hooks.builtin_function
+              == lang_hooks.builtin_function_ext_scope))
+        {
+          tree type = ix86_get_builtin_func_type (tcode);
+          decl = add_builtin_function (name, type, code, BUILT_IN_MD,
+                                       NULL, NULL_TREE);
+          ix86_builtins[(int) code] = decl;
+          ix86_builtins_isa[(int) code].set_and_not_built_p = false;
+        }
+      else
+        {
+          /* Just a MASK where set_and_not_built_p == true can potentially
+             include a builtin.  */
+          deferred_isa_values |= mask;
+          ix86_builtins[(int) code] = NULL_TREE;
+          ix86_builtins_isa[(int) code].tcode = tcode;
+          ix86_builtins_isa[(int) code].name = name;
+          ix86_builtins_isa[(int) code].leaf_p = false;
+          ix86_builtins_isa[(int) code].nothrow_p = false;
+          ix86_builtins_isa[(int) code].const_p = false;
+          ix86_builtins_isa[(int) code].set_and_not_built_p = true;
+        }
+    }
+
+  return decl;
+}
+
+/* Like def_builtin, but also marks the function decl "const".  */
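+/* Note: when def_builtin has deferred the declaration (set_and_not_built_p),
+   def_builtin_const only records const_p in ix86_builtins_isa;
+   ix86_add_new_builtins below applies TREE_READONLY once the built-in is
+   actually created.  */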
+
+static inline tree
+def_builtin_const (HOST_WIDE_INT mask, const char *name,
+                   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
+{
+  tree decl = def_builtin (mask, name, tcode, code);
+  if (decl)
+    TREE_READONLY (decl) = 1;
+  else
+    ix86_builtins_isa[(int) code].const_p = true;
+
+  return decl;
+}
+
+/* Add any new builtin functions for a given ISA that may not have been
+   declared.  This saves a bit of space compared to adding all of the
+   declarations to the tree, even if we didn't use them.  */
+
+static void
+ix86_add_new_builtins (HOST_WIDE_INT isa)
+{
+  if ((isa & deferred_isa_values) == 0)
+    return;
+
+  /* Bits in ISA value can be removed from potential isa values.  */
+  deferred_isa_values &= ~isa;
+
+  int i;
+  tree saved_current_target_pragma = current_target_pragma;
+  current_target_pragma = NULL_TREE;
+
+  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
+    {
+      if ((ix86_builtins_isa[i].isa & isa) != 0
+          && ix86_builtins_isa[i].set_and_not_built_p)
+        {
+          tree decl, type;
+
+          /* Don't define the builtin again.  */
+          ix86_builtins_isa[i].set_and_not_built_p = false;
+
+          type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
+          decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
+                                                 type, i, BUILT_IN_MD, NULL,
+                                                 NULL_TREE);
+
+          ix86_builtins[i] = decl;
+          if (ix86_builtins_isa[i].const_p)
+            TREE_READONLY (decl) = 1;
+          if (ix86_builtins_isa[i].leaf_p)
+            DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
+                                                      NULL_TREE);
+          if (ix86_builtins_isa[i].nothrow_p)
+            TREE_NOTHROW (decl) = 1;
+        }
+    }
+
+  current_target_pragma = saved_current_target_pragma;
+}
+
+/* Bits for builtin_description.flag.  */
+
+/* Set when we don't support the comparison natively, and should
+   swap_comparison in order to support it.  */
+#define BUILTIN_DESC_SWAP_OPERANDS 1
+
+struct builtin_description
+{
+  const HOST_WIDE_INT mask;
+  const enum insn_code icode;
+  const char *const name;
+  const enum ix86_builtins code;
+  const enum rtx_code comparison;
+  const int flag;
+};
+
+static const struct builtin_description bdesc_comi[] =
+{
+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
+};
+
+static const struct builtin_description bdesc_pcmpestr[] =
+{
+  /* SSE4.2 */
+  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
+  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
+  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
+  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
+  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
+  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
+  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
+};
+
+static const struct builtin_description bdesc_pcmpistr[] =
+{
+  /* SSE4.2 */
+  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
+  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
+  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
+  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
+  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
+  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
+  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
+};
+
+/* Special builtins with variable number of arguments.
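+   Note: unlike bdesc_comi above, the trailing flag field of each entry in
+   this table does not hold an rtx comparison code; it carries the built-in's
+   prototype, cast from enum ix86_builtin_func_type (e.g. VOID_FTYPE_PVOID).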
*/ +static const struct builtin_description bdesc_special_args[] = +{ + { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID }, + { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED }, + { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID }, + + /* 80387 (for use internally for atomic compound assignment). */ + { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID }, + { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID }, + { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID }, + { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID }, + + /* MMX */ + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID }, + + /* 3DNow! */ + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID }, + + /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */ + { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID }, + { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID }, + { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + + { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID }, + { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID }, + { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", 
IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 }, + + /* SSE */ + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT }, + + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF }, + + /* SSE or 3DNow!A */ + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG }, + + /* SSE2 */ + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT }, + { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE }, + + /* SSE3 */ + { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR }, + + /* SSE4.1 */ + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI }, + + /* SSE4A */ + { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, + { 
OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, + + /* AVX */ + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE }, + { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF }, + { 
OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF }, + + /* AVX2 */ + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI }, + + /* AVX512F */ + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI 
}, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", 
IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI }, + + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT }, + { OPTION_MASK_ISA_LWP | OPTION_MASK_ISA_64BIT, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", 
IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT }, + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT }, + { OPTION_MASK_ISA_LWP | OPTION_MASK_ISA_64BIT, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT }, + + /* FSGSBASE */ + { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID }, + { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID }, + { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID }, + { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID }, + { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED }, + { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 }, + { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED }, + { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 }, + + /* RTM */ + { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID }, + { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID }, + { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID }, + + /* AVX512BW */ + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI }, + + /* AVX512VL */ + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, 
"__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI }, + + /* RDPKRU and WRPKRU. */ + { OPTION_MASK_ISA_PKU, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID }, + { OPTION_MASK_ISA_PKU, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED } +}; + +/* Builtins with variable number of arguments. 
*/ +static const struct builtin_description bdesc_args[] = +{ + { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT }, + { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 }, + { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT }, + { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT }, + { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT }, + { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT }, + { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT }, + + /* MMX */ + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, + + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, + { OPTION_MASK_ISA_MMX, 
CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, + + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, + + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI}, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI}, + + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI }, + + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI }, + + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT }, + + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT }, + { 
OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT }, + + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT }, + { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT }, + + /* 3DNow! */ + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF }, + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI }, + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF }, + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF }, + + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF }, + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF }, + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF }, + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, + { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + + 
/* 3DNow!A */ + { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF }, + { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI }, + { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI }, + { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF }, + { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, + { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, + + /* SSE */ + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF }, + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF }, + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF }, + + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + + { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { 
OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP}, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, + + { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + + { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) 
V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + + { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI }, + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI }, + + { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT }, + + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE }, + { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE }, + + { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 }, + { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 }, + + /* SSE MMX or 3Dnow!A */ + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, 
"__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI }, + + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT }, + + /* SSE2 */ + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF }, + { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF }, + { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", 
IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP}, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF }, + 
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 
UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, 
"__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI }, + { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, 
"__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE }, + + { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI }, + + /* SSE2 MMX */ + { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI }, + + /* SSE3 */ + { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF}, + { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF }, + + { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 
UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + + /* SSSE3 */ + { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI }, + + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 
(int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, + + /* SSSE3. */ + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT }, + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT }, + + /* SSE4.1 */ + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT }, + + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI }, + { OPTION_MASK_ISA_SSE4_1, 
CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI }, + + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + + /* SSE4.1 */ + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, 
"__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND }, + + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND }, + + { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF }, + + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND }, + + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND }, + + { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF }, + + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST }, + { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST }, + + /* SSE4.2 */ + { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, + { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR }, + { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT }, + { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, + { OPTION_MASK_ISA_SSE4_2 | 
OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, + + /* SSE4A */ + { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT }, + { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI }, + { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT }, + { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, + + /* AES */ + { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI }, + + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, + + /* PCLMUL */ + { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT }, + + /* AVX */ + { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, 
CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, 
"__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT }, + { OPTION_MASK_ISA_AVX, 
CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) 
V8SF_FTYPE_V8SF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST }, + { OPTION_MASK_ISA_AVX, 
CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF }, + + /* AVX2 */ + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) 
V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI }, + { 
OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI }, + { OPTION_MASK_ISA_AVX2, 
CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", 
IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) 
V32QI_FTYPE_V32QI_V32QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT }, + { OPTION_MASK_ISA_AVX2, 
CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+
+ /* LZCNT */
+ { OPTION_MASK_ISA_LZCNT, CODE_FOR_lzcnt_hi, "__builtin_ia32_lzcnt_u16", IX86_BUILTIN_LZCNT16, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
+ /* Same as above, for backward compatibility. */
+ { OPTION_MASK_ISA_LZCNT, CODE_FOR_lzcnt_hi, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
+ { OPTION_MASK_ISA_LZCNT, CODE_FOR_lzcnt_si, "__builtin_ia32_lzcnt_u32", IX86_BUILTIN_LZCNT32, UNKNOWN, (int) UINT_FTYPE_UINT },
+ { OPTION_MASK_ISA_LZCNT | OPTION_MASK_ISA_64BIT, CODE_FOR_lzcnt_di, "__builtin_ia32_lzcnt_u64", IX86_BUILTIN_LZCNT64, UNKNOWN, (int) UINT64_FTYPE_UINT64 },
+
+ /* BMI */
+ { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+ { OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+
+ { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnt_hi, "__builtin_ia32_tzcnt_u16", IX86_BUILTIN_TZCNT16, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
+ /* Same as above, for backward compatibility. */
+ { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnt_hi, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
+ { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnt_si, "__builtin_ia32_tzcnt_u32", IX86_BUILTIN_TZCNT32, UNKNOWN, (int) UINT_FTYPE_UINT },
+ { OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_tzcnt_di, "__builtin_ia32_tzcnt_u64", IX86_BUILTIN_TZCNT64, UNKNOWN, (int) UINT64_FTYPE_UINT64 },
+
+ /* TBM */
+ { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+ { OPTION_MASK_ISA_TBM | OPTION_MASK_ISA_64BIT, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+
+ /* F16C */
+ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
+ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
+ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
+
+ /* BMI2 */
+ { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+ { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+ { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+ { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+ { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+ { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+
+ /* AVX512F */
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, 
"__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, 
CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, 
CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI }, + { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { 
OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", 
IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI }, + { 
OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, 
(int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", 
IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) 
V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) 
V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI }, + + { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF }, + { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_floorps512", IX86_BUILTIN_FLOORPS512, (enum rtx_code) ROUND_FLOOR, (int) V16SF_FTYPE_V16SF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_ceilps512", IX86_BUILTIN_CEILPS512, (enum rtx_code) ROUND_CEIL, (int) V16SF_FTYPE_V16SF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_truncps512", IX86_BUILTIN_TRUNCPS512, (enum rtx_code) ROUND_TRUNC, (int) V16SF_FTYPE_V16SF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_floorpd512", IX86_BUILTIN_FLOORPD512, (enum rtx_code) ROUND_FLOOR, (int) V8DF_FTYPE_V8DF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_ceilpd512", IX86_BUILTIN_CEILPD512, (enum rtx_code) ROUND_CEIL, (int) V8DF_FTYPE_V8DF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_truncpd512", IX86_BUILTIN_TRUNCPD512, (enum rtx_code) ROUND_TRUNC, (int) V8DF_FTYPE_V8DF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si, "__builtin_ia32_cvtps2dq512", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_pack_sfix_v8df, "__builtin_ia32_vec_pack_sfix512", IX86_BUILTIN_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv16sf2_sfix, "__builtin_ia32_roundps_az_sfix512", IX86_BUILTIN_ROUNDPS_AZ_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V16SF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_floorps_sfix512", IX86_BUILTIN_FLOORPS_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V16SF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_ceilps_sfix512", IX86_BUILTIN_CEILPS_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V16SF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND }, + + /* Mask arithmetic operations */ + { OPTION_MASK_ISA_AVX512F, 
CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) UHI_FTYPE_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) UHI_FTYPE_UHI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) UHI_FTYPE_UHI }, + + /* SHA */ + { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI }, + + /* AVX512VL. 
*/ + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) 
V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | 
OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI }, + { 
OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) 
V4SI_FTYPE_V4DF_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_UHI }, + { 
OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, 
"__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", 
IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", 
IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, 
(int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI }, + { 
OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI }, + { 
OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask , "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask" , IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask" , IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask" , IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + 
{ OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK , UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) 
V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", 
IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, 
UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, 
"__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", 
IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, 
"__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI }, + { 
OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { 
OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) 
V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) UQI_FTYPE_V8HI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) UHI_FTYPE_V16HI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) UQI_FTYPE_V4SI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) UQI_FTYPE_V8SI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) UQI_FTYPE_V2DI }, + 
{ OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) UQI_FTYPE_V4DI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_UHI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_UQI }, + { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_UQI }, + { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_UQI }, + { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_UHI }, + { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_UHI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, 
"__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", 
IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", 
IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, 
"__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 
CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, 
"__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, 
"__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI }, + { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI }, + { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI }, + + /* AVX512DQ. 
*/ + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI}, + { OPTION_MASK_ISA_AVX512DQ, 
CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI }, + { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI }, + + /* AVX512BW. 
*/ + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) USI_FTYPE_USI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, 
"__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI }, + { 
OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { 
OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI }, + { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI }, + { OPTION_MASK_ISA_AVX512BW, 
CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
+
+ /* AVX512IFMA */
+ { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
+ { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
+ { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
+ { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
+ { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
+ { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
+ { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
+ { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
+ { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
+ { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
+ { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
+ { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
+
+ /* AVX512VBMI */
+ { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
+ { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
+ { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
+ { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
+ { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
+ { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
+ { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
+ { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
+ { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
+ { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
+ { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
+ { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
+ { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
+ { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
+ { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
+};
+
+/* Builtins with rounding support. */
+static const struct builtin_description bdesc_round_args[] =
+{
+ /* AVX512F */
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) 
V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT }, + { OPTION_MASK_ISA_AVX512F, 
CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT }, + { 
OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { 
OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT }, + + /* AVX512ER */ + { OPTION_MASK_ISA_AVX512ER, 
CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+
+ /* AVX512DQ. */
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
+};
+
+/* Bultins for MPX. */
+static const struct builtin_description bdesc_mpx[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
+};
+
+/* Const builtins for MPX. */
+static const struct builtin_description bdesc_mpx_const[] =
+{
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
+ { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
+};
+
+/* FMA4 and XOP. */
+#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
+#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
+#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
+#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
+#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
+#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
+#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
+#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
+#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
+#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
+#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
+#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
+#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
+#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
+#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
+#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
+#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
+#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
+#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
+#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
+#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
+#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
+#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
+#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
+#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
+#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
+#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
+#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
+#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
+#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
+#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
+#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
+#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
+#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
+#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
+#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
+#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
+#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
+#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
+#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
+#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
+#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
+#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
+#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
+#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
+#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
+#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
+#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
+#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
+#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
+#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
+#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
+
+static const struct builtin_description bdesc_multi_arg[] =
+{
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
+ "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
+ UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
+ "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
+ UNKNOWN, (int)MULTI_ARG_3_DF },
+
+ { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
+ "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
+ UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
+ "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
+ UNKNOWN, (int)MULTI_ARG_3_DF },
+
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
+ "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
+ UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
+ "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
+ UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4,
CODE_FOR_fma4i_fmadd_v8sf, + "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, + UNKNOWN, (int)MULTI_ARG_3_SF2 }, + { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df, + "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, + UNKNOWN, (int)MULTI_ARG_3_DF2 }, + + { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, + "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, + UNKNOWN, (int)MULTI_ARG_3_SF }, + { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, + "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, + UNKNOWN, (int)MULTI_ARG_3_DF }, + { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf, + "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, + UNKNOWN, (int)MULTI_ARG_3_SF2 }, + { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df, + "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, + UNKNOWN, (int)MULTI_ARG_3_DF2 }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", 
IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", 
IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP }, + { 
OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP }, + { OPTION_MASK_ISA_XOP, 
CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, 
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, + + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 }, + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 }, + +}; + +/* TM vector builtins. */ + +/* Reuse the existing x86-specific `struct builtin_description' cause + we're lazy. Add casts to make them fit. 
*/ +static const struct builtin_description bdesc_tm[] = +{ + { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, + { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, + + { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, + { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, + + { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, + + { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID }, + { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID }, + { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, 
VOID_FTYPE_PCVOID }, +}; + +/* Initialize the transactional memory vector load/store builtins. */ + +static void +ix86_init_tm_builtins (void) +{ + enum ix86_builtin_func_type ftype; + const struct builtin_description *d; + size_t i; + tree decl; + tree attrs_load, attrs_type_load, attrs_store, attrs_type_store; + tree attrs_log, attrs_type_log; + + if (!flag_tm) + return; + + /* If there are no builtins defined, we must be compiling in a + language without trans-mem support. */ + if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1)) + return; + + /* Use whatever attributes a normal TM load has. */ + decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1); + attrs_load = DECL_ATTRIBUTES (decl); + attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl)); + /* Use whatever attributes a normal TM store has. */ + decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1); + attrs_store = DECL_ATTRIBUTES (decl); + attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl)); + /* Use whatever attributes a normal TM log has. */ + decl = builtin_decl_explicit (BUILT_IN_TM_LOG); + attrs_log = DECL_ATTRIBUTES (decl); + attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl)); + + for (i = 0, d = bdesc_tm; + i < ARRAY_SIZE (bdesc_tm); + i++, d++) + { + if ((d->mask & ix86_isa_flags) != 0 + || (lang_hooks.builtin_function + == lang_hooks.builtin_function_ext_scope)) + { + tree type, attrs, attrs_type; + enum built_in_function code = (enum built_in_function) d->code; + + ftype = (enum ix86_builtin_func_type) d->flag; + type = ix86_get_builtin_func_type (ftype); + + if (BUILTIN_TM_LOAD_P (code)) + { + attrs = attrs_load; + attrs_type = attrs_type_load; + } + else if (BUILTIN_TM_STORE_P (code)) + { + attrs = attrs_store; + attrs_type = attrs_type_store; + } + else + { + attrs = attrs_log; + attrs_type = attrs_type_log; + } + decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL, + /* The builtin without the prefix for + calling it directly. */ + d->name + strlen ("__builtin_"), + attrs); + /* add_builtin_function() will set the DECL_ATTRIBUTES, now + set the TYPE_ATTRIBUTES. */ + decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN); + + set_builtin_decl (code, decl, false); + } + } +} + +/* Set up all the MMX/SSE builtins, even builtins for instructions that are not + in the current target ISA to allow the user to compile particular modules + with different target specific options that differ from the command line + options. */ +static void +ix86_init_mmx_sse_builtins (void) +{ + const struct builtin_description * d; + enum ix86_builtin_func_type ftype; + size_t i; + + /* Add all special builtins with variable number of operands. */ + for (i = 0, d = bdesc_special_args; + i < ARRAY_SIZE (bdesc_special_args); + i++, d++) + { + if (d->name == 0) + continue; + + ftype = (enum ix86_builtin_func_type) d->flag; + def_builtin (d->mask, d->name, ftype, d->code); + } + + /* Add all builtins with variable number of operands. */ + for (i = 0, d = bdesc_args; + i < ARRAY_SIZE (bdesc_args); + i++, d++) + { + if (d->name == 0) + continue; + + ftype = (enum ix86_builtin_func_type) d->flag; + def_builtin_const (d->mask, d->name, ftype, d->code); + } + + /* Add all builtins with rounding. */ + for (i = 0, d = bdesc_round_args; + i < ARRAY_SIZE (bdesc_round_args); + i++, d++) + { + if (d->name == 0) + continue; + + ftype = (enum ix86_builtin_func_type) d->flag; + def_builtin_const (d->mask, d->name, ftype, d->code); + } + + /* pcmpestr[im] insns. 
*/ + for (i = 0, d = bdesc_pcmpestr; + i < ARRAY_SIZE (bdesc_pcmpestr); + i++, d++) + { + if (d->code == IX86_BUILTIN_PCMPESTRM128) + ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT; + else + ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT; + def_builtin_const (d->mask, d->name, ftype, d->code); + } + + /* pcmpistr[im] insns. */ + for (i = 0, d = bdesc_pcmpistr; + i < ARRAY_SIZE (bdesc_pcmpistr); + i++, d++) + { + if (d->code == IX86_BUILTIN_PCMPISTRM128) + ftype = V16QI_FTYPE_V16QI_V16QI_INT; + else + ftype = INT_FTYPE_V16QI_V16QI_INT; + def_builtin_const (d->mask, d->name, ftype, d->code); + } + + /* comi/ucomi insns. */ + for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) + { + if (d->mask == OPTION_MASK_ISA_SSE2) + ftype = INT_FTYPE_V2DF_V2DF; + else + ftype = INT_FTYPE_V4SF_V4SF; + def_builtin_const (d->mask, d->name, ftype, d->code); + } + + /* SSE */ + def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", + VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR); + def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", + UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR); + + /* SSE or 3DNow!A */ + def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, + "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR, + IX86_BUILTIN_MASKMOVQ); + + /* SSE2 */ + def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", + VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU); + + def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", + VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH); + x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", + VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE); + + /* SSE3. */ + def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", + VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR); + def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", + VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT); + + /* AES */ + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", + V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128); + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", + V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128); + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", + V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128); + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", + V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128); + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", + V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128); + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", + V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128); + + /* PCLMUL */ + def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", + V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128); + + /* RDRND */ + def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step", + INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP); + def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step", + INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP); + def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, + "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG, + IX86_BUILTIN_RDRAND64_STEP); + + /* AVX2 */ + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df", + V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT, + IX86_BUILTIN_GATHERSIV2DF); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df", + V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT, + IX86_BUILTIN_GATHERSIV4DF); + + def_builtin (OPTION_MASK_ISA_AVX2, 
"__builtin_ia32_gatherdiv2df", + V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT, + IX86_BUILTIN_GATHERDIV2DF); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df", + V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT, + IX86_BUILTIN_GATHERDIV4DF); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf", + V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT, + IX86_BUILTIN_GATHERSIV4SF); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf", + V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT, + IX86_BUILTIN_GATHERSIV8SF); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf", + V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT, + IX86_BUILTIN_GATHERDIV4SF); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256", + V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT, + IX86_BUILTIN_GATHERDIV8SF); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di", + V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT, + IX86_BUILTIN_GATHERSIV2DI); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di", + V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT, + IX86_BUILTIN_GATHERSIV4DI); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di", + V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT, + IX86_BUILTIN_GATHERDIV2DI); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di", + V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT, + IX86_BUILTIN_GATHERDIV4DI); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si", + V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT, + IX86_BUILTIN_GATHERSIV4SI); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si", + V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT, + IX86_BUILTIN_GATHERSIV8SI); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si", + V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT, + IX86_BUILTIN_GATHERDIV4SI); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256", + V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT, + IX86_BUILTIN_GATHERDIV8SI); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ", + V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT, + IX86_BUILTIN_GATHERALTSIV4DF); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ", + V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT, + IX86_BUILTIN_GATHERALTDIV8SF); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ", + V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT, + IX86_BUILTIN_GATHERALTSIV4DI); + + def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ", + V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT, + IX86_BUILTIN_GATHERALTDIV8SI); + + /* AVX512F */ + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf", + V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT, + IX86_BUILTIN_GATHER3SIV16SF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df", + V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT, + IX86_BUILTIN_GATHER3SIV8DF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf", + V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT, + IX86_BUILTIN_GATHER3DIV16SF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df", + V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT, + IX86_BUILTIN_GATHER3DIV8DF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si", + V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT, + IX86_BUILTIN_GATHER3SIV16SI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di", + V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT, + IX86_BUILTIN_GATHER3SIV8DI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si", + V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT, + 
IX86_BUILTIN_GATHER3DIV16SI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di", + V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT, + IX86_BUILTIN_GATHER3DIV8DI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ", + V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT, + IX86_BUILTIN_GATHER3ALTSIV8DF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ", + V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT, + IX86_BUILTIN_GATHER3ALTDIV16SF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ", + V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT, + IX86_BUILTIN_GATHER3ALTSIV8DI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ", + V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT, + IX86_BUILTIN_GATHER3ALTDIV16SI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf", + VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT, + IX86_BUILTIN_SCATTERSIV16SF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df", + VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT, + IX86_BUILTIN_SCATTERSIV8DF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf", + VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT, + IX86_BUILTIN_SCATTERDIV16SF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df", + VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT, + IX86_BUILTIN_SCATTERDIV8DF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si", + VOID_FTYPE_PINT_HI_V16SI_V16SI_INT, + IX86_BUILTIN_SCATTERSIV16SI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di", + VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT, + IX86_BUILTIN_SCATTERSIV8DI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si", + VOID_FTYPE_PINT_QI_V8DI_V8SI_INT, + IX86_BUILTIN_SCATTERDIV16SI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di", + VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT, + IX86_BUILTIN_SCATTERDIV8DI); + + /* AVX512VL */ + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df", + V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT, + IX86_BUILTIN_GATHER3SIV2DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df", + V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT, + IX86_BUILTIN_GATHER3SIV4DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df", + V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT, + IX86_BUILTIN_GATHER3DIV2DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df", + V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT, + IX86_BUILTIN_GATHER3DIV4DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf", + V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT, + IX86_BUILTIN_GATHER3SIV4SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf", + V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT, + IX86_BUILTIN_GATHER3SIV8SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf", + V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT, + IX86_BUILTIN_GATHER3DIV4SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf", + V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT, + IX86_BUILTIN_GATHER3DIV8SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di", + V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT, + IX86_BUILTIN_GATHER3SIV2DI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di", + V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT, + IX86_BUILTIN_GATHER3SIV4DI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di", + V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT, + 
IX86_BUILTIN_GATHER3DIV2DI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di", + V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT, + IX86_BUILTIN_GATHER3DIV4DI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si", + V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT, + IX86_BUILTIN_GATHER3SIV4SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si", + V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT, + IX86_BUILTIN_GATHER3SIV8SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si", + V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT, + IX86_BUILTIN_GATHER3DIV4SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si", + V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT, + IX86_BUILTIN_GATHER3DIV8SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ", + V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT, + IX86_BUILTIN_GATHER3ALTSIV4DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ", + V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT, + IX86_BUILTIN_GATHER3ALTDIV8SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ", + V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT, + IX86_BUILTIN_GATHER3ALTSIV4DI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ", + V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT, + IX86_BUILTIN_GATHER3ALTDIV8SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf", + VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT, + IX86_BUILTIN_SCATTERSIV8SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf", + VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT, + IX86_BUILTIN_SCATTERSIV4SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df", + VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT, + IX86_BUILTIN_SCATTERSIV4DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df", + VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT, + IX86_BUILTIN_SCATTERSIV2DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf", + VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT, + IX86_BUILTIN_SCATTERDIV8SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf", + VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT, + IX86_BUILTIN_SCATTERDIV4SF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df", + VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT, + IX86_BUILTIN_SCATTERDIV4DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df", + VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT, + IX86_BUILTIN_SCATTERDIV2DF); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si", + VOID_FTYPE_PINT_QI_V8SI_V8SI_INT, + IX86_BUILTIN_SCATTERSIV8SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si", + VOID_FTYPE_PINT_QI_V4SI_V4SI_INT, + IX86_BUILTIN_SCATTERSIV4SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di", + VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT, + IX86_BUILTIN_SCATTERSIV4DI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di", + VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT, + IX86_BUILTIN_SCATTERSIV2DI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si", + VOID_FTYPE_PINT_QI_V4DI_V4SI_INT, + IX86_BUILTIN_SCATTERDIV8SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si", + VOID_FTYPE_PINT_QI_V2DI_V4SI_INT, + IX86_BUILTIN_SCATTERDIV4SI); + + def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di", + VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT, + IX86_BUILTIN_SCATTERDIV4DI); + + def_builtin 
(OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di", + VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT, + IX86_BUILTIN_SCATTERDIV2DI); + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ", + VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT, + IX86_BUILTIN_SCATTERALTSIV8DF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ", + VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT, + IX86_BUILTIN_SCATTERALTDIV16SF); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ", + VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT, + IX86_BUILTIN_SCATTERALTSIV8DI); + + def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ", + VOID_FTYPE_PINT_HI_V8DI_V16SI_INT, + IX86_BUILTIN_SCATTERALTDIV16SI); + + /* AVX512PF */ + def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd", + VOID_FTYPE_QI_V8SI_PCINT64_INT_INT, + IX86_BUILTIN_GATHERPFDPD); + def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps", + VOID_FTYPE_HI_V16SI_PCINT_INT_INT, + IX86_BUILTIN_GATHERPFDPS); + def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd", + VOID_FTYPE_QI_V8DI_PCINT64_INT_INT, + IX86_BUILTIN_GATHERPFQPD); + def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps", + VOID_FTYPE_QI_V8DI_PCINT_INT_INT, + IX86_BUILTIN_GATHERPFQPS); + def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd", + VOID_FTYPE_QI_V8SI_PCINT64_INT_INT, + IX86_BUILTIN_SCATTERPFDPD); + def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps", + VOID_FTYPE_HI_V16SI_PCINT_INT_INT, + IX86_BUILTIN_SCATTERPFDPS); + def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd", + VOID_FTYPE_QI_V8DI_PCINT64_INT_INT, + IX86_BUILTIN_SCATTERPFQPD); + def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps", + VOID_FTYPE_QI_V8DI_PCINT_INT_INT, + IX86_BUILTIN_SCATTERPFQPS); + + /* SHA */ + def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1", + V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1); + def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2", + V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2); + def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte", + V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE); + def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4", + V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4); + def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1", + V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1); + def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2", + V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2); + def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2", + V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2); + + /* RTM. */ + def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort", + VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT); + + /* MMX access to the vec_init patterns. */ + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", + V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI); + + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", + V4HI_FTYPE_HI_HI_HI_HI, + IX86_BUILTIN_VEC_INIT_V4HI); + + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", + V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI, + IX86_BUILTIN_VEC_INIT_V8QI); + + /* Access to the vec_extract patterns. 
*/ + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", + DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF); + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", + DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI); + def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", + FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF); + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", + SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI); + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", + HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI); + + def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, + "__builtin_ia32_vec_ext_v4hi", + HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI); + + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", + SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI); + + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", + QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI); + + /* Access to the vec_set patterns. */ + def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, + "__builtin_ia32_vec_set_v2di", + V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI); + + def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", + V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF); + + def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", + V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI); + + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", + V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI); + + def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, + "__builtin_ia32_vec_set_v4hi", + V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI); + + def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", + V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI); + + /* RDSEED */ + def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step", + INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP); + def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step", + INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP); + def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT, + "__builtin_ia32_rdseed_di_step", + INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP); + + /* ADCX */ + def_builtin (0, "__builtin_ia32_addcarryx_u32", + UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32); + def_builtin (OPTION_MASK_ISA_64BIT, + "__builtin_ia32_addcarryx_u64", + UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, + IX86_BUILTIN_ADDCARRYX64); + + /* SBB */ + def_builtin (0, "__builtin_ia32_sbb_u32", + UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32); + def_builtin (OPTION_MASK_ISA_64BIT, + "__builtin_ia32_sbb_u64", + UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, + IX86_BUILTIN_SBB64); + + /* Read/write FLAGS. */ + def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32", + UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS); + def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64", + UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS); + def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32", + VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS); + def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64", + VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS); + + /* CLFLUSHOPT. */ + def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt", + VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT); + + /* CLWB. 
*/ + def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb", + VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB); + + /* MONITORX and MWAITX. */ + def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx", + VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX); + def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx", + VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX); + + /* CLZERO. */ + def_builtin (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero", + VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO); + + /* Add FMA4 multi-arg argument instructions */ + for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) + { + if (d->name == 0) + continue; + + ftype = (enum ix86_builtin_func_type) d->flag; + def_builtin_const (d->mask, d->name, ftype, d->code); + } +} + +static void +ix86_init_mpx_builtins () +{ + const struct builtin_description * d; + enum ix86_builtin_func_type ftype; + tree decl; + size_t i; + + for (i = 0, d = bdesc_mpx; + i < ARRAY_SIZE (bdesc_mpx); + i++, d++) + { + if (d->name == 0) + continue; + + ftype = (enum ix86_builtin_func_type) d->flag; + decl = def_builtin (d->mask, d->name, ftype, d->code); + + /* With no leaf and nothrow flags for MPX builtins + abnormal edges may follow its call when setjmp + presents in the function. Since we may have a lot + of MPX builtins calls it causes lots of useless + edges and enormous PHI nodes. To avoid this we mark + MPX builtins as leaf and nothrow. */ + if (decl) + { + DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"), + NULL_TREE); + TREE_NOTHROW (decl) = 1; + } + else + { + ix86_builtins_isa[(int)d->code].leaf_p = true; + ix86_builtins_isa[(int)d->code].nothrow_p = true; + } + } + + for (i = 0, d = bdesc_mpx_const; + i < ARRAY_SIZE (bdesc_mpx_const); + i++, d++) + { + if (d->name == 0) + continue; + + ftype = (enum ix86_builtin_func_type) d->flag; + decl = def_builtin_const (d->mask, d->name, ftype, d->code); + + if (decl) + { + DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"), + NULL_TREE); + TREE_NOTHROW (decl) = 1; + } + else + { + ix86_builtins_isa[(int)d->code].leaf_p = true; + ix86_builtins_isa[(int)d->code].nothrow_p = true; + } + } +} + +/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL + to return a pointer to VERSION_DECL if the outcome of the expression + formed by PREDICATE_CHAIN is true. This function will be called during + version dispatch to decide which function version to execute. It returns + the basic block at the end, to which more conditions can be added. 
*/ + +static basic_block +add_condition_to_bb (tree function_decl, tree version_decl, + tree predicate_chain, basic_block new_bb) +{ + gimple *return_stmt; + tree convert_expr, result_var; + gimple *convert_stmt; + gimple *call_cond_stmt; + gimple *if_else_stmt; + + basic_block bb1, bb2, bb3; + edge e12, e23; + + tree cond_var, and_expr_var = NULL_TREE; + gimple_seq gseq; + + tree predicate_decl, predicate_arg; + + push_cfun (DECL_STRUCT_FUNCTION (function_decl)); + + gcc_assert (new_bb != NULL); + gseq = bb_seq (new_bb); + + + convert_expr = build1 (CONVERT_EXPR, ptr_type_node, + build_fold_addr_expr (version_decl)); + result_var = create_tmp_var (ptr_type_node); + convert_stmt = gimple_build_assign (result_var, convert_expr); + return_stmt = gimple_build_return (result_var); + + if (predicate_chain == NULL_TREE) + { + gimple_seq_add_stmt (&gseq, convert_stmt); + gimple_seq_add_stmt (&gseq, return_stmt); + set_bb_seq (new_bb, gseq); + gimple_set_bb (convert_stmt, new_bb); + gimple_set_bb (return_stmt, new_bb); + pop_cfun (); + return new_bb; + } + + while (predicate_chain != NULL) + { + cond_var = create_tmp_var (integer_type_node); + predicate_decl = TREE_PURPOSE (predicate_chain); + predicate_arg = TREE_VALUE (predicate_chain); + call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg); + gimple_call_set_lhs (call_cond_stmt, cond_var); + + gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl)); + gimple_set_bb (call_cond_stmt, new_bb); + gimple_seq_add_stmt (&gseq, call_cond_stmt); + + predicate_chain = TREE_CHAIN (predicate_chain); + + if (and_expr_var == NULL) + and_expr_var = cond_var; + else + { + gimple *assign_stmt; + /* Use MIN_EXPR to check if any integer is zero?. + and_expr_var = min_expr */ + assign_stmt = gimple_build_assign (and_expr_var, + build2 (MIN_EXPR, integer_type_node, + cond_var, and_expr_var)); + + gimple_set_block (assign_stmt, DECL_INITIAL (function_decl)); + gimple_set_bb (assign_stmt, new_bb); + gimple_seq_add_stmt (&gseq, assign_stmt); + } + } + + if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var, + integer_zero_node, + NULL_TREE, NULL_TREE); + gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl)); + gimple_set_bb (if_else_stmt, new_bb); + gimple_seq_add_stmt (&gseq, if_else_stmt); + + gimple_seq_add_stmt (&gseq, convert_stmt); + gimple_seq_add_stmt (&gseq, return_stmt); + set_bb_seq (new_bb, gseq); + + bb1 = new_bb; + e12 = split_block (bb1, if_else_stmt); + bb2 = e12->dest; + e12->flags &= ~EDGE_FALLTHRU; + e12->flags |= EDGE_TRUE_VALUE; + + e23 = split_block (bb2, return_stmt); + + gimple_set_bb (convert_stmt, bb2); + gimple_set_bb (return_stmt, bb2); + + bb3 = e23->dest; + make_edge (bb1, bb3, EDGE_FALSE_VALUE); + + remove_edge (e23); + make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); + + pop_cfun (); + + return bb3; +} + +/* This parses the attribute arguments to target in DECL and determines + the right builtin to use to match the platform specification. + It returns the priority value for this version decl. If PREDICATE_LIST + is not NULL, it stores the list of cpu features that need to be checked + before dispatching this function. */ + +static unsigned int +get_builtin_code_for_version (tree decl, tree *predicate_list) +{ + tree attrs; + struct cl_target_option cur_target; + tree target_node; + struct cl_target_option *new_target; + const char *arg_str = NULL; + const char *attrs_str = NULL; + char *tok_str = NULL; + char *token; + + /* Priority of i386 features, greater value is higher priority. 
This is + used to decide the order in which function dispatch must happen. For + instance, a version specialized for SSE4.2 should be checked for dispatch + before a version for SSE3, as SSE4.2 implies SSE3. */ + enum feature_priority + { + P_ZERO = 0, + P_MMX, + P_SSE, + P_SSE2, + P_SSE3, + P_SSSE3, + P_PROC_SSSE3, + P_SSE4_A, + P_PROC_SSE4_A, + P_SSE4_1, + P_SSE4_2, + P_PROC_SSE4_2, + P_POPCNT, + P_AES, + P_PCLMUL, + P_AVX, + P_PROC_AVX, + P_BMI, + P_PROC_BMI, + P_FMA4, + P_XOP, + P_PROC_XOP, + P_FMA, + P_PROC_FMA, + P_BMI2, + P_AVX2, + P_PROC_AVX2, + P_AVX512F, + P_PROC_AVX512F + }; + + enum feature_priority priority = P_ZERO; + + /* These are the target attribute strings for which a dispatcher is + available, from fold_builtin_cpu. */ + + static struct _feature_list + { + const char *const name; + const enum feature_priority priority; + } + const feature_list[] = + { + {"mmx", P_MMX}, + {"sse", P_SSE}, + {"sse2", P_SSE2}, + {"sse3", P_SSE3}, + {"sse4a", P_SSE4_A}, + {"ssse3", P_SSSE3}, + {"sse4.1", P_SSE4_1}, + {"sse4.2", P_SSE4_2}, + {"popcnt", P_POPCNT}, + {"aes", P_AES}, + {"pclmul", P_PCLMUL}, + {"avx", P_AVX}, + {"bmi", P_BMI}, + {"fma4", P_FMA4}, + {"xop", P_XOP}, + {"fma", P_FMA}, + {"bmi2", P_BMI2}, + {"avx2", P_AVX2}, + {"avx512f", P_AVX512F} + }; + + + static unsigned int NUM_FEATURES + = sizeof (feature_list) / sizeof (struct _feature_list); + + unsigned int i; + + tree predicate_chain = NULL_TREE; + tree predicate_decl, predicate_arg; + + attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl)); + gcc_assert (attrs != NULL); + + attrs = TREE_VALUE (TREE_VALUE (attrs)); + + gcc_assert (TREE_CODE (attrs) == STRING_CST); + attrs_str = TREE_STRING_POINTER (attrs); + + /* Return priority zero for default function. */ + if (strcmp (attrs_str, "default") == 0) + return 0; + + /* Handle arch= if specified. For priority, set it to be 1 more than + the best instruction set the processor can handle. For instance, if + there is a version for atom and a version for ssse3 (the highest ISA + priority for atom), the atom version must be checked for dispatch + before the ssse3 version. */ + if (strstr (attrs_str, "arch=") != NULL) + { + cl_target_option_save (&cur_target, &global_options); + target_node = ix86_valid_target_attribute_tree (attrs, &global_options, + &global_options_set); + + gcc_assert (target_node); + new_target = TREE_TARGET_OPTION (target_node); + gcc_assert (new_target); + + if (new_target->arch_specified && new_target->arch > 0) + { + switch (new_target->arch) + { + case PROCESSOR_CORE2: + arg_str = "core2"; + priority = P_PROC_SSSE3; + break; + case PROCESSOR_NEHALEM: + if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES) + arg_str = "westmere"; + else + /* We translate "arch=corei7" and "arch=nehalem" to + "corei7" so that it will be mapped to M_INTEL_COREI7 + as cpu type to cover all M_INTEL_COREI7_XXXs. 
*/ + arg_str = "corei7"; + priority = P_PROC_SSE4_2; + break; + case PROCESSOR_SANDYBRIDGE: + if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C) + arg_str = "ivybridge"; + else + arg_str = "sandybridge"; + priority = P_PROC_AVX; + break; + case PROCESSOR_HASWELL: + if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL) + arg_str = "skylake-avx512"; + else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES) + arg_str = "skylake"; + else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX) + arg_str = "broadwell"; + else + arg_str = "haswell"; + priority = P_PROC_AVX2; + break; + case PROCESSOR_BONNELL: + arg_str = "bonnell"; + priority = P_PROC_SSSE3; + break; + case PROCESSOR_KNL: + arg_str = "knl"; + priority = P_PROC_AVX512F; + break; + case PROCESSOR_SILVERMONT: + arg_str = "silvermont"; + priority = P_PROC_SSE4_2; + break; + case PROCESSOR_AMDFAM10: + arg_str = "amdfam10h"; + priority = P_PROC_SSE4_A; + break; + case PROCESSOR_BTVER1: + arg_str = "btver1"; + priority = P_PROC_SSE4_A; + break; + case PROCESSOR_BTVER2: + arg_str = "btver2"; + priority = P_PROC_BMI; + break; + case PROCESSOR_BDVER1: + arg_str = "bdver1"; + priority = P_PROC_XOP; + break; + case PROCESSOR_BDVER2: + arg_str = "bdver2"; + priority = P_PROC_FMA; + break; + case PROCESSOR_BDVER3: + arg_str = "bdver3"; + priority = P_PROC_FMA; + break; + case PROCESSOR_BDVER4: + arg_str = "bdver4"; + priority = P_PROC_AVX2; + break; + case PROCESSOR_ZNVER1: + arg_str = "znver1"; + priority = P_PROC_AVX2; + break; + } + } + + cl_target_option_restore (&global_options, &cur_target); + + if (predicate_list && arg_str == NULL) + { + error_at (DECL_SOURCE_LOCATION (decl), + "No dispatcher found for the versioning attributes"); + return 0; + } + + if (predicate_list) + { + predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS]; + /* For a C string literal the length includes the trailing NULL. */ + predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str); + predicate_chain = tree_cons (predicate_decl, predicate_arg, + predicate_chain); + } + } + + /* Process feature name. */ + tok_str = (char *) xmalloc (strlen (attrs_str) + 1); + strcpy (tok_str, attrs_str); + token = strtok (tok_str, ","); + predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS]; + + while (token != NULL) + { + /* Do not process "arch=" */ + if (strncmp (token, "arch=", 5) == 0) + { + token = strtok (NULL, ","); + continue; + } + for (i = 0; i < NUM_FEATURES; ++i) + { + if (strcmp (token, feature_list[i].name) == 0) + { + if (predicate_list) + { + predicate_arg = build_string_literal ( + strlen (feature_list[i].name) + 1, + feature_list[i].name); + predicate_chain = tree_cons (predicate_decl, predicate_arg, + predicate_chain); + } + /* Find the maximum priority feature. */ + if (feature_list[i].priority > priority) + priority = feature_list[i].priority; + + break; + } + } + if (predicate_list && i == NUM_FEATURES) + { + error_at (DECL_SOURCE_LOCATION (decl), + "No dispatcher found for %s", token); + return 0; + } + token = strtok (NULL, ","); + } + free (tok_str); + + if (predicate_list && predicate_chain == NULL_TREE) + { + error_at (DECL_SOURCE_LOCATION (decl), + "No dispatcher found for the versioning attributes : %s", + attrs_str); + return 0; + } + else if (predicate_list) + { + predicate_chain = nreverse (predicate_chain); + *predicate_list = predicate_chain; + } + + return priority; +} + +/* This compares the priority of target features in function DECL1 + and DECL2. 
It returns positive value if DECL1 is higher priority, + negative value if DECL2 is higher priority and 0 if they are the + same. */ + +static int +ix86_compare_version_priority (tree decl1, tree decl2) +{ + unsigned int priority1 = get_builtin_code_for_version (decl1, NULL); + unsigned int priority2 = get_builtin_code_for_version (decl2, NULL); + + return (int)priority1 - (int)priority2; +} + +/* V1 and V2 point to function versions with different priorities + based on the target ISA. This function compares their priorities. */ + +static int +feature_compare (const void *v1, const void *v2) +{ + typedef struct _function_version_info + { + tree version_decl; + tree predicate_chain; + unsigned int dispatch_priority; + } function_version_info; + + const function_version_info c1 = *(const function_version_info *)v1; + const function_version_info c2 = *(const function_version_info *)v2; + return (c2.dispatch_priority - c1.dispatch_priority); +} + +/* This function generates the dispatch function for + multi-versioned functions. DISPATCH_DECL is the function which will + contain the dispatch logic. FNDECLS are the function choices for + dispatch, and is a tree chain. EMPTY_BB is the basic block pointer + in DISPATCH_DECL in which the dispatch code is generated. */ + +static int +dispatch_function_versions (tree dispatch_decl, + void *fndecls_p, + basic_block *empty_bb) +{ + tree default_decl; + gimple *ifunc_cpu_init_stmt; + gimple_seq gseq; + int ix; + tree ele; + vec<tree> *fndecls; + unsigned int num_versions = 0; + unsigned int actual_versions = 0; + unsigned int i; + + struct _function_version_info + { + tree version_decl; + tree predicate_chain; + unsigned int dispatch_priority; + }*function_version_info; + + gcc_assert (dispatch_decl != NULL + && fndecls_p != NULL + && empty_bb != NULL); + + /*fndecls_p is actually a vector. */ + fndecls = static_cast<vec<tree> *> (fndecls_p); + + /* At least one more version other than the default. */ + num_versions = fndecls->length (); + gcc_assert (num_versions >= 2); + + function_version_info = (struct _function_version_info *) + XNEWVEC (struct _function_version_info, (num_versions - 1)); + + /* The first version in the vector is the default decl. */ + default_decl = (*fndecls)[0]; + + push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl)); + + gseq = bb_seq (*empty_bb); + /* Function version dispatch is via IFUNC. IFUNC resolvers fire before + constructors, so explicity call __builtin_cpu_init here. */ + ifunc_cpu_init_stmt = gimple_build_call_vec ( + ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL); + gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt); + gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb); + set_bb_seq (*empty_bb, gseq); + + pop_cfun (); + + + for (ix = 1; fndecls->iterate (ix, &ele); ++ix) + { + tree version_decl = ele; + tree predicate_chain = NULL_TREE; + unsigned int priority; + /* Get attribute string, parse it and find the right predicate decl. + The predicate function could be a lengthy combination of many + features, like arch-type and various isa-variants. */ + priority = get_builtin_code_for_version (version_decl, + &predicate_chain); + + if (predicate_chain == NULL_TREE) + continue; + + function_version_info [actual_versions].version_decl = version_decl; + function_version_info [actual_versions].predicate_chain + = predicate_chain; + function_version_info [actual_versions].dispatch_priority = priority; + actual_versions++; + } + + /* Sort the versions according to descending order of dispatch priority. The + priority is based on the ISA. 
This is not a perfect solution. There + could still be ambiguity. If more than one function version is suitable + to execute, which one should be dispatched? In future, allow the user + to specify a dispatch priority next to the version. */ + qsort (function_version_info, actual_versions, + sizeof (struct _function_version_info), feature_compare); + + for (i = 0; i < actual_versions; ++i) + *empty_bb = add_condition_to_bb (dispatch_decl, + function_version_info[i].version_decl, + function_version_info[i].predicate_chain, + *empty_bb); + + /* dispatch default version at the end. */ + *empty_bb = add_condition_to_bb (dispatch_decl, default_decl, + NULL, *empty_bb); + + free (function_version_info); + return 0; +} + +/* Comparator function to be used in qsort routine to sort attribute + specification strings to "target". */ + +static int +attr_strcmp (const void *v1, const void *v2) +{ + const char *c1 = *(char *const*)v1; + const char *c2 = *(char *const*)v2; + return strcmp (c1, c2); +} + +/* ARGLIST is the argument to target attribute. This function tokenizes + the comma separated arguments, sorts them and returns a string which + is a unique identifier for the comma separated arguments. It also + replaces non-identifier characters "=,-" with "_". */ + +static char * +sorted_attr_string (tree arglist) +{ + tree arg; + size_t str_len_sum = 0; + char **args = NULL; + char *attr_str, *ret_str; + char *attr = NULL; + unsigned int argnum = 1; + unsigned int i; + + for (arg = arglist; arg; arg = TREE_CHAIN (arg)) + { + const char *str = TREE_STRING_POINTER (TREE_VALUE (arg)); + size_t len = strlen (str); + str_len_sum += len + 1; + if (arg != arglist) + argnum++; + for (i = 0; i < strlen (str); i++) + if (str[i] == ',') + argnum++; + } + + attr_str = XNEWVEC (char, str_len_sum); + str_len_sum = 0; + for (arg = arglist; arg; arg = TREE_CHAIN (arg)) + { + const char *str = TREE_STRING_POINTER (TREE_VALUE (arg)); + size_t len = strlen (str); + memcpy (attr_str + str_len_sum, str, len); + attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0'; + str_len_sum += len + 1; + } + + /* Replace "=,-" with "_". */ + for (i = 0; i < strlen (attr_str); i++) + if (attr_str[i] == '=' || attr_str[i]== '-') + attr_str[i] = '_'; + + if (argnum == 1) + return attr_str; + + args = XNEWVEC (char *, argnum); + + i = 0; + attr = strtok (attr_str, ","); + while (attr != NULL) + { + args[i] = attr; + i++; + attr = strtok (NULL, ","); + } + + qsort (args, argnum, sizeof (char *), attr_strcmp); + + ret_str = XNEWVEC (char, str_len_sum); + str_len_sum = 0; + for (i = 0; i < argnum; i++) + { + size_t len = strlen (args[i]); + memcpy (ret_str + str_len_sum, args[i], len); + ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0'; + str_len_sum += len + 1; + } + + XDELETEVEC (args); + XDELETEVEC (attr_str); + return ret_str; +} + +/* This function changes the assembler name for functions that are + versions. If DECL is a function version and has a "target" + attribute, it appends the attribute string to its assembler name. 
*/ + +static tree +ix86_mangle_function_version_assembler_name (tree decl, tree id) +{ + tree version_attr; + const char *orig_name, *version_string; + char *attr_str, *assembler_name; + + if (DECL_DECLARED_INLINE_P (decl) + && lookup_attribute ("gnu_inline", + DECL_ATTRIBUTES (decl))) + error_at (DECL_SOURCE_LOCATION (decl), + "Function versions cannot be marked as gnu_inline," + " bodies have to be generated"); + + if (DECL_VIRTUAL_P (decl) + || DECL_VINDEX (decl)) + sorry ("Virtual function multiversioning not supported"); + + version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl)); + + /* target attribute string cannot be NULL. */ + gcc_assert (version_attr != NULL_TREE); + + orig_name = IDENTIFIER_POINTER (id); + version_string + = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr))); + + if (strcmp (version_string, "default") == 0) + return id; + + attr_str = sorted_attr_string (TREE_VALUE (version_attr)); + assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2); + + sprintf (assembler_name, "%s.%s", orig_name, attr_str); + + /* Allow assembler name to be modified if already set. */ + if (DECL_ASSEMBLER_NAME_SET_P (decl)) + SET_DECL_RTL (decl, NULL); + + tree ret = get_identifier (assembler_name); + XDELETEVEC (attr_str); + XDELETEVEC (assembler_name); + return ret; +} + +/* This function returns true if FN1 and FN2 are versions of the same function, + that is, the target strings of the function decls are different. This assumes + that FN1 and FN2 have the same signature. */ + +static bool +ix86_function_versions (tree fn1, tree fn2) +{ + tree attr1, attr2; + char *target1, *target2; + bool result; + + if (TREE_CODE (fn1) != FUNCTION_DECL + || TREE_CODE (fn2) != FUNCTION_DECL) + return false; + + attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1)); + attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2)); + + /* At least one function decl should have the target attribute specified. */ + if (attr1 == NULL_TREE && attr2 == NULL_TREE) + return false; + + /* Diagnose missing target attribute if one of the decls is already + multi-versioned. */ + if (attr1 == NULL_TREE || attr2 == NULL_TREE) + { + if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2)) + { + if (attr2 != NULL_TREE) + { + std::swap (fn1, fn2); + attr1 = attr2; + } + error_at (DECL_SOURCE_LOCATION (fn2), + "missing % attribute for multi-versioned %D", + fn2); + inform (DECL_SOURCE_LOCATION (fn1), + "previous declaration of %D", fn1); + /* Prevent diagnosing of the same error multiple times. */ + DECL_ATTRIBUTES (fn2) + = tree_cons (get_identifier ("target"), + copy_node (TREE_VALUE (attr1)), + DECL_ATTRIBUTES (fn2)); + } + return false; + } + + target1 = sorted_attr_string (TREE_VALUE (attr1)); + target2 = sorted_attr_string (TREE_VALUE (attr2)); + + /* The sorted target strings must be different for fn1 and fn2 + to be versions. */ + if (strcmp (target1, target2) == 0) + result = false; + else + result = true; + + XDELETEVEC (target1); + XDELETEVEC (target2); + + return result; +} + +static tree +ix86_mangle_decl_assembler_name (tree decl, tree id) +{ + /* For function version, add the target suffix to the assembler name. */ + if (TREE_CODE (decl) == FUNCTION_DECL + && DECL_FUNCTION_VERSIONED (decl)) + id = ix86_mangle_function_version_assembler_name (decl, id); +#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME + id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id); +#endif + + return id; +} + +/* Return a new name by appending SUFFIX to the DECL name. 
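For illustration, this is roughly what the mangling above does for a C++ function with two versions; the suffix comes from sorted_attr_string, which sorts the comma-separated pieces and rewrites '=' and '-' to '_' (the exact emitted symbol names are a sketch, not taken from the patch):

    __attribute__ ((target ("default")))            int foo () { return 0; }
    __attribute__ ((target ("arch=haswell,avx2")))  int foo () { return 1; }
    /* The default keeps its mangled name (_Z3foov); the second version's
       assembler name becomes _Z3foov.arch_haswell_avx2.  */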
If make_unique + is true, append the full path name of the source file. */ + +static char * +make_name (tree decl, const char *suffix, bool make_unique) +{ + char *global_var_name; + int name_len; + const char *name; + const char *unique_name = NULL; + + name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); + + /* Get a unique name that can be used globally without any chances + of collision at link time. */ + if (make_unique) + unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0")); + + name_len = strlen (name) + strlen (suffix) + 2; + + if (make_unique) + name_len += strlen (unique_name) + 1; + global_var_name = XNEWVEC (char, name_len); + + /* Use '.' to concatenate names as it is demangler friendly. */ + if (make_unique) + snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name, + suffix); + else + snprintf (global_var_name, name_len, "%s.%s", name, suffix); + + return global_var_name; +} + +#if defined (ASM_OUTPUT_TYPE_DIRECTIVE) + +/* Make a dispatcher declaration for the multi-versioned function DECL. + Calls to DECL function will be replaced with calls to the dispatcher + by the front-end. Return the decl created. */ + +static tree +make_dispatcher_decl (const tree decl) +{ + tree func_decl; + char *func_name; + tree fn_type, func_type; + bool is_uniq = false; + + if (TREE_PUBLIC (decl) == 0) + is_uniq = true; + + func_name = make_name (decl, "ifunc", is_uniq); + + fn_type = TREE_TYPE (decl); + func_type = build_function_type (TREE_TYPE (fn_type), + TYPE_ARG_TYPES (fn_type)); + + func_decl = build_fn_decl (func_name, func_type); + XDELETEVEC (func_name); + TREE_USED (func_decl) = 1; + DECL_CONTEXT (func_decl) = NULL_TREE; + DECL_INITIAL (func_decl) = error_mark_node; + DECL_ARTIFICIAL (func_decl) = 1; + /* Mark this func as external, the resolver will flip it again if + it gets generated. */ + DECL_EXTERNAL (func_decl) = 1; + /* This will be of type IFUNCs have to be externally visible. */ + TREE_PUBLIC (func_decl) = 1; + + return func_decl; +} + +#endif + +/* Returns true if decl is multi-versioned and DECL is the default function, + that is it is not tagged with target specific optimization. */ + +static bool +is_function_default_version (const tree decl) +{ + if (TREE_CODE (decl) != FUNCTION_DECL + || !DECL_FUNCTION_VERSIONED (decl)) + return false; + tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl)); + gcc_assert (attr); + attr = TREE_VALUE (TREE_VALUE (attr)); + return (TREE_CODE (attr) == STRING_CST + && strcmp (TREE_STRING_POINTER (attr), "default") == 0); +} + +/* Make a dispatcher declaration for the multi-versioned function DECL. + Calls to DECL function will be replaced with calls to the dispatcher + by the front-end. Returns the decl of the dispatcher function. */ + +static tree +ix86_get_function_versions_dispatcher (void *decl) +{ + tree fn = (tree) decl; + struct cgraph_node *node = NULL; + struct cgraph_node *default_node = NULL; + struct cgraph_function_version_info *node_v = NULL; + struct cgraph_function_version_info *first_v = NULL; + + tree dispatch_decl = NULL; + + struct cgraph_function_version_info *default_version_info = NULL; + + gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn)); + + node = cgraph_node::get (fn); + gcc_assert (node != NULL); + + node_v = node->function_version (); + gcc_assert (node_v != NULL); + + if (node_v->dispatcher_resolver != NULL) + return node_v->dispatcher_resolver; + + /* Find the default version and make it the first node. */ + first_v = node_v; + /* Go to the beginning of the chain. 
*/ + while (first_v->prev != NULL) + first_v = first_v->prev; + default_version_info = first_v; + while (default_version_info != NULL) + { + if (is_function_default_version + (default_version_info->this_node->decl)) + break; + default_version_info = default_version_info->next; + } + + /* If there is no default node, just return NULL. */ + if (default_version_info == NULL) + return NULL; + + /* Make default info the first node. */ + if (first_v != default_version_info) + { + default_version_info->prev->next = default_version_info->next; + if (default_version_info->next) + default_version_info->next->prev = default_version_info->prev; + first_v->prev = default_version_info; + default_version_info->next = first_v; + default_version_info->prev = NULL; + } + + default_node = default_version_info->this_node; + +#if defined (ASM_OUTPUT_TYPE_DIRECTIVE) + if (targetm.has_ifunc_p ()) + { + struct cgraph_function_version_info *it_v = NULL; + struct cgraph_node *dispatcher_node = NULL; + struct cgraph_function_version_info *dispatcher_version_info = NULL; + + /* Right now, the dispatching is done via ifunc. */ + dispatch_decl = make_dispatcher_decl (default_node->decl); + + dispatcher_node = cgraph_node::get_create (dispatch_decl); + gcc_assert (dispatcher_node != NULL); + dispatcher_node->dispatcher_function = 1; + dispatcher_version_info + = dispatcher_node->insert_new_function_version (); + dispatcher_version_info->next = default_version_info; + dispatcher_node->definition = 1; + + /* Set the dispatcher for all the versions. */ + it_v = default_version_info; + while (it_v != NULL) + { + it_v->dispatcher_resolver = dispatch_decl; + it_v = it_v->next; + } + } + else +#endif + { + error_at (DECL_SOURCE_LOCATION (default_node->decl), + "multiversioning needs ifunc which is not supported " + "on this target"); + } + + return dispatch_decl; +} + +/* Make the resolver function decl to dispatch the versions of + a multi-versioned function, DEFAULT_DECL. Create an + empty basic block in the resolver and store the pointer in + EMPTY_BB. Return the decl of the resolver function. */ + +static tree +make_resolver_func (const tree default_decl, + const tree dispatch_decl, + basic_block *empty_bb) +{ + char *resolver_name; + tree decl, type, decl_name, t; + bool is_uniq = false; + + /* IFUNC's have to be globally visible. So, if the default_decl is + not, then the name of the IFUNC should be made unique. */ + if (TREE_PUBLIC (default_decl) == 0) + is_uniq = true; + + /* Append the filename to the resolver function if the versions are + not externally visible. This is because the resolver function has + to be externally visible for the loader to find it. So, appending + the filename will prevent conflicts with a resolver function from + another module which is based on the same version name. */ + resolver_name = make_name (default_decl, "resolver", is_uniq); + + /* The resolver function should return a (void *). */ + type = build_function_type_list (ptr_type_node, NULL_TREE); + + decl = build_fn_decl (resolver_name, type); + decl_name = get_identifier (resolver_name); + SET_DECL_ASSEMBLER_NAME (decl, decl_name); + + DECL_NAME (decl) = decl_name; + TREE_USED (decl) = 1; + DECL_ARTIFICIAL (decl) = 1; + DECL_IGNORED_P (decl) = 0; + /* IFUNC resolvers have to be externally visible. */ + TREE_PUBLIC (decl) = 1; + DECL_UNINLINABLE (decl) = 1; + + /* Resolver is not external, body is generated. 
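At the source level, the dispatcher/resolver pair wired up below corresponds to GCC's ifunc attribute; a minimal sketch with illustrative names (the real resolver name is produced by make_name with the "resolver" suffix):

    extern "C" int foo_default (void);

    static void *
    foo_resolver (void)     /* conditions are added by add_condition_to_bb */
    {
      return (void *) foo_default;
    }

    extern "C" int foo (void) __attribute__ ((ifunc ("foo_resolver")));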
*/
+  DECL_EXTERNAL (decl) = 0;
+  DECL_EXTERNAL (dispatch_decl) = 0;
+
+  DECL_CONTEXT (decl) = NULL_TREE;
+  DECL_INITIAL (decl) = make_node (BLOCK);
+  DECL_STATIC_CONSTRUCTOR (decl) = 0;
+
+  if (DECL_COMDAT_GROUP (default_decl)
+      || TREE_PUBLIC (default_decl))
+    {
+      /* In this case, each translation unit with a call to this
+         versioned function will put out a resolver. Ensure it
+         is comdat to keep just one copy. */
+      DECL_COMDAT (decl) = 1;
+      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
+    }
+  /* Build result decl and add to function_decl. */
+  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
+  DECL_ARTIFICIAL (t) = 1;
+  DECL_IGNORED_P (t) = 1;
+  DECL_RESULT (decl) = t;
+
+  gimplify_function_tree (decl);
+  push_cfun (DECL_STRUCT_FUNCTION (decl));
+  *empty_bb = init_lowered_empty_function (decl, false, 0);
+
+  cgraph_node::add_new_function (decl, true);
+  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
+
+  pop_cfun ();
+
+  gcc_assert (dispatch_decl != NULL);
+  /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
+  DECL_ATTRIBUTES (dispatch_decl)
+    = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
+
+  /* Create the alias for dispatch to resolver here. */
+  /*cgraph_create_function_alias (dispatch_decl, decl);*/
+  cgraph_node::create_same_body_alias (dispatch_decl, decl);
+  XDELETEVEC (resolver_name);
+  return decl;
+}
+
+/* Generate the dispatching code body to dispatch multi-versioned function
+   DECL. The target hook is called to process the "target" attributes and
+   provide the code to dispatch the right function at run-time. NODE points
+   to the dispatcher decl whose body will be created. */
+
+static tree
+ix86_generate_version_dispatcher_body (void *node_p)
+{
+  tree resolver_decl;
+  basic_block empty_bb;
+  tree default_ver_decl;
+  struct cgraph_node *versn;
+  struct cgraph_node *node;
+
+  struct cgraph_function_version_info *node_version_info = NULL;
+  struct cgraph_function_version_info *versn_info = NULL;
+
+  node = (cgraph_node *)node_p;
+
+  node_version_info = node->function_version ();
+  gcc_assert (node->dispatcher_function
+              && node_version_info != NULL);
+
+  if (node_version_info->dispatcher_resolver)
+    return node_version_info->dispatcher_resolver;
+
+  /* The first version in the chain corresponds to the default version. */
+  default_ver_decl = node_version_info->next->this_node->decl;
+
+  /* node is going to be an alias, so remove the finalized bit. */
+  node->definition = false;
+
+  resolver_decl = make_resolver_func (default_ver_decl,
+                                      node->decl, &empty_bb);
+
+  node_version_info->dispatcher_resolver = resolver_decl;
+
+  push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
+
+  auto_vec<tree, 2> fn_ver_vec;
+
+  for (versn_info = node_version_info->next; versn_info;
+       versn_info = versn_info->next)
+    {
+      versn = versn_info->this_node;
+      /* Check for virtual functions here again, as by this time it should
+         have been determined if this function needs a vtable index or
+         not. This happens for methods in derived classes that override
+         virtual methods in base classes but are not explicitly marked as
+         virtual.
*/ + if (DECL_VINDEX (versn->decl)) + sorry ("Virtual function multiversioning not supported"); + + fn_ver_vec.safe_push (versn->decl); + } + + dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb); + cgraph_edge::rebuild_edges (); + pop_cfun (); + return resolver_decl; +} +/* This builds the processor_model struct type defined in + libgcc/config/i386/cpuinfo.c */ + +static tree +build_processor_model_struct (void) +{ + const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype", + "__cpu_features"}; + tree field = NULL_TREE, field_chain = NULL_TREE; + int i; + tree type = make_node (RECORD_TYPE); + + /* The first 3 fields are unsigned int. */ + for (i = 0; i < 3; ++i) + { + field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, + get_identifier (field_name[i]), unsigned_type_node); + if (field_chain != NULL_TREE) + DECL_CHAIN (field) = field_chain; + field_chain = field; + } + + /* The last field is an array of unsigned integers of size one. */ + field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, + get_identifier (field_name[3]), + build_array_type (unsigned_type_node, + build_index_type (size_one_node))); + if (field_chain != NULL_TREE) + DECL_CHAIN (field) = field_chain; + field_chain = field; + + finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE); + return type; +} + +/* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */ + +static tree +make_var_decl (tree type, const char *name) +{ + tree new_decl; + + new_decl = build_decl (UNKNOWN_LOCATION, + VAR_DECL, + get_identifier(name), + type); + + DECL_EXTERNAL (new_decl) = 1; + TREE_STATIC (new_decl) = 1; + TREE_PUBLIC (new_decl) = 1; + DECL_INITIAL (new_decl) = 0; + DECL_ARTIFICIAL (new_decl) = 0; + DECL_PRESERVE_P (new_decl) = 1; + + make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl)); + assemble_variable (new_decl, 0, 0, 0); + + return new_decl; +} + +/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded + into an integer defined in libgcc/config/i386/cpuinfo.c */ + +static tree +fold_builtin_cpu (tree fndecl, tree *args) +{ + unsigned int i; + enum ix86_builtins fn_code = (enum ix86_builtins) + DECL_FUNCTION_CODE (fndecl); + tree param_string_cst = NULL; + + /* This is the order of bit-fields in __processor_features in cpuinfo.c */ + enum processor_features + { + F_CMOV = 0, + F_MMX, + F_POPCNT, + F_SSE, + F_SSE2, + F_SSE3, + F_SSSE3, + F_SSE4_1, + F_SSE4_2, + F_AVX, + F_AVX2, + F_SSE4_A, + F_FMA4, + F_XOP, + F_FMA, + F_AVX512F, + F_BMI, + F_BMI2, + F_AES, + F_PCLMUL, + F_AVX512VL, + F_AVX512BW, + F_AVX512DQ, + F_AVX512CD, + F_AVX512ER, + F_AVX512PF, + F_AVX512VBMI, + F_AVX512IFMA, + F_MAX + }; + + /* These are the values for vendor types and cpu types and subtypes + in cpuinfo.c. Cpu types and subtypes should be subtracted by + the corresponding start value. 
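For reference, build_processor_model_struct above mirrors the layout that libgcc exports (simplified from libgcc/config/i386/cpuinfo.c):

    struct __processor_model
    {
      unsigned int __cpu_vendor;
      unsigned int __cpu_type;
      unsigned int __cpu_subtype;
      unsigned int __cpu_features[1];
    };
    extern "C" struct __processor_model __cpu_model;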
*/ + enum processor_model + { + M_INTEL = 1, + M_AMD, + M_CPU_TYPE_START, + M_INTEL_BONNELL, + M_INTEL_CORE2, + M_INTEL_COREI7, + M_AMDFAM10H, + M_AMDFAM15H, + M_INTEL_SILVERMONT, + M_INTEL_KNL, + M_AMD_BTVER1, + M_AMD_BTVER2, + M_CPU_SUBTYPE_START, + M_INTEL_COREI7_NEHALEM, + M_INTEL_COREI7_WESTMERE, + M_INTEL_COREI7_SANDYBRIDGE, + M_AMDFAM10H_BARCELONA, + M_AMDFAM10H_SHANGHAI, + M_AMDFAM10H_ISTANBUL, + M_AMDFAM15H_BDVER1, + M_AMDFAM15H_BDVER2, + M_AMDFAM15H_BDVER3, + M_AMDFAM15H_BDVER4, + M_AMDFAM17H_ZNVER1, + M_INTEL_COREI7_IVYBRIDGE, + M_INTEL_COREI7_HASWELL, + M_INTEL_COREI7_BROADWELL, + M_INTEL_COREI7_SKYLAKE, + M_INTEL_COREI7_SKYLAKE_AVX512 + }; + + static struct _arch_names_table + { + const char *const name; + const enum processor_model model; + } + const arch_names_table[] = + { + {"amd", M_AMD}, + {"intel", M_INTEL}, + {"atom", M_INTEL_BONNELL}, + {"slm", M_INTEL_SILVERMONT}, + {"core2", M_INTEL_CORE2}, + {"corei7", M_INTEL_COREI7}, + {"nehalem", M_INTEL_COREI7_NEHALEM}, + {"westmere", M_INTEL_COREI7_WESTMERE}, + {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE}, + {"ivybridge", M_INTEL_COREI7_IVYBRIDGE}, + {"haswell", M_INTEL_COREI7_HASWELL}, + {"broadwell", M_INTEL_COREI7_BROADWELL}, + {"skylake", M_INTEL_COREI7_SKYLAKE}, + {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512}, + {"bonnell", M_INTEL_BONNELL}, + {"silvermont", M_INTEL_SILVERMONT}, + {"knl", M_INTEL_KNL}, + {"amdfam10h", M_AMDFAM10H}, + {"barcelona", M_AMDFAM10H_BARCELONA}, + {"shanghai", M_AMDFAM10H_SHANGHAI}, + {"istanbul", M_AMDFAM10H_ISTANBUL}, + {"btver1", M_AMD_BTVER1}, + {"amdfam15h", M_AMDFAM15H}, + {"bdver1", M_AMDFAM15H_BDVER1}, + {"bdver2", M_AMDFAM15H_BDVER2}, + {"bdver3", M_AMDFAM15H_BDVER3}, + {"bdver4", M_AMDFAM15H_BDVER4}, + {"btver2", M_AMD_BTVER2}, + {"znver1", M_AMDFAM17H_ZNVER1}, + }; + + static struct _isa_names_table + { + const char *const name; + const enum processor_features feature; + } + const isa_names_table[] = + { + {"cmov", F_CMOV}, + {"mmx", F_MMX}, + {"popcnt", F_POPCNT}, + {"sse", F_SSE}, + {"sse2", F_SSE2}, + {"sse3", F_SSE3}, + {"ssse3", F_SSSE3}, + {"sse4a", F_SSE4_A}, + {"sse4.1", F_SSE4_1}, + {"sse4.2", F_SSE4_2}, + {"avx", F_AVX}, + {"fma4", F_FMA4}, + {"xop", F_XOP}, + {"fma", F_FMA}, + {"avx2", F_AVX2}, + {"avx512f", F_AVX512F}, + {"bmi", F_BMI}, + {"bmi2", F_BMI2}, + {"aes", F_AES}, + {"pclmul", F_PCLMUL}, + {"avx512vl",F_AVX512VL}, + {"avx512bw",F_AVX512BW}, + {"avx512dq",F_AVX512DQ}, + {"avx512cd",F_AVX512CD}, + {"avx512er",F_AVX512ER}, + {"avx512pf",F_AVX512PF}, + {"avx512vbmi",F_AVX512VBMI}, + {"avx512ifma",F_AVX512IFMA}, + }; + + tree __processor_model_type = build_processor_model_struct (); + tree __cpu_model_var = make_var_decl (__processor_model_type, + "__cpu_model"); + + + varpool_node::add (__cpu_model_var); + + gcc_assert ((args != NULL) && (*args != NULL)); + + param_string_cst = *args; + while (param_string_cst + && TREE_CODE (param_string_cst) != STRING_CST) + { + /* *args must be a expr that can contain other EXPRS leading to a + STRING_CST. 
*/ + if (!EXPR_P (param_string_cst)) + { + error ("Parameter to builtin must be a string constant or literal"); + return integer_zero_node; + } + param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0); + } + + gcc_assert (param_string_cst); + + if (fn_code == IX86_BUILTIN_CPU_IS) + { + tree ref; + tree field; + tree final; + + unsigned int field_val = 0; + unsigned int NUM_ARCH_NAMES + = sizeof (arch_names_table) / sizeof (struct _arch_names_table); + + for (i = 0; i < NUM_ARCH_NAMES; i++) + if (strcmp (arch_names_table[i].name, + TREE_STRING_POINTER (param_string_cst)) == 0) + break; + + if (i == NUM_ARCH_NAMES) + { + error ("Parameter to builtin not valid: %s", + TREE_STRING_POINTER (param_string_cst)); + return integer_zero_node; + } + + field = TYPE_FIELDS (__processor_model_type); + field_val = arch_names_table[i].model; + + /* CPU types are stored in the next field. */ + if (field_val > M_CPU_TYPE_START + && field_val < M_CPU_SUBTYPE_START) + { + field = DECL_CHAIN (field); + field_val -= M_CPU_TYPE_START; + } + + /* CPU subtypes are stored in the next field. */ + if (field_val > M_CPU_SUBTYPE_START) + { + field = DECL_CHAIN ( DECL_CHAIN (field)); + field_val -= M_CPU_SUBTYPE_START; + } + + /* Get the appropriate field in __cpu_model. */ + ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var, + field, NULL_TREE); + + /* Check the value. */ + final = build2 (EQ_EXPR, unsigned_type_node, ref, + build_int_cstu (unsigned_type_node, field_val)); + return build1 (CONVERT_EXPR, integer_type_node, final); + } + else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS) + { + tree ref; + tree array_elt; + tree field; + tree final; + + unsigned int field_val = 0; + unsigned int NUM_ISA_NAMES + = sizeof (isa_names_table) / sizeof (struct _isa_names_table); + + for (i = 0; i < NUM_ISA_NAMES; i++) + if (strcmp (isa_names_table[i].name, + TREE_STRING_POINTER (param_string_cst)) == 0) + break; + + if (i == NUM_ISA_NAMES) + { + error ("Parameter to builtin not valid: %s", + TREE_STRING_POINTER (param_string_cst)); + return integer_zero_node; + } + + field = TYPE_FIELDS (__processor_model_type); + /* Get the last field, which is __cpu_features. */ + while (DECL_CHAIN (field)) + field = DECL_CHAIN (field); + + /* Get the appropriate field: __cpu_model.__cpu_features */ + ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var, + field, NULL_TREE); + + /* Access the 0th element of __cpu_features array. 
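Typical calls that reach fold_builtin_cpu; both builtins take a string literal and are folded into field loads from __cpu_model as shown above:

    int
    prefer_avx2_path (void)
    {
      if (__builtin_cpu_is ("intel") && __builtin_cpu_supports ("avx2"))
        return 1;  /* becomes tests of __cpu_model.__cpu_type / __cpu_features[0] */
      return 0;
    }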
*/ + array_elt = build4 (ARRAY_REF, unsigned_type_node, ref, + integer_zero_node, NULL_TREE, NULL_TREE); + + field_val = (1 << isa_names_table[i].feature); + /* Return __cpu_model.__cpu_features[0] & field_val */ + final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt, + build_int_cstu (unsigned_type_node, field_val)); + return build1 (CONVERT_EXPR, integer_type_node, final); + } + gcc_unreachable (); +} + +static tree +ix86_fold_builtin (tree fndecl, int n_args, + tree *args, bool ignore ATTRIBUTE_UNUSED) +{ + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) + { + enum ix86_builtins fn_code = (enum ix86_builtins) + DECL_FUNCTION_CODE (fndecl); + switch (fn_code) + { + case IX86_BUILTIN_CPU_IS: + case IX86_BUILTIN_CPU_SUPPORTS: + gcc_assert (n_args == 1); + return fold_builtin_cpu (fndecl, args); + + case IX86_BUILTIN_TZCNT16: + case IX86_BUILTIN_CTZS: + case IX86_BUILTIN_TZCNT32: + case IX86_BUILTIN_TZCNT64: + gcc_assert (n_args == 1); + if (TREE_CODE (args[0]) == INTEGER_CST) + { + tree type = TREE_TYPE (TREE_TYPE (fndecl)); + tree arg = args[0]; + if (fn_code == IX86_BUILTIN_TZCNT16 + || fn_code == IX86_BUILTIN_CTZS) + arg = fold_convert (short_unsigned_type_node, arg); + if (integer_zerop (arg)) + return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); + else + return fold_const_call (CFN_CTZ, type, arg); + } + break; + + case IX86_BUILTIN_LZCNT16: + case IX86_BUILTIN_CLZS: + case IX86_BUILTIN_LZCNT32: + case IX86_BUILTIN_LZCNT64: + gcc_assert (n_args == 1); + if (TREE_CODE (args[0]) == INTEGER_CST) + { + tree type = TREE_TYPE (TREE_TYPE (fndecl)); + tree arg = args[0]; + if (fn_code == IX86_BUILTIN_LZCNT16 + || fn_code == IX86_BUILTIN_CLZS) + arg = fold_convert (short_unsigned_type_node, arg); + if (integer_zerop (arg)) + return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); + else + return fold_const_call (CFN_CLZ, type, arg); + } + break; + + default: + break; + } + } + +#ifdef SUBTARGET_FOLD_BUILTIN + return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore); +#endif + + return NULL_TREE; +} + +/* Fold a MD builtin (use ix86_fold_builtin for folding into + constant) in GIMPLE. */ + +bool +ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) +{ + gimple *stmt = gsi_stmt (*gsi); + tree fndecl = gimple_call_fndecl (stmt); + gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD); + int n_args = gimple_call_num_args (stmt); + enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl); + tree decl = NULL_TREE; + tree arg0; + + switch (fn_code) + { + case IX86_BUILTIN_TZCNT32: + decl = builtin_decl_implicit (BUILT_IN_CTZ); + goto fold_tzcnt_lzcnt; + + case IX86_BUILTIN_TZCNT64: + decl = builtin_decl_implicit (BUILT_IN_CTZLL); + goto fold_tzcnt_lzcnt; + + case IX86_BUILTIN_LZCNT32: + decl = builtin_decl_implicit (BUILT_IN_CLZ); + goto fold_tzcnt_lzcnt; + + case IX86_BUILTIN_LZCNT64: + decl = builtin_decl_implicit (BUILT_IN_CLZLL); + goto fold_tzcnt_lzcnt; + + fold_tzcnt_lzcnt: + gcc_assert (n_args == 1); + arg0 = gimple_call_arg (stmt, 0); + if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt)) + { + int prec = TYPE_PRECISION (TREE_TYPE (arg0)); + /* If arg0 is provably non-zero, optimize into generic + __builtin_c[tl]z{,ll} function the middle-end handles + better. 
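A sketch of the constant cases handled by ix86_fold_builtin above (assumes -mbmi so the builtin is available):

    unsigned int tz16 (void) { return __builtin_ia32_tzcnt_u32 (16); }  /* folds to 4  */
    unsigned int tz0 (void)  { return __builtin_ia32_tzcnt_u32 (0);  }  /* folds to 32 */

For a non-constant SSA argument that is known to be non-zero, the GIMPLE fold below rewrites the call to the generic __builtin_ctz/__builtin_clz instead.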
*/ + if (!expr_not_equal_to (arg0, wi::zero (prec))) + return false; + + location_t loc = gimple_location (stmt); + gimple *g = gimple_build_call (decl, 1, arg0); + gimple_set_location (g, loc); + tree lhs = make_ssa_name (integer_type_node); + gimple_call_set_lhs (g, lhs); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs); + gimple_set_location (g, loc); + gsi_replace (gsi, g, false); + return true; + } + break; + + default: + break; + } + + return false; +} + +/* Make builtins to detect cpu type and features supported. NAME is + the builtin name, CODE is the builtin code, and FTYPE is the function + type of the builtin. */ + +static void +make_cpu_type_builtin (const char* name, int code, + enum ix86_builtin_func_type ftype, bool is_const) +{ + tree decl; + tree type; + + type = ix86_get_builtin_func_type (ftype); + decl = add_builtin_function (name, type, code, BUILT_IN_MD, + NULL, NULL_TREE); + gcc_assert (decl != NULL_TREE); + ix86_builtins[(int) code] = decl; + TREE_READONLY (decl) = is_const; +} + +/* Make builtins to get CPU type and features supported. The created + builtins are : + + __builtin_cpu_init (), to detect cpu type and features, + __builtin_cpu_is (""), to check if cpu is of type , + __builtin_cpu_supports (""), to check if cpu supports + */ + +static void +ix86_init_platform_type_builtins (void) +{ + make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT, + INT_FTYPE_VOID, false); + make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS, + INT_FTYPE_PCCHAR, true); + make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS, + INT_FTYPE_PCCHAR, true); +} + +/* Internal method for ix86_init_builtins. */ + +static void +ix86_init_builtins_va_builtins_abi (void) +{ + tree ms_va_ref, sysv_va_ref; + tree fnvoid_va_end_ms, fnvoid_va_end_sysv; + tree fnvoid_va_start_ms, fnvoid_va_start_sysv; + tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv; + tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE; + + if (!TARGET_64BIT) + return; + fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE); + fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE); + ms_va_ref = build_reference_type (ms_va_list_type_node); + sysv_va_ref = + build_pointer_type (TREE_TYPE (sysv_va_list_type_node)); + + fnvoid_va_end_ms = + build_function_type_list (void_type_node, ms_va_ref, NULL_TREE); + fnvoid_va_start_ms = + build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE); + fnvoid_va_end_sysv = + build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE); + fnvoid_va_start_sysv = + build_varargs_function_type_list (void_type_node, sysv_va_ref, + NULL_TREE); + fnvoid_va_copy_ms = + build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node, + NULL_TREE); + fnvoid_va_copy_sysv = + build_function_type_list (void_type_node, sysv_va_ref, + sysv_va_ref, NULL_TREE); + + add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms, + BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms); + add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms, + BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms); + add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms, + BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms); + add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv, + BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv); + add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv, + BUILT_IN_VA_END, 
BUILT_IN_NORMAL, NULL, fnattr_sysv); + add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv, + BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv); +} + +static void +ix86_init_builtin_types (void) +{ + tree float128_type_node, float80_type_node; + + /* The __float80 type. */ + float80_type_node = long_double_type_node; + if (TYPE_MODE (float80_type_node) != XFmode) + { + /* The __float80 type. */ + float80_type_node = make_node (REAL_TYPE); + + TYPE_PRECISION (float80_type_node) = 80; + layout_type (float80_type_node); + } + lang_hooks.types.register_builtin_type (float80_type_node, "__float80"); + + /* The __float128 type. */ + float128_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (float128_type_node) = 128; + layout_type (float128_type_node); + lang_hooks.types.register_builtin_type (float128_type_node, "__float128"); + + /* This macro is built by i386-builtin-types.awk. */ + DEFINE_BUILTIN_PRIMITIVE_TYPES; +} + +static void +ix86_init_builtins (void) +{ + tree t; + + ix86_init_builtin_types (); + + /* Builtins to get CPU type and features. */ + ix86_init_platform_type_builtins (); + + /* TFmode support builtins. */ + def_builtin_const (0, "__builtin_infq", + FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ); + def_builtin_const (0, "__builtin_huge_valq", + FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ); + + /* We will expand them to normal call if SSE isn't available since + they are used by libgcc. */ + t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128); + t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ, + BUILT_IN_MD, "__fabstf2", NULL_TREE); + TREE_READONLY (t) = 1; + ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t; + + t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128); + t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ, + BUILT_IN_MD, "__copysigntf3", NULL_TREE); + TREE_READONLY (t) = 1; + ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t; + + ix86_init_tm_builtins (); + ix86_init_mmx_sse_builtins (); + ix86_init_mpx_builtins (); + + if (TARGET_LP64) + ix86_init_builtins_va_builtins_abi (); + +#ifdef SUBTARGET_INIT_BUILTINS + SUBTARGET_INIT_BUILTINS; +#endif +} + +/* Return the ix86 builtin for CODE. */ + +static tree +ix86_builtin_decl (unsigned code, bool) +{ + if (code >= IX86_BUILTIN_MAX) + return error_mark_node; + + return ix86_builtins[code]; +} + +/* Errors in the source file can cause expand_expr to return const0_rtx + where we expect a vector. To avoid crashing, use one of the vector + clear instructions. */ +static rtx +safe_vector_operand (rtx x, machine_mode mode) +{ + if (x == const0_rtx) + x = CONST0_RTX (mode); + return x; +} + +/* Fixup modeless constants to fit required mode. */ +static rtx +fixup_modeless_constant (rtx x, machine_mode mode) +{ + if (GET_MODE (x) == VOIDmode) + x = convert_to_mode (mode, x, 1); + return x; +} + +/* Subroutine of ix86_expand_builtin to take care of binop insns. 
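The TFmode builtins registered in ix86_init_builtins above can be used directly; a small sketch:

    __float128 q_inf (void)                       { return __builtin_infq (); }
    __float128 q_abs (__float128 x)               { return __builtin_fabsq (x); }
    __float128 q_cpy (__float128 x, __float128 y) { return __builtin_copysignq (x, y); }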
*/ + +static rtx +ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + machine_mode tmode = insn_data[icode].operand[0].mode; + machine_mode mode0 = insn_data[icode].operand[1].mode; + machine_mode mode1 = insn_data[icode].operand[2].mode; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + if (optimize || !target + || GET_MODE (target) != tmode + || !insn_data[icode].operand[0].predicate (target, tmode)) + target = gen_reg_rtx (tmode); + + if (GET_MODE (op1) == SImode && mode1 == TImode) + { + rtx x = gen_reg_rtx (V4SImode); + emit_insn (gen_sse2_loadd (x, op1)); + op1 = gen_lowpart (TImode, x); + } + + if (!insn_data[icode].operand[1].predicate (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (!insn_data[icode].operand[2].predicate (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + + emit_insn (pat); + + return target; +} + +/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */ + +static rtx +ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, + enum ix86_builtin_func_type m_type, + enum rtx_code sub_code) +{ + rtx pat; + int i; + int nargs; + bool comparison_p = false; + bool tf_p = false; + bool last_arg_constant = false; + int num_memory = 0; + struct { + rtx op; + machine_mode mode; + } args[4]; + + machine_mode tmode = insn_data[icode].operand[0].mode; + + switch (m_type) + { + case MULTI_ARG_4_DF2_DI_I: + case MULTI_ARG_4_DF2_DI_I1: + case MULTI_ARG_4_SF2_SI_I: + case MULTI_ARG_4_SF2_SI_I1: + nargs = 4; + last_arg_constant = true; + break; + + case MULTI_ARG_3_SF: + case MULTI_ARG_3_DF: + case MULTI_ARG_3_SF2: + case MULTI_ARG_3_DF2: + case MULTI_ARG_3_DI: + case MULTI_ARG_3_SI: + case MULTI_ARG_3_SI_DI: + case MULTI_ARG_3_HI: + case MULTI_ARG_3_HI_SI: + case MULTI_ARG_3_QI: + case MULTI_ARG_3_DI2: + case MULTI_ARG_3_SI2: + case MULTI_ARG_3_HI2: + case MULTI_ARG_3_QI2: + nargs = 3; + break; + + case MULTI_ARG_2_SF: + case MULTI_ARG_2_DF: + case MULTI_ARG_2_DI: + case MULTI_ARG_2_SI: + case MULTI_ARG_2_HI: + case MULTI_ARG_2_QI: + nargs = 2; + break; + + case MULTI_ARG_2_DI_IMM: + case MULTI_ARG_2_SI_IMM: + case MULTI_ARG_2_HI_IMM: + case MULTI_ARG_2_QI_IMM: + nargs = 2; + last_arg_constant = true; + break; + + case MULTI_ARG_1_SF: + case MULTI_ARG_1_DF: + case MULTI_ARG_1_SF2: + case MULTI_ARG_1_DF2: + case MULTI_ARG_1_DI: + case MULTI_ARG_1_SI: + case MULTI_ARG_1_HI: + case MULTI_ARG_1_QI: + case MULTI_ARG_1_SI_DI: + case MULTI_ARG_1_HI_DI: + case MULTI_ARG_1_HI_SI: + case MULTI_ARG_1_QI_DI: + case MULTI_ARG_1_QI_SI: + case MULTI_ARG_1_QI_HI: + nargs = 1; + break; + + case MULTI_ARG_2_DI_CMP: + case MULTI_ARG_2_SI_CMP: + case MULTI_ARG_2_HI_CMP: + case MULTI_ARG_2_QI_CMP: + nargs = 2; + comparison_p = true; + break; + + case MULTI_ARG_2_SF_TF: + case MULTI_ARG_2_DF_TF: + case MULTI_ARG_2_DI_TF: + case MULTI_ARG_2_SI_TF: + case MULTI_ARG_2_HI_TF: + case MULTI_ARG_2_QI_TF: + nargs = 2; + tf_p = true; + break; + + default: + gcc_unreachable (); + } + + if (optimize || !target + || GET_MODE (target) != tmode + || !insn_data[icode].operand[0].predicate (target, tmode)) + target = gen_reg_rtx (tmode); + + gcc_assert (nargs <= 4); + + for (i = 0; i < nargs; i++) + { + tree arg = CALL_EXPR_ARG 
(exp, i); + rtx op = expand_normal (arg); + int adjust = (comparison_p) ? 1 : 0; + machine_mode mode = insn_data[icode].operand[i+adjust+1].mode; + + if (last_arg_constant && i == nargs - 1) + { + if (!insn_data[icode].operand[i + 1].predicate (op, mode)) + { + enum insn_code new_icode = icode; + switch (icode) + { + case CODE_FOR_xop_vpermil2v2df3: + case CODE_FOR_xop_vpermil2v4sf3: + case CODE_FOR_xop_vpermil2v4df3: + case CODE_FOR_xop_vpermil2v8sf3: + error ("the last argument must be a 2-bit immediate"); + return gen_reg_rtx (tmode); + case CODE_FOR_xop_rotlv2di3: + new_icode = CODE_FOR_rotlv2di3; + goto xop_rotl; + case CODE_FOR_xop_rotlv4si3: + new_icode = CODE_FOR_rotlv4si3; + goto xop_rotl; + case CODE_FOR_xop_rotlv8hi3: + new_icode = CODE_FOR_rotlv8hi3; + goto xop_rotl; + case CODE_FOR_xop_rotlv16qi3: + new_icode = CODE_FOR_rotlv16qi3; + xop_rotl: + if (CONST_INT_P (op)) + { + int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1; + op = GEN_INT (INTVAL (op) & mask); + gcc_checking_assert + (insn_data[icode].operand[i + 1].predicate (op, mode)); + } + else + { + gcc_checking_assert + (nargs == 2 + && insn_data[new_icode].operand[0].mode == tmode + && insn_data[new_icode].operand[1].mode == tmode + && insn_data[new_icode].operand[2].mode == mode + && insn_data[new_icode].operand[0].predicate + == insn_data[icode].operand[0].predicate + && insn_data[new_icode].operand[1].predicate + == insn_data[icode].operand[1].predicate); + icode = new_icode; + goto non_constant; + } + break; + default: + gcc_unreachable (); + } + } + } + else + { + non_constant: + if (VECTOR_MODE_P (mode)) + op = safe_vector_operand (op, mode); + + /* If we aren't optimizing, only allow one memory operand to be + generated. */ + if (memory_operand (op, mode)) + num_memory++; + + gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode); + + if (optimize + || !insn_data[icode].operand[i+adjust+1].predicate (op, mode) + || num_memory > 1) + op = force_reg (mode, op); + } + + args[i].op = op; + args[i].mode = mode; + } + + switch (nargs) + { + case 1: + pat = GEN_FCN (icode) (target, args[0].op); + break; + + case 2: + if (tf_p) + pat = GEN_FCN (icode) (target, args[0].op, args[1].op, + GEN_INT ((int)sub_code)); + else if (! comparison_p) + pat = GEN_FCN (icode) (target, args[0].op, args[1].op); + else + { + rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target), + args[0].op, + args[1].op); + + pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op); + } + break; + + case 3: + pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); + break; + + case 4: + pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op); + break; + + default: + gcc_unreachable (); + } + + if (! pat) + return 0; + + emit_insn (pat); + return target; +} + +/* Subroutine of ix86_expand_args_builtin to take care of scalar unop + insns with vec_merge. 
*/ + +static rtx +ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp, + rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op1, op0 = expand_normal (arg0); + machine_mode tmode = insn_data[icode].operand[0].mode; + machine_mode mode0 = insn_data[icode].operand[1].mode; + + if (optimize || !target + || GET_MODE (target) != tmode + || !insn_data[icode].operand[0].predicate (target, tmode)) + target = gen_reg_rtx (tmode); + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if ((optimize && !register_operand (op0, mode0)) + || !insn_data[icode].operand[1].predicate (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + op1 = op0; + if (!insn_data[icode].operand[2].predicate (op1, mode0)) + op1 = copy_to_mode_reg (mode0, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Subroutine of ix86_expand_builtin to take care of comparison insns. */ + +static rtx +ix86_expand_sse_compare (const struct builtin_description *d, + tree exp, rtx target, bool swap) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2; + machine_mode tmode = insn_data[d->icode].operand[0].mode; + machine_mode mode0 = insn_data[d->icode].operand[1].mode; + machine_mode mode1 = insn_data[d->icode].operand[2].mode; + enum rtx_code comparison = d->comparison; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + /* Swap operands if we have a comparison that isn't available in + hardware. */ + if (swap) + std::swap (op0, op1); + + if (optimize || !target + || GET_MODE (target) != tmode + || !insn_data[d->icode].operand[0].predicate (target, tmode)) + target = gen_reg_rtx (tmode); + + if ((optimize && !register_operand (op0, mode0)) + || !insn_data[d->icode].operand[1].predicate (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if ((optimize && !register_operand (op1, mode1)) + || !insn_data[d->icode].operand[2].predicate (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); + pat = GEN_FCN (d->icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Subroutine of ix86_expand_builtin to take care of comi insns. */ + +static rtx +ix86_expand_sse_comi (const struct builtin_description *d, tree exp, + rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + machine_mode mode0 = insn_data[d->icode].operand[0].mode; + machine_mode mode1 = insn_data[d->icode].operand[1].mode; + enum rtx_code comparison = d->comparison; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + /* Swap operands if we have a comparison that isn't available in + hardware. 
*/ + if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) + std::swap (op0, op1); + + target = gen_reg_rtx (SImode); + emit_move_insn (target, const0_rtx); + target = gen_rtx_SUBREG (QImode, target, 0); + + if ((optimize && !register_operand (op0, mode0)) + || !insn_data[d->icode].operand[0].predicate (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if ((optimize && !register_operand (op1, mode1)) + || !insn_data[d->icode].operand[1].predicate (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (d->icode) (op0, op1); + if (! pat) + return 0; + emit_insn (pat); + emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), + gen_rtx_fmt_ee (comparison, QImode, + SET_DEST (pat), + const0_rtx))); + + return SUBREG_REG (target); +} + +/* Subroutines of ix86_expand_args_builtin to take care of round insns. */ + +static rtx +ix86_expand_sse_round (const struct builtin_description *d, tree exp, + rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op1, op0 = expand_normal (arg0); + machine_mode tmode = insn_data[d->icode].operand[0].mode; + machine_mode mode0 = insn_data[d->icode].operand[1].mode; + + if (optimize || target == 0 + || GET_MODE (target) != tmode + || !insn_data[d->icode].operand[0].predicate (target, tmode)) + target = gen_reg_rtx (tmode); + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if ((optimize && !register_operand (op0, mode0)) + || !insn_data[d->icode].operand[0].predicate (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + op1 = GEN_INT (d->comparison); + + pat = GEN_FCN (d->icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +static rtx +ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d, + tree exp, rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2; + machine_mode tmode = insn_data[d->icode].operand[0].mode; + machine_mode mode0 = insn_data[d->icode].operand[1].mode; + machine_mode mode1 = insn_data[d->icode].operand[2].mode; + + if (optimize || target == 0 + || GET_MODE (target) != tmode + || !insn_data[d->icode].operand[0].predicate (target, tmode)) + target = gen_reg_rtx (tmode); + + op0 = safe_vector_operand (op0, mode0); + op1 = safe_vector_operand (op1, mode1); + + if ((optimize && !register_operand (op0, mode0)) + || !insn_data[d->icode].operand[0].predicate (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if ((optimize && !register_operand (op1, mode1)) + || !insn_data[d->icode].operand[1].predicate (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + op2 = GEN_INT (d->comparison); + + pat = GEN_FCN (d->icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Subroutine of ix86_expand_builtin to take care of ptest insns. 
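ix86_expand_sse_ptest below is what the SSE4.1 ptest intrinsics go through; for example (assumes -msse4.1):

    #include <smmintrin.h>

    /* ptest sets ZF/CF; the expander turns the flag into an int result.  */
    int all_zero (__m128i x) { return _mm_testz_si128 (x, x); }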
*/ + +static rtx +ix86_expand_sse_ptest (const struct builtin_description *d, tree exp, + rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + machine_mode mode0 = insn_data[d->icode].operand[0].mode; + machine_mode mode1 = insn_data[d->icode].operand[1].mode; + enum rtx_code comparison = d->comparison; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + target = gen_reg_rtx (SImode); + emit_move_insn (target, const0_rtx); + target = gen_rtx_SUBREG (QImode, target, 0); + + if ((optimize && !register_operand (op0, mode0)) + || !insn_data[d->icode].operand[0].predicate (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if ((optimize && !register_operand (op1, mode1)) + || !insn_data[d->icode].operand[1].predicate (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (d->icode) (op0, op1); + if (! pat) + return 0; + emit_insn (pat); + emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), + gen_rtx_fmt_ee (comparison, QImode, + SET_DEST (pat), + const0_rtx))); + + return SUBREG_REG (target); +} + +/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */ + +static rtx +ix86_expand_sse_pcmpestr (const struct builtin_description *d, + tree exp, rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); + tree arg3 = CALL_EXPR_ARG (exp, 3); + tree arg4 = CALL_EXPR_ARG (exp, 4); + rtx scratch0, scratch1; + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2 = expand_normal (arg2); + rtx op3 = expand_normal (arg3); + rtx op4 = expand_normal (arg4); + machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm; + + tmode0 = insn_data[d->icode].operand[0].mode; + tmode1 = insn_data[d->icode].operand[1].mode; + modev2 = insn_data[d->icode].operand[2].mode; + modei3 = insn_data[d->icode].operand[3].mode; + modev4 = insn_data[d->icode].operand[4].mode; + modei5 = insn_data[d->icode].operand[5].mode; + modeimm = insn_data[d->icode].operand[6].mode; + + if (VECTOR_MODE_P (modev2)) + op0 = safe_vector_operand (op0, modev2); + if (VECTOR_MODE_P (modev4)) + op2 = safe_vector_operand (op2, modev4); + + if (!insn_data[d->icode].operand[2].predicate (op0, modev2)) + op0 = copy_to_mode_reg (modev2, op0); + if (!insn_data[d->icode].operand[3].predicate (op1, modei3)) + op1 = copy_to_mode_reg (modei3, op1); + if ((optimize && !register_operand (op2, modev4)) + || !insn_data[d->icode].operand[4].predicate (op2, modev4)) + op2 = copy_to_mode_reg (modev4, op2); + if (!insn_data[d->icode].operand[5].predicate (op3, modei5)) + op3 = copy_to_mode_reg (modei5, op3); + + if (!insn_data[d->icode].operand[6].predicate (op4, modeimm)) + { + error ("the fifth argument must be an 8-bit immediate"); + return const0_rtx; + } + + if (d->code == IX86_BUILTIN_PCMPESTRI128) + { + if (optimize || !target + || GET_MODE (target) != tmode0 + || !insn_data[d->icode].operand[0].predicate (target, tmode0)) + target = gen_reg_rtx (tmode0); + + scratch1 = gen_reg_rtx (tmode1); + + pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4); + } + else if (d->code == IX86_BUILTIN_PCMPESTRM128) + { + if (optimize || !target + || GET_MODE (target) != tmode1 + || !insn_data[d->icode].operand[1].predicate (target, tmode1)) + target = gen_reg_rtx (tmode1); + + 
scratch0 = gen_reg_rtx (tmode0); + + pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4); + } + else + { + gcc_assert (d->flag); + + scratch0 = gen_reg_rtx (tmode0); + scratch1 = gen_reg_rtx (tmode1); + + pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4); + } + + if (! pat) + return 0; + + emit_insn (pat); + + if (d->flag) + { + target = gen_reg_rtx (SImode); + emit_move_insn (target, const0_rtx); + target = gen_rtx_SUBREG (QImode, target, 0); + + emit_insn + (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), + gen_rtx_fmt_ee (EQ, QImode, + gen_rtx_REG ((machine_mode) d->flag, + FLAGS_REG), + const0_rtx))); + return SUBREG_REG (target); + } + else + return target; +} + + +/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */ + +static rtx +ix86_expand_sse_pcmpistr (const struct builtin_description *d, + tree exp, rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); + rtx scratch0, scratch1; + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2 = expand_normal (arg2); + machine_mode tmode0, tmode1, modev2, modev3, modeimm; + + tmode0 = insn_data[d->icode].operand[0].mode; + tmode1 = insn_data[d->icode].operand[1].mode; + modev2 = insn_data[d->icode].operand[2].mode; + modev3 = insn_data[d->icode].operand[3].mode; + modeimm = insn_data[d->icode].operand[4].mode; + + if (VECTOR_MODE_P (modev2)) + op0 = safe_vector_operand (op0, modev2); + if (VECTOR_MODE_P (modev3)) + op1 = safe_vector_operand (op1, modev3); + + if (!insn_data[d->icode].operand[2].predicate (op0, modev2)) + op0 = copy_to_mode_reg (modev2, op0); + if ((optimize && !register_operand (op1, modev3)) + || !insn_data[d->icode].operand[3].predicate (op1, modev3)) + op1 = copy_to_mode_reg (modev3, op1); + + if (!insn_data[d->icode].operand[4].predicate (op2, modeimm)) + { + error ("the third argument must be an 8-bit immediate"); + return const0_rtx; + } + + if (d->code == IX86_BUILTIN_PCMPISTRI128) + { + if (optimize || !target + || GET_MODE (target) != tmode0 + || !insn_data[d->icode].operand[0].predicate (target, tmode0)) + target = gen_reg_rtx (tmode0); + + scratch1 = gen_reg_rtx (tmode1); + + pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2); + } + else if (d->code == IX86_BUILTIN_PCMPISTRM128) + { + if (optimize || !target + || GET_MODE (target) != tmode1 + || !insn_data[d->icode].operand[1].predicate (target, tmode1)) + target = gen_reg_rtx (tmode1); + + scratch0 = gen_reg_rtx (tmode0); + + pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2); + } + else + { + gcc_assert (d->flag); + + scratch0 = gen_reg_rtx (tmode0); + scratch1 = gen_reg_rtx (tmode1); + + pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2); + } + + if (! pat) + return 0; + + emit_insn (pat); + + if (d->flag) + { + target = gen_reg_rtx (SImode); + emit_move_insn (target, const0_rtx); + target = gen_rtx_SUBREG (QImode, target, 0); + + emit_insn + (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), + gen_rtx_fmt_ee (EQ, QImode, + gen_rtx_REG ((machine_mode) d->flag, + FLAGS_REG), + const0_rtx))); + return SUBREG_REG (target); + } + else + return target; +} + +/* Subroutine of ix86_expand_builtin to take care of insns with + variable number of operands. 
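The pcmpestr/pcmpistr expanders above require the control word to be a literal 8-bit immediate, as the error paths check; a typical use (assumes -msse4.2):

    #include <nmmintrin.h>

    int first_eq (__m128i a, __m128i b)
    { return _mm_cmpistri (a, b, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH); }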
*/ + +static rtx +ix86_expand_args_builtin (const struct builtin_description *d, + tree exp, rtx target) +{ + rtx pat, real_target; + unsigned int i, nargs; + unsigned int nargs_constant = 0; + unsigned int mask_pos = 0; + int num_memory = 0; + struct + { + rtx op; + machine_mode mode; + } args[6]; + bool last_arg_count = false; + enum insn_code icode = d->icode; + const struct insn_data_d *insn_p = &insn_data[icode]; + machine_mode tmode = insn_p->operand[0].mode; + machine_mode rmode = VOIDmode; + bool swap = false; + enum rtx_code comparison = d->comparison; + + switch ((enum ix86_builtin_func_type) d->flag) + { + case V2DF_FTYPE_V2DF_ROUND: + case V4DF_FTYPE_V4DF_ROUND: + case V8DF_FTYPE_V8DF_ROUND: + case V4SF_FTYPE_V4SF_ROUND: + case V8SF_FTYPE_V8SF_ROUND: + case V16SF_FTYPE_V16SF_ROUND: + case V4SI_FTYPE_V4SF_ROUND: + case V8SI_FTYPE_V8SF_ROUND: + case V16SI_FTYPE_V16SF_ROUND: + return ix86_expand_sse_round (d, exp, target); + case V4SI_FTYPE_V2DF_V2DF_ROUND: + case V8SI_FTYPE_V4DF_V4DF_ROUND: + case V16SI_FTYPE_V8DF_V8DF_ROUND: + return ix86_expand_sse_round_vec_pack_sfix (d, exp, target); + case INT_FTYPE_V8SF_V8SF_PTEST: + case INT_FTYPE_V4DI_V4DI_PTEST: + case INT_FTYPE_V4DF_V4DF_PTEST: + case INT_FTYPE_V4SF_V4SF_PTEST: + case INT_FTYPE_V2DI_V2DI_PTEST: + case INT_FTYPE_V2DF_V2DF_PTEST: + return ix86_expand_sse_ptest (d, exp, target); + case FLOAT128_FTYPE_FLOAT128: + case FLOAT_FTYPE_FLOAT: + case INT_FTYPE_INT: + case UINT_FTYPE_UINT: + case UINT16_FTYPE_UINT16: + case UINT64_FTYPE_INT: + case UINT64_FTYPE_UINT64: + case INT64_FTYPE_INT64: + case INT64_FTYPE_V4SF: + case INT64_FTYPE_V2DF: + case INT_FTYPE_V16QI: + case INT_FTYPE_V8QI: + case INT_FTYPE_V8SF: + case INT_FTYPE_V4DF: + case INT_FTYPE_V4SF: + case INT_FTYPE_V2DF: + case INT_FTYPE_V32QI: + case V16QI_FTYPE_V16QI: + case V8SI_FTYPE_V8SF: + case V8SI_FTYPE_V4SI: + case V8HI_FTYPE_V8HI: + case V8HI_FTYPE_V16QI: + case V8QI_FTYPE_V8QI: + case V8SF_FTYPE_V8SF: + case V8SF_FTYPE_V8SI: + case V8SF_FTYPE_V4SF: + case V8SF_FTYPE_V8HI: + case V4SI_FTYPE_V4SI: + case V4SI_FTYPE_V16QI: + case V4SI_FTYPE_V4SF: + case V4SI_FTYPE_V8SI: + case V4SI_FTYPE_V8HI: + case V4SI_FTYPE_V4DF: + case V4SI_FTYPE_V2DF: + case V4HI_FTYPE_V4HI: + case V4DF_FTYPE_V4DF: + case V4DF_FTYPE_V4SI: + case V4DF_FTYPE_V4SF: + case V4DF_FTYPE_V2DF: + case V4SF_FTYPE_V4SF: + case V4SF_FTYPE_V4SI: + case V4SF_FTYPE_V8SF: + case V4SF_FTYPE_V4DF: + case V4SF_FTYPE_V8HI: + case V4SF_FTYPE_V2DF: + case V2DI_FTYPE_V2DI: + case V2DI_FTYPE_V16QI: + case V2DI_FTYPE_V8HI: + case V2DI_FTYPE_V4SI: + case V2DF_FTYPE_V2DF: + case V2DF_FTYPE_V4SI: + case V2DF_FTYPE_V4DF: + case V2DF_FTYPE_V4SF: + case V2DF_FTYPE_V2SI: + case V2SI_FTYPE_V2SI: + case V2SI_FTYPE_V4SF: + case V2SI_FTYPE_V2SF: + case V2SI_FTYPE_V2DF: + case V2SF_FTYPE_V2SF: + case V2SF_FTYPE_V2SI: + case V32QI_FTYPE_V32QI: + case V32QI_FTYPE_V16QI: + case V16HI_FTYPE_V16HI: + case V16HI_FTYPE_V8HI: + case V8SI_FTYPE_V8SI: + case V16HI_FTYPE_V16QI: + case V8SI_FTYPE_V16QI: + case V4DI_FTYPE_V16QI: + case V8SI_FTYPE_V8HI: + case V4DI_FTYPE_V8HI: + case V4DI_FTYPE_V4SI: + case V4DI_FTYPE_V2DI: + case UHI_FTYPE_UHI: + case UHI_FTYPE_V16QI: + case USI_FTYPE_V32QI: + case UDI_FTYPE_V64QI: + case V16QI_FTYPE_UHI: + case V32QI_FTYPE_USI: + case V64QI_FTYPE_UDI: + case V8HI_FTYPE_UQI: + case V16HI_FTYPE_UHI: + case V32HI_FTYPE_USI: + case V4SI_FTYPE_UQI: + case V8SI_FTYPE_UQI: + case V4SI_FTYPE_UHI: + case V8SI_FTYPE_UHI: + case UQI_FTYPE_V8HI: + case UHI_FTYPE_V16HI: + case USI_FTYPE_V32HI: + case UQI_FTYPE_V4SI: + 
case UQI_FTYPE_V8SI: + case UHI_FTYPE_V16SI: + case UQI_FTYPE_V2DI: + case UQI_FTYPE_V4DI: + case UQI_FTYPE_V8DI: + case V16SI_FTYPE_UHI: + case V2DI_FTYPE_UQI: + case V4DI_FTYPE_UQI: + case V16SI_FTYPE_INT: + case V16SF_FTYPE_V8SF: + case V16SI_FTYPE_V8SI: + case V16SF_FTYPE_V4SF: + case V16SI_FTYPE_V4SI: + case V16SI_FTYPE_V16SF: + case V16SF_FTYPE_V16SF: + case V8DI_FTYPE_UQI: + case V8DF_FTYPE_V4DF: + case V8DF_FTYPE_V2DF: + case V8DF_FTYPE_V8DF: + nargs = 1; + break; + case V4SF_FTYPE_V4SF_VEC_MERGE: + case V2DF_FTYPE_V2DF_VEC_MERGE: + return ix86_expand_unop_vec_merge_builtin (icode, exp, target); + case FLOAT128_FTYPE_FLOAT128_FLOAT128: + case V16QI_FTYPE_V16QI_V16QI: + case V16QI_FTYPE_V8HI_V8HI: + case V16SF_FTYPE_V16SF_V16SF: + case V8QI_FTYPE_V8QI_V8QI: + case V8QI_FTYPE_V4HI_V4HI: + case V8HI_FTYPE_V8HI_V8HI: + case V8HI_FTYPE_V16QI_V16QI: + case V8HI_FTYPE_V4SI_V4SI: + case V8SF_FTYPE_V8SF_V8SF: + case V8SF_FTYPE_V8SF_V8SI: + case V8DF_FTYPE_V8DF_V8DF: + case V4SI_FTYPE_V4SI_V4SI: + case V4SI_FTYPE_V8HI_V8HI: + case V4SI_FTYPE_V2DF_V2DF: + case V4HI_FTYPE_V4HI_V4HI: + case V4HI_FTYPE_V8QI_V8QI: + case V4HI_FTYPE_V2SI_V2SI: + case V4DF_FTYPE_V4DF_V4DF: + case V4DF_FTYPE_V4DF_V4DI: + case V4SF_FTYPE_V4SF_V4SF: + case V4SF_FTYPE_V4SF_V4SI: + case V4SF_FTYPE_V4SF_V2SI: + case V4SF_FTYPE_V4SF_V2DF: + case V4SF_FTYPE_V4SF_UINT: + case V4SF_FTYPE_V4SF_DI: + case V4SF_FTYPE_V4SF_SI: + case V2DI_FTYPE_V2DI_V2DI: + case V2DI_FTYPE_V16QI_V16QI: + case V2DI_FTYPE_V4SI_V4SI: + case V2DI_FTYPE_V2DI_V16QI: + case V2SI_FTYPE_V2SI_V2SI: + case V2SI_FTYPE_V4HI_V4HI: + case V2SI_FTYPE_V2SF_V2SF: + case V2DF_FTYPE_V2DF_V2DF: + case V2DF_FTYPE_V2DF_V4SF: + case V2DF_FTYPE_V2DF_V2DI: + case V2DF_FTYPE_V2DF_DI: + case V2DF_FTYPE_V2DF_SI: + case V2DF_FTYPE_V2DF_UINT: + case V2SF_FTYPE_V2SF_V2SF: + case V1DI_FTYPE_V1DI_V1DI: + case V1DI_FTYPE_V8QI_V8QI: + case V1DI_FTYPE_V2SI_V2SI: + case V32QI_FTYPE_V16HI_V16HI: + case V16HI_FTYPE_V8SI_V8SI: + case V32QI_FTYPE_V32QI_V32QI: + case V16HI_FTYPE_V32QI_V32QI: + case V16HI_FTYPE_V16HI_V16HI: + case V8SI_FTYPE_V4DF_V4DF: + case V8SI_FTYPE_V8SI_V8SI: + case V8SI_FTYPE_V16HI_V16HI: + case V4DI_FTYPE_V4DI_V4DI: + case V4DI_FTYPE_V8SI_V8SI: + case V8DI_FTYPE_V64QI_V64QI: + if (comparison == UNKNOWN) + return ix86_expand_binop_builtin (icode, exp, target); + nargs = 2; + break; + case V4SF_FTYPE_V4SF_V4SF_SWAP: + case V2DF_FTYPE_V2DF_V2DF_SWAP: + gcc_assert (comparison != UNKNOWN); + nargs = 2; + swap = true; + break; + case V16HI_FTYPE_V16HI_V8HI_COUNT: + case V16HI_FTYPE_V16HI_SI_COUNT: + case V8SI_FTYPE_V8SI_V4SI_COUNT: + case V8SI_FTYPE_V8SI_SI_COUNT: + case V4DI_FTYPE_V4DI_V2DI_COUNT: + case V4DI_FTYPE_V4DI_INT_COUNT: + case V8HI_FTYPE_V8HI_V8HI_COUNT: + case V8HI_FTYPE_V8HI_SI_COUNT: + case V4SI_FTYPE_V4SI_V4SI_COUNT: + case V4SI_FTYPE_V4SI_SI_COUNT: + case V4HI_FTYPE_V4HI_V4HI_COUNT: + case V4HI_FTYPE_V4HI_SI_COUNT: + case V2DI_FTYPE_V2DI_V2DI_COUNT: + case V2DI_FTYPE_V2DI_SI_COUNT: + case V2SI_FTYPE_V2SI_V2SI_COUNT: + case V2SI_FTYPE_V2SI_SI_COUNT: + case V1DI_FTYPE_V1DI_V1DI_COUNT: + case V1DI_FTYPE_V1DI_SI_COUNT: + nargs = 2; + last_arg_count = true; + break; + case UINT64_FTYPE_UINT64_UINT64: + case UINT_FTYPE_UINT_UINT: + case UINT_FTYPE_UINT_USHORT: + case UINT_FTYPE_UINT_UCHAR: + case UINT16_FTYPE_UINT16_INT: + case UINT8_FTYPE_UINT8_INT: + case UHI_FTYPE_UHI_UHI: + case USI_FTYPE_USI_USI: + case UDI_FTYPE_UDI_UDI: + case V16SI_FTYPE_V8DF_V8DF: + nargs = 2; + break; + case V2DI_FTYPE_V2DI_INT_CONVERT: + nargs = 2; + rmode = V1TImode; + 
nargs_constant = 1; + break; + case V4DI_FTYPE_V4DI_INT_CONVERT: + nargs = 2; + rmode = V2TImode; + nargs_constant = 1; + break; + case V8DI_FTYPE_V8DI_INT_CONVERT: + nargs = 2; + rmode = V4TImode; + nargs_constant = 1; + break; + case V8HI_FTYPE_V8HI_INT: + case V8HI_FTYPE_V8SF_INT: + case V16HI_FTYPE_V16SF_INT: + case V8HI_FTYPE_V4SF_INT: + case V8SF_FTYPE_V8SF_INT: + case V4SF_FTYPE_V16SF_INT: + case V16SF_FTYPE_V16SF_INT: + case V4SI_FTYPE_V4SI_INT: + case V4SI_FTYPE_V8SI_INT: + case V4HI_FTYPE_V4HI_INT: + case V4DF_FTYPE_V4DF_INT: + case V4DF_FTYPE_V8DF_INT: + case V4SF_FTYPE_V4SF_INT: + case V4SF_FTYPE_V8SF_INT: + case V2DI_FTYPE_V2DI_INT: + case V2DF_FTYPE_V2DF_INT: + case V2DF_FTYPE_V4DF_INT: + case V16HI_FTYPE_V16HI_INT: + case V8SI_FTYPE_V8SI_INT: + case V16SI_FTYPE_V16SI_INT: + case V4SI_FTYPE_V16SI_INT: + case V4DI_FTYPE_V4DI_INT: + case V2DI_FTYPE_V4DI_INT: + case V4DI_FTYPE_V8DI_INT: + case QI_FTYPE_V4SF_INT: + case QI_FTYPE_V2DF_INT: + nargs = 2; + nargs_constant = 1; + break; + case V16QI_FTYPE_V16QI_V16QI_V16QI: + case V8SF_FTYPE_V8SF_V8SF_V8SF: + case V4DF_FTYPE_V4DF_V4DF_V4DF: + case V4SF_FTYPE_V4SF_V4SF_V4SF: + case V2DF_FTYPE_V2DF_V2DF_V2DF: + case V32QI_FTYPE_V32QI_V32QI_V32QI: + case UHI_FTYPE_V16SI_V16SI_UHI: + case UQI_FTYPE_V8DI_V8DI_UQI: + case V16HI_FTYPE_V16SI_V16HI_UHI: + case V16QI_FTYPE_V16SI_V16QI_UHI: + case V16QI_FTYPE_V8DI_V16QI_UQI: + case V16SF_FTYPE_V16SF_V16SF_UHI: + case V16SF_FTYPE_V4SF_V16SF_UHI: + case V16SI_FTYPE_SI_V16SI_UHI: + case V16SI_FTYPE_V16HI_V16SI_UHI: + case V16SI_FTYPE_V16QI_V16SI_UHI: + case V8SF_FTYPE_V4SF_V8SF_UQI: + case V4DF_FTYPE_V2DF_V4DF_UQI: + case V8SI_FTYPE_V4SI_V8SI_UQI: + case V8SI_FTYPE_SI_V8SI_UQI: + case V4SI_FTYPE_V4SI_V4SI_UQI: + case V4SI_FTYPE_SI_V4SI_UQI: + case V4DI_FTYPE_V2DI_V4DI_UQI: + case V4DI_FTYPE_DI_V4DI_UQI: + case V2DI_FTYPE_V2DI_V2DI_UQI: + case V2DI_FTYPE_DI_V2DI_UQI: + case V64QI_FTYPE_V64QI_V64QI_UDI: + case V64QI_FTYPE_V16QI_V64QI_UDI: + case V64QI_FTYPE_QI_V64QI_UDI: + case V32QI_FTYPE_V32QI_V32QI_USI: + case V32QI_FTYPE_V16QI_V32QI_USI: + case V32QI_FTYPE_QI_V32QI_USI: + case V16QI_FTYPE_V16QI_V16QI_UHI: + case V16QI_FTYPE_QI_V16QI_UHI: + case V32HI_FTYPE_V8HI_V32HI_USI: + case V32HI_FTYPE_HI_V32HI_USI: + case V16HI_FTYPE_V8HI_V16HI_UHI: + case V16HI_FTYPE_HI_V16HI_UHI: + case V8HI_FTYPE_V8HI_V8HI_UQI: + case V8HI_FTYPE_HI_V8HI_UQI: + case V8SF_FTYPE_V8HI_V8SF_UQI: + case V4SF_FTYPE_V8HI_V4SF_UQI: + case V8SI_FTYPE_V8SF_V8SI_UQI: + case V4SI_FTYPE_V4SF_V4SI_UQI: + case V4DI_FTYPE_V4SF_V4DI_UQI: + case V2DI_FTYPE_V4SF_V2DI_UQI: + case V4SF_FTYPE_V4DI_V4SF_UQI: + case V4SF_FTYPE_V2DI_V4SF_UQI: + case V4DF_FTYPE_V4DI_V4DF_UQI: + case V2DF_FTYPE_V2DI_V2DF_UQI: + case V16QI_FTYPE_V8HI_V16QI_UQI: + case V16QI_FTYPE_V16HI_V16QI_UHI: + case V16QI_FTYPE_V4SI_V16QI_UQI: + case V16QI_FTYPE_V8SI_V16QI_UQI: + case V8HI_FTYPE_V4SI_V8HI_UQI: + case V8HI_FTYPE_V8SI_V8HI_UQI: + case V16QI_FTYPE_V2DI_V16QI_UQI: + case V16QI_FTYPE_V4DI_V16QI_UQI: + case V8HI_FTYPE_V2DI_V8HI_UQI: + case V8HI_FTYPE_V4DI_V8HI_UQI: + case V4SI_FTYPE_V2DI_V4SI_UQI: + case V4SI_FTYPE_V4DI_V4SI_UQI: + case V32QI_FTYPE_V32HI_V32QI_USI: + case UHI_FTYPE_V16QI_V16QI_UHI: + case USI_FTYPE_V32QI_V32QI_USI: + case UDI_FTYPE_V64QI_V64QI_UDI: + case UQI_FTYPE_V8HI_V8HI_UQI: + case UHI_FTYPE_V16HI_V16HI_UHI: + case USI_FTYPE_V32HI_V32HI_USI: + case UQI_FTYPE_V4SI_V4SI_UQI: + case UQI_FTYPE_V8SI_V8SI_UQI: + case UQI_FTYPE_V2DI_V2DI_UQI: + case UQI_FTYPE_V4DI_V4DI_UQI: + case V4SF_FTYPE_V2DF_V4SF_UQI: + case V4SF_FTYPE_V4DF_V4SF_UQI: + case 
V16SI_FTYPE_V16SI_V16SI_UHI: + case V16SI_FTYPE_V4SI_V16SI_UHI: + case V2DI_FTYPE_V4SI_V2DI_UQI: + case V2DI_FTYPE_V8HI_V2DI_UQI: + case V2DI_FTYPE_V16QI_V2DI_UQI: + case V4DI_FTYPE_V4DI_V4DI_UQI: + case V4DI_FTYPE_V4SI_V4DI_UQI: + case V4DI_FTYPE_V8HI_V4DI_UQI: + case V4DI_FTYPE_V16QI_V4DI_UQI: + case V4DI_FTYPE_V4DF_V4DI_UQI: + case V2DI_FTYPE_V2DF_V2DI_UQI: + case V4SI_FTYPE_V4DF_V4SI_UQI: + case V4SI_FTYPE_V2DF_V4SI_UQI: + case V4SI_FTYPE_V8HI_V4SI_UQI: + case V4SI_FTYPE_V16QI_V4SI_UQI: + case V4DI_FTYPE_V4DI_V4DI_V4DI: + case V8DF_FTYPE_V2DF_V8DF_UQI: + case V8DF_FTYPE_V4DF_V8DF_UQI: + case V8DF_FTYPE_V8DF_V8DF_UQI: + case V8SF_FTYPE_V8SF_V8SF_UQI: + case V8SF_FTYPE_V8SI_V8SF_UQI: + case V4DF_FTYPE_V4DF_V4DF_UQI: + case V4SF_FTYPE_V4SF_V4SF_UQI: + case V2DF_FTYPE_V2DF_V2DF_UQI: + case V2DF_FTYPE_V4SF_V2DF_UQI: + case V2DF_FTYPE_V4SI_V2DF_UQI: + case V4SF_FTYPE_V4SI_V4SF_UQI: + case V4DF_FTYPE_V4SF_V4DF_UQI: + case V4DF_FTYPE_V4SI_V4DF_UQI: + case V8SI_FTYPE_V8SI_V8SI_UQI: + case V8SI_FTYPE_V8HI_V8SI_UQI: + case V8SI_FTYPE_V16QI_V8SI_UQI: + case V8DF_FTYPE_V8SI_V8DF_UQI: + case V8DI_FTYPE_DI_V8DI_UQI: + case V16SF_FTYPE_V8SF_V16SF_UHI: + case V16SI_FTYPE_V8SI_V16SI_UHI: + case V16HI_FTYPE_V16HI_V16HI_UHI: + case V8HI_FTYPE_V16QI_V8HI_UQI: + case V16HI_FTYPE_V16QI_V16HI_UHI: + case V32HI_FTYPE_V32HI_V32HI_USI: + case V32HI_FTYPE_V32QI_V32HI_USI: + case V8DI_FTYPE_V16QI_V8DI_UQI: + case V8DI_FTYPE_V2DI_V8DI_UQI: + case V8DI_FTYPE_V4DI_V8DI_UQI: + case V8DI_FTYPE_V8DI_V8DI_UQI: + case V8DI_FTYPE_V8HI_V8DI_UQI: + case V8DI_FTYPE_V8SI_V8DI_UQI: + case V8HI_FTYPE_V8DI_V8HI_UQI: + case V8SI_FTYPE_V8DI_V8SI_UQI: + case V4SI_FTYPE_V4SI_V4SI_V4SI: + nargs = 3; + break; + case V32QI_FTYPE_V32QI_V32QI_INT: + case V16HI_FTYPE_V16HI_V16HI_INT: + case V16QI_FTYPE_V16QI_V16QI_INT: + case V4DI_FTYPE_V4DI_V4DI_INT: + case V8HI_FTYPE_V8HI_V8HI_INT: + case V8SI_FTYPE_V8SI_V8SI_INT: + case V8SI_FTYPE_V8SI_V4SI_INT: + case V8SF_FTYPE_V8SF_V8SF_INT: + case V8SF_FTYPE_V8SF_V4SF_INT: + case V4SI_FTYPE_V4SI_V4SI_INT: + case V4DF_FTYPE_V4DF_V4DF_INT: + case V16SF_FTYPE_V16SF_V16SF_INT: + case V16SF_FTYPE_V16SF_V4SF_INT: + case V16SI_FTYPE_V16SI_V4SI_INT: + case V4DF_FTYPE_V4DF_V2DF_INT: + case V4SF_FTYPE_V4SF_V4SF_INT: + case V2DI_FTYPE_V2DI_V2DI_INT: + case V4DI_FTYPE_V4DI_V2DI_INT: + case V2DF_FTYPE_V2DF_V2DF_INT: + case UQI_FTYPE_V8DI_V8UDI_INT: + case UQI_FTYPE_V8DF_V8DF_INT: + case UQI_FTYPE_V2DF_V2DF_INT: + case UQI_FTYPE_V4SF_V4SF_INT: + case UHI_FTYPE_V16SI_V16SI_INT: + case UHI_FTYPE_V16SF_V16SF_INT: + nargs = 3; + nargs_constant = 1; + break; + case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT: + nargs = 3; + rmode = V4DImode; + nargs_constant = 1; + break; + case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT: + nargs = 3; + rmode = V2DImode; + nargs_constant = 1; + break; + case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT: + nargs = 3; + rmode = DImode; + nargs_constant = 1; + break; + case V2DI_FTYPE_V2DI_UINT_UINT: + nargs = 3; + nargs_constant = 2; + break; + case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT: + nargs = 3; + rmode = V8DImode; + nargs_constant = 1; + break; + case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT: + nargs = 5; + rmode = V8DImode; + mask_pos = 2; + nargs_constant = 1; + break; + case QI_FTYPE_V8DF_INT_UQI: + case QI_FTYPE_V4DF_INT_UQI: + case QI_FTYPE_V2DF_INT_UQI: + case HI_FTYPE_V16SF_INT_UHI: + case QI_FTYPE_V8SF_INT_UQI: + case QI_FTYPE_V4SF_INT_UQI: + nargs = 3; + mask_pos = 1; + nargs_constant = 1; + break; + case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT: + nargs = 5; + rmode = V4DImode; + mask_pos = 2; + 
nargs_constant = 1; + break; + case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT: + nargs = 5; + rmode = V2DImode; + mask_pos = 2; + nargs_constant = 1; + break; + case V32QI_FTYPE_V32QI_V32QI_V32QI_USI: + case V32HI_FTYPE_V32HI_V32HI_V32HI_USI: + case V32HI_FTYPE_V64QI_V64QI_V32HI_USI: + case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI: + case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI: + case V32HI_FTYPE_V32HI_V8HI_V32HI_USI: + case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI: + case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI: + case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI: + case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI: + case V32QI_FTYPE_V16HI_V16HI_V32QI_USI: + case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI: + case V32HI_FTYPE_V16SI_V16SI_V32HI_USI: + case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI: + case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI: + case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI: + case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI: + case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI: + case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI: + case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI: + case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI: + case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI: + case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI: + case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI: + case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI: + case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI: + case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI: + case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI: + case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI: + case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI: + case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI: + case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI: + case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI: + case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI: + case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI: + case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI: + case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI: + case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI: + case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI: + case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI: + case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI: + case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI: + case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI: + case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI: + case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI: + case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI: + case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI: + case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI: + case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI: + case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI: + case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI: + nargs = 4; + break; + case V2DF_FTYPE_V2DF_V2DF_V2DI_INT: + case V4DF_FTYPE_V4DF_V4DF_V4DI_INT: + case V4SF_FTYPE_V4SF_V4SF_V4SI_INT: + case V8SF_FTYPE_V8SF_V8SF_V8SI_INT: + case V16SF_FTYPE_V16SF_V16SF_V16SI_INT: + nargs = 4; + nargs_constant = 1; + break; + case UQI_FTYPE_V4DI_V4DI_INT_UQI: + case UQI_FTYPE_V8SI_V8SI_INT_UQI: + case QI_FTYPE_V4DF_V4DF_INT_UQI: + case QI_FTYPE_V8SF_V8SF_INT_UQI: + case UQI_FTYPE_V2DI_V2DI_INT_UQI: + case UQI_FTYPE_V4SI_V4SI_INT_UQI: + case UQI_FTYPE_V2DF_V2DF_INT_UQI: + case UQI_FTYPE_V4SF_V4SF_INT_UQI: + case UDI_FTYPE_V64QI_V64QI_INT_UDI: + case USI_FTYPE_V32QI_V32QI_INT_USI: + case UHI_FTYPE_V16QI_V16QI_INT_UHI: + case USI_FTYPE_V32HI_V32HI_INT_USI: + case UHI_FTYPE_V16HI_V16HI_INT_UHI: + case UQI_FTYPE_V8HI_V8HI_INT_UQI: + nargs = 4; + mask_pos = 1; + nargs_constant = 1; + break; + case V2DI_FTYPE_V2DI_V2DI_UINT_UINT: + nargs = 4; + nargs_constant = 2; + break; + case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED: + case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG: + nargs = 4; + break; + case UQI_FTYPE_V8DI_V8DI_INT_UQI: + case UHI_FTYPE_V16SI_V16SI_INT_UHI: + mask_pos = 1; + nargs = 4; + nargs_constant = 1; + break; + case V8SF_FTYPE_V8SF_INT_V8SF_UQI: + case V4SF_FTYPE_V4SF_INT_V4SF_UQI: + case V2DF_FTYPE_V4DF_INT_V2DF_UQI: + case V2DI_FTYPE_V4DI_INT_V2DI_UQI: + case V8SF_FTYPE_V16SF_INT_V8SF_UQI: + case 
V8SI_FTYPE_V16SI_INT_V8SI_UQI: + case V2DF_FTYPE_V8DF_INT_V2DF_UQI: + case V2DI_FTYPE_V8DI_INT_V2DI_UQI: + case V4SF_FTYPE_V8SF_INT_V4SF_UQI: + case V4SI_FTYPE_V8SI_INT_V4SI_UQI: + case V8HI_FTYPE_V8SF_INT_V8HI_UQI: + case V8HI_FTYPE_V4SF_INT_V8HI_UQI: + case V32HI_FTYPE_V32HI_INT_V32HI_USI: + case V16HI_FTYPE_V16HI_INT_V16HI_UHI: + case V8HI_FTYPE_V8HI_INT_V8HI_UQI: + case V4DI_FTYPE_V4DI_INT_V4DI_UQI: + case V2DI_FTYPE_V2DI_INT_V2DI_UQI: + case V8SI_FTYPE_V8SI_INT_V8SI_UQI: + case V4SI_FTYPE_V4SI_INT_V4SI_UQI: + case V4DF_FTYPE_V4DF_INT_V4DF_UQI: + case V2DF_FTYPE_V2DF_INT_V2DF_UQI: + case V8DF_FTYPE_V8DF_INT_V8DF_UQI: + case V16SF_FTYPE_V16SF_INT_V16SF_UHI: + case V16HI_FTYPE_V16SF_INT_V16HI_UHI: + case V16SI_FTYPE_V16SI_INT_V16SI_UHI: + case V4SI_FTYPE_V16SI_INT_V4SI_UQI: + case V4DI_FTYPE_V8DI_INT_V4DI_UQI: + case V4DF_FTYPE_V8DF_INT_V4DF_UQI: + case V4SF_FTYPE_V16SF_INT_V4SF_UQI: + case V8DI_FTYPE_V8DI_INT_V8DI_UQI: + nargs = 4; + mask_pos = 2; + nargs_constant = 1; + break; + case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI: + case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI: + case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI: + case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI: + case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI: + case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI: + case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI: + case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI: + case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI: + case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI: + case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI: + case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI: + case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI: + case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI: + case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI: + case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI: + case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI: + case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI: + case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI: + case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI: + case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI: + case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI: + case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI: + case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI: + case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI: + case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI: + case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI: + nargs = 5; + mask_pos = 2; + nargs_constant = 1; + break; + case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI: + case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI: + case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI: + case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI: + case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI: + case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI: + case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI: + case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI: + case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI: + case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI: + nargs = 5; + nargs = 5; + mask_pos = 1; + nargs_constant = 1; + break; + + default: + gcc_unreachable (); + } + + gcc_assert (nargs <= ARRAY_SIZE (args)); + + if (comparison != UNKNOWN) + { + gcc_assert (nargs == 2); + return ix86_expand_sse_compare (d, exp, target, swap); + } + + if (rmode == VOIDmode || rmode == tmode) + { + if (optimize + || target == 0 + || GET_MODE (target) != tmode + || !insn_p->operand[0].predicate (target, tmode)) + target = gen_reg_rtx (tmode); + real_target = target; + } + else + { + real_target = gen_reg_rtx (tmode); + target = simplify_gen_subreg (rmode, real_target, tmode, 0); + } + + for (i = 0; i < nargs; i++) + { + tree arg = CALL_EXPR_ARG (exp, i); + rtx op = expand_normal (arg); + machine_mode mode = insn_p->operand[i + 1].mode; + bool match = insn_p->operand[i + 1].predicate (op, mode); + + if (last_arg_count && (i + 1) == nargs) + { + /* SIMD shift insns take 
either an 8-bit immediate or + register as count. But builtin functions take int as + count. If count doesn't match, we put it in register. */ + if (!match) + { + op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0); + if (!insn_p->operand[i + 1].predicate (op, mode)) + op = copy_to_reg (op); + } + } + else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) || + (!mask_pos && (nargs - i) <= nargs_constant)) + { + if (!match) + switch (icode) + { + case CODE_FOR_avx_vinsertf128v4di: + case CODE_FOR_avx_vextractf128v4di: + error ("the last argument must be an 1-bit immediate"); + return const0_rtx; + + case CODE_FOR_avx512f_cmpv8di3_mask: + case CODE_FOR_avx512f_cmpv16si3_mask: + case CODE_FOR_avx512f_ucmpv8di3_mask: + case CODE_FOR_avx512f_ucmpv16si3_mask: + case CODE_FOR_avx512vl_cmpv4di3_mask: + case CODE_FOR_avx512vl_cmpv8si3_mask: + case CODE_FOR_avx512vl_ucmpv4di3_mask: + case CODE_FOR_avx512vl_ucmpv8si3_mask: + case CODE_FOR_avx512vl_cmpv2di3_mask: + case CODE_FOR_avx512vl_cmpv4si3_mask: + case CODE_FOR_avx512vl_ucmpv2di3_mask: + case CODE_FOR_avx512vl_ucmpv4si3_mask: + error ("the last argument must be a 3-bit immediate"); + return const0_rtx; + + case CODE_FOR_sse4_1_roundsd: + case CODE_FOR_sse4_1_roundss: + + case CODE_FOR_sse4_1_roundpd: + case CODE_FOR_sse4_1_roundps: + case CODE_FOR_avx_roundpd256: + case CODE_FOR_avx_roundps256: + + case CODE_FOR_sse4_1_roundpd_vec_pack_sfix: + case CODE_FOR_sse4_1_roundps_sfix: + case CODE_FOR_avx_roundpd_vec_pack_sfix256: + case CODE_FOR_avx_roundps_sfix256: + + case CODE_FOR_sse4_1_blendps: + case CODE_FOR_avx_blendpd256: + case CODE_FOR_avx_vpermilv4df: + case CODE_FOR_avx_vpermilv4df_mask: + case CODE_FOR_avx512f_getmantv8df_mask: + case CODE_FOR_avx512f_getmantv16sf_mask: + case CODE_FOR_avx512vl_getmantv8sf_mask: + case CODE_FOR_avx512vl_getmantv4df_mask: + case CODE_FOR_avx512vl_getmantv4sf_mask: + case CODE_FOR_avx512vl_getmantv2df_mask: + case CODE_FOR_avx512dq_rangepv8df_mask_round: + case CODE_FOR_avx512dq_rangepv16sf_mask_round: + case CODE_FOR_avx512dq_rangepv4df_mask: + case CODE_FOR_avx512dq_rangepv8sf_mask: + case CODE_FOR_avx512dq_rangepv2df_mask: + case CODE_FOR_avx512dq_rangepv4sf_mask: + case CODE_FOR_avx_shufpd256_mask: + error ("the last argument must be a 4-bit immediate"); + return const0_rtx; + + case CODE_FOR_sha1rnds4: + case CODE_FOR_sse4_1_blendpd: + case CODE_FOR_avx_vpermilv2df: + case CODE_FOR_avx_vpermilv2df_mask: + case CODE_FOR_xop_vpermil2v2df3: + case CODE_FOR_xop_vpermil2v4sf3: + case CODE_FOR_xop_vpermil2v4df3: + case CODE_FOR_xop_vpermil2v8sf3: + case CODE_FOR_avx512f_vinsertf32x4_mask: + case CODE_FOR_avx512f_vinserti32x4_mask: + case CODE_FOR_avx512f_vextractf32x4_mask: + case CODE_FOR_avx512f_vextracti32x4_mask: + case CODE_FOR_sse2_shufpd: + case CODE_FOR_sse2_shufpd_mask: + case CODE_FOR_avx512dq_shuf_f64x2_mask: + case CODE_FOR_avx512dq_shuf_i64x2_mask: + case CODE_FOR_avx512vl_shuf_i32x4_mask: + case CODE_FOR_avx512vl_shuf_f32x4_mask: + error ("the last argument must be a 2-bit immediate"); + return const0_rtx; + + case CODE_FOR_avx_vextractf128v4df: + case CODE_FOR_avx_vextractf128v8sf: + case CODE_FOR_avx_vextractf128v8si: + case CODE_FOR_avx_vinsertf128v4df: + case CODE_FOR_avx_vinsertf128v8sf: + case CODE_FOR_avx_vinsertf128v8si: + case CODE_FOR_avx512f_vinsertf64x4_mask: + case CODE_FOR_avx512f_vinserti64x4_mask: + case CODE_FOR_avx512f_vextractf64x4_mask: + case CODE_FOR_avx512f_vextracti64x4_mask: + case CODE_FOR_avx512dq_vinsertf32x8_mask: + case 
CODE_FOR_avx512dq_vinserti32x8_mask: + case CODE_FOR_avx512vl_vinsertv4df: + case CODE_FOR_avx512vl_vinsertv4di: + case CODE_FOR_avx512vl_vinsertv8sf: + case CODE_FOR_avx512vl_vinsertv8si: + error ("the last argument must be a 1-bit immediate"); + return const0_rtx; + + case CODE_FOR_avx_vmcmpv2df3: + case CODE_FOR_avx_vmcmpv4sf3: + case CODE_FOR_avx_cmpv2df3: + case CODE_FOR_avx_cmpv4sf3: + case CODE_FOR_avx_cmpv4df3: + case CODE_FOR_avx_cmpv8sf3: + case CODE_FOR_avx512f_cmpv8df3_mask: + case CODE_FOR_avx512f_cmpv16sf3_mask: + case CODE_FOR_avx512f_vmcmpv2df3_mask: + case CODE_FOR_avx512f_vmcmpv4sf3_mask: + error ("the last argument must be a 5-bit immediate"); + return const0_rtx; + + default: + switch (nargs_constant) + { + case 2: + if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) || + (!mask_pos && (nargs - i) == nargs_constant)) + { + error ("the next to last argument must be an 8-bit immediate"); + break; + } + case 1: + error ("the last argument must be an 8-bit immediate"); + break; + default: + gcc_unreachable (); + } + return const0_rtx; + } + } + else + { + if (VECTOR_MODE_P (mode)) + op = safe_vector_operand (op, mode); + + /* If we aren't optimizing, only allow one memory operand to + be generated. */ + if (memory_operand (op, mode)) + num_memory++; + + op = fixup_modeless_constant (op, mode); + + if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) + { + if (optimize || !match || num_memory > 1) + op = copy_to_mode_reg (mode, op); + } + else + { + op = copy_to_reg (op); + op = simplify_gen_subreg (mode, op, GET_MODE (op), 0); + } + } + + args[i].op = op; + args[i].mode = mode; + } + + switch (nargs) + { + case 1: + pat = GEN_FCN (icode) (real_target, args[0].op); + break; + case 2: + pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op); + break; + case 3: + pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, + args[2].op); + break; + case 4: + pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, + args[2].op, args[3].op); + break; + case 5: + pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, + args[2].op, args[3].op, args[4].op); + break; + case 6: + pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, + args[2].op, args[3].op, args[4].op, + args[5].op); + break; + default: + gcc_unreachable (); + } + + if (! pat) + return 0; + + emit_insn (pat); + return target; +} + +/* Transform pattern of following layout: + (parallel [ + set (A B) + (unspec [C] UNSPEC_EMBEDDED_ROUNDING)]) + ]) + into: + (set (A B)) + + Or: + (parallel [ A B + ... + (unspec [C] UNSPEC_EMBEDDED_ROUNDING) + ... + ]) + into: + (parallel [ A B ... ]) */ + +static rtx +ix86_erase_embedded_rounding (rtx pat) +{ + if (GET_CODE (pat) == INSN) + pat = PATTERN (pat); + + gcc_assert (GET_CODE (pat) == PARALLEL); + + if (XVECLEN (pat, 0) == 2) + { + rtx p0 = XVECEXP (pat, 0, 0); + rtx p1 = XVECEXP (pat, 0, 1); + + gcc_assert (GET_CODE (p0) == SET + && GET_CODE (p1) == UNSPEC + && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING); + + return p0; + } + else + { + rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0)); + int i = 0; + int j = 0; + + for (; i < XVECLEN (pat, 0); ++i) + { + rtx elem = XVECEXP (pat, 0, i); + if (GET_CODE (elem) != UNSPEC + || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING) + res [j++] = elem; + } + + /* No more than 1 occurence was removed. */ + gcc_assert (j >= XVECLEN (pat, 0) - 1); + + return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res)); + } +} + +/* Subroutine of ix86_expand_round_builtin to take care of comi insns + with rounding. 
*/ +static rtx +ix86_expand_sse_comi_round (const struct builtin_description *d, + tree exp, rtx target) +{ + rtx pat, set_dst; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); + tree arg3 = CALL_EXPR_ARG (exp, 3); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2 = expand_normal (arg2); + rtx op3 = expand_normal (arg3); + enum insn_code icode = d->icode; + const struct insn_data_d *insn_p = &insn_data[icode]; + machine_mode mode0 = insn_p->operand[0].mode; + machine_mode mode1 = insn_p->operand[1].mode; + enum rtx_code comparison = UNEQ; + bool need_ucomi = false; + + /* See avxintrin.h for values. */ + enum rtx_code comi_comparisons[32] = + { + UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT, + UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE, + UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT + }; + bool need_ucomi_values[32] = + { + true, false, false, true, true, false, false, true, + true, false, false, true, true, false, false, true, + false, true, true, false, false, true, true, false, + false, true, true, false, false, true, true, false + }; + + if (!CONST_INT_P (op2)) + { + error ("the third argument must be comparison constant"); + return const0_rtx; + } + if (INTVAL (op2) < 0 || INTVAL (op2) >= 32) + { + error ("incorrect comparison mode"); + return const0_rtx; + } + + if (!insn_p->operand[2].predicate (op3, SImode)) + { + error ("incorrect rounding operand"); + return const0_rtx; + } + + comparison = comi_comparisons[INTVAL (op2)]; + need_ucomi = need_ucomi_values[INTVAL (op2)]; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + target = gen_reg_rtx (SImode); + emit_move_insn (target, const0_rtx); + target = gen_rtx_SUBREG (QImode, target, 0); + + if ((optimize && !register_operand (op0, mode0)) + || !insn_p->operand[0].predicate (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if ((optimize && !register_operand (op1, mode1)) + || !insn_p->operand[1].predicate (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + if (need_ucomi) + icode = icode == CODE_FOR_sse_comi_round + ? CODE_FOR_sse_ucomi_round + : CODE_FOR_sse2_ucomi_round; + + pat = GEN_FCN (icode) (op0, op1, op3); + if (! pat) + return 0; + + /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */ + if (INTVAL (op3) == NO_ROUND) + { + pat = ix86_erase_embedded_rounding (pat); + if (! 
pat) + return 0; + + set_dst = SET_DEST (pat); + } + else + { + gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET); + set_dst = SET_DEST (XVECEXP (pat, 0, 0)); + } + + emit_insn (pat); + emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), + gen_rtx_fmt_ee (comparison, QImode, + set_dst, + const0_rtx))); + + return SUBREG_REG (target); +} + +static rtx +ix86_expand_round_builtin (const struct builtin_description *d, + tree exp, rtx target) +{ + rtx pat; + unsigned int i, nargs; + struct + { + rtx op; + machine_mode mode; + } args[6]; + enum insn_code icode = d->icode; + const struct insn_data_d *insn_p = &insn_data[icode]; + machine_mode tmode = insn_p->operand[0].mode; + unsigned int nargs_constant = 0; + unsigned int redundant_embed_rnd = 0; + + switch ((enum ix86_builtin_func_type) d->flag) + { + case UINT64_FTYPE_V2DF_INT: + case UINT64_FTYPE_V4SF_INT: + case UINT_FTYPE_V2DF_INT: + case UINT_FTYPE_V4SF_INT: + case INT64_FTYPE_V2DF_INT: + case INT64_FTYPE_V4SF_INT: + case INT_FTYPE_V2DF_INT: + case INT_FTYPE_V4SF_INT: + nargs = 2; + break; + case V4SF_FTYPE_V4SF_UINT_INT: + case V4SF_FTYPE_V4SF_UINT64_INT: + case V2DF_FTYPE_V2DF_UINT64_INT: + case V4SF_FTYPE_V4SF_INT_INT: + case V4SF_FTYPE_V4SF_INT64_INT: + case V2DF_FTYPE_V2DF_INT64_INT: + case V4SF_FTYPE_V4SF_V4SF_INT: + case V2DF_FTYPE_V2DF_V2DF_INT: + case V4SF_FTYPE_V4SF_V2DF_INT: + case V2DF_FTYPE_V2DF_V4SF_INT: + nargs = 3; + break; + case V8SF_FTYPE_V8DF_V8SF_QI_INT: + case V8DF_FTYPE_V8DF_V8DF_QI_INT: + case V8SI_FTYPE_V8DF_V8SI_QI_INT: + case V8DI_FTYPE_V8DF_V8DI_QI_INT: + case V8SF_FTYPE_V8DI_V8SF_QI_INT: + case V8DF_FTYPE_V8DI_V8DF_QI_INT: + case V16SF_FTYPE_V16SF_V16SF_HI_INT: + case V8DI_FTYPE_V8SF_V8DI_QI_INT: + case V16SF_FTYPE_V16SI_V16SF_HI_INT: + case V16SI_FTYPE_V16SF_V16SI_HI_INT: + case V8DF_FTYPE_V8SF_V8DF_QI_INT: + case V16SF_FTYPE_V16HI_V16SF_HI_INT: + case V2DF_FTYPE_V2DF_V2DF_V2DF_INT: + case V4SF_FTYPE_V4SF_V4SF_V4SF_INT: + nargs = 4; + break; + case V4SF_FTYPE_V4SF_V4SF_INT_INT: + case V2DF_FTYPE_V2DF_V2DF_INT_INT: + nargs_constant = 2; + nargs = 4; + break; + case INT_FTYPE_V4SF_V4SF_INT_INT: + case INT_FTYPE_V2DF_V2DF_INT_INT: + return ix86_expand_sse_comi_round (d, exp, target); + case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT: + case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT: + case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT: + case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT: + case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT: + case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT: + nargs = 5; + break; + case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT: + case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT: + nargs_constant = 4; + nargs = 5; + break; + case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT: + case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT: + case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT: + case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT: + nargs_constant = 3; + nargs = 5; + break; + case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT: + case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT: + case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT: + case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT: + nargs = 6; + nargs_constant = 4; + break; + case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT: + case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT: + case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT: + case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT: + nargs = 6; + nargs_constant = 3; + break; + default: + gcc_unreachable (); + } + gcc_assert (nargs <= ARRAY_SIZE (args)); + + if (optimize + || target == 0 + || GET_MODE (target) != tmode + || !insn_p->operand[0].predicate (target, tmode)) + target = gen_reg_rtx (tmode); + + for (i = 0; i < nargs; i++) + { + tree 
arg = CALL_EXPR_ARG (exp, i); + rtx op = expand_normal (arg); + machine_mode mode = insn_p->operand[i + 1].mode; + bool match = insn_p->operand[i + 1].predicate (op, mode); + + if (i == nargs - nargs_constant) + { + if (!match) + { + switch (icode) + { + case CODE_FOR_avx512f_getmantv8df_mask_round: + case CODE_FOR_avx512f_getmantv16sf_mask_round: + case CODE_FOR_avx512f_vgetmantv2df_round: + case CODE_FOR_avx512f_vgetmantv4sf_round: + error ("the immediate argument must be a 4-bit immediate"); + return const0_rtx; + case CODE_FOR_avx512f_cmpv8df3_mask_round: + case CODE_FOR_avx512f_cmpv16sf3_mask_round: + case CODE_FOR_avx512f_vmcmpv2df3_mask_round: + case CODE_FOR_avx512f_vmcmpv4sf3_mask_round: + error ("the immediate argument must be a 5-bit immediate"); + return const0_rtx; + default: + error ("the immediate argument must be an 8-bit immediate"); + return const0_rtx; + } + } + } + else if (i == nargs-1) + { + if (!insn_p->operand[nargs].predicate (op, SImode)) + { + error ("incorrect rounding operand"); + return const0_rtx; + } + + /* If there is no rounding use normal version of the pattern. */ + if (INTVAL (op) == NO_ROUND) + redundant_embed_rnd = 1; + } + else + { + if (VECTOR_MODE_P (mode)) + op = safe_vector_operand (op, mode); + + op = fixup_modeless_constant (op, mode); + + if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) + { + if (optimize || !match) + op = copy_to_mode_reg (mode, op); + } + else + { + op = copy_to_reg (op); + op = simplify_gen_subreg (mode, op, GET_MODE (op), 0); + } + } + + args[i].op = op; + args[i].mode = mode; + } + + switch (nargs) + { + case 1: + pat = GEN_FCN (icode) (target, args[0].op); + break; + case 2: + pat = GEN_FCN (icode) (target, args[0].op, args[1].op); + break; + case 3: + pat = GEN_FCN (icode) (target, args[0].op, args[1].op, + args[2].op); + break; + case 4: + pat = GEN_FCN (icode) (target, args[0].op, args[1].op, + args[2].op, args[3].op); + break; + case 5: + pat = GEN_FCN (icode) (target, args[0].op, args[1].op, + args[2].op, args[3].op, args[4].op); + break; + case 6: + pat = GEN_FCN (icode) (target, args[0].op, args[1].op, + args[2].op, args[3].op, args[4].op, + args[5].op); + break; + default: + gcc_unreachable (); + } + + if (!pat) + return 0; + + if (redundant_embed_rnd) + pat = ix86_erase_embedded_rounding (pat); + + emit_insn (pat); + return target; +} + +/* Subroutine of ix86_expand_builtin to take care of special insns + with variable number of operands. 
*/ + +static rtx +ix86_expand_special_args_builtin (const struct builtin_description *d, + tree exp, rtx target) +{ + tree arg; + rtx pat, op; + unsigned int i, nargs, arg_adjust, memory; + bool aligned_mem = false; + struct + { + rtx op; + machine_mode mode; + } args[3]; + enum insn_code icode = d->icode; + bool last_arg_constant = false; + const struct insn_data_d *insn_p = &insn_data[icode]; + machine_mode tmode = insn_p->operand[0].mode; + enum { load, store } klass; + + switch ((enum ix86_builtin_func_type) d->flag) + { + case VOID_FTYPE_VOID: + emit_insn (GEN_FCN (icode) (target)); + return 0; + case VOID_FTYPE_UINT64: + case VOID_FTYPE_UNSIGNED: + nargs = 0; + klass = store; + memory = 0; + break; + + case INT_FTYPE_VOID: + case USHORT_FTYPE_VOID: + case UINT64_FTYPE_VOID: + case UNSIGNED_FTYPE_VOID: + nargs = 0; + klass = load; + memory = 0; + break; + case UINT64_FTYPE_PUNSIGNED: + case V2DI_FTYPE_PV2DI: + case V4DI_FTYPE_PV4DI: + case V32QI_FTYPE_PCCHAR: + case V16QI_FTYPE_PCCHAR: + case V8SF_FTYPE_PCV4SF: + case V8SF_FTYPE_PCFLOAT: + case V4SF_FTYPE_PCFLOAT: + case V4DF_FTYPE_PCV2DF: + case V4DF_FTYPE_PCDOUBLE: + case V2DF_FTYPE_PCDOUBLE: + case VOID_FTYPE_PVOID: + case V8DI_FTYPE_PV8DI: + nargs = 1; + klass = load; + memory = 0; + switch (icode) + { + case CODE_FOR_sse4_1_movntdqa: + case CODE_FOR_avx2_movntdqa: + case CODE_FOR_avx512f_movntdqa: + aligned_mem = true; + break; + default: + break; + } + break; + case VOID_FTYPE_PV2SF_V4SF: + case VOID_FTYPE_PV8DI_V8DI: + case VOID_FTYPE_PV4DI_V4DI: + case VOID_FTYPE_PV2DI_V2DI: + case VOID_FTYPE_PCHAR_V32QI: + case VOID_FTYPE_PCHAR_V16QI: + case VOID_FTYPE_PFLOAT_V16SF: + case VOID_FTYPE_PFLOAT_V8SF: + case VOID_FTYPE_PFLOAT_V4SF: + case VOID_FTYPE_PDOUBLE_V8DF: + case VOID_FTYPE_PDOUBLE_V4DF: + case VOID_FTYPE_PDOUBLE_V2DF: + case VOID_FTYPE_PLONGLONG_LONGLONG: + case VOID_FTYPE_PULONGLONG_ULONGLONG: + case VOID_FTYPE_PINT_INT: + nargs = 1; + klass = store; + /* Reserve memory operand for target. */ + memory = ARRAY_SIZE (args); + switch (icode) + { + /* These builtins and instructions require the memory + to be properly aligned. 
*/ + case CODE_FOR_avx_movntv4di: + case CODE_FOR_sse2_movntv2di: + case CODE_FOR_avx_movntv8sf: + case CODE_FOR_sse_movntv4sf: + case CODE_FOR_sse4a_vmmovntv4sf: + case CODE_FOR_avx_movntv4df: + case CODE_FOR_sse2_movntv2df: + case CODE_FOR_sse4a_vmmovntv2df: + case CODE_FOR_sse2_movntidi: + case CODE_FOR_sse_movntq: + case CODE_FOR_sse2_movntisi: + case CODE_FOR_avx512f_movntv16sf: + case CODE_FOR_avx512f_movntv8df: + case CODE_FOR_avx512f_movntv8di: + aligned_mem = true; + break; + default: + break; + } + break; + case V4SF_FTYPE_V4SF_PCV2SF: + case V2DF_FTYPE_V2DF_PCDOUBLE: + nargs = 2; + klass = load; + memory = 1; + break; + case V8SF_FTYPE_PCV8SF_V8SI: + case V4DF_FTYPE_PCV4DF_V4DI: + case V4SF_FTYPE_PCV4SF_V4SI: + case V2DF_FTYPE_PCV2DF_V2DI: + case V8SI_FTYPE_PCV8SI_V8SI: + case V4DI_FTYPE_PCV4DI_V4DI: + case V4SI_FTYPE_PCV4SI_V4SI: + case V2DI_FTYPE_PCV2DI_V2DI: + nargs = 2; + klass = load; + memory = 0; + break; + case VOID_FTYPE_PV8DF_V8DF_UQI: + case VOID_FTYPE_PV4DF_V4DF_UQI: + case VOID_FTYPE_PV2DF_V2DF_UQI: + case VOID_FTYPE_PV16SF_V16SF_UHI: + case VOID_FTYPE_PV8SF_V8SF_UQI: + case VOID_FTYPE_PV4SF_V4SF_UQI: + case VOID_FTYPE_PV8DI_V8DI_UQI: + case VOID_FTYPE_PV4DI_V4DI_UQI: + case VOID_FTYPE_PV2DI_V2DI_UQI: + case VOID_FTYPE_PV16SI_V16SI_UHI: + case VOID_FTYPE_PV8SI_V8SI_UQI: + case VOID_FTYPE_PV4SI_V4SI_UQI: + switch (icode) + { + /* These builtins and instructions require the memory + to be properly aligned. */ + case CODE_FOR_avx512f_storev16sf_mask: + case CODE_FOR_avx512f_storev16si_mask: + case CODE_FOR_avx512f_storev8df_mask: + case CODE_FOR_avx512f_storev8di_mask: + case CODE_FOR_avx512vl_storev8sf_mask: + case CODE_FOR_avx512vl_storev8si_mask: + case CODE_FOR_avx512vl_storev4df_mask: + case CODE_FOR_avx512vl_storev4di_mask: + case CODE_FOR_avx512vl_storev4sf_mask: + case CODE_FOR_avx512vl_storev4si_mask: + case CODE_FOR_avx512vl_storev2df_mask: + case CODE_FOR_avx512vl_storev2di_mask: + aligned_mem = true; + break; + default: + break; + } + /* FALLTHRU */ + case VOID_FTYPE_PV8SF_V8SI_V8SF: + case VOID_FTYPE_PV4DF_V4DI_V4DF: + case VOID_FTYPE_PV4SF_V4SI_V4SF: + case VOID_FTYPE_PV2DF_V2DI_V2DF: + case VOID_FTYPE_PV8SI_V8SI_V8SI: + case VOID_FTYPE_PV4DI_V4DI_V4DI: + case VOID_FTYPE_PV4SI_V4SI_V4SI: + case VOID_FTYPE_PV2DI_V2DI_V2DI: + case VOID_FTYPE_PV8SI_V8DI_UQI: + case VOID_FTYPE_PV8HI_V8DI_UQI: + case VOID_FTYPE_PV16HI_V16SI_UHI: + case VOID_FTYPE_PV16QI_V8DI_UQI: + case VOID_FTYPE_PV16QI_V16SI_UHI: + case VOID_FTYPE_PV4SI_V4DI_UQI: + case VOID_FTYPE_PV4SI_V2DI_UQI: + case VOID_FTYPE_PV8HI_V4DI_UQI: + case VOID_FTYPE_PV8HI_V2DI_UQI: + case VOID_FTYPE_PV8HI_V8SI_UQI: + case VOID_FTYPE_PV8HI_V4SI_UQI: + case VOID_FTYPE_PV16QI_V4DI_UQI: + case VOID_FTYPE_PV16QI_V2DI_UQI: + case VOID_FTYPE_PV16QI_V8SI_UQI: + case VOID_FTYPE_PV16QI_V4SI_UQI: + case VOID_FTYPE_PV8HI_V8HI_UQI: + case VOID_FTYPE_PV16HI_V16HI_UHI: + case VOID_FTYPE_PV32HI_V32HI_USI: + case VOID_FTYPE_PV16QI_V16QI_UHI: + case VOID_FTYPE_PV32QI_V32QI_USI: + case VOID_FTYPE_PV64QI_V64QI_UDI: + nargs = 2; + klass = store; + /* Reserve memory operand for target. 
*/ + memory = ARRAY_SIZE (args); + break; + case V4SF_FTYPE_PCV4SF_V4SF_UQI: + case V8SF_FTYPE_PCV8SF_V8SF_UQI: + case V16SF_FTYPE_PCV16SF_V16SF_UHI: + case V4SI_FTYPE_PCV4SI_V4SI_UQI: + case V8SI_FTYPE_PCV8SI_V8SI_UQI: + case V16SI_FTYPE_PCV16SI_V16SI_UHI: + case V2DF_FTYPE_PCV2DF_V2DF_UQI: + case V4DF_FTYPE_PCV4DF_V4DF_UQI: + case V8DF_FTYPE_PCV8DF_V8DF_UQI: + case V2DI_FTYPE_PCV2DI_V2DI_UQI: + case V4DI_FTYPE_PCV4DI_V4DI_UQI: + case V8DI_FTYPE_PCV8DI_V8DI_UQI: + case V8HI_FTYPE_PCV8HI_V8HI_UQI: + case V16HI_FTYPE_PCV16HI_V16HI_UHI: + case V32HI_FTYPE_PCV32HI_V32HI_USI: + case V16QI_FTYPE_PCV16QI_V16QI_UHI: + case V32QI_FTYPE_PCV32QI_V32QI_USI: + case V64QI_FTYPE_PCV64QI_V64QI_UDI: + nargs = 3; + klass = load; + memory = 0; + switch (icode) + { + /* These builtins and instructions require the memory + to be properly aligned. */ + case CODE_FOR_avx512f_loadv16sf_mask: + case CODE_FOR_avx512f_loadv16si_mask: + case CODE_FOR_avx512f_loadv8df_mask: + case CODE_FOR_avx512f_loadv8di_mask: + case CODE_FOR_avx512vl_loadv8sf_mask: + case CODE_FOR_avx512vl_loadv8si_mask: + case CODE_FOR_avx512vl_loadv4df_mask: + case CODE_FOR_avx512vl_loadv4di_mask: + case CODE_FOR_avx512vl_loadv4sf_mask: + case CODE_FOR_avx512vl_loadv4si_mask: + case CODE_FOR_avx512vl_loadv2df_mask: + case CODE_FOR_avx512vl_loadv2di_mask: + case CODE_FOR_avx512bw_loadv64qi_mask: + case CODE_FOR_avx512vl_loadv32qi_mask: + case CODE_FOR_avx512vl_loadv16qi_mask: + case CODE_FOR_avx512bw_loadv32hi_mask: + case CODE_FOR_avx512vl_loadv16hi_mask: + case CODE_FOR_avx512vl_loadv8hi_mask: + aligned_mem = true; + break; + default: + break; + } + break; + case VOID_FTYPE_UINT_UINT_UINT: + case VOID_FTYPE_UINT64_UINT_UINT: + case UCHAR_FTYPE_UINT_UINT_UINT: + case UCHAR_FTYPE_UINT64_UINT_UINT: + nargs = 3; + klass = load; + memory = ARRAY_SIZE (args); + last_arg_constant = true; + break; + default: + gcc_unreachable (); + } + + gcc_assert (nargs <= ARRAY_SIZE (args)); + + if (klass == store) + { + arg = CALL_EXPR_ARG (exp, 0); + op = expand_normal (arg); + gcc_assert (target == 0); + if (memory) + { + op = ix86_zero_extend_to_Pmode (op); + target = gen_rtx_MEM (tmode, op); + /* target at this point has just BITS_PER_UNIT MEM_ALIGN + on it. Try to improve it using get_pointer_alignment, + and if the special builtin is one that requires strict + mode alignment, also from it's GET_MODE_ALIGNMENT. + Failure to do so could lead to ix86_legitimate_combined_insn + rejecting all changes to such insns. 
*/ + unsigned int align = get_pointer_alignment (arg); + if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode)) + align = GET_MODE_ALIGNMENT (tmode); + if (MEM_ALIGN (target) < align) + set_mem_align (target, align); + } + else + target = force_reg (tmode, op); + arg_adjust = 1; + } + else + { + arg_adjust = 0; + if (optimize + || target == 0 + || !register_operand (target, tmode) + || GET_MODE (target) != tmode) + target = gen_reg_rtx (tmode); + } + + for (i = 0; i < nargs; i++) + { + machine_mode mode = insn_p->operand[i + 1].mode; + bool match; + + arg = CALL_EXPR_ARG (exp, i + arg_adjust); + op = expand_normal (arg); + match = insn_p->operand[i + 1].predicate (op, mode); + + if (last_arg_constant && (i + 1) == nargs) + { + if (!match) + { + if (icode == CODE_FOR_lwp_lwpvalsi3 + || icode == CODE_FOR_lwp_lwpinssi3 + || icode == CODE_FOR_lwp_lwpvaldi3 + || icode == CODE_FOR_lwp_lwpinsdi3) + error ("the last argument must be a 32-bit immediate"); + else + error ("the last argument must be an 8-bit immediate"); + return const0_rtx; + } + } + else + { + if (i == memory) + { + /* This must be the memory operand. */ + op = ix86_zero_extend_to_Pmode (op); + op = gen_rtx_MEM (mode, op); + /* op at this point has just BITS_PER_UNIT MEM_ALIGN + on it. Try to improve it using get_pointer_alignment, + and if the special builtin is one that requires strict + mode alignment, also from it's GET_MODE_ALIGNMENT. + Failure to do so could lead to ix86_legitimate_combined_insn + rejecting all changes to such insns. */ + unsigned int align = get_pointer_alignment (arg); + if (aligned_mem && align < GET_MODE_ALIGNMENT (mode)) + align = GET_MODE_ALIGNMENT (mode); + if (MEM_ALIGN (op) < align) + set_mem_align (op, align); + } + else + { + /* This must be register. */ + if (VECTOR_MODE_P (mode)) + op = safe_vector_operand (op, mode); + + op = fixup_modeless_constant (op, mode); + + if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) + op = copy_to_mode_reg (mode, op); + else + { + op = copy_to_reg (op); + op = simplify_gen_subreg (mode, op, GET_MODE (op), 0); + } + } + } + + args[i].op = op; + args[i].mode = mode; + } + + switch (nargs) + { + case 0: + pat = GEN_FCN (icode) (target); + break; + case 1: + pat = GEN_FCN (icode) (target, args[0].op); + break; + case 2: + pat = GEN_FCN (icode) (target, args[0].op, args[1].op); + break; + case 3: + pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); + break; + default: + gcc_unreachable (); + } + + if (! pat) + return 0; + emit_insn (pat); + return klass == store ? 0 : target; +} + +/* Return the integer constant in ARG. Constrain it to be in the range + of the subparts of VEC_TYPE; issue an error if not. */ + +static int +get_element_number (tree vec_type, tree arg) +{ + unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; + + if (!tree_fits_uhwi_p (arg) + || (elt = tree_to_uhwi (arg), elt > max)) + { + error ("selector must be an integer constant in the range 0..%wi", max); + return 0; + } + + return elt; +} + +/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around + ix86_expand_vector_init. We DO have language-level syntax for this, in + the form of (type){ init-list }. Except that since we can't place emms + instructions from inside the compiler, we can't allow the use of MMX + registers unless the user explicitly asks for it. So we do *not* define + vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. 
Instead + we have builtins invoked by mmintrin.h that gives us license to emit + these sorts of instructions. */ + +static rtx +ix86_expand_vec_init_builtin (tree type, tree exp, rtx target) +{ + machine_mode tmode = TYPE_MODE (type); + machine_mode inner_mode = GET_MODE_INNER (tmode); + int i, n_elt = GET_MODE_NUNITS (tmode); + rtvec v = rtvec_alloc (n_elt); + + gcc_assert (VECTOR_MODE_P (tmode)); + gcc_assert (call_expr_nargs (exp) == n_elt); + + for (i = 0; i < n_elt; ++i) + { + rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); + RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); + } + + if (!target || !register_operand (target, tmode)) + target = gen_reg_rtx (tmode); + + ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v)); + return target; +} + +/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around + ix86_expand_vector_extract. They would be redundant (for non-MMX) if we + had a language-level syntax for referencing vector elements. */ + +static rtx +ix86_expand_vec_ext_builtin (tree exp, rtx target) +{ + machine_mode tmode, mode0; + tree arg0, arg1; + int elt; + rtx op0; + + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + op0 = expand_normal (arg0); + elt = get_element_number (TREE_TYPE (arg0), arg1); + + tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); + mode0 = TYPE_MODE (TREE_TYPE (arg0)); + gcc_assert (VECTOR_MODE_P (mode0)); + + op0 = force_reg (mode0, op0); + + if (optimize || !target || !register_operand (target, tmode)) + target = gen_reg_rtx (tmode); + + ix86_expand_vector_extract (true, target, op0, elt); + + return target; +} + +/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around + ix86_expand_vector_set. They would be redundant (for non-MMX) if we had + a language-level syntax for referencing vector elements. */ + +static rtx +ix86_expand_vec_set_builtin (tree exp) +{ + machine_mode tmode, mode1; + tree arg0, arg1, arg2; + int elt; + rtx op0, op1, target; + + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + + tmode = TYPE_MODE (TREE_TYPE (arg0)); + mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); + gcc_assert (VECTOR_MODE_P (tmode)); + + op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL); + elt = get_element_number (TREE_TYPE (arg0), arg2); + + if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) + op1 = convert_modes (mode1, GET_MODE (op1), op1, true); + + op0 = force_reg (tmode, op0); + op1 = force_reg (mode1, op1); + + /* OP0 is the source of these builtin functions and shouldn't be + modified. Create a copy, use it and return it as target. */ + target = gen_reg_rtx (tmode); + emit_move_insn (target, op0); + ix86_expand_vector_set (true, target, op1, elt); + + return target; +} + +/* Emit conditional move of SRC to DST with condition + OP1 CODE OP2. */ +static void +ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2) +{ + rtx t; + + if (TARGET_CMOVE) + { + t = ix86_expand_compare (code, op1, op2); + emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t, + src, dst))); + } + else + { + rtx_code_label *nomove = gen_label_rtx (); + emit_cmp_and_jump_insns (op1, op2, reverse_condition (code), + const0_rtx, GET_MODE (op1), 1, nomove); + emit_move_insn (dst, src); + emit_label (nomove); + } +} + +/* Choose max of DST and SRC and put it to DST. 
*/ +static void +ix86_emit_move_max (rtx dst, rtx src) +{ + ix86_emit_cmove (dst, src, LTU, dst, src); +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. */ + +static rtx +ix86_expand_builtin (tree exp, rtx target, rtx subtarget, + machine_mode mode, int ignore) +{ + const struct builtin_description *d; + size_t i; + enum insn_code icode; + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + tree arg0, arg1, arg2, arg3, arg4; + rtx op0, op1, op2, op3, op4, pat, insn; + machine_mode mode0, mode1, mode2, mode3, mode4; + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + + /* For CPU builtins that can be folded, fold first and expand the fold. */ + switch (fcode) + { + case IX86_BUILTIN_CPU_INIT: + { + /* Make it call __cpu_indicator_init in libgcc. */ + tree call_expr, fndecl, type; + type = build_function_type_list (integer_type_node, NULL_TREE); + fndecl = build_fn_decl ("__cpu_indicator_init", type); + call_expr = build_call_expr (fndecl, 0); + return expand_expr (call_expr, target, mode, EXPAND_NORMAL); + } + case IX86_BUILTIN_CPU_IS: + case IX86_BUILTIN_CPU_SUPPORTS: + { + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree fold_expr = fold_builtin_cpu (fndecl, &arg0); + gcc_assert (fold_expr != NULL_TREE); + return expand_expr (fold_expr, target, mode, EXPAND_NORMAL); + } + } + + /* Determine whether the builtin function is available under the current ISA. + Originally the builtin was not created if it wasn't applicable to the + current ISA based on the command line switches. With function specific + options, we need to check in the context of the function making the call + whether it is supported. */ + if (ix86_builtins_isa[fcode].isa + && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags)) + { + char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL, + NULL, (enum fpmath_unit) 0, false); + + if (!opts) + error ("%qE needs unknown isa option", fndecl); + else + { + gcc_assert (opts != NULL); + error ("%qE needs isa option %s", fndecl, opts); + free (opts); + } + return expand_call (exp, target, ignore); + } + + switch (fcode) + { + case IX86_BUILTIN_BNDMK: + if (!target + || GET_MODE (target) != BNDmode + || !register_operand (target, BNDmode)) + target = gen_reg_rtx (BNDmode); + + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + if (!register_operand (op0, Pmode)) + op0 = ix86_zero_extend_to_Pmode (op0); + if (!register_operand (op1, Pmode)) + op1 = ix86_zero_extend_to_Pmode (op1); + + /* Builtin arg1 is size of block but instruction op1 should + be (size - 1). */ + op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx, + NULL_RTX, 1, OPTAB_DIRECT); + + emit_insn (BNDmode == BND64mode + ? 
gen_bnd64_mk (target, op0, op1) + : gen_bnd32_mk (target, op0, op1)); + return target; + + case IX86_BUILTIN_BNDSTX: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + + if (!register_operand (op0, Pmode)) + op0 = ix86_zero_extend_to_Pmode (op0); + if (!register_operand (op1, BNDmode)) + op1 = copy_to_mode_reg (BNDmode, op1); + if (!register_operand (op2, Pmode)) + op2 = ix86_zero_extend_to_Pmode (op2); + + emit_insn (BNDmode == BND64mode + ? gen_bnd64_stx (op2, op0, op1) + : gen_bnd32_stx (op2, op0, op1)); + return 0; + + case IX86_BUILTIN_BNDLDX: + if (!target + || GET_MODE (target) != BNDmode + || !register_operand (target, BNDmode)) + target = gen_reg_rtx (BNDmode); + + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + if (!register_operand (op0, Pmode)) + op0 = ix86_zero_extend_to_Pmode (op0); + if (!register_operand (op1, Pmode)) + op1 = ix86_zero_extend_to_Pmode (op1); + + emit_insn (BNDmode == BND64mode + ? gen_bnd64_ldx (target, op0, op1) + : gen_bnd32_ldx (target, op0, op1)); + return target; + + case IX86_BUILTIN_BNDCL: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + if (!register_operand (op0, Pmode)) + op0 = ix86_zero_extend_to_Pmode (op0); + if (!register_operand (op1, BNDmode)) + op1 = copy_to_mode_reg (BNDmode, op1); + + emit_insn (BNDmode == BND64mode + ? gen_bnd64_cl (op1, op0) + : gen_bnd32_cl (op1, op0)); + return 0; + + case IX86_BUILTIN_BNDCU: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + if (!register_operand (op0, Pmode)) + op0 = ix86_zero_extend_to_Pmode (op0); + if (!register_operand (op1, BNDmode)) + op1 = copy_to_mode_reg (BNDmode, op1); + + emit_insn (BNDmode == BND64mode + ? gen_bnd64_cu (op1, op0) + : gen_bnd32_cu (op1, op0)); + return 0; + + case IX86_BUILTIN_BNDRET: + arg0 = CALL_EXPR_ARG (exp, 0); + gcc_assert (TREE_CODE (arg0) == SSA_NAME); + target = chkp_get_rtl_bounds (arg0); + + /* If no bounds were specified for returned value, + then use INIT bounds. It usually happens when + some built-in function is expanded. */ + if (!target) + { + rtx t1 = gen_reg_rtx (Pmode); + rtx t2 = gen_reg_rtx (Pmode); + target = gen_reg_rtx (BNDmode); + emit_move_insn (t1, const0_rtx); + emit_move_insn (t2, constm1_rtx); + emit_insn (BNDmode == BND64mode + ? gen_bnd64_mk (target, t1, t2) + : gen_bnd32_mk (target, t1, t2)); + } + + gcc_assert (target && REG_P (target)); + return target; + + case IX86_BUILTIN_BNDNARROW: + { + rtx m1, m1h1, m1h2, lb, ub, t1; + + /* Return value and lb. */ + arg0 = CALL_EXPR_ARG (exp, 0); + /* Bounds. */ + arg1 = CALL_EXPR_ARG (exp, 1); + /* Size. */ + arg2 = CALL_EXPR_ARG (exp, 2); + + lb = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + + /* Size was passed but we need to use (size - 1) as for bndmk. */ + op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx, + NULL_RTX, 1, OPTAB_DIRECT); + + /* Add LB to size and inverse to get UB. 
*/ + op2 = expand_simple_binop (Pmode, PLUS, op2, lb, + op2, 1, OPTAB_DIRECT); + ub = expand_simple_unop (Pmode, NOT, op2, op2, 1); + + if (!register_operand (lb, Pmode)) + lb = ix86_zero_extend_to_Pmode (lb); + if (!register_operand (ub, Pmode)) + ub = ix86_zero_extend_to_Pmode (ub); + + /* We need to move bounds to memory before any computations. */ + if (MEM_P (op1)) + m1 = op1; + else + { + m1 = assign_386_stack_local (BNDmode, SLOT_TEMP); + emit_move_insn (m1, op1); + } + + /* Generate mem expression to be used for access to LB and UB. */ + m1h1 = adjust_address (m1, Pmode, 0); + m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode)); + + t1 = gen_reg_rtx (Pmode); + + /* Compute LB. */ + emit_move_insn (t1, m1h1); + ix86_emit_move_max (t1, lb); + emit_move_insn (m1h1, t1); + + /* Compute UB. UB is stored in 1's complement form. Therefore + we also use max here. */ + emit_move_insn (t1, m1h2); + ix86_emit_move_max (t1, ub); + emit_move_insn (m1h2, t1); + + op2 = gen_reg_rtx (BNDmode); + emit_move_insn (op2, m1); + + return chkp_join_splitted_slot (lb, op2); + } + + case IX86_BUILTIN_BNDINT: + { + rtx res, rh1, rh2, lb1, lb2, ub1, ub2; + + if (!target + || GET_MODE (target) != BNDmode + || !register_operand (target, BNDmode)) + target = gen_reg_rtx (BNDmode); + + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + res = assign_386_stack_local (BNDmode, SLOT_TEMP); + rh1 = adjust_address (res, Pmode, 0); + rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode)); + + /* Put first bounds to temporaries. */ + lb1 = gen_reg_rtx (Pmode); + ub1 = gen_reg_rtx (Pmode); + if (MEM_P (op0)) + { + emit_move_insn (lb1, adjust_address (op0, Pmode, 0)); + emit_move_insn (ub1, adjust_address (op0, Pmode, + GET_MODE_SIZE (Pmode))); + } + else + { + emit_move_insn (res, op0); + emit_move_insn (lb1, rh1); + emit_move_insn (ub1, rh2); + } + + /* Put second bounds to temporaries. */ + lb2 = gen_reg_rtx (Pmode); + ub2 = gen_reg_rtx (Pmode); + if (MEM_P (op1)) + { + emit_move_insn (lb2, adjust_address (op1, Pmode, 0)); + emit_move_insn (ub2, adjust_address (op1, Pmode, + GET_MODE_SIZE (Pmode))); + } + else + { + emit_move_insn (res, op1); + emit_move_insn (lb2, rh1); + emit_move_insn (ub2, rh2); + } + + /* Compute LB. */ + ix86_emit_move_max (lb1, lb2); + emit_move_insn (rh1, lb1); + + /* Compute UB. UB is stored in 1's complement form. Therefore + we also use max here. */ + ix86_emit_move_max (ub1, ub2); + emit_move_insn (rh2, ub1); + + emit_move_insn (target, res); + + return target; + } + + case IX86_BUILTIN_SIZEOF: + { + tree name; + rtx symbol; + + if (!target + || GET_MODE (target) != Pmode + || !register_operand (target, Pmode)) + target = gen_reg_rtx (Pmode); + + arg0 = CALL_EXPR_ARG (exp, 0); + gcc_assert (TREE_CODE (arg0) == VAR_DECL); + + name = DECL_ASSEMBLER_NAME (arg0); + symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name)); + + emit_insn (Pmode == SImode + ? gen_move_size_reloc_si (target, symbol) + : gen_move_size_reloc_di (target, symbol)); + + return target; + } + + case IX86_BUILTIN_BNDLOWER: + { + rtx mem, hmem; + + if (!target + || GET_MODE (target) != Pmode + || !register_operand (target, Pmode)) + target = gen_reg_rtx (Pmode); + + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + + /* We need to move bounds to memory first. 
*/ + if (MEM_P (op0)) + mem = op0; + else + { + mem = assign_386_stack_local (BNDmode, SLOT_TEMP); + emit_move_insn (mem, op0); + } + + /* Generate mem expression to access LB and load it. */ + hmem = adjust_address (mem, Pmode, 0); + emit_move_insn (target, hmem); + + return target; + } + + case IX86_BUILTIN_BNDUPPER: + { + rtx mem, hmem, res; + + if (!target + || GET_MODE (target) != Pmode + || !register_operand (target, Pmode)) + target = gen_reg_rtx (Pmode); + + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + + /* We need to move bounds to memory first. */ + if (MEM_P (op0)) + mem = op0; + else + { + mem = assign_386_stack_local (BNDmode, SLOT_TEMP); + emit_move_insn (mem, op0); + } + + /* Generate mem expression to access UB. */ + hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode)); + + /* We need to inverse all bits of UB. */ + res = expand_simple_unop (Pmode, NOT, hmem, target, 1); + + if (res != target) + emit_move_insn (target, res); + + return target; + } + + case IX86_BUILTIN_MASKMOVQ: + case IX86_BUILTIN_MASKMOVDQU: + icode = (fcode == IX86_BUILTIN_MASKMOVQ + ? CODE_FOR_mmx_maskmovq + : CODE_FOR_sse2_maskmovdqu); + /* Note the arg order is different from the operand order. */ + arg1 = CALL_EXPR_ARG (exp, 0); + arg2 = CALL_EXPR_ARG (exp, 1); + arg0 = CALL_EXPR_ARG (exp, 2); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + + op0 = ix86_zero_extend_to_Pmode (op0); + op0 = gen_rtx_MEM (mode1, op0); + + if (!insn_data[icode].operand[0].predicate (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (!insn_data[icode].operand[1].predicate (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (!insn_data[icode].operand[2].predicate (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + pat = GEN_FCN (icode) (op0, op1, op2); + if (! 
pat) + return 0; + emit_insn (pat); + return 0; + + case IX86_BUILTIN_LDMXCSR: + op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); + target = assign_386_stack_local (SImode, SLOT_TEMP); + emit_move_insn (target, op0); + emit_insn (gen_sse_ldmxcsr (target)); + return 0; + + case IX86_BUILTIN_STMXCSR: + target = assign_386_stack_local (SImode, SLOT_TEMP); + emit_insn (gen_sse_stmxcsr (target)); + return copy_to_mode_reg (SImode, target); + + case IX86_BUILTIN_CLFLUSH: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + icode = CODE_FOR_sse2_clflush; + if (!insn_data[icode].operand[0].predicate (op0, Pmode)) + op0 = ix86_zero_extend_to_Pmode (op0); + + emit_insn (gen_sse2_clflush (op0)); + return 0; + + case IX86_BUILTIN_CLWB: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + icode = CODE_FOR_clwb; + if (!insn_data[icode].operand[0].predicate (op0, Pmode)) + op0 = ix86_zero_extend_to_Pmode (op0); + + emit_insn (gen_clwb (op0)); + return 0; + + case IX86_BUILTIN_CLFLUSHOPT: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + icode = CODE_FOR_clflushopt; + if (!insn_data[icode].operand[0].predicate (op0, Pmode)) + op0 = ix86_zero_extend_to_Pmode (op0); + + emit_insn (gen_clflushopt (op0)); + return 0; + + case IX86_BUILTIN_MONITOR: + case IX86_BUILTIN_MONITORX: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + if (!REG_P (op0)) + op0 = ix86_zero_extend_to_Pmode (op0); + if (!REG_P (op1)) + op1 = copy_to_mode_reg (SImode, op1); + if (!REG_P (op2)) + op2 = copy_to_mode_reg (SImode, op2); + + emit_insn (fcode == IX86_BUILTIN_MONITOR + ? ix86_gen_monitor (op0, op1, op2) + : ix86_gen_monitorx (op0, op1, op2)); + return 0; + + case IX86_BUILTIN_MWAIT: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + if (!REG_P (op0)) + op0 = copy_to_mode_reg (SImode, op0); + if (!REG_P (op1)) + op1 = copy_to_mode_reg (SImode, op1); + emit_insn (gen_sse3_mwait (op0, op1)); + return 0; + + case IX86_BUILTIN_MWAITX: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + if (!REG_P (op0)) + op0 = copy_to_mode_reg (SImode, op0); + if (!REG_P (op1)) + op1 = copy_to_mode_reg (SImode, op1); + if (!REG_P (op2)) + op2 = copy_to_mode_reg (SImode, op2); + emit_insn (gen_mwaitx (op0, op1, op2)); + return 0; + + case IX86_BUILTIN_CLZERO: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + if (!REG_P (op0)) + op0 = ix86_zero_extend_to_Pmode (op0); + emit_insn (ix86_gen_clzero (op0)); + return 0; + + case IX86_BUILTIN_VEC_INIT_V2SI: + case IX86_BUILTIN_VEC_INIT_V4HI: + case IX86_BUILTIN_VEC_INIT_V8QI: + return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target); + + case IX86_BUILTIN_VEC_EXT_V2DF: + case IX86_BUILTIN_VEC_EXT_V2DI: + case IX86_BUILTIN_VEC_EXT_V4SF: + case IX86_BUILTIN_VEC_EXT_V4SI: + case IX86_BUILTIN_VEC_EXT_V8HI: + case IX86_BUILTIN_VEC_EXT_V2SI: + case IX86_BUILTIN_VEC_EXT_V4HI: + case IX86_BUILTIN_VEC_EXT_V16QI: + return ix86_expand_vec_ext_builtin (exp, target); + + case IX86_BUILTIN_VEC_SET_V2DI: + case IX86_BUILTIN_VEC_SET_V4SF: + case IX86_BUILTIN_VEC_SET_V4SI: + case IX86_BUILTIN_VEC_SET_V8HI: + case IX86_BUILTIN_VEC_SET_V4HI: + case IX86_BUILTIN_VEC_SET_V16QI: + return ix86_expand_vec_set_builtin 
(exp); + + case IX86_BUILTIN_INFQ: + case IX86_BUILTIN_HUGE_VALQ: + { + REAL_VALUE_TYPE inf; + rtx tmp; + + real_inf (&inf); + tmp = const_double_from_real_value (inf, mode); + + tmp = validize_mem (force_const_mem (mode, tmp)); + + if (target == 0) + target = gen_reg_rtx (mode); + + emit_move_insn (target, tmp); + return target; + } + + case IX86_BUILTIN_RDPMC: + case IX86_BUILTIN_RDTSC: + case IX86_BUILTIN_RDTSCP: + + op0 = gen_reg_rtx (DImode); + op1 = gen_reg_rtx (DImode); + + if (fcode == IX86_BUILTIN_RDPMC) + { + arg0 = CALL_EXPR_ARG (exp, 0); + op2 = expand_normal (arg0); + if (!register_operand (op2, SImode)) + op2 = copy_to_mode_reg (SImode, op2); + + insn = (TARGET_64BIT + ? gen_rdpmc_rex64 (op0, op1, op2) + : gen_rdpmc (op0, op2)); + emit_insn (insn); + } + else if (fcode == IX86_BUILTIN_RDTSC) + { + insn = (TARGET_64BIT + ? gen_rdtsc_rex64 (op0, op1) + : gen_rdtsc (op0)); + emit_insn (insn); + } + else + { + op2 = gen_reg_rtx (SImode); + + insn = (TARGET_64BIT + ? gen_rdtscp_rex64 (op0, op1, op2) + : gen_rdtscp (op0, op2)); + emit_insn (insn); + + arg0 = CALL_EXPR_ARG (exp, 0); + op4 = expand_normal (arg0); + if (!address_operand (op4, VOIDmode)) + { + op4 = convert_memory_address (Pmode, op4); + op4 = copy_addr_to_reg (op4); + } + emit_move_insn (gen_rtx_MEM (SImode, op4), op2); + } + + if (target == 0) + { + /* mode is VOIDmode if __builtin_rd* has been called + without lhs. */ + if (mode == VOIDmode) + return target; + target = gen_reg_rtx (mode); + } + + if (TARGET_64BIT) + { + op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32), + op1, 1, OPTAB_DIRECT); + op0 = expand_simple_binop (DImode, IOR, op0, op1, + op0, 1, OPTAB_DIRECT); + } + + emit_move_insn (target, op0); + return target; + + case IX86_BUILTIN_FXSAVE: + case IX86_BUILTIN_FXRSTOR: + case IX86_BUILTIN_FXSAVE64: + case IX86_BUILTIN_FXRSTOR64: + case IX86_BUILTIN_FNSTENV: + case IX86_BUILTIN_FLDENV: + mode0 = BLKmode; + switch (fcode) + { + case IX86_BUILTIN_FXSAVE: + icode = CODE_FOR_fxsave; + break; + case IX86_BUILTIN_FXRSTOR: + icode = CODE_FOR_fxrstor; + break; + case IX86_BUILTIN_FXSAVE64: + icode = CODE_FOR_fxsave64; + break; + case IX86_BUILTIN_FXRSTOR64: + icode = CODE_FOR_fxrstor64; + break; + case IX86_BUILTIN_FNSTENV: + icode = CODE_FOR_fnstenv; + break; + case IX86_BUILTIN_FLDENV: + icode = CODE_FOR_fldenv; + break; + default: + gcc_unreachable (); + } + + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + + if (!address_operand (op0, VOIDmode)) + { + op0 = convert_memory_address (Pmode, op0); + op0 = copy_addr_to_reg (op0); + } + op0 = gen_rtx_MEM (mode0, op0); + + pat = GEN_FCN (icode) (op0); + if (pat) + emit_insn (pat); + return 0; + + case IX86_BUILTIN_XSAVE: + case IX86_BUILTIN_XRSTOR: + case IX86_BUILTIN_XSAVE64: + case IX86_BUILTIN_XRSTOR64: + case IX86_BUILTIN_XSAVEOPT: + case IX86_BUILTIN_XSAVEOPT64: + case IX86_BUILTIN_XSAVES: + case IX86_BUILTIN_XRSTORS: + case IX86_BUILTIN_XSAVES64: + case IX86_BUILTIN_XRSTORS64: + case IX86_BUILTIN_XSAVEC: + case IX86_BUILTIN_XSAVEC64: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + if (!address_operand (op0, VOIDmode)) + { + op0 = convert_memory_address (Pmode, op0); + op0 = copy_addr_to_reg (op0); + } + op0 = gen_rtx_MEM (BLKmode, op0); + + op1 = force_reg (DImode, op1); + + if (TARGET_64BIT) + { + op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32), + NULL, 1, OPTAB_DIRECT); + switch (fcode) + { + case IX86_BUILTIN_XSAVE: + icode = 
CODE_FOR_xsave_rex64; + break; + case IX86_BUILTIN_XRSTOR: + icode = CODE_FOR_xrstor_rex64; + break; + case IX86_BUILTIN_XSAVE64: + icode = CODE_FOR_xsave64; + break; + case IX86_BUILTIN_XRSTOR64: + icode = CODE_FOR_xrstor64; + break; + case IX86_BUILTIN_XSAVEOPT: + icode = CODE_FOR_xsaveopt_rex64; + break; + case IX86_BUILTIN_XSAVEOPT64: + icode = CODE_FOR_xsaveopt64; + break; + case IX86_BUILTIN_XSAVES: + icode = CODE_FOR_xsaves_rex64; + break; + case IX86_BUILTIN_XRSTORS: + icode = CODE_FOR_xrstors_rex64; + break; + case IX86_BUILTIN_XSAVES64: + icode = CODE_FOR_xsaves64; + break; + case IX86_BUILTIN_XRSTORS64: + icode = CODE_FOR_xrstors64; + break; + case IX86_BUILTIN_XSAVEC: + icode = CODE_FOR_xsavec_rex64; + break; + case IX86_BUILTIN_XSAVEC64: + icode = CODE_FOR_xsavec64; + break; + default: + gcc_unreachable (); + } + + op2 = gen_lowpart (SImode, op2); + op1 = gen_lowpart (SImode, op1); + pat = GEN_FCN (icode) (op0, op1, op2); + } + else + { + switch (fcode) + { + case IX86_BUILTIN_XSAVE: + icode = CODE_FOR_xsave; + break; + case IX86_BUILTIN_XRSTOR: + icode = CODE_FOR_xrstor; + break; + case IX86_BUILTIN_XSAVEOPT: + icode = CODE_FOR_xsaveopt; + break; + case IX86_BUILTIN_XSAVES: + icode = CODE_FOR_xsaves; + break; + case IX86_BUILTIN_XRSTORS: + icode = CODE_FOR_xrstors; + break; + case IX86_BUILTIN_XSAVEC: + icode = CODE_FOR_xsavec; + break; + default: + gcc_unreachable (); + } + pat = GEN_FCN (icode) (op0, op1); + } + + if (pat) + emit_insn (pat); + return 0; + + case IX86_BUILTIN_LLWPCB: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + icode = CODE_FOR_lwp_llwpcb; + if (!insn_data[icode].operand[0].predicate (op0, Pmode)) + op0 = ix86_zero_extend_to_Pmode (op0); + emit_insn (gen_lwp_llwpcb (op0)); + return 0; + + case IX86_BUILTIN_SLWPCB: + icode = CODE_FOR_lwp_slwpcb; + if (!target + || !insn_data[icode].operand[0].predicate (target, Pmode)) + target = gen_reg_rtx (Pmode); + emit_insn (gen_lwp_slwpcb (target)); + return target; + + case IX86_BUILTIN_BEXTRI32: + case IX86_BUILTIN_BEXTRI64: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + icode = (fcode == IX86_BUILTIN_BEXTRI32 + ? CODE_FOR_tbm_bextri_si + : CODE_FOR_tbm_bextri_di); + if (!CONST_INT_P (op1)) + { + error ("last argument must be an immediate"); + return const0_rtx; + } + else + { + unsigned char length = (INTVAL (op1) >> 8) & 0xFF; + unsigned char lsb_index = INTVAL (op1) & 0xFF; + op1 = GEN_INT (length); + op2 = GEN_INT (lsb_index); + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (pat) + emit_insn (pat); + return target; + } + + case IX86_BUILTIN_RDRAND16_STEP: + icode = CODE_FOR_rdrandhi_1; + mode0 = HImode; + goto rdrand_step; + + case IX86_BUILTIN_RDRAND32_STEP: + icode = CODE_FOR_rdrandsi_1; + mode0 = SImode; + goto rdrand_step; + + case IX86_BUILTIN_RDRAND64_STEP: + icode = CODE_FOR_rdranddi_1; + mode0 = DImode; + +rdrand_step: + op0 = gen_reg_rtx (mode0); + emit_insn (GEN_FCN (icode) (op0)); + + arg0 = CALL_EXPR_ARG (exp, 0); + op1 = expand_normal (arg0); + if (!address_operand (op1, VOIDmode)) + { + op1 = convert_memory_address (Pmode, op1); + op1 = copy_addr_to_reg (op1); + } + emit_move_insn (gen_rtx_MEM (mode0, op1), op0); + + op1 = gen_reg_rtx (SImode); + emit_move_insn (op1, CONST1_RTX (SImode)); + + /* Emit SImode conditional move. 
*/ + if (mode0 == HImode) + { + op2 = gen_reg_rtx (SImode); + emit_insn (gen_zero_extendhisi2 (op2, op0)); + } + else if (mode0 == SImode) + op2 = op0; + else + op2 = gen_rtx_SUBREG (SImode, op0, 0); + + if (target == 0 + || !register_operand (target, SImode)) + target = gen_reg_rtx (SImode); + + pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (target, + gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1))); + return target; + + case IX86_BUILTIN_RDSEED16_STEP: + icode = CODE_FOR_rdseedhi_1; + mode0 = HImode; + goto rdseed_step; + + case IX86_BUILTIN_RDSEED32_STEP: + icode = CODE_FOR_rdseedsi_1; + mode0 = SImode; + goto rdseed_step; + + case IX86_BUILTIN_RDSEED64_STEP: + icode = CODE_FOR_rdseeddi_1; + mode0 = DImode; + +rdseed_step: + op0 = gen_reg_rtx (mode0); + emit_insn (GEN_FCN (icode) (op0)); + + arg0 = CALL_EXPR_ARG (exp, 0); + op1 = expand_normal (arg0); + if (!address_operand (op1, VOIDmode)) + { + op1 = convert_memory_address (Pmode, op1); + op1 = copy_addr_to_reg (op1); + } + emit_move_insn (gen_rtx_MEM (mode0, op1), op0); + + op2 = gen_reg_rtx (QImode); + + pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (op2, pat)); + + if (target == 0 + || !register_operand (target, SImode)) + target = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendqisi2 (target, op2)); + return target; + + case IX86_BUILTIN_SBB32: + icode = CODE_FOR_subborrowsi; + mode0 = SImode; + goto handlecarry; + + case IX86_BUILTIN_SBB64: + icode = CODE_FOR_subborrowdi; + mode0 = DImode; + goto handlecarry; + + case IX86_BUILTIN_ADDCARRYX32: + icode = CODE_FOR_addcarrysi; + mode0 = SImode; + goto handlecarry; + + case IX86_BUILTIN_ADDCARRYX64: + icode = CODE_FOR_addcarrydi; + mode0 = DImode; + + handlecarry: + arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */ + arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */ + arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */ + arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */ + + op1 = expand_normal (arg0); + op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1)); + + op2 = expand_normal (arg1); + if (!register_operand (op2, mode0)) + op2 = copy_to_mode_reg (mode0, op2); + + op3 = expand_normal (arg2); + if (!register_operand (op3, mode0)) + op3 = copy_to_mode_reg (mode0, op3); + + op4 = expand_normal (arg3); + if (!address_operand (op4, VOIDmode)) + { + op4 = convert_memory_address (Pmode, op4); + op4 = copy_addr_to_reg (op4); + } + + /* Generate CF from input operand. */ + emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx)); + + /* Generate instruction that consumes CF. */ + op0 = gen_reg_rtx (mode0); + + op1 = gen_rtx_REG (CCCmode, FLAGS_REG); + pat = gen_rtx_LTU (mode0, op1, const0_rtx); + emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat)); + + /* Return current CF value. */ + if (target == 0) + target = gen_reg_rtx (QImode); + + PUT_MODE (pat, QImode); + emit_insn (gen_rtx_SET (target, pat)); + + /* Store the result. 
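+     The sum computed above (op0) is written through op4 to *sum_out,
+     and the QImode carry/borrow value materialized just before is
+     what the caller receives.  For IX86_BUILTIN_ADDCARRYX32 this
+     corresponds to an intrinsic of (assumed) shape
+       unsigned char _addcarryx_u32 (unsigned char c_in, unsigned int a,
+                                     unsigned int b, unsigned int *out);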
*/ + emit_move_insn (gen_rtx_MEM (mode0, op4), op0); + + return target; + + case IX86_BUILTIN_READ_FLAGS: + emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG))); + + if (optimize + || target == NULL_RTX + || !nonimmediate_operand (target, word_mode) + || GET_MODE (target) != word_mode) + target = gen_reg_rtx (word_mode); + + emit_insn (gen_pop (target)); + return target; + + case IX86_BUILTIN_WRITE_FLAGS: + + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + if (!general_no_elim_operand (op0, word_mode)) + op0 = copy_to_mode_reg (word_mode, op0); + + emit_insn (gen_push (op0)); + emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG))); + return 0; + + case IX86_BUILTIN_KORTESTC16: + icode = CODE_FOR_kortestchi; + mode0 = HImode; + mode1 = CCCmode; + goto kortest; + + case IX86_BUILTIN_KORTESTZ16: + icode = CODE_FOR_kortestzhi; + mode0 = HImode; + mode1 = CCZmode; + + kortest: + arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */ + arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */ + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + op0 = copy_to_reg (op0); + op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0); + op1 = copy_to_reg (op1); + op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0); + + target = gen_reg_rtx (QImode); + emit_insn (gen_rtx_SET (target, const0_rtx)); + + /* Emit kortest. */ + emit_insn (GEN_FCN (icode) (op0, op1)); + /* And use setcc to return result from flags. */ + ix86_expand_setcc (target, EQ, + gen_rtx_REG (mode1, FLAGS_REG), const0_rtx); + return target; + + case IX86_BUILTIN_GATHERSIV2DF: + icode = CODE_FOR_avx2_gathersiv2df; + goto gather_gen; + case IX86_BUILTIN_GATHERSIV4DF: + icode = CODE_FOR_avx2_gathersiv4df; + goto gather_gen; + case IX86_BUILTIN_GATHERDIV2DF: + icode = CODE_FOR_avx2_gatherdiv2df; + goto gather_gen; + case IX86_BUILTIN_GATHERDIV4DF: + icode = CODE_FOR_avx2_gatherdiv4df; + goto gather_gen; + case IX86_BUILTIN_GATHERSIV4SF: + icode = CODE_FOR_avx2_gathersiv4sf; + goto gather_gen; + case IX86_BUILTIN_GATHERSIV8SF: + icode = CODE_FOR_avx2_gathersiv8sf; + goto gather_gen; + case IX86_BUILTIN_GATHERDIV4SF: + icode = CODE_FOR_avx2_gatherdiv4sf; + goto gather_gen; + case IX86_BUILTIN_GATHERDIV8SF: + icode = CODE_FOR_avx2_gatherdiv8sf; + goto gather_gen; + case IX86_BUILTIN_GATHERSIV2DI: + icode = CODE_FOR_avx2_gathersiv2di; + goto gather_gen; + case IX86_BUILTIN_GATHERSIV4DI: + icode = CODE_FOR_avx2_gathersiv4di; + goto gather_gen; + case IX86_BUILTIN_GATHERDIV2DI: + icode = CODE_FOR_avx2_gatherdiv2di; + goto gather_gen; + case IX86_BUILTIN_GATHERDIV4DI: + icode = CODE_FOR_avx2_gatherdiv4di; + goto gather_gen; + case IX86_BUILTIN_GATHERSIV4SI: + icode = CODE_FOR_avx2_gathersiv4si; + goto gather_gen; + case IX86_BUILTIN_GATHERSIV8SI: + icode = CODE_FOR_avx2_gathersiv8si; + goto gather_gen; + case IX86_BUILTIN_GATHERDIV4SI: + icode = CODE_FOR_avx2_gatherdiv4si; + goto gather_gen; + case IX86_BUILTIN_GATHERDIV8SI: + icode = CODE_FOR_avx2_gatherdiv8si; + goto gather_gen; + case IX86_BUILTIN_GATHERALTSIV4DF: + icode = CODE_FOR_avx2_gathersiv4df; + goto gather_gen; + case IX86_BUILTIN_GATHERALTDIV8SF: + icode = CODE_FOR_avx2_gatherdiv8sf; + goto gather_gen; + case IX86_BUILTIN_GATHERALTSIV4DI: + icode = CODE_FOR_avx2_gathersiv4di; + goto gather_gen; + case IX86_BUILTIN_GATHERALTDIV8SI: + icode = CODE_FOR_avx2_gatherdiv8si; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV16SF: + icode = CODE_FOR_avx512f_gathersiv16sf; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV8DF: + icode = 
CODE_FOR_avx512f_gathersiv8df; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV16SF: + icode = CODE_FOR_avx512f_gatherdiv16sf; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV8DF: + icode = CODE_FOR_avx512f_gatherdiv8df; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV16SI: + icode = CODE_FOR_avx512f_gathersiv16si; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV8DI: + icode = CODE_FOR_avx512f_gathersiv8di; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV16SI: + icode = CODE_FOR_avx512f_gatherdiv16si; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV8DI: + icode = CODE_FOR_avx512f_gatherdiv8di; + goto gather_gen; + case IX86_BUILTIN_GATHER3ALTSIV8DF: + icode = CODE_FOR_avx512f_gathersiv8df; + goto gather_gen; + case IX86_BUILTIN_GATHER3ALTDIV16SF: + icode = CODE_FOR_avx512f_gatherdiv16sf; + goto gather_gen; + case IX86_BUILTIN_GATHER3ALTSIV8DI: + icode = CODE_FOR_avx512f_gathersiv8di; + goto gather_gen; + case IX86_BUILTIN_GATHER3ALTDIV16SI: + icode = CODE_FOR_avx512f_gatherdiv16si; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV2DF: + icode = CODE_FOR_avx512vl_gathersiv2df; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV4DF: + icode = CODE_FOR_avx512vl_gathersiv4df; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV2DF: + icode = CODE_FOR_avx512vl_gatherdiv2df; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV4DF: + icode = CODE_FOR_avx512vl_gatherdiv4df; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV4SF: + icode = CODE_FOR_avx512vl_gathersiv4sf; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV8SF: + icode = CODE_FOR_avx512vl_gathersiv8sf; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV4SF: + icode = CODE_FOR_avx512vl_gatherdiv4sf; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV8SF: + icode = CODE_FOR_avx512vl_gatherdiv8sf; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV2DI: + icode = CODE_FOR_avx512vl_gathersiv2di; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV4DI: + icode = CODE_FOR_avx512vl_gathersiv4di; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV2DI: + icode = CODE_FOR_avx512vl_gatherdiv2di; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV4DI: + icode = CODE_FOR_avx512vl_gatherdiv4di; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV4SI: + icode = CODE_FOR_avx512vl_gathersiv4si; + goto gather_gen; + case IX86_BUILTIN_GATHER3SIV8SI: + icode = CODE_FOR_avx512vl_gathersiv8si; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV4SI: + icode = CODE_FOR_avx512vl_gatherdiv4si; + goto gather_gen; + case IX86_BUILTIN_GATHER3DIV8SI: + icode = CODE_FOR_avx512vl_gatherdiv8si; + goto gather_gen; + case IX86_BUILTIN_GATHER3ALTSIV4DF: + icode = CODE_FOR_avx512vl_gathersiv4df; + goto gather_gen; + case IX86_BUILTIN_GATHER3ALTDIV8SF: + icode = CODE_FOR_avx512vl_gatherdiv8sf; + goto gather_gen; + case IX86_BUILTIN_GATHER3ALTSIV4DI: + icode = CODE_FOR_avx512vl_gathersiv4di; + goto gather_gen; + case IX86_BUILTIN_GATHER3ALTDIV8SI: + icode = CODE_FOR_avx512vl_gatherdiv8si; + goto gather_gen; + case IX86_BUILTIN_SCATTERSIV16SF: + icode = CODE_FOR_avx512f_scattersiv16sf; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV8DF: + icode = CODE_FOR_avx512f_scattersiv8df; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV16SF: + icode = CODE_FOR_avx512f_scatterdiv16sf; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV8DF: + icode = CODE_FOR_avx512f_scatterdiv8df; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV16SI: + icode = CODE_FOR_avx512f_scattersiv16si; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV8DI: + icode = CODE_FOR_avx512f_scattersiv8di; + goto scatter_gen; + case 
IX86_BUILTIN_SCATTERDIV16SI: + icode = CODE_FOR_avx512f_scatterdiv16si; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV8DI: + icode = CODE_FOR_avx512f_scatterdiv8di; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV8SF: + icode = CODE_FOR_avx512vl_scattersiv8sf; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV4SF: + icode = CODE_FOR_avx512vl_scattersiv4sf; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV4DF: + icode = CODE_FOR_avx512vl_scattersiv4df; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV2DF: + icode = CODE_FOR_avx512vl_scattersiv2df; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV8SF: + icode = CODE_FOR_avx512vl_scatterdiv8sf; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV4SF: + icode = CODE_FOR_avx512vl_scatterdiv4sf; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV4DF: + icode = CODE_FOR_avx512vl_scatterdiv4df; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV2DF: + icode = CODE_FOR_avx512vl_scatterdiv2df; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV8SI: + icode = CODE_FOR_avx512vl_scattersiv8si; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV4SI: + icode = CODE_FOR_avx512vl_scattersiv4si; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV4DI: + icode = CODE_FOR_avx512vl_scattersiv4di; + goto scatter_gen; + case IX86_BUILTIN_SCATTERSIV2DI: + icode = CODE_FOR_avx512vl_scattersiv2di; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV8SI: + icode = CODE_FOR_avx512vl_scatterdiv8si; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV4SI: + icode = CODE_FOR_avx512vl_scatterdiv4si; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV4DI: + icode = CODE_FOR_avx512vl_scatterdiv4di; + goto scatter_gen; + case IX86_BUILTIN_SCATTERDIV2DI: + icode = CODE_FOR_avx512vl_scatterdiv2di; + goto scatter_gen; + case IX86_BUILTIN_GATHERPFDPD: + icode = CODE_FOR_avx512pf_gatherpfv8sidf; + goto vec_prefetch_gen; + case IX86_BUILTIN_SCATTERALTSIV8DF: + icode = CODE_FOR_avx512f_scattersiv8df; + goto scatter_gen; + case IX86_BUILTIN_SCATTERALTDIV16SF: + icode = CODE_FOR_avx512f_scatterdiv16sf; + goto scatter_gen; + case IX86_BUILTIN_SCATTERALTSIV8DI: + icode = CODE_FOR_avx512f_scattersiv8di; + goto scatter_gen; + case IX86_BUILTIN_SCATTERALTDIV16SI: + icode = CODE_FOR_avx512f_scatterdiv16si; + goto scatter_gen; + case IX86_BUILTIN_GATHERPFDPS: + icode = CODE_FOR_avx512pf_gatherpfv16sisf; + goto vec_prefetch_gen; + case IX86_BUILTIN_GATHERPFQPD: + icode = CODE_FOR_avx512pf_gatherpfv8didf; + goto vec_prefetch_gen; + case IX86_BUILTIN_GATHERPFQPS: + icode = CODE_FOR_avx512pf_gatherpfv8disf; + goto vec_prefetch_gen; + case IX86_BUILTIN_SCATTERPFDPD: + icode = CODE_FOR_avx512pf_scatterpfv8sidf; + goto vec_prefetch_gen; + case IX86_BUILTIN_SCATTERPFDPS: + icode = CODE_FOR_avx512pf_scatterpfv16sisf; + goto vec_prefetch_gen; + case IX86_BUILTIN_SCATTERPFQPD: + icode = CODE_FOR_avx512pf_scatterpfv8didf; + goto vec_prefetch_gen; + case IX86_BUILTIN_SCATTERPFQPS: + icode = CODE_FOR_avx512pf_scatterpfv8disf; + goto vec_prefetch_gen; + + gather_gen: + rtx half; + rtx (*gen) (rtx, rtx); + + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + arg3 = CALL_EXPR_ARG (exp, 3); + arg4 = CALL_EXPR_ARG (exp, 4); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + op3 = expand_normal (arg3); + op4 = expand_normal (arg4); + /* Note the arg order is different from the operand order. 
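+     For the gather expansions below: arg0/op0 is the pass-through
+     source vector (insn operand 1), arg1/op1 the base address
+     (operand 2), arg2/op2 the index vector (operand 3), arg3/op3 the
+     mask (operand 4) and arg4/op4 the scale immediate (operand 5),
+     while insn operand 0 is the destination.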
*/ + mode0 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[3].mode; + mode3 = insn_data[icode].operand[4].mode; + mode4 = insn_data[icode].operand[5].mode; + + if (target == NULL_RTX + || GET_MODE (target) != insn_data[icode].operand[0].mode + || !insn_data[icode].operand[0].predicate (target, + GET_MODE (target))) + subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode); + else + subtarget = target; + + switch (fcode) + { + case IX86_BUILTIN_GATHER3ALTSIV8DF: + case IX86_BUILTIN_GATHER3ALTSIV8DI: + half = gen_reg_rtx (V8SImode); + if (!nonimmediate_operand (op2, V16SImode)) + op2 = copy_to_mode_reg (V16SImode, op2); + emit_insn (gen_vec_extract_lo_v16si (half, op2)); + op2 = half; + break; + case IX86_BUILTIN_GATHER3ALTSIV4DF: + case IX86_BUILTIN_GATHER3ALTSIV4DI: + case IX86_BUILTIN_GATHERALTSIV4DF: + case IX86_BUILTIN_GATHERALTSIV4DI: + half = gen_reg_rtx (V4SImode); + if (!nonimmediate_operand (op2, V8SImode)) + op2 = copy_to_mode_reg (V8SImode, op2); + emit_insn (gen_vec_extract_lo_v8si (half, op2)); + op2 = half; + break; + case IX86_BUILTIN_GATHER3ALTDIV16SF: + case IX86_BUILTIN_GATHER3ALTDIV16SI: + half = gen_reg_rtx (mode0); + if (mode0 == V8SFmode) + gen = gen_vec_extract_lo_v16sf; + else + gen = gen_vec_extract_lo_v16si; + if (!nonimmediate_operand (op0, GET_MODE (op0))) + op0 = copy_to_mode_reg (GET_MODE (op0), op0); + emit_insn (gen (half, op0)); + op0 = half; + if (GET_MODE (op3) != VOIDmode) + { + if (!nonimmediate_operand (op3, GET_MODE (op3))) + op3 = copy_to_mode_reg (GET_MODE (op3), op3); + emit_insn (gen (half, op3)); + op3 = half; + } + break; + case IX86_BUILTIN_GATHER3ALTDIV8SF: + case IX86_BUILTIN_GATHER3ALTDIV8SI: + case IX86_BUILTIN_GATHERALTDIV8SF: + case IX86_BUILTIN_GATHERALTDIV8SI: + half = gen_reg_rtx (mode0); + if (mode0 == V4SFmode) + gen = gen_vec_extract_lo_v8sf; + else + gen = gen_vec_extract_lo_v8si; + if (!nonimmediate_operand (op0, GET_MODE (op0))) + op0 = copy_to_mode_reg (GET_MODE (op0), op0); + emit_insn (gen (half, op0)); + op0 = half; + if (GET_MODE (op3) != VOIDmode) + { + if (!nonimmediate_operand (op3, GET_MODE (op3))) + op3 = copy_to_mode_reg (GET_MODE (op3), op3); + emit_insn (gen (half, op3)); + op3 = half; + } + break; + default: + break; + } + + /* Force memory operand only with base register here. But we + don't want to do it on memory operand for other builtin + functions. */ + op1 = ix86_zero_extend_to_Pmode (op1); + + if (!insn_data[icode].operand[1].predicate (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (!insn_data[icode].operand[2].predicate (op1, Pmode)) + op1 = copy_to_mode_reg (Pmode, op1); + if (!insn_data[icode].operand[3].predicate (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + + op3 = fixup_modeless_constant (op3, mode3); + + if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode) + { + if (!insn_data[icode].operand[4].predicate (op3, mode3)) + op3 = copy_to_mode_reg (mode3, op3); + } + else + { + op3 = copy_to_reg (op3); + op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0); + } + if (!insn_data[icode].operand[5].predicate (op4, mode4)) + { + error ("the last argument must be scale 1, 2, 4, 8"); + return const0_rtx; + } + + /* Optimize. If mask is known to have all high bits set, + replace op0 with pc_rtx to signal that the instruction + overwrites the whole destination and doesn't use its + previous contents. 
*/ + if (optimize) + { + if (TREE_CODE (arg3) == INTEGER_CST) + { + if (integer_all_onesp (arg3)) + op0 = pc_rtx; + } + else if (TREE_CODE (arg3) == VECTOR_CST) + { + unsigned int negative = 0; + for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i) + { + tree cst = VECTOR_CST_ELT (arg3, i); + if (TREE_CODE (cst) == INTEGER_CST + && tree_int_cst_sign_bit (cst)) + negative++; + else if (TREE_CODE (cst) == REAL_CST + && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst))) + negative++; + } + if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3))) + op0 = pc_rtx; + } + else if (TREE_CODE (arg3) == SSA_NAME + && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE) + { + /* Recognize also when mask is like: + __v2df src = _mm_setzero_pd (); + __v2df mask = _mm_cmpeq_pd (src, src); + or + __v8sf src = _mm256_setzero_ps (); + __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ); + as that is a cheaper way to load all ones into + a register than having to load a constant from + memory. */ + gimple *def_stmt = SSA_NAME_DEF_STMT (arg3); + if (is_gimple_call (def_stmt)) + { + tree fndecl = gimple_call_fndecl (def_stmt); + if (fndecl + && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) + switch ((unsigned int) DECL_FUNCTION_CODE (fndecl)) + { + case IX86_BUILTIN_CMPPD: + case IX86_BUILTIN_CMPPS: + case IX86_BUILTIN_CMPPD256: + case IX86_BUILTIN_CMPPS256: + if (!integer_zerop (gimple_call_arg (def_stmt, 2))) + break; + /* FALLTHRU */ + case IX86_BUILTIN_CMPEQPD: + case IX86_BUILTIN_CMPEQPS: + if (initializer_zerop (gimple_call_arg (def_stmt, 0)) + && initializer_zerop (gimple_call_arg (def_stmt, + 1))) + op0 = pc_rtx; + break; + default: + break; + } + } + } + } + + pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4); + if (! pat) + return const0_rtx; + emit_insn (pat); + + switch (fcode) + { + case IX86_BUILTIN_GATHER3DIV16SF: + if (target == NULL_RTX) + target = gen_reg_rtx (V8SFmode); + emit_insn (gen_vec_extract_lo_v16sf (target, subtarget)); + break; + case IX86_BUILTIN_GATHER3DIV16SI: + if (target == NULL_RTX) + target = gen_reg_rtx (V8SImode); + emit_insn (gen_vec_extract_lo_v16si (target, subtarget)); + break; + case IX86_BUILTIN_GATHER3DIV8SF: + case IX86_BUILTIN_GATHERDIV8SF: + if (target == NULL_RTX) + target = gen_reg_rtx (V4SFmode); + emit_insn (gen_vec_extract_lo_v8sf (target, subtarget)); + break; + case IX86_BUILTIN_GATHER3DIV8SI: + case IX86_BUILTIN_GATHERDIV8SI: + if (target == NULL_RTX) + target = gen_reg_rtx (V4SImode); + emit_insn (gen_vec_extract_lo_v8si (target, subtarget)); + break; + default: + target = subtarget; + break; + } + return target; + + scatter_gen: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + arg3 = CALL_EXPR_ARG (exp, 3); + arg4 = CALL_EXPR_ARG (exp, 4); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + op3 = expand_normal (arg3); + op4 = expand_normal (arg4); + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + mode3 = insn_data[icode].operand[3].mode; + mode4 = insn_data[icode].operand[4].mode; + + /* Scatter instruction stores operand op3 to memory with + indices from op2 and scale from op4 under writemask op1. + If index operand op2 has more elements then source operand + op3 one need to use only its low half. And vice versa. 
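+     E.g. IX86_BUILTIN_SCATTERALTSIV8DF pairs a V8DF source with V16SI
+     indices, so only the low V8SI half of the index vector is
+     extracted below, whereas IX86_BUILTIN_SCATTERALTDIV16SI pairs a
+     V16SI source with V8DI indices and uses the low half of the
+     source instead.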
*/ + switch (fcode) + { + case IX86_BUILTIN_SCATTERALTSIV8DF: + case IX86_BUILTIN_SCATTERALTSIV8DI: + half = gen_reg_rtx (V8SImode); + if (!nonimmediate_operand (op2, V16SImode)) + op2 = copy_to_mode_reg (V16SImode, op2); + emit_insn (gen_vec_extract_lo_v16si (half, op2)); + op2 = half; + break; + case IX86_BUILTIN_SCATTERALTDIV16SF: + case IX86_BUILTIN_SCATTERALTDIV16SI: + half = gen_reg_rtx (mode3); + if (mode3 == V8SFmode) + gen = gen_vec_extract_lo_v16sf; + else + gen = gen_vec_extract_lo_v16si; + if (!nonimmediate_operand (op3, GET_MODE (op3))) + op3 = copy_to_mode_reg (GET_MODE (op3), op3); + emit_insn (gen (half, op3)); + op3 = half; + break; + default: + break; + } + + /* Force memory operand only with base register here. But we + don't want to do it on memory operand for other builtin + functions. */ + op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1)); + + if (!insn_data[icode].operand[0].predicate (op0, Pmode)) + op0 = copy_to_mode_reg (Pmode, op0); + + op1 = fixup_modeless_constant (op1, mode1); + + if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode) + { + if (!insn_data[icode].operand[1].predicate (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + } + else + { + op1 = copy_to_reg (op1); + op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0); + } + + if (!insn_data[icode].operand[2].predicate (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + + if (!insn_data[icode].operand[3].predicate (op3, mode3)) + op3 = copy_to_mode_reg (mode3, op3); + + if (!insn_data[icode].operand[4].predicate (op4, mode4)) + { + error ("the last argument must be scale 1, 2, 4, 8"); + return const0_rtx; + } + + pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); + if (! pat) + return const0_rtx; + + emit_insn (pat); + return 0; + + vec_prefetch_gen: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + arg3 = CALL_EXPR_ARG (exp, 3); + arg4 = CALL_EXPR_ARG (exp, 4); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + op3 = expand_normal (arg3); + op4 = expand_normal (arg4); + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode3 = insn_data[icode].operand[3].mode; + mode4 = insn_data[icode].operand[4].mode; + + op0 = fixup_modeless_constant (op0, mode0); + + if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) + { + if (!insn_data[icode].operand[0].predicate (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + } + else + { + op0 = copy_to_reg (op0); + op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0); + } + + if (!insn_data[icode].operand[1].predicate (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + /* Force memory operand only with base register here. But we + don't want to do it on memory operand for other builtin + functions. */ + op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1)); + + if (!insn_data[icode].operand[2].predicate (op2, Pmode)) + op2 = copy_to_mode_reg (Pmode, op2); + + if (!insn_data[icode].operand[3].predicate (op3, mode3)) + { + error ("the forth argument must be scale 1, 2, 4, 8"); + return const0_rtx; + } + + if (!insn_data[icode].operand[4].predicate (op4, mode4)) + { + error ("incorrect hint operand"); + return const0_rtx; + } + + pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); + if (! 
pat) + return const0_rtx; + + emit_insn (pat); + + return 0; + + case IX86_BUILTIN_XABORT: + icode = CODE_FOR_xabort; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + mode0 = insn_data[icode].operand[0].mode; + if (!insn_data[icode].operand[0].predicate (op0, mode0)) + { + error ("the xabort's argument must be an 8-bit immediate"); + return const0_rtx; + } + emit_insn (gen_xabort (op0)); + return 0; + + default: + break; + } + + for (i = 0, d = bdesc_special_args; + i < ARRAY_SIZE (bdesc_special_args); + i++, d++) + if (d->code == fcode) + return ix86_expand_special_args_builtin (d, exp, target); + + for (i = 0, d = bdesc_args; + i < ARRAY_SIZE (bdesc_args); + i++, d++) + if (d->code == fcode) + switch (fcode) + { + case IX86_BUILTIN_FABSQ: + case IX86_BUILTIN_COPYSIGNQ: + if (!TARGET_SSE) + /* Emit a normal call if SSE isn't available. */ + return expand_call (exp, target, ignore); + default: + return ix86_expand_args_builtin (d, exp, target); + } + + for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) + if (d->code == fcode) + return ix86_expand_sse_comi (d, exp, target); + + for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++) + if (d->code == fcode) + return ix86_expand_round_builtin (d, exp, target); + + for (i = 0, d = bdesc_pcmpestr; + i < ARRAY_SIZE (bdesc_pcmpestr); + i++, d++) + if (d->code == fcode) + return ix86_expand_sse_pcmpestr (d, exp, target); + + for (i = 0, d = bdesc_pcmpistr; + i < ARRAY_SIZE (bdesc_pcmpistr); + i++, d++) + if (d->code == fcode) + return ix86_expand_sse_pcmpistr (d, exp, target); + + for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) + if (d->code == fcode) + return ix86_expand_multi_arg_builtin (d->icode, exp, target, + (enum ix86_builtin_func_type) + d->flag, d->comparison); + + gcc_unreachable (); +} + +/* This returns the target-specific builtin with code CODE if + current_function_decl has visibility on this builtin, which is checked + using isa flags. Returns NULL_TREE otherwise. */ + +static tree ix86_get_builtin (enum ix86_builtins code) +{ + struct cl_target_option *opts; + tree target_tree = NULL_TREE; + + /* Determine the isa flags of current_function_decl. */ + + if (current_function_decl) + target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl); + + if (target_tree == NULL) + target_tree = target_option_default_node; + + opts = TREE_TARGET_OPTION (target_tree); + + if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags) + return ix86_builtin_decl (code, true); + else + return NULL_TREE; +} + +/* Return function decl for target specific builtin + for given MPX builtin passed i FCODE. 
*/ +static tree +ix86_builtin_mpx_function (unsigned fcode) +{ + switch (fcode) + { + case BUILT_IN_CHKP_BNDMK: + return ix86_builtins[IX86_BUILTIN_BNDMK]; + + case BUILT_IN_CHKP_BNDSTX: + return ix86_builtins[IX86_BUILTIN_BNDSTX]; + + case BUILT_IN_CHKP_BNDLDX: + return ix86_builtins[IX86_BUILTIN_BNDLDX]; + + case BUILT_IN_CHKP_BNDCL: + return ix86_builtins[IX86_BUILTIN_BNDCL]; + + case BUILT_IN_CHKP_BNDCU: + return ix86_builtins[IX86_BUILTIN_BNDCU]; + + case BUILT_IN_CHKP_BNDRET: + return ix86_builtins[IX86_BUILTIN_BNDRET]; + + case BUILT_IN_CHKP_INTERSECT: + return ix86_builtins[IX86_BUILTIN_BNDINT]; + + case BUILT_IN_CHKP_NARROW: + return ix86_builtins[IX86_BUILTIN_BNDNARROW]; + + case BUILT_IN_CHKP_SIZEOF: + return ix86_builtins[IX86_BUILTIN_SIZEOF]; + + case BUILT_IN_CHKP_EXTRACT_LOWER: + return ix86_builtins[IX86_BUILTIN_BNDLOWER]; + + case BUILT_IN_CHKP_EXTRACT_UPPER: + return ix86_builtins[IX86_BUILTIN_BNDUPPER]; + + default: + return NULL_TREE; + } + + gcc_unreachable (); +} + +/* Helper function for ix86_load_bounds and ix86_store_bounds. + + Return an address to be used to load/store bounds for pointer + passed in SLOT. + + SLOT_NO is an integer constant holding number of a target + dependent special slot to be used in case SLOT is not a memory. + + SPECIAL_BASE is a pointer to be used as a base of fake address + to access special slots in Bounds Table. SPECIAL_BASE[-1], + SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */ + +static rtx +ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base) +{ + rtx addr = NULL; + + /* NULL slot means we pass bounds for pointer not passed to the + function at all. Register slot means we pass pointer in a + register. In both these cases bounds are passed via Bounds + Table. Since we do not have actual pointer stored in memory, + we have to use fake addresses to access Bounds Table. We + start with (special_base - sizeof (void*)) and decrease this + address by pointer size to get addresses for other slots. */ + if (!slot || REG_P (slot)) + { + gcc_assert (CONST_INT_P (slot_no)); + addr = plus_constant (Pmode, special_base, + -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode)); + } + /* If pointer is passed in a memory then its address is used to + access Bounds Table. */ + else if (MEM_P (slot)) + { + addr = XEXP (slot, 0); + if (!register_operand (addr, Pmode)) + addr = copy_addr_to_reg (addr); + } + else + gcc_unreachable (); + + return addr; +} + +/* Expand pass uses this hook to load bounds for function parameter + PTR passed in SLOT in case its bounds are not passed in a register. + + If SLOT is a memory, then bounds are loaded as for regular pointer + loaded from memory. PTR may be NULL in case SLOT is a memory. + In such case value of PTR (if required) may be loaded from SLOT. + + If SLOT is NULL or a register then SLOT_NO is an integer constant + holding number of the target dependent special slot which should be + used to obtain bounds. + + Return loaded bounds. */ + +static rtx +ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no) +{ + rtx reg = gen_reg_rtx (BNDmode); + rtx addr; + + /* Get address to be used to access Bounds Table. Special slots start + at the location of return address of the current function. */ + addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx); + + /* Load pointer value from a memory if we don't have it. 
*/ + if (!ptr) + { + gcc_assert (MEM_P (slot)); + ptr = copy_addr_to_reg (slot); + } + + if (!register_operand (ptr, Pmode)) + ptr = ix86_zero_extend_to_Pmode (ptr); + + emit_insn (BNDmode == BND64mode + ? gen_bnd64_ldx (reg, addr, ptr) + : gen_bnd32_ldx (reg, addr, ptr)); + + return reg; +} + +/* Expand pass uses this hook to store BOUNDS for call argument PTR + passed in SLOT in case BOUNDS are not passed in a register. + + If SLOT is a memory, then BOUNDS are stored as for regular pointer + stored in memory. PTR may be NULL in case SLOT is a memory. + In such case value of PTR (if required) may be loaded from SLOT. + + If SLOT is NULL or a register then SLOT_NO is an integer constant + holding number of the target dependent special slot which should be + used to store BOUNDS. */ + +static void +ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no) +{ + rtx addr; + + /* Get address to be used to access Bounds Table. Special slots start + at the location of return address of a called function. */ + addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx); + + /* Load pointer value from a memory if we don't have it. */ + if (!ptr) + { + gcc_assert (MEM_P (slot)); + ptr = copy_addr_to_reg (slot); + } + + if (!register_operand (ptr, Pmode)) + ptr = ix86_zero_extend_to_Pmode (ptr); + + gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds))); + if (!register_operand (bounds, BNDmode)) + bounds = copy_to_mode_reg (BNDmode, bounds); + + emit_insn (BNDmode == BND64mode + ? gen_bnd64_stx (addr, ptr, bounds) + : gen_bnd32_stx (addr, ptr, bounds)); +} + +/* Load and return bounds returned by function in SLOT. */ + +static rtx +ix86_load_returned_bounds (rtx slot) +{ + rtx res; + + gcc_assert (REG_P (slot)); + res = gen_reg_rtx (BNDmode); + emit_move_insn (res, slot); + + return res; +} + +/* Store BOUNDS returned by function into SLOT. */ + +static void +ix86_store_returned_bounds (rtx slot, rtx bounds) +{ + gcc_assert (REG_P (slot)); + emit_move_insn (slot, bounds); +} + +/* Returns a function decl for a vectorized version of the combined function + with combined_fn code FN and the result vector type TYPE, or NULL_TREE + if it is not available. */ + +static tree +ix86_builtin_vectorized_function (unsigned int fn, tree type_out, + tree type_in) +{ + machine_mode in_mode, out_mode; + int in_n, out_n; + + if (TREE_CODE (type_out) != VECTOR_TYPE + || TREE_CODE (type_in) != VECTOR_TYPE) + return NULL_TREE; + + out_mode = TYPE_MODE (TREE_TYPE (type_out)); + out_n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + + switch (fn) + { + CASE_CFN_EXP2: + if (out_mode == SFmode && in_mode == SFmode) + { + if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_EXP2PS); + } + break; + + CASE_CFN_IFLOOR: + CASE_CFN_LFLOOR: + CASE_CFN_LLFLOOR: + /* The round insn does not trap on denormals. 
*/ + if (flag_trapping_math || !TARGET_ROUND) + break; + + if (out_mode == SImode && in_mode == DFmode) + { + if (out_n == 4 && in_n == 2) + return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX); + else if (out_n == 8 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256); + else if (out_n == 16 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512); + } + if (out_mode == SImode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX); + else if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512); + } + break; + + CASE_CFN_ICEIL: + CASE_CFN_LCEIL: + CASE_CFN_LLCEIL: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math || !TARGET_ROUND) + break; + + if (out_mode == SImode && in_mode == DFmode) + { + if (out_n == 4 && in_n == 2) + return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX); + else if (out_n == 8 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256); + else if (out_n == 16 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512); + } + if (out_mode == SImode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX); + else if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512); + } + break; + + CASE_CFN_IRINT: + CASE_CFN_LRINT: + CASE_CFN_LLRINT: + if (out_mode == SImode && in_mode == DFmode) + { + if (out_n == 4 && in_n == 2) + return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX); + else if (out_n == 8 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256); + else if (out_n == 16 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512); + } + if (out_mode == SImode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ); + else if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512); + } + break; + + CASE_CFN_IROUND: + CASE_CFN_LROUND: + CASE_CFN_LLROUND: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math || !TARGET_ROUND) + break; + + if (out_mode == SImode && in_mode == DFmode) + { + if (out_n == 4 && in_n == 2) + return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX); + else if (out_n == 8 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256); + else if (out_n == 16 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512); + } + if (out_mode == SImode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX); + else if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512); + } + break; + + CASE_CFN_FLOOR: + /* The round insn does not trap on denormals. 
*/ + if (flag_trapping_math || !TARGET_ROUND) + break; + + if (out_mode == DFmode && in_mode == DFmode) + { + if (out_n == 2 && in_n == 2) + return ix86_get_builtin (IX86_BUILTIN_FLOORPD); + else if (out_n == 4 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_FLOORPD256); + else if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_FLOORPD512); + } + if (out_mode == SFmode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_FLOORPS); + else if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_FLOORPS256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_FLOORPS512); + } + break; + + CASE_CFN_CEIL: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math || !TARGET_ROUND) + break; + + if (out_mode == DFmode && in_mode == DFmode) + { + if (out_n == 2 && in_n == 2) + return ix86_get_builtin (IX86_BUILTIN_CEILPD); + else if (out_n == 4 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_CEILPD256); + else if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_CEILPD512); + } + if (out_mode == SFmode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_CEILPS); + else if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_CEILPS256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_CEILPS512); + } + break; + + CASE_CFN_TRUNC: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math || !TARGET_ROUND) + break; + + if (out_mode == DFmode && in_mode == DFmode) + { + if (out_n == 2 && in_n == 2) + return ix86_get_builtin (IX86_BUILTIN_TRUNCPD); + else if (out_n == 4 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256); + else if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512); + } + if (out_mode == SFmode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_TRUNCPS); + else if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512); + } + break; + + CASE_CFN_RINT: + /* The round insn does not trap on denormals. */ + if (flag_trapping_math || !TARGET_ROUND) + break; + + if (out_mode == DFmode && in_mode == DFmode) + { + if (out_n == 2 && in_n == 2) + return ix86_get_builtin (IX86_BUILTIN_RINTPD); + else if (out_n == 4 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_RINTPD256); + } + if (out_mode == SFmode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_RINTPS); + else if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_RINTPS256); + } + break; + + CASE_CFN_FMA: + if (out_mode == DFmode && in_mode == DFmode) + { + if (out_n == 2 && in_n == 2) + return ix86_get_builtin (IX86_BUILTIN_VFMADDPD); + if (out_n == 4 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256); + } + if (out_mode == SFmode && in_mode == SFmode) + { + if (out_n == 4 && in_n == 4) + return ix86_get_builtin (IX86_BUILTIN_VFMADDPS); + if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256); + } + break; + + default: + break; + } + + /* Dispatch to a handler for a vectorization library. 
*/ + if (ix86_veclib_handler) + return ix86_veclib_handler (combined_fn (fn), type_out, type_in); + + return NULL_TREE; +} + +/* Handler for an SVML-style interface to + a library with vectorized intrinsics. */ + +static tree +ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in) +{ + char name[20]; + tree fntype, new_fndecl, args; + unsigned arity; + const char *bname; + machine_mode el_mode, in_mode; + int n, in_n; + + /* The SVML is suitable for unsafe math only. */ + if (!flag_unsafe_math_optimizations) + return NULL_TREE; + + el_mode = TYPE_MODE (TREE_TYPE (type_out)); + n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + if (el_mode != in_mode + || n != in_n) + return NULL_TREE; + + switch (fn) + { + CASE_CFN_EXP: + CASE_CFN_LOG: + CASE_CFN_LOG10: + CASE_CFN_POW: + CASE_CFN_TANH: + CASE_CFN_TAN: + CASE_CFN_ATAN: + CASE_CFN_ATAN2: + CASE_CFN_ATANH: + CASE_CFN_CBRT: + CASE_CFN_SINH: + CASE_CFN_SIN: + CASE_CFN_ASINH: + CASE_CFN_ASIN: + CASE_CFN_COSH: + CASE_CFN_COS: + CASE_CFN_ACOSH: + CASE_CFN_ACOS: + if ((el_mode != DFmode || n != 2) + && (el_mode != SFmode || n != 4)) + return NULL_TREE; + break; + + default: + return NULL_TREE; + } + + tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn); + bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); + + if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF) + strcpy (name, "vmlsLn4"); + else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG) + strcpy (name, "vmldLn2"); + else if (n == 4) + { + sprintf (name, "vmls%s", bname+10); + name[strlen (name)-1] = '4'; + } + else + sprintf (name, "vmld%s2", bname+10); + + /* Convert to uppercase. */ + name[4] &= ~0x20; + + arity = 0; + for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) + arity++; + + if (arity == 1) + fntype = build_function_type_list (type_out, type_in, NULL); + else + fntype = build_function_type_list (type_out, type_in, type_in, NULL); + + /* Build a function declaration for the vectorized function. */ + new_fndecl = build_decl (BUILTINS_LOCATION, + FUNCTION_DECL, get_identifier (name), fntype); + TREE_PUBLIC (new_fndecl) = 1; + DECL_EXTERNAL (new_fndecl) = 1; + DECL_IS_NOVOPS (new_fndecl) = 1; + TREE_READONLY (new_fndecl) = 1; + + return new_fndecl; +} + +/* Handler for an ACML-style interface to + a library with vectorized intrinsics. */ + +static tree +ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in) +{ + char name[20] = "__vr.._"; + tree fntype, new_fndecl, args; + unsigned arity; + const char *bname; + machine_mode el_mode, in_mode; + int n, in_n; + + /* The ACML is 64bits only and suitable for unsafe math only as + it does not correctly support parts of IEEE with the required + precision such as denormals. 
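+     (For reference, the name built below follows the ACML vector
+     naming convention, e.g. "__vrd2_sin" for sin over 2 doubles and
+     "__vrs4_sinf" for sinf over 4 floats.)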
*/ + if (!TARGET_64BIT + || !flag_unsafe_math_optimizations) + return NULL_TREE; + + el_mode = TYPE_MODE (TREE_TYPE (type_out)); + n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + if (el_mode != in_mode + || n != in_n) + return NULL_TREE; + + switch (fn) + { + CASE_CFN_SIN: + CASE_CFN_COS: + CASE_CFN_EXP: + CASE_CFN_LOG: + CASE_CFN_LOG2: + CASE_CFN_LOG10: + if (el_mode == DFmode && n == 2) + { + name[4] = 'd'; + name[5] = '2'; + } + else if (el_mode == SFmode && n == 4) + { + name[4] = 's'; + name[5] = '4'; + } + else + return NULL_TREE; + break; + + default: + return NULL_TREE; + } + + tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn); + bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); + sprintf (name + 7, "%s", bname+10); + + arity = 0; + for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) + arity++; + + if (arity == 1) + fntype = build_function_type_list (type_out, type_in, NULL); + else + fntype = build_function_type_list (type_out, type_in, type_in, NULL); + + /* Build a function declaration for the vectorized function. */ + new_fndecl = build_decl (BUILTINS_LOCATION, + FUNCTION_DECL, get_identifier (name), fntype); + TREE_PUBLIC (new_fndecl) = 1; + DECL_EXTERNAL (new_fndecl) = 1; + DECL_IS_NOVOPS (new_fndecl) = 1; + TREE_READONLY (new_fndecl) = 1; + + return new_fndecl; +} + +/* Returns a decl of a function that implements gather load with + memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE. + Return NULL_TREE if it is not available. */ + +static tree +ix86_vectorize_builtin_gather (const_tree mem_vectype, + const_tree index_type, int scale) +{ + bool si; + enum ix86_builtins code; + + if (! TARGET_AVX2) + return NULL_TREE; + + if ((TREE_CODE (index_type) != INTEGER_TYPE + && !POINTER_TYPE_P (index_type)) + || (TYPE_MODE (index_type) != SImode + && TYPE_MODE (index_type) != DImode)) + return NULL_TREE; + + if (TYPE_PRECISION (index_type) > POINTER_SIZE) + return NULL_TREE; + + /* v*gather* insn sign extends index to pointer mode. */ + if (TYPE_PRECISION (index_type) < POINTER_SIZE + && TYPE_UNSIGNED (index_type)) + return NULL_TREE; + + if (scale <= 0 + || scale > 8 + || (scale & (scale - 1)) != 0) + return NULL_TREE; + + si = TYPE_MODE (index_type) == SImode; + switch (TYPE_MODE (mem_vectype)) + { + case V2DFmode: + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF; + else + code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF; + break; + case V4DFmode: + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF; + else + code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF; + break; + case V2DImode: + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI; + else + code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI; + break; + case V4DImode: + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI; + else + code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI; + break; + case V4SFmode: + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF; + else + code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF; + break; + case V8SFmode: + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF; + else + code = si ? 
IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF; + break; + case V4SImode: + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI; + else + code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI; + break; + case V8SImode: + if (TARGET_AVX512VL) + code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI; + else + code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI; + break; + case V8DFmode: + if (TARGET_AVX512F) + code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF; + else + return NULL_TREE; + break; + case V8DImode: + if (TARGET_AVX512F) + code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI; + else + return NULL_TREE; + break; + case V16SFmode: + if (TARGET_AVX512F) + code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF; + else + return NULL_TREE; + break; + case V16SImode: + if (TARGET_AVX512F) + code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI; + else + return NULL_TREE; + break; + default: + return NULL_TREE; + } + + return ix86_get_builtin (code); +} + +/* Returns a decl of a function that implements scatter store with + register type VECTYPE and index type INDEX_TYPE and SCALE. + Return NULL_TREE if it is not available. */ + +static tree +ix86_vectorize_builtin_scatter (const_tree vectype, + const_tree index_type, int scale) +{ + bool si; + enum ix86_builtins code; + + if (!TARGET_AVX512F) + return NULL_TREE; + + if ((TREE_CODE (index_type) != INTEGER_TYPE + && !POINTER_TYPE_P (index_type)) + || (TYPE_MODE (index_type) != SImode + && TYPE_MODE (index_type) != DImode)) + return NULL_TREE; + + if (TYPE_PRECISION (index_type) > POINTER_SIZE) + return NULL_TREE; + + /* v*scatter* insn sign extends index to pointer mode. */ + if (TYPE_PRECISION (index_type) < POINTER_SIZE + && TYPE_UNSIGNED (index_type)) + return NULL_TREE; + + /* Scale can be 1, 2, 4 or 8. */ + if (scale <= 0 + || scale > 8 + || (scale & (scale - 1)) != 0) + return NULL_TREE; + + si = TYPE_MODE (index_type) == SImode; + switch (TYPE_MODE (vectype)) + { + case V8DFmode: + code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF; + break; + case V8DImode: + code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI; + break; + case V16SFmode: + code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF; + break; + case V16SImode: + code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI; + break; + default: + return NULL_TREE; + } + + return ix86_builtins[code]; +} + +/* Return true if it is safe to use the rsqrt optabs to optimize + 1.0/sqrt. */ + +static bool +use_rsqrt_p () +{ + return (TARGET_SSE_MATH + && flag_finite_math_only + && !flag_trapping_math + && flag_unsafe_math_optimizations); +} + +/* Returns a code for a target-specific builtin that implements + reciprocal of the function, or NULL_TREE if not available. */ + +static tree +ix86_builtin_reciprocal (tree fndecl) +{ + switch (DECL_FUNCTION_CODE (fndecl)) + { + /* Vectorized version of sqrt to rsqrt conversion. */ + case IX86_BUILTIN_SQRTPS_NR: + return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR); + + case IX86_BUILTIN_SQRTPS_NR256: + return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256); + + default: + return NULL_TREE; + } +} + +/* Helper for avx_vpermilps256_operand et al. This is also used by + the expansion functions to turn the parallel back into a mask. + The return value is 0 for no match and the imm8+1 for a match. 
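+   For example, in V4SFmode the parallel (1 0 3 2), where element i of
+   the result is taken from source element ipar[i], packs two bits per
+   element into imm8 0xb1, so the return value is 0xb2.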
*/ + +int +avx_vpermilp_parallel (rtx par, machine_mode mode) +{ + unsigned i, nelt = GET_MODE_NUNITS (mode); + unsigned mask = 0; + unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */ + + if (XVECLEN (par, 0) != (int) nelt) + return 0; + + /* Validate that all of the elements are constants, and not totally + out of range. Copy the data into an integral array to make the + subsequent checks easier. */ + for (i = 0; i < nelt; ++i) + { + rtx er = XVECEXP (par, 0, i); + unsigned HOST_WIDE_INT ei; + + if (!CONST_INT_P (er)) + return 0; + ei = INTVAL (er); + if (ei >= nelt) + return 0; + ipar[i] = ei; + } + + switch (mode) + { + case V8DFmode: + /* In the 512-bit DFmode case, we can only move elements within + a 128-bit lane. First fill the second part of the mask, + then fallthru. */ + for (i = 4; i < 6; ++i) + { + if (ipar[i] < 4 || ipar[i] >= 6) + return 0; + mask |= (ipar[i] - 4) << i; + } + for (i = 6; i < 8; ++i) + { + if (ipar[i] < 6) + return 0; + mask |= (ipar[i] - 6) << i; + } + /* FALLTHRU */ + + case V4DFmode: + /* In the 256-bit DFmode case, we can only move elements within + a 128-bit lane. */ + for (i = 0; i < 2; ++i) + { + if (ipar[i] >= 2) + return 0; + mask |= ipar[i] << i; + } + for (i = 2; i < 4; ++i) + { + if (ipar[i] < 2) + return 0; + mask |= (ipar[i] - 2) << i; + } + break; + + case V16SFmode: + /* In 512 bit SFmode case, permutation in the upper 256 bits + must mirror the permutation in the lower 256-bits. */ + for (i = 0; i < 8; ++i) + if (ipar[i] + 8 != ipar[i + 8]) + return 0; + /* FALLTHRU */ + + case V8SFmode: + /* In 256 bit SFmode case, we have full freedom of + movement within the low 128-bit lane, but the high 128-bit + lane must mirror the exact same pattern. */ + for (i = 0; i < 4; ++i) + if (ipar[i] + 4 != ipar[i + 4]) + return 0; + nelt = 4; + /* FALLTHRU */ + + case V2DFmode: + case V4SFmode: + /* In the 128-bit case, we've full freedom in the placement of + the elements from the source operand. */ + for (i = 0; i < nelt; ++i) + mask |= ipar[i] << (i * (nelt / 2)); + break; + + default: + gcc_unreachable (); + } + + /* Make sure success has a non-zero value by adding one. */ + return mask + 1; +} + +/* Helper for avx_vperm2f128_v4df_operand et al. This is also used by + the expansion functions to turn the parallel back into a mask. + The return value is 0 for no match and the imm8+1 for a match. */ + +int +avx_vperm2f128_parallel (rtx par, machine_mode mode) +{ + unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2; + unsigned mask = 0; + unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */ + + if (XVECLEN (par, 0) != (int) nelt) + return 0; + + /* Validate that all of the elements are constants, and not totally + out of range. Copy the data into an integral array to make the + subsequent checks easier. */ + for (i = 0; i < nelt; ++i) + { + rtx er = XVECEXP (par, 0, i); + unsigned HOST_WIDE_INT ei; + + if (!CONST_INT_P (er)) + return 0; + ei = INTVAL (er); + if (ei >= 2 * nelt) + return 0; + ipar[i] = ei; + } + + /* Validate that the halves of the permute are halves. */ + for (i = 0; i < nelt2 - 1; ++i) + if (ipar[i] + 1 != ipar[i + 1]) + return 0; + for (i = nelt2; i < nelt - 1; ++i) + if (ipar[i] + 1 != ipar[i + 1]) + return 0; + + /* Reconstruct the mask. */ + for (i = 0; i < 2; ++i) + { + unsigned e = ipar[i * nelt2]; + if (e % nelt2) + return 0; + e /= nelt2; + mask |= e << (i * 4); + } + + /* Make sure success has a non-zero value by adding one. 
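+     For V4DFmode, e.g., the parallel (0 1 4 5) selects the low lane
+     of each source operand, reconstructs to imm8 0x20 and therefore
+     returns 0x21.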
*/ + return mask + 1; +} + +/* Return a register priority for hard reg REGNO. */ +static int +ix86_register_priority (int hard_regno) +{ + /* ebp and r13 as the base always wants a displacement, r12 as the + base always wants an index. So discourage their usage in an + address. */ + if (hard_regno == R12_REG || hard_regno == R13_REG) + return 0; + if (hard_regno == BP_REG) + return 1; + /* New x86-64 int registers result in bigger code size. Discourage + them. */ + if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG) + return 2; + /* New x86-64 SSE registers result in bigger code size. Discourage + them. */ + if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG) + return 2; + /* Usage of AX register results in smaller code. Prefer it. */ + if (hard_regno == AX_REG) + return 4; + return 3; +} + +/* Implement TARGET_PREFERRED_RELOAD_CLASS. + + Put float CONST_DOUBLE in the constant pool instead of fp regs. + QImode must go into class Q_REGS. + Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and + movdf to do mem-to-mem moves through integer regs. */ + +static reg_class_t +ix86_preferred_reload_class (rtx x, reg_class_t regclass) +{ + machine_mode mode = GET_MODE (x); + + /* We're only allowed to return a subclass of CLASS. Many of the + following checks fail for NO_REGS, so eliminate that early. */ + if (regclass == NO_REGS) + return NO_REGS; + + /* All classes can load zeros. */ + if (x == CONST0_RTX (mode)) + return regclass; + + /* Force constants into memory if we are loading a (nonzero) constant into + an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK + instructions to load from a constant. */ + if (CONSTANT_P (x) + && (MAYBE_MMX_CLASS_P (regclass) + || MAYBE_SSE_CLASS_P (regclass) + || MAYBE_MASK_CLASS_P (regclass))) + return NO_REGS; + + /* Prefer SSE regs only, if we can use them for math. */ + if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode)) + return SSE_CLASS_P (regclass) ? regclass : NO_REGS; + + /* Floating-point constants need more complex checks. */ + if (CONST_DOUBLE_P (x)) + { + /* General regs can load everything. */ + if (reg_class_subset_p (regclass, GENERAL_REGS)) + return regclass; + + /* Floats can load 0 and 1 plus some others. Note that we eliminated + zero above. We only want to wind up preferring 80387 registers if + we plan on doing computation with them. */ + if (TARGET_80387 + && standard_80387_constant_p (x) > 0) + { + /* Limit class to non-sse. */ + if (regclass == FLOAT_SSE_REGS) + return FLOAT_REGS; + if (regclass == FP_TOP_SSE_REGS) + return FP_TOP_REG; + if (regclass == FP_SECOND_SSE_REGS) + return FP_SECOND_REG; + if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS) + return regclass; + } + + return NO_REGS; + } + + /* Generally when we see PLUS here, it's the function invariant + (plus soft-fp const_int). Which can only be computed into general + regs. */ + if (GET_CODE (x) == PLUS) + return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS; + + /* QImode constants are easy to load, but non-constant QImode data + must go into Q_REGS. */ + if (GET_MODE (x) == QImode && !CONSTANT_P (x)) + { + if (reg_class_subset_p (regclass, Q_REGS)) + return regclass; + if (reg_class_subset_p (Q_REGS, regclass)) + return Q_REGS; + return NO_REGS; + } + + return regclass; +} + +/* Discourage putting floating-point values in SSE registers unless + SSE math is being used, and likewise for the 387 registers. 
*/ +static reg_class_t +ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) +{ + machine_mode mode = GET_MODE (x); + + /* Restrict the output reload class to the register bank that we are doing + math on. If we would like not to return a subset of CLASS, reject this + alternative: if reload cannot do this, it will still use its choice. */ + mode = GET_MODE (x); + if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) + return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS; + + if (X87_FLOAT_MODE_P (mode)) + { + if (regclass == FP_TOP_SSE_REGS) + return FP_TOP_REG; + else if (regclass == FP_SECOND_SSE_REGS) + return FP_SECOND_REG; + else + return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS; + } + + return regclass; +} + +static reg_class_t +ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, + machine_mode mode, secondary_reload_info *sri) +{ + /* Double-word spills from general registers to non-offsettable memory + references (zero-extended addresses) require special handling. */ + if (TARGET_64BIT + && MEM_P (x) + && GET_MODE_SIZE (mode) > UNITS_PER_WORD + && INTEGER_CLASS_P (rclass) + && !offsettable_memref_p (x)) + { + sri->icode = (in_p + ? CODE_FOR_reload_noff_load + : CODE_FOR_reload_noff_store); + /* Add the cost of moving address to a temporary. */ + sri->extra_cost = 1; + + return NO_REGS; + } + + /* QImode spills from non-QI registers require + intermediate register on 32bit targets. */ + if (mode == QImode + && (MAYBE_MASK_CLASS_P (rclass) + || (!TARGET_64BIT && !in_p + && INTEGER_CLASS_P (rclass) + && MAYBE_NON_Q_CLASS_P (rclass)))) + { + int regno; + + if (REG_P (x)) + regno = REGNO (x); + else + regno = -1; + + if (regno >= FIRST_PSEUDO_REGISTER || SUBREG_P (x)) + regno = true_regnum (x); + + /* Return Q_REGS if the operand is in memory. */ + if (regno == -1) + return Q_REGS; + } + + /* This condition handles corner case where an expression involving + pointers gets vectorized. We're trying to use the address of a + stack slot as a vector initializer. + + (set (reg:V2DI 74 [ vect_cst_.2 ]) + (vec_duplicate:V2DI (reg/f:DI 20 frame))) + + Eventually frame gets turned into sp+offset like this: + + (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) + (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp) + (const_int 392 [0x188])))) + + That later gets turned into: + + (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) + (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp) + (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])))) + + We'll have the following reload recorded: + + Reload 0: reload_in (DI) = + (plus:DI (reg/f:DI 7 sp) + (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])) + reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) + SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine + reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188])) + reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) + reload_reg_rtx: (reg:V2DI 22 xmm1) + + Which isn't going to work since SSE instructions can't handle scalar + additions. Returning GENERAL_REGS forces the addition into integer + register and reload can handle subsequent reloads without problems. */ + + if (in_p && GET_CODE (x) == PLUS + && SSE_CLASS_P (rclass) + && SCALAR_INT_MODE_P (mode)) + return GENERAL_REGS; + + return NO_REGS; +} + +/* Implement TARGET_CLASS_LIKELY_SPILLED_P. 
*/ + +static bool +ix86_class_likely_spilled_p (reg_class_t rclass) +{ + switch (rclass) + { + case AREG: + case DREG: + case CREG: + case BREG: + case AD_REGS: + case SIREG: + case DIREG: + case SSE_FIRST_REG: + case FP_TOP_REG: + case FP_SECOND_REG: + case BND_REGS: + return true; + + default: + break; + } + + return false; +} + +/* If we are copying between general and FP registers, we need a memory + location. The same is true for SSE and MMX registers. + + To optimize register_move_cost performance, allow inline variant. + + The macro can't work reliably when one of the CLASSES is class containing + registers from multiple units (SSE, MMX, integer). We avoid this by never + combining those units in single alternative in the machine description. + Ensure that this constraint holds to avoid unexpected surprises. + + When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not + enforce these sanity checks. */ + +static inline bool +inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2, + machine_mode mode, int strict) +{ + if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS)) + return false; + if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) + || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) + || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) + || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) + || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) + || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)) + { + gcc_assert (!strict || lra_in_progress); + return true; + } + + if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) + return true; + + /* Between mask and general, we have moves no larger than word size. */ + if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2)) + && (GET_MODE_SIZE (mode) > UNITS_PER_WORD)) + return true; + + /* ??? This is a lie. We do have moves between mmx/general, and for + mmx/sse2. But by saying we need secondary memory we discourage the + register allocator from using the mmx registers unless needed. */ + if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) + return true; + + if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) + { + /* SSE1 doesn't have any direct moves from other classes. */ + if (!TARGET_SSE2) + return true; + + /* If the target says that inter-unit moves are more expensive + than moving through memory, then don't generate them. */ + if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC) + || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC)) + return true; + + /* Between SSE and general, we have moves no larger than word size. */ + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) + return true; + } + + return false; +} + +bool +ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2, + machine_mode mode, int strict) +{ + return inline_secondary_memory_needed (class1, class2, mode, strict); +} + +/* Implement the TARGET_CLASS_MAX_NREGS hook. + + On the 80386, this is the size of MODE in words, + except in the FP regs, where a single reg is always enough. */ + +static unsigned char +ix86_class_max_nregs (reg_class_t rclass, machine_mode mode) +{ + if (MAYBE_INTEGER_CLASS_P (rclass)) + { + if (mode == XFmode) + return (TARGET_64BIT ? 2 : 3); + else if (mode == XCmode) + return (TARGET_64BIT ? 
4 : 6); + else + return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); + } + else + { + if (COMPLEX_MODE_P (mode)) + return 2; + else + return 1; + } +} + +/* Return true if the registers in CLASS cannot represent the change from + modes FROM to TO. */ + +bool +ix86_cannot_change_mode_class (machine_mode from, machine_mode to, + enum reg_class regclass) +{ + if (from == to) + return false; + + /* x87 registers can't do subreg at all, as all values are reformatted + to extended precision. */ + if (MAYBE_FLOAT_CLASS_P (regclass)) + return true; + + if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass)) + { + /* Vector registers do not support QI or HImode loads. If we don't + disallow a change to these modes, reload will assume it's ok to + drop the subreg from (subreg:SI (reg:HI 100) 0). This affects + the vec_dupv4hi pattern. */ + if (GET_MODE_SIZE (from) < 4) + return true; + } + + return false; +} + +/* Return the cost of moving data of mode M between a + register and memory. A value of 2 is the default; this cost is + relative to those in `REGISTER_MOVE_COST'. + + This function is used extensively by register_move_cost that is used to + build tables at startup. Make it inline in this case. + When IN is 2, return maximum of in and out move cost. + + If moving between registers and memory is more expensive than + between two registers, you should define this macro to express the + relative cost. + + Model also increased moving costs of QImode registers in non + Q_REGS classes. + */ +static inline int +inline_memory_move_cost (machine_mode mode, enum reg_class regclass, + int in) +{ + int cost; + if (FLOAT_CLASS_P (regclass)) + { + int index; + switch (mode) + { + case SFmode: + index = 0; + break; + case DFmode: + index = 1; + break; + case XFmode: + index = 2; + break; + default: + return 100; + } + if (in == 2) + return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]); + return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index]; + } + if (SSE_CLASS_P (regclass)) + { + int index; + switch (GET_MODE_SIZE (mode)) + { + case 4: + index = 0; + break; + case 8: + index = 1; + break; + case 16: + index = 2; + break; + default: + return 100; + } + if (in == 2) + return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]); + return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index]; + } + if (MMX_CLASS_P (regclass)) + { + int index; + switch (GET_MODE_SIZE (mode)) + { + case 4: + index = 0; + break; + case 8: + index = 1; + break; + default: + return 100; + } + if (in) + return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]); + return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index]; + } + switch (GET_MODE_SIZE (mode)) + { + case 1: + if (Q_CLASS_P (regclass) || TARGET_64BIT) + { + if (!in) + return ix86_cost->int_store[0]; + if (TARGET_PARTIAL_REG_DEPENDENCY + && optimize_function_for_speed_p (cfun)) + cost = ix86_cost->movzbl_load; + else + cost = ix86_cost->int_load[0]; + if (in == 2) + return MAX (cost, ix86_cost->int_store[0]); + return cost; + } + else + { + if (in == 2) + return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4); + if (in) + return ix86_cost->movzbl_load; + else + return ix86_cost->int_store[0] + 4; + } + break; + case 2: + if (in == 2) + return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]); + return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1]; + default: + /* Compute number of 32bit moves needed. TFmode is moved as XFmode. 
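
Both ix86_class_max_nregs and inline_memory_move_cost scale their result by the number of word-sized pieces, i.e. CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD). A small sketch of that rounded-up division (illustrative aside only; assumes the usual CEIL definition of (x + y - 1) / y):

    #include <stdio.h>

    /* Rounded-up division, as in CEIL (size, UNITS_PER_WORD).  */
    static unsigned ceil_div (unsigned size, unsigned unit)
    {
      return (size + unit - 1) / unit;
    }

    int main (void)
    {
      const unsigned units_per_word = 8;           /* x86-64 word size */
      const unsigned sizes[] = { 4, 8, 16, 32 };   /* SI, DI, TI, OI bytes */
      for (unsigned i = 0; i < sizeof sizes / sizeof *sizes; i++)
        printf ("%2u bytes -> %u word move(s)\n",
                sizes[i], ceil_div (sizes[i], units_per_word));
      return 0;
    }
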
*/ + if (mode == TFmode) + mode = XFmode; + if (in == 2) + cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]); + else if (in) + cost = ix86_cost->int_load[2]; + else + cost = ix86_cost->int_store[2]; + return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD); + } +} + +static int +ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, + bool in) +{ + return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0); +} + + +/* Return the cost of moving data from a register in class CLASS1 to + one in class CLASS2. + + It is not required that the cost always equal 2 when FROM is the same as TO; + on some machines it is expensive to move between registers if they are not + general registers. */ + +static int +ix86_register_move_cost (machine_mode mode, reg_class_t class1_i, + reg_class_t class2_i) +{ + enum reg_class class1 = (enum reg_class) class1_i; + enum reg_class class2 = (enum reg_class) class2_i; + + /* In case we require secondary memory, compute cost of the store followed + by load. In order to avoid bad register allocation choices, we need + for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */ + + if (inline_secondary_memory_needed (class1, class2, mode, 0)) + { + int cost = 1; + + cost += inline_memory_move_cost (mode, class1, 2); + cost += inline_memory_move_cost (mode, class2, 2); + + /* In case of copying from general_purpose_register we may emit multiple + stores followed by single load causing memory size mismatch stall. + Count this as arbitrarily high cost of 20. */ + if (targetm.class_max_nregs (class1, mode) + > targetm.class_max_nregs (class2, mode)) + cost += 20; + + /* In the case of FP/MMX moves, the registers actually overlap, and we + have to switch modes in order to treat them differently. */ + if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2)) + || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1))) + cost += 20; + + return cost; + } + + /* Moves between SSE/MMX and integer unit are expensive. */ + if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2) + || SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) + + /* ??? By keeping returned value relatively high, we limit the number + of moves between integer and MMX/SSE registers for all targets. + Additionally, high value prevents problem with x86_modes_tieable_p(), + where integer modes in MMX/SSE registers are not tieable + because of missing QImode and HImode moves to, from or between + MMX/SSE registers. */ + return MAX (8, ix86_cost->mmxsse_to_integer); + + if (MAYBE_FLOAT_CLASS_P (class1)) + return ix86_cost->fp_move; + if (MAYBE_SSE_CLASS_P (class1)) + return ix86_cost->sse_move; + if (MAYBE_MMX_CLASS_P (class1)) + return ix86_cost->mmx_move; + return 2; +} + +/* Return TRUE if hard register REGNO can hold a value of machine-mode + MODE. */ + +bool +ix86_hard_regno_mode_ok (int regno, machine_mode mode) +{ + /* Flags and only flags can only hold CCmode values. 
*/ + if (CC_REGNO_P (regno)) + return GET_MODE_CLASS (mode) == MODE_CC; + if (GET_MODE_CLASS (mode) == MODE_CC + || GET_MODE_CLASS (mode) == MODE_RANDOM + || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) + return false; + if (STACK_REGNO_P (regno)) + return VALID_FP_MODE_P (mode); + if (MASK_REGNO_P (regno)) + return (VALID_MASK_REG_MODE (mode) + || (TARGET_AVX512BW + && VALID_MASK_AVX512BW_MODE (mode))); + if (BND_REGNO_P (regno)) + return VALID_BND_REG_MODE (mode); + if (SSE_REGNO_P (regno)) + { + /* We implement the move patterns for all vector modes into and + out of SSE registers, even when no operation instructions + are available. */ + + /* For AVX-512 we allow, regardless of regno: + - XI mode + - any of 512-bit wide vector mode + - any scalar mode. */ + if (TARGET_AVX512F + && (mode == XImode + || VALID_AVX512F_REG_MODE (mode) + || VALID_AVX512F_SCALAR_MODE (mode))) + return true; + + /* TODO check for QI/HI scalars. */ + /* AVX512VL allows sse regs16+ for 128/256 bit modes. */ + if (TARGET_AVX512VL + && (mode == OImode + || mode == TImode + || VALID_AVX256_REG_MODE (mode) + || VALID_AVX512VL_128_REG_MODE (mode))) + return true; + + /* xmm16-xmm31 are only available for AVX-512. */ + if (EXT_REX_SSE_REGNO_P (regno)) + return false; + + /* OImode and AVX modes are available only when AVX is enabled. */ + return ((TARGET_AVX + && VALID_AVX256_REG_OR_OI_MODE (mode)) + || VALID_SSE_REG_MODE (mode) + || VALID_SSE2_REG_MODE (mode) + || VALID_MMX_REG_MODE (mode) + || VALID_MMX_REG_MODE_3DNOW (mode)); + } + if (MMX_REGNO_P (regno)) + { + /* We implement the move patterns for 3DNOW modes even in MMX mode, + so if the register is available at all, then we can move data of + the given mode into or out of it. */ + return (VALID_MMX_REG_MODE (mode) + || VALID_MMX_REG_MODE_3DNOW (mode)); + } + + if (mode == QImode) + { + /* Take care for QImode values - they can be in non-QI regs, + but then they do cause partial register stalls. */ + if (ANY_QI_REGNO_P (regno)) + return true; + if (!TARGET_PARTIAL_REG_STALL) + return true; + /* LRA checks if the hard register is OK for the given mode. + QImode values can live in non-QI regs, so we allow all + registers here. */ + if (lra_in_progress) + return true; + return !can_create_pseudo_p (); + } + /* We handle both integer and floats in the general purpose registers. */ + else if (VALID_INT_MODE_P (mode)) + return true; + else if (VALID_FP_MODE_P (mode)) + return true; + else if (VALID_DFP_MODE_P (mode)) + return true; + /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go + on to use that value in smaller contexts, this can easily force a + pseudo to be allocated to GENERAL_REGS. Since this is no worse than + supporting DImode, allow it. */ + else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode)) + return true; + + return false; +} + +/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a + tieable integer mode. */ + +static bool +ix86_tieable_integer_mode_p (machine_mode mode) +{ + switch (mode) + { + case HImode: + case SImode: + return true; + + case QImode: + return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL; + + case DImode: + return TARGET_64BIT; + + default: + return false; + } +} + +/* Return true if MODE1 is accessible in a register that can hold MODE2 + without copying. That is, all register classes that can hold MODE2 + can also hold MODE1. 
*/ + +bool +ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2) +{ + if (mode1 == mode2) + return true; + + if (ix86_tieable_integer_mode_p (mode1) + && ix86_tieable_integer_mode_p (mode2)) + return true; + + /* MODE2 being XFmode implies fp stack or general regs, which means we + can tie any smaller floating point modes to it. Note that we do not + tie this with TFmode. */ + if (mode2 == XFmode) + return mode1 == SFmode || mode1 == DFmode; + + /* MODE2 being DFmode implies fp stack, general or sse regs, which means + that we can tie it with SFmode. */ + if (mode2 == DFmode) + return mode1 == SFmode; + + /* If MODE2 is only appropriate for an SSE register, then tie with + any other mode acceptable to SSE registers. */ + if (GET_MODE_SIZE (mode2) == 32 + && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) + return (GET_MODE_SIZE (mode1) == 32 + && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); + if (GET_MODE_SIZE (mode2) == 16 + && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) + return (GET_MODE_SIZE (mode1) == 16 + && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); + + /* If MODE2 is appropriate for an MMX register, then tie + with any other mode acceptable to MMX registers. */ + if (GET_MODE_SIZE (mode2) == 8 + && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2)) + return (GET_MODE_SIZE (mode1) == 8 + && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1)); + + return false; +} + +/* Return the cost of moving between two registers of mode MODE. */ + +static int +ix86_set_reg_reg_cost (machine_mode mode) +{ + unsigned int units = UNITS_PER_WORD; + + switch (GET_MODE_CLASS (mode)) + { + default: + break; + + case MODE_CC: + units = GET_MODE_SIZE (CCmode); + break; + + case MODE_FLOAT: + if ((TARGET_SSE && mode == TFmode) + || (TARGET_80387 && mode == XFmode) + || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode) + || ((TARGET_80387 || TARGET_SSE) && mode == SFmode)) + units = GET_MODE_SIZE (mode); + break; + + case MODE_COMPLEX_FLOAT: + if ((TARGET_SSE && mode == TCmode) + || (TARGET_80387 && mode == XCmode) + || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode) + || ((TARGET_80387 || TARGET_SSE) && mode == SCmode)) + units = GET_MODE_SIZE (mode); + break; + + case MODE_VECTOR_INT: + case MODE_VECTOR_FLOAT: + if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) + || (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) + || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) + || (TARGET_SSE && VALID_SSE_REG_MODE (mode)) + || (TARGET_MMX && VALID_MMX_REG_MODE (mode))) + units = GET_MODE_SIZE (mode); + } + + /* Return the cost of moving between two registers of mode MODE, + assuming that the move will be in pieces of at most UNITS bytes. */ + return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units)); +} + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, + int *total, bool speed) +{ + rtx mask; + enum rtx_code code = GET_CODE (x); + enum rtx_code outer_code = (enum rtx_code) outer_code_i; + const struct processor_costs *cost = speed ? 
ix86_cost : &ix86_size_cost; + + switch (code) + { + case SET: + if (register_operand (SET_DEST (x), VOIDmode) + && reg_or_0_operand (SET_SRC (x), VOIDmode)) + { + *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x))); + return true; + } + return false; + + case CONST_INT: + case CONST: + case LABEL_REF: + case SYMBOL_REF: + if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode)) + *total = 3; + else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode)) + *total = 2; + else if (flag_pic && SYMBOLIC_CONST (x) + && !(TARGET_64BIT + && (GET_CODE (x) == LABEL_REF + || (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_LOCAL_P (x)))) + /* Use 0 cost for CONST to improve its propagation. */ + && (TARGET_64BIT || GET_CODE (x) != CONST)) + *total = 1; + else + *total = 0; + return true; + + case CONST_WIDE_INT: + *total = 0; + return true; + + case CONST_DOUBLE: + switch (standard_80387_constant_p (x)) + { + case 1: /* 0.0 */ + *total = 1; + return true; + default: /* Other constants */ + *total = 2; + return true; + case 0: + case -1: + break; + } + if (SSE_FLOAT_MODE_P (mode)) + { + case CONST_VECTOR: + switch (standard_sse_constant_p (x)) + { + case 0: + break; + case 1: /* 0: xor eliminates false dependency */ + *total = 0; + return true; + default: /* -1: cmp contains false dependency */ + *total = 1; + return true; + } + } + /* Fall back to (MEM (SYMBOL_REF)), since that's where + it'll probably end up. Add a penalty for size. */ + *total = (COSTS_N_INSNS (1) + + (flag_pic != 0 && !TARGET_64BIT) + + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2)); + return true; + + case ZERO_EXTEND: + /* The zero extensions is often completely free on x86_64, so make + it as cheap as possible. */ + if (TARGET_64BIT && mode == DImode + && GET_MODE (XEXP (x, 0)) == SImode) + *total = 1; + else if (TARGET_ZERO_EXTEND_WITH_AND) + *total = cost->add; + else + *total = cost->movzx; + return false; + + case SIGN_EXTEND: + *total = cost->movsx; + return false; + + case ASHIFT: + if (SCALAR_INT_MODE_P (mode) + && GET_MODE_SIZE (mode) < UNITS_PER_WORD + && CONST_INT_P (XEXP (x, 1))) + { + HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); + if (value == 1) + { + *total = cost->add; + return false; + } + if ((value == 2 || value == 3) + && cost->lea <= cost->shift_const) + { + *total = cost->lea; + return false; + } + } + /* FALLTHRU */ + + case ROTATE: + case ASHIFTRT: + case LSHIFTRT: + case ROTATERT: + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { + /* ??? Should be SSE vector operation cost. */ + /* At least for published AMD latencies, this really is the same + as the latency for a simple fpu operation like fabs. */ + /* V*QImode is emulated with 1-11 insns. */ + if (mode == V16QImode || mode == V32QImode) + { + int count = 11; + if (TARGET_XOP && mode == V16QImode) + { + /* For XOP we use vpshab, which requires a broadcast of the + value to the variable shift insn. For constants this + means a V16Q const in mem; even when we can perform the + shift with one insn set the cost to prefer paddb. */ + if (CONSTANT_P (XEXP (x, 1))) + { + *total = (cost->fabs + + rtx_cost (XEXP (x, 0), mode, code, 0, speed) + + (speed ? 
2 : COSTS_N_BYTES (16))); + return true; + } + count = 3; + } + else if (TARGET_SSSE3) + count = 7; + *total = cost->fabs * count; + } + else + *total = cost->fabs; + } + else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) + { + if (CONST_INT_P (XEXP (x, 1))) + { + if (INTVAL (XEXP (x, 1)) > 32) + *total = cost->shift_const + COSTS_N_INSNS (2); + else + *total = cost->shift_const * 2; + } + else + { + if (GET_CODE (XEXP (x, 1)) == AND) + *total = cost->shift_var * 2; + else + *total = cost->shift_var * 6 + COSTS_N_INSNS (2); + } + } + else + { + if (CONST_INT_P (XEXP (x, 1))) + *total = cost->shift_const; + else if (SUBREG_P (XEXP (x, 1)) + && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND) + { + /* Return the cost after shift-and truncation. */ + *total = cost->shift_var; + return true; + } + else + *total = cost->shift_var; + } + return false; + + case FMA: + { + rtx sub; + + gcc_assert (FLOAT_MODE_P (mode)); + gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F); + + /* ??? SSE scalar/vector cost should be used here. */ + /* ??? Bald assumption that fma has the same cost as fmul. */ + *total = cost->fmul; + *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed); + + /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */ + sub = XEXP (x, 0); + if (GET_CODE (sub) == NEG) + sub = XEXP (sub, 0); + *total += rtx_cost (sub, mode, FMA, 0, speed); + + sub = XEXP (x, 2); + if (GET_CODE (sub) == NEG) + sub = XEXP (sub, 0); + *total += rtx_cost (sub, mode, FMA, 2, speed); + return true; + } + + case MULT: + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + { + /* ??? SSE scalar cost should be used here. */ + *total = cost->fmul; + return false; + } + else if (X87_FLOAT_MODE_P (mode)) + { + *total = cost->fmul; + return false; + } + else if (FLOAT_MODE_P (mode)) + { + /* ??? SSE vector cost should be used here. */ + *total = cost->fmul; + return false; + } + else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { + /* V*QImode is emulated with 7-13 insns. */ + if (mode == V16QImode || mode == V32QImode) + { + int extra = 11; + if (TARGET_XOP && mode == V16QImode) + extra = 5; + else if (TARGET_SSSE3) + extra = 6; + *total = cost->fmul * 2 + cost->fabs * extra; + } + /* V*DImode is emulated with 5-8 insns. */ + else if (mode == V2DImode || mode == V4DImode) + { + if (TARGET_XOP && mode == V2DImode) + *total = cost->fmul * 2 + cost->fabs * 3; + else + *total = cost->fmul * 3 + cost->fabs * 5; + } + /* Without sse4.1, we don't have PMULLD; it's emulated with 7 + insns, including two PMULUDQ. */ + else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX)) + *total = cost->fmul * 2 + cost->fabs * 5; + else + *total = cost->fmul; + return false; + } + else + { + rtx op0 = XEXP (x, 0); + rtx op1 = XEXP (x, 1); + int nbits; + if (CONST_INT_P (XEXP (x, 1))) + { + unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); + for (nbits = 0; value != 0; value &= value - 1) + nbits++; + } + else + /* This is arbitrary. */ + nbits = 7; + + /* Compute costs correctly for widening multiplication. 
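
The constant-multiplier arm of the MULT cost just above counts the set bits of the immediate with the "value &= value - 1" loop (Kernighan's trick), and then charges mult_bit per set bit. A standalone sketch of that bit count (illustrative aside only):

    #include <stdio.h>

    /* Each "value &= value - 1" clears the lowest set bit, so the loop
       runs once per set bit of the multiplier.  */
    static int count_set_bits (unsigned long long value)
    {
      int nbits = 0;
      for (; value != 0; value &= value - 1)
        nbits++;
      return nbits;
    }

    int main (void)
    {
      printf ("%d %d %d\n",
              count_set_bits (0),     /* 0 */
              count_set_bits (10),    /* 2 (0b1010) */
              count_set_bits (255));  /* 8 */
      return 0;
    }
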
*/ + if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) + && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 + == GET_MODE_SIZE (mode)) + { + int is_mulwiden = 0; + machine_mode inner_mode = GET_MODE (op0); + + if (GET_CODE (op0) == GET_CODE (op1)) + is_mulwiden = 1, op1 = XEXP (op1, 0); + else if (CONST_INT_P (op1)) + { + if (GET_CODE (op0) == SIGN_EXTEND) + is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) + == INTVAL (op1); + else + is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); + } + + if (is_mulwiden) + op0 = XEXP (op0, 0), mode = GET_MODE (op0); + } + + *total = (cost->mult_init[MODE_INDEX (mode)] + + nbits * cost->mult_bit + + rtx_cost (op0, mode, outer_code, opno, speed) + + rtx_cost (op1, mode, outer_code, opno, speed)); + + return true; + } + + case DIV: + case UDIV: + case MOD: + case UMOD: + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + /* ??? SSE cost should be used here. */ + *total = cost->fdiv; + else if (X87_FLOAT_MODE_P (mode)) + *total = cost->fdiv; + else if (FLOAT_MODE_P (mode)) + /* ??? SSE vector cost should be used here. */ + *total = cost->fdiv; + else + *total = cost->divide[MODE_INDEX (mode)]; + return false; + + case PLUS: + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) + { + if (GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)) + && CONSTANT_P (XEXP (x, 1))) + { + HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); + if (val == 2 || val == 4 || val == 8) + { + *total = cost->lea; + *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, + outer_code, opno, speed); + *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode, + outer_code, opno, speed); + *total += rtx_cost (XEXP (x, 1), mode, + outer_code, opno, speed); + return true; + } + } + else if (GET_CODE (XEXP (x, 0)) == MULT + && CONST_INT_P (XEXP (XEXP (x, 0), 1))) + { + HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); + if (val == 2 || val == 4 || val == 8) + { + *total = cost->lea; + *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, + outer_code, opno, speed); + *total += rtx_cost (XEXP (x, 1), mode, + outer_code, opno, speed); + return true; + } + } + else if (GET_CODE (XEXP (x, 0)) == PLUS) + { + *total = cost->lea; + *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, + outer_code, opno, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, + outer_code, opno, speed); + *total += rtx_cost (XEXP (x, 1), mode, + outer_code, opno, speed); + return true; + } + } + /* FALLTHRU */ + + case MINUS: + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + { + /* ??? SSE cost should be used here. */ + *total = cost->fadd; + return false; + } + else if (X87_FLOAT_MODE_P (mode)) + { + *total = cost->fadd; + return false; + } + else if (FLOAT_MODE_P (mode)) + { + /* ??? SSE vector cost should be used here. */ + *total = cost->fadd; + return false; + } + /* FALLTHRU */ + + case AND: + case IOR: + case XOR: + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) > UNITS_PER_WORD) + { + *total = (cost->add * 2 + + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) + << (GET_MODE (XEXP (x, 0)) != DImode)) + + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed) + << (GET_MODE (XEXP (x, 1)) != DImode))); + return true; + } + /* FALLTHRU */ + + case NEG: + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + { + /* ??? SSE cost should be used here. 
*/ + *total = cost->fchs; + return false; + } + else if (X87_FLOAT_MODE_P (mode)) + { + *total = cost->fchs; + return false; + } + else if (FLOAT_MODE_P (mode)) + { + /* ??? SSE vector cost should be used here. */ + *total = cost->fchs; + return false; + } + /* FALLTHRU */ + + case NOT: + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { + /* ??? Should be SSE vector operation cost. */ + /* At least for published AMD latencies, this really is the same + as the latency for a simple fpu operation like fabs. */ + *total = cost->fabs; + } + else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) + *total = cost->add * 2; + else + *total = cost->add; + return false; + + case COMPARE: + if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT + && XEXP (XEXP (x, 0), 1) == const1_rtx + && CONST_INT_P (XEXP (XEXP (x, 0), 2)) + && XEXP (x, 1) == const0_rtx) + { + /* This kind of construct is implemented using test[bwl]. + Treat it as if we had an AND. */ + mode = GET_MODE (XEXP (XEXP (x, 0), 0)); + *total = (cost->add + + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code, + opno, speed) + + rtx_cost (const1_rtx, mode, outer_code, opno, speed)); + return true; + } + + /* The embedded comparison operand is completely free. */ + if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))) + && XEXP (x, 1) == const0_rtx) + *total = 0; + + return false; + + case FLOAT_EXTEND: + if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) + *total = 0; + return false; + + case ABS: + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + /* ??? SSE cost should be used here. */ + *total = cost->fabs; + else if (X87_FLOAT_MODE_P (mode)) + *total = cost->fabs; + else if (FLOAT_MODE_P (mode)) + /* ??? SSE vector cost should be used here. */ + *total = cost->fabs; + return false; + + case SQRT: + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + /* ??? SSE cost should be used here. */ + *total = cost->fsqrt; + else if (X87_FLOAT_MODE_P (mode)) + *total = cost->fsqrt; + else if (FLOAT_MODE_P (mode)) + /* ??? SSE vector cost should be used here. */ + *total = cost->fsqrt; + return false; + + case UNSPEC: + if (XINT (x, 1) == UNSPEC_TP) + *total = 0; + return false; + + case VEC_SELECT: + case VEC_CONCAT: + case VEC_DUPLICATE: + /* ??? Assume all of these vector manipulation patterns are + recognizable. In which case they all pretty much have the + same cost. */ + *total = cost->fabs; + return true; + case VEC_MERGE: + mask = XEXP (x, 2); + /* This is masked instruction, assume the same cost, + as nonmasked variant. */ + if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask))) + *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed); + else + *total = cost->fabs; + return true; + + default: + return false; + } +} + +#if TARGET_MACHO + +static int current_machopic_label_num; + +/* Given a symbol name and its associated stub, write out the + definition of the stub. */ + +void +machopic_output_stub (FILE *file, const char *symb, const char *stub) +{ + unsigned int length; + char *binder_name, *symbol_name, lazy_ptr_name[32]; + int label = ++current_machopic_label_num; + + /* For 64-bit we shouldn't get here. */ + gcc_assert (!TARGET_64BIT); + + /* Lose our funky encoding stuff so it doesn't contaminate the stub. 
*/ + symb = targetm.strip_name_encoding (symb); + + length = strlen (stub); + binder_name = XALLOCAVEC (char, length + 32); + GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); + + length = strlen (symb); + symbol_name = XALLOCAVEC (char, length + 32); + GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); + + sprintf (lazy_ptr_name, "L%d$lz", label); + + if (MACHOPIC_ATT_STUB) + switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]); + else if (MACHOPIC_PURE) + switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]); + else + switch_to_section (darwin_sections[machopic_symbol_stub_section]); + + fprintf (file, "%s:\n", stub); + fprintf (file, "\t.indirect_symbol %s\n", symbol_name); + + if (MACHOPIC_ATT_STUB) + { + fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n"); + } + else if (MACHOPIC_PURE) + { + /* PIC stub. */ + /* 25-byte PIC stub using "CALL get_pc_thunk". */ + rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */); + output_set_got (tmp, NULL_RTX); /* "CALL ___.get_pc_thunk.cx". */ + fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", + label, lazy_ptr_name, label); + fprintf (file, "\tjmp\t*%%ecx\n"); + } + else + fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name); + + /* The AT&T-style ("self-modifying") stub is not lazily bound, thus + it needs no stub-binding-helper. */ + if (MACHOPIC_ATT_STUB) + return; + + fprintf (file, "%s:\n", binder_name); + + if (MACHOPIC_PURE) + { + fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name); + fprintf (file, "\tpushl\t%%ecx\n"); + } + else + fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name); + + fputs ("\tjmp\tdyld_stub_binding_helper\n", file); + + /* N.B. Keep the correspondence of these + 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the + old-pic/new-pic/non-pic stubs; altering this will break + compatibility with existing dylibs. */ + if (MACHOPIC_PURE) + { + /* 25-byte PIC stub using "CALL get_pc_thunk". */ + switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]); + } + else + /* 16-byte -mdynamic-no-pic stub. */ + switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]); + + fprintf (file, "%s:\n", lazy_ptr_name); + fprintf (file, "\t.indirect_symbol %s\n", symbol_name); + fprintf (file, ASM_LONG "%s\n", binder_name); +} +#endif /* TARGET_MACHO */ + +/* Order the registers for register allocator. */ + +void +x86_order_regs_for_local_alloc (void) +{ + int pos = 0; + int i; + + /* First allocate the local general purpose registers. */ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (GENERAL_REGNO_P (i) && call_used_regs[i]) + reg_alloc_order [pos++] = i; + + /* Global general purpose registers. */ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (GENERAL_REGNO_P (i) && !call_used_regs[i]) + reg_alloc_order [pos++] = i; + + /* x87 registers come first in case we are doing FP math + using them. */ + if (!TARGET_SSE_MATH) + for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) + reg_alloc_order [pos++] = i; + + /* SSE registers. */ + for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) + reg_alloc_order [pos++] = i; + for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) + reg_alloc_order [pos++] = i; + + /* Extended REX SSE registers. */ + for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) + reg_alloc_order [pos++] = i; + + /* Mask register. */ + for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++) + reg_alloc_order [pos++] = i; + + /* MPX bound registers. 
*/ + for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++) + reg_alloc_order [pos++] = i; + + /* x87 registers. */ + if (TARGET_SSE_MATH) + for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) + reg_alloc_order [pos++] = i; + + for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) + reg_alloc_order [pos++] = i; + + /* Initialize the rest of array as we do not allocate some registers + at all. */ + while (pos < FIRST_PSEUDO_REGISTER) + reg_alloc_order [pos++] = 0; +} + +/* Handle a "callee_pop_aggregate_return" attribute; arguments as + in struct attribute_spec handler. */ +static tree +ix86_handle_callee_pop_aggregate_return (tree *node, tree name, + tree args, + int, + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_TYPE + && TREE_CODE (*node) != METHOD_TYPE + && TREE_CODE (*node) != FIELD_DECL + && TREE_CODE (*node) != TYPE_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + return NULL_TREE; + } + if (TARGET_64BIT) + { + warning (OPT_Wattributes, "%qE attribute only available for 32-bit", + name); + *no_add_attrs = true; + return NULL_TREE; + } + if (is_attribute_p ("callee_pop_aggregate_return", name)) + { + tree cst; + + cst = TREE_VALUE (args); + if (TREE_CODE (cst) != INTEGER_CST) + { + warning (OPT_Wattributes, + "%qE attribute requires an integer constant argument", + name); + *no_add_attrs = true; + } + else if (compare_tree_int (cst, 0) != 0 + && compare_tree_int (cst, 1) != 0) + { + warning (OPT_Wattributes, + "argument to %qE attribute is neither zero, nor one", + name); + *no_add_attrs = true; + } + + return NULL_TREE; + } + + return NULL_TREE; +} + +/* Handle a "ms_abi" or "sysv" attribute; arguments as in + struct attribute_spec.handler. */ +static tree +ix86_handle_abi_attribute (tree *node, tree name, tree, int, + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_TYPE + && TREE_CODE (*node) != METHOD_TYPE + && TREE_CODE (*node) != FIELD_DECL + && TREE_CODE (*node) != TYPE_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + return NULL_TREE; + } + + /* Can combine regparm with all attributes but fastcall. */ + if (is_attribute_p ("ms_abi", name)) + { + if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node))) + { + error ("ms_abi and sysv_abi attributes are not compatible"); + } + + return NULL_TREE; + } + else if (is_attribute_p ("sysv_abi", name)) + { + if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node))) + { + error ("ms_abi and sysv_abi attributes are not compatible"); + } + + return NULL_TREE; + } + + return NULL_TREE; +} + +/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in + struct attribute_spec.handler. 
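
The handler above rejects combining ms_abi with sysv_abi on a single declaration. A usage sketch (illustrative aside only; these attributes are accepted by GCC on x86 targets):

    /* Either calling convention may be requested, but not both at once.  */
    int __attribute__ ((ms_abi))   win64_style (int a, int b);
    int __attribute__ ((sysv_abi)) sysv_style  (int a, int b);

    /* int __attribute__ ((ms_abi, sysv_abi)) bad (int);
       -> "ms_abi and sysv_abi attributes are not compatible"  */
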
*/ +static tree +ix86_handle_struct_attribute (tree *node, tree name, tree, int, + bool *no_add_attrs) +{ + tree *type = NULL; + if (DECL_P (*node)) + { + if (TREE_CODE (*node) == TYPE_DECL) + type = &TREE_TYPE (*node); + } + else + type = node; + + if (!(type && RECORD_OR_UNION_TYPE_P (*type))) + { + warning (OPT_Wattributes, "%qE attribute ignored", + name); + *no_add_attrs = true; + } + + else if ((is_attribute_p ("ms_struct", name) + && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) + || ((is_attribute_p ("gcc_struct", name) + && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) + { + warning (OPT_Wattributes, "%qE incompatible attribute ignored", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +static tree +ix86_handle_fndecl_attribute (tree *node, tree name, tree, int, + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + return NULL_TREE; +} + +static bool +ix86_ms_bitfield_layout_p (const_tree record_type) +{ + return ((TARGET_MS_BITFIELD_LAYOUT + && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) + || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type))); +} + +/* Returns an expression indicating where the this parameter is + located on entry to the FUNCTION. */ + +static rtx +x86_this_parameter (tree function) +{ + tree type = TREE_TYPE (function); + bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0; + int nregs; + + if (TARGET_64BIT) + { + const int *parm_regs; + + if (ix86_function_type_abi (type) == MS_ABI) + parm_regs = x86_64_ms_abi_int_parameter_registers; + else + parm_regs = x86_64_int_parameter_registers; + return gen_rtx_REG (Pmode, parm_regs[aggr]); + } + + nregs = ix86_function_regparm (type, function); + + if (nregs > 0 && !stdarg_p (type)) + { + int regno; + unsigned int ccvt = ix86_get_callcvt (type); + + if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) + regno = aggr ? DX_REG : CX_REG; + else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) + { + regno = CX_REG; + if (aggr) + return gen_rtx_MEM (SImode, + plus_constant (Pmode, stack_pointer_rtx, 4)); + } + else + { + regno = AX_REG; + if (aggr) + { + regno = DX_REG; + if (nregs == 1) + return gen_rtx_MEM (SImode, + plus_constant (Pmode, + stack_pointer_rtx, 4)); + } + } + return gen_rtx_REG (SImode, regno); + } + + return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx, + aggr ? 8 : 4)); +} + +/* Determine whether x86_output_mi_thunk can succeed. */ + +static bool +x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset, + const_tree function) +{ + /* 64-bit can handle anything. */ + if (TARGET_64BIT) + return true; + + /* For 32-bit, everything's fine if we have one free register. */ + if (ix86_function_regparm (TREE_TYPE (function), function) < 3) + return true; + + /* Need a free register for vcall_offset. */ + if (vcall_offset) + return false; + + /* Need a free register for GOT references. */ + if (flag_pic && !targetm.binds_local_p (function)) + return false; + + /* Otherwise ok. */ + return true; +} + +/* Output the assembler code for a thunk function. THUNK_DECL is the + declaration for the thunk function itself, FUNCTION is the decl for + the target function. DELTA is an immediate constant offset to be + added to THIS. If VCALL_OFFSET is nonzero, the word at + *(*this + vcall_offset) should be added to THIS. 
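
A C-level model of what the thunk described above computes: the incoming this is adjusted by the fixed DELTA and, when VCALL_OFFSET is non-zero, by an extra word loaded from the object's vtable, before control is handed to the real function. This is only a conceptual sketch of the emitted code, not the code itself; all names are invented:

    /* Conceptual model of the adjustment done by x86_output_mi_thunk.  */
    typedef void (*real_fn) (void *thisp);

    void thunk_model (void *thisp, long delta, long vcall_offset, real_fn target)
    {
      char *adjusted = (char *) thisp + delta;
      if (vcall_offset != 0)
        {
          /* The first word of the (adjusted) object is the vtable pointer;
             the extra offset lives at *(vptr + vcall_offset).  */
          char *vptr = *(char **) adjusted;
          adjusted += *(long *) (vptr + vcall_offset);
        }
      target (adjusted);               /* tail call in the real thunk */
    }
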
*/ + +static void +x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset, tree function) +{ + rtx this_param = x86_this_parameter (function); + rtx this_reg, tmp, fnaddr; + unsigned int tmp_regno; + rtx_insn *insn; + + if (TARGET_64BIT) + tmp_regno = R10_REG; + else + { + unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function)); + if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) + tmp_regno = AX_REG; + else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) + tmp_regno = DX_REG; + else + tmp_regno = CX_REG; + } + + emit_note (NOTE_INSN_PROLOGUE_END); + + /* If VCALL_OFFSET, we'll need THIS in a register. Might as well + pull it in now and let DELTA benefit. */ + if (REG_P (this_param)) + this_reg = this_param; + else if (vcall_offset) + { + /* Put the this parameter into %eax. */ + this_reg = gen_rtx_REG (Pmode, AX_REG); + emit_move_insn (this_reg, this_param); + } + else + this_reg = NULL_RTX; + + /* Adjust the this parameter by a fixed constant. */ + if (delta) + { + rtx delta_rtx = GEN_INT (delta); + rtx delta_dst = this_reg ? this_reg : this_param; + + if (TARGET_64BIT) + { + if (!x86_64_general_operand (delta_rtx, Pmode)) + { + tmp = gen_rtx_REG (Pmode, tmp_regno); + emit_move_insn (tmp, delta_rtx); + delta_rtx = tmp; + } + } + + ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx); + } + + /* Adjust the this parameter by a value stored in the vtable. */ + if (vcall_offset) + { + rtx vcall_addr, vcall_mem, this_mem; + + tmp = gen_rtx_REG (Pmode, tmp_regno); + + this_mem = gen_rtx_MEM (ptr_mode, this_reg); + if (Pmode != ptr_mode) + this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem); + emit_move_insn (tmp, this_mem); + + /* Adjust the this parameter. */ + vcall_addr = plus_constant (Pmode, tmp, vcall_offset); + if (TARGET_64BIT + && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true)) + { + rtx tmp2 = gen_rtx_REG (Pmode, R11_REG); + emit_move_insn (tmp2, GEN_INT (vcall_offset)); + vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2); + } + + vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr); + if (Pmode != ptr_mode) + emit_insn (gen_addsi_1_zext (this_reg, + gen_rtx_REG (ptr_mode, + REGNO (this_reg)), + vcall_mem)); + else + ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem); + } + + /* If necessary, drop THIS back to its stack slot. */ + if (this_reg && this_reg != this_param) + emit_move_insn (this_param, this_reg); + + fnaddr = XEXP (DECL_RTL (function), 0); + if (TARGET_64BIT) + { + if (!flag_pic || targetm.binds_local_p (function) + || TARGET_PECOFF) + ; + else + { + tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL); + tmp = gen_rtx_CONST (Pmode, tmp); + fnaddr = gen_const_mem (Pmode, tmp); + } + } + else + { + if (!flag_pic || targetm.binds_local_p (function)) + ; +#if TARGET_MACHO + else if (TARGET_MACHO) + { + fnaddr = machopic_indirect_call_target (DECL_RTL (function)); + fnaddr = XEXP (fnaddr, 0); + } +#endif /* TARGET_MACHO */ + else + { + tmp = gen_rtx_REG (Pmode, CX_REG); + output_set_got (tmp, NULL_RTX); + + fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT); + fnaddr = gen_rtx_CONST (Pmode, fnaddr); + fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr); + fnaddr = gen_const_mem (Pmode, fnaddr); + } + } + + /* Our sibling call patterns do not allow memories, because we have no + predicate that can distinguish between frame and non-frame memory. + For our purposes here, we can get away with (ab)using a jump pattern, + because we're going to do no optimization. 
*/ + if (MEM_P (fnaddr)) + { + if (sibcall_insn_operand (fnaddr, word_mode)) + { + fnaddr = XEXP (DECL_RTL (function), 0); + tmp = gen_rtx_MEM (QImode, fnaddr); + tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx); + tmp = emit_call_insn (tmp); + SIBLING_CALL_P (tmp) = 1; + } + else + emit_jump_insn (gen_indirect_jump (fnaddr)); + } + else + { + if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr)) + { + // CM_LARGE_PIC always uses pseudo PIC register which is + // uninitialized. Since FUNCTION is local and calling it + // doesn't go through PLT, we use scratch register %r11 as + // PIC register and initialize it here. + pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG); + ix86_init_large_pic_reg (tmp_regno); + fnaddr = legitimize_pic_address (fnaddr, + gen_rtx_REG (Pmode, tmp_regno)); + } + + if (!sibcall_insn_operand (fnaddr, word_mode)) + { + tmp = gen_rtx_REG (word_mode, tmp_regno); + if (GET_MODE (fnaddr) != word_mode) + fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr); + emit_move_insn (tmp, fnaddr); + fnaddr = tmp; + } + + tmp = gen_rtx_MEM (QImode, fnaddr); + tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx); + tmp = emit_call_insn (tmp); + SIBLING_CALL_P (tmp) = 1; + } + emit_barrier (); + + /* Emit just enough of rest_of_compilation to get the insns emitted. + Note that use_thunk calls assemble_start_function et al. */ + insn = get_insns (); + shorten_branches (insn); + final_start_function (insn, file, 1); + final (insn, file, 1); + final_end_function (); +} + +static void +x86_file_start (void) +{ + default_file_start (); + if (TARGET_16BIT) + fputs ("\t.code16gcc\n", asm_out_file); +#if TARGET_MACHO + darwin_file_start (); +#endif + if (X86_FILE_START_VERSION_DIRECTIVE) + fputs ("\t.version\t\"01.01\"\n", asm_out_file); + if (X86_FILE_START_FLTUSED) + fputs ("\t.global\t__fltused\n", asm_out_file); + if (ix86_asm_dialect == ASM_INTEL) + fputs ("\t.intel_syntax noprefix\n", asm_out_file); +} + +int +x86_field_alignment (tree field, int computed) +{ + machine_mode mode; + tree type = TREE_TYPE (field); + + if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) + return computed; + if (TARGET_IAMCU) + return iamcu_alignment (type, computed); + mode = TYPE_MODE (strip_array_types (type)); + if (mode == DFmode || mode == DCmode + || GET_MODE_CLASS (mode) == MODE_INT + || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) + return MIN (32, computed); + return computed; +} + +/* Print call to TARGET to FILE. */ + +static void +x86_print_call_or_nop (FILE *file, const char *target) +{ + if (flag_nop_mcount) + fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */ + else + fprintf (file, "1:\tcall\t%s\n", target); +} + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. */ +void +x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) +{ + const char *mcount_name = (flag_fentry ? 
MCOUNT_NAME_BEFORE_PROLOGUE + : MCOUNT_NAME); + if (TARGET_64BIT) + { +#ifndef NO_PROFILE_COUNTERS + fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno); +#endif + + if (!TARGET_PECOFF && flag_pic) + fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name); + else + x86_print_call_or_nop (file, mcount_name); + } + else if (flag_pic) + { +#ifndef NO_PROFILE_COUNTERS + fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n", + LPREFIX, labelno); +#endif + fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name); + } + else + { +#ifndef NO_PROFILE_COUNTERS + fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n", + LPREFIX, labelno); +#endif + x86_print_call_or_nop (file, mcount_name); + } + + if (flag_record_mcount) + { + fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n"); + fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); + fprintf (file, "\t.previous\n"); + } +} + +/* We don't have exact information about the insn sizes, but we may assume + quite safely that we are informed about all 1 byte insns and memory + address sizes. This is enough to eliminate unnecessary padding in + 99% of cases. */ + +static int +min_insn_size (rtx_insn *insn) +{ + int l = 0, len; + + if (!INSN_P (insn) || !active_insn_p (insn)) + return 0; + + /* Discard alignments we've emit and jump instructions. */ + if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE + && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) + return 0; + + /* Important case - calls are always 5 bytes. + It is common to have many calls in the row. */ + if (CALL_P (insn) + && symbolic_reference_mentioned_p (PATTERN (insn)) + && !SIBLING_CALL_P (insn)) + return 5; + len = get_attr_length (insn); + if (len <= 1) + return 1; + + /* For normal instructions we rely on get_attr_length being exact, + with a few exceptions. */ + if (!JUMP_P (insn)) + { + enum attr_type type = get_attr_type (insn); + + switch (type) + { + case TYPE_MULTI: + if (GET_CODE (PATTERN (insn)) == ASM_INPUT + || asm_noperands (PATTERN (insn)) >= 0) + return 0; + break; + case TYPE_OTHER: + case TYPE_FCMP: + break; + default: + /* Otherwise trust get_attr_length. */ + return len; + } + + l = get_attr_length_address (insn); + if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn))) + l = 4; + } + if (l) + return 1+l; + else + return 2; +} + +#ifdef ASM_OUTPUT_MAX_SKIP_PAD + +/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte + window. */ + +static void +ix86_avoid_jump_mispredicts (void) +{ + rtx_insn *insn, *start = get_insns (); + int nbytes = 0, njumps = 0; + bool isjump = false; + + /* Look for all minimal intervals of instructions containing 4 jumps. + The intervals are bounded by START and INSN. NBYTES is the total + size of instructions in the interval including INSN and not including + START. When the NBYTES is smaller than 16 bytes, it is possible + that the end of START and INSN ends up in the same 16byte page. + + The smallest offset in the page INSN can start is the case where START + ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN). + We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN). + + Don't consider asm goto as jump, while it can contain a jump, it doesn't + have to, control transfer to label(s) can be performed through other + means, and also we estimate minimum length of all asm stmts as 0. 
*/ + for (insn = start; insn; insn = NEXT_INSN (insn)) + { + int min_size; + + if (LABEL_P (insn)) + { + int align = label_to_alignment (insn); + int max_skip = label_to_max_skip (insn); + + if (max_skip > 15) + max_skip = 15; + /* If align > 3, only up to 16 - max_skip - 1 bytes can be + already in the current 16 byte page, because otherwise + ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer + bytes to reach 16 byte boundary. */ + if (align <= 0 + || (align <= 3 && max_skip != (1 << align) - 1)) + max_skip = 0; + if (dump_file) + fprintf (dump_file, "Label %i with max_skip %i\n", + INSN_UID (insn), max_skip); + if (max_skip) + { + while (nbytes + max_skip >= 16) + { + start = NEXT_INSN (start); + if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0) + || CALL_P (start)) + njumps--, isjump = true; + else + isjump = false; + nbytes -= min_insn_size (start); + } + } + continue; + } + + min_size = min_insn_size (insn); + nbytes += min_size; + if (dump_file) + fprintf (dump_file, "Insn %i estimated to %i bytes\n", + INSN_UID (insn), min_size); + if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0) + || CALL_P (insn)) + njumps++; + else + continue; + + while (njumps > 3) + { + start = NEXT_INSN (start); + if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0) + || CALL_P (start)) + njumps--, isjump = true; + else + isjump = false; + nbytes -= min_insn_size (start); + } + gcc_assert (njumps >= 0); + if (dump_file) + fprintf (dump_file, "Interval %i to %i has %i bytes\n", + INSN_UID (start), INSN_UID (insn), nbytes); + + if (njumps == 3 && isjump && nbytes < 16) + { + int padsize = 15 - nbytes + min_insn_size (insn); + + if (dump_file) + fprintf (dump_file, "Padding insn %i by %i bytes!\n", + INSN_UID (insn), padsize); + emit_insn_before (gen_pad (GEN_INT (padsize)), insn); + } + } +} +#endif + +/* AMD Athlon works faster + when RET is not destination of conditional jump or directly preceded + by other jump instruction. We avoid the penalty by inserting NOP just + before the RET instructions in such cases. */ +static void +ix86_pad_returns (void) +{ + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) + { + basic_block bb = e->src; + rtx_insn *ret = BB_END (bb); + rtx_insn *prev; + bool replace = false; + + if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret)) + || optimize_bb_for_size_p (bb)) + continue; + for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev)) + if (active_insn_p (prev) || LABEL_P (prev)) + break; + if (prev && LABEL_P (prev)) + { + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, bb->preds) + if (EDGE_FREQUENCY (e) && e->src->index >= 0 + && !(e->flags & EDGE_FALLTHRU)) + { + replace = true; + break; + } + } + if (!replace) + { + prev = prev_active_insn (ret); + if (prev + && ((JUMP_P (prev) && any_condjump_p (prev)) + || CALL_P (prev))) + replace = true; + /* Empty functions get branch mispredict even when + the jump destination is not visible to us. */ + if (!prev && !optimize_function_for_size_p (cfun)) + replace = true; + } + if (replace) + { + emit_jump_insn_before (gen_simple_return_internal_long (), ret); + delete_insn (ret); + } + } +} + +/* Count the minimum number of instructions in BB. Return 4 if the + number of instructions >= 4. */ + +static int +ix86_count_insn_bb (basic_block bb) +{ + rtx_insn *insn; + int insn_count = 0; + + /* Count number of instructions in this block. Return 4 if the number + of instructions >= 4. */ + FOR_BB_INSNS (bb, insn) + { + /* Only happen in exit blocks. 
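
When an interval that would hold a fourth jump still fits in under 16 bytes, the pass above pads before the current insn with padsize = 15 - nbytes + min_insn_size (insn). A small sketch of that arithmetic (illustrative aside only; nbytes already includes the current insn):

    #include <stdio.h>

    /* Mirrors: padsize = 15 - nbytes + min_insn_size (insn);  */
    static int pad_bytes_needed (int nbytes, int cur_insn_size)
    {
      return 15 - nbytes + cur_insn_size;
    }

    int main (void)
    {
      /* Three earlier jumps plus a 2-byte fourth jump, 11 bytes so far.  */
      printf ("pad by %d bytes\n", pad_bytes_needed (11, 2));   /* 6 */
      return 0;
    }
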
*/ + if (JUMP_P (insn) + && ANY_RETURN_P (PATTERN (insn))) + break; + + if (NONDEBUG_INSN_P (insn) + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER) + { + insn_count++; + if (insn_count >= 4) + return insn_count; + } + } + + return insn_count; +} + + +/* Count the minimum number of instructions in code path in BB. + Return 4 if the number of instructions >= 4. */ + +static int +ix86_count_insn (basic_block bb) +{ + edge e; + edge_iterator ei; + int min_prev_count; + + /* Only bother counting instructions along paths with no + more than 2 basic blocks between entry and exit. Given + that BB has an edge to exit, determine if a predecessor + of BB has an edge from entry. If so, compute the number + of instructions in the predecessor block. If there + happen to be multiple such blocks, compute the minimum. */ + min_prev_count = 4; + FOR_EACH_EDGE (e, ei, bb->preds) + { + edge prev_e; + edge_iterator prev_ei; + + if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) + { + min_prev_count = 0; + break; + } + FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds) + { + if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) + { + int count = ix86_count_insn_bb (e->src); + if (count < min_prev_count) + min_prev_count = count; + break; + } + } + } + + if (min_prev_count < 4) + min_prev_count += ix86_count_insn_bb (bb); + + return min_prev_count; +} + +/* Pad short function to 4 instructions. */ + +static void +ix86_pad_short_function (void) +{ + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) + { + rtx_insn *ret = BB_END (e->src); + if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret))) + { + int insn_count = ix86_count_insn (e->src); + + /* Pad short function. */ + if (insn_count < 4) + { + rtx_insn *insn = ret; + + /* Find epilogue. */ + while (insn + && (!NOTE_P (insn) + || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG)) + insn = PREV_INSN (insn); + + if (!insn) + insn = ret; + + /* Two NOPs count as one instruction. */ + insn_count = 2 * (4 - insn_count); + emit_insn_before (gen_nops (GEN_INT (insn_count)), insn); + } + } + } +} + +/* Fix up a Windows system unwinder issue. If an EH region falls through into + the epilogue, the Windows system unwinder will apply epilogue logic and + produce incorrect offsets. This can be avoided by adding a nop between + the last insn that can throw and the first insn of the epilogue. */ + +static void +ix86_seh_fixup_eh_fallthru (void) +{ + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) + { + rtx_insn *insn, *next; + + /* Find the beginning of the epilogue. */ + for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn)) + if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG) + break; + if (insn == NULL) + continue; + + /* We only care about preceding insns that can throw. */ + insn = prev_active_insn (insn); + if (insn == NULL || !can_throw_internal (insn)) + continue; + + /* Do not separate calls from their debug information. 
*/ + for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next)) + if (NOTE_P (next) + && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION + || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)) + insn = next; + else + break; + + emit_insn_after (gen_nops (const1_rtx), insn); + } +} + +/* Given a register number BASE, the lowest of a group of registers, update + regsets IN and OUT with the registers that should be avoided in input + and output operands respectively when trying to avoid generating a modr/m + byte for -fmitigate-rop. */ + +static void +set_rop_modrm_reg_bits (int base, HARD_REG_SET &in, HARD_REG_SET &out) +{ + SET_HARD_REG_BIT (out, base); + SET_HARD_REG_BIT (out, base + 1); + SET_HARD_REG_BIT (in, base + 2); + SET_HARD_REG_BIT (in, base + 3); +} + +/* Called if -fmitigate_rop is in effect. Try to rewrite instructions so + that certain encodings of modr/m bytes do not occur. */ +static void +ix86_mitigate_rop (void) +{ + HARD_REG_SET input_risky; + HARD_REG_SET output_risky; + HARD_REG_SET inout_risky; + + CLEAR_HARD_REG_SET (output_risky); + CLEAR_HARD_REG_SET (input_risky); + SET_HARD_REG_BIT (output_risky, AX_REG); + SET_HARD_REG_BIT (output_risky, CX_REG); + SET_HARD_REG_BIT (input_risky, BX_REG); + SET_HARD_REG_BIT (input_risky, DX_REG); + set_rop_modrm_reg_bits (FIRST_SSE_REG, input_risky, output_risky); + set_rop_modrm_reg_bits (FIRST_REX_INT_REG, input_risky, output_risky); + set_rop_modrm_reg_bits (FIRST_REX_SSE_REG, input_risky, output_risky); + set_rop_modrm_reg_bits (FIRST_EXT_REX_SSE_REG, input_risky, output_risky); + set_rop_modrm_reg_bits (FIRST_MASK_REG, input_risky, output_risky); + set_rop_modrm_reg_bits (FIRST_BND_REG, input_risky, output_risky); + COPY_HARD_REG_SET (inout_risky, input_risky); + IOR_HARD_REG_SET (inout_risky, output_risky); + + df_note_add_problem (); + /* Fix up what stack-regs did. */ + df_insn_rescan_all (); + df_analyze (); + + regrename_init (true); + regrename_analyze (NULL); + + auto_vec cands; + + for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (!NONDEBUG_INSN_P (insn)) + continue; + + if (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + continue; + + extract_insn (insn); + + int opno0, opno1; + int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand, + recog_data.n_operands, &opno0, + &opno1); + + if (!ix86_rop_should_change_byte_p (modrm)) + continue; + + insn_rr_info *info = &insn_rr[INSN_UID (insn)]; + + /* This happens when regrename has to fail a block. 
*/ + if (!info->op_info) + continue; + + if (info->op_info[opno0].n_chains != 0) + { + gcc_assert (info->op_info[opno0].n_chains == 1); + du_head_p op0c; + op0c = regrename_chain_from_id (info->op_info[opno0].heads[0]->id); + if (op0c->target_data_1 + op0c->target_data_2 == 0 + && !op0c->cannot_rename) + cands.safe_push (op0c); + + op0c->target_data_1++; + } + if (info->op_info[opno1].n_chains != 0) + { + gcc_assert (info->op_info[opno1].n_chains == 1); + du_head_p op1c; + op1c = regrename_chain_from_id (info->op_info[opno1].heads[0]->id); + if (op1c->target_data_1 + op1c->target_data_2 == 0 + && !op1c->cannot_rename) + cands.safe_push (op1c); + + op1c->target_data_2++; + } + } + + int i; + du_head_p head; + FOR_EACH_VEC_ELT (cands, i, head) + { + int old_reg, best_reg; + HARD_REG_SET unavailable; + + CLEAR_HARD_REG_SET (unavailable); + if (head->target_data_1) + IOR_HARD_REG_SET (unavailable, output_risky); + if (head->target_data_2) + IOR_HARD_REG_SET (unavailable, input_risky); + + int n_uses; + reg_class superclass = regrename_find_superclass (head, &n_uses, + &unavailable); + old_reg = head->regno; + best_reg = find_rename_reg (head, superclass, &unavailable, + old_reg, false); + bool ok = regrename_do_replace (head, best_reg); + gcc_assert (ok); + if (dump_file) + fprintf (dump_file, "Chain %d renamed as %s in %s\n", head->id, + reg_names[best_reg], reg_class_names[superclass]); + + } + + regrename_finish (); + + df_analyze (); + + basic_block bb; + regset_head live; + + INIT_REG_SET (&live); + + FOR_EACH_BB_FN (bb, cfun) + { + rtx_insn *insn; + + COPY_REG_SET (&live, DF_LR_OUT (bb)); + df_simulate_initialize_backwards (bb, &live); + + FOR_BB_INSNS_REVERSE (bb, insn) + { + if (!NONDEBUG_INSN_P (insn)) + continue; + + df_simulate_one_insn_backwards (bb, insn, &live); + + if (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + continue; + + extract_insn (insn); + constrain_operands_cached (insn, reload_completed); + int opno0, opno1; + int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand, + recog_data.n_operands, &opno0, + &opno1); + if (modrm < 0 + || !ix86_rop_should_change_byte_p (modrm) + || opno0 == opno1) + continue; + + rtx oldreg = recog_data.operand[opno1]; + preprocess_constraints (insn); + const operand_alternative *alt = which_op_alt (); + + int i; + for (i = 0; i < recog_data.n_operands; i++) + if (i != opno1 + && alt[i].earlyclobber + && reg_overlap_mentioned_p (recog_data.operand[i], + oldreg)) + break; + + if (i < recog_data.n_operands) + continue; + + if (dump_file) + fprintf (dump_file, + "attempting to fix modrm byte in insn %d:" + " reg %d class %s", INSN_UID (insn), REGNO (oldreg), + reg_class_names[alt[opno1].cl]); + + HARD_REG_SET unavailable; + REG_SET_TO_HARD_REG_SET (unavailable, &live); + SET_HARD_REG_BIT (unavailable, REGNO (oldreg)); + IOR_COMPL_HARD_REG_SET (unavailable, call_used_reg_set); + IOR_HARD_REG_SET (unavailable, fixed_reg_set); + IOR_HARD_REG_SET (unavailable, output_risky); + IOR_COMPL_HARD_REG_SET (unavailable, + reg_class_contents[alt[opno1].cl]); + + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (!TEST_HARD_REG_BIT (unavailable, i)) + break; + if (i == FIRST_PSEUDO_REGISTER) + { + if (dump_file) + fprintf (dump_file, ", none available\n"); + continue; + } + if (dump_file) + fprintf (dump_file, " -> %d\n", i); + rtx newreg = gen_rtx_REG (recog_data.operand_mode[opno1], i); + validate_change (insn, recog_data.operand_loc[opno1], newreg, false); + insn = emit_insn_before (gen_move_insn (newreg, oldreg), 
insn); + } + } +} + +/* Implement machine specific optimizations. We implement padding of returns + for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ +static void +ix86_reorg (void) +{ + /* We are freeing block_for_insn in the toplev to keep compatibility + with old MDEP_REORGS that are not CFG based. Recompute it now. */ + compute_bb_for_insn (); + + if (flag_mitigate_rop) + ix86_mitigate_rop (); + + if (TARGET_SEH && current_function_has_exception_handlers ()) + ix86_seh_fixup_eh_fallthru (); + + if (optimize && optimize_function_for_speed_p (cfun)) + { + if (TARGET_PAD_SHORT_FUNCTION) + ix86_pad_short_function (); + else if (TARGET_PAD_RETURNS) + ix86_pad_returns (); +#ifdef ASM_OUTPUT_MAX_SKIP_PAD + if (TARGET_FOUR_JUMP_LIMIT) + ix86_avoid_jump_mispredicts (); +#endif + } +} + +/* Return nonzero when QImode register that must be represented via REX prefix + is used. */ +bool +x86_extended_QIreg_mentioned_p (rtx_insn *insn) +{ + int i; + extract_insn_cached (insn); + for (i = 0; i < recog_data.n_operands; i++) + if (GENERAL_REG_P (recog_data.operand[i]) + && !QI_REGNO_P (REGNO (recog_data.operand[i]))) + return true; + return false; +} + +/* Return true when INSN mentions register that must be encoded using REX + prefix. */ +bool +x86_extended_reg_mentioned_p (rtx insn) +{ + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST) + { + const_rtx x = *iter; + if (REG_P (x) + && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x)))) + return true; + } + return false; +} + +/* If profitable, negate (without causing overflow) integer constant + of mode MODE at location LOC. Return true in this case. */ +bool +x86_maybe_negate_const_int (rtx *loc, machine_mode mode) +{ + HOST_WIDE_INT val; + + if (!CONST_INT_P (*loc)) + return false; + + switch (mode) + { + case DImode: + /* DImode x86_64 constants must fit in 32 bits. */ + gcc_assert (x86_64_immediate_operand (*loc, mode)); + + mode = SImode; + break; + + case SImode: + case HImode: + case QImode: + break; + + default: + gcc_unreachable (); + } + + /* Avoid overflows. */ + if (mode_signbit_p (mode, *loc)) + return false; + + val = INTVAL (*loc); + + /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if ((val < 0 && val != -128) + || val == 128) + { + *loc = GEN_INT (-val); + return true; + } + + return false; +} + +/* Generate an unsigned DImode/SImode to FP conversion. This is the same code + optabs would emit if we didn't have TFmode patterns. 
*/ + +void +x86_emit_floatuns (rtx operands[2]) +{ + rtx_code_label *neglab, *donelab; + rtx i0, i1, f0, in, out; + machine_mode mode, inmode; + + inmode = GET_MODE (operands[1]); + gcc_assert (inmode == SImode || inmode == DImode); + + out = operands[0]; + in = force_reg (inmode, operands[1]); + mode = GET_MODE (out); + neglab = gen_label_rtx (); + donelab = gen_label_rtx (); + f0 = gen_reg_rtx (mode); + + emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab); + + expand_float (out, in, 0); + + emit_jump_insn (gen_jump (donelab)); + emit_barrier (); + + emit_label (neglab); + + i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL, + 1, OPTAB_DIRECT); + i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL, + 1, OPTAB_DIRECT); + i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT); + + expand_float (f0, i0, 0); + + emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0))); + + emit_label (donelab); +} + +static bool canonicalize_perm (struct expand_vec_perm_d *d); +static bool expand_vec_perm_1 (struct expand_vec_perm_d *d); +static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d); +static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool); + +/* Get a vector mode of the same size as the original but with elements + twice as wide. This is only guaranteed to apply to integral vectors. */ + +static inline machine_mode +get_mode_wider_vector (machine_mode o) +{ + /* ??? Rely on the ordering that genmodes.c gives to vectors. */ + machine_mode n = GET_MODE_WIDER_MODE (o); + gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2); + gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n)); + return n; +} + +/* A subroutine of ix86_expand_vector_init_duplicate. Tries to + fill target with val via vec_duplicate. */ + +static bool +ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val) +{ + bool ok; + rtx_insn *insn; + rtx dup; + + /* First attempt to recognize VAL as-is. */ + dup = gen_rtx_VEC_DUPLICATE (mode, val); + insn = emit_insn (gen_rtx_SET (target, dup)); + if (recog_memoized (insn) < 0) + { + rtx_insn *seq; + /* If that fails, force VAL into a register. */ + + start_sequence (); + XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val); + seq = get_insns (); + end_sequence (); + if (seq) + emit_insn_before (seq, insn); + + ok = recog_memoized (insn) >= 0; + gcc_assert (ok); + } + return true; +} + +/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector + with all elements equal to VAR. Return true if successful. 
*/ + +static bool +ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, + rtx target, rtx val) +{ + bool ok; + + switch (mode) + { + case V2SImode: + case V2SFmode: + if (!mmx_ok) + return false; + /* FALLTHRU */ + + case V4DFmode: + case V4DImode: + case V8SFmode: + case V8SImode: + case V2DFmode: + case V2DImode: + case V4SFmode: + case V4SImode: + case V16SImode: + case V8DImode: + case V16SFmode: + case V8DFmode: + return ix86_vector_duplicate_value (mode, target, val); + + case V4HImode: + if (!mmx_ok) + return false; + if (TARGET_SSE || TARGET_3DNOW_A) + { + rtx x; + + val = gen_lowpart (SImode, val); + x = gen_rtx_TRUNCATE (HImode, val); + x = gen_rtx_VEC_DUPLICATE (mode, x); + emit_insn (gen_rtx_SET (target, x)); + return true; + } + goto widen; + + case V8QImode: + if (!mmx_ok) + return false; + goto widen; + + case V8HImode: + if (TARGET_AVX2) + return ix86_vector_duplicate_value (mode, target, val); + + if (TARGET_SSE2) + { + struct expand_vec_perm_d dperm; + rtx tmp1, tmp2; + + permute: + memset (&dperm, 0, sizeof (dperm)); + dperm.target = target; + dperm.vmode = mode; + dperm.nelt = GET_MODE_NUNITS (mode); + dperm.op0 = dperm.op1 = gen_reg_rtx (mode); + dperm.one_operand_p = true; + + /* Extend to SImode using a paradoxical SUBREG. */ + tmp1 = gen_reg_rtx (SImode); + emit_move_insn (tmp1, gen_lowpart (SImode, val)); + + /* Insert the SImode value as low element of a V4SImode vector. */ + tmp2 = gen_reg_rtx (V4SImode); + emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1)); + emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2)); + + ok = (expand_vec_perm_1 (&dperm) + || expand_vec_perm_broadcast_1 (&dperm)); + gcc_assert (ok); + return ok; + } + goto widen; + + case V16QImode: + if (TARGET_AVX2) + return ix86_vector_duplicate_value (mode, target, val); + + if (TARGET_SSE2) + goto permute; + goto widen; + + widen: + /* Replicate the value once into the next wider mode and recurse. */ + { + machine_mode smode, wsmode, wvmode; + rtx x; + + smode = GET_MODE_INNER (mode); + wvmode = get_mode_wider_vector (mode); + wsmode = GET_MODE_INNER (wvmode); + + val = convert_modes (wsmode, smode, val, true); + x = expand_simple_binop (wsmode, ASHIFT, val, + GEN_INT (GET_MODE_BITSIZE (smode)), + NULL_RTX, 1, OPTAB_LIB_WIDEN); + val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN); + + x = gen_reg_rtx (wvmode); + ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val); + gcc_assert (ok); + emit_move_insn (target, gen_lowpart (GET_MODE (target), x)); + return ok; + } + + case V16HImode: + case V32QImode: + if (TARGET_AVX2) + return ix86_vector_duplicate_value (mode, target, val); + else + { + machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode); + rtx x = gen_reg_rtx (hvmode); + + ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); + gcc_assert (ok); + + x = gen_rtx_VEC_CONCAT (mode, x, x); + emit_insn (gen_rtx_SET (target, x)); + } + return true; + + case V64QImode: + case V32HImode: + if (TARGET_AVX512BW) + return ix86_vector_duplicate_value (mode, target, val); + else + { + machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode); + rtx x = gen_reg_rtx (hvmode); + + ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); + gcc_assert (ok); + + x = gen_rtx_VEC_CONCAT (mode, x, x); + emit_insn (gen_rtx_SET (target, x)); + } + return true; + + default: + return false; + } +} + +/* A subroutine of ix86_expand_vector_init. 
Store into TARGET a vector + whose ONE_VAR element is VAR, and other elements are zero. Return true + if successful. */ + +static bool +ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, + rtx target, rtx var, int one_var) +{ + machine_mode vsimode; + rtx new_target; + rtx x, tmp; + bool use_vector_set = false; + + switch (mode) + { + case V2DImode: + /* For SSE4.1, we normally use vector set. But if the second + element is zero and inter-unit moves are OK, we use movq + instead. */ + use_vector_set = (TARGET_64BIT && TARGET_SSE4_1 + && !(TARGET_INTER_UNIT_MOVES_TO_VEC + && one_var == 0)); + break; + case V16QImode: + case V4SImode: + case V4SFmode: + use_vector_set = TARGET_SSE4_1; + break; + case V8HImode: + use_vector_set = TARGET_SSE2; + break; + case V4HImode: + use_vector_set = TARGET_SSE || TARGET_3DNOW_A; + break; + case V32QImode: + case V16HImode: + case V8SImode: + case V8SFmode: + case V4DFmode: + use_vector_set = TARGET_AVX; + break; + case V4DImode: + /* Use ix86_expand_vector_set in 64bit mode only. */ + use_vector_set = TARGET_AVX && TARGET_64BIT; + break; + default: + break; + } + + if (use_vector_set) + { + emit_insn (gen_rtx_SET (target, CONST0_RTX (mode))); + var = force_reg (GET_MODE_INNER (mode), var); + ix86_expand_vector_set (mmx_ok, target, var, one_var); + return true; + } + + switch (mode) + { + case V2SFmode: + case V2SImode: + if (!mmx_ok) + return false; + /* FALLTHRU */ + + case V2DFmode: + case V2DImode: + if (one_var != 0) + return false; + var = force_reg (GET_MODE_INNER (mode), var); + x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode))); + emit_insn (gen_rtx_SET (target, x)); + return true; + + case V4SFmode: + case V4SImode: + if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER) + new_target = gen_reg_rtx (mode); + else + new_target = target; + var = force_reg (GET_MODE_INNER (mode), var); + x = gen_rtx_VEC_DUPLICATE (mode, var); + x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx); + emit_insn (gen_rtx_SET (new_target, x)); + if (one_var != 0) + { + /* We need to shuffle the value to the correct position, so + create a new pseudo to store the intermediate result. */ + + /* With SSE2, we can use the integer shuffle insns. */ + if (mode != V4SFmode && TARGET_SSE2) + { + emit_insn (gen_sse2_pshufd_1 (new_target, new_target, + const1_rtx, + GEN_INT (one_var == 1 ? 0 : 1), + GEN_INT (one_var == 2 ? 0 : 1), + GEN_INT (one_var == 3 ? 0 : 1))); + if (target != new_target) + emit_move_insn (target, new_target); + return true; + } + + /* Otherwise convert the intermediate result to V4SFmode and + use the SSE1 shuffle instructions. */ + if (mode != V4SFmode) + { + tmp = gen_reg_rtx (V4SFmode); + emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target)); + } + else + tmp = new_target; + + emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp, + const1_rtx, + GEN_INT (one_var == 1 ? 0 : 1), + GEN_INT (one_var == 2 ? 0+4 : 1+4), + GEN_INT (one_var == 3 ? 0+4 : 1+4))); + + if (mode != V4SFmode) + emit_move_insn (target, gen_lowpart (V4SImode, tmp)); + else if (tmp != target) + emit_move_insn (target, tmp); + } + else if (target != new_target) + emit_move_insn (target, new_target); + return true; + + case V8HImode: + case V16QImode: + vsimode = V4SImode; + goto widen; + case V4HImode: + case V8QImode: + if (!mmx_ok) + return false; + vsimode = V2SImode; + goto widen; + widen: + if (one_var != 0) + return false; + + /* Zero extend the variable element to SImode and recurse. 
*/ + var = convert_modes (SImode, GET_MODE_INNER (mode), var, true); + + x = gen_reg_rtx (vsimode); + if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x, + var, one_var)) + gcc_unreachable (); + + emit_move_insn (target, gen_lowpart (mode, x)); + return true; + + default: + return false; + } +} + +/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector + consisting of the values in VALS. It is known that all elements + except ONE_VAR are constants. Return true if successful. */ + +static bool +ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode, + rtx target, rtx vals, int one_var) +{ + rtx var = XVECEXP (vals, 0, one_var); + machine_mode wmode; + rtx const_vec, x; + + const_vec = copy_rtx (vals); + XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode)); + const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0)); + + switch (mode) + { + case V2DFmode: + case V2DImode: + case V2SFmode: + case V2SImode: + /* For the two element vectors, it's just as easy to use + the general case. */ + return false; + + case V4DImode: + /* Use ix86_expand_vector_set in 64bit mode only. */ + if (!TARGET_64BIT) + return false; + case V4DFmode: + case V8SFmode: + case V8SImode: + case V16HImode: + case V32QImode: + case V4SFmode: + case V4SImode: + case V8HImode: + case V4HImode: + break; + + case V16QImode: + if (TARGET_SSE4_1) + break; + wmode = V8HImode; + goto widen; + case V8QImode: + wmode = V4HImode; + goto widen; + widen: + /* There's no way to set one QImode entry easily. Combine + the variable value with its adjacent constant value, and + promote to an HImode set. */ + x = XVECEXP (vals, 0, one_var ^ 1); + if (one_var & 1) + { + var = convert_modes (HImode, QImode, var, true); + var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8), + NULL_RTX, 1, OPTAB_LIB_WIDEN); + x = GEN_INT (INTVAL (x) & 0xff); + } + else + { + var = convert_modes (HImode, QImode, var, true); + x = gen_int_mode (INTVAL (x) << 8, HImode); + } + if (x != const0_rtx) + var = expand_simple_binop (HImode, IOR, var, x, var, + 1, OPTAB_LIB_WIDEN); + + x = gen_reg_rtx (wmode); + emit_move_insn (x, gen_lowpart (wmode, const_vec)); + ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1); + + emit_move_insn (target, gen_lowpart (mode, x)); + return true; + + default: + return false; + } + + emit_move_insn (target, const_vec); + ix86_expand_vector_set (mmx_ok, target, var, one_var); + return true; +} + +/* A subroutine of ix86_expand_vector_init_general. Use vector + concatenate to handle the most general case: all values variable, + and none identical. 
*/ + +static void +ix86_expand_vector_init_concat (machine_mode mode, + rtx target, rtx *ops, int n) +{ + machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode; + rtx first[16], second[8], third[4]; + rtvec v; + int i, j; + + switch (n) + { + case 2: + switch (mode) + { + case V16SImode: + cmode = V8SImode; + break; + case V16SFmode: + cmode = V8SFmode; + break; + case V8DImode: + cmode = V4DImode; + break; + case V8DFmode: + cmode = V4DFmode; + break; + case V8SImode: + cmode = V4SImode; + break; + case V8SFmode: + cmode = V4SFmode; + break; + case V4DImode: + cmode = V2DImode; + break; + case V4DFmode: + cmode = V2DFmode; + break; + case V4SImode: + cmode = V2SImode; + break; + case V4SFmode: + cmode = V2SFmode; + break; + case V2DImode: + cmode = DImode; + break; + case V2SImode: + cmode = SImode; + break; + case V2DFmode: + cmode = DFmode; + break; + case V2SFmode: + cmode = SFmode; + break; + default: + gcc_unreachable (); + } + + if (!register_operand (ops[1], cmode)) + ops[1] = force_reg (cmode, ops[1]); + if (!register_operand (ops[0], cmode)) + ops[0] = force_reg (cmode, ops[0]); + emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0], + ops[1]))); + break; + + case 4: + switch (mode) + { + case V4DImode: + cmode = V2DImode; + break; + case V4DFmode: + cmode = V2DFmode; + break; + case V4SImode: + cmode = V2SImode; + break; + case V4SFmode: + cmode = V2SFmode; + break; + default: + gcc_unreachable (); + } + goto half; + + case 8: + switch (mode) + { + case V8DImode: + cmode = V2DImode; + hmode = V4DImode; + break; + case V8DFmode: + cmode = V2DFmode; + hmode = V4DFmode; + break; + case V8SImode: + cmode = V2SImode; + hmode = V4SImode; + break; + case V8SFmode: + cmode = V2SFmode; + hmode = V4SFmode; + break; + default: + gcc_unreachable (); + } + goto half; + + case 16: + switch (mode) + { + case V16SImode: + cmode = V2SImode; + hmode = V4SImode; + gmode = V8SImode; + break; + case V16SFmode: + cmode = V2SFmode; + hmode = V4SFmode; + gmode = V8SFmode; + break; + default: + gcc_unreachable (); + } + goto half; + +half: + /* FIXME: We process inputs backward to help RA. PR 36222. */ + i = n - 1; + j = (n >> 1) - 1; + for (; i > 0; i -= 2, j--) + { + first[j] = gen_reg_rtx (cmode); + v = gen_rtvec (2, ops[i - 1], ops[i]); + ix86_expand_vector_init (false, first[j], + gen_rtx_PARALLEL (cmode, v)); + } + + n >>= 1; + if (n > 4) + { + gcc_assert (hmode != VOIDmode); + gcc_assert (gmode != VOIDmode); + for (i = j = 0; i < n; i += 2, j++) + { + second[j] = gen_reg_rtx (hmode); + ix86_expand_vector_init_concat (hmode, second [j], + &first [i], 2); + } + n >>= 1; + for (i = j = 0; i < n; i += 2, j++) + { + third[j] = gen_reg_rtx (gmode); + ix86_expand_vector_init_concat (gmode, third[j], + &second[i], 2); + } + n >>= 1; + ix86_expand_vector_init_concat (mode, target, third, n); + } + else if (n > 2) + { + gcc_assert (hmode != VOIDmode); + for (i = j = 0; i < n; i += 2, j++) + { + second[j] = gen_reg_rtx (hmode); + ix86_expand_vector_init_concat (hmode, second [j], + &first [i], 2); + } + n >>= 1; + ix86_expand_vector_init_concat (mode, target, second, n); + } + else + ix86_expand_vector_init_concat (mode, target, first, n); + break; + + default: + gcc_unreachable (); + } +} + +/* A subroutine of ix86_expand_vector_init_general. Use vector + interleave to handle the most general case: all values variable, + and none identical. 
*/ + +static void +ix86_expand_vector_init_interleave (machine_mode mode, + rtx target, rtx *ops, int n) +{ + machine_mode first_imode, second_imode, third_imode, inner_mode; + int i, j; + rtx op0, op1; + rtx (*gen_load_even) (rtx, rtx, rtx); + rtx (*gen_interleave_first_low) (rtx, rtx, rtx); + rtx (*gen_interleave_second_low) (rtx, rtx, rtx); + + switch (mode) + { + case V8HImode: + gen_load_even = gen_vec_setv8hi; + gen_interleave_first_low = gen_vec_interleave_lowv4si; + gen_interleave_second_low = gen_vec_interleave_lowv2di; + inner_mode = HImode; + first_imode = V4SImode; + second_imode = V2DImode; + third_imode = VOIDmode; + break; + case V16QImode: + gen_load_even = gen_vec_setv16qi; + gen_interleave_first_low = gen_vec_interleave_lowv8hi; + gen_interleave_second_low = gen_vec_interleave_lowv4si; + inner_mode = QImode; + first_imode = V8HImode; + second_imode = V4SImode; + third_imode = V2DImode; + break; + default: + gcc_unreachable (); + } + + for (i = 0; i < n; i++) + { + /* Extend the odd elment to SImode using a paradoxical SUBREG. */ + op0 = gen_reg_rtx (SImode); + emit_move_insn (op0, gen_lowpart (SImode, ops [i + i])); + + /* Insert the SImode value as low element of V4SImode vector. */ + op1 = gen_reg_rtx (V4SImode); + op0 = gen_rtx_VEC_MERGE (V4SImode, + gen_rtx_VEC_DUPLICATE (V4SImode, + op0), + CONST0_RTX (V4SImode), + const1_rtx); + emit_insn (gen_rtx_SET (op1, op0)); + + /* Cast the V4SImode vector back to a vector in orignal mode. */ + op0 = gen_reg_rtx (mode); + emit_move_insn (op0, gen_lowpart (mode, op1)); + + /* Load even elements into the second position. */ + emit_insn (gen_load_even (op0, + force_reg (inner_mode, + ops [i + i + 1]), + const1_rtx)); + + /* Cast vector to FIRST_IMODE vector. */ + ops[i] = gen_reg_rtx (first_imode); + emit_move_insn (ops[i], gen_lowpart (first_imode, op0)); + } + + /* Interleave low FIRST_IMODE vectors. */ + for (i = j = 0; i < n; i += 2, j++) + { + op0 = gen_reg_rtx (first_imode); + emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1])); + + /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */ + ops[j] = gen_reg_rtx (second_imode); + emit_move_insn (ops[j], gen_lowpart (second_imode, op0)); + } + + /* Interleave low SECOND_IMODE vectors. */ + switch (second_imode) + { + case V4SImode: + for (i = j = 0; i < n / 2; i += 2, j++) + { + op0 = gen_reg_rtx (second_imode); + emit_insn (gen_interleave_second_low (op0, ops[i], + ops[i + 1])); + + /* Cast the SECOND_IMODE vector to the THIRD_IMODE + vector. */ + ops[j] = gen_reg_rtx (third_imode); + emit_move_insn (ops[j], gen_lowpart (third_imode, op0)); + } + second_imode = V2DImode; + gen_interleave_second_low = gen_vec_interleave_lowv2di; + /* FALLTHRU */ + + case V2DImode: + op0 = gen_reg_rtx (second_imode); + emit_insn (gen_interleave_second_low (op0, ops[0], + ops[1])); + + /* Cast the SECOND_IMODE vector back to a vector on original + mode. */ + emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0))); + break; + + default: + gcc_unreachable (); + } +} + +/* A subroutine of ix86_expand_vector_init. Handle the most general case: + all values variable, and none identical. 
*/ + +static void +ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode, + rtx target, rtx vals) +{ + rtx ops[64], op0, op1, op2, op3, op4, op5; + machine_mode half_mode = VOIDmode; + machine_mode quarter_mode = VOIDmode; + int n, i; + + switch (mode) + { + case V2SFmode: + case V2SImode: + if (!mmx_ok && !TARGET_SSE) + break; + /* FALLTHRU */ + + case V16SImode: + case V16SFmode: + case V8DFmode: + case V8DImode: + case V8SFmode: + case V8SImode: + case V4DFmode: + case V4DImode: + case V4SFmode: + case V4SImode: + case V2DFmode: + case V2DImode: + n = GET_MODE_NUNITS (mode); + for (i = 0; i < n; i++) + ops[i] = XVECEXP (vals, 0, i); + ix86_expand_vector_init_concat (mode, target, ops, n); + return; + + case V32QImode: + half_mode = V16QImode; + goto half; + + case V16HImode: + half_mode = V8HImode; + goto half; + +half: + n = GET_MODE_NUNITS (mode); + for (i = 0; i < n; i++) + ops[i] = XVECEXP (vals, 0, i); + op0 = gen_reg_rtx (half_mode); + op1 = gen_reg_rtx (half_mode); + ix86_expand_vector_init_interleave (half_mode, op0, ops, + n >> 2); + ix86_expand_vector_init_interleave (half_mode, op1, + &ops [n >> 1], n >> 2); + emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1))); + return; + + case V64QImode: + quarter_mode = V16QImode; + half_mode = V32QImode; + goto quarter; + + case V32HImode: + quarter_mode = V8HImode; + half_mode = V16HImode; + goto quarter; + +quarter: + n = GET_MODE_NUNITS (mode); + for (i = 0; i < n; i++) + ops[i] = XVECEXP (vals, 0, i); + op0 = gen_reg_rtx (quarter_mode); + op1 = gen_reg_rtx (quarter_mode); + op2 = gen_reg_rtx (quarter_mode); + op3 = gen_reg_rtx (quarter_mode); + op4 = gen_reg_rtx (half_mode); + op5 = gen_reg_rtx (half_mode); + ix86_expand_vector_init_interleave (quarter_mode, op0, ops, + n >> 3); + ix86_expand_vector_init_interleave (quarter_mode, op1, + &ops [n >> 2], n >> 3); + ix86_expand_vector_init_interleave (quarter_mode, op2, + &ops [n >> 1], n >> 3); + ix86_expand_vector_init_interleave (quarter_mode, op3, + &ops [(n >> 1) | (n >> 2)], n >> 3); + emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1))); + emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3))); + emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5))); + return; + + case V16QImode: + if (!TARGET_SSE4_1) + break; + /* FALLTHRU */ + + case V8HImode: + if (!TARGET_SSE2) + break; + + /* Don't use ix86_expand_vector_init_interleave if we can't + move from GPR to SSE register directly. 
*/ + if (!TARGET_INTER_UNIT_MOVES_TO_VEC) + break; + + n = GET_MODE_NUNITS (mode); + for (i = 0; i < n; i++) + ops[i] = XVECEXP (vals, 0, i); + ix86_expand_vector_init_interleave (mode, target, ops, n >> 1); + return; + + case V4HImode: + case V8QImode: + break; + + default: + gcc_unreachable (); + } + + { + int i, j, n_elts, n_words, n_elt_per_word; + machine_mode inner_mode; + rtx words[4], shift; + + inner_mode = GET_MODE_INNER (mode); + n_elts = GET_MODE_NUNITS (mode); + n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; + n_elt_per_word = n_elts / n_words; + shift = GEN_INT (GET_MODE_BITSIZE (inner_mode)); + + for (i = 0; i < n_words; ++i) + { + rtx word = NULL_RTX; + + for (j = 0; j < n_elt_per_word; ++j) + { + rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1); + elt = convert_modes (word_mode, inner_mode, elt, true); + + if (j == 0) + word = elt; + else + { + word = expand_simple_binop (word_mode, ASHIFT, word, shift, + word, 1, OPTAB_LIB_WIDEN); + word = expand_simple_binop (word_mode, IOR, word, elt, + word, 1, OPTAB_LIB_WIDEN); + } + } + + words[i] = word; + } + + if (n_words == 1) + emit_move_insn (target, gen_lowpart (mode, words[0])); + else if (n_words == 2) + { + rtx tmp = gen_reg_rtx (mode); + emit_clobber (tmp); + emit_move_insn (gen_lowpart (word_mode, tmp), words[0]); + emit_move_insn (gen_highpart (word_mode, tmp), words[1]); + emit_move_insn (target, tmp); + } + else if (n_words == 4) + { + rtx tmp = gen_reg_rtx (V4SImode); + gcc_assert (word_mode == SImode); + vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words)); + ix86_expand_vector_init_general (false, V4SImode, tmp, vals); + emit_move_insn (target, gen_lowpart (mode, tmp)); + } + else + gcc_unreachable (); + } +} + +/* Initialize vector TARGET via VALS. Suppress the use of MMX + instructions unless MMX_OK is true. */ + +void +ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) +{ + machine_mode mode = GET_MODE (target); + machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + int n_var = 0, one_var = -1; + bool all_same = true, all_const_zero = true; + int i; + rtx x; + + for (i = 0; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); + if (!(CONST_SCALAR_INT_P (x) + || CONST_DOUBLE_P (x) + || CONST_FIXED_P (x))) + n_var++, one_var = i; + else if (x != CONST0_RTX (inner_mode)) + all_const_zero = false; + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } + + /* Constants are best loaded from the constant pool. */ + if (n_var == 0) + { + emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); + return; + } + + /* If all values are identical, broadcast the value. */ + if (all_same + && ix86_expand_vector_init_duplicate (mmx_ok, mode, target, + XVECEXP (vals, 0, 0))) + return; + + /* Values where only one field is non-constant are best loaded from + the pool and overwritten via move later. 
*/ + if (n_var == 1) + { + if (all_const_zero + && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target, + XVECEXP (vals, 0, one_var), + one_var)) + return; + + if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var)) + return; + } + + ix86_expand_vector_init_general (mmx_ok, mode, target, vals); +} + +void +ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) +{ + machine_mode mode = GET_MODE (target); + machine_mode inner_mode = GET_MODE_INNER (mode); + machine_mode half_mode; + bool use_vec_merge = false; + rtx tmp; + static rtx (*gen_extract[6][2]) (rtx, rtx) + = { + { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi }, + { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi }, + { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si }, + { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di }, + { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf }, + { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df } + }; + static rtx (*gen_insert[6][2]) (rtx, rtx, rtx) + = { + { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi }, + { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi }, + { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si }, + { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di }, + { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf }, + { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df } + }; + int i, j, n; + machine_mode mmode = VOIDmode; + rtx (*gen_blendm) (rtx, rtx, rtx, rtx); + + switch (mode) + { + case V2SFmode: + case V2SImode: + if (mmx_ok) + { + tmp = gen_reg_rtx (GET_MODE_INNER (mode)); + ix86_expand_vector_extract (true, tmp, target, 1 - elt); + if (elt == 0) + tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); + else + tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); + emit_insn (gen_rtx_SET (target, tmp)); + return; + } + break; + + case V2DImode: + use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT; + if (use_vec_merge) + break; + + tmp = gen_reg_rtx (GET_MODE_INNER (mode)); + ix86_expand_vector_extract (false, tmp, target, 1 - elt); + if (elt == 0) + tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); + else + tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); + emit_insn (gen_rtx_SET (target, tmp)); + return; + + case V2DFmode: + { + rtx op0, op1; + + /* For the two element vectors, we implement a VEC_CONCAT with + the extraction of the other element. 
*/ + + tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt))); + tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp); + + if (elt == 0) + op0 = val, op1 = tmp; + else + op0 = tmp, op1 = val; + + tmp = gen_rtx_VEC_CONCAT (mode, op0, op1); + emit_insn (gen_rtx_SET (target, tmp)); + } + return; + + case V4SFmode: + use_vec_merge = TARGET_SSE4_1; + if (use_vec_merge) + break; + + switch (elt) + { + case 0: + use_vec_merge = true; + break; + + case 1: + /* tmp = target = A B C D */ + tmp = copy_to_reg (target); + /* target = A A B B */ + emit_insn (gen_vec_interleave_lowv4sf (target, target, target)); + /* target = X A B B */ + ix86_expand_vector_set (false, target, val, 0); + /* target = A X C D */ + emit_insn (gen_sse_shufps_v4sf (target, target, tmp, + const1_rtx, const0_rtx, + GEN_INT (2+4), GEN_INT (3+4))); + return; + + case 2: + /* tmp = target = A B C D */ + tmp = copy_to_reg (target); + /* tmp = X B C D */ + ix86_expand_vector_set (false, tmp, val, 0); + /* target = A B X D */ + emit_insn (gen_sse_shufps_v4sf (target, target, tmp, + const0_rtx, const1_rtx, + GEN_INT (0+4), GEN_INT (3+4))); + return; + + case 3: + /* tmp = target = A B C D */ + tmp = copy_to_reg (target); + /* tmp = X B C D */ + ix86_expand_vector_set (false, tmp, val, 0); + /* target = A B X D */ + emit_insn (gen_sse_shufps_v4sf (target, target, tmp, + const0_rtx, const1_rtx, + GEN_INT (2+4), GEN_INT (0+4))); + return; + + default: + gcc_unreachable (); + } + break; + + case V4SImode: + use_vec_merge = TARGET_SSE4_1; + if (use_vec_merge) + break; + + /* Element 0 handled by vec_merge below. */ + if (elt == 0) + { + use_vec_merge = true; + break; + } + + if (TARGET_SSE2) + { + /* With SSE2, use integer shuffles to swap element 0 and ELT, + store into element 0, then shuffle them back. */ + + rtx order[4]; + + order[0] = GEN_INT (elt); + order[1] = const1_rtx; + order[2] = const2_rtx; + order[3] = GEN_INT (3); + order[elt] = const0_rtx; + + emit_insn (gen_sse2_pshufd_1 (target, target, order[0], + order[1], order[2], order[3])); + + ix86_expand_vector_set (false, target, val, 0); + + emit_insn (gen_sse2_pshufd_1 (target, target, order[0], + order[1], order[2], order[3])); + } + else + { + /* For SSE1, we have to reuse the V4SF code. */ + rtx t = gen_reg_rtx (V4SFmode); + emit_move_insn (t, gen_lowpart (V4SFmode, target)); + ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt); + emit_move_insn (target, gen_lowpart (mode, t)); + } + return; + + case V8HImode: + use_vec_merge = TARGET_SSE2; + break; + case V4HImode: + use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); + break; + + case V16QImode: + use_vec_merge = TARGET_SSE4_1; + break; + + case V8QImode: + break; + + case V32QImode: + half_mode = V16QImode; + j = 0; + n = 16; + goto half; + + case V16HImode: + half_mode = V8HImode; + j = 1; + n = 8; + goto half; + + case V8SImode: + half_mode = V4SImode; + j = 2; + n = 4; + goto half; + + case V4DImode: + half_mode = V2DImode; + j = 3; + n = 2; + goto half; + + case V8SFmode: + half_mode = V4SFmode; + j = 4; + n = 4; + goto half; + + case V4DFmode: + half_mode = V2DFmode; + j = 5; + n = 2; + goto half; + +half: + /* Compute offset. */ + i = elt / n; + elt %= n; + + gcc_assert (i <= 1); + + /* Extract the half. */ + tmp = gen_reg_rtx (half_mode); + emit_insn (gen_extract[j][i] (tmp, target)); + + /* Put val in tmp at elt. */ + ix86_expand_vector_set (false, tmp, val, elt); + + /* Put it back. 
*/ + emit_insn (gen_insert[j][i] (target, target, tmp)); + return; + + case V8DFmode: + if (TARGET_AVX512F) + { + mmode = QImode; + gen_blendm = gen_avx512f_blendmv8df; + } + break; + + case V8DImode: + if (TARGET_AVX512F) + { + mmode = QImode; + gen_blendm = gen_avx512f_blendmv8di; + } + break; + + case V16SFmode: + if (TARGET_AVX512F) + { + mmode = HImode; + gen_blendm = gen_avx512f_blendmv16sf; + } + break; + + case V16SImode: + if (TARGET_AVX512F) + { + mmode = HImode; + gen_blendm = gen_avx512f_blendmv16si; + } + break; + + case V32HImode: + if (TARGET_AVX512F && TARGET_AVX512BW) + { + mmode = SImode; + gen_blendm = gen_avx512bw_blendmv32hi; + } + break; + + case V64QImode: + if (TARGET_AVX512F && TARGET_AVX512BW) + { + mmode = DImode; + gen_blendm = gen_avx512bw_blendmv64qi; + } + break; + + default: + break; + } + + if (mmode != VOIDmode) + { + tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val))); + /* The avx512*_blendm expanders have different operand order + from VEC_MERGE. In VEC_MERGE, the first input operand is used for + elements where the mask is set and second input operand otherwise, + in {sse,avx}*_*blend* the first input operand is used for elements + where the mask is clear and second input operand otherwise. */ + emit_insn (gen_blendm (target, target, tmp, + force_reg (mmode, + gen_int_mode (1 << elt, mmode)))); + } + else if (use_vec_merge) + { + tmp = gen_rtx_VEC_DUPLICATE (mode, val); + tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt)); + emit_insn (gen_rtx_SET (target, tmp)); + } + else + { + rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); + + emit_move_insn (mem, target); + + tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); + emit_move_insn (tmp, val); + + emit_move_insn (target, mem); + } +} + +void +ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) +{ + machine_mode mode = GET_MODE (vec); + machine_mode inner_mode = GET_MODE_INNER (mode); + bool use_vec_extr = false; + rtx tmp; + + switch (mode) + { + case V2SImode: + case V2SFmode: + if (!mmx_ok) + break; + /* FALLTHRU */ + + case V2DFmode: + case V2DImode: + use_vec_extr = true; + break; + + case V4SFmode: + use_vec_extr = TARGET_SSE4_1; + if (use_vec_extr) + break; + + switch (elt) + { + case 0: + tmp = vec; + break; + + case 1: + case 3: + tmp = gen_reg_rtx (mode); + emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec, + GEN_INT (elt), GEN_INT (elt), + GEN_INT (elt+4), GEN_INT (elt+4))); + break; + + case 2: + tmp = gen_reg_rtx (mode); + emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec)); + break; + + default: + gcc_unreachable (); + } + vec = tmp; + use_vec_extr = true; + elt = 0; + break; + + case V4SImode: + use_vec_extr = TARGET_SSE4_1; + if (use_vec_extr) + break; + + if (TARGET_SSE2) + { + switch (elt) + { + case 0: + tmp = vec; + break; + + case 1: + case 3: + tmp = gen_reg_rtx (mode); + emit_insn (gen_sse2_pshufd_1 (tmp, vec, + GEN_INT (elt), GEN_INT (elt), + GEN_INT (elt), GEN_INT (elt))); + break; + + case 2: + tmp = gen_reg_rtx (mode); + emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec)); + break; + + default: + gcc_unreachable (); + } + vec = tmp; + use_vec_extr = true; + elt = 0; + } + else + { + /* For SSE1, we have to reuse the V4SF code. 
*/ + ix86_expand_vector_extract (false, gen_lowpart (SFmode, target), + gen_lowpart (V4SFmode, vec), elt); + return; + } + break; + + case V8HImode: + use_vec_extr = TARGET_SSE2; + break; + case V4HImode: + use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); + break; + + case V16QImode: + use_vec_extr = TARGET_SSE4_1; + break; + + case V8SFmode: + if (TARGET_AVX) + { + tmp = gen_reg_rtx (V4SFmode); + if (elt < 4) + emit_insn (gen_vec_extract_lo_v8sf (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v8sf (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 3); + return; + } + break; + + case V4DFmode: + if (TARGET_AVX) + { + tmp = gen_reg_rtx (V2DFmode); + if (elt < 2) + emit_insn (gen_vec_extract_lo_v4df (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v4df (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 1); + return; + } + break; + + case V32QImode: + if (TARGET_AVX) + { + tmp = gen_reg_rtx (V16QImode); + if (elt < 16) + emit_insn (gen_vec_extract_lo_v32qi (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v32qi (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 15); + return; + } + break; + + case V16HImode: + if (TARGET_AVX) + { + tmp = gen_reg_rtx (V8HImode); + if (elt < 8) + emit_insn (gen_vec_extract_lo_v16hi (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v16hi (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 7); + return; + } + break; + + case V8SImode: + if (TARGET_AVX) + { + tmp = gen_reg_rtx (V4SImode); + if (elt < 4) + emit_insn (gen_vec_extract_lo_v8si (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v8si (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 3); + return; + } + break; + + case V4DImode: + if (TARGET_AVX) + { + tmp = gen_reg_rtx (V2DImode); + if (elt < 2) + emit_insn (gen_vec_extract_lo_v4di (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v4di (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 1); + return; + } + break; + + case V32HImode: + if (TARGET_AVX512BW) + { + tmp = gen_reg_rtx (V16HImode); + if (elt < 16) + emit_insn (gen_vec_extract_lo_v32hi (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v32hi (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 15); + return; + } + break; + + case V64QImode: + if (TARGET_AVX512BW) + { + tmp = gen_reg_rtx (V32QImode); + if (elt < 32) + emit_insn (gen_vec_extract_lo_v64qi (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v64qi (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 31); + return; + } + break; + + case V16SFmode: + tmp = gen_reg_rtx (V8SFmode); + if (elt < 8) + emit_insn (gen_vec_extract_lo_v16sf (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v16sf (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 7); + return; + + case V8DFmode: + tmp = gen_reg_rtx (V4DFmode); + if (elt < 4) + emit_insn (gen_vec_extract_lo_v8df (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v8df (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 3); + return; + + case V16SImode: + tmp = gen_reg_rtx (V8SImode); + if (elt < 8) + emit_insn (gen_vec_extract_lo_v16si (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v16si (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 7); + return; + + case V8DImode: + tmp = gen_reg_rtx (V4DImode); + if (elt < 4) + emit_insn (gen_vec_extract_lo_v8di (tmp, vec)); + else + emit_insn (gen_vec_extract_hi_v8di (tmp, vec)); + ix86_expand_vector_extract (false, target, tmp, elt & 
3); + return; + + case V8QImode: + /* ??? Could extract the appropriate HImode element and shift. */ + default: + break; + } + + if (use_vec_extr) + { + tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt))); + tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp); + + /* Let the rtl optimizers know about the zero extension performed. */ + if (inner_mode == QImode || inner_mode == HImode) + { + tmp = gen_rtx_ZERO_EXTEND (SImode, tmp); + target = gen_lowpart (SImode, target); + } + + emit_insn (gen_rtx_SET (target, tmp)); + } + else + { + rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); + + emit_move_insn (mem, vec); + + tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); + emit_move_insn (target, tmp); + } +} + +/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC + to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode. + The upper bits of DEST are undefined, though they shouldn't cause + exceptions (some bits from src or all zeros are ok). */ + +static void +emit_reduc_half (rtx dest, rtx src, int i) +{ + rtx tem, d = dest; + switch (GET_MODE (src)) + { + case V4SFmode: + if (i == 128) + tem = gen_sse_movhlps (dest, src, src); + else + tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx, + GEN_INT (1 + 4), GEN_INT (1 + 4)); + break; + case V2DFmode: + tem = gen_vec_interleave_highv2df (dest, src, src); + break; + case V16QImode: + case V8HImode: + case V4SImode: + case V2DImode: + d = gen_reg_rtx (V1TImode); + tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src), + GEN_INT (i / 2)); + break; + case V8SFmode: + if (i == 256) + tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx); + else + tem = gen_avx_shufps256 (dest, src, src, + GEN_INT (i == 128 ? 2 + (3 << 2) : 1)); + break; + case V4DFmode: + if (i == 256) + tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx); + else + tem = gen_avx_shufpd256 (dest, src, src, const1_rtx); + break; + case V32QImode: + case V16HImode: + case V8SImode: + case V4DImode: + if (i == 256) + { + if (GET_MODE (dest) != V4DImode) + d = gen_reg_rtx (V4DImode); + tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src), + gen_lowpart (V4DImode, src), + const1_rtx); + } + else + { + d = gen_reg_rtx (V2TImode); + tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src), + GEN_INT (i / 2)); + } + break; + case V64QImode: + case V32HImode: + case V16SImode: + case V16SFmode: + case V8DImode: + case V8DFmode: + if (i > 128) + tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest), + gen_lowpart (V16SImode, src), + gen_lowpart (V16SImode, src), + GEN_INT (0x4 + (i == 512 ? 4 : 0)), + GEN_INT (0x5 + (i == 512 ? 4 : 0)), + GEN_INT (0x6 + (i == 512 ? 4 : 0)), + GEN_INT (0x7 + (i == 512 ? 4 : 0)), + GEN_INT (0xC), GEN_INT (0xD), + GEN_INT (0xE), GEN_INT (0xF), + GEN_INT (0x10), GEN_INT (0x11), + GEN_INT (0x12), GEN_INT (0x13), + GEN_INT (0x14), GEN_INT (0x15), + GEN_INT (0x16), GEN_INT (0x17)); + else + tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest), + gen_lowpart (V16SImode, src), + GEN_INT (i == 128 ? 0x2 : 0x1), + GEN_INT (0x3), + GEN_INT (0x3), + GEN_INT (0x3), + GEN_INT (i == 128 ? 0x6 : 0x5), + GEN_INT (0x7), + GEN_INT (0x7), + GEN_INT (0x7), + GEN_INT (i == 128 ? 0xA : 0x9), + GEN_INT (0xB), + GEN_INT (0xB), + GEN_INT (0xB), + GEN_INT (i == 128 ? 
0xE : 0xD), + GEN_INT (0xF), + GEN_INT (0xF), + GEN_INT (0xF)); + break; + default: + gcc_unreachable (); + } + emit_insn (tem); + if (d != dest) + emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d)); +} + +/* Expand a vector reduction. FN is the binary pattern to reduce; + DEST is the destination; IN is the input vector. */ + +void +ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) +{ + rtx half, dst, vec = in; + machine_mode mode = GET_MODE (in); + int i; + + /* SSE4 has a special instruction for V8HImode UMIN reduction. */ + if (TARGET_SSE4_1 + && mode == V8HImode + && fn == gen_uminv8hi3) + { + emit_insn (gen_sse4_1_phminposuw (dest, in)); + return; + } + + for (i = GET_MODE_BITSIZE (mode); + i > GET_MODE_UNIT_BITSIZE (mode); + i >>= 1) + { + half = gen_reg_rtx (mode); + emit_reduc_half (half, vec, i); + if (i == GET_MODE_UNIT_BITSIZE (mode) * 2) + dst = dest; + else + dst = gen_reg_rtx (mode); + emit_insn (fn (dst, half, vec)); + vec = dst; + } +} + +/* Target hook for scalar_mode_supported_p. */ +static bool +ix86_scalar_mode_supported_p (machine_mode mode) +{ + if (DECIMAL_FLOAT_MODE_P (mode)) + return default_decimal_float_supported_p (); + else if (mode == TFmode) + return true; + else + return default_scalar_mode_supported_p (mode); +} + +/* Implements target hook vector_mode_supported_p. */ +static bool +ix86_vector_mode_supported_p (machine_mode mode) +{ + if (TARGET_SSE && VALID_SSE_REG_MODE (mode)) + return true; + if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) + return true; + if (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) + return true; + if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) + return true; + if (TARGET_MMX && VALID_MMX_REG_MODE (mode)) + return true; + if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode)) + return true; + return false; +} + +/* Implement target hook libgcc_floating_mode_supported_p. */ +static bool +ix86_libgcc_floating_mode_supported_p (machine_mode mode) +{ + switch (mode) + { + case SFmode: + case DFmode: + case XFmode: + return true; + + case TFmode: +#ifdef IX86_NO_LIBGCC_TFMODE + return false; +#elif defined IX86_MAYBE_NO_LIBGCC_TFMODE + return TARGET_LONG_DOUBLE_128; +#else + return true; +#endif + + default: + return false; + } +} + +/* Target hook for c_mode_for_suffix. */ +static machine_mode +ix86_c_mode_for_suffix (char suffix) +{ + if (suffix == 'q') + return TFmode; + if (suffix == 'w') + return XFmode; + + return VOIDmode; +} + +/* Worker function for TARGET_MD_ASM_ADJUST. + + We implement asm flag outputs, and maintain source compatibility + with the old cc0-based compiler. 
*/ + +static rtx_insn * +ix86_md_asm_adjust (vec &outputs, vec &/*inputs*/, + vec &constraints, + vec &clobbers, HARD_REG_SET &clobbered_regs) +{ + clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG)); + SET_HARD_REG_BIT (clobbered_regs, FPSR_REG); + + bool saw_asm_flag = false; + + start_sequence (); + for (unsigned i = 0, n = outputs.length (); i < n; ++i) + { + const char *con = constraints[i]; + if (strncmp (con, "=@cc", 4) != 0) + continue; + con += 4; + if (strchr (con, ',') != NULL) + { + error ("alternatives not allowed in asm flag output"); + continue; + } + + bool invert = false; + if (con[0] == 'n') + invert = true, con++; + + machine_mode mode = CCmode; + rtx_code code = UNKNOWN; + + switch (con[0]) + { + case 'a': + if (con[1] == 0) + mode = CCAmode, code = EQ; + else if (con[1] == 'e' && con[2] == 0) + mode = CCCmode, code = NE; + break; + case 'b': + if (con[1] == 0) + mode = CCCmode, code = EQ; + else if (con[1] == 'e' && con[2] == 0) + mode = CCAmode, code = NE; + break; + case 'c': + if (con[1] == 0) + mode = CCCmode, code = EQ; + break; + case 'e': + if (con[1] == 0) + mode = CCZmode, code = EQ; + break; + case 'g': + if (con[1] == 0) + mode = CCGCmode, code = GT; + else if (con[1] == 'e' && con[2] == 0) + mode = CCGCmode, code = GE; + break; + case 'l': + if (con[1] == 0) + mode = CCGCmode, code = LT; + else if (con[1] == 'e' && con[2] == 0) + mode = CCGCmode, code = LE; + break; + case 'o': + if (con[1] == 0) + mode = CCOmode, code = EQ; + break; + case 'p': + if (con[1] == 0) + mode = CCPmode, code = EQ; + break; + case 's': + if (con[1] == 0) + mode = CCSmode, code = EQ; + break; + case 'z': + if (con[1] == 0) + mode = CCZmode, code = EQ; + break; + } + if (code == UNKNOWN) + { + error ("unknown asm flag output %qs", constraints[i]); + continue; + } + if (invert) + code = reverse_condition (code); + + rtx dest = outputs[i]; + if (!saw_asm_flag) + { + /* This is the first asm flag output. Here we put the flags + register in as the real output and adjust the condition to + allow it. */ + constraints[i] = "=Bf"; + outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG); + saw_asm_flag = true; + } + else + { + /* We don't need the flags register as output twice. */ + constraints[i] = "=X"; + outputs[i] = gen_rtx_SCRATCH (SImode); + } + + rtx x = gen_rtx_REG (mode, FLAGS_REG); + x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx); + + machine_mode dest_mode = GET_MODE (dest); + if (!SCALAR_INT_MODE_P (dest_mode)) + { + error ("invalid type for asm flag output"); + continue; + } + + if (dest_mode == DImode && !TARGET_64BIT) + dest_mode = SImode; + + if (dest_mode != QImode) + { + rtx destqi = gen_reg_rtx (QImode); + emit_insn (gen_rtx_SET (destqi, x)); + + if (TARGET_ZERO_EXTEND_WITH_AND + && optimize_function_for_speed_p (cfun)) + { + x = force_reg (dest_mode, const0_rtx); + + emit_insn (gen_movstrictqi + (gen_lowpart (QImode, x), destqi)); + } + else + x = gen_rtx_ZERO_EXTEND (dest_mode, destqi); + } + + if (dest_mode != GET_MODE (dest)) + { + rtx tmp = gen_reg_rtx (SImode); + + emit_insn (gen_rtx_SET (tmp, x)); + emit_insn (gen_zero_extendsidi2 (dest, tmp)); + } + else + emit_insn (gen_rtx_SET (dest, x)); + } + rtx_insn *seq = get_insns (); + end_sequence (); + + if (saw_asm_flag) + return seq; + else + { + /* If we had no asm flag outputs, clobber the flags. */ + clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG)); + SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG); + return NULL; + } +} + +/* Implements target vector targetm.asm.encode_section_info. 
*/ + +static void ATTRIBUTE_UNUSED +ix86_encode_section_info (tree decl, rtx rtl, int first) +{ + default_encode_section_info (decl, rtl, first); + + if (ix86_in_large_data_p (decl)) + SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; +} + +/* Worker function for REVERSE_CONDITION. */ + +enum rtx_code +ix86_reverse_condition (enum rtx_code code, machine_mode mode) +{ + return (mode != CCFPmode && mode != CCFPUmode + ? reverse_condition (code) + : reverse_condition_maybe_unordered (code)); +} + +/* Output code to perform an x87 FP register move, from OPERANDS[1] + to OPERANDS[0]. */ + +const char * +output_387_reg_move (rtx insn, rtx *operands) +{ + if (REG_P (operands[0])) + { + if (REG_P (operands[1]) + && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + { + if (REGNO (operands[0]) == FIRST_STACK_REG) + return output_387_ffreep (operands, 0); + return "fstp\t%y0"; + } + if (STACK_TOP_P (operands[0])) + return "fld%Z1\t%y1"; + return "fst\t%y0"; + } + else if (MEM_P (operands[0])) + { + gcc_assert (REG_P (operands[1])); + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%Z0\t%y0"; + else + { + /* There is no non-popping store to memory for XFmode. + So if we need one, follow the store with a load. */ + if (GET_MODE (operands[0]) == XFmode) + return "fstp%Z0\t%y0\n\tfld%Z0\t%y0"; + else + return "fst%Z0\t%y0"; + } + } + else + gcc_unreachable(); +} + +/* Output code to perform a conditional jump to LABEL, if C2 flag in + FP status register is set. */ + +void +ix86_emit_fp_unordered_jump (rtx label) +{ + rtx reg = gen_reg_rtx (HImode); + rtx temp; + + emit_insn (gen_x86_fnstsw_1 (reg)); + + if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())) + { + emit_insn (gen_x86_sahf_1 (reg)); + + temp = gen_rtx_REG (CCmode, FLAGS_REG); + temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx); + } + else + { + emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04))); + + temp = gen_rtx_REG (CCNOmode, FLAGS_REG); + temp = gen_rtx_NE (VOIDmode, temp, const0_rtx); + } + + temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + temp = gen_rtx_SET (pc_rtx, temp); + + emit_jump_insn (temp); + predict_jump (REG_BR_PROB_BASE * 10 / 100); +} + +/* Output code to perform a log1p XFmode calculation. */ + +void ix86_emit_i387_log1p (rtx op0, rtx op1) +{ + rtx_code_label *label1 = gen_label_rtx (); + rtx_code_label *label2 = gen_label_rtx (); + + rtx tmp = gen_reg_rtx (XFmode); + rtx tmp2 = gen_reg_rtx (XFmode); + rtx test; + + emit_insn (gen_absxf2 (tmp, op1)); + test = gen_rtx_GE (VOIDmode, tmp, + const_double_from_real_value ( + REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode), + XFmode)); + emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1)); + + emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */ + emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2)); + emit_jump (label2); + + emit_label (label1); + emit_move_insn (tmp, CONST1_RTX (XFmode)); + emit_insn (gen_addxf3 (tmp, op1, tmp)); + emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */ + emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2)); + + emit_label (label2); +} + +/* Emit code for round calculation. 
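+
+   In C terms the x87 sequence below computes, roughly,
+
+     r   = floor (fabs (op1) + 0.5);
+     op0 = signbit (op1) ? -r : r;
+
+   i.e. round-half-away-from-zero, with fxam supplying the sign bit so
+   the result is negated on the way out for negative inputs.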
*/ +void ix86_emit_i387_round (rtx op0, rtx op1) +{ + machine_mode inmode = GET_MODE (op1); + machine_mode outmode = GET_MODE (op0); + rtx e1, e2, res, tmp, tmp1, half; + rtx scratch = gen_reg_rtx (HImode); + rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); + rtx_code_label *jump_label = gen_label_rtx (); + rtx insn; + rtx (*gen_abs) (rtx, rtx); + rtx (*gen_neg) (rtx, rtx); + + switch (inmode) + { + case SFmode: + gen_abs = gen_abssf2; + break; + case DFmode: + gen_abs = gen_absdf2; + break; + case XFmode: + gen_abs = gen_absxf2; + break; + default: + gcc_unreachable (); + } + + switch (outmode) + { + case SFmode: + gen_neg = gen_negsf2; + break; + case DFmode: + gen_neg = gen_negdf2; + break; + case XFmode: + gen_neg = gen_negxf2; + break; + case HImode: + gen_neg = gen_neghi2; + break; + case SImode: + gen_neg = gen_negsi2; + break; + case DImode: + gen_neg = gen_negdi2; + break; + default: + gcc_unreachable (); + } + + e1 = gen_reg_rtx (inmode); + e2 = gen_reg_rtx (inmode); + res = gen_reg_rtx (outmode); + + half = const_double_from_real_value (dconsthalf, inmode); + + /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */ + + /* scratch = fxam(op1) */ + emit_insn (gen_rtx_SET (scratch, + gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1), + UNSPEC_FXAM))); + /* e1 = fabs(op1) */ + emit_insn (gen_abs (e1, op1)); + + /* e2 = e1 + 0.5 */ + half = force_reg (inmode, half); + emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half))); + + /* res = floor(e2) */ + if (inmode != XFmode) + { + tmp1 = gen_reg_rtx (XFmode); + + emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2))); + } + else + tmp1 = e2; + + switch (outmode) + { + case SFmode: + case DFmode: + { + rtx tmp0 = gen_reg_rtx (XFmode); + + emit_insn (gen_frndintxf2_floor (tmp0, tmp1)); + + emit_insn (gen_rtx_SET (res, + gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0), + UNSPEC_TRUNC_NOOP))); + } + break; + case XFmode: + emit_insn (gen_frndintxf2_floor (res, tmp1)); + break; + case HImode: + emit_insn (gen_lfloorxfhi2 (res, tmp1)); + break; + case SImode: + emit_insn (gen_lfloorxfsi2 (res, tmp1)); + break; + case DImode: + emit_insn (gen_lfloorxfdi2 (res, tmp1)); + break; + default: + gcc_unreachable (); + } + + /* flags = signbit(a) */ + emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02))); + + /* if (flags) then res = -res */ + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_EQ (VOIDmode, flags, const0_rtx), + gen_rtx_LABEL_REF (VOIDmode, jump_label), + pc_rtx); + insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); + predict_jump (REG_BR_PROB_BASE * 50 / 100); + JUMP_LABEL (insn) = jump_label; + + emit_insn (gen_neg (res, res)); + + emit_label (jump_label); + LABEL_NUSES (jump_label) = 1; + + emit_move_insn (op0, res); +} + +/* Output code to perform a Newton-Rhapson approximation of a single precision + floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. 
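+
+   In scalar terms this is one Newton-Raphson refinement of the
+   hardware reciprocal estimate, roughly
+
+     x0 = rcpss (b);                  (about 12 correct bits)
+     x1 = (x0 + x0) - b * x0 * x0;    (refinement step)
+     return a * x1;
+
+   which approximately doubles the number of correct bits in the
+   estimate before the final multiply.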
*/ + +void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) +{ + rtx x0, x1, e0, e1; + + x0 = gen_reg_rtx (mode); + e0 = gen_reg_rtx (mode); + e1 = gen_reg_rtx (mode); + x1 = gen_reg_rtx (mode); + + /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ + + b = force_reg (mode, b); + + /* x0 = rcp(b) estimate */ + if (mode == V16SFmode || mode == V8DFmode) + emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), + UNSPEC_RCP14))); + else + emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), + UNSPEC_RCP))); + + /* e0 = x0 * b */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); + + /* e0 = x0 * e0 */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); + + /* e1 = x0 + x0 */ + emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); + + /* x1 = e1 - e0 */ + emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); + + /* res = a * x1 */ + emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); +} + +/* Output code to perform a Newton-Rhapson approximation of a + single precision floating point [reciprocal] square root. */ + +void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip) +{ + rtx x0, e0, e1, e2, e3, mthree, mhalf; + REAL_VALUE_TYPE r; + int unspec; + + x0 = gen_reg_rtx (mode); + e0 = gen_reg_rtx (mode); + e1 = gen_reg_rtx (mode); + e2 = gen_reg_rtx (mode); + e3 = gen_reg_rtx (mode); + + real_from_integer (&r, VOIDmode, -3, SIGNED); + mthree = const_double_from_real_value (r, SFmode); + + real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL); + mhalf = const_double_from_real_value (r, SFmode); + unspec = UNSPEC_RSQRT; + + if (VECTOR_MODE_P (mode)) + { + mthree = ix86_build_const_vector (mode, true, mthree); + mhalf = ix86_build_const_vector (mode, true, mhalf); + /* There is no 512-bit rsqrt. There is however rsqrt14. */ + if (GET_MODE_SIZE (mode) == 64) + unspec = UNSPEC_RSQRT14; + } + + /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) + rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */ + + a = force_reg (mode, a); + + /* x0 = rsqrt(a) estimate */ + emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), + unspec))); + + /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */ + if (!recip) + { + rtx zero = force_reg (mode, CONST0_RTX(mode)); + rtx mask; + + /* Handle masked compare. */ + if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64) + { + mask = gen_reg_rtx (HImode); + /* Imm value 0x4 corresponds to not-equal comparison. */ + emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4))); + emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask)); + } + else + { + mask = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a))); + emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask))); + } + } + + /* e0 = x0 * a */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); + /* e1 = e0 * x0 */ + emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0))); + + /* e2 = e1 - 3. */ + mthree = force_reg (mode, mthree); + emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree))); + + mhalf = force_reg (mode, mhalf); + if (recip) + /* e3 = -.5 * x0 */ + emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf))); + else + /* e3 = -.5 * e0 */ + emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf))); + /* ret = e2 * e3 */ + emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3))); +} + +#ifdef TARGET_SOLARIS +/* Solaris implementation of TARGET_ASM_NAMED_SECTION. 
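+
+   For the 64-bit .eh_frame case this prints a directive of the form
+
+     .section  .eh_frame,"a",@unwind
+
+   (or "aw" when SECTION_WRITE is set) on every occurrence of the
+   section, for the reason given in the comment inside the function.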
*/ + +static void +i386_solaris_elf_named_section (const char *name, unsigned int flags, + tree decl) +{ + /* With Binutils 2.15, the "@unwind" marker must be specified on + every occurrence of the ".eh_frame" section, not just the first + one. */ + if (TARGET_64BIT + && strcmp (name, ".eh_frame") == 0) + { + fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name, + flags & SECTION_WRITE ? "aw" : "a"); + return; + } + +#ifndef USE_GAS + if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE) + { + solaris_elf_asm_comdat_section (name, flags, decl); + return; + } +#endif + + default_elf_asm_named_section (name, flags, decl); +} +#endif /* TARGET_SOLARIS */ + +/* Return the mangling of TYPE if it is an extended fundamental type. */ + +static const char * +ix86_mangle_type (const_tree type) +{ + type = TYPE_MAIN_VARIANT (type); + + if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE + && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) + return NULL; + + switch (TYPE_MODE (type)) + { + case TFmode: + /* __float128 is "g". */ + return "g"; + case XFmode: + /* "long double" or __float80 is "e". */ + return "e"; + default: + return NULL; + } +} + +/* For 32-bit code we can save PIC register setup by using + __stack_chk_fail_local hidden function instead of calling + __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC + register, so it is better to call __stack_chk_fail directly. */ + +static tree ATTRIBUTE_UNUSED +ix86_stack_protect_fail (void) +{ + return TARGET_64BIT + ? default_external_stack_protect_fail () + : default_hidden_stack_protect_fail (); +} + +/* Select a format to encode pointers in exception handling data. CODE + is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is + true if the symbol may be affected by dynamic relocations. + + ??? All x86 object file formats are capable of representing this. + After all, the relocation needed is the same as for the call insn. + Whether or not a particular assembler allows us to enter such, I + guess we'll have to see. */ +int +asm_preferred_eh_data_format (int code, int global) +{ + if (flag_pic) + { + int type = DW_EH_PE_sdata8; + if (!TARGET_64BIT + || ix86_cmodel == CM_SMALL_PIC + || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))) + type = DW_EH_PE_sdata4; + return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; + } + if (ix86_cmodel == CM_SMALL + || (ix86_cmodel == CM_MEDIUM && code)) + return DW_EH_PE_udata4; + return DW_EH_PE_absptr; +} + +/* Expand copysign from SIGN to the positive value ABS_VALUE + storing in RESULT. If MASK is non-null, it shall be a mask to mask out + the sign-bit. */ +static void +ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask) +{ + machine_mode mode = GET_MODE (sign); + rtx sgn = gen_reg_rtx (mode); + if (mask == NULL_RTX) + { + machine_mode vmode; + + if (mode == SFmode) + vmode = V4SFmode; + else if (mode == DFmode) + vmode = V2DFmode; + else + vmode = mode; + + mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false); + if (!VECTOR_MODE_P (mode)) + { + /* We need to generate a scalar mode mask in this case. 
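+
+     Viewed in integer terms the whole helper computes roughly
+
+       result = abs_value | (sign & sign_bit_of_mode);
+
+     and this block merely narrows the V4SF/V2DF sign-bit constant down
+     to its element 0 (via vec_select) so that the AND and IOR emitted
+     below can work in the scalar mode.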
*/ + rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); + tmp = gen_rtx_VEC_SELECT (mode, mask, tmp); + mask = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (mask, tmp)); + } + } + else + mask = gen_rtx_NOT (mode, mask); + emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign))); + emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn))); +} + +/* Expand fabs (OP0) and return a new rtx that holds the result. The + mask for masking out the sign-bit is stored in *SMASK, if that is + non-null. */ +static rtx +ix86_expand_sse_fabs (rtx op0, rtx *smask) +{ + machine_mode vmode, mode = GET_MODE (op0); + rtx xa, mask; + + xa = gen_reg_rtx (mode); + if (mode == SFmode) + vmode = V4SFmode; + else if (mode == DFmode) + vmode = V2DFmode; + else + vmode = mode; + mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true); + if (!VECTOR_MODE_P (mode)) + { + /* We need to generate a scalar mode mask in this case. */ + rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); + tmp = gen_rtx_VEC_SELECT (mode, mask, tmp); + mask = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (mask, tmp)); + } + emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask))); + + if (smask) + *smask = mask; + + return xa; +} + +/* Expands a comparison of OP0 with OP1 using comparison code CODE, + swapping the operands if SWAP_OPERANDS is true. The expanded + code is a forward jump to a newly created label in case the + comparison is true. The generated label rtx is returned. */ +static rtx_code_label * +ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1, + bool swap_operands) +{ + machine_mode fpcmp_mode = ix86_fp_compare_mode (code); + rtx_code_label *label; + rtx tmp; + + if (swap_operands) + std::swap (op0, op1); + + label = gen_label_rtx (); + tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG); + emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1))); + tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx); + tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); + JUMP_LABEL (tmp) = label; + + return label; +} + +/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1 + using comparison code CODE. Operands are swapped for the comparison if + SWAP_OPERANDS is true. Returns a rtx for the generated mask. */ +static rtx +ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1, + bool swap_operands) +{ + rtx (*insn)(rtx, rtx, rtx, rtx); + machine_mode mode = GET_MODE (op0); + rtx mask = gen_reg_rtx (mode); + + if (swap_operands) + std::swap (op0, op1); + + insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse; + + emit_insn (insn (mask, op0, op1, + gen_rtx_fmt_ee (code, mode, op0, op1))); + return mask; +} + +/* Generate and return a rtx of mode MODE for 2**n where n is the number + of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */ +static rtx +ix86_gen_TWO52 (machine_mode mode) +{ + REAL_VALUE_TYPE TWO52r; + rtx TWO52; + + real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23); + TWO52 = const_double_from_real_value (TWO52r, mode); + TWO52 = force_reg (mode, TWO52); + + return TWO52; +} + +/* Expand SSE sequence for computing lround from OP1 storing + into OP0. 
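+
+   The 0.5 used in the expansion is really nextafter (0.5, 0.0): for the
+   largest double below 0.5 (about 0.49999999999999994), adding a full
+   0.5 would round up to 1.0 in the PLUS and make lround return 1, so
+   the slightly smaller constant keeps such inputs rounding to 0.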
*/ +void +ix86_expand_lround (rtx op0, rtx op1) +{ + /* C code for the stuff we're doing below: + tmp = op1 + copysign (nextafter (0.5, 0.0), op1) + return (long)tmp; + */ + machine_mode mode = GET_MODE (op1); + const struct real_format *fmt; + REAL_VALUE_TYPE pred_half, half_minus_pred_half; + rtx adj; + + /* load nextafter (0.5, 0.0) */ + fmt = REAL_MODE_FORMAT (mode); + real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); + real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half); + + /* adj = copysign (0.5, op1) */ + adj = force_reg (mode, const_double_from_real_value (pred_half, mode)); + ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX); + + /* adj = op1 + adj */ + adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT); + + /* op0 = (imode)adj */ + expand_fix (op0, adj, 0); +} + +/* Expand SSE2 sequence for computing lround from OPERAND1 storing + into OPERAND0. */ +void +ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor) +{ + /* C code for the stuff we're doing below (for do_floor): + xi = (long)op1; + xi -= (double)xi > op1 ? 1 : 0; + return xi; + */ + machine_mode fmode = GET_MODE (op1); + machine_mode imode = GET_MODE (op0); + rtx ireg, freg, tmp; + rtx_code_label *label; + + /* reg = (long)op1 */ + ireg = gen_reg_rtx (imode); + expand_fix (ireg, op1, 0); + + /* freg = (double)reg */ + freg = gen_reg_rtx (fmode); + expand_float (freg, ireg, 0); + + /* ireg = (freg > op1) ? ireg - 1 : ireg */ + label = ix86_expand_sse_compare_and_jump (UNLE, + freg, op1, !do_floor); + tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS, + ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT); + emit_move_insn (ireg, tmp); + + emit_label (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (op0, ireg); +} + +/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the + result in OPERAND0. */ +void +ix86_expand_rint (rtx operand0, rtx operand1) +{ + /* C code for the stuff we're doing below: + xa = fabs (operand1); + if (!isless (xa, 2**52)) + return operand1; + xa = xa + 2**52 - 2**52; + return copysign (xa, operand1); + */ + machine_mode mode = GET_MODE (operand0); + rtx res, xa, TWO52, mask; + rtx_code_label *label; + + res = gen_reg_rtx (mode); + emit_move_insn (res, operand1); + + /* xa = abs (operand1) */ + xa = ix86_expand_sse_fabs (res, &mask); + + /* if (!isless (xa, TWO52)) goto label; */ + TWO52 = ix86_gen_TWO52 (mode); + label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); + + xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); + xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT); + + ix86_sse_copysign_to_positive (res, xa, res, mask); + + emit_label (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (operand0, res); +} + +/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing + into OPERAND0. */ +void +ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor) +{ + /* C code for the stuff we expand below. + double xa = fabs (x), x2; + if (!isless (xa, TWO52)) + return x; + xa = xa + TWO52 - TWO52; + x2 = copysign (xa, x); + Compensate. Floor: + if (x2 > x) + x2 -= 1; + Compensate. Ceil: + if (x2 < x) + x2 -= -1; + return x2; + */ + machine_mode mode = GET_MODE (operand0); + rtx xa, TWO52, tmp, one, res, mask; + rtx_code_label *label; + + TWO52 = ix86_gen_TWO52 (mode); + + /* Temporary for holding the result, initialized to the input + operand to ease control flow. 
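+
+     (If the !isless (xa, TWO52) branch below is taken, RES still holds
+     the unmodified input, which implements the early "return x" of the
+     C sketch without a separate join block.)  The xa + TWO52 - TWO52
+     step relies on 2**52 being the magnitude at which doubles have no
+     fraction bits left: once xa < 2**52 is known, the addition rounds
+     away the fractional part and the subtraction recovers the rounded
+     value, e.g.
+
+       (3.7 + 0x1p52) - 0x1p52 == 4.0   in round-to-nearest mode,
+
+     which the compensation below then corrects into floor/ceil.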
*/ + res = gen_reg_rtx (mode); + emit_move_insn (res, operand1); + + /* xa = abs (operand1) */ + xa = ix86_expand_sse_fabs (res, &mask); + + /* if (!isless (xa, TWO52)) goto label; */ + label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); + + /* xa = xa + TWO52 - TWO52; */ + xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); + xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT); + + /* xa = copysign (xa, operand1) */ + ix86_sse_copysign_to_positive (xa, xa, res, mask); + + /* generate 1.0 or -1.0 */ + one = force_reg (mode, + const_double_from_real_value (do_floor + ? dconst1 : dconstm1, mode)); + + /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ + tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); + emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); + /* We always need to subtract here to preserve signed zero. */ + tmp = expand_simple_binop (mode, MINUS, + xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); + emit_move_insn (res, tmp); + + emit_label (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (operand0, res); +} + +/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing + into OPERAND0. */ +void +ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor) +{ + /* C code for the stuff we expand below. + double xa = fabs (x), x2; + if (!isless (xa, TWO52)) + return x; + x2 = (double)(long)x; + Compensate. Floor: + if (x2 > x) + x2 -= 1; + Compensate. Ceil: + if (x2 < x) + x2 += 1; + if (HONOR_SIGNED_ZEROS (mode)) + return copysign (x2, x); + return x2; + */ + machine_mode mode = GET_MODE (operand0); + rtx xa, xi, TWO52, tmp, one, res, mask; + rtx_code_label *label; + + TWO52 = ix86_gen_TWO52 (mode); + + /* Temporary for holding the result, initialized to the input + operand to ease control flow. */ + res = gen_reg_rtx (mode); + emit_move_insn (res, operand1); + + /* xa = abs (operand1) */ + xa = ix86_expand_sse_fabs (res, &mask); + + /* if (!isless (xa, TWO52)) goto label; */ + label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); + + /* xa = (double)(long)x */ + xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); + expand_fix (xi, res, 0); + expand_float (xa, xi, 0); + + /* generate 1.0 */ + one = force_reg (mode, const_double_from_real_value (dconst1, mode)); + + /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ + tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); + emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); + tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS, + xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); + emit_move_insn (res, tmp); + + if (HONOR_SIGNED_ZEROS (mode)) + ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask); + + emit_label (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (operand0, res); +} + +/* Expand SSE sequence for computing round from OPERAND1 storing + into OPERAND0. Sequence that works without relying on DImode truncation + via cvttsd2siq that is only available on 64bit targets. */ +void +ix86_expand_rounddf_32 (rtx operand0, rtx operand1) +{ + /* C code for the stuff we expand below. + double xa = fabs (x), xa2, x2; + if (!isless (xa, TWO52)) + return x; + Using the absolute value and copying back sign makes + -0.0 -> -0.0 correct. + xa2 = xa + TWO52 - TWO52; + Compensate. 
+ dxa = xa2 - xa; + if (dxa <= -0.5) + xa2 += 1; + else if (dxa > 0.5) + xa2 -= 1; + x2 = copysign (xa2, x); + return x2; + */ + machine_mode mode = GET_MODE (operand0); + rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask; + rtx_code_label *label; + + TWO52 = ix86_gen_TWO52 (mode); + + /* Temporary for holding the result, initialized to the input + operand to ease control flow. */ + res = gen_reg_rtx (mode); + emit_move_insn (res, operand1); + + /* xa = abs (operand1) */ + xa = ix86_expand_sse_fabs (res, &mask); + + /* if (!isless (xa, TWO52)) goto label; */ + label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); + + /* xa2 = xa + TWO52 - TWO52; */ + xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); + xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT); + + /* dxa = xa2 - xa; */ + dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT); + + /* generate 0.5, 1.0 and -0.5 */ + half = force_reg (mode, const_double_from_real_value (dconsthalf, mode)); + one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT); + mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX, + 0, OPTAB_DIRECT); + + /* Compensate. */ + tmp = gen_reg_rtx (mode); + /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */ + tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false); + emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); + xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); + /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */ + tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false); + emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); + xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); + + /* res = copysign (xa2, operand1) */ + ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask); + + emit_label (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (operand0, res); +} + +/* Expand SSE sequence for computing trunc from OPERAND1 storing + into OPERAND0. */ +void +ix86_expand_trunc (rtx operand0, rtx operand1) +{ + /* C code for SSE variant we expand below. + double xa = fabs (x), x2; + if (!isless (xa, TWO52)) + return x; + x2 = (double)(long)x; + if (HONOR_SIGNED_ZEROS (mode)) + return copysign (x2, x); + return x2; + */ + machine_mode mode = GET_MODE (operand0); + rtx xa, xi, TWO52, res, mask; + rtx_code_label *label; + + TWO52 = ix86_gen_TWO52 (mode); + + /* Temporary for holding the result, initialized to the input + operand to ease control flow. */ + res = gen_reg_rtx (mode); + emit_move_insn (res, operand1); + + /* xa = abs (operand1) */ + xa = ix86_expand_sse_fabs (res, &mask); + + /* if (!isless (xa, TWO52)) goto label; */ + label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); + + /* x = (double)(long)x */ + xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); + expand_fix (xi, res, 0); + expand_float (res, xi, 0); + + if (HONOR_SIGNED_ZEROS (mode)) + ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask); + + emit_label (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (operand0, res); +} + +/* Expand SSE sequence for computing trunc from OPERAND1 storing + into OPERAND0. */ +void +ix86_expand_truncdf_32 (rtx operand0, rtx operand1) +{ + machine_mode mode = GET_MODE (operand0); + rtx xa, mask, TWO52, one, res, smask, tmp; + rtx_code_label *label; + + /* C code for SSE variant we expand below. 
+ double xa = fabs (x), x2; + if (!isless (xa, TWO52)) + return x; + xa2 = xa + TWO52 - TWO52; + Compensate: + if (xa2 > xa) + xa2 -= 1.0; + x2 = copysign (xa2, x); + return x2; + */ + + TWO52 = ix86_gen_TWO52 (mode); + + /* Temporary for holding the result, initialized to the input + operand to ease control flow. */ + res = gen_reg_rtx (mode); + emit_move_insn (res, operand1); + + /* xa = abs (operand1) */ + xa = ix86_expand_sse_fabs (res, &smask); + + /* if (!isless (xa, TWO52)) goto label; */ + label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); + + /* res = xa + TWO52 - TWO52; */ + tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); + tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT); + emit_move_insn (res, tmp); + + /* generate 1.0 */ + one = force_reg (mode, const_double_from_real_value (dconst1, mode)); + + /* Compensate: res = xa2 - (res > xa ? 1 : 0) */ + mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false); + emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one))); + tmp = expand_simple_binop (mode, MINUS, + res, mask, NULL_RTX, 0, OPTAB_DIRECT); + emit_move_insn (res, tmp); + + /* res = copysign (res, operand1) */ + ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask); + + emit_label (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (operand0, res); +} + +/* Expand SSE sequence for computing round from OPERAND1 storing + into OPERAND0. */ +void +ix86_expand_round (rtx operand0, rtx operand1) +{ + /* C code for the stuff we're doing below: + double xa = fabs (x); + if (!isless (xa, TWO52)) + return x; + xa = (double)(long)(xa + nextafter (0.5, 0.0)); + return copysign (xa, x); + */ + machine_mode mode = GET_MODE (operand0); + rtx res, TWO52, xa, xi, half, mask; + rtx_code_label *label; + const struct real_format *fmt; + REAL_VALUE_TYPE pred_half, half_minus_pred_half; + + /* Temporary for holding the result, initialized to the input + operand to ease control flow. */ + res = gen_reg_rtx (mode); + emit_move_insn (res, operand1); + + TWO52 = ix86_gen_TWO52 (mode); + xa = ix86_expand_sse_fabs (res, &mask); + label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); + + /* load nextafter (0.5, 0.0) */ + fmt = REAL_MODE_FORMAT (mode); + real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); + real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half); + + /* xa = xa + 0.5 */ + half = force_reg (mode, const_double_from_real_value (pred_half, mode)); + xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT); + + /* xa = (double)(int64_t)xa */ + xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); + expand_fix (xi, xa, 0); + expand_float (xa, xi, 0); + + /* res = copysign (xa, operand1) */ + ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask); + + emit_label (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (operand0, res); +} + +/* Expand SSE sequence for computing round + from OP1 storing into OP0 using sse4 round insn. 
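+
+   With SSE4.1 the whole operation reduces to roughly
+
+     e1  = copysign (0.5, op1);    (really nextafter (0.5, 0.0))
+     op0 = trunc (op1 + e1);       (one roundss/roundsd, ROUND_TRUNC)
+
+   so no TWO52 compare-and-branch sequence is needed.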
*/ +void +ix86_expand_round_sse4 (rtx op0, rtx op1) +{ + machine_mode mode = GET_MODE (op0); + rtx e1, e2, res, half; + const struct real_format *fmt; + REAL_VALUE_TYPE pred_half, half_minus_pred_half; + rtx (*gen_copysign) (rtx, rtx, rtx); + rtx (*gen_round) (rtx, rtx, rtx); + + switch (mode) + { + case SFmode: + gen_copysign = gen_copysignsf3; + gen_round = gen_sse4_1_roundsf2; + break; + case DFmode: + gen_copysign = gen_copysigndf3; + gen_round = gen_sse4_1_rounddf2; + break; + default: + gcc_unreachable (); + } + + /* round (a) = trunc (a + copysign (0.5, a)) */ + + /* load nextafter (0.5, 0.0) */ + fmt = REAL_MODE_FORMAT (mode); + real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); + real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half); + half = const_double_from_real_value (pred_half, mode); + + /* e1 = copysign (0.5, op1) */ + e1 = gen_reg_rtx (mode); + emit_insn (gen_copysign (e1, half, op1)); + + /* e2 = op1 + e1 */ + e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT); + + /* res = trunc (e2) */ + res = gen_reg_rtx (mode); + emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC))); + + emit_move_insn (op0, res); +} + + +/* Table of valid machine attributes. */ +static const struct attribute_spec ix86_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity } */ + /* Stdcall attribute says callee is responsible for popping arguments + if they are not variable. */ + { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute, + true }, + /* Fastcall attribute says callee is responsible for popping arguments + if they are not variable. */ + { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute, + true }, + /* Thiscall attribute says callee is responsible for popping arguments + if they are not variable. */ + { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute, + true }, + /* Cdecl attribute says the callee is a normal C declaration */ + { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute, + true }, + /* Regparm attribute specifies how many integer arguments are to be + passed in registers. */ + { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute, + true }, + /* Sseregparm attribute says we are using x86_64 calling conventions + for FP arguments. */ + { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute, + true }, + /* The transactional memory builtins are implicitly regparm or fastcall + depending on the ABI. Override the generic do-nothing attribute that + these builtins were declared with. */ + { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute, + true }, + /* force_align_arg_pointer says this function realigns the stack at entry. */ + { (const char *)&ix86_force_align_arg_pointer_string, 0, 0, + false, true, true, ix86_handle_force_align_arg_pointer_attribute, false }, +#if TARGET_DLLIMPORT_DECL_ATTRIBUTES + { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false }, + { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false }, + { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute, + false }, +#endif + { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute, + false }, + { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute, + false }, +#ifdef SUBTARGET_ATTRIBUTE_TABLE + SUBTARGET_ATTRIBUTE_TABLE, +#endif + /* ms_abi and sysv_abi calling convention function attributes. 
*/ + { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true }, + { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true }, + { "ms_abi va_list", 0, 0, false, false, false, NULL, false }, + { "sysv_abi va_list", 0, 0, false, false, false, NULL, false }, + { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute, + false }, + { "callee_pop_aggregate_return", 1, 1, false, true, true, + ix86_handle_callee_pop_aggregate_return, true }, + /* End element. */ + { NULL, 0, 0, false, false, false, NULL, false } +}; + +/* Implement targetm.vectorize.builtin_vectorization_cost. */ +static int +ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + tree vectype, int) +{ + unsigned elements; + + switch (type_of_cost) + { + case scalar_stmt: + return ix86_cost->scalar_stmt_cost; + + case scalar_load: + return ix86_cost->scalar_load_cost; + + case scalar_store: + return ix86_cost->scalar_store_cost; + + case vector_stmt: + return ix86_cost->vec_stmt_cost; + + case vector_load: + return ix86_cost->vec_align_load_cost; + + case vector_store: + return ix86_cost->vec_store_cost; + + case vec_to_scalar: + return ix86_cost->vec_to_scalar_cost; + + case scalar_to_vec: + return ix86_cost->scalar_to_vec_cost; + + case unaligned_load: + case unaligned_store: + return ix86_cost->vec_unalign_load_cost; + + case cond_branch_taken: + return ix86_cost->cond_taken_branch_cost; + + case cond_branch_not_taken: + return ix86_cost->cond_not_taken_branch_cost; + + case vec_perm: + case vec_promote_demote: + return ix86_cost->vec_stmt_cost; + + case vec_construct: + elements = TYPE_VECTOR_SUBPARTS (vectype); + return ix86_cost->vec_stmt_cost * (elements / 2 + 1); + + default: + gcc_unreachable (); + } +} + +/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel []))) + insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh + insn every time. */ + +static GTY(()) rtx_insn *vselect_insn; + +/* Initialize vselect_insn. */ + +static void +init_vselect_insn (void) +{ + unsigned i; + rtx x; + + x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN)); + for (i = 0; i < MAX_VECT_LEN; ++i) + XVECEXP (x, 0, i) = const0_rtx; + x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx, + const0_rtx), x); + x = gen_rtx_SET (const0_rtx, x); + start_sequence (); + vselect_insn = emit_insn (x); + end_sequence (); +} + +/* Construct (set target (vec_select op0 (parallel perm))) and + return true if that's a valid instruction in the active ISA. */ + +static bool +expand_vselect (rtx target, rtx op0, const unsigned char *perm, + unsigned nelt, bool testing_p) +{ + unsigned int i; + rtx x, save_vconcat; + int icode; + + if (vselect_insn == NULL_RTX) + init_vselect_insn (); + + x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1); + PUT_NUM_ELEM (XVEC (x, 0), nelt); + for (i = 0; i < nelt; ++i) + XVECEXP (x, 0, i) = GEN_INT (perm[i]); + save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0); + XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0; + PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target)); + SET_DEST (PATTERN (vselect_insn)) = target; + icode = recog_memoized (vselect_insn); + + if (icode >= 0 && !testing_p) + emit_insn (copy_rtx (PATTERN (vselect_insn))); + + SET_DEST (PATTERN (vselect_insn)) = const0_rtx; + XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat; + INSN_CODE (vselect_insn) = -1; + + return icode >= 0; +} + +/* Similar, but generate a vec_concat from op0 and op1 as well. 
*/ + +static bool +expand_vselect_vconcat (rtx target, rtx op0, rtx op1, + const unsigned char *perm, unsigned nelt, + bool testing_p) +{ + machine_mode v2mode; + rtx x; + bool ok; + + if (vselect_insn == NULL_RTX) + init_vselect_insn (); + + v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0)); + x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0); + PUT_MODE (x, v2mode); + XEXP (x, 0) = op0; + XEXP (x, 1) = op1; + ok = expand_vselect (target, x, perm, nelt, testing_p); + XEXP (x, 0) = const0_rtx; + XEXP (x, 1) = const0_rtx; + return ok; +} + +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D + in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */ + +static bool +expand_vec_perm_blend (struct expand_vec_perm_d *d) +{ + machine_mode mmode, vmode = d->vmode; + unsigned i, mask, nelt = d->nelt; + rtx target, op0, op1, maskop, x; + rtx rperm[32], vperm; + + if (d->one_operand_p) + return false; + if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64 + && (TARGET_AVX512BW + || GET_MODE_UNIT_SIZE (vmode) >= 4)) + ; + else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32) + ; + else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode)) + ; + else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16) + ; + else + return false; + + /* This is a blend, not a permute. Elements must stay in their + respective lanes. */ + for (i = 0; i < nelt; ++i) + { + unsigned e = d->perm[i]; + if (!(e == i || e == i + nelt)) + return false; + } + + if (d->testing_p) + return true; + + /* ??? Without SSE4.1, we could implement this with and/andn/or. This + decision should be extracted elsewhere, so that we only try that + sequence once all budget==3 options have been tried. */ + target = d->target; + op0 = d->op0; + op1 = d->op1; + mask = 0; + + switch (vmode) + { + case V8DFmode: + case V16SFmode: + case V4DFmode: + case V8SFmode: + case V2DFmode: + case V4SFmode: + case V8HImode: + case V8SImode: + case V32HImode: + case V64QImode: + case V16SImode: + case V8DImode: + for (i = 0; i < nelt; ++i) + mask |= (d->perm[i] >= nelt) << i; + break; + + case V2DImode: + for (i = 0; i < 2; ++i) + mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4); + vmode = V8HImode; + goto do_subreg; + + case V4SImode: + for (i = 0; i < 4; ++i) + mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2); + vmode = V8HImode; + goto do_subreg; + + case V16QImode: + /* See if bytes move in pairs so we can use pblendw with + an immediate argument, rather than pblendvb with a vector + argument. */ + for (i = 0; i < 16; i += 2) + if (d->perm[i] + 1 != d->perm[i + 1]) + { + use_pblendvb: + for (i = 0; i < nelt; ++i) + rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx); + + finish_pblendvb: + vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm)); + vperm = force_reg (vmode, vperm); + + if (GET_MODE_SIZE (vmode) == 16) + emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm)); + else + emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm)); + if (target != d->target) + emit_move_insn (d->target, gen_lowpart (d->vmode, target)); + return true; + } + + for (i = 0; i < 8; ++i) + mask |= (d->perm[i * 2] >= 16) << i; + vmode = V8HImode; + /* FALLTHRU */ + + do_subreg: + target = gen_reg_rtx (vmode); + op0 = gen_lowpart (vmode, op0); + op1 = gen_lowpart (vmode, op1); + break; + + case V32QImode: + /* See if bytes move in pairs. If not, vpblendvb must be used. */ + for (i = 0; i < 32; i += 2) + if (d->perm[i] + 1 != d->perm[i + 1]) + goto use_pblendvb; + /* See if bytes move in quadruplets. If yes, vpblendd + with immediate can be used. 
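+
+     vpblendd selects whole 32-bit dwords by immediate, so this path
+     only applies when every aligned group of four bytes is taken as a
+     block from the same operand; e.g. a blend taking dwords 0, 2 and
+     5 from the second operand becomes vpblendd with immediate 0x25.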
*/ + for (i = 0; i < 32; i += 4) + if (d->perm[i] + 2 != d->perm[i + 2]) + break; + if (i < 32) + { + /* See if bytes move the same in both lanes. If yes, + vpblendw with immediate can be used. */ + for (i = 0; i < 16; i += 2) + if (d->perm[i] + 16 != d->perm[i + 16]) + goto use_pblendvb; + + /* Use vpblendw. */ + for (i = 0; i < 16; ++i) + mask |= (d->perm[i * 2] >= 32) << i; + vmode = V16HImode; + goto do_subreg; + } + + /* Use vpblendd. */ + for (i = 0; i < 8; ++i) + mask |= (d->perm[i * 4] >= 32) << i; + vmode = V8SImode; + goto do_subreg; + + case V16HImode: + /* See if words move in pairs. If yes, vpblendd can be used. */ + for (i = 0; i < 16; i += 2) + if (d->perm[i] + 1 != d->perm[i + 1]) + break; + if (i < 16) + { + /* See if words move the same in both lanes. If not, + vpblendvb must be used. */ + for (i = 0; i < 8; i++) + if (d->perm[i] + 8 != d->perm[i + 8]) + { + /* Use vpblendvb. */ + for (i = 0; i < 32; ++i) + rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx); + + vmode = V32QImode; + nelt = 32; + target = gen_reg_rtx (vmode); + op0 = gen_lowpart (vmode, op0); + op1 = gen_lowpart (vmode, op1); + goto finish_pblendvb; + } + + /* Use vpblendw. */ + for (i = 0; i < 16; ++i) + mask |= (d->perm[i] >= 16) << i; + break; + } + + /* Use vpblendd. */ + for (i = 0; i < 8; ++i) + mask |= (d->perm[i * 2] >= 16) << i; + vmode = V8SImode; + goto do_subreg; + + case V4DImode: + /* Use vpblendd. */ + for (i = 0; i < 4; ++i) + mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2); + vmode = V8SImode; + goto do_subreg; + + default: + gcc_unreachable (); + } + + switch (vmode) + { + case V8DFmode: + case V8DImode: + mmode = QImode; + break; + case V16SFmode: + case V16SImode: + mmode = HImode; + break; + case V32HImode: + mmode = SImode; + break; + case V64QImode: + mmode = DImode; + break; + default: + mmode = VOIDmode; + } + + if (mmode != VOIDmode) + maskop = force_reg (mmode, gen_int_mode (mask, mmode)); + else + maskop = GEN_INT (mask); + + /* This matches five different patterns with the different modes. */ + x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop); + x = gen_rtx_SET (target, x); + emit_insn (x); + if (target != d->target) + emit_move_insn (d->target, gen_lowpart (d->vmode, target)); + + return true; +} + +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D + in terms of the variable form of vpermilps. + + Note that we will have already failed the immediate input vpermilps, + which requires that the high and low part shuffle be identical; the + variable form doesn't require that. */ + +static bool +expand_vec_perm_vpermil (struct expand_vec_perm_d *d) +{ + rtx rperm[8], vperm; + unsigned i; + + if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p) + return false; + + /* We can only permute within the 128-bit lane. */ + for (i = 0; i < 8; ++i) + { + unsigned e = d->perm[i]; + if (i < 4 ? e >= 4 : e < 4) + return false; + } + + if (d->testing_p) + return true; + + for (i = 0; i < 8; ++i) + { + unsigned e = d->perm[i]; + + /* Within each 128-bit lane, the elements of op0 are numbered + from 0 and the elements of op1 are numbered from 4. */ + if (e >= 8 + 4) + e -= 8; + else if (e >= 4) + e -= 4; + + rperm[i] = GEN_INT (e); + } + + vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm)); + vperm = force_reg (V8SImode, vperm); + emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm)); + + return true; +} + +/* Return true if permutation D can be performed as VMODE permutation + instead. 
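+
+   For example, the V16QImode permutation { 2 3 0 1 6 7 4 5 ... } only
+   moves aligned 16-bit chunks, so it is equally expressible as the
+   V8HImode permutation { 1 0 3 2 ... }; the checks below verify that
+   each chunk-sized group starts on a chunk boundary and stays
+   contiguous.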
*/ + +static bool +valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d) +{ + unsigned int i, j, chunk; + + if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT + || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT + || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode)) + return false; + + if (GET_MODE_NUNITS (vmode) >= d->nelt) + return true; + + chunk = d->nelt / GET_MODE_NUNITS (vmode); + for (i = 0; i < d->nelt; i += chunk) + if (d->perm[i] & (chunk - 1)) + return false; + else + for (j = 1; j < chunk; ++j) + if (d->perm[i] + j != d->perm[i + j]) + return false; + + return true; +} + +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D + in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */ + +static bool +expand_vec_perm_pshufb (struct expand_vec_perm_d *d) +{ + unsigned i, nelt, eltsz, mask; + unsigned char perm[64]; + machine_mode vmode = V16QImode; + rtx rperm[64], vperm, target, op0, op1; + + nelt = d->nelt; + + if (!d->one_operand_p) + { + if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16) + { + if (TARGET_AVX2 + && valid_perm_using_mode_p (V2TImode, d)) + { + if (d->testing_p) + return true; + + /* Use vperm2i128 insn. The pattern uses + V4DImode instead of V2TImode. */ + target = d->target; + if (d->vmode != V4DImode) + target = gen_reg_rtx (V4DImode); + op0 = gen_lowpart (V4DImode, d->op0); + op1 = gen_lowpart (V4DImode, d->op1); + rperm[0] + = GEN_INT ((d->perm[0] / (nelt / 2)) + | ((d->perm[nelt / 2] / (nelt / 2)) * 16)); + emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0])); + if (target != d->target) + emit_move_insn (d->target, gen_lowpart (d->vmode, target)); + return true; + } + return false; + } + } + else + { + if (GET_MODE_SIZE (d->vmode) == 16) + { + if (!TARGET_SSSE3) + return false; + } + else if (GET_MODE_SIZE (d->vmode) == 32) + { + if (!TARGET_AVX2) + return false; + + /* V4DImode should be already handled through + expand_vselect by vpermq instruction. */ + gcc_assert (d->vmode != V4DImode); + + vmode = V32QImode; + if (d->vmode == V8SImode + || d->vmode == V16HImode + || d->vmode == V32QImode) + { + /* First see if vpermq can be used for + V8SImode/V16HImode/V32QImode. */ + if (valid_perm_using_mode_p (V4DImode, d)) + { + for (i = 0; i < 4; i++) + perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3; + if (d->testing_p) + return true; + target = gen_reg_rtx (V4DImode); + if (expand_vselect (target, gen_lowpart (V4DImode, d->op0), + perm, 4, false)) + { + emit_move_insn (d->target, + gen_lowpart (d->vmode, target)); + return true; + } + return false; + } + + /* Next see if vpermd can be used. */ + if (valid_perm_using_mode_p (V8SImode, d)) + vmode = V8SImode; + } + /* Or if vpermps can be used. */ + else if (d->vmode == V8SFmode) + vmode = V8SImode; + + if (vmode == V32QImode) + { + /* vpshufb only works intra lanes, it is not + possible to shuffle bytes in between the lanes. */ + for (i = 0; i < nelt; ++i) + if ((d->perm[i] ^ i) & (nelt / 2)) + return false; + } + } + else if (GET_MODE_SIZE (d->vmode) == 64) + { + if (!TARGET_AVX512BW) + return false; + + /* If vpermq didn't work, vpshufb won't work either. */ + if (d->vmode == V8DFmode || d->vmode == V8DImode) + return false; + + vmode = V64QImode; + if (d->vmode == V16SImode + || d->vmode == V32HImode + || d->vmode == V64QImode) + { + /* First see if vpermq can be used for + V16SImode/V32HImode/V64QImode. 
*/ + if (valid_perm_using_mode_p (V8DImode, d)) + { + for (i = 0; i < 8; i++) + perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7; + if (d->testing_p) + return true; + target = gen_reg_rtx (V8DImode); + if (expand_vselect (target, gen_lowpart (V8DImode, d->op0), + perm, 8, false)) + { + emit_move_insn (d->target, + gen_lowpart (d->vmode, target)); + return true; + } + return false; + } + + /* Next see if vpermd can be used. */ + if (valid_perm_using_mode_p (V16SImode, d)) + vmode = V16SImode; + } + /* Or if vpermps can be used. */ + else if (d->vmode == V16SFmode) + vmode = V16SImode; + if (vmode == V64QImode) + { + /* vpshufb only works intra lanes, it is not + possible to shuffle bytes in between the lanes. */ + for (i = 0; i < nelt; ++i) + if ((d->perm[i] ^ i) & (nelt / 4)) + return false; + } + } + else + return false; + } + + if (d->testing_p) + return true; + + if (vmode == V8SImode) + for (i = 0; i < 8; ++i) + rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7); + else if (vmode == V16SImode) + for (i = 0; i < 16; ++i) + rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15); + else + { + eltsz = GET_MODE_UNIT_SIZE (d->vmode); + if (!d->one_operand_p) + mask = 2 * nelt - 1; + else if (vmode == V16QImode) + mask = nelt - 1; + else if (vmode == V64QImode) + mask = nelt / 4 - 1; + else + mask = nelt / 2 - 1; + + for (i = 0; i < nelt; ++i) + { + unsigned j, e = d->perm[i] & mask; + for (j = 0; j < eltsz; ++j) + rperm[i * eltsz + j] = GEN_INT (e * eltsz + j); + } + } + + vperm = gen_rtx_CONST_VECTOR (vmode, + gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm)); + vperm = force_reg (vmode, vperm); + + target = d->target; + if (d->vmode != vmode) + target = gen_reg_rtx (vmode); + op0 = gen_lowpart (vmode, d->op0); + if (d->one_operand_p) + { + if (vmode == V16QImode) + emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm)); + else if (vmode == V32QImode) + emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm)); + else if (vmode == V64QImode) + emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm)); + else if (vmode == V8SFmode) + emit_insn (gen_avx2_permvarv8sf (target, op0, vperm)); + else if (vmode == V8SImode) + emit_insn (gen_avx2_permvarv8si (target, op0, vperm)); + else if (vmode == V16SFmode) + emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm)); + else if (vmode == V16SImode) + emit_insn (gen_avx512f_permvarv16si (target, op0, vperm)); + else + gcc_unreachable (); + } + else + { + op1 = gen_lowpart (vmode, d->op1); + emit_insn (gen_xop_pperm (target, op0, op1, vperm)); + } + if (target != d->target) + emit_move_insn (d->target, gen_lowpart (d->vmode, target)); + + return true; +} + +/* For V*[QHS]Imode permutations, check if the same permutation + can't be performed in a 2x, 4x or 8x wider inner mode. 
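+
+   E.g. the V16QImode permutation
+
+     { 0 1 2 3  8 9 10 11  4 5 6 7  12 13 14 15 }
+
+   halves first to V8HImode { 0 1 4 5 2 3 6 7 } and then to the
+   V4SImode permutation { 0 2 1 3 }, which later stages may be able to
+   expand more cheaply (pshufd rather than pshufb, for instance).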
*/ + +static bool +canonicalize_vector_int_perm (const struct expand_vec_perm_d *d, + struct expand_vec_perm_d *nd) +{ + int i; + enum machine_mode mode = VOIDmode; + + switch (d->vmode) + { + case V16QImode: mode = V8HImode; break; + case V32QImode: mode = V16HImode; break; + case V64QImode: mode = V32HImode; break; + case V8HImode: mode = V4SImode; break; + case V16HImode: mode = V8SImode; break; + case V32HImode: mode = V16SImode; break; + case V4SImode: mode = V2DImode; break; + case V8SImode: mode = V4DImode; break; + case V16SImode: mode = V8DImode; break; + default: return false; + } + for (i = 0; i < d->nelt; i += 2) + if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1) + return false; + nd->vmode = mode; + nd->nelt = d->nelt / 2; + for (i = 0; i < nd->nelt; i++) + nd->perm[i] = d->perm[2 * i] / 2; + if (GET_MODE_INNER (mode) != DImode) + canonicalize_vector_int_perm (nd, nd); + if (nd != d) + { + nd->one_operand_p = d->one_operand_p; + nd->testing_p = d->testing_p; + if (d->op0 == d->op1) + nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0); + else + { + nd->op0 = gen_lowpart (nd->vmode, d->op0); + nd->op1 = gen_lowpart (nd->vmode, d->op1); + } + if (d->testing_p) + nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1); + else + nd->target = gen_reg_rtx (nd->vmode); + } + return true; +} + +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D + in a single instruction. */ + +static bool +expand_vec_perm_1 (struct expand_vec_perm_d *d) +{ + unsigned i, nelt = d->nelt; + struct expand_vec_perm_d nd; + + /* Check plain VEC_SELECT first, because AVX has instructions that could + match both SEL and SEL+CONCAT, but the plain SEL will allow a memory + input where SEL+CONCAT may not. */ + if (d->one_operand_p) + { + int mask = nelt - 1; + bool identity_perm = true; + bool broadcast_perm = true; + + for (i = 0; i < nelt; i++) + { + nd.perm[i] = d->perm[i] & mask; + if (nd.perm[i] != i) + identity_perm = false; + if (nd.perm[i]) + broadcast_perm = false; + } + + if (identity_perm) + { + if (!d->testing_p) + emit_move_insn (d->target, d->op0); + return true; + } + else if (broadcast_perm && TARGET_AVX2) + { + /* Use vpbroadcast{b,w,d}. */ + rtx (*gen) (rtx, rtx) = NULL; + switch (d->vmode) + { + case V64QImode: + if (TARGET_AVX512BW) + gen = gen_avx512bw_vec_dupv64qi_1; + break; + case V32QImode: + gen = gen_avx2_pbroadcastv32qi_1; + break; + case V32HImode: + if (TARGET_AVX512BW) + gen = gen_avx512bw_vec_dupv32hi_1; + break; + case V16HImode: + gen = gen_avx2_pbroadcastv16hi_1; + break; + case V16SImode: + if (TARGET_AVX512F) + gen = gen_avx512f_vec_dupv16si_1; + break; + case V8SImode: + gen = gen_avx2_pbroadcastv8si_1; + break; + case V16QImode: + gen = gen_avx2_pbroadcastv16qi; + break; + case V8HImode: + gen = gen_avx2_pbroadcastv8hi; + break; + case V16SFmode: + if (TARGET_AVX512F) + gen = gen_avx512f_vec_dupv16sf_1; + break; + case V8SFmode: + gen = gen_avx2_vec_dupv8sf_1; + break; + case V8DFmode: + if (TARGET_AVX512F) + gen = gen_avx512f_vec_dupv8df_1; + break; + case V8DImode: + if (TARGET_AVX512F) + gen = gen_avx512f_vec_dupv8di_1; + break; + /* For other modes prefer other shuffles this function creates. */ + default: break; + } + if (gen != NULL) + { + if (!d->testing_p) + emit_insn (gen (d->target, d->op0)); + return true; + } + } + + if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p)) + return true; + + /* There are plenty of patterns in sse.md that are written for + SEL+CONCAT and are not replicated for a single op. 
Perhaps + that should be changed, to avoid the nastiness here. */ + + /* Recognize interleave style patterns, which means incrementing + every other permutation operand. */ + for (i = 0; i < nelt; i += 2) + { + nd.perm[i] = d->perm[i] & mask; + nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt; + } + if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt, + d->testing_p)) + return true; + + /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */ + if (nelt >= 4) + { + for (i = 0; i < nelt; i += 4) + { + nd.perm[i + 0] = d->perm[i + 0] & mask; + nd.perm[i + 1] = d->perm[i + 1] & mask; + nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt; + nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt; + } + + if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt, + d->testing_p)) + return true; + } + } + + /* Finally, try the fully general two operand permute. */ + if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt, + d->testing_p)) + return true; + + /* Recognize interleave style patterns with reversed operands. */ + if (!d->one_operand_p) + { + for (i = 0; i < nelt; ++i) + { + unsigned e = d->perm[i]; + if (e >= nelt) + e -= nelt; + else + e += nelt; + nd.perm[i] = e; + } + + if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt, + d->testing_p)) + return true; + } + + /* Try the SSE4.1 blend variable merge instructions. */ + if (expand_vec_perm_blend (d)) + return true; + + /* Try one of the AVX vpermil variable permutations. */ + if (expand_vec_perm_vpermil (d)) + return true; + + /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128, + vpshufb, vpermd, vpermps or vpermq variable permutation. */ + if (expand_vec_perm_pshufb (d)) + return true; + + /* Try the AVX2 vpalignr instruction. */ + if (expand_vec_perm_palignr (d, true)) + return true; + + /* Try the AVX512F vpermi2 instructions. */ + if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d)) + return true; + + /* See if we can get the same permutation in different vector integer + mode. */ + if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd)) + { + if (!d->testing_p) + emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target)); + return true; + } + return false; +} + +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D + in terms of a pair of pshuflw + pshufhw instructions. */ + +static bool +expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d) +{ + unsigned char perm2[MAX_VECT_LEN]; + unsigned i; + bool ok; + + if (d->vmode != V8HImode || !d->one_operand_p) + return false; + + /* The two permutations only operate in 64-bit lanes. */ + for (i = 0; i < 4; ++i) + if (d->perm[i] >= 4) + return false; + for (i = 4; i < 8; ++i) + if (d->perm[i] < 4) + return false; + + if (d->testing_p) + return true; + + /* Emit the pshuflw. */ + memcpy (perm2, d->perm, 4); + for (i = 4; i < 8; ++i) + perm2[i] = i; + ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p); + gcc_assert (ok); + + /* Emit the pshufhw. */ + memcpy (perm2 + 4, d->perm + 4, 4); + for (i = 0; i < 4; ++i) + perm2[i] = i; + ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p); + gcc_assert (ok); + + return true; +} + +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify + the permutation using the SSSE3 palignr instruction. This succeeds + when all of the elements in PERM fit within one vector and we merely + need to shift them down so that a single vector permutation has a + chance to succeed. 
If SINGLE_INSN_ONLY_P, succeed if only + the vpalignr instruction itself can perform the requested permutation. */ + +static bool +expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p) +{ + unsigned i, nelt = d->nelt; + unsigned min, max, minswap, maxswap; + bool in_order, ok, swap = false; + rtx shift, target; + struct expand_vec_perm_d dcopy; + + /* Even with AVX, palignr only operates on 128-bit vectors, + in AVX2 palignr operates on both 128-bit lanes. */ + if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16) + && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32)) + return false; + + min = 2 * nelt; + max = 0; + minswap = 2 * nelt; + maxswap = 0; + for (i = 0; i < nelt; ++i) + { + unsigned e = d->perm[i]; + unsigned eswap = d->perm[i] ^ nelt; + if (GET_MODE_SIZE (d->vmode) == 32) + { + e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1); + eswap = e ^ (nelt / 2); + } + if (e < min) + min = e; + if (e > max) + max = e; + if (eswap < minswap) + minswap = eswap; + if (eswap > maxswap) + maxswap = eswap; + } + if (min == 0 + || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt)) + { + if (d->one_operand_p + || minswap == 0 + || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32 + ? nelt / 2 : nelt)) + return false; + swap = true; + min = minswap; + max = maxswap; + } + + /* Given that we have SSSE3, we know we'll be able to implement the + single operand permutation after the palignr with pshufb for + 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed + first. */ + if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p) + return true; + + dcopy = *d; + if (swap) + { + dcopy.op0 = d->op1; + dcopy.op1 = d->op0; + for (i = 0; i < nelt; ++i) + dcopy.perm[i] ^= nelt; + } + + in_order = true; + for (i = 0; i < nelt; ++i) + { + unsigned e = dcopy.perm[i]; + if (GET_MODE_SIZE (d->vmode) == 32 + && e >= nelt + && (e & (nelt / 2 - 1)) < min) + e = e - min - (nelt / 2); + else + e = e - min; + if (e != i) + in_order = false; + dcopy.perm[i] = e; + } + dcopy.one_operand_p = true; + + if (single_insn_only_p && !in_order) + return false; + + /* For AVX2, test whether we can permute the result in one instruction. */ + if (d->testing_p) + { + if (in_order) + return true; + dcopy.op1 = dcopy.op0; + return expand_vec_perm_1 (&dcopy); + } + + shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode)); + if (GET_MODE_SIZE (d->vmode) == 16) + { + target = gen_reg_rtx (TImode); + emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1), + gen_lowpart (TImode, dcopy.op0), shift)); + } + else + { + target = gen_reg_rtx (V2TImode); + emit_insn (gen_avx2_palignrv2ti (target, + gen_lowpart (V2TImode, dcopy.op1), + gen_lowpart (V2TImode, dcopy.op0), + shift)); + } + + dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target); + + /* Test for the degenerate case where the alignment by itself + produces the desired permutation. */ + if (in_order) + { + emit_move_insn (d->target, dcopy.op0); + return true; + } + + ok = expand_vec_perm_1 (&dcopy); + gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32); + + return ok; +} + +/* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify + the permutation using the SSE4_1 pblendv instruction. Potentially + reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. 
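+
+   For instance, in { 0 1 8 3 4 5 9 7 } only elements 2 and 6 come from
+   the second operand, so the expansion is roughly
+
+     t   = one-operand shuffle of op1 placing 8 in lane 2 and 9 in lane 6
+     res = blend of op0 and t taking lanes 2 and 6 from t
+
+   one shuffle plus one immediate blend instead of shuffling both inputs
+   and OR-ing them together.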
*/ + +static bool +expand_vec_perm_pblendv (struct expand_vec_perm_d *d) +{ + unsigned i, which, nelt = d->nelt; + struct expand_vec_perm_d dcopy, dcopy1; + machine_mode vmode = d->vmode; + bool ok; + + /* Use the same checks as in expand_vec_perm_blend. */ + if (d->one_operand_p) + return false; + if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32) + ; + else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode)) + ; + else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16) + ; + else + return false; + + /* Figure out where permutation elements stay not in their + respective lanes. */ + for (i = 0, which = 0; i < nelt; ++i) + { + unsigned e = d->perm[i]; + if (e != i) + which |= (e < nelt ? 1 : 2); + } + /* We can pblend the part where elements stay not in their + respective lanes only when these elements are all in one + half of a permutation. + {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective + lanes, but both 8 and 9 >= 8 + {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their + respective lanes and 8 >= 8, but 2 not. */ + if (which != 1 && which != 2) + return false; + if (d->testing_p && GET_MODE_SIZE (vmode) == 16) + return true; + + /* First we apply one operand permutation to the part where + elements stay not in their respective lanes. */ + dcopy = *d; + if (which == 2) + dcopy.op0 = dcopy.op1 = d->op1; + else + dcopy.op0 = dcopy.op1 = d->op0; + if (!d->testing_p) + dcopy.target = gen_reg_rtx (vmode); + dcopy.one_operand_p = true; + + for (i = 0; i < nelt; ++i) + dcopy.perm[i] = d->perm[i] & (nelt - 1); + + ok = expand_vec_perm_1 (&dcopy); + if (GET_MODE_SIZE (vmode) != 16 && !ok) + return false; + else + gcc_assert (ok); + if (d->testing_p) + return true; + + /* Next we put permuted elements into their positions. */ + dcopy1 = *d; + if (which == 2) + dcopy1.op1 = dcopy.target; + else + dcopy1.op0 = dcopy.target; + + for (i = 0; i < nelt; ++i) + dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i); + + ok = expand_vec_perm_blend (&dcopy1); + gcc_assert (ok); + + return true; +} + +static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d); + +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify + a two vector permutation into a single vector permutation by using + an interleave operation to merge the vectors. */ + +static bool +expand_vec_perm_interleave2 (struct expand_vec_perm_d *d) +{ + struct expand_vec_perm_d dremap, dfinal; + unsigned i, nelt = d->nelt, nelt2 = nelt / 2; + unsigned HOST_WIDE_INT contents; + unsigned char remap[2 * MAX_VECT_LEN]; + rtx_insn *seq; + bool ok, same_halves = false; + + if (GET_MODE_SIZE (d->vmode) == 16) + { + if (d->one_operand_p) + return false; + } + else if (GET_MODE_SIZE (d->vmode) == 32) + { + if (!TARGET_AVX) + return false; + /* For 32-byte modes allow even d->one_operand_p. + The lack of cross-lane shuffling in some instructions + might prevent a single insn shuffle. */ + dfinal = *d; + dfinal.testing_p = true; + /* If expand_vec_perm_interleave3 can expand this into + a 3 insn sequence, give up and let it be expanded as + 3 insn sequence. While that is one insn longer, + it doesn't need a memory operand and in the common + case that both interleave low and high permutations + with the same operands are adjacent needs 4 insns + for both after CSE. */ + if (expand_vec_perm_interleave3 (&dfinal)) + return false; + } + else + return false; + + /* Examine from whence the elements come. 
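+     E.g. for the V4SImode permutation { 0 4 1 5 } CONTENTS becomes 0x33:
+     all elements come from the low halves of the two inputs, so the
+     punpckl case below applies, DREMAP is exactly the interleave-low
+     permutation and DFINAL ends up being the identity.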
*/ + contents = 0; + for (i = 0; i < nelt; ++i) + contents |= HOST_WIDE_INT_1U << d->perm[i]; + + memset (remap, 0xff, sizeof (remap)); + dremap = *d; + + if (GET_MODE_SIZE (d->vmode) == 16) + { + unsigned HOST_WIDE_INT h1, h2, h3, h4; + + /* Split the two input vectors into 4 halves. */ + h1 = (HOST_WIDE_INT_1U << nelt2) - 1; + h2 = h1 << nelt2; + h3 = h2 << nelt2; + h4 = h3 << nelt2; + + /* If the elements from the low halves use interleave low, and similarly + for interleave high. If the elements are from mis-matched halves, we + can use shufps for V4SF/V4SI or do a DImode shuffle. */ + if ((contents & (h1 | h3)) == contents) + { + /* punpckl* */ + for (i = 0; i < nelt2; ++i) + { + remap[i] = i * 2; + remap[i + nelt] = i * 2 + 1; + dremap.perm[i * 2] = i; + dremap.perm[i * 2 + 1] = i + nelt; + } + if (!TARGET_SSE2 && d->vmode == V4SImode) + dremap.vmode = V4SFmode; + } + else if ((contents & (h2 | h4)) == contents) + { + /* punpckh* */ + for (i = 0; i < nelt2; ++i) + { + remap[i + nelt2] = i * 2; + remap[i + nelt + nelt2] = i * 2 + 1; + dremap.perm[i * 2] = i + nelt2; + dremap.perm[i * 2 + 1] = i + nelt + nelt2; + } + if (!TARGET_SSE2 && d->vmode == V4SImode) + dremap.vmode = V4SFmode; + } + else if ((contents & (h1 | h4)) == contents) + { + /* shufps */ + for (i = 0; i < nelt2; ++i) + { + remap[i] = i; + remap[i + nelt + nelt2] = i + nelt2; + dremap.perm[i] = i; + dremap.perm[i + nelt2] = i + nelt + nelt2; + } + if (nelt != 4) + { + /* shufpd */ + dremap.vmode = V2DImode; + dremap.nelt = 2; + dremap.perm[0] = 0; + dremap.perm[1] = 3; + } + } + else if ((contents & (h2 | h3)) == contents) + { + /* shufps */ + for (i = 0; i < nelt2; ++i) + { + remap[i + nelt2] = i; + remap[i + nelt] = i + nelt2; + dremap.perm[i] = i + nelt2; + dremap.perm[i + nelt2] = i + nelt; + } + if (nelt != 4) + { + /* shufpd */ + dremap.vmode = V2DImode; + dremap.nelt = 2; + dremap.perm[0] = 1; + dremap.perm[1] = 2; + } + } + else + return false; + } + else + { + unsigned int nelt4 = nelt / 4, nzcnt = 0; + unsigned HOST_WIDE_INT q[8]; + unsigned int nonzero_halves[4]; + + /* Split the two input vectors into 8 quarters. */ + q[0] = (HOST_WIDE_INT_1U << nelt4) - 1; + for (i = 1; i < 8; ++i) + q[i] = q[0] << (nelt4 * i); + for (i = 0; i < 4; ++i) + if (((q[2 * i] | q[2 * i + 1]) & contents) != 0) + { + nonzero_halves[nzcnt] = i; + ++nzcnt; + } + + if (nzcnt == 1) + { + gcc_assert (d->one_operand_p); + nonzero_halves[1] = nonzero_halves[0]; + same_halves = true; + } + else if (d->one_operand_p) + { + gcc_assert (nonzero_halves[0] == 0); + gcc_assert (nonzero_halves[1] == 1); + } + + if (nzcnt <= 2) + { + if (d->perm[0] / nelt2 == nonzero_halves[1]) + { + /* Attempt to increase the likelihood that dfinal + shuffle will be intra-lane. */ + std::swap (nonzero_halves[0], nonzero_halves[1]); + } + + /* vperm2f128 or vperm2i128. 
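+     At most two of the four 128-bit halves of the two inputs are
+     referenced, so one vperm2[fi]128 can gather both of them into a
+     single register and DFINAL only has to shuffle within that one
+     register.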
*/ + for (i = 0; i < nelt2; ++i) + { + remap[i + nonzero_halves[1] * nelt2] = i + nelt2; + remap[i + nonzero_halves[0] * nelt2] = i; + dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2; + dremap.perm[i] = i + nonzero_halves[0] * nelt2; + } + + if (d->vmode != V8SFmode + && d->vmode != V4DFmode + && d->vmode != V8SImode) + { + dremap.vmode = V8SImode; + dremap.nelt = 8; + for (i = 0; i < 4; ++i) + { + dremap.perm[i] = i + nonzero_halves[0] * 4; + dremap.perm[i + 4] = i + nonzero_halves[1] * 4; + } + } + } + else if (d->one_operand_p) + return false; + else if (TARGET_AVX2 + && (contents & (q[0] | q[2] | q[4] | q[6])) == contents) + { + /* vpunpckl* */ + for (i = 0; i < nelt4; ++i) + { + remap[i] = i * 2; + remap[i + nelt] = i * 2 + 1; + remap[i + nelt2] = i * 2 + nelt2; + remap[i + nelt + nelt2] = i * 2 + nelt2 + 1; + dremap.perm[i * 2] = i; + dremap.perm[i * 2 + 1] = i + nelt; + dremap.perm[i * 2 + nelt2] = i + nelt2; + dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2; + } + } + else if (TARGET_AVX2 + && (contents & (q[1] | q[3] | q[5] | q[7])) == contents) + { + /* vpunpckh* */ + for (i = 0; i < nelt4; ++i) + { + remap[i + nelt4] = i * 2; + remap[i + nelt + nelt4] = i * 2 + 1; + remap[i + nelt2 + nelt4] = i * 2 + nelt2; + remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1; + dremap.perm[i * 2] = i + nelt4; + dremap.perm[i * 2 + 1] = i + nelt + nelt4; + dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4; + dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4; + } + } + else + return false; + } + + /* Use the remapping array set up above to move the elements from their + swizzled locations into their final destinations. */ + dfinal = *d; + for (i = 0; i < nelt; ++i) + { + unsigned e = remap[d->perm[i]]; + gcc_assert (e < nelt); + /* If same_halves is true, both halves of the remapped vector are the + same. Avoid cross-lane accesses if possible. */ + if (same_halves && i >= nelt2) + { + gcc_assert (e < nelt2); + dfinal.perm[i] = e + nelt2; + } + else + dfinal.perm[i] = e; + } + if (!d->testing_p) + { + dremap.target = gen_reg_rtx (dremap.vmode); + dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target); + } + dfinal.op1 = dfinal.op0; + dfinal.one_operand_p = true; + + /* Test if the final remap can be done with a single insn. For V4SFmode or + V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */ + start_sequence (); + ok = expand_vec_perm_1 (&dfinal); + seq = get_insns (); + end_sequence (); + + if (!ok) + return false; + + if (d->testing_p) + return true; + + if (dremap.vmode != dfinal.vmode) + { + dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0); + dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1); + } + + ok = expand_vec_perm_1 (&dremap); + gcc_assert (ok); + + emit_insn (seq); + return true; +} + +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify + a single vector cross-lane permutation into vpermq followed + by any of the single insn permutations. 
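+   This works when each half of the result uses elements from at most
+   two of the four 64-bit quarters of the input: the vpermq moves those
+   quarters into the corresponding half, and the rest is an in-lane
+   shuffle handled by expand_vec_perm_1.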
*/ + +static bool +expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d) +{ + struct expand_vec_perm_d dremap, dfinal; + unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4; + unsigned contents[2]; + bool ok; + + if (!(TARGET_AVX2 + && (d->vmode == V32QImode || d->vmode == V16HImode) + && d->one_operand_p)) + return false; + + contents[0] = 0; + contents[1] = 0; + for (i = 0; i < nelt2; ++i) + { + contents[0] |= 1u << (d->perm[i] / nelt4); + contents[1] |= 1u << (d->perm[i + nelt2] / nelt4); + } + + for (i = 0; i < 2; ++i) + { + unsigned int cnt = 0; + for (j = 0; j < 4; ++j) + if ((contents[i] & (1u << j)) != 0 && ++cnt > 2) + return false; + } + + if (d->testing_p) + return true; + + dremap = *d; + dremap.vmode = V4DImode; + dremap.nelt = 4; + dremap.target = gen_reg_rtx (V4DImode); + dremap.op0 = gen_lowpart (V4DImode, d->op0); + dremap.op1 = dremap.op0; + dremap.one_operand_p = true; + for (i = 0; i < 2; ++i) + { + unsigned int cnt = 0; + for (j = 0; j < 4; ++j) + if ((contents[i] & (1u << j)) != 0) + dremap.perm[2 * i + cnt++] = j; + for (; cnt < 2; ++cnt) + dremap.perm[2 * i + cnt] = 0; + } + + dfinal = *d; + dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target); + dfinal.op1 = dfinal.op0; + dfinal.one_operand_p = true; + for (i = 0, j = 0; i < nelt; ++i) + { + if (i == nelt2) + j = 2; + dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0); + if ((d->perm[i] / nelt4) == dremap.perm[j]) + ; + else if ((d->perm[i] / nelt4) == dremap.perm[j + 1]) + dfinal.perm[i] |= nelt4; + else + gcc_unreachable (); + } + + ok = expand_vec_perm_1 (&dremap); + gcc_assert (ok); + + ok = expand_vec_perm_1 (&dfinal); + gcc_assert (ok); + + return true; +} + +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand + a vector permutation using two instructions, vperm2f128 resp. + vperm2i128 followed by any single in-lane permutation. */ + +static bool +expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d) +{ + struct expand_vec_perm_d dfirst, dsecond; + unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm; + bool ok; + + if (!TARGET_AVX + || GET_MODE_SIZE (d->vmode) != 32 + || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2)) + return false; + + dsecond = *d; + dsecond.one_operand_p = false; + dsecond.testing_p = true; + + /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128 + immediate. For perm < 16 the second permutation uses + d->op0 as first operand, for perm >= 16 it uses d->op1 + as first operand. The second operand is the result of + vperm2[fi]128. */ + for (perm = 0; perm < 32; perm++) + { + /* Ignore permutations which do not move anything cross-lane. */ + if (perm < 16) + { + /* The second shuffle for e.g. V4DFmode has + 0123 and ABCD operands. + Ignore AB23, as 23 is already in the second lane + of the first operand. */ + if ((perm & 0xc) == (1 << 2)) continue; + /* And 01CD, as 01 is in the first lane of the first + operand. */ + if ((perm & 3) == 0) continue; + /* And 4567, as then the vperm2[fi]128 doesn't change + anything on the original 4567 second operand. */ + if ((perm & 0xf) == ((3 << 2) | 2)) continue; + } + else + { + /* The second shuffle for e.g. V4DFmode has + 4567 and ABCD operands. + Ignore AB67, as 67 is already in the second lane + of the first operand. */ + if ((perm & 0xc) == (3 << 2)) continue; + /* And 45CD, as 45 is in the first lane of the first + operand. */ + if ((perm & 3) == 2) continue; + /* And 0123, as then the vperm2[fi]128 doesn't change + anything on the original 0123 first operand. 
*/ + if ((perm & 0xf) == (1 << 2)) continue; + } + + for (i = 0; i < nelt; i++) + { + j = d->perm[i] / nelt2; + if (j == ((perm >> (2 * (i >= nelt2))) & 3)) + dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1)); + else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16)) + dsecond.perm[i] = d->perm[i] & (nelt - 1); + else + break; + } + + if (i == nelt) + { + start_sequence (); + ok = expand_vec_perm_1 (&dsecond); + end_sequence (); + } + else + ok = false; + + if (ok) + { + if (d->testing_p) + return true; + + /* Found a usable second shuffle. dfirst will be + vperm2f128 on d->op0 and d->op1. */ + dsecond.testing_p = false; + dfirst = *d; + dfirst.target = gen_reg_rtx (d->vmode); + for (i = 0; i < nelt; i++) + dfirst.perm[i] = (i & (nelt2 - 1)) + + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2; + + canonicalize_perm (&dfirst); + ok = expand_vec_perm_1 (&dfirst); + gcc_assert (ok); + + /* And dsecond is some single insn shuffle, taking + d->op0 and result of vperm2f128 (if perm < 16) or + d->op1 and result of vperm2f128 (otherwise). */ + if (perm >= 16) + dsecond.op0 = dsecond.op1; + dsecond.op1 = dfirst.target; + + ok = expand_vec_perm_1 (&dsecond); + gcc_assert (ok); + + return true; + } + + /* For one operand, the only useful vperm2f128 permutation is 0x01 + aka lanes swap. */ + if (d->one_operand_p) + return false; + } + + return false; +} + +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify + a two vector permutation using 2 intra-lane interleave insns + and cross-lane shuffle for 32-byte vectors. */ + +static bool +expand_vec_perm_interleave3 (struct expand_vec_perm_d *d) +{ + unsigned i, nelt; + rtx (*gen) (rtx, rtx, rtx); + + if (d->one_operand_p) + return false; + if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32) + ; + else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode)) + ; + else + return false; + + nelt = d->nelt; + if (d->perm[0] != 0 && d->perm[0] != nelt / 2) + return false; + for (i = 0; i < nelt; i += 2) + if (d->perm[i] != d->perm[0] + i / 2 + || d->perm[i + 1] != d->perm[0] + i / 2 + nelt) + return false; + + if (d->testing_p) + return true; + + switch (d->vmode) + { + case V32QImode: + if (d->perm[0]) + gen = gen_vec_interleave_highv32qi; + else + gen = gen_vec_interleave_lowv32qi; + break; + case V16HImode: + if (d->perm[0]) + gen = gen_vec_interleave_highv16hi; + else + gen = gen_vec_interleave_lowv16hi; + break; + case V8SImode: + if (d->perm[0]) + gen = gen_vec_interleave_highv8si; + else + gen = gen_vec_interleave_lowv8si; + break; + case V4DImode: + if (d->perm[0]) + gen = gen_vec_interleave_highv4di; + else + gen = gen_vec_interleave_lowv4di; + break; + case V8SFmode: + if (d->perm[0]) + gen = gen_vec_interleave_highv8sf; + else + gen = gen_vec_interleave_lowv8sf; + break; + case V4DFmode: + if (d->perm[0]) + gen = gen_vec_interleave_highv4df; + else + gen = gen_vec_interleave_lowv4df; + break; + default: + gcc_unreachable (); + } + + emit_insn (gen (d->target, d->op0, d->op1)); + return true; +} + +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement + a single vector permutation using a single intra-lane vector + permutation, vperm2f128 swapping the lanes and vblend* insn blending + the non-swapped and swapped vectors together. 
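+   E.g. the one operand V4DFmode permutation { 0 3 2 1 } is expanded as
+   an identity intra-lane shuffle, a vperm2f128 lane swap giving
+   { 2 3 0 1 }, and a vblendpd with mask 0b1010 taking elements 1 and 3
+   from the swapped copy.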
*/ + +static bool +expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d) +{ + struct expand_vec_perm_d dfirst, dsecond; + unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2; + rtx_insn *seq; + bool ok; + rtx (*blend) (rtx, rtx, rtx, rtx) = NULL; + + if (!TARGET_AVX + || TARGET_AVX2 + || (d->vmode != V8SFmode && d->vmode != V4DFmode) + || !d->one_operand_p) + return false; + + dfirst = *d; + for (i = 0; i < nelt; i++) + dfirst.perm[i] = 0xff; + for (i = 0, msk = 0; i < nelt; i++) + { + j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2; + if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i]) + return false; + dfirst.perm[j] = d->perm[i]; + if (j != i) + msk |= (1 << i); + } + for (i = 0; i < nelt; i++) + if (dfirst.perm[i] == 0xff) + dfirst.perm[i] = i; + + if (!d->testing_p) + dfirst.target = gen_reg_rtx (dfirst.vmode); + + start_sequence (); + ok = expand_vec_perm_1 (&dfirst); + seq = get_insns (); + end_sequence (); + + if (!ok) + return false; + + if (d->testing_p) + return true; + + emit_insn (seq); + + dsecond = *d; + dsecond.op0 = dfirst.target; + dsecond.op1 = dfirst.target; + dsecond.one_operand_p = true; + dsecond.target = gen_reg_rtx (dsecond.vmode); + for (i = 0; i < nelt; i++) + dsecond.perm[i] = i ^ nelt2; + + ok = expand_vec_perm_1 (&dsecond); + gcc_assert (ok); + + blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256; + emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk))); + return true; +} + +/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF + permutation using two vperm2f128, followed by a vshufpd insn blending + the two vectors together. */ + +static bool +expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d) +{ + struct expand_vec_perm_d dfirst, dsecond, dthird; + bool ok; + + if (!TARGET_AVX || (d->vmode != V4DFmode)) + return false; + + if (d->testing_p) + return true; + + dfirst = *d; + dsecond = *d; + dthird = *d; + + dfirst.perm[0] = (d->perm[0] & ~1); + dfirst.perm[1] = (d->perm[0] & ~1) + 1; + dfirst.perm[2] = (d->perm[2] & ~1); + dfirst.perm[3] = (d->perm[2] & ~1) + 1; + dsecond.perm[0] = (d->perm[1] & ~1); + dsecond.perm[1] = (d->perm[1] & ~1) + 1; + dsecond.perm[2] = (d->perm[3] & ~1); + dsecond.perm[3] = (d->perm[3] & ~1) + 1; + dthird.perm[0] = (d->perm[0] % 2); + dthird.perm[1] = (d->perm[1] % 2) + 4; + dthird.perm[2] = (d->perm[2] % 2) + 2; + dthird.perm[3] = (d->perm[3] % 2) + 6; + + dfirst.target = gen_reg_rtx (dfirst.vmode); + dsecond.target = gen_reg_rtx (dsecond.vmode); + dthird.op0 = dfirst.target; + dthird.op1 = dsecond.target; + dthird.one_operand_p = false; + + canonicalize_perm (&dfirst); + canonicalize_perm (&dsecond); + + ok = expand_vec_perm_1 (&dfirst) + && expand_vec_perm_1 (&dsecond) + && expand_vec_perm_1 (&dthird); + + gcc_assert (ok); + + return true; +} + +/* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word + permutation with two pshufb insns and an ior. We should have already + failed all two instruction sequences. */ + +static bool +expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d) +{ + rtx rperm[2][16], vperm, l, h, op, m128; + unsigned int i, nelt, eltsz; + + if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16) + return false; + gcc_assert (!d->one_operand_p); + + if (d->testing_p) + return true; + + nelt = d->nelt; + eltsz = GET_MODE_UNIT_SIZE (d->vmode); + + /* Generate two permutation masks. If the required element is within + the given vector it is shuffled into the proper lane. 
If the required + element is in the other vector, force a zero into the lane by setting + bit 7 in the permutation mask. */ + m128 = GEN_INT (-128); + for (i = 0; i < nelt; ++i) + { + unsigned j, e = d->perm[i]; + unsigned which = (e >= nelt); + if (e >= nelt) + e -= nelt; + + for (j = 0; j < eltsz; ++j) + { + rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j); + rperm[1-which][i*eltsz + j] = m128; + } + } + + vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0])); + vperm = force_reg (V16QImode, vperm); + + l = gen_reg_rtx (V16QImode); + op = gen_lowpart (V16QImode, d->op0); + emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm)); + + vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1])); + vperm = force_reg (V16QImode, vperm); + + h = gen_reg_rtx (V16QImode); + op = gen_lowpart (V16QImode, d->op1); + emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm)); + + op = d->target; + if (d->vmode != V16QImode) + op = gen_reg_rtx (V16QImode); + emit_insn (gen_iorv16qi3 (op, l, h)); + if (op != d->target) + emit_move_insn (d->target, gen_lowpart (d->vmode, op)); + + return true; +} + +/* Implement arbitrary permutation of one V32QImode and V16QImode operand + with two vpshufb insns, vpermq and vpor. We should have already failed + all two or three instruction sequences. */ + +static bool +expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d) +{ + rtx rperm[2][32], vperm, l, h, hp, op, m128; + unsigned int i, nelt, eltsz; + + if (!TARGET_AVX2 + || !d->one_operand_p + || (d->vmode != V32QImode && d->vmode != V16HImode)) + return false; + + if (d->testing_p) + return true; + + nelt = d->nelt; + eltsz = GET_MODE_UNIT_SIZE (d->vmode); + + /* Generate two permutation masks. If the required element is within + the same lane, it is shuffled in. If the required element from the + other lane, force a zero by setting bit 7 in the permutation mask. + In the other mask the mask has non-negative elements if element + is requested from the other lane, but also moved to the other lane, + so that the result of vpshufb can have the two V2TImode halves + swapped. */ + m128 = GEN_INT (-128); + for (i = 0; i < nelt; ++i) + { + unsigned j, e = d->perm[i] & (nelt / 2 - 1); + unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz; + + for (j = 0; j < eltsz; ++j) + { + rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j); + rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128; + } + } + + vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1])); + vperm = force_reg (V32QImode, vperm); + + h = gen_reg_rtx (V32QImode); + op = gen_lowpart (V32QImode, d->op0); + emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm)); + + /* Swap the 128-byte lanes of h into hp. */ + hp = gen_reg_rtx (V4DImode); + op = gen_lowpart (V4DImode, h); + emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx, + const1_rtx)); + + vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0])); + vperm = force_reg (V32QImode, vperm); + + l = gen_reg_rtx (V32QImode); + op = gen_lowpart (V32QImode, d->op0); + emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm)); + + op = d->target; + if (d->vmode != V32QImode) + op = gen_reg_rtx (V32QImode); + emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp))); + if (op != d->target) + emit_move_insn (d->target, gen_lowpart (d->vmode, op)); + + return true; +} + +/* A subroutine of expand_vec_perm_even_odd_1. 
Implement extract-even + and extract-odd permutations of two V32QImode and V16QImode operand + with two vpshufb insns, vpor and vpermq. We should have already + failed all two or three instruction sequences. */ + +static bool +expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d) +{ + rtx rperm[2][32], vperm, l, h, ior, op, m128; + unsigned int i, nelt, eltsz; + + if (!TARGET_AVX2 + || d->one_operand_p + || (d->vmode != V32QImode && d->vmode != V16HImode)) + return false; + + for (i = 0; i < d->nelt; ++i) + if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2)) + return false; + + if (d->testing_p) + return true; + + nelt = d->nelt; + eltsz = GET_MODE_UNIT_SIZE (d->vmode); + + /* Generate two permutation masks. In the first permutation mask + the first quarter will contain indexes for the first half + of the op0, the second quarter will contain bit 7 set, third quarter + will contain indexes for the second half of the op0 and the + last quarter bit 7 set. In the second permutation mask + the first quarter will contain bit 7 set, the second quarter + indexes for the first half of the op1, the third quarter bit 7 set + and last quarter indexes for the second half of the op1. + I.e. the first mask e.g. for V32QImode extract even will be: + 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128 + (all values masked with 0xf except for -128) and second mask + for extract even will be + -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */ + m128 = GEN_INT (-128); + for (i = 0; i < nelt; ++i) + { + unsigned j, e = d->perm[i] & (nelt / 2 - 1); + unsigned which = d->perm[i] >= nelt; + unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0; + + for (j = 0; j < eltsz; ++j) + { + rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j); + rperm[1 - which][(i * eltsz + j) ^ xorv] = m128; + } + } + + vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0])); + vperm = force_reg (V32QImode, vperm); + + l = gen_reg_rtx (V32QImode); + op = gen_lowpart (V32QImode, d->op0); + emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm)); + + vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1])); + vperm = force_reg (V32QImode, vperm); + + h = gen_reg_rtx (V32QImode); + op = gen_lowpart (V32QImode, d->op1); + emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm)); + + ior = gen_reg_rtx (V32QImode); + emit_insn (gen_iorv32qi3 (ior, l, h)); + + /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */ + op = gen_reg_rtx (V4DImode); + ior = gen_lowpart (V4DImode, ior); + emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx, + const1_rtx, GEN_INT (3))); + emit_move_insn (d->target, gen_lowpart (d->vmode, op)); + + return true; +} + +/* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even + and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands + with two "and" and "pack" or two "shift" and "pack" insns. We should + have already failed all two instruction sequences. */ + +static bool +expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d) +{ + rtx op, dop0, dop1, t, rperm[16]; + unsigned i, odd, c, s, nelt = d->nelt; + bool end_perm = false; + machine_mode half_mode; + rtx (*gen_and) (rtx, rtx, rtx); + rtx (*gen_pack) (rtx, rtx, rtx); + rtx (*gen_shift) (rtx, rtx, rtx); + + if (d->one_operand_p) + return false; + + switch (d->vmode) + { + case V8HImode: + /* Required for "pack". 
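+	 packusdw only exists from SSE4.1 on; the V16QImode case below
+	 gets by with the SSE2 packuswb and needs no check.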
*/ + if (!TARGET_SSE4_1) + return false; + c = 0xffff; + s = 16; + half_mode = V4SImode; + gen_and = gen_andv4si3; + gen_pack = gen_sse4_1_packusdw; + gen_shift = gen_lshrv4si3; + break; + case V16QImode: + /* No check as all instructions are SSE2. */ + c = 0xff; + s = 8; + half_mode = V8HImode; + gen_and = gen_andv8hi3; + gen_pack = gen_sse2_packuswb; + gen_shift = gen_lshrv8hi3; + break; + case V16HImode: + if (!TARGET_AVX2) + return false; + c = 0xffff; + s = 16; + half_mode = V8SImode; + gen_and = gen_andv8si3; + gen_pack = gen_avx2_packusdw; + gen_shift = gen_lshrv8si3; + end_perm = true; + break; + case V32QImode: + if (!TARGET_AVX2) + return false; + c = 0xff; + s = 8; + half_mode = V16HImode; + gen_and = gen_andv16hi3; + gen_pack = gen_avx2_packuswb; + gen_shift = gen_lshrv16hi3; + end_perm = true; + break; + default: + /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than + general shuffles. */ + return false; + } + + /* Check that permutation is even or odd. */ + odd = d->perm[0]; + if (odd > 1) + return false; + + for (i = 1; i < nelt; ++i) + if (d->perm[i] != 2 * i + odd) + return false; + + if (d->testing_p) + return true; + + dop0 = gen_reg_rtx (half_mode); + dop1 = gen_reg_rtx (half_mode); + if (odd == 0) + { + for (i = 0; i < nelt / 2; i++) + rperm[i] = GEN_INT (c); + t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm)); + t = force_reg (half_mode, t); + emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0))); + emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1))); + } + else + { + emit_insn (gen_shift (dop0, + gen_lowpart (half_mode, d->op0), + GEN_INT (s))); + emit_insn (gen_shift (dop1, + gen_lowpart (half_mode, d->op1), + GEN_INT (s))); + } + /* In AVX2 for 256 bit case we need to permute pack result. */ + if (TARGET_AVX2 && end_perm) + { + op = gen_reg_rtx (d->vmode); + t = gen_reg_rtx (V4DImode); + emit_insn (gen_pack (op, dop0, dop1)); + emit_insn (gen_avx2_permv4di_1 (t, + gen_lowpart (V4DImode, op), + const0_rtx, + const2_rtx, + const1_rtx, + GEN_INT (3))); + emit_move_insn (d->target, gen_lowpart (d->vmode, t)); + } + else + emit_insn (gen_pack (d->target, dop0, dop1)); + + return true; +} + +/* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even + and extract-odd permutations of two V64QI operands + with two "shifts", two "truncs" and one "concat" insns for "odd" + and two "truncs" and one concat insn for "even." + Have already failed all two instruction sequences. */ + +static bool +expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d) +{ + rtx t1, t2, t3, t4; + unsigned i, odd, nelt = d->nelt; + + if (!TARGET_AVX512BW + || d->one_operand_p + || d->vmode != V64QImode) + return false; + + /* Check that permutation is even or odd. 
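+     I.e. that it is { 0 2 4 ... 126 } or { 1 3 5 ... 127 } for the
+     V64QImode case handled here.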
*/ + odd = d->perm[0]; + if (odd > 1) + return false; + + for (i = 1; i < nelt; ++i) + if (d->perm[i] != 2 * i + odd) + return false; + + if (d->testing_p) + return true; + + + if (odd) + { + t1 = gen_reg_rtx (V32HImode); + t2 = gen_reg_rtx (V32HImode); + emit_insn (gen_lshrv32hi3 (t1, + gen_lowpart (V32HImode, d->op0), + GEN_INT (8))); + emit_insn (gen_lshrv32hi3 (t2, + gen_lowpart (V32HImode, d->op1), + GEN_INT (8))); + } + else + { + t1 = gen_lowpart (V32HImode, d->op0); + t2 = gen_lowpart (V32HImode, d->op1); + } + + t3 = gen_reg_rtx (V32QImode); + t4 = gen_reg_rtx (V32QImode); + emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1)); + emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2)); + emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4)); + + return true; +} + +/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even + and extract-odd permutations. */ + +static bool +expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) +{ + rtx t1, t2, t3, t4, t5; + + switch (d->vmode) + { + case V4DFmode: + if (d->testing_p) + break; + t1 = gen_reg_rtx (V4DFmode); + t2 = gen_reg_rtx (V4DFmode); + + /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */ + emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20))); + emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31))); + + /* Now an unpck[lh]pd will produce the result required. */ + if (odd) + t3 = gen_avx_unpckhpd256 (d->target, t1, t2); + else + t3 = gen_avx_unpcklpd256 (d->target, t1, t2); + emit_insn (t3); + break; + + case V8SFmode: + { + int mask = odd ? 0xdd : 0x88; + + if (d->testing_p) + break; + t1 = gen_reg_rtx (V8SFmode); + t2 = gen_reg_rtx (V8SFmode); + t3 = gen_reg_rtx (V8SFmode); + + /* Shuffle within the 128-bit lanes to produce: + { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */ + emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1, + GEN_INT (mask))); + + /* Shuffle the lanes around to produce: + { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */ + emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1, + GEN_INT (0x3))); + + /* Shuffle within the 128-bit lanes to produce: + { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */ + emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44))); + + /* Shuffle within the 128-bit lanes to produce: + { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */ + emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee))); + + /* Shuffle the lanes around to produce: + { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */ + emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2, + GEN_INT (0x20))); + } + break; + + case V2DFmode: + case V4SFmode: + case V2DImode: + case V4SImode: + /* These are always directly implementable by expand_vec_perm_1. */ + gcc_unreachable (); + + case V8HImode: + if (TARGET_SSE4_1) + return expand_vec_perm_even_odd_pack (d); + else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB) + return expand_vec_perm_pshufb2 (d); + else + { + if (d->testing_p) + break; + /* We need 2*log2(N)-1 operations to achieve odd/even + with interleave. 
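+	     For V8HImode that is 2*3 - 1 = 5 punpck[lh]wd instructions,
+	     emitted below.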
*/ + t1 = gen_reg_rtx (V8HImode); + t2 = gen_reg_rtx (V8HImode); + emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1)); + emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1)); + emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1)); + emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1)); + if (odd) + t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2); + else + t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2); + emit_insn (t3); + } + break; + + case V16QImode: + return expand_vec_perm_even_odd_pack (d); + + case V16HImode: + case V32QImode: + return expand_vec_perm_even_odd_pack (d); + + case V64QImode: + return expand_vec_perm_even_odd_trunc (d); + + case V4DImode: + if (!TARGET_AVX2) + { + struct expand_vec_perm_d d_copy = *d; + d_copy.vmode = V4DFmode; + if (d->testing_p) + d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1); + else + d_copy.target = gen_reg_rtx (V4DFmode); + d_copy.op0 = gen_lowpart (V4DFmode, d->op0); + d_copy.op1 = gen_lowpart (V4DFmode, d->op1); + if (expand_vec_perm_even_odd_1 (&d_copy, odd)) + { + if (!d->testing_p) + emit_move_insn (d->target, + gen_lowpart (V4DImode, d_copy.target)); + return true; + } + return false; + } + + if (d->testing_p) + break; + + t1 = gen_reg_rtx (V4DImode); + t2 = gen_reg_rtx (V4DImode); + + /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */ + emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20))); + emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31))); + + /* Now an vpunpck[lh]qdq will produce the result required. */ + if (odd) + t3 = gen_avx2_interleave_highv4di (d->target, t1, t2); + else + t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2); + emit_insn (t3); + break; + + case V8SImode: + if (!TARGET_AVX2) + { + struct expand_vec_perm_d d_copy = *d; + d_copy.vmode = V8SFmode; + if (d->testing_p) + d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1); + else + d_copy.target = gen_reg_rtx (V8SFmode); + d_copy.op0 = gen_lowpart (V8SFmode, d->op0); + d_copy.op1 = gen_lowpart (V8SFmode, d->op1); + if (expand_vec_perm_even_odd_1 (&d_copy, odd)) + { + if (!d->testing_p) + emit_move_insn (d->target, + gen_lowpart (V8SImode, d_copy.target)); + return true; + } + return false; + } + + if (d->testing_p) + break; + + t1 = gen_reg_rtx (V8SImode); + t2 = gen_reg_rtx (V8SImode); + t3 = gen_reg_rtx (V4DImode); + t4 = gen_reg_rtx (V4DImode); + t5 = gen_reg_rtx (V4DImode); + + /* Shuffle the lanes around into + { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */ + emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0), + gen_lowpart (V4DImode, d->op1), + GEN_INT (0x20))); + emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0), + gen_lowpart (V4DImode, d->op1), + GEN_INT (0x31))); + + /* Swap the 2nd and 3rd position in each lane into + { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */ + emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3), + GEN_INT (2 * 4 + 1 * 16 + 3 * 64))); + emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4), + GEN_INT (2 * 4 + 1 * 16 + 3 * 64))); + + /* Now an vpunpck[lh]qdq will produce + { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. 
*/ + if (odd) + t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1), + gen_lowpart (V4DImode, t2)); + else + t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1), + gen_lowpart (V4DImode, t2)); + emit_insn (t3); + emit_move_insn (d->target, gen_lowpart (V8SImode, t5)); + break; + + default: + gcc_unreachable (); + } + + return true; +} + +/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match + extract-even and extract-odd permutations. */ + +static bool +expand_vec_perm_even_odd (struct expand_vec_perm_d *d) +{ + unsigned i, odd, nelt = d->nelt; + + odd = d->perm[0]; + if (odd != 0 && odd != 1) + return false; + + for (i = 1; i < nelt; ++i) + if (d->perm[i] != 2 * i + odd) + return false; + + return expand_vec_perm_even_odd_1 (d, odd); +} + +/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast + permutations. We assume that expand_vec_perm_1 has already failed. */ + +static bool +expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d) +{ + unsigned elt = d->perm[0], nelt2 = d->nelt / 2; + machine_mode vmode = d->vmode; + unsigned char perm2[4]; + rtx op0 = d->op0, dest; + bool ok; + + switch (vmode) + { + case V4DFmode: + case V8SFmode: + /* These are special-cased in sse.md so that we can optionally + use the vbroadcast instruction. They expand to two insns + if the input happens to be in a register. */ + gcc_unreachable (); + + case V2DFmode: + case V2DImode: + case V4SFmode: + case V4SImode: + /* These are always implementable using standard shuffle patterns. */ + gcc_unreachable (); + + case V8HImode: + case V16QImode: + /* These can be implemented via interleave. We save one insn by + stopping once we have promoted to V4SImode and then use pshufd. */ + if (d->testing_p) + return true; + do + { + rtx dest; + rtx (*gen) (rtx, rtx, rtx) + = vmode == V16QImode ? gen_vec_interleave_lowv16qi + : gen_vec_interleave_lowv8hi; + + if (elt >= nelt2) + { + gen = vmode == V16QImode ? gen_vec_interleave_highv16qi + : gen_vec_interleave_highv8hi; + elt -= nelt2; + } + nelt2 /= 2; + + dest = gen_reg_rtx (vmode); + emit_insn (gen (dest, op0, op0)); + vmode = get_mode_wider_vector (vmode); + op0 = gen_lowpart (vmode, dest); + } + while (vmode != V4SImode); + + memset (perm2, elt, 4); + dest = gen_reg_rtx (V4SImode); + ok = expand_vselect (dest, op0, perm2, 4, d->testing_p); + gcc_assert (ok); + if (!d->testing_p) + emit_move_insn (d->target, gen_lowpart (d->vmode, dest)); + return true; + + case V64QImode: + case V32QImode: + case V16HImode: + case V8SImode: + case V4DImode: + /* For AVX2 broadcasts of the first element vpbroadcast* or + vpermq should be used by expand_vec_perm_1. */ + gcc_assert (!TARGET_AVX2 || d->perm[0]); + return false; + + default: + gcc_unreachable (); + } +} + +/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match + broadcast permutations. */ + +static bool +expand_vec_perm_broadcast (struct expand_vec_perm_d *d) +{ + unsigned i, elt, nelt = d->nelt; + + if (!d->one_operand_p) + return false; + + elt = d->perm[0]; + for (i = 1; i < nelt; ++i) + if (d->perm[i] != elt) + return false; + + return expand_vec_perm_broadcast_1 (d); +} + +/* Implement arbitrary permutations of two V64QImode operands + will 2 vpermi2w, 2 vpshufb and one vpor instruction. 
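+   The two vpermi2w permutations bring, for the even and for the odd
+   destination bytes respectively, the source word containing each byte
+   into the destination word position; the two vpshufb masks then pick
+   the right byte within every word and zero the remaining positions,
+   and the vpor merges the two intermediate results.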
*/ +static bool +expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d) +{ + if (!TARGET_AVX512BW || !(d->vmode == V64QImode)) + return false; + + if (d->testing_p) + return true; + + struct expand_vec_perm_d ds[2]; + rtx rperm[128], vperm, target0, target1; + unsigned int i, nelt; + machine_mode vmode; + + nelt = d->nelt; + vmode = V64QImode; + + for (i = 0; i < 2; i++) + { + ds[i] = *d; + ds[i].vmode = V32HImode; + ds[i].nelt = 32; + ds[i].target = gen_reg_rtx (V32HImode); + ds[i].op0 = gen_lowpart (V32HImode, d->op0); + ds[i].op1 = gen_lowpart (V32HImode, d->op1); + } + + /* Prepare permutations such that the first one takes care of + putting the even bytes into the right positions or one higher + positions (ds[0]) and the second one takes care of + putting the odd bytes into the right positions or one below + (ds[1]). */ + + for (i = 0; i < nelt; i++) + { + ds[i & 1].perm[i / 2] = d->perm[i] / 2; + if (i & 1) + { + rperm[i] = constm1_rtx; + rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1)); + } + else + { + rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1)); + rperm[i + 64] = constm1_rtx; + } + } + + bool ok = expand_vec_perm_1 (&ds[0]); + gcc_assert (ok); + ds[0].target = gen_lowpart (V64QImode, ds[0].target); + + ok = expand_vec_perm_1 (&ds[1]); + gcc_assert (ok); + ds[1].target = gen_lowpart (V64QImode, ds[1].target); + + vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm)); + vperm = force_reg (vmode, vperm); + target0 = gen_reg_rtx (V64QImode); + emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm)); + + vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64)); + vperm = force_reg (vmode, vperm); + target1 = gen_reg_rtx (V64QImode); + emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm)); + + emit_insn (gen_iorv64qi3 (d->target, target0, target1)); + return true; +} + +/* Implement arbitrary permutation of two V32QImode and V16QImode operands + with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed + all the shorter instruction sequences. */ + +static bool +expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d) +{ + rtx rperm[4][32], vperm, l[2], h[2], op, m128; + unsigned int i, nelt, eltsz; + bool used[4]; + + if (!TARGET_AVX2 + || d->one_operand_p + || (d->vmode != V32QImode && d->vmode != V16HImode)) + return false; + + if (d->testing_p) + return true; + + nelt = d->nelt; + eltsz = GET_MODE_UNIT_SIZE (d->vmode); + + /* Generate 4 permutation masks. If the required element is within + the same lane, it is shuffled in. If the required element from the + other lane, force a zero by setting bit 7 in the permutation mask. + In the other mask the mask has non-negative elements if element + is requested from the other lane, but also moved to the other lane, + so that the result of vpshufb can have the two V2TImode halves + swapped. */ + m128 = GEN_INT (-128); + for (i = 0; i < 32; ++i) + { + rperm[0][i] = m128; + rperm[1][i] = m128; + rperm[2][i] = m128; + rperm[3][i] = m128; + } + used[0] = false; + used[1] = false; + used[2] = false; + used[3] = false; + for (i = 0; i < nelt; ++i) + { + unsigned j, e = d->perm[i] & (nelt / 2 - 1); + unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz; + unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 
1 : 0); + + for (j = 0; j < eltsz; ++j) + rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j); + used[which] = true; + } + + for (i = 0; i < 2; ++i) + { + if (!used[2 * i + 1]) + { + h[i] = NULL_RTX; + continue; + } + vperm = gen_rtx_CONST_VECTOR (V32QImode, + gen_rtvec_v (32, rperm[2 * i + 1])); + vperm = force_reg (V32QImode, vperm); + h[i] = gen_reg_rtx (V32QImode); + op = gen_lowpart (V32QImode, i ? d->op1 : d->op0); + emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm)); + } + + /* Swap the 128-byte lanes of h[X]. */ + for (i = 0; i < 2; ++i) + { + if (h[i] == NULL_RTX) + continue; + op = gen_reg_rtx (V4DImode); + emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]), + const2_rtx, GEN_INT (3), const0_rtx, + const1_rtx)); + h[i] = gen_lowpart (V32QImode, op); + } + + for (i = 0; i < 2; ++i) + { + if (!used[2 * i]) + { + l[i] = NULL_RTX; + continue; + } + vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i])); + vperm = force_reg (V32QImode, vperm); + l[i] = gen_reg_rtx (V32QImode); + op = gen_lowpart (V32QImode, i ? d->op1 : d->op0); + emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm)); + } + + for (i = 0; i < 2; ++i) + { + if (h[i] && l[i]) + { + op = gen_reg_rtx (V32QImode); + emit_insn (gen_iorv32qi3 (op, l[i], h[i])); + l[i] = op; + } + else if (h[i]) + l[i] = h[i]; + } + + gcc_assert (l[0] && l[1]); + op = d->target; + if (d->vmode != V32QImode) + op = gen_reg_rtx (V32QImode); + emit_insn (gen_iorv32qi3 (op, l[0], l[1])); + if (op != d->target) + emit_move_insn (d->target, gen_lowpart (d->vmode, op)); + return true; +} + +/* The guts of ix86_expand_vec_perm_const, also used by the ok hook. + With all of the interface bits taken care of, perform the expansion + in D and return true on success. */ + +static bool +ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) +{ + /* Try a single instruction expansion. */ + if (expand_vec_perm_1 (d)) + return true; + + /* Try sequences of two instructions. */ + + if (expand_vec_perm_pshuflw_pshufhw (d)) + return true; + + if (expand_vec_perm_palignr (d, false)) + return true; + + if (expand_vec_perm_interleave2 (d)) + return true; + + if (expand_vec_perm_broadcast (d)) + return true; + + if (expand_vec_perm_vpermq_perm_1 (d)) + return true; + + if (expand_vec_perm_vperm2f128 (d)) + return true; + + if (expand_vec_perm_pblendv (d)) + return true; + + /* Try sequences of three instructions. */ + + if (expand_vec_perm_even_odd_pack (d)) + return true; + + if (expand_vec_perm_2vperm2f128_vshuf (d)) + return true; + + if (expand_vec_perm_pshufb2 (d)) + return true; + + if (expand_vec_perm_interleave3 (d)) + return true; + + if (expand_vec_perm_vperm2f128_vblend (d)) + return true; + + /* Try sequences of four instructions. */ + + if (expand_vec_perm_even_odd_trunc (d)) + return true; + if (expand_vec_perm_vpshufb2_vpermq (d)) + return true; + + if (expand_vec_perm_vpshufb2_vpermq_even_odd (d)) + return true; + + if (expand_vec_perm_vpermi2_vpshub2 (d)) + return true; + + /* ??? Look for narrow permutations whose element orderings would + allow the promotion to a wider mode. */ + + /* ??? Look for sequences of interleave or a wider permute that place + the data into the correct lanes for a half-vector shuffle like + pshuf[lh]w or vpermilps. */ + + /* ??? Look for sequences of interleave that produce the desired results. + The combinatorics of punpck[lh] get pretty ugly... */ + + if (expand_vec_perm_even_odd (d)) + return true; + + /* Even longer sequences. 
*/ + if (expand_vec_perm_vpshufb4_vpermq2 (d)) + return true; + + /* See if we can get the same permutation in different vector integer + mode. */ + struct expand_vec_perm_d nd; + if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd)) + { + if (!d->testing_p) + emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target)); + return true; + } + + return false; +} + +/* If a permutation only uses one operand, make it clear. Returns true + if the permutation references both operands. */ + +static bool +canonicalize_perm (struct expand_vec_perm_d *d) +{ + int i, which, nelt = d->nelt; + + for (i = which = 0; i < nelt; ++i) + which |= (d->perm[i] < nelt ? 1 : 2); + + d->one_operand_p = true; + switch (which) + { + default: + gcc_unreachable(); + + case 3: + if (!rtx_equal_p (d->op0, d->op1)) + { + d->one_operand_p = false; + break; + } + /* The elements of PERM do not suggest that only the first operand + is used, but both operands are identical. Allow easier matching + of the permutation by folding the permutation into the single + input vector. */ + /* FALLTHRU */ + + case 2: + for (i = 0; i < nelt; ++i) + d->perm[i] &= nelt - 1; + d->op0 = d->op1; + break; + + case 1: + d->op1 = d->op0; + break; + } + + return (which == 3); +} + +bool +ix86_expand_vec_perm_const (rtx operands[4]) +{ + struct expand_vec_perm_d d; + unsigned char perm[MAX_VECT_LEN]; + int i, nelt; + bool two_args; + rtx sel; + + d.target = operands[0]; + d.op0 = operands[1]; + d.op1 = operands[2]; + sel = operands[3]; + + d.vmode = GET_MODE (d.target); + gcc_assert (VECTOR_MODE_P (d.vmode)); + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.testing_p = false; + + gcc_assert (GET_CODE (sel) == CONST_VECTOR); + gcc_assert (XVECLEN (sel, 0) == nelt); + gcc_checking_assert (sizeof (d.perm) == sizeof (perm)); + + for (i = 0; i < nelt; ++i) + { + rtx e = XVECEXP (sel, 0, i); + int ei = INTVAL (e) & (2 * nelt - 1); + d.perm[i] = ei; + perm[i] = ei; + } + + two_args = canonicalize_perm (&d); + + if (ix86_expand_vec_perm_const_1 (&d)) + return true; + + /* If the selector says both arguments are needed, but the operands are the + same, the above tried to expand with one_operand_p and flattened selector. + If that didn't work, retry without one_operand_p; we succeeded with that + during testing. */ + if (two_args && d.one_operand_p) + { + d.one_operand_p = false; + memcpy (d.perm, perm, sizeof (perm)); + return ix86_expand_vec_perm_const_1 (&d); + } + + return false; +} + +/* Implement targetm.vectorize.vec_perm_const_ok. */ + +static bool +ix86_vectorize_vec_perm_const_ok (machine_mode vmode, + const unsigned char *sel) +{ + struct expand_vec_perm_d d; + unsigned int i, nelt, which; + bool ret; + + d.vmode = vmode; + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.testing_p = true; + + /* Given sufficient ISA support we can just return true here + for selected vector modes. */ + switch (d.vmode) + { + case V16SFmode: + case V16SImode: + case V8DImode: + case V8DFmode: + if (TARGET_AVX512F) + /* All implementable with a single vpermi2 insn. */ + return true; + break; + case V32HImode: + if (TARGET_AVX512BW) + /* All implementable with a single vpermi2 insn. */ + return true; + break; + case V64QImode: + if (TARGET_AVX512BW) + /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */ + return true; + break; + case V8SImode: + case V8SFmode: + case V4DFmode: + case V4DImode: + if (TARGET_AVX512VL) + /* All implementable with a single vpermi2 insn. 
*/ + return true; + break; + case V16HImode: + if (TARGET_AVX2) + /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */ + return true; + break; + case V32QImode: + if (TARGET_AVX2) + /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */ + return true; + break; + case V4SImode: + case V4SFmode: + case V8HImode: + case V16QImode: + /* All implementable with a single vpperm insn. */ + if (TARGET_XOP) + return true; + /* All implementable with 2 pshufb + 1 ior. */ + if (TARGET_SSSE3) + return true; + break; + case V2DImode: + case V2DFmode: + /* All implementable with shufpd or unpck[lh]pd. */ + return true; + default: + return false; + } + + /* Extract the values from the vector CST into the permutation + array in D. */ + memcpy (d.perm, sel, nelt); + for (i = which = 0; i < nelt; ++i) + { + unsigned char e = d.perm[i]; + gcc_assert (e < 2 * nelt); + which |= (e < nelt ? 1 : 2); + } + + /* For all elements from second vector, fold the elements to first. */ + if (which == 2) + for (i = 0; i < nelt; ++i) + d.perm[i] -= nelt; + + /* Check whether the mask can be applied to the vector type. */ + d.one_operand_p = (which != 3); + + /* Implementable with shufps or pshufd. */ + if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode)) + return true; + + /* Otherwise we have to go through the motions and see if we can + figure out how to generate the requested permutation. */ + d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); + d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); + if (!d.one_operand_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + + start_sequence (); + ret = ix86_expand_vec_perm_const_1 (&d); + end_sequence (); + + return ret; +} + +void +ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd) +{ + struct expand_vec_perm_d d; + unsigned i, nelt; + + d.target = targ; + d.op0 = op0; + d.op1 = op1; + d.vmode = GET_MODE (targ); + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.one_operand_p = false; + d.testing_p = false; + + for (i = 0; i < nelt; ++i) + d.perm[i] = i * 2 + odd; + + /* We'll either be able to implement the permutation directly... */ + if (expand_vec_perm_1 (&d)) + return; + + /* ... or we use the special-case patterns. */ + expand_vec_perm_even_odd_1 (&d, odd); +} + +static void +ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p) +{ + struct expand_vec_perm_d d; + unsigned i, nelt, base; + bool ok; + + d.target = targ; + d.op0 = op0; + d.op1 = op1; + d.vmode = GET_MODE (targ); + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.one_operand_p = false; + d.testing_p = false; + + base = high_p ? nelt / 2 : 0; + for (i = 0; i < nelt / 2; ++i) + { + d.perm[i * 2] = i + base; + d.perm[i * 2 + 1] = i + base + nelt; + } + + /* Note that for AVX this isn't one instruction. */ + ok = ix86_expand_vec_perm_const_1 (&d); + gcc_assert (ok); +} + + +/* Expand a vector operation CODE for a V*QImode in terms of the + same operation on V*HImode. 
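+   x86 has no byte-element shifts or multiplies, so the input is widened
+   to words (interleaved with itself for MULT, zero- or sign-extended
+   for the shifts), the operation is carried out on the low and high
+   halves in V*HImode, and the low bytes of the results are gathered
+   back into DEST with a constant permutation.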
*/ + +void +ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2) +{ + machine_mode qimode = GET_MODE (dest); + machine_mode himode; + rtx (*gen_il) (rtx, rtx, rtx); + rtx (*gen_ih) (rtx, rtx, rtx); + rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h; + struct expand_vec_perm_d d; + bool ok, full_interleave; + bool uns_p = false; + int i; + + switch (qimode) + { + case V16QImode: + himode = V8HImode; + gen_il = gen_vec_interleave_lowv16qi; + gen_ih = gen_vec_interleave_highv16qi; + break; + case V32QImode: + himode = V16HImode; + gen_il = gen_avx2_interleave_lowv32qi; + gen_ih = gen_avx2_interleave_highv32qi; + break; + case V64QImode: + himode = V32HImode; + gen_il = gen_avx512bw_interleave_lowv64qi; + gen_ih = gen_avx512bw_interleave_highv64qi; + break; + default: + gcc_unreachable (); + } + + op2_l = op2_h = op2; + switch (code) + { + case MULT: + /* Unpack data such that we've got a source byte in each low byte of + each word. We don't care what goes into the high byte of each word. + Rather than trying to get zero in there, most convenient is to let + it be a copy of the low byte. */ + op2_l = gen_reg_rtx (qimode); + op2_h = gen_reg_rtx (qimode); + emit_insn (gen_il (op2_l, op2, op2)); + emit_insn (gen_ih (op2_h, op2, op2)); + /* FALLTHRU */ + + op1_l = gen_reg_rtx (qimode); + op1_h = gen_reg_rtx (qimode); + emit_insn (gen_il (op1_l, op1, op1)); + emit_insn (gen_ih (op1_h, op1, op1)); + full_interleave = qimode == V16QImode; + break; + + case ASHIFT: + case LSHIFTRT: + uns_p = true; + /* FALLTHRU */ + case ASHIFTRT: + op1_l = gen_reg_rtx (himode); + op1_h = gen_reg_rtx (himode); + ix86_expand_sse_unpack (op1_l, op1, uns_p, false); + ix86_expand_sse_unpack (op1_h, op1, uns_p, true); + full_interleave = true; + break; + default: + gcc_unreachable (); + } + + /* Perform the operation. */ + res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX, + 1, OPTAB_DIRECT); + res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX, + 1, OPTAB_DIRECT); + gcc_assert (res_l && res_h); + + /* Merge the data back into the right place. */ + d.target = dest; + d.op0 = gen_lowpart (qimode, res_l); + d.op1 = gen_lowpart (qimode, res_h); + d.vmode = qimode; + d.nelt = GET_MODE_NUNITS (qimode); + d.one_operand_p = false; + d.testing_p = false; + + if (full_interleave) + { + /* For SSE2, we used an full interleave, so the desired + results are in the even elements. */ + for (i = 0; i < d.nelt; ++i) + d.perm[i] = i * 2; + } + else + { + /* For AVX, the interleave used above was not cross-lane. So the + extraction is evens but with the second and third quarter swapped. + Happily, that is even one insn shorter than even extraction. + For AVX512BW we have 4 lanes. We extract evens from within a lane, + always first from the first and then from the second source operand, + the index bits above the low 4 bits remains the same. + Thus, for d.nelt == 32 we want permutation + 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62 + and for d.nelt == 64 we want permutation + 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94, + 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126. */ + for (i = 0; i < d.nelt; ++i) + d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15); + } + + ok = ix86_expand_vec_perm_const_1 (&d); + gcc_assert (ok); + + set_unique_reg_note (get_last_insn (), REG_EQUAL, + gen_rtx_fmt_ee (code, qimode, op1, op2)); +} + +/* Helper function of ix86_expand_mul_widen_evenodd. 
Return true + if op is CONST_VECTOR with all odd elements equal to their + preceding element. */ + +static bool +const_vector_equal_evenodd_p (rtx op) +{ + machine_mode mode = GET_MODE (op); + int i, nunits = GET_MODE_NUNITS (mode); + if (GET_CODE (op) != CONST_VECTOR + || nunits != CONST_VECTOR_NUNITS (op)) + return false; + for (i = 0; i < nunits; i += 2) + if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1)) + return false; + return true; +} + +void +ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2, + bool uns_p, bool odd_p) +{ + machine_mode mode = GET_MODE (op1); + machine_mode wmode = GET_MODE (dest); + rtx x; + rtx orig_op1 = op1, orig_op2 = op2; + + if (!nonimmediate_operand (op1, mode)) + op1 = force_reg (mode, op1); + if (!nonimmediate_operand (op2, mode)) + op2 = force_reg (mode, op2); + + /* We only play even/odd games with vectors of SImode. */ + gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode); + + /* If we're looking for the odd results, shift those members down to + the even slots. For some cpus this is faster than a PSHUFD. */ + if (odd_p) + { + /* For XOP use vpmacsdqh, but only for smult, as it is only + signed. */ + if (TARGET_XOP && mode == V4SImode && !uns_p) + { + x = force_reg (wmode, CONST0_RTX (wmode)); + emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x)); + return; + } + + x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode)); + if (!const_vector_equal_evenodd_p (orig_op1)) + op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1), + x, NULL, 1, OPTAB_DIRECT); + if (!const_vector_equal_evenodd_p (orig_op2)) + op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2), + x, NULL, 1, OPTAB_DIRECT); + op1 = gen_lowpart (mode, op1); + op2 = gen_lowpart (mode, op2); + } + + if (mode == V16SImode) + { + if (uns_p) + x = gen_vec_widen_umult_even_v16si (dest, op1, op2); + else + x = gen_vec_widen_smult_even_v16si (dest, op1, op2); + } + else if (mode == V8SImode) + { + if (uns_p) + x = gen_vec_widen_umult_even_v8si (dest, op1, op2); + else + x = gen_vec_widen_smult_even_v8si (dest, op1, op2); + } + else if (uns_p) + x = gen_vec_widen_umult_even_v4si (dest, op1, op2); + else if (TARGET_SSE4_1) + x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2); + else + { + rtx s1, s2, t0, t1, t2; + + /* The easiest way to implement this without PMULDQ is to go through + the motions as if we are performing a full 64-bit multiply. With + the exception that we need to do less shuffling of the elements. */ + + /* Compute the sign-extension, aka highparts, of the two operands. */ + s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode), + op1, pc_rtx, pc_rtx); + s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode), + op2, pc_rtx, pc_rtx); + + /* Multiply LO(A) * HI(B), and vice-versa. */ + t1 = gen_reg_rtx (wmode); + t2 = gen_reg_rtx (wmode); + emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2)); + emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1)); + + /* Multiply LO(A) * LO(B). */ + t0 = gen_reg_rtx (wmode); + emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2)); + + /* Combine and shift the highparts into place. */ + t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT); + t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1, + 1, OPTAB_DIRECT); + + /* Combine high and low parts. 
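+	 Since S1 and S2 are either 0 or all ones, adding
+	 (S1*OP2 + S2*OP1) << 32 to T0 is exactly the correction that
+	 turns the unsigned even-element product into the signed one.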
*/ + force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT); + return; + } + emit_insn (x); +} + +void +ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2, + bool uns_p, bool high_p) +{ + machine_mode wmode = GET_MODE (dest); + machine_mode mode = GET_MODE (op1); + rtx t1, t2, t3, t4, mask; + + switch (mode) + { + case V4SImode: + t1 = gen_reg_rtx (mode); + t2 = gen_reg_rtx (mode); + if (TARGET_XOP && !uns_p) + { + /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case, + shuffle the elements once so that all elements are in the right + place for immediate use: { A C B D }. */ + emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx, + const1_rtx, GEN_INT (3))); + emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx, + const1_rtx, GEN_INT (3))); + } + else + { + /* Put the elements into place for the multiply. */ + ix86_expand_vec_interleave (t1, op1, op1, high_p); + ix86_expand_vec_interleave (t2, op2, op2, high_p); + high_p = false; + } + ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p); + break; + + case V8SImode: + /* Shuffle the elements between the lanes. After this we + have { A B E F | C D G H } for each operand. */ + t1 = gen_reg_rtx (V4DImode); + t2 = gen_reg_rtx (V4DImode); + emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1), + const0_rtx, const2_rtx, + const1_rtx, GEN_INT (3))); + emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2), + const0_rtx, const2_rtx, + const1_rtx, GEN_INT (3))); + + /* Shuffle the elements within the lanes. After this we + have { A A B B | C C D D } or { E E F F | G G H H }. */ + t3 = gen_reg_rtx (V8SImode); + t4 = gen_reg_rtx (V8SImode); + mask = GEN_INT (high_p + ? 2 + (2 << 2) + (3 << 4) + (3 << 6) + : 0 + (0 << 2) + (1 << 4) + (1 << 6)); + emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask)); + emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask)); + + ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false); + break; + + case V8HImode: + case V16HImode: + t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX, + uns_p, OPTAB_DIRECT); + t2 = expand_binop (mode, + uns_p ? umul_highpart_optab : smul_highpart_optab, + op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT); + gcc_assert (t1 && t2); + + t3 = gen_reg_rtx (mode); + ix86_expand_vec_interleave (t3, t1, t2, high_p); + emit_move_insn (dest, gen_lowpart (wmode, t3)); + break; + + case V16QImode: + case V32QImode: + case V32HImode: + case V16SImode: + case V64QImode: + t1 = gen_reg_rtx (wmode); + t2 = gen_reg_rtx (wmode); + ix86_expand_sse_unpack (t1, op1, uns_p, high_p); + ix86_expand_sse_unpack (t2, op2, uns_p, high_p); + + emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2))); + break; + + default: + gcc_unreachable (); + } +} + +void +ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2) +{ + rtx res_1, res_2, res_3, res_4; + + res_1 = gen_reg_rtx (V4SImode); + res_2 = gen_reg_rtx (V4SImode); + res_3 = gen_reg_rtx (V2DImode); + res_4 = gen_reg_rtx (V2DImode); + ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false); + ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true); + + /* Move the results in element 2 down to element 1; we don't care + what goes in elements 2 and 3. Then we can merge the parts + back together with an interleave. + + Note that two other sequences were tried: + (1) Use interleaves at the start instead of psrldq, which allows + us to use a single shufps to merge things back at the end. 
+ (2) Use shufps here to combine the two vectors, then pshufd to + put the elements in the correct order. + In both cases the cost of the reformatting stall was too high + and the overall sequence slower. */ + + emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3), + const0_rtx, const2_rtx, + const0_rtx, const0_rtx)); + emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4), + const0_rtx, const2_rtx, + const0_rtx, const0_rtx)); + res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2)); + + set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2)); +} + +void +ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2) +{ + machine_mode mode = GET_MODE (op0); + rtx t1, t2, t3, t4, t5, t6; + + if (TARGET_AVX512DQ && mode == V8DImode) + emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2)); + else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode) + emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2)); + else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode) + emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2)); + else if (TARGET_XOP && mode == V2DImode) + { + /* op1: A,B,C,D, op2: E,F,G,H */ + op1 = gen_lowpart (V4SImode, op1); + op2 = gen_lowpart (V4SImode, op2); + + t1 = gen_reg_rtx (V4SImode); + t2 = gen_reg_rtx (V4SImode); + t3 = gen_reg_rtx (V2DImode); + t4 = gen_reg_rtx (V2DImode); + + /* t1: B,A,D,C */ + emit_insn (gen_sse2_pshufd_1 (t1, op1, + GEN_INT (1), + GEN_INT (0), + GEN_INT (3), + GEN_INT (2))); + + /* t2: (B*E),(A*F),(D*G),(C*H) */ + emit_insn (gen_mulv4si3 (t2, t1, op2)); + + /* t3: (B*E)+(A*F), (D*G)+(C*H) */ + emit_insn (gen_xop_phadddq (t3, t2)); + + /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */ + emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32))); + + /* Multiply lower parts and add all */ + t5 = gen_reg_rtx (V2DImode); + emit_insn (gen_vec_widen_umult_even_v4si (t5, + gen_lowpart (V4SImode, op1), + gen_lowpart (V4SImode, op2))); + op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT); + + } + else + { + machine_mode nmode; + rtx (*umul) (rtx, rtx, rtx); + + if (mode == V2DImode) + { + umul = gen_vec_widen_umult_even_v4si; + nmode = V4SImode; + } + else if (mode == V4DImode) + { + umul = gen_vec_widen_umult_even_v8si; + nmode = V8SImode; + } + else if (mode == V8DImode) + { + umul = gen_vec_widen_umult_even_v16si; + nmode = V16SImode; + } + else + gcc_unreachable (); + + + /* Multiply low parts. */ + t1 = gen_reg_rtx (mode); + emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2))); + + /* Shift input vectors right 32 bits so we can multiply high parts. */ + t6 = GEN_INT (32); + t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT); + t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT); + + /* Multiply high parts by low parts. */ + t4 = gen_reg_rtx (mode); + t5 = gen_reg_rtx (mode); + emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2))); + emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1))); + + /* Combine and shift the highparts back. */ + t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT); + t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT); + + /* Combine high and low parts. */ + force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT); + } + + set_unique_reg_note (get_last_insn (), REG_EQUAL, + gen_rtx_MULT (mode, op1, op2)); +} + +/* Return 1 if control tansfer instruction INSN + should be encoded with bnd prefix. 
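/* The same splitting trick, on scalars (illustrative helper only): a 64x64
   multiply truncated to 64 bits needs just lo*lo plus the two cross
   products shifted up, since hi(a)*hi(b) only contributes at bit 64 and
   above.  This mirrors the generic (non-XOP, non-AVX512DQ) path above.  */
#include <assert.h>
#include <stdint.h>

static uint64_t
mul64_via_32bit_parts (uint64_t a, uint64_t b)
{
  uint64_t al = a & 0xffffffffu, ah = a >> 32;
  uint64_t bl = b & 0xffffffffu, bh = b >> 32;
  return al * bl + ((ah * bl + bh * al) << 32);
}

int
main (void)
{
  uint64_t a = 0x123456789abcdef0u, b = 0xfedcba9876543210u;
  assert (mul64_via_32bit_parts (a, b) == a * b);
  return 0;
}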
+ If insn is NULL then return 1 when control + transfer instructions should be prefixed with + bnd by default for current function. */ + +bool +ix86_bnd_prefixed_insn_p (rtx insn) +{ + /* For call insns check special flag. */ + if (insn && CALL_P (insn)) + { + rtx call = get_call_rtx_from (insn); + if (call) + return CALL_EXPR_WITH_BOUNDS_P (call); + } + + /* All other insns are prefixed only if function is instrumented. */ + return chkp_function_instrumented_p (current_function_decl); +} + +/* Calculate integer abs() using only SSE2 instructions. */ + +void +ix86_expand_sse2_abs (rtx target, rtx input) +{ + machine_mode mode = GET_MODE (target); + rtx tmp0, tmp1, x; + + switch (mode) + { + /* For 32-bit signed integer X, the best way to calculate the absolute + value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */ + case V4SImode: + tmp0 = expand_simple_binop (mode, ASHIFTRT, input, + GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1), + NULL, 0, OPTAB_DIRECT); + tmp1 = expand_simple_binop (mode, XOR, tmp0, input, + NULL, 0, OPTAB_DIRECT); + x = expand_simple_binop (mode, MINUS, tmp1, tmp0, + target, 0, OPTAB_DIRECT); + break; + + /* For 16-bit signed integer X, the best way to calculate the absolute + value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */ + case V8HImode: + tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0); + + x = expand_simple_binop (mode, SMAX, tmp0, input, + target, 0, OPTAB_DIRECT); + break; + + /* For 8-bit signed integer X, the best way to calculate the absolute + value of X is min ((unsigned char) X, (unsigned char) (-X)), + as SSE2 provides the PMINUB insn. */ + case V16QImode: + tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0); + + x = expand_simple_binop (V16QImode, UMIN, tmp0, input, + target, 0, OPTAB_DIRECT); + break; + + default: + gcc_unreachable (); + } + + if (x != target) + emit_move_insn (target, x); +} + +/* Expand an extract from a vector register through pextr insn. + Return true if successful. */ + +bool +ix86_expand_pextr (rtx *operands) +{ + rtx dst = operands[0]; + rtx src = operands[1]; + + unsigned int size = INTVAL (operands[2]); + unsigned int pos = INTVAL (operands[3]); + + if (SUBREG_P (dst)) + { + /* Reject non-lowpart subregs. */ + if (SUBREG_BYTE (dst) > 0) + return false; + dst = SUBREG_REG (dst); + } + + if (SUBREG_P (src)) + { + pos += SUBREG_BYTE (src) * BITS_PER_UNIT; + src = SUBREG_REG (src); + } + + switch (GET_MODE (src)) + { + case V16QImode: + case V8HImode: + case V4SImode: + case V2DImode: + case V1TImode: + case TImode: + { + machine_mode srcmode, dstmode; + rtx d, pat; + + dstmode = mode_for_size (size, MODE_INT, 0); + + switch (dstmode) + { + case QImode: + if (!TARGET_SSE4_1) + return false; + srcmode = V16QImode; + break; + + case HImode: + if (!TARGET_SSE2) + return false; + srcmode = V8HImode; + break; + + case SImode: + if (!TARGET_SSE4_1) + return false; + srcmode = V4SImode; + break; + + case DImode: + gcc_assert (TARGET_64BIT); + if (!TARGET_SSE4_1) + return false; + srcmode = V2DImode; + break; + + default: + return false; + } + + /* Reject extractions from misaligned positions. */ + if (pos & (size-1)) + return false; + + if (GET_MODE (dst) == dstmode) + d = dst; + else + d = gen_reg_rtx (dstmode); + + /* Construct insn pattern. */ + pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size))); + pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat); + + /* Let the rtl optimizers know about the zero extension performed. 
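/* The three abs identities used in ix86_expand_sse2_abs above, checked on
   scalars (illustrative only; assumes the usual arithmetic right shift for
   signed values, as on x86):
     32-bit: ((x >> 31) ^ x) - (x >> 31)
     16-bit: max (x, -x)
      8-bit: min ((uint8_t) x, (uint8_t) -x), reinterpreted as signed.  */
#include <assert.h>
#include <stdint.h>

static int32_t abs32 (int32_t x) { int32_t s = x >> 31; return (s ^ x) - s; }
static int16_t abs16 (int16_t x) { int16_t n = -x; return x > n ? x : n; }

static int8_t
abs8 (int8_t x)
{
  uint8_t a = (uint8_t) x, b = (uint8_t) -x;
  return (int8_t) (a < b ? a : b);
}

int
main (void)
{
  for (int i = -127; i <= 127; ++i)
    assert (abs8 ((int8_t) i) == (i < 0 ? -i : i));
  assert (abs32 (-5) == 5 && abs32 (7) == 7 && abs16 (-300) == 300);
  return 0;
}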
*/ + if (dstmode == QImode || dstmode == HImode) + { + pat = gen_rtx_ZERO_EXTEND (SImode, pat); + d = gen_lowpart (SImode, d); + } + + emit_insn (gen_rtx_SET (d, pat)); + + if (d != dst) + emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d)); + return true; + } + + default: + return false; + } +} + +/* Expand an insert into a vector register through pinsr insn. + Return true if successful. */ + +bool +ix86_expand_pinsr (rtx *operands) +{ + rtx dst = operands[0]; + rtx src = operands[3]; + + unsigned int size = INTVAL (operands[1]); + unsigned int pos = INTVAL (operands[2]); + + if (SUBREG_P (dst)) + { + pos += SUBREG_BYTE (dst) * BITS_PER_UNIT; + dst = SUBREG_REG (dst); + } + + switch (GET_MODE (dst)) + { + case V16QImode: + case V8HImode: + case V4SImode: + case V2DImode: + case V1TImode: + case TImode: + { + machine_mode srcmode, dstmode; + rtx (*pinsr)(rtx, rtx, rtx, rtx); + rtx d; + + srcmode = mode_for_size (size, MODE_INT, 0); + + switch (srcmode) + { + case QImode: + if (!TARGET_SSE4_1) + return false; + dstmode = V16QImode; + pinsr = gen_sse4_1_pinsrb; + break; + + case HImode: + if (!TARGET_SSE2) + return false; + dstmode = V8HImode; + pinsr = gen_sse2_pinsrw; + break; + + case SImode: + if (!TARGET_SSE4_1) + return false; + dstmode = V4SImode; + pinsr = gen_sse4_1_pinsrd; + break; + + case DImode: + gcc_assert (TARGET_64BIT); + if (!TARGET_SSE4_1) + return false; + dstmode = V2DImode; + pinsr = gen_sse4_1_pinsrq; + break; + + default: + return false; + } + + /* Reject insertions to misaligned positions. */ + if (pos & (size-1)) + return false; + + if (SUBREG_P (src)) + { + unsigned int srcpos = SUBREG_BYTE (src); + + if (srcpos > 0) + { + rtx extr_ops[4]; + + extr_ops[0] = gen_reg_rtx (srcmode); + extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src)); + extr_ops[2] = GEN_INT (size); + extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT); + + if (!ix86_expand_pextr (extr_ops)) + return false; + + src = extr_ops[0]; + } + else + src = gen_lowpart (srcmode, SUBREG_REG (src)); + } + + if (GET_MODE (dst) == dstmode) + d = dst; + else + d = gen_reg_rtx (dstmode); + + emit_insn (pinsr (d, gen_lowpart (dstmode, dst), + gen_lowpart (srcmode, src), + GEN_INT (1 << (pos / size)))); + if (d != dst) + emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d)); + return true; + } + + default: + return false; + } +} + +/* This function returns the calling abi specific va_list type node. + It returns the FNDECL specific va_list type. */ + +static tree +ix86_fn_abi_va_list (tree fndecl) +{ + if (!TARGET_64BIT) + return va_list_type_node; + gcc_assert (fndecl != NULL_TREE); + + if (ix86_function_abi ((const_tree) fndecl) == MS_ABI) + return ms_va_list_type_node; + else + return sysv_va_list_type_node; +} + +/* Returns the canonical va_list type specified by TYPE. If there + is no valid TYPE provided, it return NULL_TREE. */ + +static tree +ix86_canonical_va_list_type (tree type) +{ + /* Resolve references and pointers to va_list type. 
*/ + if (TREE_CODE (type) == MEM_REF) + type = TREE_TYPE (type); + else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type))) + type = TREE_TYPE (type); + else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE) + type = TREE_TYPE (type); + + if (TARGET_64BIT) + { + if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type))) + return ms_va_list_type_node; + + if ((TREE_CODE (type) == ARRAY_TYPE + && integer_zerop (array_type_nelts (type))) + || POINTER_TYPE_P (type)) + { + tree elem_type = TREE_TYPE (type); + if (TREE_CODE (elem_type) == RECORD_TYPE + && lookup_attribute ("sysv_abi va_list", + TYPE_ATTRIBUTES (elem_type))) + return sysv_va_list_type_node; + } + + return NULL_TREE; + } + + return std_canonical_va_list_type (type); +} + +/* Iterate through the target-specific builtin types for va_list. + IDX denotes the iterator, *PTREE is set to the result type of + the va_list builtin, and *PNAME to its internal type. + Returns zero if there is no element for this index, otherwise + IDX should be increased upon the next call. + Note, do not iterate a base builtin's name like __builtin_va_list. + Used from c_common_nodes_and_builtins. */ + +static int +ix86_enum_va_list (int idx, const char **pname, tree *ptree) +{ + if (TARGET_64BIT) + { + switch (idx) + { + default: + break; + + case 0: + *ptree = ms_va_list_type_node; + *pname = "__builtin_ms_va_list"; + return 1; + + case 1: + *ptree = sysv_va_list_type_node; + *pname = "__builtin_sysv_va_list"; + return 1; + } + } + + return 0; +} + +#undef TARGET_SCHED_DISPATCH +#define TARGET_SCHED_DISPATCH has_dispatch +#undef TARGET_SCHED_DISPATCH_DO +#define TARGET_SCHED_DISPATCH_DO do_dispatch +#undef TARGET_SCHED_REASSOCIATION_WIDTH +#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width +#undef TARGET_SCHED_REORDER +#define TARGET_SCHED_REORDER ix86_sched_reorder +#undef TARGET_SCHED_ADJUST_PRIORITY +#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority +#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK +#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \ + ix86_dependencies_evaluation_hook + +/* The size of the dispatch window is the total number of bytes of + object code allowed in a window. */ +#define DISPATCH_WINDOW_SIZE 16 + +/* Number of dispatch windows considered for scheduling. */ +#define MAX_DISPATCH_WINDOWS 3 + +/* Maximum number of instructions in a window. */ +#define MAX_INSN 4 + +/* Maximum number of immediate operands in a window. */ +#define MAX_IMM 4 + +/* Maximum number of immediate bits allowed in a window. */ +#define MAX_IMM_SIZE 128 + +/* Maximum number of 32 bit immediates allowed in a window. */ +#define MAX_IMM_32 4 + +/* Maximum number of 64 bit immediates allowed in a window. */ +#define MAX_IMM_64 2 + +/* Maximum total of loads or prefetches allowed in a window. */ +#define MAX_LOAD 2 + +/* Maximum total of stores allowed in a window. */ +#define MAX_STORE 1 + +#undef BIG +#define BIG 100 + + +/* Dispatch groups. Istructions that affect the mix in a dispatch window. */ +enum dispatch_group { + disp_no_group = 0, + disp_load, + disp_store, + disp_load_store, + disp_prefetch, + disp_imm, + disp_imm_32, + disp_imm_64, + disp_branch, + disp_cmp, + disp_jcc, + disp_last +}; + +/* Number of allowable groups in a dispatch window. It is an array + indexed by dispatch_group enum. 100 is used as a big number, + because the number of these kind of operations does not have any + effect in dispatch window, but we need them for other reasons in + the table. 
*/ +static unsigned int num_allowable_groups[disp_last] = { + 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG +}; + +char group_name[disp_last + 1][16] = { + "disp_no_group", "disp_load", "disp_store", "disp_load_store", + "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64", + "disp_branch", "disp_cmp", "disp_jcc", "disp_last" +}; + +/* Instruction path. */ +enum insn_path { + no_path = 0, + path_single, /* Single micro op. */ + path_double, /* Double micro op. */ + path_multi, /* Instructions with more than 2 micro op.. */ + last_path +}; + +/* sched_insn_info defines a window to the instructions scheduled in + the basic block. It contains a pointer to the insn_info table and + the instruction scheduled. + + Windows are allocated for each basic block and are linked + together. */ +typedef struct sched_insn_info_s { + rtx insn; + enum dispatch_group group; + enum insn_path path; + int byte_len; + int imm_bytes; +} sched_insn_info; + +/* Linked list of dispatch windows. This is a two way list of + dispatch windows of a basic block. It contains information about + the number of uops in the window and the total number of + instructions and of bytes in the object code for this dispatch + window. */ +typedef struct dispatch_windows_s { + int num_insn; /* Number of insn in the window. */ + int num_uops; /* Number of uops in the window. */ + int window_size; /* Number of bytes in the window. */ + int window_num; /* Window number between 0 or 1. */ + int num_imm; /* Number of immediates in an insn. */ + int num_imm_32; /* Number of 32 bit immediates in an insn. */ + int num_imm_64; /* Number of 64 bit immediates in an insn. */ + int imm_size; /* Total immediates in the window. */ + int num_loads; /* Total memory loads in the window. */ + int num_stores; /* Total memory stores in the window. */ + int violation; /* Violation exists in window. */ + sched_insn_info *window; /* Pointer to the window. */ + struct dispatch_windows_s *next; + struct dispatch_windows_s *prev; +} dispatch_windows; + +/* Immediate valuse used in an insn. */ +typedef struct imm_info_s + { + int imm; + int imm32; + int imm64; + } imm_info; + +static dispatch_windows *dispatch_window_list; +static dispatch_windows *dispatch_window_list1; + +/* Get dispatch group of insn. */ + +static enum dispatch_group +get_mem_group (rtx_insn *insn) +{ + enum attr_memory memory; + + if (INSN_CODE (insn) < 0) + return disp_no_group; + memory = get_attr_memory (insn); + if (memory == MEMORY_STORE) + return disp_store; + + if (memory == MEMORY_LOAD) + return disp_load; + + if (memory == MEMORY_BOTH) + return disp_load_store; + + return disp_no_group; +} + +/* Return true if insn is a compare instruction. */ + +static bool +is_cmp (rtx_insn *insn) +{ + enum attr_type type; + + type = get_attr_type (insn); + return (type == TYPE_TEST + || type == TYPE_ICMP + || type == TYPE_FCMP + || GET_CODE (PATTERN (insn)) == COMPARE); +} + +/* Return true if a dispatch violation encountered. */ + +static bool +dispatch_violation (void) +{ + if (dispatch_window_list->next) + return dispatch_window_list->next->violation; + return dispatch_window_list->violation; +} + +/* Return true if insn is a branch instruction. */ + +static bool +is_branch (rtx_insn *insn) +{ + return (CALL_P (insn) || JUMP_P (insn)); +} + +/* Return true if insn is a prefetch instruction. 
*/ + +static bool +is_prefetch (rtx_insn *insn) +{ + return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH; +} + +/* This function initializes a dispatch window and the list container holding a + pointer to the window. */ + +static void +init_window (int window_num) +{ + int i; + dispatch_windows *new_list; + + if (window_num == 0) + new_list = dispatch_window_list; + else + new_list = dispatch_window_list1; + + new_list->num_insn = 0; + new_list->num_uops = 0; + new_list->window_size = 0; + new_list->next = NULL; + new_list->prev = NULL; + new_list->window_num = window_num; + new_list->num_imm = 0; + new_list->num_imm_32 = 0; + new_list->num_imm_64 = 0; + new_list->imm_size = 0; + new_list->num_loads = 0; + new_list->num_stores = 0; + new_list->violation = false; + + for (i = 0; i < MAX_INSN; i++) + { + new_list->window[i].insn = NULL; + new_list->window[i].group = disp_no_group; + new_list->window[i].path = no_path; + new_list->window[i].byte_len = 0; + new_list->window[i].imm_bytes = 0; + } + return; +} + +/* This function allocates and initializes a dispatch window and the + list container holding a pointer to the window. */ + +static dispatch_windows * +allocate_window (void) +{ + dispatch_windows *new_list = XNEW (struct dispatch_windows_s); + new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1); + + return new_list; +} + +/* This routine initializes the dispatch scheduling information. It + initiates building dispatch scheduler tables and constructs the + first dispatch window. */ + +static void +init_dispatch_sched (void) +{ + /* Allocate a dispatch list and a window. */ + dispatch_window_list = allocate_window (); + dispatch_window_list1 = allocate_window (); + init_window (0); + init_window (1); +} + +/* This function returns true if a branch is detected. End of a basic block + does not have to be a branch, but here we assume only branches end a + window. */ + +static bool +is_end_basic_block (enum dispatch_group group) +{ + return group == disp_branch; +} + +/* This function is called when the end of a window processing is reached. */ + +static void +process_end_window (void) +{ + gcc_assert (dispatch_window_list->num_insn <= MAX_INSN); + if (dispatch_window_list->next) + { + gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN); + gcc_assert (dispatch_window_list->window_size + + dispatch_window_list1->window_size <= 48); + init_window (1); + } + init_window (0); +} + +/* Allocates a new dispatch window and adds it to WINDOW_LIST. + WINDOW_NUM is either 0 or 1. A maximum of two windows are generated + for 48 bytes of instructions. Note that these windows are not dispatch + windows that their sizes are DISPATCH_WINDOW_SIZE. */ + +static dispatch_windows * +allocate_next_window (int window_num) +{ + if (window_num == 0) + { + if (dispatch_window_list->next) + init_window (1); + init_window (0); + return dispatch_window_list; + } + + dispatch_window_list->next = dispatch_window_list1; + dispatch_window_list1->prev = dispatch_window_list; + + return dispatch_window_list1; +} + +/* Compute number of immediate operands of an instruction. 
*/ + +static void +find_constant (rtx in_rtx, imm_info *imm_values) +{ + if (INSN_P (in_rtx)) + in_rtx = PATTERN (in_rtx); + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, in_rtx, ALL) + if (const_rtx x = *iter) + switch (GET_CODE (x)) + { + case CONST: + case SYMBOL_REF: + case CONST_INT: + (imm_values->imm)++; + if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode)) + (imm_values->imm32)++; + else + (imm_values->imm64)++; + break; + + case CONST_DOUBLE: + case CONST_WIDE_INT: + (imm_values->imm)++; + (imm_values->imm64)++; + break; + + case CODE_LABEL: + if (LABEL_KIND (x) == LABEL_NORMAL) + { + (imm_values->imm)++; + (imm_values->imm32)++; + } + break; + + default: + break; + } +} + +/* Return total size of immediate operands of an instruction along with number + of corresponding immediate-operands. It initializes its parameters to zero + befor calling FIND_CONSTANT. + INSN is the input instruction. IMM is the total of immediates. + IMM32 is the number of 32 bit immediates. IMM64 is the number of 64 + bit immediates. */ + +static int +get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64) +{ + imm_info imm_values = {0, 0, 0}; + + find_constant (insn, &imm_values); + *imm = imm_values.imm; + *imm32 = imm_values.imm32; + *imm64 = imm_values.imm64; + return imm_values.imm32 * 4 + imm_values.imm64 * 8; +} + +/* This function indicates if an operand of an instruction is an + immediate. */ + +static bool +has_immediate (rtx_insn *insn) +{ + int num_imm_operand; + int num_imm32_operand; + int num_imm64_operand; + + if (insn) + return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand, + &num_imm64_operand); + return false; +} + +/* Return single or double path for instructions. */ + +static enum insn_path +get_insn_path (rtx_insn *insn) +{ + enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn); + + if ((int)path == 0) + return path_single; + + if ((int)path == 1) + return path_double; + + return path_multi; +} + +/* Return insn dispatch group. */ + +static enum dispatch_group +get_insn_group (rtx_insn *insn) +{ + enum dispatch_group group = get_mem_group (insn); + if (group) + return group; + + if (is_branch (insn)) + return disp_branch; + + if (is_cmp (insn)) + return disp_cmp; + + if (has_immediate (insn)) + return disp_imm; + + if (is_prefetch (insn)) + return disp_prefetch; + + return disp_no_group; +} + +/* Count number of GROUP restricted instructions in a dispatch + window WINDOW_LIST. 
*/ + +static int +count_num_restricted (rtx_insn *insn, dispatch_windows *window_list) +{ + enum dispatch_group group = get_insn_group (insn); + int imm_size; + int num_imm_operand; + int num_imm32_operand; + int num_imm64_operand; + + if (group == disp_no_group) + return 0; + + if (group == disp_imm) + { + imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand, + &num_imm64_operand); + if (window_list->imm_size + imm_size > MAX_IMM_SIZE + || num_imm_operand + window_list->num_imm > MAX_IMM + || (num_imm32_operand > 0 + && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32 + || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32)) + || (num_imm64_operand > 0 + && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64 + || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32)) + || (window_list->imm_size + imm_size == MAX_IMM_SIZE + && num_imm64_operand > 0 + && ((window_list->num_imm_64 > 0 + && window_list->num_insn >= 2) + || window_list->num_insn >= 3))) + return BIG; + + return 1; + } + + if ((group == disp_load_store + && (window_list->num_loads >= MAX_LOAD + || window_list->num_stores >= MAX_STORE)) + || ((group == disp_load + || group == disp_prefetch) + && window_list->num_loads >= MAX_LOAD) + || (group == disp_store + && window_list->num_stores >= MAX_STORE)) + return BIG; + + return 1; +} + +/* This function returns true if insn satisfies dispatch rules on the + last window scheduled. */ + +static bool +fits_dispatch_window (rtx_insn *insn) +{ + dispatch_windows *window_list = dispatch_window_list; + dispatch_windows *window_list_next = dispatch_window_list->next; + unsigned int num_restrict; + enum dispatch_group group = get_insn_group (insn); + enum insn_path path = get_insn_path (insn); + int sum; + + /* Make disp_cmp and disp_jcc get scheduled at the latest. These + instructions should be given the lowest priority in the + scheduling process in Haifa scheduler to make sure they will be + scheduled in the same dispatch window as the reference to them. */ + if (group == disp_jcc || group == disp_cmp) + return false; + + /* Check nonrestricted. */ + if (group == disp_no_group || group == disp_branch) + return true; + + /* Get last dispatch window. */ + if (window_list_next) + window_list = window_list_next; + + if (window_list->window_num == 1) + { + sum = window_list->prev->window_size + window_list->window_size; + + if (sum == 32 + || (min_insn_size (insn) + sum) >= 48) + /* Window 1 is full. Go for next window. */ + return true; + } + + num_restrict = count_num_restricted (insn, window_list); + + if (num_restrict > num_allowable_groups[group]) + return false; + + /* See if it fits in the first window. */ + if (window_list->window_num == 0) + { + /* The first widow should have only single and double path + uops. */ + if (path == path_double + && (window_list->num_uops + 2) > MAX_INSN) + return false; + else if (path != path_single) + return false; + } + return true; +} + +/* Add an instruction INSN with NUM_UOPS micro-operations to the + dispatch window WINDOW_LIST. 
*/ + +static void +add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops) +{ + int byte_len = min_insn_size (insn); + int num_insn = window_list->num_insn; + int imm_size; + sched_insn_info *window = window_list->window; + enum dispatch_group group = get_insn_group (insn); + enum insn_path path = get_insn_path (insn); + int num_imm_operand; + int num_imm32_operand; + int num_imm64_operand; + + if (!window_list->violation && group != disp_cmp + && !fits_dispatch_window (insn)) + window_list->violation = true; + + imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand, + &num_imm64_operand); + + /* Initialize window with new instruction. */ + window[num_insn].insn = insn; + window[num_insn].byte_len = byte_len; + window[num_insn].group = group; + window[num_insn].path = path; + window[num_insn].imm_bytes = imm_size; + + window_list->window_size += byte_len; + window_list->num_insn = num_insn + 1; + window_list->num_uops = window_list->num_uops + num_uops; + window_list->imm_size += imm_size; + window_list->num_imm += num_imm_operand; + window_list->num_imm_32 += num_imm32_operand; + window_list->num_imm_64 += num_imm64_operand; + + if (group == disp_store) + window_list->num_stores += 1; + else if (group == disp_load + || group == disp_prefetch) + window_list->num_loads += 1; + else if (group == disp_load_store) + { + window_list->num_stores += 1; + window_list->num_loads += 1; + } +} + +/* Adds a scheduled instruction, INSN, to the current dispatch window. + If the total bytes of instructions or the number of instructions in + the window exceed allowable, it allocates a new window. */ + +static void +add_to_dispatch_window (rtx_insn *insn) +{ + int byte_len; + dispatch_windows *window_list; + dispatch_windows *next_list; + dispatch_windows *window0_list; + enum insn_path path; + enum dispatch_group insn_group; + bool insn_fits; + int num_insn; + int num_uops; + int window_num; + int insn_num_uops; + int sum; + + if (INSN_CODE (insn) < 0) + return; + + byte_len = min_insn_size (insn); + window_list = dispatch_window_list; + next_list = window_list->next; + path = get_insn_path (insn); + insn_group = get_insn_group (insn); + + /* Get the last dispatch window. */ + if (next_list) + window_list = dispatch_window_list->next; + + if (path == path_single) + insn_num_uops = 1; + else if (path == path_double) + insn_num_uops = 2; + else + insn_num_uops = (int) path; + + /* If current window is full, get a new window. + Window number zero is full, if MAX_INSN uops are scheduled in it. + Window number one is full, if window zero's bytes plus window + one's bytes is 32, or if the bytes of the new instruction added + to the total makes it greater than 48, or it has already MAX_INSN + instructions in it. 
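/* Hypothetical helper restating the "window 1 is full" rule described
   above (a condensed sketch of the checks performed below, with
   MAX_INSN == 4):  */
static int
window1_full_p (int w0_bytes, int w1_bytes, int w1_insns, int new_insn_bytes)
{
  int sum = w0_bytes + w1_bytes;
  return sum == 32 || new_insn_bytes + sum >= 48 || w1_insns >= 4;
}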
*/ + num_insn = window_list->num_insn; + num_uops = window_list->num_uops; + window_num = window_list->window_num; + insn_fits = fits_dispatch_window (insn); + + if (num_insn >= MAX_INSN + || num_uops + insn_num_uops > MAX_INSN + || !(insn_fits)) + { + window_num = ~window_num & 1; + window_list = allocate_next_window (window_num); + } + + if (window_num == 0) + { + add_insn_window (insn, window_list, insn_num_uops); + if (window_list->num_insn >= MAX_INSN + && insn_group == disp_branch) + { + process_end_window (); + return; + } + } + else if (window_num == 1) + { + window0_list = window_list->prev; + sum = window0_list->window_size + window_list->window_size; + if (sum == 32 + || (byte_len + sum) >= 48) + { + process_end_window (); + window_list = dispatch_window_list; + } + + add_insn_window (insn, window_list, insn_num_uops); + } + else + gcc_unreachable (); + + if (is_end_basic_block (insn_group)) + { + /* End of basic block is reached do end-basic-block process. */ + process_end_window (); + return; + } +} + +/* Print the dispatch window, WINDOW_NUM, to FILE. */ + +DEBUG_FUNCTION static void +debug_dispatch_window_file (FILE *file, int window_num) +{ + dispatch_windows *list; + int i; + + if (window_num == 0) + list = dispatch_window_list; + else + list = dispatch_window_list1; + + fprintf (file, "Window #%d:\n", list->window_num); + fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n", + list->num_insn, list->num_uops, list->window_size); + fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n", + list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size); + + fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads, + list->num_stores); + fprintf (file, " insn info:\n"); + + for (i = 0; i < MAX_INSN; i++) + { + if (!list->window[i].insn) + break; + fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n", + i, group_name[list->window[i].group], + i, (void *)list->window[i].insn, + i, list->window[i].path, + i, list->window[i].byte_len, + i, list->window[i].imm_bytes); + } +} + +/* Print to stdout a dispatch window. */ + +DEBUG_FUNCTION void +debug_dispatch_window (int window_num) +{ + debug_dispatch_window_file (stdout, window_num); +} + +/* Print INSN dispatch information to FILE. */ + +DEBUG_FUNCTION static void +debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn) +{ + int byte_len; + enum insn_path path; + enum dispatch_group group; + int imm_size; + int num_imm_operand; + int num_imm32_operand; + int num_imm64_operand; + + if (INSN_CODE (insn) < 0) + return; + + byte_len = min_insn_size (insn); + path = get_insn_path (insn); + group = get_insn_group (insn); + imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand, + &num_imm64_operand); + + fprintf (file, " insn info:\n"); + fprintf (file, " group = %s, path = %d, byte_len = %d\n", + group_name[group], path, byte_len); + fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n", + num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size); +} + +/* Print to STDERR the status of the ready list with respect to + dispatch windows. */ + +DEBUG_FUNCTION void +debug_ready_dispatch (void) +{ + int i; + int no_ready = number_in_ready (); + + fprintf (stdout, "Number of ready: %d\n", no_ready); + + for (i = 0; i < no_ready; i++) + debug_insn_dispatch_info_file (stdout, get_ready_element (i)); +} + +/* This routine is the driver of the dispatch scheduler. 
*/ + +static void +do_dispatch (rtx_insn *insn, int mode) +{ + if (mode == DISPATCH_INIT) + init_dispatch_sched (); + else if (mode == ADD_TO_DISPATCH_WINDOW) + add_to_dispatch_window (insn); +} + +/* Return TRUE if Dispatch Scheduling is supported. */ + +static bool +has_dispatch (rtx_insn *insn, int action) +{ + if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 + || TARGET_BDVER4 || TARGET_ZNVER1) && flag_dispatch_scheduler) + switch (action) + { + default: + return false; + + case IS_DISPATCH_ON: + return true; + break; + + case IS_CMP: + return is_cmp (insn); + + case DISPATCH_VIOLATION: + return dispatch_violation (); + + case FITS_DISPATCH_WINDOW: + return fits_dispatch_window (insn); + } + + return false; +} + +/* Implementation of reassociation_width target hook used by + reassoc phase to identify parallelism level in reassociated + tree. Statements tree_code is passed in OPC. Arguments type + is passed in MODE. + + Currently parallel reassociation is enabled for Atom + processors only and we set reassociation width to be 2 + because Atom may issue up to 2 instructions per cycle. + + Return value should be fixed if parallel reassociation is + enabled for other processors. */ + +static int +ix86_reassociation_width (unsigned int, machine_mode mode) +{ + /* Vector part. */ + if (VECTOR_MODE_P (mode)) + { + if (TARGET_VECTOR_PARALLEL_EXECUTION) + return 2; + else + return 1; + } + + /* Scalar part. */ + if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL) + return 2; + else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL) + return ((TARGET_64BIT && ix86_tune == PROCESSOR_HASWELL)? 4 : 2); + else + return 1; +} + +/* ??? No autovectorization into MMX or 3DNOW until we can reliably + place emms and femms instructions. */ + +static machine_mode +ix86_preferred_simd_mode (machine_mode mode) +{ + if (!TARGET_SSE) + return word_mode; + + switch (mode) + { + case QImode: + return TARGET_AVX512BW ? V64QImode : + (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode; + case HImode: + return TARGET_AVX512BW ? V32HImode : + (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode; + case SImode: + return TARGET_AVX512F ? V16SImode : + (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode; + case DImode: + return TARGET_AVX512F ? V8DImode : + (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode; + + case SFmode: + if (TARGET_AVX512F) + return V16SFmode; + else if (TARGET_AVX && !TARGET_PREFER_AVX128) + return V8SFmode; + else + return V4SFmode; + + case DFmode: + if (TARGET_AVX512F) + return V8DFmode; + else if (TARGET_AVX && !TARGET_PREFER_AVX128) + return V4DFmode; + else if (TARGET_SSE2) + return V2DFmode; + /* FALLTHRU */ + + default: + return word_mode; + } +} + +/* If AVX is enabled then try vectorizing with both 256bit and 128bit + vectors. If AVX512F is enabled then try vectorizing with 512bit, + 256bit and 128bit vectors. */ + +static unsigned int +ix86_autovectorize_vector_sizes (void) +{ + return TARGET_AVX512F ? 64 | 32 | 16 : + (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0; +} + +/* Implemenation of targetm.vectorize.get_mask_mode. */ + +static machine_mode +ix86_get_mask_mode (unsigned nunits, unsigned vector_size) +{ + unsigned elem_size = vector_size / nunits; + + /* Scalar mask case. 
*/ + if ((TARGET_AVX512F && vector_size == 64) + || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))) + { + if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW) + return smallest_mode_for_size (nunits, MODE_INT); + } + + machine_mode elem_mode + = smallest_mode_for_size (elem_size * BITS_PER_UNIT, MODE_INT); + + gcc_assert (elem_size * nunits == vector_size); + + return mode_for_vector (elem_mode, nunits); +} + + + +/* Return class of registers which could be used for pseudo of MODE + and of class RCLASS for spilling instead of memory. Return NO_REGS + if it is not possible or non-profitable. */ +static reg_class_t +ix86_spill_class (reg_class_t rclass, machine_mode mode) +{ + if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX + && (mode == SImode || (TARGET_64BIT && mode == DImode)) + && rclass != NO_REGS && INTEGER_CLASS_P (rclass)) + return ALL_SSE_REGS; + return NO_REGS; +} + +/* Implement targetm.vectorize.init_cost. */ + +static void * +ix86_init_cost (struct loop *) +{ + unsigned *cost = XNEWVEC (unsigned, 3); + cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0; + return cost; +} + +/* Implement targetm.vectorize.add_stmt_cost. */ + +static unsigned +ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, + struct _stmt_vec_info *stmt_info, int misalign, + enum vect_cost_model_location where) +{ + unsigned *cost = (unsigned *) data; + unsigned retval = 0; + + tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; + int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); + + /* Penalize DFmode vector operations for !TARGET_VECTORIZE_DOUBLE. */ + if (kind == vector_stmt && !TARGET_VECTORIZE_DOUBLE + && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode) + stmt_cost *= 5; /* FIXME: The value here is arbitrary. */ + + /* Statements in an inner loop relative to the loop being + vectorized are weighted more heavily. The value here is + arbitrary and could potentially be improved with analysis. */ + if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) + count *= 50; /* FIXME. */ + + retval = (unsigned) (count * stmt_cost); + + /* We need to multiply all vector stmt cost by 1.7 (estimated cost) + for Silvermont as it has out of order integer pipeline and can execute + 2 scalar instruction per tick, but has in order SIMD pipeline. */ + if (TARGET_SILVERMONT || TARGET_INTEL) + if (stmt_info && stmt_info->stmt) + { + tree lhs_op = gimple_get_lhs (stmt_info->stmt); + if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE) + retval = (retval * 17) / 10; + } + + cost[where] += retval; + + return retval; +} + +/* Implement targetm.vectorize.finish_cost. */ + +static void +ix86_finish_cost (void *data, unsigned *prologue_cost, + unsigned *body_cost, unsigned *epilogue_cost) +{ + unsigned *cost = (unsigned *) data; + *prologue_cost = cost[vect_prologue]; + *body_cost = cost[vect_body]; + *epilogue_cost = cost[vect_epilogue]; +} + +/* Implement targetm.vectorize.destroy_cost_data. */ + +static void +ix86_destroy_cost_data (void *data) +{ + free (data); +} + +/* Validate target specific memory model bits in VAL. 
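/* Usage sketch of the HLE bits validated by the memory-model check below:
   GCC's documented x86 extensions __ATOMIC_HLE_ACQUIRE / __ATOMIC_HLE_RELEASE
   are OR-ed into an ordinary C11-style memory model (build with -mhle;
   lock_var and the helpers are hypothetical names).  */
static int lock_var;

static void
hle_lock (void)
{
  /* Spin until we own the (possibly elided) lock.  */
  while (__atomic_exchange_n (&lock_var, 1,
                              __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
    ;
}

static void
hle_unlock (void)
{
  __atomic_store_n (&lock_var, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}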
*/ + +static unsigned HOST_WIDE_INT +ix86_memmodel_check (unsigned HOST_WIDE_INT val) +{ + enum memmodel model = memmodel_from_int (val); + bool strong; + + if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE + |MEMMODEL_MASK) + || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE))) + { + warning (OPT_Winvalid_memory_model, + "Unknown architecture specific memory model"); + return MEMMODEL_SEQ_CST; + } + strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model)); + if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong)) + { + warning (OPT_Winvalid_memory_model, + "HLE_ACQUIRE not used with ACQUIRE or stronger memory model"); + return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE; + } + if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong)) + { + warning (OPT_Winvalid_memory_model, + "HLE_RELEASE not used with RELEASE or stronger memory model"); + return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE; + } + return val; +} + +/* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int, + CLONEI->vecsize_float and if CLONEI->simdlen is 0, also + CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted, + or number of vecsize_mangle variants that should be emitted. */ + +static int +ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, + struct cgraph_simd_clone *clonei, + tree base_type, int num) +{ + int ret = 1; + + if (clonei->simdlen + && (clonei->simdlen < 2 + || clonei->simdlen > 1024 + || (clonei->simdlen & (clonei->simdlen - 1)) != 0)) + { + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported simdlen %d", clonei->simdlen); + return 0; + } + + tree ret_type = TREE_TYPE (TREE_TYPE (node->decl)); + if (TREE_CODE (ret_type) != VOID_TYPE) + switch (TYPE_MODE (ret_type)) + { + case QImode: + case HImode: + case SImode: + case DImode: + case SFmode: + case DFmode: + /* case SCmode: */ + /* case DCmode: */ + break; + default: + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported return type %qT for simd\n", ret_type); + return 0; + } + + tree t; + int i; + + for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++) + /* FIXME: Shouldn't we allow such arguments if they are uniform? */ + switch (TYPE_MODE (TREE_TYPE (t))) + { + case QImode: + case HImode: + case SImode: + case DImode: + case SFmode: + case DFmode: + /* case SCmode: */ + /* case DCmode: */ + break; + default: + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported argument type %qT for simd\n", TREE_TYPE (t)); + return 0; + } + + if (clonei->cilk_elemental) + { + /* Parse here processor clause. If not present, default to 'b'. */ + clonei->vecsize_mangle = 'b'; + } + else if (!TREE_PUBLIC (node->decl)) + { + /* If the function isn't exported, we can pick up just one ISA + for the clones. 
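/* Sketch of how the mangle letters chosen here surface to users: with
   "#pragma omp declare simd" (and -fopenmp or -fopenmp-simd) GCC emits one
   clone per vecsize_mangle letter for an exported function.  For a
   float -> float function the AVX2 ('d') clone uses vecsize_float == 256,
   so simdlen == 256 / 32 == 8 and the clone's symbol is mangled roughly as
   _ZGVdN8v_vec_scale per the x86 vector-function ABI (vec_scale is a
   hypothetical example function).  */
#pragma omp declare simd
float
vec_scale (float x)
{
  return 2.0f * x;
}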
*/ + if (TARGET_AVX512F) + clonei->vecsize_mangle = 'e'; + else if (TARGET_AVX2) + clonei->vecsize_mangle = 'd'; + else if (TARGET_AVX) + clonei->vecsize_mangle = 'c'; + else + clonei->vecsize_mangle = 'b'; + ret = 1; + } + else + { + clonei->vecsize_mangle = "bcde"[num]; + ret = 4; + } + clonei->mask_mode = VOIDmode; + switch (clonei->vecsize_mangle) + { + case 'b': + clonei->vecsize_int = 128; + clonei->vecsize_float = 128; + break; + case 'c': + clonei->vecsize_int = 128; + clonei->vecsize_float = 256; + break; + case 'd': + clonei->vecsize_int = 256; + clonei->vecsize_float = 256; + break; + case 'e': + clonei->vecsize_int = 512; + clonei->vecsize_float = 512; + if (TYPE_MODE (base_type) == QImode) + clonei->mask_mode = DImode; + else + clonei->mask_mode = SImode; + break; + } + if (clonei->simdlen == 0) + { + if (SCALAR_INT_MODE_P (TYPE_MODE (base_type))) + clonei->simdlen = clonei->vecsize_int; + else + clonei->simdlen = clonei->vecsize_float; + clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type)); + } + else if (clonei->simdlen > 16) + { + /* For compatibility with ICC, use the same upper bounds + for simdlen. In particular, for CTYPE below, use the return type, + unless the function returns void, in that case use the characteristic + type. If it is possible for given SIMDLEN to pass CTYPE value + in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs + for 64-bit code), accept that SIMDLEN, otherwise warn and don't + emit corresponding clone. */ + tree ctype = ret_type; + if (TREE_CODE (ret_type) == VOID_TYPE) + ctype = base_type; + int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen; + if (SCALAR_INT_MODE_P (TYPE_MODE (ctype))) + cnt /= clonei->vecsize_int; + else + cnt /= clonei->vecsize_float; + if (cnt > (TARGET_64BIT ? 16 : 8)) + { + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported simdlen %d", clonei->simdlen); + return 0; + } + } + return ret; +} + +/* Add target attribute to SIMD clone NODE if needed. */ + +static void +ix86_simd_clone_adjust (struct cgraph_node *node) +{ + const char *str = NULL; + gcc_assert (node->decl == cfun->decl); + switch (node->simdclone->vecsize_mangle) + { + case 'b': + if (!TARGET_SSE2) + str = "sse2"; + break; + case 'c': + if (!TARGET_AVX) + str = "avx"; + break; + case 'd': + if (!TARGET_AVX2) + str = "avx2"; + break; + case 'e': + if (!TARGET_AVX512F) + str = "avx512f"; + break; + default: + gcc_unreachable (); + } + if (str == NULL) + return; + push_cfun (NULL); + tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str)); + bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0); + gcc_assert (ok); + pop_cfun (); + ix86_reset_previous_fndecl (); + ix86_set_current_function (node->decl); +} + +/* If SIMD clone NODE can't be used in a vectorized loop + in current function, return -1, otherwise return a badness of using it + (0 if it is most desirable from vecsize_mangle point of view, 1 + slightly less desirable, etc.). */ + +static int +ix86_simd_clone_usable (struct cgraph_node *node) +{ + switch (node->simdclone->vecsize_mangle) + { + case 'b': + if (!TARGET_SSE2) + return -1; + if (!TARGET_AVX) + return 0; + return TARGET_AVX2 ? 2 : 1; + case 'c': + if (!TARGET_AVX) + return -1; + return TARGET_AVX2 ? 1 : 0; + break; + case 'd': + if (!TARGET_AVX2) + return -1; + return 0; + case 'e': + if (!TARGET_AVX512F) + return -1; + return 0; + default: + gcc_unreachable (); + } +} + +/* This function adjusts the unroll factor based on + the hardware capabilities. 
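/* The cap computed below, restated as arithmetic (illustrative only): a
   bdver3/bdver4 loop body with mem_count memory references is unrolled at
   most 32 / mem_count times, e.g. 4 references allow a factor of 8, while
   more than 32 references leave the requested factor unchanged.  */
static unsigned
bdver_unroll_cap (unsigned nunroll, unsigned mem_count)
{
  if (mem_count && mem_count <= 32)
    return 32 / mem_count;
  return nunroll;
}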
For ex, bdver3 has + a loop buffer which makes unrolling of smaller + loops less important. This function decides the + unroll factor using number of memory references + (value 32 is used) as a heuristic. */ + +static unsigned +ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop) +{ + basic_block *bbs; + rtx_insn *insn; + unsigned i; + unsigned mem_count = 0; + + if (!TARGET_ADJUST_UNROLL) + return nunroll; + + /* Count the number of memory references within the loop body. + This value determines the unrolling factor for bdver3 and bdver4 + architectures. */ + subrtx_iterator::array_type array; + bbs = get_loop_body (loop); + for (i = 0; i < loop->num_nodes; i++) + FOR_BB_INSNS (bbs[i], insn) + if (NONDEBUG_INSN_P (insn)) + FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) + if (const_rtx x = *iter) + if (MEM_P (x)) + { + machine_mode mode = GET_MODE (x); + unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; + if (n_words > 4) + mem_count += 2; + else + mem_count += 1; + } + free (bbs); + + if (mem_count && mem_count <=32) + return 32/mem_count; + + return nunroll; +} + + +/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */ + +static bool +ix86_float_exceptions_rounding_supported_p (void) +{ + /* For x87 floating point with standard excess precision handling, + there is no adddf3 pattern (since x87 floating point only has + XFmode operations) so the default hook implementation gets this + wrong. */ + return TARGET_80387 || TARGET_SSE_MATH; +} + +/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */ + +static void +ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) +{ + if (!TARGET_80387 && !TARGET_SSE_MATH) + return; + tree exceptions_var = create_tmp_var_raw (integer_type_node); + if (TARGET_80387) + { + tree fenv_index_type = build_index_type (size_int (6)); + tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type); + tree fenv_var = create_tmp_var_raw (fenv_type); + TREE_ADDRESSABLE (fenv_var) = 1; + tree fenv_ptr = build_pointer_type (fenv_type); + tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var); + fenv_addr = fold_convert (ptr_type_node, fenv_addr); + tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV]; + tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV]; + tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW]; + tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX]; + tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr); + tree hold_fnclex = build_call_expr (fnclex, 0); + fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv, + NULL_TREE, NULL_TREE); + *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, + hold_fnclex); + *clear = build_call_expr (fnclex, 0); + tree sw_var = create_tmp_var_raw (short_unsigned_type_node); + tree fnstsw_call = build_call_expr (fnstsw, 0); + tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node, + sw_var, fnstsw_call); + tree exceptions_x87 = fold_convert (integer_type_node, sw_var); + tree update_mod = build2 (MODIFY_EXPR, integer_type_node, + exceptions_var, exceptions_x87); + *update = build2 (COMPOUND_EXPR, integer_type_node, + sw_mod, update_mod); + tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr); + *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv); + } + if (TARGET_SSE_MATH) + { + tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node); + tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node); + tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR]; + tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR]; + tree 
stmxcsr_hold_call = build_call_expr (stmxcsr, 0); + tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node, + mxcsr_orig_var, stmxcsr_hold_call); + tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node, + mxcsr_orig_var, + build_int_cst (unsigned_type_node, 0x1f80)); + hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val, + build_int_cst (unsigned_type_node, 0xffffffc0)); + tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node, + mxcsr_mod_var, hold_mod_val); + tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var); + tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node, + hold_assign_orig, hold_assign_mod); + hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all, + ldmxcsr_hold_call); + if (*hold) + *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all); + else + *hold = hold_all; + tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var); + if (*clear) + *clear = build2 (COMPOUND_EXPR, void_type_node, *clear, + ldmxcsr_clear_call); + else + *clear = ldmxcsr_clear_call; + tree stxmcsr_update_call = build_call_expr (stmxcsr, 0); + tree exceptions_sse = fold_convert (integer_type_node, + stxmcsr_update_call); + if (*update) + { + tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node, + exceptions_var, exceptions_sse); + tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node, + exceptions_var, exceptions_mod); + *update = build2 (COMPOUND_EXPR, integer_type_node, *update, + exceptions_assign); + } + else + *update = build2 (MODIFY_EXPR, integer_type_node, + exceptions_var, exceptions_sse); + tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var); + *update = build2 (COMPOUND_EXPR, void_type_node, *update, + ldmxcsr_update_call); + } + tree atomic_feraiseexcept + = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); + tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept, + 1, exceptions_var); + *update = build2 (COMPOUND_EXPR, void_type_node, *update, + atomic_feraiseexcept_call); +} + +/* Return mode to be used for bounds or VOIDmode + if bounds are not supported. */ + +static enum machine_mode +ix86_mpx_bound_mode () +{ + /* Do not support pointer checker if MPX + is not enabled. */ + if (!TARGET_MPX) + { + if (flag_check_pointer_bounds) + warning (0, "Pointer Checker requires MPX support on this target." + " Use -mmpx options to enable MPX."); + return VOIDmode; + } + + return BNDmode; +} + +/* Return constant used to statically initialize constant bounds. + + This function is used to create special bound values. For now + only INIT bounds and NONE bounds are expected. More special + values may be added later. */ + +static tree +ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub) +{ + tree low = lb ? build_minus_one_cst (pointer_sized_int_node) + : build_zero_cst (pointer_sized_int_node); + tree high = ub ? build_zero_cst (pointer_sized_int_node) + : build_minus_one_cst (pointer_sized_int_node); + + /* This function is supposed to be used to create INIT and + NONE bounds only. */ + gcc_assert ((lb == 0 && ub == -1) + || (lb == -1 && ub == 0)); + + return build_complex (NULL, low, high); +} + +/* Generate a list of statements STMTS to initialize pointer bounds + variable VAR with bounds LB and UB. Return the number of generated + statements. 
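/* The MXCSR manipulation in ix86_atomic_assign_expand_fenv above, on a
   scalar (illustrative): OR-ing 0x1f80 sets the six exception mask bits
   (bits 7..12) and AND-ing 0xffffffc0 clears the sticky exception flags
   (bits 0..5), which is what the "hold" sequence stores back via LDMXCSR.  */
static unsigned int
mxcsr_hold_value (unsigned int mxcsr)
{
  return (mxcsr | 0x1f80u) & 0xffffffc0u;
}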
*/ + +static int +ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts) +{ + tree bnd_ptr = build_pointer_type (pointer_sized_int_node); + tree lhs, modify, var_p; + + ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub); + var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var)); + + lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p); + modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb); + append_to_statement_list (modify, stmts); + + lhs = build1 (INDIRECT_REF, pointer_sized_int_node, + build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p, + TYPE_SIZE_UNIT (pointer_sized_int_node))); + modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub); + append_to_statement_list (modify, stmts); + + return 2; +} + +#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES +/* For i386, common symbol is local only for non-PIE binaries. For + x86-64, common symbol is local only for non-PIE binaries or linker + supports copy reloc in PIE binaries. */ + +static bool +ix86_binds_local_p (const_tree exp) +{ + return default_binds_local_p_3 (exp, flag_shlib != 0, true, true, + (!flag_pic + || (TARGET_64BIT + && HAVE_LD_PIE_COPYRELOC != 0))); +} +#endif + +/* If MEM is in the form of [base+offset], extract the two parts + of address and set to BASE and OFFSET, otherwise return false. */ + +static bool +extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset) +{ + rtx addr; + + gcc_assert (MEM_P (mem)); + + addr = XEXP (mem, 0); + + if (GET_CODE (addr) == CONST) + addr = XEXP (addr, 0); + + if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF) + { + *base = addr; + *offset = const0_rtx; + return true; + } + + if (GET_CODE (addr) == PLUS + && (REG_P (XEXP (addr, 0)) + || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF) + && CONST_INT_P (XEXP (addr, 1))) + { + *base = XEXP (addr, 0); + *offset = XEXP (addr, 1); + return true; + } + + return false; +} + +/* Given OPERANDS of consecutive load/store, check if we can merge + them into move multiple. LOAD is true if they are load instructions. + MODE is the mode of memory operands. */ + +bool +ix86_operands_ok_for_move_multiple (rtx *operands, bool load, + enum machine_mode mode) +{ + HOST_WIDE_INT offval_1, offval_2, msize; + rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2; + + if (load) + { + mem_1 = operands[1]; + mem_2 = operands[3]; + reg_1 = operands[0]; + reg_2 = operands[2]; + } + else + { + mem_1 = operands[0]; + mem_2 = operands[2]; + reg_1 = operands[1]; + reg_2 = operands[3]; + } + + gcc_assert (REG_P (reg_1) && REG_P (reg_2)); + + if (REGNO (reg_1) != REGNO (reg_2)) + return false; + + /* Check if the addresses are in the form of [base+offset]. */ + if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1)) + return false; + if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2)) + return false; + + /* Check if the bases are the same. */ + if (!rtx_equal_p (base_1, base_2)) + return false; + + offval_1 = INTVAL (offset_1); + offval_2 = INTVAL (offset_2); + msize = GET_MODE_SIZE (mode); + /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */ + if (offval_1 + msize != offval_2) + return false; + + return true; +} + +/* Implement the TARGET_OPTAB_SUPPORTED_P hook. 
*/ + +static bool +ix86_optab_supported_p (int op, machine_mode mode1, machine_mode, + optimization_type opt_type) +{ + switch (op) + { + case asin_optab: + case acos_optab: + case log1p_optab: + case exp_optab: + case exp10_optab: + case exp2_optab: + case expm1_optab: + case ldexp_optab: + case scalb_optab: + case round_optab: + return opt_type == OPTIMIZE_FOR_SPEED; + + case rint_optab: + if (SSE_FLOAT_MODE_P (mode1) + && TARGET_SSE_MATH + && !flag_trapping_math + && !TARGET_ROUND) + return opt_type == OPTIMIZE_FOR_SPEED; + return true; + + case floor_optab: + case ceil_optab: + case btrunc_optab: + if (SSE_FLOAT_MODE_P (mode1) + && TARGET_SSE_MATH + && !flag_trapping_math + && TARGET_ROUND) + return true; + return opt_type == OPTIMIZE_FOR_SPEED; + + case rsqrt_optab: + return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (); + + default: + return true; + } +} + +/* Address space support. + + This is not "far pointers" in the 16-bit sense, but an easy way + to use %fs and %gs segment prefixes. Therefore: + + (a) All address spaces have the same modes, + (b) All address spaces have the same addresss forms, + (c) While %fs and %gs are technically subsets of the generic + address space, they are probably not subsets of each other. + (d) Since we have no access to the segment base register values + without resorting to a system call, we cannot convert a + non-default address space to a default address space. + Therefore we do not claim %fs or %gs are subsets of generic. + + Therefore we can (mostly) use the default hooks. */ + +/* All use of segmentation is assumed to make address 0 valid. */ + +static bool +ix86_addr_space_zero_address_valid (addr_space_t as) +{ + return as != ADDR_SPACE_GENERIC; +} +#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID +#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid + +/* Initialize the GCC target structure. 
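At the source level these address-space hooks back GCC 6's __seg_fs/__seg_gs named address spaces on x86. A hedged usage sketch, assuming a GCC 6+ x86 compiler and a GS base already set up by the runtime; it is not part of the patch.

/* Editorial sketch: a per-CPU style access through %gs.  Offset 0 is a legal
   address inside the segment, which is what ix86_addr_space_zero_address_valid
   asserts for the non-generic spaces.  */
#include <stddef.h>

static inline int read_gs_int (size_t offset)
{
  const __seg_gs int *p = (const __seg_gs int *) offset;
  return *p;                      /* emits e.g. movl %gs:(%rdi), %eax */
}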
*/ +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory + +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address + +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table +#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P +#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true +#if TARGET_DLLIMPORT_DECL_ATTRIBUTES +# undef TARGET_MERGE_DECL_ATTRIBUTES +# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes +#endif + +#undef TARGET_COMP_TYPE_ATTRIBUTES +#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS ix86_init_builtins +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL ix86_builtin_decl +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN ix86_expand_builtin + +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION +#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ + ix86_builtin_vectorized_function + +#undef TARGET_VECTORIZE_BUILTIN_GATHER +#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather + +#undef TARGET_VECTORIZE_BUILTIN_SCATTER +#define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter + +#undef TARGET_BUILTIN_RECIPROCAL +#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal + +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue + +#undef TARGET_ENCODE_SECTION_INFO +#ifndef SUBTARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info +#else +#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO +#endif + +#undef TARGET_ASM_OPEN_PAREN +#define TARGET_ASM_OPEN_PAREN "" +#undef TARGET_ASM_CLOSE_PAREN +#define TARGET_ASM_CLOSE_PAREN "" + +#undef TARGET_ASM_BYTE_OP +#define TARGET_ASM_BYTE_OP ASM_BYTE + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG +#ifdef ASM_QUAD +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD +#endif + +#undef TARGET_PROFILE_BEFORE_PROLOGUE +#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue + +#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME +#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name + +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP +#undef TARGET_ASM_UNALIGNED_DI_OP +#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP + +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND ix86_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p +#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA +#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra + +#undef TARGET_SCHED_INIT_GLOBAL +#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ + ia32_multipass_dfa_lookahead +#undef TARGET_SCHED_MACRO_FUSION_P +#define TARGET_SCHED_MACRO_FUSION_P 
ix86_macro_fusion_p +#undef TARGET_SCHED_MACRO_FUSION_PAIR_P +#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall + +#undef TARGET_MEMMODEL_CHECK +#define TARGET_MEMMODEL_CHECK ix86_memmodel_check + +#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV +#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv + +#ifdef HAVE_AS_TLS +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS true +#endif +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem +#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P +#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true + +#undef TARGET_DELEGITIMIZE_ADDRESS +#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address + +#undef TARGET_MS_BITFIELD_LAYOUT_P +#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p + +#if TARGET_MACHO +#undef TARGET_BINDS_LOCAL_P +#define TARGET_BINDS_LOCAL_P darwin_binds_local_p +#else +#undef TARGET_BINDS_LOCAL_P +#define TARGET_BINDS_LOCAL_P ix86_binds_local_p +#endif +#if TARGET_DLLIMPORT_DECL_ATTRIBUTES +#undef TARGET_BINDS_LOCAL_P +#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p +#endif + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START x86_file_start + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE ix86_option_override + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS ix86_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST ix86_address_cost + +#undef TARGET_FIXED_CONDITION_CODE_REGS +#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs +#undef TARGET_CC_MODES_COMPATIBLE +#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg + +#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE +#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list + +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN ix86_fold_builtin + +#undef TARGET_GIMPLE_FOLD_BUILTIN +#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin + +#undef TARGET_COMPARE_VERSION_PRIORITY +#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority + +#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY +#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \ + ix86_generate_version_dispatcher_body + +#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER +#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \ + ix86_get_function_versions_dispatcher + +#undef TARGET_ENUM_VA_LIST_P +#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list + +#undef TARGET_FN_ABI_VA_LIST +#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list + +#undef TARGET_CANONICAL_VA_LIST_TYPE +#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type + +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start + +#undef TARGET_MD_ASM_ADJUST +#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust + +#undef TARGET_PROMOTE_PROTOTYPES +#define 
TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG ix86_function_arg +#undef TARGET_INIT_PIC_REG +#define TARGET_INIT_PIC_REG ix86_init_pic_reg +#undef TARGET_USE_PSEUDO_PIC_REG +#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference +#undef TARGET_INTERNAL_ARG_POINTER +#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer +#undef TARGET_UPDATE_STACK_BOUNDARY +#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary +#undef TARGET_GET_DRAP_RTX +#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true +#undef TARGET_STATIC_CHAIN +#define TARGET_STATIC_CHAIN ix86_static_chain +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init +#undef TARGET_RETURN_POPS_ARGS +#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args + +#undef TARGET_LEGITIMATE_COMBINED_INSN +#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn + +#undef TARGET_ASAN_SHADOW_OFFSET +#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset + +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p + +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p + +#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P +#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \ + ix86_libgcc_floating_mode_supported_p + +#undef TARGET_C_MODE_FOR_SUFFIX +#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix + +#ifdef HAVE_AS_TLS +#undef TARGET_ASM_OUTPUT_DWARF_DTPREL +#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel +#endif + +#ifdef SUBTARGET_INSERT_ATTRIBUTES +#undef TARGET_INSERT_ATTRIBUTES +#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES +#endif + +#undef TARGET_MANGLE_TYPE +#define TARGET_MANGLE_TYPE ix86_mangle_type + +#if !TARGET_MACHO +#undef TARGET_STACK_PROTECT_FAIL +#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail +#endif + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE ix86_function_value + +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode + +#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE +#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change + +#undef TARGET_MEMBER_TYPE_FORCES_BLK +#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk + +#undef TARGET_INSTANTIATE_DECLS +#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD ix86_secondary_reload + +#undef TARGET_CLASS_MAX_NREGS +#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs + +#undef TARGET_PREFERRED_RELOAD_CLASS +#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class +#undef 
TARGET_PREFERRED_OUTPUT_RELOAD_CLASS +#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class +#undef TARGET_CLASS_LIKELY_SPILLED_P +#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p + +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + ix86_builtin_vectorization_cost +#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK +#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \ + ix86_vectorize_vec_perm_const_ok +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \ + ix86_preferred_simd_mode +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ + ix86_autovectorize_vector_sizes +#undef TARGET_VECTORIZE_GET_MASK_MODE +#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode +#undef TARGET_VECTORIZE_INIT_COST +#define TARGET_VECTORIZE_INIT_COST ix86_init_cost +#undef TARGET_VECTORIZE_ADD_STMT_COST +#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost +#undef TARGET_VECTORIZE_FINISH_COST +#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost +#undef TARGET_VECTORIZE_DESTROY_COST_DATA +#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data + +#undef TARGET_SET_CURRENT_FUNCTION +#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function + +#undef TARGET_OPTION_VALID_ATTRIBUTE_P +#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p + +#undef TARGET_OPTION_SAVE +#define TARGET_OPTION_SAVE ix86_function_specific_save + +#undef TARGET_OPTION_RESTORE +#define TARGET_OPTION_RESTORE ix86_function_specific_restore + +#undef TARGET_OPTION_POST_STREAM_IN +#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in + +#undef TARGET_OPTION_PRINT +#define TARGET_OPTION_PRINT ix86_function_specific_print + +#undef TARGET_OPTION_FUNCTION_VERSIONS +#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions + +#undef TARGET_CAN_INLINE_P +#define TARGET_CAN_INLINE_P ix86_can_inline_p + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p + +#undef TARGET_LRA_P +#define TARGET_LRA_P hook_bool_void_true + +#undef TARGET_REGISTER_PRIORITY +#define TARGET_REGISTER_PRIORITY ix86_register_priority + +#undef TARGET_REGISTER_USAGE_LEVELING_P +#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true + +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE ix86_can_eliminate + +#undef TARGET_EXTRA_LIVE_ON_ENTRY +#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry + +#undef TARGET_ASM_CODE_END +#define TARGET_ASM_CODE_END ix86_code_end + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage + +#if TARGET_MACHO +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS darwin_rename_builtins +#endif + +#undef TARGET_LOOP_UNROLL_ADJUST +#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust + +#undef TARGET_SPILL_CLASS +#define TARGET_SPILL_CLASS ix86_spill_class + +#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN +#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \ + ix86_simd_clone_compute_vecsize_and_simdlen + +#undef TARGET_SIMD_CLONE_ADJUST +#define TARGET_SIMD_CLONE_ADJUST \ + ix86_simd_clone_adjust + +#undef TARGET_SIMD_CLONE_USABLE +#define TARGET_SIMD_CLONE_USABLE 
\ + ix86_simd_clone_usable + +#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P +#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \ + ix86_float_exceptions_rounding_supported_p + +#undef TARGET_MODE_EMIT +#define TARGET_MODE_EMIT ix86_emit_mode_set + +#undef TARGET_MODE_NEEDED +#define TARGET_MODE_NEEDED ix86_mode_needed + +#undef TARGET_MODE_AFTER +#define TARGET_MODE_AFTER ix86_mode_after + +#undef TARGET_MODE_ENTRY +#define TARGET_MODE_ENTRY ix86_mode_entry + +#undef TARGET_MODE_EXIT +#define TARGET_MODE_EXIT ix86_mode_exit + +#undef TARGET_MODE_PRIORITY +#define TARGET_MODE_PRIORITY ix86_mode_priority + +#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS +#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true + +#undef TARGET_LOAD_BOUNDS_FOR_ARG +#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds + +#undef TARGET_STORE_BOUNDS_FOR_ARG +#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds + +#undef TARGET_LOAD_RETURNED_BOUNDS +#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds + +#undef TARGET_STORE_RETURNED_BOUNDS +#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds + +#undef TARGET_CHKP_BOUND_MODE +#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode + +#undef TARGET_BUILTIN_CHKP_FUNCTION +#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function + +#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS +#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds + +#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT +#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant + +#undef TARGET_CHKP_INITIALIZE_BOUNDS +#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds + +#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS +#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds + +#undef TARGET_OFFLOAD_OPTIONS +#define TARGET_OFFLOAD_OPTIONS \ + ix86_offload_options + +#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT +#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512 + +#undef TARGET_OPTAB_SUPPORTED_P +#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p + +struct gcc_target targetm = TARGET_INITIALIZER; + +#include "gt-i386.h" +#endif Index: src/x86/Target.cpp =================================================================== --- src/x86/Target.cpp +++ src/x86/Target.cpp @@ -27,6 +27,7 @@ // LLVM headers #include "llvm/MC/SubtargetFeature.h" #include "llvm/IR/Module.h" +#include "llvm/IR/MDBuilder.h" // System headers #include @@ -46,15 +47,26 @@ #include "tree.h" #include "diagnostic.h" +#if (GCC_MAJOR > 4) +#include "function.h" +#include "basic-block.h" +#include "tree-core.h" +#include "rtl.h" +#endif #include "gimple.h" -#if (GCC_MINOR > 6) +#if GCC_VERSION_CODE > GCC_VERSION(4, 6) #include "gimple-pretty-print.h" #endif #include "toplev.h" -#if (GCC_MINOR == 6) +#if (GCC_MAJOR > 4) +struct stringop_algs; +extern void debug_gimple_stmt(gimple *); +#else +#if GCC_VERSION_CODE == GCC_VERSION(4, 6) extern void debug_gimple_stmt(union gimple_statement_d *); #endif +#endif #ifndef ENABLE_BUILD_WITH_CXX } // extern "C" @@ -65,17 +77,30 @@ // One day we will do parameter marshalling right: by using CUMULATIVE_ARGS. // While waiting for that happy day, just include a chunk of i386.c. 
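The Target.cpp changes that follow repeatedly replace the removed llvm::getGlobalContext() with a context obtained from a nearby IR object. Condensed here into one helper for reference; contextFor is an illustrative name, the patch itself open-codes the #if at each use site.

// Editorial sketch, not part of the patch.
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

static llvm::LLVMContext &contextFor(llvm::Type *Ty) {
#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8)
  return Ty->getContext();          // every Type knows its owning context
#else
  return llvm::getGlobalContext();  // global context, removed in LLVM 3.9
#endif
}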
+#if (GCC_MAJOR > 4) +#include "ABIHack6.inc" +#define MAX_CLASSES 8 +#else #include "ABIHack.inc" +#endif using namespace llvm; -static LLVMContext &Context = getGlobalContext(); +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) +static LLVMContext &TheContext = getGlobalContext(); +#endif /// BitCastToIntVector - Bitcast the vector operand to a vector of integers of // the same length. static Value *BitCastToIntVector(Value *Op, LLVMBuilder &Builder) { VectorType *VecTy = cast(Op->getType()); Type *EltTy = VecTy->getElementType(); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + EltTy->getContext(); +#else + TheContext; +#endif Type *IntTy = IntegerType::get(Context, EltTy->getPrimitiveSizeInBits()); return Builder.CreateBitCast(Op, VectorType::get(IntTy, VecTy->getNumElements())); @@ -106,9 +131,11 @@ * code, emit the code now. If we can handle the code, this macro should emit * the code, return true. */ -bool TreeToLLVM::TargetIntrinsicLower( - gimple stmt, tree fndecl, const MemRef */*DestLoc*/, Value *&Result, - Type *ResultType, std::vector &Ops) { +bool TreeToLLVM::TargetIntrinsicLower(GimpleTy *stmt, tree fndecl, + const MemRef */*DestLoc*/, + Value *&Result, + Type *ResultType, + std::vector &Ops) { // DECL_FUNCTION_CODE contains a value of the enumerated type ix86_builtins, // declared in i386.c. If this type was visible to us then we could simply // use a switch statement on DECL_FUNCTION_CODE to jump to the right code for @@ -161,6 +188,12 @@ bool flip = false; unsigned PredCode; + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + ResultType->getContext(); +#else + TheContext; +#endif switch (Handler) { case SearchForHandler: debug_gimple_stmt(stmt); @@ -878,10 +911,12 @@ Ops[1] = ConstantInt::get(IntTy, (shiftVal - 16) * 8); // create i32 constant +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) Function *F = Intrinsic::getDeclaration(TheModule, Intrinsic::x86_sse2_psrl_dq); Result = Builder.CreateCall(F, ArrayRef(&Ops[0], 2), "palignr"); +#endif Result = Builder.CreateBitCast(Result, ResultType); return true; } @@ -905,6 +940,7 @@ case movntq: case movntsd: case movntss: { +#if LLVM_VERSION_CODE < LLVM_VERSION(3, 9) MDNode *Node = MDNode::get(Context, Builder.getInt32(1)); // Convert the type of the pointer to a pointer to the stored type. 
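Further down, the ctlz/cttz lowering switches from IRBuilder::CreateCall2 to CreateCall with a braced argument list, since the numbered CreateCallN helpers no longer exist in newer LLVM. A minimal stand-alone sketch of that shape; emitCtlz is an illustrative name.

// Editorial sketch, not part of the patch.
#include "llvm/IR/IRBuilder.h"

static llvm::Value *emitCtlz(llvm::IRBuilder<> &B, llvm::Function *Ctlz,
                             llvm::Value *V) {
#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8)
  return B.CreateCall(Ctlz, {V, B.getTrue()});   // second arg: is_zero_undef
#else
  return B.CreateCall2(Ctlz, V, B.getTrue());
#endif
}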
@@ -914,6 +950,7 @@ StoreInst *SI = Builder.CreateAlignedStore(Ops[1], Ptr, 16); SI->setMetadata(TheModule->getMDKindID("nontemporal"), Node); +#endif return true; } case rsqrtf: { @@ -1034,7 +1071,12 @@ Result = Builder.CreateTruncOrBitCast(Ops[0], Int16Ty); Function *ctlz = Intrinsic::getDeclaration(TheModule, Intrinsic::ctlz, Int16Ty); - Result = Builder.CreateCall2(ctlz, Result, Builder.getTrue()); + Result = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Builder.CreateCall(ctlz, {Result, Builder.getTrue()}); +#else + Builder.CreateCall2(ctlz, Result, Builder.getTrue()); +#endif return true; } case ctzs: { @@ -1043,7 +1085,12 @@ Result = Builder.CreateTruncOrBitCast(Ops[0], Int16Ty); Function *cttz = Intrinsic::getDeclaration(TheModule, Intrinsic::cttz, Int16Ty); - Result = Builder.CreateCall2(cttz, Result, Builder.getTrue()); + Result = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Builder.CreateCall(cttz, {Result, Builder.getTrue()}); +#else + Builder.CreateCall2(cttz, Result, Builder.getTrue()); +#endif return true; } case rdrand16_step: @@ -1076,11 +1123,13 @@ tree TreeType, enum machine_mode Mode) { int IntRegs, SSERegs; /* If examine_argument return 0, then it's passed byval in memory.*/ +#if (GCC_MAJOR < 5) int ret = examine_argument(Mode, TreeType, 0, &IntRegs, &SSERegs); if (ret == 0) return true; if (ret == 1 && IntRegs == 0 && SSERegs == 0) // zero-sized struct return true; +#endif return false; } @@ -1116,6 +1165,12 @@ if (!STy || STy->isPacked()) return false; + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + STy->getContext(); +#else + TheContext; +#endif for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Type *EltTy = STy->getElementType(i); // 32 and 64-bit integers are fine, as are float and double. Long double @@ -1152,6 +1207,12 @@ // makes it ABI compatible for x86-64. Same for _Complex char and _Complex // short in 32-bit. Type *EltTy = STy->getElementType(0); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + EltTy->getContext(); +#else + TheContext; +#endif return !((TARGET_64BIT && (EltTy->isIntegerTy() || EltTy == Type::getFloatTy(Context) || EltTy == Type::getDoubleTy(Context))) || EltTy->isIntegerTy(16) || @@ -1164,6 +1225,7 @@ if (llvm_x86_should_pass_aggregate_as_fca(TreeType, Ty)) return false; +#if (GCC_MAJOR < 5) enum machine_mode Mode = type_natural_mode(TreeType, NULL); HOST_WIDE_INT Bytes = (Mode == BLKmode) ? int_size_in_bytes(TreeType) : (int) GET_MODE_SIZE(Mode); @@ -1177,6 +1239,7 @@ return !llvm_x86_32_should_pass_aggregate_in_mixed_regs(TreeType, Ty, Elts); } return llvm_x86_64_should_pass_aggregate_in_memory(TreeType, Mode); +#endif } /* count_num_registers_uses - Return the number of GPRs and XMMs parameter @@ -1185,6 +1248,12 @@ unsigned &NumGPRs, unsigned &NumXMMs) { for (size_t i = 0, e = ScalarElts.size(); i != e; ++i) { Type *Ty = ScalarElts[i]; + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif if (VectorType *VTy = llvm::dyn_cast(Ty)) { if (!TARGET_MACHO) continue; @@ -1266,6 +1335,7 @@ return false; enum x86_64_reg_class Class[MAX_CLASSES]; +#if (GCC_MAJOR < 5) enum machine_mode Mode = type_natural_mode(TreeType, NULL); bool totallyEmpty = true; HOST_WIDE_INT Bytes = (Mode == BLKmode) ? 
int_size_in_bytes(TreeType) : (int) @@ -1387,6 +1457,7 @@ } return !totallyEmpty; +#endif } /* On Darwin x86-32, vectors which are not MMX nor SSE should be passed as @@ -1500,6 +1571,7 @@ // Let gcc specific routine answer the question. enum x86_64_reg_class Class[MAX_CLASSES]; +#if (GCC_MAJOR < 5) enum machine_mode Mode = type_natural_mode(TreeType, NULL); int NumClasses = classify_argument(Mode, TreeType, Class, 0); if (NumClasses == 0) @@ -1515,6 +1587,7 @@ // One word is padding which is not passed at all; treat this as returning // the scalar type of the other word. return false; +#endif // Otherwise, use of multiple value return is OK. return true; @@ -1526,6 +1599,12 @@ *Offset = 0; Type *Ty = ConvertType(type); uint64_t Size = getDataLayout().getTypeAllocSize(Ty); + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif if (Size == 0) return Type::getVoidTy(Context); else if (Size == 1) @@ -1543,6 +1622,7 @@ // This logic relies on llvm_suitable_multiple_ret_value_type to have // removed anything not expected here. enum x86_64_reg_class Class[MAX_CLASSES]; +#if (GCC_MAJOR < 5) enum machine_mode Mode = type_natural_mode(type, NULL); int NumClasses = classify_argument(Mode, type, Class, 0); if (NumClasses == 0) @@ -1590,6 +1670,7 @@ llvm_unreachable("Unexpected type!"); } llvm_unreachable("Unexpected type!"); +#endif } else { if (Size <= 8) return Type::getInt64Ty(Context); @@ -1609,6 +1690,7 @@ static void llvm_x86_64_get_multiple_return_reg_classes( tree TreeType, Type */*Ty*/, std::vector &Elts) { enum x86_64_reg_class Class[MAX_CLASSES]; +#if (GCC_MAJOR < 5) enum machine_mode Mode = type_natural_mode(TreeType, NULL); HOST_WIDE_INT Bytes = (Mode == BLKmode) ? int_size_in_bytes(TreeType) : (int) GET_MODE_SIZE(Mode); @@ -1727,6 +1809,7 @@ llvm_unreachable("Unexpected register class!"); } } +#endif } // Return LLVM Type if TYPE can be returned as an aggregate, @@ -1738,6 +1821,12 @@ StructType *STy = cast(Ty); std::vector ElementTypes; + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + Ty->getContext(); +#else + TheContext; +#endif // Special handling for _Complex. 
if (llvm_x86_should_not_return_complex_in_memory(type)) { @@ -1765,6 +1854,12 @@ Value *EVI = Builder.CreateExtractValue(Src, SrcFieldNo, "mrv_gr"); StructType *STy = cast(Src->getType()); Value *Idxs[3]; + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + STy->getContext(); +#else + TheContext; +#endif Idxs[0] = ConstantInt::get(Type::getInt32Ty(Context), 0); Idxs[1] = ConstantInt::get(Type::getInt32Ty(Context), DestFieldNo); Idxs[2] = ConstantInt::get(Type::getInt32Ty(Context), DestElemNo); @@ -1793,6 +1888,13 @@ unsigned SNO = 0; unsigned DNO = 0; + LLVMContext &Context = +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + STy->getContext(); +#else + TheContext; +#endif + if (DestTy->getNumElements() == 3 && DestTy->getElementType(0)->getTypeID() == Type::FloatTyID && DestTy->getElementType(1)->getTypeID() == Type::FloatTyID && @@ -1804,15 +1906,27 @@ Value *E0Index = ConstantInt::get(Type::getInt32Ty(Context), 0); Value *EVI0 = Builder.CreateExtractElement(EVI, E0Index, "mrv.v"); - Value *GEP0 = Builder.CreateStructGEP(Dest, 0, "mrv_gep"); + Value *GEP0 = Builder.CreateStructGEP( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + DestTy, +#endif + Dest, 0, "mrv_gep"); Builder.CreateAlignedStore(EVI0, GEP0, 1, isVolatile); Value *E1Index = ConstantInt::get(Type::getInt32Ty(Context), 1); Value *EVI1 = Builder.CreateExtractElement(EVI, E1Index, "mrv.v"); - Value *GEP1 = Builder.CreateStructGEP(Dest, 1, "mrv_gep"); + Value *GEP1 = Builder.CreateStructGEP( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + DestTy, +#endif + Dest, 1, "mrv_gep"); Builder.CreateAlignedStore(EVI1, GEP1, 1, isVolatile); - Value *GEP2 = Builder.CreateStructGEP(Dest, 2, "mrv_gep"); + Value *GEP2 = Builder.CreateStructGEP( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + DestTy, +#endif + Dest, 2, "mrv_gep"); Value *EVI2 = Builder.CreateExtractValue(Src, 1, "mrv_gr"); Builder.CreateAlignedStore(EVI2, GEP2, 1, isVolatile); return; @@ -1824,7 +1938,11 @@ // Directly access first class values using getresult. 
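The mrv_gep hunks above and below all apply the same signature change: newer IRBuilder versions require the struct type to be passed to CreateStructGEP explicitly instead of being inferred from the pointer operand. Collected into one helper for reference; structFieldAddr is an illustrative name.

// Editorial sketch, not part of the patch.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

static llvm::Value *structFieldAddr(llvm::IRBuilder<> &B,
                                    llvm::StructType *STy, llvm::Value *Ptr,
                                    unsigned FieldNo) {
#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8)
  return B.CreateStructGEP(STy, Ptr, FieldNo, "mrv_gep");
#else
  return B.CreateStructGEP(Ptr, FieldNo, "mrv_gep");  // type inferred from Ptr
#endif
}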
if (DestElemType->isSingleValueType()) { - Value *GEP = Builder.CreateStructGEP(Dest, DNO, "mrv_gep"); + Value *GEP = Builder.CreateStructGEP( +#if LLVM_VERSION_CODE > LLVM_VERSION(3, 8) + DestTy, +#endif + Dest, DNO, "mrv_gep"); Value *EVI = Builder.CreateExtractValue(Src, SNO, "mrv_gr"); Builder.CreateAlignedStore(EVI, GEP, 1, isVolatile); ++DNO; @@ -1893,6 +2011,7 @@ *size = 0; if (TARGET_64BIT) { enum x86_64_reg_class Class[MAX_CLASSES]; +#if (GCC_MAJOR < 5) enum machine_mode Mode = type_natural_mode(type, NULL); int NumClasses = classify_argument(Mode, type, Class, 0); *DontCheckAlignment = true; @@ -1923,6 +2042,7 @@ return true; } } +#endif return false; } else return !isSingleElementStructOrArray(type, false, true); Index: utils/TargetInfo.cpp =================================================================== --- utils/TargetInfo.cpp +++ utils/TargetInfo.cpp @@ -29,16 +29,16 @@ std::cout << T.getTriple() << "\n"; } static void PrintArchName(Triple &T) { - std::cout << T.getArchTypeName(T.getArch()) << "\n"; + std::cout << std::string(T.getArchTypeName(T.getArch())) << "\n"; } static void PrintVendorName(Triple &T) { - std::cout << T.getVendorTypeName(T.getVendor()) << "\n"; + std::cout << std::string(T.getVendorTypeName(T.getVendor())) << "\n"; } static void PrintOSName(Triple &T) { - std::cout << T.getOSTypeName(T.getOS()) << "\n"; + std::cout << std::string(T.getOSTypeName(T.getOS())) << "\n"; } static void PrintArchTypePrefix(Triple &T) { - std::cout << T.getArchTypePrefix(T.getArch()) << "\n"; + std::cout << std::string(T.getArchTypePrefix(T.getArch())) << "\n"; } struct Option {