Index: ../include/llvm/IR/IRBuilder.h =================================================================== --- ../include/llvm/IR/IRBuilder.h +++ ../include/llvm/IR/IRBuilder.h @@ -522,7 +522,8 @@ /// \brief Create a call to a masked intrinsic with given Id. /// Masked intrinsic has only one overloaded type - data type. CallInst *CreateMaskedIntrinsic(Intrinsic::ID Id, ArrayRef Ops, - Type *DataTy, const Twine &Name = ""); + Type *DataTy, unsigned AS, + const Twine &Name = ""); Value *getCastedInt8PtrValue(Value *Ptr); }; Index: ../include/llvm/IR/Intrinsics.h =================================================================== --- ../include/llvm/IR/Intrinsics.h +++ ../include/llvm/IR/Intrinsics.h @@ -133,6 +133,11 @@ /// of IITDescriptors. void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl &T); + /// \brief Attach Address Space suffix to intrinsic \p Name + void attachAddressSpace(std::string& Name, unsigned AS); + + /// \brief Detach Address Space suffixes from intrinsic \p Name + void detachAddressSpace(std::string& Name); } // End Intrinsic namespace } // End llvm namespace Index: ../lib/IR/Function.cpp =================================================================== --- ../lib/IR/Function.cpp +++ ../lib/IR/Function.cpp @@ -1027,3 +1027,15 @@ } return NewFuncName; } + +void Intrinsic::attachAddressSpace(std::string& Name, unsigned AddrSpace) { + Name += ".a_" + std::to_string(AddrSpace); +} + +void Intrinsic::detachAddressSpace(std::string& Name) { + std::string::size_type Pos = Name.rfind(".a_"); + while (Pos != std::string::npos) { + Name.erase(Pos, Name.size()); + Pos = Name.rfind(".a_"); + } +} Index: ../lib/IR/IRBuilder.cpp =================================================================== --- ../lib/IR/IRBuilder.cpp +++ ../lib/IR/IRBuilder.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Statepoint.h" +#include "llvm/IR/Module.h" using namespace llvm; /// CreateGlobalString - Make a new global variable with an initializer that @@ -218,7 +219,9 @@ if (!PassThru) PassThru = UndefValue::get(DataTy); Value *Ops[] = { Ptr, getInt32(Align), Mask, PassThru}; - return CreateMaskedIntrinsic(Intrinsic::masked_load, Ops, DataTy, Name); + return CreateMaskedIntrinsic(Intrinsic::masked_load, Ops, DataTy, + Ptr->getType()->getPointerAddressSpace(), + Name); } /// \brief Create a call to a Masked Store intrinsic. @@ -231,18 +234,50 @@ unsigned Align, Value *Mask) { Value *Ops[] = { Val, Ptr, getInt32(Align), Mask }; // Type of the data to be stored - the only one overloaded type - return CreateMaskedIntrinsic(Intrinsic::masked_store, Ops, Val->getType()); + return CreateMaskedIntrinsic(Intrinsic::masked_store, Ops, Val->getType(), + Ptr->getType()->getPointerAddressSpace()); +} + +static void applyAddressSpace(std::string& IntrName, FunctionType *& FTy, + unsigned AS) { + if (AS == 0) + return; + Intrinsic::attachAddressSpace(IntrName, AS); + + auto changePtrTy = [](Type *T, unsigned AS) { + if (T->isPointerTy()) + return cast(PointerType::get(T->getPointerElementType(), AS)); + if (T->isVectorTy() && T->getVectorElementType()->isPointerTy()) { + Type *OldPtrTy = T->getVectorElementType()->getPointerElementType(); + Type *NewPtrTy = PointerType::get(OldPtrTy, AS); + return cast(VectorType::get(NewPtrTy, T->getVectorNumElements())); + } + return T; + }; + + Type *RetTy = changePtrTy(FTy->getReturnType(), AS); + SmallVector ParamTypes; + for (unsigned i = 0; i != FTy->getNumParams(); ++i) { + Type *T = changePtrTy(FTy->getParamType(i), AS); + ParamTypes.push_back(T); + } + FTy = FunctionType::get(RetTy, ParamTypes, false); } /// Create a call to a Masked intrinsic, with given intrinsic Id, /// an array of operands - Ops, and one overloaded type - DataTy CallInst *IRBuilderBase::CreateMaskedIntrinsic(Intrinsic::ID Id, ArrayRef Ops, - Type *DataTy, + Type *DataTy, unsigned AS, const Twine &Name) { Module *M = BB->getParent()->getParent(); Type *OverloadedTypes[] = { DataTy }; - Value *TheFn = Intrinsic::getDeclaration(M, Id, OverloadedTypes); + std::string IntrName = Intrinsic::getName(Id, OverloadedTypes); + FunctionType *FTy = Intrinsic::getType(M->getContext(), Id, OverloadedTypes); + + applyAddressSpace(IntrName, FTy, AS); + + Value *TheFn = M->getOrInsertFunction(IntrName, FTy); return createCallHelper(TheFn, Ops, this, Name); } @@ -270,7 +305,8 @@ // We specify only one type when we create this intrinsic. Types of other // arguments are derived from this type. - return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, DataTy, Name); + return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, DataTy, + PtrTy->getPointerAddressSpace(), Name); } /// \brief Create a call to a Masked Scatter intrinsic. @@ -300,7 +336,8 @@ // We specify only one type when we create this intrinsic. Types of other // arguments are derived from this type. - return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, DataTy); + unsigned AS = PtrsTy->getElementType()->getPointerAddressSpace(); + return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, DataTy, AS); } template Index: ../lib/IR/Verifier.cpp =================================================================== --- ../lib/IR/Verifier.cpp +++ ../lib/IR/Verifier.cpp @@ -430,7 +430,8 @@ bool performTypeCheck(Intrinsic::ID ID, Function *F, Type *Ty, int VT, unsigned ArgNo, std::string &Suffix); bool verifyIntrinsicType(Type *Ty, ArrayRef &Infos, - SmallVectorImpl &ArgTys); + SmallVectorImpl &ArgTys, + bool *AnyAddressSpace); bool verifyIntrinsicIsVarArg(bool isVarArg, ArrayRef &Infos); bool verifyAttributeCount(AttributeSet Attrs, unsigned Params); @@ -3578,7 +3579,8 @@ /// This returns true on error but does not print a message. bool Verifier::verifyIntrinsicType(Type *Ty, ArrayRef &Infos, - SmallVectorImpl &ArgTys) { + SmallVectorImpl &ArgTys, + bool *AnyAddressSpace) { using namespace Intrinsic; // If we ran out of descriptors, there are too many arguments. @@ -3599,12 +3601,14 @@ case IITDescriptor::Vector: { VectorType *VT = dyn_cast(Ty); return !VT || VT->getNumElements() != D.Vector_Width || - verifyIntrinsicType(VT->getElementType(), Infos, ArgTys); + verifyIntrinsicType(VT->getElementType(), Infos, ArgTys, + AnyAddressSpace); } case IITDescriptor::Pointer: { PointerType *PT = dyn_cast(Ty); return !PT || PT->getAddressSpace() != D.Pointer_AddressSpace || - verifyIntrinsicType(PT->getElementType(), Infos, ArgTys); + verifyIntrinsicType(PT->getElementType(), Infos, ArgTys, + AnyAddressSpace); } case IITDescriptor::Struct: { @@ -3613,7 +3617,8 @@ return true; for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i) - if (verifyIntrinsicType(ST->getElementType(i), Infos, ArgTys)) + if (verifyIntrinsicType(ST->getElementType(i), Infos, ArgTys, + AnyAddressSpace)) return true; return false; } @@ -3685,11 +3690,12 @@ ThisArgType->getVectorNumElements())) return true; return verifyIntrinsicType(ThisArgType->getVectorElementType(), - Infos, ArgTys); + Infos, ArgTys, AnyAddressSpace); } case IITDescriptor::PtrToArgument: { if (D.getArgumentNumber() >= ArgTys.size()) return true; + *AnyAddressSpace = true; Type * ReferenceType = ArgTys[D.getArgumentNumber()]; PointerType *ThisArgType = dyn_cast(Ty); return (!ThisArgType || ThisArgType->getElementType() != ReferenceType); @@ -3697,6 +3703,7 @@ case IITDescriptor::VecOfPtrsToElt: { if (D.getArgumentNumber() >= ArgTys.size()) return true; + *AnyAddressSpace = true; VectorType * ReferenceType = dyn_cast (ArgTys[D.getArgumentNumber()]); VectorType *ThisArgVecTy = dyn_cast(Ty); @@ -3757,10 +3764,13 @@ ArrayRef TableRef = Table; SmallVector ArgTys; - Assert(!verifyIntrinsicType(IFTy->getReturnType(), TableRef, ArgTys), + bool AnyAddressSpace = false; + Assert(!verifyIntrinsicType(IFTy->getReturnType(), TableRef, ArgTys, + &AnyAddressSpace), "Intrinsic has incorrect return type!", IF); for (unsigned i = 0, e = IFTy->getNumParams(); i != e; ++i) - Assert(!verifyIntrinsicType(IFTy->getParamType(i), TableRef, ArgTys), + Assert(!verifyIntrinsicType(IFTy->getParamType(i), TableRef, ArgTys, + &AnyAddressSpace), "Intrinsic has incorrect argument type!", IF); // Verify if the intrinsic call matches the vararg property. @@ -3779,7 +3789,11 @@ // usual means. This allows us to verify the mangling of argument types into // the name. const std::string ExpectedName = Intrinsic::getName(ID, ArgTys); - Assert(ExpectedName == IF->getName(), + + std::string NameToCheck = IF->getName(); + if (AnyAddressSpace) + Intrinsic::detachAddressSpace(NameToCheck); + Assert(ExpectedName == NameToCheck, "Intrinsic name not mangled correctly for type arguments! " "Should be: " + ExpectedName, Index: ../test/Transforms/LoopVectorize/X86/gather_scatter.ll =================================================================== --- ../test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ ../test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -155,6 +155,67 @@ ret void } +;AVX512-LABEL: @foo2_a_1 +;AVX512: getelementptr %struct.In, %struct.In addrspace(1)* %in, <16 x i64> %induction, i32 1 +;AVX512: llvm.masked.gather.v16f32.a_1 +;AVX512: llvm.masked.store.v16f32 +;AVX512: ret void +define void @foo2_a_1(%struct.In addrspace(1)* noalias %in, float* noalias %out, i32* noalias %trigger, i32* noalias %index) #0 { +entry: + %in.addr = alloca %struct.In addrspace(1)*, align 8 + %out.addr = alloca float*, align 8 + %trigger.addr = alloca i32*, align 8 + %index.addr = alloca i32*, align 8 + %i = alloca i32, align 4 + store %struct.In addrspace(1)* %in, %struct.In addrspace(1)** %in.addr, align 8 + store float* %out, float** %out.addr, align 8 + store i32* %trigger, i32** %trigger.addr, align 8 + store i32* %index, i32** %index.addr, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 4096 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32, i32* %i, align 4 + %idxprom = sext i32 %1 to i64 + %2 = load i32*, i32** %trigger.addr, align 8 + %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom + %3 = load i32, i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %3, 0 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + %4 = load i32, i32* %i, align 4 + %idxprom2 = sext i32 %4 to i64 + %5 = load %struct.In addrspace(1)*, %struct.In addrspace(1)** %in.addr, align 8 + %arrayidx3 = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %5, i64 %idxprom2 + %b = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %arrayidx3, i32 0, i32 1 + %6 = load float, float addrspace(1)* %b, align 4 + %add = fadd float %6, 5.000000e-01 + %7 = load i32, i32* %i, align 4 + %idxprom4 = sext i32 %7 to i64 + %8 = load float*, float** %out.addr, align 8 + %arrayidx5 = getelementptr inbounds float, float* %8, i64 %idxprom4 + store float %add, float* %arrayidx5, align 4 + br label %if.end + +if.end: ; preds = %if.then, %for.body + br label %for.inc + +for.inc: ; preds = %if.end + %9 = load i32, i32* %i, align 4 + %inc = add nsw i32 %9, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + ; The source code ;struct Out { ; float a;