Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -3392,9 +3392,10 @@ :Overview: -Opaque structure types are used to represent named structure types that +Opaque structure types are used to represent structure types that do not have a body specified. This corresponds (for example) to the C -notion of a forward declared structure. +notion of a forward declared structure. They can be named (``%X``) or +unnamed (``%52``). :Syntax: @@ -11507,6 +11508,15 @@ type is matched against the return type, it does not require its own name suffix. +:ref:`Unnamed types <t_opaque>` are encoded as ``s_s``. Overloaded intrinsics +that depend on an unnamed type in one of its overloaded argument types get an +additional ``.<number>`` suffix. This allows differentiating intrinsics with +different unnamed types as arguments. (For example: +``llvm.ssa.copy.p0s_s.2(%42*)``) The number is tracked in the LLVM module and +it ensures unique names in the module. While linking together two modules, it is +still possible to get a name clash. In that case one of the names will be +changed by getting a new number. + For target developers who are defining intrinsics for back-end code generation, any intrinsic overloads based solely the distinction between integer or floating point types should not be relied upon for correct Index: llvm/include/llvm/IR/Intrinsics.h =================================================================== --- llvm/include/llvm/IR/Intrinsics.h +++ llvm/include/llvm/IR/Intrinsics.h @@ -56,11 +56,20 @@ StringRef getName(ID id); /// Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx". - /// Note, this version of getName supports overloads, but is less efficient - /// than the StringRef version of this function. If no overloads are - /// requried, it is safe to use this version, but better to use the StringRef - /// version. 
- std::string getName(ID id, ArrayRef Tys); + /// Note, this version of getName supports overloads, but not unnamed types. + /// It is less efficient than the StringRef version of this function. If no + /// overloads are required, it is safe to use this version, but better to use + /// the StringRef version. + std::string getName(ID Id, ArrayRef Tys); + + /// Return the LLVM name for an intrinsic, such as "llvm.ssa.copy.p0s_s.1". + /// Note, this version of getName supports overloads and unnamed types, but is + /// less efficient than the StringRef version of this function. If no + /// overloads are required, it is safe to use this version, but better to use + /// the StringRef version. A function type FT can be provided to avoid + /// computing it. It is used (or computed) if one of the types is based on an + /// unnamed type. + std::string getName(ID Id, ArrayRef Tys, Module *M, FunctionType *FT); /// Return the function type for an intrinsic. FunctionType *getType(LLVMContext &Context, ID id, Index: llvm/include/llvm/IR/Module.h =================================================================== --- llvm/include/llvm/IR/Module.h +++ llvm/include/llvm/IR/Module.h @@ -197,6 +197,14 @@ ///< Format: (arch)(sub)-(vendor)-(sys0-(abi) NamedMDSymTabType NamedMDSymTab; ///< NamedMDNode names. DataLayout DL; ///< DataLayout associated with the module + StringMap + CurrentIntrinsicIds; ///< Keep track of the current unique id count for + ///< the specified intrinsic basename. + DenseMap, unsigned> + UniquedIntrinsicNames; ///< Keep track of uniqued names of intrinsics + ///< based on unnamed types. The combination of + ///< ID and FunctionType maps to the extension that + ///< is used to make the intrinsic name unique. friend class Constant; @@ -331,6 +339,11 @@ std::vector getIdentifiedStructTypes() const; + /// Return a unique name for an intrinsic whose mangling is based on an + /// unnamed type. The Proto represents the function prototype. 
+ std::string getUniqueIntrinsicName(StringRef BaseName, Intrinsic::ID Id, + const FunctionType *Proto); + /// @} /// @name Function Accessors /// @{ Index: llvm/lib/IR/Function.cpp =================================================================== --- llvm/lib/IR/Function.cpp +++ llvm/lib/IR/Function.cpp @@ -726,30 +726,34 @@ /// which can't be confused with it's prefix. This ensures we don't have /// collisions between two unrelated function types. Otherwise, you might /// parse ffXX as f(fXX) or f(fX)X. (X is a placeholder for any other type.) -/// -static std::string getMangledTypeStr(Type* Ty) { +/// The HasUnnamedType boolean is set if an unnamed type was encountered, +/// indicating that extra care must be taken to ensure a unique name. +static std::string getMangledTypeStr(Type *Ty, bool &HasUnnamedType) { std::string Result; if (PointerType* PTyp = dyn_cast(Ty)) { Result += "p" + utostr(PTyp->getAddressSpace()) + - getMangledTypeStr(PTyp->getElementType()); + getMangledTypeStr(PTyp->getElementType(), HasUnnamedType); } else if (ArrayType* ATyp = dyn_cast(Ty)) { Result += "a" + utostr(ATyp->getNumElements()) + - getMangledTypeStr(ATyp->getElementType()); + getMangledTypeStr(ATyp->getElementType(), HasUnnamedType); } else if (StructType *STyp = dyn_cast(Ty)) { if (!STyp->isLiteral()) { Result += "s_"; - Result += STyp->getName(); + if (STyp->hasName()) + Result += STyp->getName(); + else + HasUnnamedType = true; } else { Result += "sl_"; for (auto Elem : STyp->elements()) - Result += getMangledTypeStr(Elem); + Result += getMangledTypeStr(Elem, HasUnnamedType); } // Ensure nested structs are distinguishable. 
Result += "s"; } else if (FunctionType *FT = dyn_cast(Ty)) { - Result += "f_" + getMangledTypeStr(FT->getReturnType()); + Result += "f_" + getMangledTypeStr(FT->getReturnType(), HasUnnamedType); for (size_t i = 0; i < FT->getNumParams(); i++) - Result += getMangledTypeStr(FT->getParamType(i)); + Result += getMangledTypeStr(FT->getParamType(i), HasUnnamedType); if (FT->isVarArg()) Result += "vararg"; // Ensure nested function types are distinguishable. @@ -759,7 +763,7 @@ if (EC.isScalable()) Result += "nx"; Result += "v" + utostr(EC.getKnownMinValue()) + - getMangledTypeStr(VTy->getElementType()); + getMangledTypeStr(VTy->getElementType(), HasUnnamedType); } else if (Ty) { switch (Ty->getTypeID()) { default: llvm_unreachable("Unhandled type"); @@ -789,17 +793,32 @@ return IntrinsicNameTable[id]; } -std::string Intrinsic::getName(ID id, ArrayRef Tys) { - assert(id < num_intrinsics && "Invalid intrinsic ID!"); - assert((Tys.empty() || Intrinsic::isOverloaded(id)) && +std::string Intrinsic::getName(ID Id, ArrayRef Tys, Module *M, + FunctionType *FT) { + assert(Id < num_intrinsics && "Invalid intrinsic ID!"); + assert((Tys.empty() || Intrinsic::isOverloaded(Id)) && "This version of getName is for overloaded intrinsics only"); - std::string Result(IntrinsicNameTable[id]); + bool HasUnnamedType = false; + std::string Result(IntrinsicNameTable[Id]); for (Type *Ty : Tys) { - Result += "." + getMangledTypeStr(Ty); + Result += "." 
+ getMangledTypeStr(Ty, HasUnnamedType); + } + assert((M || !HasUnnamedType) && "unnamed types need a module"); + if (M && HasUnnamedType) { + if (!FT) + FT = getType(M->getContext(), Id, Tys); + else + assert((FT == getType(M->getContext(), Id, Tys)) && + "Provided FunctionType must match arguments"); + return M->getUniqueIntrinsicName(Result, Id, FT); } return Result; } +std::string Intrinsic::getName(ID Id, ArrayRef Tys) { + return getName(Id, Tys, nullptr, nullptr); +} + /// IIT_Info - These are enumerators that describe the entries returned by the /// getIntrinsicInfoTableEntries function. /// @@ -1259,8 +1278,10 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef Tys) { // There can never be multiple globals with the same name of different types, // because intrinsics must be a specific type. + auto *FT = getType(M->getContext(), id, Tys); return cast( - M->getOrInsertFunction(Tys.empty() ? getName(id) : getName(id, Tys), + M->getOrInsertFunction(Tys.empty() ? getName(id) + : getName(id, Tys, M, FT), getType(M->getContext(), id, Tys)) .getCallee()); } @@ -1573,7 +1594,8 @@ Intrinsic::ID ID = F->getIntrinsicID(); StringRef Name = F->getName(); - if (Name == Intrinsic::getName(ID, ArgTys)) + if (Name == + Intrinsic::getName(ID, ArgTys, F->getParent(), F->getFunctionType())) return None; auto NewDecl = Intrinsic::getDeclaration(F->getParent(), ID, ArgTys); Index: llvm/lib/IR/Module.cpp =================================================================== --- llvm/lib/IR/Module.cpp +++ llvm/lib/IR/Module.cpp @@ -473,6 +473,56 @@ return Ret; } +std::string Module::getUniqueIntrinsicName(StringRef BaseName, Intrinsic::ID Id, + const FunctionType *Proto) { + auto Encode = [&BaseName](unsigned Suffix) { + return (Twine(BaseName) + "." 
+ Twine(Suffix)).str(); + }; + + { + // fast path - the prototype is already known + auto UinItInserted = UniquedIntrinsicNames.insert({{Id, Proto}, 0}); + if (!UinItInserted.second) + return Encode(UinItInserted.first->second); + } + + // Not known yet. A new entry was created with index 0. Check if there already + // exists a matching declaration, or select a new entry. + + // Start looking for names with the current known maximum count (or 0). + auto NiidItInserted = CurrentIntrinsicIds.insert({BaseName, 0}); + unsigned Count = NiidItInserted.first->second; + + // This might be slow if a whole population of intrinsics already existed, but + // we cache the values for later usage. + std::string NewName; + while (true) { + NewName = Encode(Count); + GlobalValue *F = getNamedValue(NewName); + if (!F) { + // Reserve this entry for the new proto + UniquedIntrinsicNames[{Id, Proto}] = Count; + break; + } + + // A declaration with this name already exists. Remember it. + FunctionType *FT = dyn_cast(F->getType()->getElementType()); + auto UinItInserted = UniquedIntrinsicNames.insert({{Id, FT}, Count}); + if (FT == Proto) { + // It was a declaration for our prototype. This entry was allocated in the + // beginning. Update the count to match the existing declaration. + UinItInserted.first->second = Count; + break; + } + + ++Count; + } + + NiidItInserted.first->second = Count + 1; + + return NewName; +} + // dropAllReferences() - This function causes all the subelements to "let go" // of all references that they are maintaining. This allows one to 'delete' a // whole module at a time, even though there may be circular references... first Index: llvm/lib/IR/Verifier.cpp =================================================================== --- llvm/lib/IR/Verifier.cpp +++ llvm/lib/IR/Verifier.cpp @@ -4542,7 +4542,8 @@ // know they are legal for the intrinsic!) get the intrinsic name through the // usual means. 
This allows us to verify the mangling of argument types into // the name. - const std::string ExpectedName = Intrinsic::getName(ID, ArgTys); + const std::string ExpectedName = + Intrinsic::getName(ID, ArgTys, IF->getParent(), IFTy); Assert(ExpectedName == IF->getName(), "Intrinsic name not mangled correctly for type arguments! " "Should be: " + Index: llvm/lib/Linker/IRMover.cpp =================================================================== --- llvm/lib/Linker/IRMover.cpp +++ llvm/lib/Linker/IRMover.cpp @@ -460,6 +460,14 @@ if (DGV->hasLocalLinkage()) return nullptr; + // If we found an intrinsic declaration with mismatching prototypes, we + // probably had a nameclash. Don't use that version. + if (auto *FDGV = dyn_cast(DGV)) + if (FDGV->isIntrinsic()) + if (const auto *FSrcGV = dyn_cast(SrcGV)) + if (FDGV->getFunctionType() != TypeMap.get(FSrcGV->getFunctionType())) + return nullptr; + // Otherwise, we do in fact link to the destination global. return DGV; } @@ -995,6 +1003,7 @@ return linkAppendingVarProto(cast_or_null(DGV), cast(SGV)); + bool NeedsRenaming = false; GlobalValue *NewGV; if (DGV && !ShouldLink) { NewGV = DGV; @@ -1007,15 +1016,21 @@ NewGV = copyGlobalValueProto(SGV, ShouldLink || ForIndirectSymbol); if (ShouldLink || !ForIndirectSymbol) - forceRenaming(NewGV, SGV->getName()); + NeedsRenaming = true; } // Overloaded intrinsics have overloaded types names as part of their // names. If we renamed overloaded types we should rename the intrinsic // as well. 
if (Function *F = dyn_cast(NewGV)) - if (auto Remangled = Intrinsic::remangleIntrinsicFunction(F)) + if (auto Remangled = Intrinsic::remangleIntrinsicFunction(F)) { + NewGV->eraseFromParent(); NewGV = Remangled.getValue(); + NeedsRenaming = false; + } + + if (NeedsRenaming) + forceRenaming(NewGV, SGV->getName()); if (ShouldLink || ForIndirectSymbol) { if (const Comdat *SC = SGV->getComdat()) { Index: llvm/test/Bitcode/intrinsics-with-unnamed-types.ll =================================================================== --- /dev/null +++ llvm/test/Bitcode/intrinsics-with-unnamed-types.ll @@ -0,0 +1,31 @@ +; RUN: llvm-as -o - %s | llvm-dis -o - 2>&1 | FileCheck %s + +; Make sure we can assemble and disassemble IR containing intrinsics with +; unnamed types. + +%1 = type opaque +%0 = type opaque + +; CHECK-LABEL: @f0( +; CHECK: %c1 = call %0* @llvm.ssa.copy.p0s_s.0(%0* %arg) +; CHECK: %c2 = call %1* @llvm.ssa.copy.p0s_s.1(%1* %tmp) +; CHECK: %c3 = call %0** @llvm.ssa.copy.p0p0s_s.1(%0** %arg2) +; CHECK: %c4 = call %1** @llvm.ssa.copy.p0p0s_s.0(%1** %tmp2) + +define void @f0(%0* %arg, %1* %tmp, %1** %tmp2, %0** %arg2) { +bb: + %cmp1 = icmp ne %0* %arg, null + %c1 = call %0* @llvm.ssa.copy.p0s_s.0(%0* %arg) + %c2 = call %1* @llvm.ssa.copy.p0s_s.1(%1* %tmp) + %c3 = call %0** @llvm.ssa.copy.p0p0s_s.1(%0** %arg2) + %c4 = call %1** @llvm.ssa.copy.p0p0s_s.0(%1** %tmp2) + ret void +} + +declare %0* @llvm.ssa.copy.p0s_s.0(%0* returned) + +declare %1* @llvm.ssa.copy.p0s_s.1(%1* returned) + +declare %0** @llvm.ssa.copy.p0p0s_s.1(%0** returned) + +declare %1** @llvm.ssa.copy.p0p0s_s.0(%1** returned) Index: llvm/test/Linker/intrinsics-with-unnamed-types.ll =================================================================== --- /dev/null +++ llvm/test/Linker/intrinsics-with-unnamed-types.ll @@ -0,0 +1,101 @@ +; RUN: split-file %s %t +; RUN: llvm-as -o %t1.bc %t/f01.ll +; RUN: llvm-as -o %t2.bc %t/f02.ll +; RUN: llvm-link %t1.bc %t2.bc -o %t3.bc +; RUN: llvm-dis -o - %t3.bc | 
FileCheck %s + +; Make sure we can link files with clashing intrinsic names using unnamed types. + +;--- f01.ll +%1 = type opaque +%0 = type opaque + +; CHECK-LABEL: @test01( +; CHECK: %cmp1 = icmp ne %0* %arg, null +; CHECK-NEXT: %c1 = call %0* @llvm.ssa.copy.p0s_s.0(%0* %arg) +; CHECK-NEXT: %c2 = call %1* @llvm.ssa.copy.p0s_s.1(%1* %tmp) +; CHECK-NEXT: %c3a = call %0** @llvm.ssa.copy.p0p0s_s.0(%0** %arg2) +; CHECK-NEXT: %c3b = call %0** @llvm.ssa.copy.p0p0s_s.0(%0** %arg2) +; CHECK-NEXT: %c4a = call %1** @llvm.ssa.copy.p0p0s_s.1(%1** %tmp2) +; CHECK-NEXT: %c4ba = call %1** @llvm.ssa.copy.p0p0s_s.1(%1** %tmp2) +; CHECK-NEXT: %c5 = call %0*** @llvm.ssa.copy.p0p0p0s_s.0(%0*** %arg3) +; CHECK-NEXT: %c6 = call %1*** @llvm.ssa.copy.p0p0p0s_s.1(%1*** %tmp3) + +define void @test01(%0* %arg, %1* %tmp, %1** %tmp2, %0** %arg2, %1*** %tmp3, %0*** %arg3) { +bb: + %cmp1 = icmp ne %0* %arg, null + %c1 = call %0* @llvm.ssa.copy.p0s_s.0(%0* %arg) + %c2 = call %1* @llvm.ssa.copy.p0s_s.1(%1* %tmp) + %c3a = call %0** @llvm.ssa.copy.p0p0s_s.1(%0** %arg2) + %c3b = call %0** @llvm.ssa.copy.p0p0s_s.1(%0** %arg2) + %c4a = call %1** @llvm.ssa.copy.p0p0s_s.0(%1** %tmp2) + %c4ba = call %1** @llvm.ssa.copy.p0p0s_s.0(%1** %tmp2) + %c5 = call %0*** @llvm.ssa.copy.p0p0p0s_s.1(%0*** %arg3) + %c6 = call %1*** @llvm.ssa.copy.p0p0p0s_s.0(%1*** %tmp3) + ret void +} + +declare %0* @llvm.ssa.copy.p0s_s.0(%0* returned) + +declare %1* @llvm.ssa.copy.p0s_s.1(%1* returned) + +declare %0** @llvm.ssa.copy.p0p0s_s.1(%0** returned) + +declare %1** @llvm.ssa.copy.p0p0s_s.0(%1** returned) + +declare %0*** @llvm.ssa.copy.p0p0p0s_s.1(%0*** returned) + +declare %1*** @llvm.ssa.copy.p0p0p0s_s.0(%1*** returned) + +; now with recycling of previous declarations: +; CHECK-LABEL: @test02( +; CHECK: %cmp1 = icmp ne %0* %arg, null +; CHECK-NEXT: %c4a = call %1** @llvm.ssa.copy.p0p0s_s.1(%1** %tmp2) +; CHECK-NEXT: %c6 = call %1*** @llvm.ssa.copy.p0p0p0s_s.1(%1*** %tmp3) +; CHECK-NEXT: %c1 = call %0* 
@llvm.ssa.copy.p0s_s.0(%0* %arg) +; CHECK-NEXT: %c2 = call %1* @llvm.ssa.copy.p0s_s.1(%1* %tmp) +; CHECK-NEXT: %c3b = call %0** @llvm.ssa.copy.p0p0s_s.0(%0** %arg2) +; CHECK-NEXT: %c4ba = call %1** @llvm.ssa.copy.p0p0s_s.1(%1** %tmp2) +; CHECK-NEXT: %c5 = call %0*** @llvm.ssa.copy.p0p0p0s_s.0(%0*** %arg3) + +define void @test02(%0* %arg, %1* %tmp, %1** %tmp2, %0** %arg2, %1*** %tmp3, %0*** %arg3) { +bb: + %cmp1 = icmp ne %0* %arg, null + %c4a = call %1** @llvm.ssa.copy.p0p0s_s.0(%1** %tmp2) + %c6 = call %1*** @llvm.ssa.copy.p0p0p0s_s.0(%1*** %tmp3) + %c1 = call %0* @llvm.ssa.copy.p0s_s.0(%0* %arg) + %c2 = call %1* @llvm.ssa.copy.p0s_s.1(%1* %tmp) + %c3b = call %0** @llvm.ssa.copy.p0p0s_s.1(%0** %arg2) + %c4ba = call %1** @llvm.ssa.copy.p0p0s_s.0(%1** %tmp2) + %c5 = call %0*** @llvm.ssa.copy.p0p0p0s_s.1(%0*** %arg3) + ret void +} + +;--- f02.ll +%1 = type opaque +%2 = type opaque + +; CHECK-LABEL: @test03( +; CHECK: %cmp1 = icmp ne %3* %arg, null +; CHECK-NEXT: %c1 = call %3* @llvm.ssa.copy.p0s_s.2(%3* %arg) +; CHECK-NEXT: %c2 = call %2* @llvm.ssa.copy.p0s_s.3(%2* %tmp) +; CHECK-NEXT: %c3 = call %3** @llvm.ssa.copy.p0p0s_s.2(%3** %arg2) +; CHECK-NEXT: %c4 = call %2** @llvm.ssa.copy.p0p0s_s.3(%2** %tmp2) + +define void @test03(%1* %tmp, %2* %arg, %1** %tmp2, %2** %arg2) { +bb: + %cmp1 = icmp ne %2* %arg, null + %c1 = call %2* @llvm.ssa.copy.p0s_s.0(%2* %arg) + %c2 = call %1* @llvm.ssa.copy.p0s_s.1(%1* %tmp) + %c3 = call %2** @llvm.ssa.copy.p0p0s_s.1(%2** %arg2) + %c4 = call %1** @llvm.ssa.copy.p0p0s_s.0(%1** %tmp2) + ret void +} + +declare %2* @llvm.ssa.copy.p0s_s.0(%2* returned) + +declare %1* @llvm.ssa.copy.p0s_s.1(%1* returned) + +declare %2** @llvm.ssa.copy.p0p0s_s.1(%2** returned) + +declare %1** @llvm.ssa.copy.p0p0s_s.0(%1** returned) Index: llvm/test/Transforms/LoopVectorize/X86/pr48340.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/X86/pr48340.ll @@ -0,0 +1,54 @@ +; RUN: opt 
-loop-vectorize --force-vector-width=4 --force-vector-interleave=0 -S -o - < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%0 = type { i32 } +%1 = type { i64 } + +define void @foo(i64* %p, i64* %p.last) unnamed_addr #0 { +; CHECK-LABEL: @foo( +; CHECK: vector.body: +; CHECK: [[WIDE_MASKED_GATHER0:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP5:%.*]], i32 8, <4 x i1> , <4 x %0*> undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP6:%.*]], i32 8, <4 x i1> , <4 x %0*> undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP7:%.*]], i32 8, <4 x i1> , <4 x %0*> undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP8:%.*]], i32 8, <4 x i1> , <4 x %0*> undef) +entry: + br label %loop + +loop: + %p2 = phi i64* [ %p, %entry ], [ %p.inc, %loop ] + %p.inc = getelementptr inbounds i64, i64* %p2, i64 2 + %p3 = bitcast i64* %p2 to %0** + %v = load %0*, %0** %p3, align 8 + %b = icmp eq i64* %p.inc, %p.last + br i1 %b, label %exit, label %loop + +exit: + ret void +} + +define void @bar(i64* %p, i64* %p.last) unnamed_addr #0 { +; CHECK-LABEL: @bar( +; CHECK: vector.body: +; CHECK: [[WIDE_MASKED_GATHER0:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP5:%.*]], i32 8, <4 x i1> , <4 x %1*> undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP6:%.*]], i32 8, <4 x i1> , <4 x %1*> undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP7:%.*]], i32 8, <4 x i1> , <4 x %1*> undef) +; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <4 x %1*> 
@llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP8:%.*]], i32 8, <4 x i1> , <4 x %1*> undef) +entry: + br label %loop + +loop: + %p2 = phi i64* [ %p, %entry ], [ %p.inc, %loop ] + %p.inc = getelementptr inbounds i64, i64* %p2, i64 2 + %p3 = bitcast i64* %p2 to %1** + %v = load %1*, %1** %p3, align 8 + %b = icmp eq i64* %p.inc, %p.last + br i1 %b, label %exit, label %loop + +exit: + ret void +} + +attributes #0 = { "target-cpu"="skylake" } +