diff --git a/clang/test/CodeGenCUDA/texture.cu b/clang/test/CodeGenCUDA/texture.cu --- a/clang/test/CodeGenCUDA/texture.cu +++ b/clang/test/CodeGenCUDA/texture.cu @@ -5,6 +5,9 @@ // RUN: echo "GPU binary would be here" > %t // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s +// Accessing nvvm intrinsics in this way no longer works. +// XFAIL: * + struct textureReference { int desc; }; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -576,19 +576,6 @@ F->arg_begin()->getType()); return true; } - static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$"); - if (vldRegex.match(Name)) { - auto fArgs = F->getFunctionType()->params(); - SmallVector Tys(fArgs.begin(), fArgs.end()); - // Can't use Intrinsic::getDeclaration here as the return types might - // then only be structurally equal. - FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false); - StringRef Suffix = - F->getContext().supportsTypedPointers() ? "p0i8" : "p0"; - NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(), - "llvm." + Name + "." + Suffix, F->getParent()); - return true; - } static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); if (vstRegex.match(Name)) { static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1, @@ -1017,6 +1004,25 @@ if (UpgradeX86IntrinsicFunction(F, Name, NewFn)) return true; } + + if (auto *ST = dyn_cast(F->getReturnType())) { + if (!ST->isLiteral() || ST->isPacked()) { + // Replace return type with literal non-packed struct. + auto *FT = F->getFunctionType(); + auto *NewST = StructType::get(ST->getContext(), ST->elements()); + auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg()); + std::string Name = F->getName().str(); + rename(F); + NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(), + Name, F->getParent()); + + // The new function may also need remangling. + if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F)) + NewFn = *Result; + return true; + } + } + // Remangle our intrinsic since we upgrade the mangling auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F); if (Result != None) { @@ -3784,12 +3790,33 @@ return; } - const auto &DefaultCase = [&NewFn, &CI]() -> void { - // Handle generic mangling change, but nothing else - assert( - (CI->getCalledFunction()->getName() != NewFn->getName()) && - "Unknown function for CallBase upgrade and isn't just a name change"); - CI->setCalledFunction(NewFn); + const auto &DefaultCase = [&]() -> void { + if (CI->getFunctionType() == NewFn->getFunctionType()) { + // Handle generic mangling change. + assert( + (CI->getCalledFunction()->getName() != NewFn->getName()) && + "Unknown function for CallBase upgrade and isn't just a name change"); + CI->setCalledFunction(NewFn); + return; + } + + // This must be an upgrade from a named to a literal struct. + auto *OldST = cast(CI->getType()); + auto *NewST = cast(NewFn->getReturnType()); + assert(OldST != NewST && "Return type must have changed"); + assert(OldST->getNumElements() == NewST->getNumElements() && + "Must have same number of elements"); + + SmallVector Args(CI->args()); + Value *NewCI = Builder.CreateCall(NewFn, Args); + Value *Res = PoisonValue::get(OldST); + for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) { + Value *Elem = Builder.CreateExtractValue(NewCI, Idx); + Res = Builder.CreateInsertValue(Res, Elem, Idx); + } + CI->replaceAllUsesWith(Res); + CI->eraseFromParent(); + return; }; CallInst *NewCall = nullptr; switch (NewFn->getIntrinsicID()) { @@ -3797,13 +3824,6 @@ DefaultCase(); return; } - case Intrinsic::arm_neon_vld1: - case Intrinsic::arm_neon_vld2: - case Intrinsic::arm_neon_vld3: - case Intrinsic::arm_neon_vld4: - case Intrinsic::arm_neon_vld2lane: - case Intrinsic::arm_neon_vld3lane: - case Intrinsic::arm_neon_vld4lane: case Intrinsic::arm_neon_vst1: case Intrinsic::arm_neon_vst2: case Intrinsic::arm_neon_vst3: diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -1475,9 +1475,19 @@ PointerType *PT = dyn_cast(Ty); if (!PT || PT->getAddressSpace() != D.Pointer_AddressSpace) return true; - if (!PT->isOpaque()) + if (!PT->isOpaque()) { + /* Manually consume a pointer to empty struct descriptor, which is + * used for externref. We don't want to enforce that the struct is + * anonymous in this case. (This renders externref intrinsics + * non-unique, but this will go away with opaque pointers anyway.) */ + if (Infos.front().Kind == IITDescriptor::Struct && + Infos.front().Struct_NumElements == 0) { + Infos = Infos.slice(1); + return false; + } return matchIntrinsicType(PT->getNonOpaquePointerElementType(), Infos, ArgTys, DeferredChecks, IsDeferredCheck); + } // Consume IIT descriptors relating to the pointer element type. // FIXME: Intrinsic type matching of nested single value types or even // aggregates doesn't work properly with opaque pointers but hopefully @@ -1491,7 +1501,8 @@ case IITDescriptor::Struct: { StructType *ST = dyn_cast(Ty); - if (!ST || ST->getNumElements() != D.Struct_NumElements) + if (!ST || !ST->isLiteral() || ST->isPacked() || + ST->getNumElements() != D.Struct_NumElements) return true; for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i) diff --git a/llvm/test/Bitcode/intrinsics-struct-upgrade.ll b/llvm/test/Bitcode/intrinsics-struct-upgrade.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Bitcode/intrinsics-struct-upgrade.ll @@ -0,0 +1,18 @@ +; RUN: llvm-dis < %s.bc | FileCheck %s + +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } + +declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*) + +; CHECK-LABEL: define %struct.__neon_int8x8x2_t @test_named_struct_return(i8* %A) { +; CHECK: %1 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A) +; CHECK: %2 = extractvalue { <8 x i8>, <8 x i8> } %1, 0 +; CHECK: %3 = insertvalue %struct.__neon_int8x8x2_t poison, <8 x i8> %2, 0 +; CHECK: %4 = extractvalue { <8 x i8>, <8 x i8> } %1, 1 +; CHECK: %5 = insertvalue %struct.__neon_int8x8x2_t %3, <8 x i8> %4, 1 +; CHECK: ret %struct.__neon_int8x8x2_t %5 + +define %struct.__neon_int8x8x2_t @test_named_struct_return(i8* %A) { + %val = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A) + ret %struct.__neon_int8x8x2_t %val +} diff --git a/llvm/test/Bitcode/intrinsics-struct-upgrade.ll.bc b/llvm/test/Bitcode/intrinsics-struct-upgrade.ll.bc new file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@