Index: clang/test/CodeGenCUDA/texture.cu =================================================================== --- clang/test/CodeGenCUDA/texture.cu +++ clang/test/CodeGenCUDA/texture.cu @@ -5,6 +5,9 @@ // RUN: echo "GPU binary would be here" > %t // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s +// Accessing nvvm intrinsics in this way no longer works. +// XFAIL: * + struct textureReference { int desc; }; Index: llvm/lib/IR/AutoUpgrade.cpp =================================================================== --- llvm/lib/IR/AutoUpgrade.cpp +++ llvm/lib/IR/AutoUpgrade.cpp @@ -576,19 +576,6 @@ F->arg_begin()->getType()); return true; } - static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$"); - if (vldRegex.match(Name)) { - auto fArgs = F->getFunctionType()->params(); - SmallVector Tys(fArgs.begin(), fArgs.end()); - // Can't use Intrinsic::getDeclaration here as the return types might - // then only be structurally equal. - FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false); - StringRef Suffix = - F->getContext().supportsTypedPointers() ? "p0i8" : "p0"; - NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(), - "llvm." + Name + "." + Suffix, F->getParent()); - return true; - } static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); if (vstRegex.match(Name)) { static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1, @@ -1017,6 +1004,25 @@ if (UpgradeX86IntrinsicFunction(F, Name, NewFn)) return true; } + + if (auto *ST = dyn_cast(F->getReturnType())) { + if (!ST->isLiteral() || ST->isPacked()) { + // Replace return type with literal non-packed struct. + auto *FT = F->getFunctionType(); + auto *NewST = StructType::get(ST->getContext(), ST->elements()); + auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg()); + std::string Name = F->getName().str(); + rename(F); + NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(), + Name, F->getParent()); + + // The new function may also need remangling. + if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F)) + NewFn = *Result; + return true; + } + } + // Remangle our intrinsic since we upgrade the mangling auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F); if (Result != None) { @@ -3784,12 +3790,33 @@ return; } - const auto &DefaultCase = [&NewFn, &CI]() -> void { - // Handle generic mangling change, but nothing else - assert( - (CI->getCalledFunction()->getName() != NewFn->getName()) && - "Unknown function for CallBase upgrade and isn't just a name change"); - CI->setCalledFunction(NewFn); + const auto &DefaultCase = [&]() -> void { + if (CI->getFunctionType() == NewFn->getFunctionType()) { + // Handle generic mangling change. + assert( + (CI->getCalledFunction()->getName() != NewFn->getName()) && + "Unknown function for CallBase upgrade and isn't just a name change"); + CI->setCalledFunction(NewFn); + return; + } + + // This must be an upgrade from a named to a literal struct. + auto *OldST = cast(CI->getType()); + auto *NewST = cast(NewFn->getReturnType()); + assert(OldST != NewST && "Return type must have changed"); + assert(OldST->getNumElements() == NewST->getNumElements() && + "Must have same number of elements"); + + SmallVector Args(CI->args()); + Value *NewCI = Builder.CreateCall(NewFn, Args); + Value *Res = PoisonValue::get(OldST); + for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) { + Value *Elem = Builder.CreateExtractValue(NewCI, Idx); + Res = Builder.CreateInsertValue(Res, Elem, Idx); + } + CI->replaceAllUsesWith(Res); + CI->eraseFromParent(); + return; }; CallInst *NewCall = nullptr; switch (NewFn->getIntrinsicID()) { @@ -3797,13 +3824,6 @@ DefaultCase(); return; } - case Intrinsic::arm_neon_vld1: - case Intrinsic::arm_neon_vld2: - case Intrinsic::arm_neon_vld3: - case Intrinsic::arm_neon_vld4: - case Intrinsic::arm_neon_vld2lane: - case Intrinsic::arm_neon_vld3lane: - case Intrinsic::arm_neon_vld4lane: case Intrinsic::arm_neon_vst1: case Intrinsic::arm_neon_vst2: case Intrinsic::arm_neon_vst3: Index: llvm/lib/IR/Function.cpp =================================================================== --- llvm/lib/IR/Function.cpp +++ llvm/lib/IR/Function.cpp @@ -1475,9 +1475,19 @@ PointerType *PT = dyn_cast(Ty); if (!PT || PT->getAddressSpace() != D.Pointer_AddressSpace) return true; - if (!PT->isOpaque()) + if (!PT->isOpaque()) { + /* Manually consume a pointer to empty struct descriptor, which is + * used for externref. We don't want to enforce that the struct is + * anonymous in this case. (This renders externref intrinsics + * non-unique, but this will go away with opaque pointers anyway.) */ + if (Infos.front().Kind == IITDescriptor::Struct && + Infos.front().Struct_NumElements == 0) { + Infos = Infos.slice(1); + return false; + } return matchIntrinsicType(PT->getNonOpaquePointerElementType(), Infos, ArgTys, DeferredChecks, IsDeferredCheck); + } // Consume IIT descriptors relating to the pointer element type. // FIXME: Intrinsic type matching of nested single value types or even // aggregates doesn't work properly with opaque pointers but hopefully @@ -1491,7 +1501,8 @@ case IITDescriptor::Struct: { StructType *ST = dyn_cast(Ty); - if (!ST || ST->getNumElements() != D.Struct_NumElements) + if (!ST || !ST->isLiteral() || ST->isPacked() || + ST->getNumElements() != D.Struct_NumElements) return true; for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i) Index: llvm/test/Bitcode/intrinsics-struct-upgrade.ll =================================================================== --- /dev/null +++ llvm/test/Bitcode/intrinsics-struct-upgrade.ll @@ -0,0 +1,18 @@ +; RUN: llvm-dis < %s.bc | FileCheck %s + +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } + +declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*) + +; CHECK-LABEL: define %struct.__neon_int8x8x2_t @test_named_struct_return(i8* %A) { +; CHECK: %1 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A) +; CHECK: %2 = extractvalue { <8 x i8>, <8 x i8> } %1, 0 +; CHECK: %3 = insertvalue %struct.__neon_int8x8x2_t poison, <8 x i8> %2, 0 +; CHECK: %4 = extractvalue { <8 x i8>, <8 x i8> } %1, 1 +; CHECK: %5 = insertvalue %struct.__neon_int8x8x2_t %3, <8 x i8> %4, 1 +; CHECK: ret %struct.__neon_int8x8x2_t %5 + +define %struct.__neon_int8x8x2_t @test_named_struct_return(i8* %A) { + %val = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A) + ret %struct.__neon_int8x8x2_t %val +} Index: llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll =================================================================== --- llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll +++ llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll @@ -4,9 +4,8 @@ ; RUN: llc < %s -O0 -mcpu=x86-64 -mattr=+avx512f | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-apple-darwin10" - %0 = type { i32, i1 } ; type %0 -declare %0 @llvm.sadd.with.overflow.i32(i32, i32) nounwind +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind define fastcc i32 @test() nounwind { entry: @@ -16,12 +15,12 @@ ; CHECK-NEXT: addl $0, [[REG]] ; CHECK-NEXT: seto {{%[a-z]+l}} ; CHECK: jo LBB0_2 - %tmp1 = call %0 @llvm.sadd.with.overflow.i32(i32 1, i32 0) - %tmp2 = extractvalue %0 %tmp1, 1 + %tmp1 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 1, i32 0) + %tmp2 = extractvalue { i32, i1 } %tmp1, 1 br i1 %tmp2, label %.backedge, label %BB3 BB3: - %tmp4 = extractvalue %0 %tmp1, 0 + %tmp4 = extractvalue { i32, i1 } %tmp1, 0 br label %.backedge .backedge: Index: llvm/test/CodeGen/X86/fast-isel-extract.ll =================================================================== --- llvm/test/CodeGen/X86/fast-isel-extract.ll +++ llvm/test/CodeGen/X86/fast-isel-extract.ll @@ -1,7 +1,6 @@ ; RUN: llc < %s -mtriple x86_64-apple-darwin11 -O0 -fast-isel-abort=1 | FileCheck %s %struct.x = type { i64, i64 } -%addovf = type { i32, i1 } declare %struct.x @f() define void @test1(i64*) nounwind ssp { @@ -28,13 +27,13 @@ ; CHECK: addq $10, %rdx } -declare %addovf @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone define void @test3(i32 %x, i32 %y, i32* %z) { - %r = call %addovf @llvm.sadd.with.overflow.i32(i32 %x, i32 %y) - %sum = extractvalue %addovf %r, 0 + %r = call { i32, i1 }@llvm.sadd.with.overflow.i32(i32 %x, i32 %y) + %sum = extractvalue { i32, i1 }%r, 0 %sum3 = mul i32 %sum, 3 - %bit = extractvalue %addovf %r, 1 + %bit = extractvalue { i32, i1 }%r, 1 br i1 %bit, label %then, label %end then: