Index: clang/lib/Driver/ToolChains/Cuda.h =================================================================== --- clang/lib/Driver/ToolChains/Cuda.h +++ clang/lib/Driver/ToolChains/Cuda.h @@ -30,6 +30,8 @@ const Driver &D; bool IsValid = false; CudaVersion Version = CudaVersion::UNKNOWN; + std::string DetectedVersion; + bool DetectedVersionIsNotSupported = false; std::string InstallPath; std::string BinPath; std::string LibPath; @@ -75,6 +77,10 @@ std::string getLibDeviceFile(StringRef Gpu) const { return LibDeviceMap.lookup(Gpu); } + void WarnIfUnsupportedVersion(); + +private: + void ParseCudaVersionFile(llvm::StringRef V); }; namespace tools { Index: clang/lib/Driver/ToolChains/Cuda.cpp =================================================================== --- clang/lib/Driver/ToolChains/Cuda.cpp +++ clang/lib/Driver/ToolChains/Cuda.cpp @@ -32,24 +32,28 @@ // Parses the contents of version.txt in an CUDA installation. It should // contain one line of the from e.g. "CUDA Version 7.5.2". -static CudaVersion ParseCudaVersionFile(const Driver &D, llvm::StringRef V) { +void CudaInstallationDetector::ParseCudaVersionFile(llvm::StringRef V) { + Version = CudaVersion::UNKNOWN; if (!V.startswith("CUDA Version ")) - return CudaVersion::UNKNOWN; + return; V = V.substr(strlen("CUDA Version ")); SmallVector VersionParts; V.split(VersionParts, '.'); if (VersionParts.size() < 2) - return CudaVersion::UNKNOWN; - std::string MajorMinor = join_items(".", VersionParts[0], VersionParts[1]); - CudaVersion Version = CudaStringToVersion(MajorMinor); + return; + DetectedVersion = join_items(".", VersionParts[0], VersionParts[1]); + Version = CudaStringToVersion(DetectedVersion); if (Version != CudaVersion::UNKNOWN) - return Version; + return; - // Issue a warning and assume that the version we've found is compatible with - // the latest version we support. 
- D.Diag(diag::warn_drv_unknown_cuda_version) - << MajorMinor << CudaVersionToString(CudaVersion::LATEST); - return CudaVersion::LATEST; + Version = CudaVersion::LATEST; + DetectedVersionIsNotSupported = true; +} + +void CudaInstallationDetector::WarnIfUnsupportedVersion() { + if (DetectedVersionIsNotSupported) + D.Diag(diag::warn_drv_unknown_cuda_version) + << DetectedVersion << CudaVersionToString(Version); } CudaInstallationDetector::CudaInstallationDetector( @@ -147,7 +151,7 @@ // version.txt isn't present. Version = CudaVersion::CUDA_70; } else { - Version = ParseCudaVersionFile(D, (*VersionFile)->getBuffer()); + ParseCudaVersionFile((*VersionFile)->getBuffer()); } if (Version >= CudaVersion::CUDA_90) { @@ -565,8 +569,10 @@ const Action::OffloadKind OK) : ToolChain(D, Triple, Args), HostTC(HostTC), CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) { - if (CudaInstallation.isValid()) + if (CudaInstallation.isValid()) { + CudaInstallation.WarnIfUnsupportedVersion(); getProgramPaths().push_back(CudaInstallation.getBinPath()); + } // Lookup binaries into the driver directory, this is used to // discover the clang-offload-bundler executable. getProgramPaths().push_back(getDriver().Dir); Index: clang/test/Driver/cuda-version-check.cu =================================================================== --- clang/test/Driver/cuda-version-check.cu +++ clang/test/Driver/cuda-version-check.cu @@ -10,6 +10,10 @@ // RUN: FileCheck %s --check-prefix=OK // RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA-unknown/usr/local/cuda 2>&1 %s | \ // RUN: FileCheck %s --check-prefix=UNKNOWN_VERSION +// Make sure that we don't warn about CUDA version during C++ compilation. 
+// RUN: %clang --target=x86_64-linux -v -### -x c++ --cuda-gpu-arch=sm_60 \ +// RUN: --cuda-path=%S/Inputs/CUDA-unknown/usr/local/cuda 2>&1 %s | \ +// RUN: FileCheck %s --check-prefix=UNKNOWN_VERSION_CXX // The installation at Inputs/CUDA is CUDA 7.0, which doesn't support sm_60. // RUN: %clang --target=x86_64-linux -v -### --cuda-gpu-arch=sm_60 --cuda-path=%S/Inputs/CUDA/usr/local/cuda 2>&1 %s | \ @@ -62,3 +66,4 @@ // ERR_SM61-NOT: error: GPU arch sm_61 // UNKNOWN_VERSION: Unknown CUDA version 999.999. Assuming the latest supported version +// UNKNOWN_VERSION_CXX-NOT: Unknown CUDA version Index: llvm/include/llvm/Transforms/Scalar/SROA.h =================================================================== --- llvm/include/llvm/Transforms/Scalar/SROA.h +++ llvm/include/llvm/Transforms/Scalar/SROA.h @@ -18,6 +18,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueHandle.h" #include namespace llvm { @@ -77,8 +78,8 @@ /// A collection of instructions to delete. /// We try to batch deletions to simplify code and make things a bit more - /// efficient. - SetVector> DeadInsts; + /// efficient. We also make sure there are no dangling pointers. + SmallVector DeadInsts; /// Post-promotion worklist.
/// Index: llvm/lib/Transforms/Scalar/SROA.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SROA.cpp +++ llvm/lib/Transforms/Scalar/SROA.cpp @@ -2451,7 +2451,7 @@ void deleteIfTriviallyDead(Value *V) { Instruction *I = cast(V); if (isInstructionTriviallyDead(I)) - Pass.DeadInsts.insert(I); + Pass.DeadInsts.push_back(I); } Value *rewriteVectorizedLoadInst() { @@ -2589,7 +2589,7 @@ LI.replaceAllUsesWith(V); } - Pass.DeadInsts.insert(&LI); + Pass.DeadInsts.push_back(&LI); deleteIfTriviallyDead(OldOp); LLVM_DEBUG(dbgs() << " to: " << *V << "\n"); return !LI.isVolatile() && !IsPtrAdjusted; @@ -2617,7 +2617,7 @@ StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); if (AATags) Store->setAAMetadata(AATags); - Pass.DeadInsts.insert(&SI); + Pass.DeadInsts.push_back(&SI); LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); return true; @@ -2640,7 +2640,7 @@ LLVMContext::MD_access_group}); if (AATags) Store->setAAMetadata(AATags); - Pass.DeadInsts.insert(&SI); + Pass.DeadInsts.push_back(&SI); LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); return true; } @@ -2713,7 +2713,7 @@ NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); if (NewSI->isAtomic()) NewSI->setAlignment(SI.getAlign()); - Pass.DeadInsts.insert(&SI); + Pass.DeadInsts.push_back(&SI); deleteIfTriviallyDead(OldOp); LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n"); @@ -2774,7 +2774,7 @@ } // Record this instruction for deletion. - Pass.DeadInsts.insert(&II); + Pass.DeadInsts.push_back(&II); Type *AllocaTy = NewAI.getAllocatedType(); Type *ScalarTy = AllocaTy->getScalarType(); @@ -2940,7 +2940,7 @@ return false; } // Record this instruction for deletion. - Pass.DeadInsts.insert(&II); + Pass.DeadInsts.push_back(&II); // Strip all inbounds GEPs and pointer casts to try to dig out any root // alloca that should be re-examined after rewriting this instruction. 
@@ -3075,7 +3075,7 @@ assert(II.getArgOperand(1) == OldPtr); // Record this instruction for deletion. - Pass.DeadInsts.insert(&II); + Pass.DeadInsts.push_back(&II); // Lifetime intrinsics are only promotable if they cover the whole alloca. // Therefore, we drop lifetime intrinsics which don't cover the whole @@ -3942,7 +3942,7 @@ } // Mark the original store as dead. - DeadInsts.insert(SI); + DeadInsts.push_back(SI); } // Save the split loads if there are deferred stores among the users. @@ -3950,7 +3950,7 @@ SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads))); // Mark the original load as dead and kill the original slice. - DeadInsts.insert(LI); + DeadInsts.push_back(LI); Offsets.S->kill(); } @@ -4072,9 +4072,9 @@ // trivial CSE, including instcombine. if (LI->hasOneUse()) { assert(*LI->user_begin() == SI && "Single use isn't this store!"); - DeadInsts.insert(LI); + DeadInsts.push_back(LI); } - DeadInsts.insert(SI); + DeadInsts.push_back(SI); Offsets.S->kill(); } @@ -4425,7 +4425,7 @@ // minimal. if (Instruction *OldI = dyn_cast(OldV)) if (isInstructionTriviallyDead(OldI)) { - DeadInsts.insert(OldI); + DeadInsts.push_back(OldI); } } @@ -4473,7 +4473,7 @@ DeadUser->replaceAllUsesWith(UndefValue::get(DeadUser->getType())); // And mark it for deletion. - DeadInsts.insert(DeadUser); + DeadInsts.push_back(DeadUser); Changed = true; } for (Use *DeadOp : AS.getDeadOperands()) { @@ -4511,7 +4511,9 @@ SmallPtrSetImpl &DeletedAllocas) { bool Changed = false; while (!DeadInsts.empty()) { - Instruction *I = DeadInsts.pop_back_val(); + Instruction *I = dyn_cast_or_null(DeadInsts.pop_back_val()); + if (I == NULL) continue; + LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n"); // If the instruction is an alloca, find the possible dbg.declare connected @@ -4530,7 +4532,7 @@ // Zero out the operand and see if it becomes trivially dead. Operand = nullptr; if (isInstructionTriviallyDead(U)) - DeadInsts.insert(U); + DeadInsts.push_back(U); } ++NumDeleted;