diff --git a/.mailmap b/.mailmap --- a/.mailmap +++ b/.mailmap @@ -35,4 +35,5 @@ Jon Roelofs LLVM GN Syncbot Martin Storsjö +Ramkumar Ramachandra Saleem Abdulrasool diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -323,7 +323,7 @@ if (FileBuildID) return StringRef(*FileBuildID); - return None; + return std::nullopt; } void setFileBuildID(StringRef ID) { FileBuildID = std::string(ID); } diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -980,7 +980,7 @@ if (Callback(StringRef(Name))) return StringRef(Name); - return None; + return std::nullopt; } /// Check if (possibly one out of many) function name matches the given @@ -1318,7 +1318,7 @@ /// Return the name of the section this function originated from. Optional getOriginSectionName() const { if (!OriginSection) - return None; + return std::nullopt; return OriginSection->getName(); } diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h --- a/bolt/include/bolt/Core/DebugData.h +++ b/bolt/include/bolt/Core/DebugData.h @@ -1016,7 +1016,8 @@ /// Most of the time, using type units with DWO is not a good idea. /// If type units are used, the caller is responsible for verifying /// that abbreviations are shared by CU and TUs. - DebugAbbrevWriter(DWARFContext &Context, Optional DWOId = None) + DebugAbbrevWriter(DWARFContext &Context, + Optional DWOId = std::nullopt) : Context(Context), DWOId(DWOId) {} DebugAbbrevWriter(const DebugAbbrevWriter &) = delete; diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -136,7 +136,7 @@ unsigned Index) const { const MCInst *AnnotationInst = getAnnotationInst(Inst); if (!AnnotationInst) - return None; + return std::nullopt; for (int I = AnnotationInst->getNumOperands() - 1; I >= 0; --I) { int64_t ImmValue = AnnotationInst->getOperand(I).getImm(); @@ -145,7 +145,7 @@ } } - return None; + return std::nullopt; } protected: @@ -1670,7 +1670,7 @@ auto AI = AnnotationNameIndexMap.find(Name); if (AI != AnnotationNameIndexMap.end()) return AI->second; - return None; + return std::nullopt; } /// Return annotation index matching the \p Name. 
Create a new index if the diff --git a/bolt/include/bolt/Passes/ReachingDefOrUse.h b/bolt/include/bolt/Passes/ReachingDefOrUse.h --- a/bolt/include/bolt/Passes/ReachingDefOrUse.h +++ b/bolt/include/bolt/Passes/ReachingDefOrUse.h @@ -34,7 +34,7 @@ public: ReachingDefOrUse(const RegAnalysis &RA, BinaryFunction &BF, - Optional TrackingReg = None, + Optional TrackingReg = std::nullopt, MCPlusBuilder::AllocatorIdTy AllocId = 0) : InstrsDataflowAnalysis, !Def>(BF, AllocId), RA(RA), TrackingReg(TrackingReg) {} @@ -125,7 +125,7 @@ } // Gen if (!this->BC.MIB->isCFI(Point)) { - if (TrackingReg == None) { + if (TrackingReg == std::nullopt) { // Track all instructions Next.set(this->ExprToIdx[&Point]); } else { diff --git a/bolt/include/bolt/Rewrite/DWARFRewriter.h b/bolt/include/bolt/Rewrite/DWARFRewriter.h --- a/bolt/include/bolt/Rewrite/DWARFRewriter.h +++ b/bolt/include/bolt/Rewrite/DWARFRewriter.h @@ -98,7 +98,7 @@ DebugAbbrevWriter &AbbrevWriter, DebugLocWriter &DebugLocWriter, DebugRangesSectionWriter &RangesWriter, - Optional RangesBase = None); + Optional RangesBase = std::nullopt); /// Patches the binary for an object's address ranges to be updated. /// The object can be anything that has associated address ranges via either @@ -109,12 +109,10 @@ /// \p DIE is the object's DIE in the input binary. /// \p RangesBase if present, update \p DIE to use DW_AT_GNU_ranges_base /// attribute. - void updateDWARFObjectAddressRanges(const DWARFDie DIE, - uint64_t DebugRangesOffset, - SimpleBinaryPatcher &DebugInfoPatcher, - DebugAbbrevWriter &AbbrevWriter, - uint64_t LowPCToUse, - Optional RangesBase = None); + void updateDWARFObjectAddressRanges( + const DWARFDie DIE, uint64_t DebugRangesOffset, + SimpleBinaryPatcher &DebugInfoPatcher, DebugAbbrevWriter &AbbrevWriter, + uint64_t LowPCToUse, Optional RangesBase = std::nullopt); std::unique_ptr makeFinalLocListsSection(DebugInfoBinaryPatcher &DebugInfoPatcher, @@ -165,15 +163,16 @@ void convertToRangesPatchAbbrev(const DWARFUnit &Unit, const DWARFAbbreviationDeclaration *Abbrev, DebugAbbrevWriter &AbbrevWriter, - Optional RangesBase = None); + Optional RangesBase = std::nullopt); /// Update \p DIE that was using DW_AT_(low|high)_pc with DW_AT_ranges offset. /// Updates to the DIE should be synced with abbreviation updates using the /// function above. - void convertToRangesPatchDebugInfo(DWARFDie DIE, uint64_t RangesSectionOffset, - SimpleBinaryPatcher &DebugInfoPatcher, - uint64_t LowPCToUse, - Optional RangesBase = None); + void + convertToRangesPatchDebugInfo(DWARFDie DIE, uint64_t RangesSectionOffset, + SimpleBinaryPatcher &DebugInfoPatcher, + uint64_t LowPCToUse, + Optional RangesBase = std::nullopt); /// Helper function for creating and returning per-DWO patchers/writers. 
template diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1494,8 +1494,8 @@ FileName = *FName; assert(FileName != ""); DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(DestCUID); - return cantFail(getDwarfFile(Dir, FileName, 0, None, None, DestCUID, - DstUnit->getVersion())); + return cantFail(getDwarfFile(Dir, FileName, 0, std::nullopt, std::nullopt, + DestCUID, DstUnit->getVersion())); } std::vector BinaryContext::getSortedFunctions() { @@ -1530,7 +1530,7 @@ Optional BinaryContext::getDWOCU(uint64_t DWOId) { auto Iter = DWOCUs.find(DWOId); if (Iter == DWOCUs.end()) - return None; + return std::nullopt; return Iter->second; } @@ -1657,7 +1657,7 @@ Iter->second->getUnitDIE().find(dwarf::DW_AT_name), nullptr); } BinaryLineTable.setRootFile(CU->getCompilationDir(), *Name, Checksum, - None); + std::nullopt); } BinaryLineTable.setDwarfVersion(DwarfVersion); @@ -1665,8 +1665,8 @@ // Assign a unique label to every line table, one per CU. // Make sure empty debug line tables are registered too. if (FileNames.empty()) { - cantFail( - getDwarfFile("", "", 0, None, None, CUID, DwarfVersion)); + cantFail(getDwarfFile("", "", 0, std::nullopt, std::nullopt, + CUID, DwarfVersion)); continue; } const uint32_t Offset = DwarfVersion < 5 ? 1 : 0; @@ -1686,8 +1686,8 @@ Optional Checksum; if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5) Checksum = LineTable->Prologue.FileNames[I].Checksum; - cantFail( - getDwarfFile(Dir, FileName, 0, Checksum, None, CUID, DwarfVersion)); + cantFail(getDwarfFile(Dir, FileName, 0, Checksum, std::nullopt, CUID, + DwarfVersion)); } } } @@ -1897,7 +1897,7 @@ } } - return None; + return std::nullopt; } ErrorOr BinaryContext::getSectionForAddress(uint64_t Address) { diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp --- a/bolt/lib/Core/DebugData.cpp +++ b/bolt/lib/Core/DebugData.cpp @@ -52,7 +52,7 @@ Optional Value = AbbrevDecl->getAttributeValueFromOffset(Index, Offset, U); if (!Value) - return None; + return std::nullopt; // AttributeSpec const DWARFAbbreviationDeclaration::AttributeSpec *AttrVal = AbbrevDecl->attributes().begin() + Index; @@ -76,14 +76,14 @@ Optional findAttributeInfo(const DWARFDie DIE, dwarf::Attribute Attr) { if (!DIE.isValid()) - return None; + return std::nullopt; const DWARFAbbreviationDeclaration *AbbrevDecl = DIE.getAbbreviationDeclarationPtr(); if (!AbbrevDecl) - return None; + return std::nullopt; Optional Index = AbbrevDecl->findAttributeIndex(Attr); if (!Index) - return None; + return std::nullopt; return findAttributeInfo(DIE, AbbrevDecl, *Index); } @@ -1585,7 +1585,7 @@ if (LineTables.empty()) return; // In a v5 non-split line table, put the strings in a separate section. 
- Optional LineStr(None); + Optional LineStr(std::nullopt); ErrorOr LineStrSection = BC.getUniqueSectionByName(".debug_line_str"); // Some versions of GCC output DWARF5 .debug_info, but DWARF4 or lower diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp --- a/bolt/lib/Core/MCPlusBuilder.cpp +++ b/bolt/lib/Core/MCPlusBuilder.cpp @@ -135,15 +135,15 @@ Optional MCPlusBuilder::getEHInfo(const MCInst &Inst) const { if (!isCall(Inst)) - return None; + return std::nullopt; Optional LPSym = getAnnotationOpValue(Inst, MCAnnotation::kEHLandingPad); if (!LPSym) - return None; + return std::nullopt; Optional Action = getAnnotationOpValue(Inst, MCAnnotation::kEHAction); if (!Action) - return None; + return std::nullopt; return std::make_pair(reinterpret_cast(*LPSym), static_cast(*Action)); @@ -221,7 +221,7 @@ Optional Value = getAnnotationOpValue(Inst, MCAnnotation::kConditionalTailCall); if (!Value) - return None; + return std::nullopt; return static_cast(*Value); } @@ -243,7 +243,7 @@ Optional MCPlusBuilder::getOffset(const MCInst &Inst) const { Optional Value = getAnnotationOpValue(Inst, MCAnnotation::kOffset); if (!Value) - return None; + return std::nullopt; return static_cast(*Value); } diff --git a/bolt/lib/Passes/AsmDump.cpp b/bolt/lib/Passes/AsmDump.cpp --- a/bolt/lib/Passes/AsmDump.cpp +++ b/bolt/lib/Passes/AsmDump.cpp @@ -174,7 +174,7 @@ /*ShowInst=*/false)); AsmStreamer->initSections(true, *BC.STI); std::unique_ptr TM(BC.TheTarget->createTargetMachine( - BC.TripleName, "", "", TargetOptions(), None)); + BC.TripleName, "", "", TargetOptions(), std::nullopt)); std::unique_ptr MAP( BC.TheTarget->createAsmPrinter(*TM, std::move(AsmStreamer))); diff --git a/bolt/lib/Passes/DataflowInfoManager.cpp b/bolt/lib/Passes/DataflowInfoManager.cpp --- a/bolt/lib/Passes/DataflowInfoManager.cpp +++ b/bolt/lib/Passes/DataflowInfoManager.cpp @@ -19,7 +19,7 @@ if (RD) return *RD; assert(RA && "RegAnalysis required"); - RD.reset(new ReachingDefOrUse(*RA, BF, None, AllocatorId)); + RD.reset(new ReachingDefOrUse(*RA, BF, std::nullopt, AllocatorId)); RD->run(); return *RD; } @@ -30,7 +30,7 @@ if (RU) return *RU; assert(RA && "RegAnalysis required"); - RU.reset(new ReachingDefOrUse(*RA, BF, None, AllocatorId)); + RU.reset(new ReachingDefOrUse(*RA, BF, std::nullopt, AllocatorId)); RU->run(); return *RU; } diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp --- a/bolt/lib/Profile/BoltAddressTranslation.cpp +++ b/bolt/lib/Profile/BoltAddressTranslation.cpp @@ -263,7 +263,7 @@ auto Iter = Maps.find(FuncAddress); if (Iter == Maps.end()) - return None; + return std::nullopt; const MapTy &Map = Iter->second; auto FromIter = Map.upper_bound(From); diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -246,8 +246,8 @@ } TempFiles.push_back(PPI.StderrPath.data()); - Optional Redirects[] = { - llvm::None, // Stdin + std::optional Redirects[] = { + std::nullopt, // Stdin StringRef(PPI.StdoutPath.data()), // Stdout StringRef(PPI.StderrPath.data())}; // Stderr @@ -261,9 +261,9 @@ if (Wait) PPI.PI.ReturnCode = sys::ExecuteAndWait(PerfPath.data(), Argv, - /*envp*/ llvm::None, Redirects); + /*envp*/ std::nullopt, Redirects); else - PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ llvm::None, + PPI.PI = sys::ExecuteNoWait(PerfPath.data(), Argv, /*envp*/ std::nullopt, Redirects); free(WritableArgsString); @@ -943,7 +943,7 @@ 
SmallVector, 16> Res; if (!recordTrace(BF, FirstLBR, SecondLBR, Count, &Res)) - return None; + return std::nullopt; return Res; } @@ -1820,13 +1820,13 @@ if (LineEnd == StringRef::npos) { reportError("expected rest of line"); Diag << "Found: " << ParsingBuf << "\n"; - return None; + return std::nullopt; } StringRef Line = ParsingBuf.substr(0, LineEnd); size_t Pos = Line.find("PERF_RECORD_COMM exec"); if (Pos == StringRef::npos) - return None; + return std::nullopt; Line = Line.drop_front(Pos); // Line: @@ -1836,7 +1836,7 @@ if (PIDStr.getAsInteger(10, PID)) { reportError("expected PID"); Diag << "Found: " << PIDStr << "in '" << Line << "'\n"; - return None; + return std::nullopt; } return PID; @@ -1850,7 +1850,7 @@ uint64_t USecTime; if (SecTimeStr.getAsInteger(10, SecTime) || USecTimeStr.getAsInteger(10, USecTime)) - return None; + return std::nullopt; return SecTime * 1000000ULL + USecTime; } } @@ -1863,14 +1863,14 @@ if (LineEnd == StringRef::npos) { reportError("expected rest of line"); Diag << "Found: " << ParsingBuf << "\n"; - return None; + return std::nullopt; } StringRef Line = ParsingBuf.substr(0, LineEnd); size_t Pos = Line.find("PERF_RECORD_FORK"); if (Pos == StringRef::npos) { consumeRestOfLine(); - return None; + return std::nullopt; } ForkInfo FI; @@ -1889,14 +1889,14 @@ if (ChildPIDStr.getAsInteger(10, FI.ChildPID)) { reportError("expected PID"); Diag << "Found: " << ChildPIDStr << "in '" << Line << "'\n"; - return None; + return std::nullopt; } const StringRef ParentPIDStr = Line.rsplit('(').second.split(':').first; if (ParentPIDStr.getAsInteger(10, FI.ParentPID)) { reportError("expected PID"); Diag << "Found: " << ParentPIDStr << "in '" << Line << "'\n"; - return None; + return std::nullopt; } consumeRestOfLine(); @@ -2147,17 +2147,17 @@ ErrorOr BuildIDStr = parseString(FieldSeparator, true); if (std::error_code EC = BuildIDStr.getError()) - return None; + return std::nullopt; // If one of the strings is missing, don't issue a parsing error, but still // do not return a value. 
consumeAllRemainingFS(); if (checkNewLine()) - return None; + return std::nullopt; ErrorOr NameStr = parseString(FieldSeparator, true); if (std::error_code EC = NameStr.getError()) - return None; + return std::nullopt; consumeRestOfLine(); return std::make_pair(NameStr.get(), BuildIDStr.get()); @@ -2205,7 +2205,7 @@ if (!FileName.empty()) return FileName; - return None; + return std::nullopt; } std::error_code diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp --- a/bolt/lib/Profile/DataReader.cpp +++ b/bolt/lib/Profile/DataReader.cpp @@ -49,7 +49,7 @@ return Name.substr(0, LTOSuffixPos + 11); if ((LTOSuffixPos = Name.find(".llvm.")) != StringRef::npos) return Name.substr(0, LTOSuffixPos + 6); - return None; + return std::nullopt; } namespace { diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp --- a/bolt/lib/Rewrite/DWARFRewriter.cpp +++ b/bolt/lib/Rewrite/DWARFRewriter.cpp @@ -67,7 +67,7 @@ for (dwarf::Attribute &Attr : Attrs) if (Optional Info = findAttributeInfo(DIE, Attr)) return Info; - return None; + return std::nullopt; } } // namespace bolt } // namespace llvm @@ -302,7 +302,7 @@ DebugLocWriter->finalize(*DwoDebugInfoPatcher, *DWOAbbrevWriter); DwoDebugInfoPatcher->clearDestinationLabels(); if (!DwoDebugInfoPatcher->getWasRangBasedUsed()) - RangesBase = None; + RangesBase = std::nullopt; if (Unit->getVersion() >= 5) TempRangesSectionWriter->finalizeSection(); } @@ -821,7 +821,7 @@ DebugInfoPatcher.addLE32Patch(RangesBaseAttrInfo->Offset, static_cast(*RangesBase), RangesBaseAttrInfo->Size); - RangesBase = None; + RangesBase = std::nullopt; } } @@ -1218,7 +1218,7 @@ auto SectionIter = KnownSections.find(SectionName); if (SectionIter == KnownSections.end()) - return None; + return std::nullopt; Streamer.switchSection(SectionIter->second.first); StringRef OutData = SectionContents; diff --git a/bolt/lib/Rewrite/MachORewriteInstance.cpp b/bolt/lib/Rewrite/MachORewriteInstance.cpp --- a/bolt/lib/Rewrite/MachORewriteInstance.cpp +++ b/bolt/lib/Rewrite/MachORewriteInstance.cpp @@ -229,7 +229,7 @@ } return (TextVMAddr && StartOffset) ? Optional(*TextVMAddr + *StartOffset) - : llvm::None; + : std::nullopt; } } // anonymous namespace diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -681,7 +681,7 @@ Optional RewriteInstance::getPrintableBuildID() const { if (BuildID.empty()) - return None; + return std::nullopt; std::string Str; raw_string_ostream OS(Str); @@ -4763,7 +4763,7 @@ assert(SymbolName && "cannot get symbol name"); auto updateSymbolValue = [&](const StringRef Name, - Optional Value = None) { + Optional Value = std::nullopt) { NewSymbol.st_value = Value ? 
*Value : getNewValueForSymbol(Name); NewSymbol.st_shndx = ELF::SHN_ABS; outs() << "BOLT-INFO: setting " << Name << " to 0x" diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -2600,7 +2600,7 @@ if (FKI.Flags & MCFixupKindInfo::FKF_IsPCRel) { switch (FKI.TargetSize) { default: - return None; + return std::nullopt; case 8: RelType = ELF::R_X86_64_PC8; break; case 16: RelType = ELF::R_X86_64_PC16; break; case 32: RelType = ELF::R_X86_64_PC32; break; @@ -2609,7 +2609,7 @@ } else { switch (FKI.TargetSize) { default: - return None; + return std::nullopt; case 8: RelType = ELF::R_X86_64_8; break; case 16: RelType = ELF::R_X86_64_16; break; case 32: RelType = ELF::R_X86_64_32; break; diff --git a/bolt/lib/Utils/Utils.cpp b/bolt/lib/Utils/Utils.cpp --- a/bolt/lib/Utils/Utils.cpp +++ b/bolt/lib/Utils/Utils.cpp @@ -69,7 +69,7 @@ Optional readDWARFExpressionTargetReg(StringRef ExprBytes) { uint8_t Opcode = ExprBytes[0]; if (Opcode == dwarf::DW_CFA_def_cfa_expression) - return None; + return std::nullopt; assert((Opcode == dwarf::DW_CFA_expression || Opcode == dwarf::DW_CFA_val_expression) && "invalid DWARF expression CFI"); diff --git a/clang-tools-extra/clang-doc/HTMLGenerator.cpp b/clang-tools-extra/clang-doc/HTMLGenerator.cpp --- a/clang-tools-extra/clang-doc/HTMLGenerator.cpp +++ b/clang-tools-extra/clang-doc/HTMLGenerator.cpp @@ -308,7 +308,7 @@ static std::unique_ptr genReference(const Reference &Type, StringRef CurrentDirectory, - llvm::Optional JumpToSection = None) { + llvm::Optional JumpToSection = std::nullopt) { if (Type.Path.empty()) { if (!JumpToSection) return std::make_unique(Type.Name); @@ -437,7 +437,7 @@ static std::unique_ptr writeFileDefinition(const Location &L, - llvm::Optional RepositoryUrl = None) { + llvm::Optional RepositoryUrl = std::nullopt) { if (!L.IsFileInRootDir || !RepositoryUrl) return std::make_unique( HTMLTag::TAG_P, "Defined at line " + std::to_string(L.LineNumber) + diff --git a/clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllMacros.cpp b/clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllMacros.cpp --- a/clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllMacros.cpp +++ b/clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllMacros.cpp @@ -26,7 +26,7 @@ std::string FilePath = getIncludePath(*SM, info->getDefinitionLoc(), Collector); if (FilePath.empty()) - return llvm::None; + return std::nullopt; return SymbolInfo(MacroNameTok.getIdentifierInfo()->getName(), SymbolInfo::SymbolKind::Macro, FilePath, {}); } diff --git a/clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllSymbols.cpp b/clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllSymbols.cpp --- a/clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllSymbols.cpp +++ b/clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllSymbols.cpp @@ -85,7 +85,7 @@ Type = SymbolInfo::SymbolKind::EnumDecl; // Ignore anonymous enum declarations. if (ND->getName().empty()) - return llvm::None; + return std::nullopt; } else { assert(llvm::isa(ND) && "Matched decl must be one of VarDecl, " @@ -93,7 +93,7 @@ "EnumDecl and RecordDecl!"); // C-style record decl can have empty name, e.g "struct { ... } var;". 
if (ND->getName().empty()) - return llvm::None; + return std::nullopt; Type = SymbolInfo::SymbolKind::Class; } @@ -102,11 +102,12 @@ llvm::errs() << "Declaration " << ND->getDeclName() << "(" << ND->getDeclKindName() << ") has invalid declaration location."; - return llvm::None; + return std::nullopt; } std::string FilePath = getIncludePath(SM, Loc, Collector); - if (FilePath.empty()) return llvm::None; + if (FilePath.empty()) + return std::nullopt; return SymbolInfo(ND->getNameAsString(), Type, FilePath, GetContexts(ND)); } diff --git a/clang-tools-extra/clang-query/Query.cpp b/clang-tools-extra/clang-query/Query.cpp --- a/clang-tools-extra/clang-query/Query.cpp +++ b/clang-tools-extra/clang-query/Query.cpp @@ -121,7 +121,7 @@ continue; TD.emitDiagnostic(FullSourceLoc(Iter->first, SM), DiagnosticsEngine::Note, - "source locations here", None, None); + "source locations here", std::nullopt, std::nullopt); Iter = PrintLocations(OS, Iter, Locs.LocationAccessors.end()); OS << '\n'; @@ -137,10 +137,10 @@ SM.getPresumedLineNumber(Iter->first.getEnd())) continue; - TD.emitDiagnostic(FullSourceLoc(Iter->first.getBegin(), SM), - DiagnosticsEngine::Note, - "source ranges here " + Iter->first.printToString(SM), - CharSourceRange::getTokenRange(Iter->first), None); + TD.emitDiagnostic( + FullSourceLoc(Iter->first.getBegin(), SM), DiagnosticsEngine::Note, + "source ranges here " + Iter->first.printToString(SM), + CharSourceRange::getTokenRange(Iter->first), std::nullopt); Iter = PrintLocations(OS, Iter, Locs.RangeAccessors.end()); } @@ -157,7 +157,7 @@ TD.emitDiagnostic( FullSourceLoc(Iter->first.getBegin(), SM), DiagnosticsEngine::Note, "source range " + Iter->first.printToString(SM) + " starting here...", - CharSourceRange::getTokenRange(Iter->first), None); + CharSourceRange::getTokenRange(Iter->first), std::nullopt); auto ColNum = SM.getPresumedColumnNumber(Iter->first.getEnd()); auto LastLineLoc = Iter->first.getEnd().getLocWithOffset(-(ColNum - 1)); @@ -166,7 +166,7 @@ DiagnosticsEngine::Note, "... ending here", CharSourceRange::getTokenRange( SourceRange(LastLineLoc, Iter->first.getEnd())), - None); + std::nullopt); Iter = PrintLocations(OS, Iter, Locs.RangeAccessors.end()); } @@ -232,7 +232,7 @@ TD.emitDiagnostic( FullSourceLoc(R.getBegin(), AST->getSourceManager()), DiagnosticsEngine::Note, "\"" + BI->first + "\" binds here", - CharSourceRange::getTokenRange(R), None); + CharSourceRange::getTokenRange(R), std::nullopt); } } if (QS.PrintOutput) { diff --git a/clang-tools-extra/clang-tidy/ClangTidyCheck.h b/clang-tools-extra/clang-tidy/ClangTidyCheck.h --- a/clang-tools-extra/clang-tidy/ClangTidyCheck.h +++ b/clang-tools-extra/clang-tidy/ClangTidyCheck.h @@ -198,7 +198,7 @@ return Result; diagnoseBadIntegerOption(NamePrefix + LocalName, *Value); } - return None; + return std::nullopt; } /// Read a named option from the ``Context`` and parse it as an @@ -235,14 +235,14 @@ IsGlobal = true; ValueOr = getLocalOrGlobal(LocalName); if (!ValueOr) - return None; + return std::nullopt; } T Result{}; if (!StringRef(*ValueOr).getAsInteger(10, Result)) return Result; diagnoseBadIntegerOption( IsGlobal ? 
Twine(LocalName) : NamePrefix + LocalName, *ValueOr); - return None; + return std::nullopt; } /// Read a named option from the ``Context`` and parse it as an @@ -279,7 +279,7 @@ if (llvm::Optional ValueOr = getEnumInt(LocalName, typeEraseMapping(), false, IgnoreCase)) return static_cast(*ValueOr); - return None; + return std::nullopt; } /// Read a named option from the ``Context`` and parse it as an @@ -319,7 +319,7 @@ if (llvm::Optional ValueOr = getEnumInt(LocalName, typeEraseMapping(), true, IgnoreCase)) return static_cast(*ValueOr); - return None; + return std::nullopt; } /// Read a named option from the ``Context`` and parse it as an diff --git a/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp --- a/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp @@ -58,7 +58,7 @@ const auto &Iter = CheckOptions.find((NamePrefix + LocalName).str()); if (Iter != CheckOptions.end()) return StringRef(Iter->getValue().Value); - return None; + return std::nullopt; } static ClangTidyOptions::OptionMap::const_iterator @@ -86,7 +86,7 @@ Context->getOptionsCollector()); if (Iter != CheckOptions.end()) return StringRef(Iter->getValue().Value); - return None; + return std::nullopt; } static Optional getAsBool(StringRef Value, @@ -99,7 +99,7 @@ long long Number; if (!Value.getAsInteger(10, Number)) return Number != 0; - return None; + return std::nullopt; } template <> @@ -110,7 +110,7 @@ return Result; diagnoseBadBooleanOption(NamePrefix + LocalName, *ValueOr); } - return None; + return std::nullopt; } template <> @@ -123,7 +123,7 @@ return Result; diagnoseBadBooleanOption(Iter->getKey(), Iter->getValue().Value); } - return None; + return std::nullopt; } void ClangTidyCheck::OptionsView::store(ClangTidyOptions::OptionMap &Options, @@ -155,7 +155,7 @@ Context->getOptionsCollector()) : CheckOptions.find((NamePrefix + LocalName).str()); if (Iter == CheckOptions.end()) - return None; + return std::nullopt; StringRef Value = Iter->getValue().Value; StringRef Closest; @@ -182,7 +182,7 @@ diagnoseBadEnumOption(Iter->getKey(), Iter->getValue().Value, Closest); else diagnoseBadEnumOption(Iter->getKey(), Iter->getValue().Value); - return None; + return std::nullopt; } static constexpr llvm::StringLiteral ConfigWarning( diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp --- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp @@ -258,7 +258,7 @@ llvm::Optional ClangTidyContext::getProfileStorageParams() const { if (ProfilePrefix.empty()) - return llvm::None; + return std::nullopt; return ClangTidyProfiling::StorageParams(ProfilePrefix, CurrentFile); } diff --git a/clang-tools-extra/clang-tidy/ClangTidyOptions.cpp b/clang-tools-extra/clang-tidy/ClangTidyOptions.cpp --- a/clang-tools-extra/clang-tidy/ClangTidyOptions.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyOptions.cpp @@ -147,7 +147,7 @@ Options.HeaderFilterRegex = ""; Options.SystemHeaders = false; Options.FormatStyle = "none"; - Options.User = llvm::None; + Options.User = std::nullopt; for (const ClangTidyModuleRegistry::entry &Module : ClangTidyModuleRegistry::entries()) Options.mergeWith(Module.instantiate()->getModuleOptions(), 0); @@ -368,7 +368,7 @@ if (!DirectoryStatus || !DirectoryStatus->isDirectory()) { llvm::errs() << "Error reading configuration from " << Directory << ": directory doesn't 
exist.\n"; - return llvm::None; + return std::nullopt; } for (const ConfigFileHandler &ConfigHandler : ConfigHandlers) { @@ -403,7 +403,7 @@ } return OptionsSource(*ParsedOptions, std::string(ConfigFile)); } - return llvm::None; + return std::nullopt; } /// Parses -line-filter option and stores it to the \c Options. diff --git a/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp --- a/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp +++ b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp @@ -52,7 +52,7 @@ .Case("NOLINTNEXTLINE", NoLintType::NoLintNextLine) .Case("NOLINTBEGIN", NoLintType::NoLintBegin) .Case("NOLINTEND", NoLintType::NoLintEnd) - .Default(None); + .Default(std::nullopt); return Type; } diff --git a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp --- a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp @@ -30,7 +30,7 @@ .Case("Seconds", DurationScale::Seconds) .Case("Minutes", DurationScale::Minutes) .Case("Hours", DurationScale::Hours) - .Default(llvm::None); + .Default(std::nullopt); } // Given either an integer or float literal, return its value. @@ -89,7 +89,7 @@ break; } - return llvm::None; + return std::nullopt; } // Given the scale of a duration and a `Multiplier`, determine if `Multiplier` @@ -107,7 +107,7 @@ OldScale = std::get<0>(*Result); } - return llvm::None; + return std::nullopt; } void DurationFactoryScaleCheck::registerMatchers(MatchFinder *Finder) { diff --git a/clang-tools-extra/clang-tidy/abseil/DurationRewriter.cpp b/clang-tools-extra/clang-tidy/abseil/DurationRewriter.cpp --- a/clang-tools-extra/clang-tidy/abseil/DurationRewriter.cpp +++ b/clang-tools-extra/clang-tidy/abseil/DurationRewriter.cpp @@ -31,11 +31,11 @@ double Value = FloatLiteral.getValueAsApproximateDouble(); if (std::fmod(Value, 1) == 0) { if (Value >= static_cast(1u << 31)) - return llvm::None; + return std::nullopt; return llvm::APSInt::get(static_cast(Value)); } - return llvm::None; + return std::nullopt; } const std::pair & @@ -83,7 +83,7 @@ return tooling::fixit::getText(*MaybeCallArg, *Result.Context).str(); } - return llvm::None; + return std::nullopt; } /// If `Node` is a call to the inverse of `Scale`, return that inverse's @@ -99,7 +99,7 @@ return tooling::fixit::getText(*MaybeCallArg, *Result.Context).str(); } - return llvm::None; + return std::nullopt; } /// Returns the factory function name for a given `Scale`. 
@@ -201,7 +201,7 @@ Node, *Result.Context))) return tooling::fixit::getText(*MaybeCastArg, *Result.Context).str(); - return llvm::None; + return std::nullopt; } llvm::Optional @@ -212,7 +212,7 @@ if (llvm::Optional IntValue = truncateIfIntegral(*LitFloat)) return toString(*IntValue, /*radix=*/10); - return llvm::None; + return std::nullopt; } std::string simplifyDurationFactoryArg(const MatchFinder::MatchResult &Result, @@ -247,7 +247,7 @@ auto ScaleIter = ScaleMap.find(std::string(Name)); if (ScaleIter == ScaleMap.end()) - return llvm::None; + return std::nullopt; return ScaleIter->second; } @@ -263,7 +263,7 @@ auto ScaleIter = ScaleMap.find(std::string(Name)); if (ScaleIter == ScaleMap.end()) - return llvm::None; + return std::nullopt; return ScaleIter->second; } diff --git a/clang-tools-extra/clang-tidy/abseil/FasterStrsplitDelimiterCheck.cpp b/clang-tools-extra/clang-tidy/abseil/FasterStrsplitDelimiterCheck.cpp --- a/clang-tools-extra/clang-tidy/abseil/FasterStrsplitDelimiterCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/FasterStrsplitDelimiterCheck.cpp @@ -45,11 +45,11 @@ // Now replace the " with '. std::string::size_type Pos = Result.find_first_of('"'); if (Pos == Result.npos) - return llvm::None; + return std::nullopt; Result[Pos] = '\''; Pos = Result.find_last_of('"'); if (Pos == Result.npos) - return llvm::None; + return std::nullopt; Result[Pos] = '\''; return Result; } diff --git a/clang-tools-extra/clang-tidy/bugprone/BadSignalToKillThreadCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/BadSignalToKillThreadCheck.cpp --- a/clang-tools-extra/clang-tidy/bugprone/BadSignalToKillThreadCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/BadSignalToKillThreadCheck.cpp @@ -36,17 +36,17 @@ const auto TryExpandAsInteger = [](Preprocessor::macro_iterator It) -> Optional { if (It == PP->macro_end()) - return llvm::None; + return std::nullopt; const MacroInfo *MI = PP->getMacroInfo(It->first); const Token &T = MI->tokens().back(); if (!T.isLiteral() || !T.getLiteralData()) - return llvm::None; + return std::nullopt; StringRef ValueStr = StringRef(T.getLiteralData(), T.getLength()); llvm::APInt IntValue; constexpr unsigned AutoSenseRadix = 0; if (ValueStr.getAsInteger(AutoSenseRadix, IntValue)) - return llvm::None; + return std::nullopt; return IntValue.getZExtValue(); }; diff --git a/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp --- a/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp @@ -753,7 +753,7 @@ auto MemcpyS = Match({"memcpy_s", 0, 2, 3, false}); // void *memchr(const void *src, int c, size_t count) - auto Memchr = Match({"memchr", None, 0, 2, false}); + auto Memchr = Match({"memchr", std::nullopt, 0, 2, false}); // void *memmove(void *dest, const void *src, size_t count) auto Memmove = Match({"memmove", 0, 1, 2, false}); @@ -762,14 +762,14 @@ auto MemmoveS = Match({"memmove_s", 0, 2, 3, false}); // int strncmp(const char *str1, const char *str2, size_t count); - auto StrncmpRHS = Match({"strncmp", None, 1, 2, true}); - auto StrncmpLHS = Match({"strncmp", None, 0, 2, true}); + auto StrncmpRHS = Match({"strncmp", std::nullopt, 1, 2, true}); + auto StrncmpLHS = Match({"strncmp", std::nullopt, 0, 2, true}); // size_t strxfrm(char *dest, const char *src, size_t count); auto Strxfrm = Match({"strxfrm", 0, 1, 2, false}); // errno_t strerror_s(char *buffer, size_t bufferSize, int 
errnum); - auto StrerrorS = Match({"strerror_s", 0, None, 1, false}); + auto StrerrorS = Match({"strerror_s", 0, std::nullopt, 1, false}); auto AnyOfMatchers = anyOf(Memcpy, MemcpyS, Memmove, MemmoveS, StrncmpRHS, StrncmpLHS, Strxfrm, StrerrorS); diff --git a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp --- a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp @@ -72,7 +72,7 @@ getDoubleUnderscoreFixup(StringRef Name, const LangOptions &LangOpts) { if (hasReservedDoubleUnderscore(Name, LangOpts)) return collapseConsecutive(Name, '_'); - return None; + return std::nullopt; } static bool startsWithUnderscoreCapital(StringRef Name) { @@ -82,7 +82,7 @@ static Optional getUnderscoreCapitalFixup(StringRef Name) { if (startsWithUnderscoreCapital(Name)) return std::string(Name.drop_front(1)); - return None; + return std::nullopt; } static bool startsWithUnderscoreInGlobalNamespace(StringRef Name, @@ -94,7 +94,7 @@ getUnderscoreGlobalNamespaceFixup(StringRef Name, bool IsInGlobalNamespace) { if (startsWithUnderscoreInGlobalNamespace(Name, IsInGlobalNamespace)) return std::string(Name.drop_front(1)); - return None; + return std::nullopt; } static std::string getNonReservedFixup(std::string Name) { @@ -112,7 +112,7 @@ ArrayRef AllowedIdentifiers) { assert(!Name.empty()); if (llvm::is_contained(AllowedIdentifiers, Name)) - return None; + return std::nullopt; // TODO: Check for names identical to language keywords, and other names // specifically reserved by language standards, e.g. C++ 'zombie names' and C @@ -149,7 +149,7 @@ startsWithUnderscoreCapital(Name) || startsWithUnderscoreInGlobalNamespace(Name, IsInGlobalNamespace))) return FailureInfo{NonReservedTag, getNonReservedFixup(std::string(Name))}; - return None; + return std::nullopt; } Optional diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemoryComparisonCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemoryComparisonCheck.cpp --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemoryComparisonCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousMemoryComparisonCheck.cpp @@ -22,7 +22,7 @@ if (SizeExpr->EvaluateAsRValue(Result, Ctx)) return Ctx.toBits( CharUnits::fromQuantity(Result.Val.getInt().getExtValue())); - return None; + return std::nullopt; } void SuspiciousMemoryComparisonCheck::registerMatchers(MatchFinder *Finder) { diff --git a/clang-tools-extra/clang-tidy/bugprone/UncheckedOptionalAccessCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UncheckedOptionalAccessCheck.cpp --- a/clang-tools-extra/clang-tidy/bugprone/UncheckedOptionalAccessCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UncheckedOptionalAccessCheck.cpp @@ -46,7 +46,7 @@ Expected Context = ControlFlowContext::build(&FuncDecl, FuncDecl.getBody(), &ASTCtx); if (!Context) - return llvm::None; + return std::nullopt; dataflow::DataflowAnalysisContext AnalysisContext( std::make_unique()); @@ -66,7 +66,7 @@ llvm::move(EltDiagnostics, std::back_inserter(Diagnostics)); }); if (!BlockToOutputState) - return llvm::None; + return std::nullopt; return Diagnostics; } diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/VirtualClassDestructorCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/VirtualClassDestructorCheck.cpp --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/VirtualClassDestructorCheck.cpp +++ 
b/clang-tools-extra/clang-tidy/cppcoreguidelines/VirtualClassDestructorCheck.cpp @@ -51,7 +51,7 @@ getVirtualKeywordRange(const CXXDestructorDecl &Destructor, const SourceManager &SM, const LangOptions &LangOpts) { if (Destructor.getLocation().isMacroID()) - return None; + return std::nullopt; SourceLocation VirtualBeginLoc = Destructor.getBeginLoc(); SourceLocation VirtualBeginSpellingLoc = @@ -63,7 +63,7 @@ /// virtual is included. Optional NextToken = Lexer::findNextToken(VirtualEndLoc, SM, LangOpts); if (!NextToken) - return None; + return std::nullopt; SourceLocation StartOfNextToken = NextToken->getLocation(); return CharSourceRange::getCharRange(VirtualBeginLoc, StartOfNextToken); diff --git a/clang-tools-extra/clang-tidy/google/UpgradeGoogletestCaseCheck.cpp b/clang-tools-extra/clang-tidy/google/UpgradeGoogletestCaseCheck.cpp --- a/clang-tools-extra/clang-tidy/google/UpgradeGoogletestCaseCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/UpgradeGoogletestCaseCheck.cpp @@ -37,7 +37,7 @@ return Mapping.second; } - return llvm::None; + return std::nullopt; } namespace { diff --git a/clang-tools-extra/clang-tidy/modernize/DeprecatedIosBaseAliasesCheck.cpp b/clang-tools-extra/clang-tidy/modernize/DeprecatedIosBaseAliasesCheck.cpp --- a/clang-tools-extra/clang-tidy/modernize/DeprecatedIosBaseAliasesCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/DeprecatedIosBaseAliasesCheck.cpp @@ -26,7 +26,7 @@ .Case("io_state", "iostate") .Case("open_mode", "openmode") .Case("seek_dir", "seekdir") - .Default(llvm::None); + .Default(std::nullopt); } void DeprecatedIosBaseAliasesCheck::registerMatchers(MatchFinder *Finder) { diff --git a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp --- a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp @@ -212,7 +212,7 @@ // If the Token/Macro contains more than one type of tokens, we would need // to split the macro in order to move parts to the trailing return type. if (ContainsQualifiers + ContainsSpecifiers + ContainsSomethingElse > 1) - return llvm::None; + return std::nullopt; return CT; } @@ -243,7 +243,7 @@ if (!MI || MI->isFunctionLike()) { // Cannot handle function style macros. diag(F.getLocation(), Message); - return llvm::None; + return std::nullopt; } } @@ -255,7 +255,7 @@ ClassifiedTokens.push_back(*CT); else { diag(F.getLocation(), Message); - return llvm::None; + return std::nullopt; } } diff --git a/clang-tools-extra/clang-tidy/objc/NSInvocationArgumentLifetimeCheck.cpp b/clang-tools-extra/clang-tidy/objc/NSInvocationArgumentLifetimeCheck.cpp --- a/clang-tools-extra/clang-tidy/objc/NSInvocationArgumentLifetimeCheck.cpp +++ b/clang-tools-extra/clang-tidy/objc/NSInvocationArgumentLifetimeCheck.cpp @@ -57,7 +57,7 @@ StringRef Ownership) { size_t Index = Text.find(Ownership); if (Index == StringRef::npos) - return llvm::None; + return std::nullopt; SourceLocation Begin = Range.getBegin().getLocWithOffset(Index); SourceLocation End = Begin.getLocWithOffset(Ownership.size()); @@ -71,7 +71,7 @@ assert(VD && "VarDecl parameter must not be null"); // Don't provide fix-its for any parameter variables at this time. 
if (isa(VD)) - return llvm::None; + return std::nullopt; // Currently there is no way to directly get the source range for the // __weak/__strong ObjC lifetime qualifiers, so it's necessary to string @@ -81,7 +81,7 @@ if (Range.isInvalid()) { // An invalid range likely means inside a macro, in which case don't supply // a fix-it. - return llvm::None; + return std::nullopt; } StringRef VarDeclText = Lexer::getSourceText(Range, SM, LangOpts); diff --git a/clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.cpp b/clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.cpp --- a/clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.cpp @@ -30,11 +30,11 @@ // Now replace the " with '. auto Pos = Result.find_first_of('"'); if (Pos == Result.npos) - return llvm::None; + return std::nullopt; Result[Pos] = '\''; Pos = Result.find_last_of('"'); if (Pos == Result.npos) - return llvm::None; + return std::nullopt; Result[Pos] = '\''; return Result; } diff --git a/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.cpp b/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.cpp --- a/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.cpp +++ b/clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.cpp @@ -46,7 +46,7 @@ bool Invalid; const char *TextAfter = SM.getCharacterData(Loc, &Invalid); if (Invalid) { - return llvm::None; + return std::nullopt; } size_t Offset = std::strcspn(TextAfter, "\n"); return Loc.getLocWithOffset(TextAfter[Offset] == '\0' ? Offset : Offset + 1); diff --git a/clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.cpp b/clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.cpp --- a/clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.cpp @@ -28,7 +28,7 @@ findConstToRemove(const FunctionDecl *Def, const MatchFinder::MatchResult &Result) { if (!Def->getReturnType().isLocalConstQualified()) - return None; + return std::nullopt; // Get the begin location for the function name, including any qualifiers // written in the source (for out-of-line declarations). 
A FunctionDecl's @@ -45,7 +45,7 @@ *Result.SourceManager, Result.Context->getLangOpts()); if (FileRange.isInvalid()) - return None; + return std::nullopt; return utils::lexer::getQualifyingToken( tok::kw_const, FileRange, *Result.Context, *Result.SourceManager); diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp @@ -1361,14 +1361,14 @@ const IdentifierNamingCheck::HungarianNotationOption &HNOption, StyleKind SK, const SourceManager &SM, bool IgnoreFailedSplit) const { if (SK == SK_Invalid || !NamingStyles[SK]) - return None; + return std::nullopt; const IdentifierNamingCheck::NamingStyle &Style = *NamingStyles[SK]; if (Style.IgnoredRegexp.isValid() && Style.IgnoredRegexp.match(Name)) - return None; + return std::nullopt; if (matchesStyle(Type, Name, Style, HNOption, ND)) - return None; + return std::nullopt; std::string KindName = fixupWithCase(Type, StyleNames[SK], ND, Style, HNOption, @@ -1383,7 +1383,7 @@ << llvm::formatv(": unable to split words for {0} '{1}'\n", KindName, Name)); } - return None; + return std::nullopt; } return RenamerClangTidyCheck::FailureInfo{std::move(KindName), std::move(Fixup)}; @@ -1395,7 +1395,7 @@ SourceLocation Loc = Decl->getLocation(); const FileStyle &FileStyle = getStyleForFile(SM.getFilename(Loc)); if (!FileStyle.isActive()) - return llvm::None; + return std::nullopt; return getFailureInfo(HungarianNotation.getDeclTypeName(Decl), Decl->getName(), Decl, Loc, FileStyle.getStyles(), @@ -1411,7 +1411,7 @@ SourceLocation Loc = MacroNameTok.getLocation(); const FileStyle &Style = getStyleForFile(SM.getFilename(Loc)); if (!Style.isActive()) - return llvm::None; + return std::nullopt; return getFailureInfo("", MacroNameTok.getIdentifierInfo()->getName(), nullptr, Loc, Style.getStyles(), Style.getHNOption(), diff --git a/clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.cpp b/clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.cpp --- a/clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.cpp @@ -110,10 +110,10 @@ const LangOptions &LangOpts) { std::size_t DeclCount = std::distance(DS->decl_begin(), DS->decl_end()); if (DeclCount < 2) - return None; + return std::nullopt; if (rangeContainsExpansionsOrDirectives(DS->getSourceRange(), SM, LangOpts)) - return None; + return std::nullopt; // The initial type of the declaration and each declaration has it's own // slice. This is necessary, because pointers and references bind only @@ -127,12 +127,12 @@ const auto *FirstDecl = dyn_cast(*DS->decl_begin()); if (FirstDecl == nullptr) - return None; + return std::nullopt; // FIXME: Member pointers are not transformed correctly right now, that's // why they are treated as problematic here. if (typeIsMemberPointer(FirstDecl->getType().IgnoreParens().getTypePtr())) - return None; + return std::nullopt; // Consider the following case: 'int * pointer, value = 42;' // Created slices (inclusive) [ ][ ] [ ] @@ -168,7 +168,7 @@ SourceRange DeclRange(DS->getBeginLoc(), Start); if (DeclRange.isInvalid() || isMacroID(DeclRange)) - return None; + return std::nullopt; // The first slice, that is prepended to every isolated declaration, is // created. 
@@ -182,7 +182,7 @@ // FIXME: Member pointers are not transformed correctly right now, that's // why they are treated as problematic here. if (typeIsMemberPointer(CurrentDecl->getType().IgnoreParens().getTypePtr())) - return None; + return std::nullopt; SourceLocation DeclEnd = CurrentDecl->hasInit() @@ -192,7 +192,7 @@ SourceRange VarNameRange(DeclBegin, DeclEnd); if (VarNameRange.isInvalid() || isMacroID(VarNameRange)) - return None; + return std::nullopt; Slices.emplace_back(VarNameRange); DeclBegin = DeclEnd.getLocWithOffset(1); @@ -212,14 +212,14 @@ LangOpts); if (CharRange.isInvalid()) - return None; + return std::nullopt; bool InvalidText = false; StringRef Snippet = Lexer::getSourceText(CharRange, SM, LangOpts, &InvalidText); if (InvalidText) - return None; + return std::nullopt; Snippets.emplace_back(Snippet); } diff --git a/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp b/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp --- a/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp @@ -73,7 +73,7 @@ } else if (T->is(tok::coloncolon)) { Result.append("::"); } else { // Any other kind of token is unexpected here. - return llvm::None; + return std::nullopt; } } } diff --git a/clang-tools-extra/clang-tidy/readability/QualifiedAutoCheck.cpp b/clang-tools-extra/clang-tidy/readability/QualifiedAutoCheck.cpp --- a/clang-tools-extra/clang-tidy/readability/QualifiedAutoCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/QualifiedAutoCheck.cpp @@ -48,7 +48,7 @@ Result.Context->getLangOpts()); if (FileRange.isInvalid()) - return llvm::None; + return std::nullopt; tok::TokenKind Tok = Qual == Qualifier::Const @@ -70,7 +70,7 @@ if (TypeSpecifier.getBegin().isMacroID() || TypeSpecifier.getEnd().isMacroID()) - return llvm::None; + return std::nullopt; return TypeSpecifier; } @@ -78,11 +78,11 @@ const Token &ConstToken) { if (TypeSpecifier.getBegin().getLocWithOffset(-1) == ConstToken.getEndLoc()) { TypeSpecifier.setBegin(ConstToken.getLocation()); - return llvm::None; + return std::nullopt; } if (TypeSpecifier.getEnd().getLocWithOffset(1) == ConstToken.getLocation()) { TypeSpecifier.setEnd(ConstToken.getEndLoc()); - return llvm::None; + return std::nullopt; } return SourceRange(ConstToken.getLocation(), ConstToken.getEndLoc()); } diff --git a/clang-tools-extra/clang-tidy/readability/RedundantStringInitCheck.cpp b/clang-tools-extra/clang-tidy/readability/RedundantStringInitCheck.cpp --- a/clang-tools-extra/clang-tidy/readability/RedundantStringInitCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/RedundantStringInitCheck.cpp @@ -50,7 +50,7 @@ E = Arg->getEndLoc(); } if (B.isInvalid() || E.isInvalid()) - return llvm::None; + return std::nullopt; return SourceRange(B, E); } diff --git a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp --- a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp @@ -300,18 +300,18 @@ static Optional getAsBoolLiteral(const Expr *E, bool FilterMacro) { if (const auto *Bool = dyn_cast(E)) { if (FilterMacro && Bool->getBeginLoc().isMacroID()) - return llvm::None; + return std::nullopt; return Bool->getValue(); } if (const auto *UnaryOp = dyn_cast(E)) { if (FilterMacro && UnaryOp->getBeginLoc().isMacroID()) - return None; + return std::nullopt; if 
(UnaryOp->getOpcode() == UO_LNot) if (Optional Res = getAsBoolLiteral( UnaryOp->getSubExpr()->IgnoreImplicit(), FilterMacro)) return !*Res; } - return llvm::None; + return std::nullopt; } template struct NodeAndBool { @@ -567,7 +567,7 @@ if (Check->reportDeMorgan(Context, Op, BinaryOp, !IsProcessing, parent(), Parens) && !Check->areDiagsSelfContained()) { - llvm::SaveAndRestore RAII(IsProcessing, true); + llvm::SaveAndRestore RAII(IsProcessing, true); return Base::TraverseUnaryOperator(Op); } } diff --git a/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp b/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp --- a/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp @@ -587,7 +587,7 @@ assert(Idx < HeuristicCount); if (!Defaults[Idx].hasBounds()) - return None; + return std::nullopt; switch (BK) { case BoundKind::DissimilarBelow: diff --git a/clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.cpp b/clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.cpp --- a/clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.cpp @@ -67,11 +67,11 @@ const SourceManager &SM) { // Do nothing if the provided location is invalid. if (Loc.isInvalid()) - return llvm::None; + return std::nullopt; // Look where the location was *actually* written. SourceLocation SpellingLoc = SM.getSpellingLoc(Loc); if (SpellingLoc.isInvalid()) - return llvm::None; + return std::nullopt; return SpellingLoc; } @@ -81,7 +81,7 @@ getMacroAwareLocation(Loc.getBegin(), SM); llvm::Optional End = getMacroAwareLocation(Loc.getEnd(), SM); if (!Begin || !End) - return llvm::None; + return std::nullopt; return SourceRange(*Begin, *End); } @@ -100,7 +100,7 @@ if (NewSuffix != NewSuffixes.end()) return NewSuffix->str(); // Nope, I guess we have to keep it as-is. - return llvm::None; + return std::nullopt; } template @@ -123,7 +123,7 @@ llvm::Optional Range = getMacroAwareSourceRange(ReplacementDsc.LiteralLocation, SM); if (!Range) - return llvm::None; + return std::nullopt; if (RangeCanBeFixed) ReplacementDsc.LiteralLocation = *Range; @@ -138,7 +138,7 @@ // Make sure the first character is actually a digit, instead of // something else, like a non-type template parameter. if (!std::isdigit(static_cast(LiteralSourceText.front()))) - return llvm::None; + return std::nullopt; size_t Skip = 0; @@ -161,7 +161,7 @@ // We can't check whether the *Literal has any suffix or not without actually // looking for the suffix. So it is totally possible that there is no suffix. if (Skip == StringRef::npos) - return llvm::None; + return std::nullopt; // Move the cursor in the source range to the beginning of the suffix. Range->setBegin(Range->getBegin().getLocWithOffset(Skip)); @@ -174,7 +174,7 @@ llvm::Optional NewSuffix = getNewSuffix(ReplacementDsc.OldSuffix, NewSuffixes); if (!NewSuffix || ReplacementDsc.OldSuffix == *NewSuffix) - return llvm::None; // The suffix was already the way it should be. + return std::nullopt; // The suffix was already the way it should be. 
if (RangeCanBeFixed) ReplacementDsc.FixIt = FixItHint::CreateReplacement(*Range, *NewSuffix); diff --git a/clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.cpp b/clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.cpp --- a/clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.cpp @@ -57,10 +57,10 @@ getFileExtension(StringRef FileName, const FileExtensionsSet &FileExtensions) { StringRef Extension = llvm::sys::path::extension(FileName); if (Extension.empty()) - return llvm::None; + return std::nullopt; // Skip "." prefix. if (!FileExtensions.count(Extension.substr(1))) - return llvm::None; + return std::nullopt; return Extension; } diff --git a/clang-tools-extra/clang-tidy/utils/FixItHintUtils.cpp b/clang-tools-extra/clang-tidy/utils/FixItHintUtils.cpp --- a/clang-tools-extra/clang-tidy/utils/FixItHintUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/FixItHintUtils.cpp @@ -44,7 +44,7 @@ static Optional skipLParensBackwards(SourceLocation Start, const ASTContext &Context) { if (locDangerous(Start)) - return None; + return std::nullopt; auto PreviousTokenLParen = [&Start, &Context]() { Token T; @@ -58,14 +58,14 @@ Context.getLangOpts()); if (locDangerous(Start)) - return None; + return std::nullopt; return Start; } static Optional fixIfNotDangerous(SourceLocation Loc, StringRef Text) { if (locDangerous(Loc)) - return None; + return std::nullopt; return FixItHint::CreateInsertion(Loc, Text); } @@ -93,7 +93,7 @@ if (IgnoredParens) return fixIfNotDangerous(*IgnoredParens, buildQualifier(Qualifier)); - return None; + return std::nullopt; } llvm_unreachable("Unknown QualifierPolicy enum"); } @@ -102,13 +102,13 @@ DeclSpec::TQ Qualifier, const ASTContext &Context) { if (locDangerous(Var.getLocation())) - return None; + return std::nullopt; Optional IgnoredParens = skipLParensBackwards(Var.getLocation(), Context); if (IgnoredParens) return fixIfNotDangerous(*IgnoredParens, buildQualifier(Qualifier)); - return None; + return std::nullopt; } static Optional @@ -136,7 +136,7 @@ Var.getLocation(), Context.getSourceManager(), Context.getLangOpts(), tok::star); if (locDangerous(BeforeStar)) - return None; + return std::nullopt; Optional IgnoredParens = skipLParensBackwards(BeforeStar, Context); @@ -144,7 +144,7 @@ if (IgnoredParens) return fixIfNotDangerous(*IgnoredParens, buildQualifier(Qualifier, true)); - return None; + return std::nullopt; } } @@ -159,7 +159,7 @@ return fixIfNotDangerous(BeforeStar, buildQualifier(Qualifier, true)); } - return None; + return std::nullopt; } static Optional @@ -178,7 +178,7 @@ if (IgnoredParens) return fixIfNotDangerous(*IgnoredParens, buildQualifier(Qualifier, true)); - return None; + return std::nullopt; } Optional addQualifierToVarDecl(const VarDecl &Var, @@ -221,7 +221,7 @@ QualTarget, QualPolicy, Context); } - return None; + return std::nullopt; } } // namespace fixit } // namespace utils diff --git a/clang-tools-extra/clang-tidy/utils/IncludeInserter.cpp b/clang-tools-extra/clang-tidy/utils/IncludeInserter.cpp --- a/clang-tools-extra/clang-tidy/utils/IncludeInserter.cpp +++ b/clang-tools-extra/clang-tidy/utils/IncludeInserter.cpp @@ -71,13 +71,13 @@ IncludeInserter::createIncludeInsertion(FileID FileID, llvm::StringRef Header) { bool IsAngled = Header.consume_front("<"); if (IsAngled != Header.consume_back(">")) - return llvm::None; + return std::nullopt; // We assume the same Header will never be included both angled and not // angled. 
// In self contained diags mode we don't track what headers we have already // inserted. if (!SelfContainedDiags && !InsertedHeaders[FileID].insert(Header).second) - return llvm::None; + return std::nullopt; return getOrCreate(FileID).createIncludeInsertion(Header, IsAngled); } diff --git a/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp b/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp --- a/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp +++ b/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp @@ -179,7 +179,7 @@ return FixItHint::CreateInsertion(Location.getBegin(), IncludeStmt); } if (FileName == IncludeEntry) { - return llvm::None; + return std::nullopt; } } // FileName comes after all include entries in bucket, insert it after @@ -203,7 +203,7 @@ } } if (NonEmptyKind == IK_InvalidInclude) { - return llvm::None; + return std::nullopt; } if (NonEmptyKind < IncludeKind) { diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp --- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp @@ -138,7 +138,7 @@ if (Tok.is(tok::less)) SawTemplate = true; else if (Tok.isOneOf(tok::greater, tok::greatergreater)) - LastMatchAfterTemplate = None; + LastMatchAfterTemplate = std::nullopt; else if (Tok.is(TK)) { if (SawTemplate) LastMatchAfterTemplate = Tok; @@ -146,8 +146,8 @@ LastMatchBeforeTemplate = Tok; } } - return LastMatchAfterTemplate != None ? LastMatchAfterTemplate - : LastMatchBeforeTemplate; + return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate + : LastMatchBeforeTemplate; } static bool breakAndReturnEnd(const Stmt &S) { diff --git a/clang-tools-extra/clang-tidy/utils/NamespaceAliaser.cpp b/clang-tools-extra/clang-tidy/utils/NamespaceAliaser.cpp --- a/clang-tools-extra/clang-tidy/utils/NamespaceAliaser.cpp +++ b/clang-tools-extra/clang-tidy/utils/NamespaceAliaser.cpp @@ -32,10 +32,10 @@ const std::vector &Abbreviations) { const FunctionDecl *Function = getSurroundingFunction(Context, Statement); if (!Function || !Function->hasBody()) - return None; + return std::nullopt; if (AddedAliases[Function].count(Namespace.str()) != 0) - return None; + return std::nullopt; // FIXME: Doesn't consider the order of declarations. // If we accidentally pick an alias defined later in the function, @@ -51,7 +51,7 @@ if (ExistingAlias != nullptr) { AddedAliases[Function][Namespace.str()] = ExistingAlias->getName().str(); - return None; + return std::nullopt; } for (const auto &Abbreviation : Abbreviations) { @@ -75,7 +75,7 @@ return FixItHint::CreateInsertion(Loc, Declaration); } - return None; + return std::nullopt; } std::string NamespaceAliaser::getNamespaceName(ASTContext &Context, diff --git a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp --- a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp @@ -258,12 +258,12 @@ if (*Search) { if (Found) return NameLookup( - llvm::None); // Multiple decls found in different base classes. + std::nullopt); // Multiple decls found in different base classes. Found = *Search; continue; } } else - return NameLookup(llvm::None); // Propagate multiple resolution back up. + return NameLookup(std::nullopt); // Propagate multiple resolution back up. } return NameLookup(Found); // If nullptr, decl wasn't found. 
} diff --git a/clang-tools-extra/clang-tidy/utils/TypeTraits.cpp b/clang-tools-extra/clang-tidy/utils/TypeTraits.cpp --- a/clang-tools-extra/clang-tidy/utils/TypeTraits.cpp +++ b/clang-tools-extra/clang-tidy/utils/TypeTraits.cpp @@ -41,7 +41,7 @@ llvm::Optional isExpensiveToCopy(QualType Type, const ASTContext &Context) { if (Type->isDependentType() || Type->isIncompleteType()) - return llvm::None; + return std::nullopt; return !Type.isTriviallyCopyableType(Context) && !classHasTrivialCopyAndDestroy(Type) && !hasDeletedCopyConstructor(Type) && diff --git a/clang-tools-extra/clang-tidy/utils/UsingInserter.cpp b/clang-tools-extra/clang-tidy/utils/UsingInserter.cpp --- a/clang-tools-extra/clang-tidy/utils/UsingInserter.cpp +++ b/clang-tools-extra/clang-tidy/utils/UsingInserter.cpp @@ -34,17 +34,17 @@ StringRef UnqualifiedName = getUnqualifiedName(QualifiedName); const FunctionDecl *Function = getSurroundingFunction(Context, Statement); if (!Function) - return None; + return std::nullopt; if (AddedUsing.count(std::make_pair(Function, QualifiedName.str())) != 0) - return None; + return std::nullopt; SourceLocation InsertLoc = Lexer::getLocForEndOfToken( Function->getBody()->getBeginLoc(), 0, SourceMgr, Context.getLangOpts()); // Only use using declarations in the main file, not in includes. if (SourceMgr.getFileID(InsertLoc) != SourceMgr.getMainFileID()) - return None; + return std::nullopt; // FIXME: This declaration could be masked. Investigate if // there is a way to avoid using Sema. @@ -55,7 +55,7 @@ .empty(); if (AlreadyHasUsingDecl) { AddedUsing.emplace(NameInFunction(Function, QualifiedName.str())); - return None; + return std::nullopt; } // Find conflicting declarations and references. auto ConflictingDecl = namedDecl(hasName(UnqualifiedName)); @@ -65,7 +65,7 @@ !match(findAll(declRefExpr(to(ConflictingDecl))), *Function, Context) .empty(); if (HasConflictingDeclaration || HasConflictingDeclRef) - return None; + return std::nullopt; std::string Declaration = (llvm::Twine("\nusing ") + QualifiedName + ";").str(); diff --git a/clang-tools-extra/clangd/AST.cpp b/clang-tools-extra/clangd/AST.cpp --- a/clang-tools-extra/clangd/AST.cpp +++ b/clang-tools-extra/clangd/AST.cpp @@ -65,7 +65,7 @@ } // We return None for ClassTemplateSpecializationDecls because it does not // contain TemplateArgumentLoc information. - return llvm::None; + return std::nullopt; } template @@ -571,7 +571,7 @@ DeducedTypeVisitor V(Loc); V.TraverseAST(ASTCtx); if (V.DeducedType.isNull()) - return llvm::None; + return std::nullopt; return V.DeducedType; } @@ -862,7 +862,7 @@ return std::distance(Args.begin(), Begin); } } - return llvm::None; + return std::nullopt; } static FunctionDecl *getCalleeDeclOrUniqueOverload(CallExpr *E) { diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -67,7 +67,7 @@ return Result; if (!Encoded.empty()) // Empty can be e.g. diagnostics on close. 
elog("unexpected non-numeric version {0}", Encoded); - return llvm::None; + return std::nullopt; } const llvm::StringLiteral ApplyFixCommand = "clangd.applyFix"; @@ -808,7 +808,7 @@ void ClangdLSPServer::onPrepareRename(const TextDocumentPositionParams &Params, Callback> Reply) { Server->prepareRename( - Params.textDocument.uri.file(), Params.position, /*NewName*/ llvm::None, + Params.textDocument.uri.file(), Params.position, /*NewName*/ std::nullopt, Opts.Rename, [Reply = std::move(Reply)](llvm::Expected Result) mutable { if (!Result) @@ -890,7 +890,7 @@ auto File = Params.textDocument.uri.file(); auto Code = Server->getDraft(File); Server->formatFile(File, - /*Rng=*/llvm::None, + /*Rng=*/std::nullopt, [Code = std::move(Code), Reply = std::move(Reply)]( llvm::Expected Result) mutable { if (Result) @@ -951,14 +951,14 @@ static llvm::Optional asCommand(const CodeAction &Action) { Command Cmd; if (Action.command && Action.edit) - return None; // Not representable. (We never emit these anyway). + return std::nullopt; // Not representable. (We never emit these anyway). if (Action.command) { Cmd = *Action.command; } else if (Action.edit) { Cmd.command = std::string(ApplyFixCommand); Cmd.argument = *Action.edit; } else { - return None; + return std::nullopt; } Cmd.title = Action.title; if (Action.kind && *Action.kind == CodeAction::QUICKFIX_KIND) @@ -1153,7 +1153,7 @@ return Reply(Path.takeError()); if (*Path) return Reply(URIForFile::canonicalize(**Path, Params.uri.file())); - return Reply(llvm::None); + return Reply(std::nullopt); }); } @@ -1172,7 +1172,7 @@ if (!H) return Reply(H.takeError()); if (!*H) - return Reply(llvm::None); + return Reply(std::nullopt); Hover R; R.contents.kind = HoverContentFormat; diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -142,7 +142,7 @@ /// defaults and -resource-dir compiler flag). /// If None, ClangdServer calls CompilerInvocation::GetResourcePath() to /// obtain the standard resource directory. - llvm::Optional ResourceDir = llvm::None; + llvm::Optional ResourceDir = std::nullopt; /// Time to wait after a new file version before computing diagnostics. 
DebouncePolicy UpdateDebounce = DebouncePolicy{ diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -140,7 +140,7 @@ private: llvm::IntrusiveRefCntPtr viewImpl() const override { auto OFS = llvm::makeIntrusiveRefCnt( - Base.view(llvm::None)); + Base.view(std::nullopt)); OFS->pushOverlay(DirtyFiles.asVFS()); return OFS; } @@ -515,7 +515,7 @@ CB = std::move(CB), this]() mutable { auto Style = format::getStyle(format::DefaultFormatStyle, File, format::DefaultFallbackStyle, Code, - TFS.view(/*CWD=*/llvm::None).get()); + TFS.view(/*CWD=*/std::nullopt).get()); if (!Style) return CB(Style.takeError()); @@ -566,7 +566,7 @@ if (!InpAST) return CB(InpAST.takeError()); auto R = clangd::rename({Pos, NewName, InpAST->AST, File, - DirtyFS->view(llvm::None), Index, Opts}); + DirtyFS->view(std::nullopt), Index, Opts}); if (!R) return CB(R.takeError()); @@ -659,7 +659,7 @@ this](Expected InpAST) mutable { if (!InpAST) return CB(InpAST.takeError()); - auto FS = DirtyFS->view(llvm::None); + auto FS = DirtyFS->view(std::nullopt); auto Selections = tweakSelection(Sel, *InpAST, FS.get()); if (!Selections) return CB(Selections.takeError()); @@ -713,7 +713,7 @@ // 2) if 1) fails, we use the AST&Index approach, it is slower but supports // different code layout. if (auto CorrespondingFile = - getCorrespondingHeaderOrSource(Path, TFS.view(llvm::None))) + getCorrespondingHeaderOrSource(Path, TFS.view(std::nullopt))) return CB(std::move(CorrespondingFile)); auto Action = [Path = Path.str(), CB = std::move(CB), this](llvm::Expected InpAST) mutable { @@ -989,7 +989,7 @@ return false; }); if (!Success) - CB(llvm::None); + CB(std::nullopt); }; WorkScheduler->runWithAST("GetAST", File, std::move(Action)); } diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp --- a/clang-tools-extra/clangd/CodeComplete.cpp +++ b/clang-tools-extra/clangd/CodeComplete.cpp @@ -258,14 +258,14 @@ headerToInsertIfAllowed(const CodeCompleteOptions &Opts) const { if (Opts.InsertIncludes == CodeCompleteOptions::NeverInsert || RankedIncludeHeaders.empty()) - return None; + return std::nullopt; if (SemaResult && SemaResult->Declaration) { // Avoid inserting new #include if the declaration is found in the current // file e.g. the symbol is forward declared. 
auto &SM = SemaResult->Declaration->getASTContext().getSourceManager(); for (const Decl *RD : SemaResult->Declaration->redecls()) if (SM.isInMainFile(SM.getExpansionLoc(RD->getBeginLoc()))) - return None; + return std::nullopt; } return RankedIncludeHeaders[0]; } @@ -1821,7 +1821,7 @@ (C.IndexResult && C.IndexResult->SymInfo.Kind == index::SymbolKind::Macro)) && !C.Name.startswith_insensitive(Filter->pattern())) - return None; + return std::nullopt; return Filter->match(C.Name); } @@ -2049,7 +2049,7 @@ Content = Content.rtrim(); if (Content.endswith("/*")) return Content.size() - 2; - return None; + return std::nullopt; } CodeCompleteResult codeComplete(PathRef FileName, Position Pos, diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" #include +#include #include #include @@ -41,34 +42,34 @@ auto Xcrun = llvm::sys::findProgramByName("xcrun"); if (!Xcrun) { log("Couldn't find xcrun. Hopefully you have a non-apple toolchain..."); - return llvm::None; + return std::nullopt; } llvm::SmallString<64> OutFile; llvm::sys::fs::createTemporaryFile("clangd-xcrun", "", OutFile); llvm::FileRemover OutRemover(OutFile); - llvm::Optional Redirects[3] = { + std::optional Redirects[3] = { /*stdin=*/{""}, /*stdout=*/{OutFile.str()}, /*stderr=*/{""}}; vlog("Invoking {0} to find clang installation", *Xcrun); int Ret = llvm::sys::ExecuteAndWait(*Xcrun, Argv, - /*Env=*/llvm::None, Redirects, + /*Env=*/std::nullopt, Redirects, /*SecondsToWait=*/10); if (Ret != 0) { log("xcrun exists but failed with code {0}. " "If you have a non-apple toolchain, this is OK. " "Otherwise, try xcode-select --install.", Ret); - return llvm::None; + return std::nullopt; } auto Buf = llvm::MemoryBuffer::getFile(OutFile); if (!Buf) { log("Can't read xcrun output: {0}", Buf.getError().message()); - return llvm::None; + return std::nullopt; } StringRef Path = Buf->get()->getBuffer().trim(); if (Path.empty()) { log("xcrun produced no output"); - return llvm::None; + return std::nullopt; } return Path.str(); } @@ -119,12 +120,12 @@ // The effect of this is to set -isysroot correctly. We do the same. llvm::Optional detectSysroot() { #ifndef __APPLE__ - return llvm::None; + return std::nullopt; #endif // SDKROOT overridden in environment, respect it. Driver will set isysroot. if (::getenv("SDKROOT")) - return llvm::None; + return std::nullopt; return queryXcrun({"xcrun", "--show-sdk-path"}); } diff --git a/clang-tools-extra/clangd/Config.h b/clang-tools-extra/clangd/Config.h --- a/clang-tools-extra/clangd/Config.h +++ b/clang-tools-extra/clangd/Config.h @@ -65,7 +65,7 @@ std::vector &) const>> Edits; /// Where to search for compilation databases for this file's flags. 
- CDBSearchSpec CDBSearch = {CDBSearchSpec::Ancestors, llvm::None}; + CDBSearchSpec CDBSearch = {CDBSearchSpec::Ancestors, std::nullopt}; } CompileFlags; enum class BackgroundPolicy { Build, Skip }; diff --git a/clang-tools-extra/clangd/ConfigCompile.cpp b/clang-tools-extra/clangd/ConfigCompile.cpp --- a/clang-tools-extra/clangd/ConfigCompile.cpp +++ b/clang-tools-extra/clangd/ConfigCompile.cpp @@ -111,7 +111,7 @@ std::string RegexError; if (!Result.isValid(RegexError)) { diag(Error, "Invalid regex " + Anchored + ": " + RegexError, Text.Range); - return llvm::None; + return std::nullopt; } return Result; } @@ -129,7 +129,7 @@ Description) .str(), Path.Range); - return llvm::None; + return std::nullopt; } llvm::SmallString<256> AbsPath = llvm::StringRef(*Path); llvm::sys::fs::make_absolute(FragmentDirectory, AbsPath); diff --git a/clang-tools-extra/clangd/ConfigYAML.cpp b/clang-tools-extra/clangd/ConfigYAML.cpp --- a/clang-tools-extra/clangd/ConfigYAML.cpp +++ b/clang-tools-extra/clangd/ConfigYAML.cpp @@ -31,7 +31,7 @@ llvm::ArrayRef AllowedValues) { unsigned MaxEdit = (Search.size() + 1) / 3; if (!MaxEdit) - return llvm::None; + return std::nullopt; llvm::Optional Result; for (const auto &AllowedValue : AllowedValues) { unsigned EditDistance = Search.edit_distance(AllowedValue, true, MaxEdit); @@ -357,7 +357,7 @@ if (auto *BS = llvm::dyn_cast(&N)) return Located(BS->getValue().str(), N.getSourceRange()); warning(Desc + " should be scalar", N); - return llvm::None; + return std::nullopt; } llvm::Optional> boolValue(Node &N, llvm::StringRef Desc) { @@ -366,7 +366,7 @@ return Located(*Bool, Scalar->Range); warning(Desc + " should be a boolean", N); } - return llvm::None; + return std::nullopt; } // Try to parse a list of single scalar values, or just a single value. @@ -385,7 +385,7 @@ } } else { warning("Expected scalar or list of scalars", N); - return llvm::None; + return std::nullopt; } return Result; } diff --git a/clang-tools-extra/clangd/Diagnostics.cpp b/clang-tools-extra/clangd/Diagnostics.cpp --- a/clang-tools-extra/clangd/Diagnostics.cpp +++ b/clang-tools-extra/clangd/Diagnostics.cpp @@ -624,7 +624,7 @@ void StoreDiags::EndSourceFile() { flushLastDiag(); - LangOpts = None; + LangOpts = std::nullopt; OrigSrcMgr = nullptr; } @@ -925,7 +925,7 @@ // '-' in the name. std::tie(Module, Check) = Name.split('-'); if (Module.empty() || Check.empty()) - return llvm::None; + return std::nullopt; return ("https://clang.llvm.org/extra/clang-tidy/checks/" + Module + "/" + Check + ".html") .str(); @@ -939,7 +939,7 @@ // However we have no diagnostic codes, which the link should describe! 
break; } - return llvm::None; + return std::nullopt; } } // namespace clangd diff --git a/clang-tools-extra/clangd/DraftStore.cpp b/clang-tools-extra/clangd/DraftStore.cpp --- a/clang-tools-extra/clangd/DraftStore.cpp +++ b/clang-tools-extra/clangd/DraftStore.cpp @@ -20,7 +20,7 @@ auto It = Drafts.find(File); if (It == Drafts.end()) - return None; + return std::nullopt; return It->second.D; } diff --git a/clang-tools-extra/clangd/DumpAST.cpp b/clang-tools-extra/clangd/DumpAST.cpp --- a/clang-tools-extra/clangd/DumpAST.cpp +++ b/clang-tools-extra/clangd/DumpAST.cpp @@ -91,7 +91,7 @@ SourceRange SR = getSourceRange(Node); auto Spelled = Tokens.spelledForExpanded(Tokens.expandedTokens(SR)); if (!Spelled) - return llvm::None; + return std::nullopt; return halfOpenToRange( Tokens.sourceManager(), CharSourceRange::getCharRange(Spelled->front().location(), diff --git a/clang-tools-extra/clangd/ExpectedTypes.cpp b/clang-tools-extra/clangd/ExpectedTypes.cpp --- a/clang-tools-extra/clangd/ExpectedTypes.cpp +++ b/clang-tools-extra/clangd/ExpectedTypes.cpp @@ -48,10 +48,10 @@ D = Template->getTemplatedDecl(); auto *VD = dyn_cast_or_null(D); if (!VD) - return llvm::None; // We handle only variables and functions below. + return std::nullopt; // We handle only variables and functions below. auto T = VD->getType(); if (T.isNull()) - return llvm::None; + return std::nullopt; if (auto *FuncT = T->getAs()) { // Functions are a special case. They are completed as 'foo()' and we want // to match their return type rather than the function type itself. @@ -65,13 +65,13 @@ llvm::Optional OpaqueType::encode(ASTContext &Ctx, QualType T) { if (T.isNull()) - return None; + return std::nullopt; const Type *C = toEquivClass(Ctx, T); if (!C) - return None; + return std::nullopt; llvm::SmallString<128> Encoded; if (index::generateUSRForType(QualType(C, 0), Ctx, Encoded)) - return None; + return std::nullopt; return OpaqueType(std::string(Encoded.str())); } @@ -87,7 +87,7 @@ const CodeCompletionResult &R) { auto T = typeOfCompletion(R); if (!T) - return None; + return std::nullopt; return encode(Ctx, *T); } diff --git a/clang-tools-extra/clangd/FS.cpp b/clang-tools-extra/clangd/FS.cpp --- a/clang-tools-extra/clangd/FS.cpp +++ b/clang-tools-extra/clangd/FS.cpp @@ -47,7 +47,7 @@ if (I != StatCache.end()) // Returned Status name should always match the requested File. 
return llvm::vfs::Status::copyWithNewName(I->getValue(), File); - return None; + return std::nullopt; } llvm::IntrusiveRefCntPtr diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp --- a/clang-tools-extra/clangd/FindSymbols.cpp +++ b/clang-tools-extra/clangd/FindSymbols.cpp @@ -227,7 +227,7 @@ const auto SymbolRange = toHalfOpenFileRange(SM, Ctx.getLangOpts(), {BeginLoc, EndLoc}); if (!SymbolRange) - return llvm::None; + return std::nullopt; index::SymbolInfo SymInfo = index::getSymbolInfo(&ND); // FIXME: This is not classifying constructors, destructors and operators diff --git a/clang-tools-extra/clangd/FuzzyMatch.cpp b/clang-tools-extra/clangd/FuzzyMatch.cpp --- a/clang-tools-extra/clangd/FuzzyMatch.cpp +++ b/clang-tools-extra/clangd/FuzzyMatch.cpp @@ -91,14 +91,14 @@ llvm::Optional FuzzyMatcher::match(llvm::StringRef Word) { if (!(WordContainsPattern = init(Word))) - return llvm::None; + return std::nullopt; if (!PatN) return 1; buildGraph(); auto Best = std::max(Scores[PatN][WordN][Miss].Score, Scores[PatN][WordN][Match].Score); if (isAwful(Best)) - return llvm::None; + return std::nullopt; float Score = ScoreScale * std::min(PerfectBonus * PatN, std::max(0, Best)); // If the pattern is as long as the word, we have an exact string match, diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.h b/clang-tools-extra/clangd/GlobalCompilationDatabase.h --- a/clang-tools-extra/clangd/GlobalCompilationDatabase.h +++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.h @@ -42,7 +42,7 @@ /// Finds the closest project to \p File. virtual llvm::Optional getProjectInfo(PathRef File) const { - return llvm::None; + return std::nullopt; } /// Makes a guess at how to build a file. diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp --- a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp +++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp @@ -185,7 +185,7 @@ if (CachePopulatedAt > FreshTime) return CDB; - if (/*MayCache=*/load(*TFS.view(/*CWD=*/llvm::None))) { + if (/*MayCache=*/load(*TFS.view(/*CWD=*/std::nullopt))) { // Use new timestamp, as loading may be slow. CachePopulatedAt = stopwatch::now(); NoCDBAt.store((CDB ? 
stopwatch::time_point::min() : CachePopulatedAt) @@ -366,14 +366,14 @@ auto Res = lookupCDB(Req); if (!Res) { log("Failed to find compilation database for {0}", File); - return llvm::None; + return std::nullopt; } auto Candidates = Res->CDB->getCompileCommands(File); if (!Candidates.empty()) return std::move(Candidates.front()); - return None; + return std::nullopt; } std::vector @@ -413,7 +413,7 @@ const auto &Spec = Config::current().CompileFlags.CDBSearch; switch (Spec.Policy) { case Config::CDBSearchSpec::NoCDBSearch: - return llvm::None; + return std::nullopt; case Config::CDBSearchSpec::FixedDir: Storage = *Spec.FixedCDBPath; SearchDirs = {Storage}; @@ -444,7 +444,7 @@ } if (!CDB) - return llvm::None; + return std::nullopt; CDBLookupResult Result; Result.CDB = std::move(CDB); @@ -734,7 +734,7 @@ std::chrono::steady_clock::time_point::min(); auto Res = lookupCDB(Req); if (!Res) - return llvm::None; + return std::nullopt; return Res->PI; } @@ -756,7 +756,7 @@ if (!Cmd) Cmd = DelegatingCDB::getCompileCommand(File); if (!Cmd) - return llvm::None; + return std::nullopt; if (Mangler) Mangler(*Cmd, File); return Cmd; @@ -804,13 +804,13 @@ llvm::Optional DelegatingCDB::getCompileCommand(PathRef File) const { if (!Base) - return llvm::None; + return std::nullopt; return Base->getCompileCommand(File); } llvm::Optional DelegatingCDB::getProjectInfo(PathRef File) const { if (!Base) - return llvm::None; + return std::nullopt; return Base->getProjectInfo(File); } diff --git a/clang-tools-extra/clangd/HeaderSourceSwitch.cpp b/clang-tools-extra/clangd/HeaderSourceSwitch.cpp --- a/clang-tools-extra/clangd/HeaderSourceSwitch.cpp +++ b/clang-tools-extra/clangd/HeaderSourceSwitch.cpp @@ -36,7 +36,7 @@ // We can only switch between the known extensions. if (!IsSource && !IsHeader) - return None; + return std::nullopt; // Array to lookup extensions for the switch. An opposite of where original // extension was found. @@ -60,7 +60,7 @@ if (VFS->exists(NewPath)) return Path(NewPath); } - return None; + return std::nullopt; } llvm::Optional getCorrespondingHeaderOrSource(PathRef OriginalFile, @@ -68,7 +68,7 @@ const SymbolIndex *Index) { if (!Index) { // FIXME: use the AST to do the inference. - return None; + return std::nullopt; } LookupRequest Request; // Find all symbols present in the original file. @@ -102,7 +102,7 @@ // that the background-index is not finished), we should use the decl/def // locations from the AST to do the inference (from .cc to .h). if (Candidates.empty()) - return None; + return std::nullopt; // Pickup the winner, who contains most of symbols. // FIXME: should we use other signals (file proximity) to help score? diff --git a/clang-tools-extra/clangd/Headers.cpp b/clang-tools-extra/clangd/Headers.cpp --- a/clang-tools-extra/clangd/Headers.cpp +++ b/clang-tools-extra/clangd/Headers.cpp @@ -242,7 +242,7 @@ } auto It = UIDToIndex.find(Entry->getUniqueID()); if (It == UIDToIndex.end()) - return llvm::None; + return std::nullopt; return It->second; } @@ -336,7 +336,7 @@ } // FIXME: should we allow (some limited number of) "../header.h"? 
if (llvm::sys::path::is_absolute(Suggested)) - return None; + return std::nullopt; if (IsSystem) Suggested = "<" + Suggested + ">"; else diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp --- a/clang-tools-extra/clangd/Hover.cpp +++ b/clang-tools-extra/clangd/Hover.cpp @@ -419,7 +419,7 @@ QualType T = E->getType(); if (T.isNull() || T->isFunctionType() || T->isFunctionPointerType() || T->isFunctionReferenceType() || T->isVoidType()) - return llvm::None; + return std::nullopt; Expr::EvalResult Constant; // Attempt to evaluate. If expr is dependent, evaluation crashes! @@ -427,7 +427,7 @@ // Disable printing for record-types, as they are usually confusing and // might make clang crash while printing the expressions. Constant.Val.isStruct() || Constant.Val.isUnion()) - return llvm::None; + return std::nullopt; // Show enums symbolically, not numerically like APValue::printPretty(). if (T->isEnumeralType() && Constant.Val.isInt() && @@ -468,16 +468,16 @@ break; } } - return llvm::None; + return std::nullopt; } llvm::Optional fieldName(const Expr *E) { const auto *ME = llvm::dyn_cast(E->IgnoreCasts()); if (!ME || !llvm::isa(ME->getBase()->IgnoreCasts())) - return llvm::None; + return std::nullopt; const auto *Field = llvm::dyn_cast(ME->getMemberDecl()); if (!Field || !Field->getDeclName().isIdentifier()) - return llvm::None; + return std::nullopt; return Field->getDeclName().getAsIdentifierInfo()->getName(); } @@ -485,13 +485,13 @@ llvm::Optional getterVariableName(const CXXMethodDecl *CMD) { assert(CMD->hasBody()); if (CMD->getNumParams() != 0 || CMD->isVariadic()) - return llvm::None; + return std::nullopt; const auto *Body = llvm::dyn_cast(CMD->getBody()); const auto *OnlyReturn = (Body && Body->size() == 1) ? llvm::dyn_cast(Body->body_front()) : nullptr; if (!OnlyReturn || !OnlyReturn->getRetValue()) - return llvm::None; + return std::nullopt; return fieldName(OnlyReturn->getRetValue()); } @@ -504,59 +504,59 @@ llvm::Optional setterVariableName(const CXXMethodDecl *CMD) { assert(CMD->hasBody()); if (CMD->isConst() || CMD->getNumParams() != 1 || CMD->isVariadic()) - return llvm::None; + return std::nullopt; const ParmVarDecl *Arg = CMD->getParamDecl(0); if (Arg->isParameterPack()) - return llvm::None; + return std::nullopt; const auto *Body = llvm::dyn_cast(CMD->getBody()); if (!Body || Body->size() == 0 || Body->size() > 2) - return llvm::None; + return std::nullopt; // If the second statement exists, it must be `return this` or `return *this`. if (Body->size() == 2) { auto *Ret = llvm::dyn_cast(Body->body_back()); if (!Ret || !Ret->getRetValue()) - return llvm::None; + return std::nullopt; const Expr *RetVal = Ret->getRetValue()->IgnoreCasts(); if (const auto *UO = llvm::dyn_cast(RetVal)) { if (UO->getOpcode() != UO_Deref) - return llvm::None; + return std::nullopt; RetVal = UO->getSubExpr()->IgnoreCasts(); } if (!llvm::isa(RetVal)) - return llvm::None; + return std::nullopt; } // The first statement must be an assignment of the arg to a field. 
const Expr *LHS, *RHS; if (const auto *BO = llvm::dyn_cast(Body->body_front())) { if (BO->getOpcode() != BO_Assign) - return llvm::None; + return std::nullopt; LHS = BO->getLHS(); RHS = BO->getRHS(); } else if (const auto *COCE = llvm::dyn_cast(Body->body_front())) { if (COCE->getOperator() != OO_Equal || COCE->getNumArgs() != 2) - return llvm::None; + return std::nullopt; LHS = COCE->getArg(0); RHS = COCE->getArg(1); } else { - return llvm::None; + return std::nullopt; } // Detect the case when the item is moved into the field. if (auto *CE = llvm::dyn_cast(RHS->IgnoreCasts())) { if (CE->getNumArgs() != 1) - return llvm::None; + return std::nullopt; auto *ND = llvm::dyn_cast_or_null(CE->getCalleeDecl()); if (!ND || !ND->getIdentifier() || ND->getName() != "move" || !ND->isInStdNamespace()) - return llvm::None; + return std::nullopt; RHS = CE->getArg(0); } auto *DRE = llvm::dyn_cast(RHS->IgnoreCasts()); if (!DRE || DRE->getDecl() != Arg) - return llvm::None; + return std::nullopt; return fieldName(LHS); } @@ -818,7 +818,7 @@ // There's not much value in hovering over "42" and getting a hover card // saying "42 is an int", similar for other literals. if (isLiteral(E)) - return llvm::None; + return std::nullopt; HoverInfo HI; // Print the type and the size for string literals @@ -837,7 +837,7 @@ HI.Name = std::string(getNameForExpr(E)); return HI; } - return llvm::None; + return std::nullopt; } // Generates hover info for attributes. @@ -1062,13 +1062,13 @@ auto CurLoc = sourceLocationInMainFile(SM, Pos); if (!CurLoc) { llvm::consumeError(CurLoc.takeError()); - return llvm::None; + return std::nullopt; } const auto &TB = AST.getTokens(); auto TokensTouchingCursor = syntax::spelledTokensTouching(*CurLoc, TB); // Early exit if there were no tokens around the cursor. if (TokensTouchingCursor.empty()) - return llvm::None; + return std::nullopt; // Show full header file path if cursor is on include directive. for (const auto &Inc : AST.getIncludeStructure().MainFileIncludes) { @@ -1111,7 +1111,7 @@ // If we can't find interesting hover information for this // auto/decltype keyword, return nothing to avoid showing // irrelevant or incorrect informations. - return llvm::None; + return std::nullopt; } } @@ -1146,7 +1146,7 @@ } if (!HI) - return llvm::None; + return std::nullopt; // Reformat Definition if (!HI->Definition.empty()) { @@ -1291,22 +1291,22 @@ llvm::StringRef Prefix = Line.substr(0, Offset); constexpr llvm::StringLiteral BeforeStartChars = " \t(="; if (!Prefix.empty() && !BeforeStartChars.contains(Prefix.back())) - return llvm::None; + return std::nullopt; // The quoted string must be nonempty and usually has no leading/trailing ws. auto Next = Line.find('`', Offset + 1); if (Next == llvm::StringRef::npos) - return llvm::None; + return std::nullopt; llvm::StringRef Contents = Line.slice(Offset + 1, Next); if (Contents.empty() || isWhitespace(Contents.front()) || isWhitespace(Contents.back())) - return llvm::None; + return std::nullopt; // The close-quote is usually followed by whitespace or punctuation. 
llvm::StringRef Suffix = Line.substr(Next + 1); constexpr llvm::StringLiteral AfterEndChars = " \t)=.,;:"; if (!Suffix.empty() && !AfterEndChars.contains(Suffix.front())) - return llvm::None; + return std::nullopt; return Line.slice(Offset, Next + 1); } diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp --- a/clang-tools-extra/clangd/IncludeCleaner.cpp +++ b/clang-tools-extra/clangd/IncludeCleaner.cpp @@ -392,10 +392,10 @@ [&SM, &CanonIncludes](FileID ID) -> Optional { auto Entry = SM.getFileEntryRefForID(ID); if (!Entry) - return llvm::None; + return std::nullopt; auto PublicHeader = CanonIncludes.mapHeader(*Entry); if (PublicHeader.empty()) - return llvm::None; + return std::nullopt; return PublicHeader; }); } diff --git a/clang-tools-extra/clangd/IncludeFixer.cpp b/clang-tools-extra/clangd/IncludeFixer.cpp --- a/clang-tools-extra/clangd/IncludeFixer.cpp +++ b/clang-tools-extra/clangd/IncludeFixer.cpp @@ -58,7 +58,7 @@ case DiagnosticsEngine::ak_std_string: return llvm::StringRef(Info.getArgStdStr(Index)); default: - return llvm::None; + return std::nullopt; } } @@ -255,7 +255,7 @@ if (auto Edit = Inserter->insert(Spelled)) F.Edits.push_back(std::move(*Edit)); else - return llvm::None; + return std::nullopt; if (Symbol.empty()) F.Message = llvm::formatv("Include {0}", Spelled); @@ -355,7 +355,7 @@ NextLoc = IDTok->getLocation(); } if (Result.empty()) - return llvm::None; + return std::nullopt; return Result; } @@ -377,10 +377,10 @@ const SourceManager &SM) { // Support specifiers written within a single macro argument. if (!SM.isWrittenInSameFile(SS.getBeginLoc(), SS.getEndLoc())) - return llvm::None; + return std::nullopt; SourceRange Range(SM.getTopMacroCallerLoc(SS.getBeginLoc()), SM.getTopMacroCallerLoc(SS.getEndLoc())); if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID()) - return llvm::None; + return std::nullopt; return (toSourceCode(SM, Range) + "::").str(); } @@ -418,7 +418,7 @@ } else { // We don't fix symbols in scopes that are not top-level e.g. class // members, as we don't collect includes for them. - return llvm::None; + return std::nullopt; } } } @@ -571,7 +571,7 @@ return &I->second; if (IndexRequestCount >= IndexRequestLimit) - return llvm::None; + return std::nullopt; IndexRequestCount++; SymbolSlab::Builder Matches; @@ -596,7 +596,7 @@ return &I->second; if (IndexRequestCount >= IndexRequestLimit) - return llvm::None; + return std::nullopt; IndexRequestCount++; // FIXME: consider batching the requests for all diagnostics. diff --git a/clang-tools-extra/clangd/InlayHints.cpp b/clang-tools-extra/clangd/InlayHints.cpp --- a/clang-tools-extra/clangd/InlayHints.cpp +++ b/clang-tools-extra/clangd/InlayHints.cpp @@ -646,11 +646,11 @@ // TokenBuffer will return null if e.g. R corresponds to only part of a // macro expansion. if (!Spelled || Spelled->empty()) - return llvm::None; + return std::nullopt; // Hint must be within the main file, not e.g. a non-preamble include. 
if (SM.getFileID(Spelled->front().location()) != SM.getMainFileID() || SM.getFileID(Spelled->back().location()) != SM.getMainFileID()) - return llvm::None; + return std::nullopt; return Range{sourceLocToPosition(SM, Spelled->front().location()), sourceLocToPosition(SM, Spelled->back().endLocation())}; } diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp --- a/clang-tools-extra/clangd/ParsedAST.cpp +++ b/clang-tools-extra/clangd/ParsedAST.cpp @@ -399,7 +399,7 @@ elog("Failed to prepare a compiler instance: {0}", !Diags.empty() ? static_cast(Diags.back()).Message : "unknown error"); - return None; + return std::nullopt; } tidy::ClangTidyOptions ClangTidyOpts; if (PreserveDiags) { @@ -443,7 +443,7 @@ if (!Action->BeginSourceFile(*Clang, MainInput)) { log("BeginSourceFile() failed when building AST for {0}", MainInput.getFile()); - return None; + return std::nullopt; } // If we saw an include guard in the preamble section of the main file, // mark the main-file as include-guarded. @@ -784,7 +784,7 @@ llvm::Optional ParsedAST::preambleVersion() const { if (!Preamble) - return llvm::None; + return std::nullopt; return llvm::StringRef(Preamble->Version); } diff --git a/clang-tools-extra/clangd/PathMapping.cpp b/clang-tools-extra/clangd/PathMapping.cpp --- a/clang-tools-extra/clangd/PathMapping.cpp +++ b/clang-tools-extra/clangd/PathMapping.cpp @@ -22,11 +22,11 @@ const PathMappings &Mappings) { // Return early to optimize for the common case, wherein S is not a file URI if (!S.startswith("file://")) - return llvm::None; + return std::nullopt; auto Uri = URI::parse(S); if (!Uri) { llvm::consumeError(Uri.takeError()); - return llvm::None; + return std::nullopt; } for (const auto &Mapping : Mappings) { const std::string &From = Dir == PathMapping::Direction::ClientToServer @@ -42,7 +42,7 @@ .toString(); } } - return llvm::None; + return std::nullopt; } void applyPathMappings(llvm::json::Value &V, PathMapping::Direction Dir, diff --git a/clang-tools-extra/clangd/Preamble.cpp b/clang-tools-extra/clangd/Preamble.cpp --- a/clang-tools-extra/clangd/Preamble.cpp +++ b/clang-tools-extra/clangd/Preamble.cpp @@ -332,7 +332,7 @@ std::move(CI), nullptr, std::move(PreambleContents), // Provide an empty FS to prevent preprocessor from performing IO. This // also implies missing resolved paths for includes. - FS.view(llvm::None), IgnoreDiags); + FS.view(std::nullopt), IgnoreDiags); if (Clang->getFrontendOpts().Inputs.empty()) return error("compiler instance had no inputs"); // We are only interested in main file includes. diff --git a/clang-tools-extra/clangd/Quality.cpp b/clang-tools-extra/clangd/Quality.cpp --- a/clang-tools-extra/clangd/Quality.cpp +++ b/clang-tools-extra/clangd/Quality.cpp @@ -372,7 +372,7 @@ for (const auto &Word : ContextWords->keys()) if (Name.contains_insensitive(Word)) return Word; - return llvm::None; + return std::nullopt; } SymbolRelevanceSignals::DerivedSignals diff --git a/clang-tools-extra/clangd/Selection.cpp b/clang-tools-extra/clangd/Selection.cpp --- a/clang-tools-extra/clangd/Selection.cpp +++ b/clang-tools-extra/clangd/Selection.cpp @@ -517,7 +517,7 @@ // But SourceLocations for a file are numerically contiguous, so we // can use cheap integer operations instead. if (Loc < SelFileBounds.getBegin() || Loc >= SelFileBounds.getEnd()) - return llvm::None; + return std::nullopt; // FIXME: subtracting getRawEncoding() is dubious, move this logic into SM. 
return Loc.getRawEncoding() - SelFileBounds.getBegin().getRawEncoding(); } diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp --- a/clang-tools-extra/clangd/SemanticHighlighting.cpp +++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp @@ -109,7 +109,7 @@ if (auto *RD = llvm::dyn_cast(D)) { // We don't want to highlight lambdas like classes. if (RD->isLambda()) - return llvm::None; + return std::nullopt; return HighlightingKind::Class; } if (isa(D)) { if (isa(VD)) // e.g. ObjC Self - return llvm::None; + return std::nullopt; return VD->isStaticDataMember() ? HighlightingKind::StaticField : VD->isLocalVarDecl() ? HighlightingKind::LocalVariable @@ -162,12 +162,12 @@ } return HighlightingKind::Unknown; } - return llvm::None; + return std::nullopt; } llvm::Optional kindForType(const Type *TP, const HeuristicResolver *Resolver) { if (!TP) - return llvm::None; + return std::nullopt; if (TP->isBuiltinType()) // Builtins are special, they do not have decls. return HighlightingKind::Primitive; if (auto *TD = dyn_cast(TP)) @@ -176,7 +176,7 @@ return HighlightingKind::Class; if (auto *TD = TP->getAsTagDecl()) return kindForDecl(TD, Resolver); - return llvm::None; + return std::nullopt; } // Whether T is const in a loose sense - is a variable with this type readonly? @@ -337,7 +337,7 @@ unsigned Priority1 = evaluateHighlightPriority(A); unsigned Priority2 = evaluateHighlightPriority(B); if (Priority1 == Priority2 && A.Kind != B.Kind) - return llvm::None; + return std::nullopt; auto Result = Priority1 > Priority2 ? A : B; Result.Modifiers = A.Modifiers | B.Modifiers; return Result; @@ -477,7 +477,7 @@ llvm::Optional getRangeForSourceLocation(SourceLocation Loc) { Loc = getHighlightableSpellingToken(Loc, SourceMgr); if (Loc.isInvalid()) - return llvm::None; + return std::nullopt; const auto *Tok = TB.spelledTokenAt(Loc); assert(Tok); @@ -517,7 +517,7 @@ // Some template parameters (e.g. those for variable templates) don't have // meaningful DeclContexts. That doesn't mean they're global! if (DC->isTranslationUnit() && D->isTemplateParameter()) - return llvm::None; + return std::nullopt; // ExternalLinkage threshold could be tweaked, e.g. module-visible as global. if (D->getLinkageInternal() < ExternalLinkage) return HighlightingModifier::FileScope; @@ -526,14 +526,14 @@ llvm::Optional scopeModifier(const Type *T) { if (!T) - return llvm::None; + return std::nullopt; if (T->isBuiltinType()) return HighlightingModifier::GlobalScope; if (auto *TD = dyn_cast(T)) return scopeModifier(TD->getDecl()); if (auto *TD = T->getAsTagDecl()) return scopeModifier(TD); - return llvm::None; + return std::nullopt; } /// Produces highlightings, which are not captured by findExplicitReferences, diff --git a/clang-tools-extra/clangd/SemanticSelection.cpp b/clang-tools-extra/clangd/SemanticSelection.cpp --- a/clang-tools-extra/clangd/SemanticSelection.cpp +++ b/clang-tools-extra/clangd/SemanticSelection.cpp @@ -48,7 +48,7 @@ // file. Macros have their own FileID so this also checks if locations are not // within the macros. 
if ((Begin.first != SM.getMainFileID()) || (End.first != SM.getMainFileID())) - return llvm::None; + return std::nullopt; FoldingRange Range; Range.startCharacter = SM.getColumnNumber(Begin.first, Begin.second) - 1; Range.startLine = SM.getLineNumber(Begin.first, Begin.second) - 1; @@ -69,7 +69,7 @@ const auto *RBrace = cast_or_null( Stmt->findChild(syntax::NodeRole::CloseParen)); if (!LBrace || !RBrace) - return llvm::None; + return std::nullopt; // Fold the entire range within braces, including whitespace. const SourceLocation LBraceLocInfo = TM.getToken(LBrace->getTokenKey())->endLocation(), @@ -82,7 +82,7 @@ if (Range && Range->startLine != Range->endLine) return Range; } - return llvm::None; + return std::nullopt; } // Traverse the tree and collect folding ranges along the way. diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h --- a/clang-tools-extra/clangd/SourceCode.h +++ b/clang-tools-extra/clangd/SourceCode.h @@ -320,7 +320,7 @@ /// Infers whether this is a header from the FileName and LangOpts (if /// presents). bool isHeaderFile(llvm::StringRef FileName, - llvm::Optional LangOpts = llvm::None); + llvm::Optional LangOpts = std::nullopt); /// Returns true if the given location is in a generated protobuf file. bool isProtoFile(SourceLocation Loc, const SourceManager &SourceMgr); diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -427,11 +427,11 @@ SourceRange R) { SourceRange R1 = getTokenFileRange(R.getBegin(), SM, LangOpts); if (!isValidFileRange(SM, R1)) - return llvm::None; + return std::nullopt; SourceRange R2 = getTokenFileRange(R.getEnd(), SM, LangOpts); if (!isValidFileRange(SM, R2)) - return llvm::None; + return std::nullopt; SourceRange Result = rangeInCommonFile(unionTokenRange(R1, R2, SM, LangOpts), SM, LangOpts); @@ -439,7 +439,7 @@ // Convert from closed token range to half-open (char) range Result.setEnd(Result.getEnd().getLocWithOffset(TokLen)); if (!isValidFileRange(SM, Result)) - return llvm::None; + return std::nullopt; return Result; } @@ -515,7 +515,7 @@ llvm::Optional getCanonicalPath(const FileEntry *F, const SourceManager &SourceMgr) { if (!F) - return None; + return std::nullopt; llvm::SmallString<128> FilePath = F->getName(); if (!llvm::sys::path::is_absolute(FilePath)) { @@ -524,7 +524,7 @@ FilePath)) { elog("Could not turn relative path '{0}' to absolute: {1}", FilePath, EC.message()); - return None; + return std::nullopt; } } @@ -574,7 +574,7 @@ bool Invalid = false; llvm::StringRef Content = SM.getBufferData(FID, &Invalid); if (Invalid) - return None; + return std::nullopt; return digest(Content); } @@ -583,7 +583,7 @@ const ThreadsafeFS &TFS) { auto Style = format::getStyle(format::DefaultFormatStyle, File, format::DefaultFallbackStyle, Content, - TFS.view(/*CWD=*/llvm::None).get()); + TFS.view(/*CWD=*/std::nullopt).get()); if (!Style) { log("getStyle() failed for file {0}: {1}. 
Fallback is LLVM style.", File, Style.takeError()); @@ -951,14 +951,14 @@ bool Invalid = false; llvm::StringRef Code = SM.getBufferData(File, &Invalid); if (Invalid) - return llvm::None; + return std::nullopt; unsigned B = Offset, E = Offset; while (B > 0 && isAsciiIdentifierContinue(Code[B - 1])) --B; while (E < Code.size() && isAsciiIdentifierContinue(Code[E])) ++E; if (B == E) - return llvm::None; + return std::nullopt; SpelledWord Result; Result.Location = SM.getComposedLoc(File, B); @@ -977,13 +977,13 @@ llvm::Optional locateMacroAt(const syntax::Token &SpelledTok, Preprocessor &PP) { if (SpelledTok.kind() != tok::identifier) - return None; + return std::nullopt; SourceLocation Loc = SpelledTok.location(); assert(Loc.isFileID()); const auto &SM = PP.getSourceManager(); IdentifierInfo *IdentifierInfo = PP.getIdentifierInfo(SpelledTok.text(SM)); if (!IdentifierInfo || !IdentifierInfo->hadMacroDefinition()) - return None; + return std::nullopt; // We need to take special case to handle #define and #undef. // Preprocessor::getMacroDefinitionAtLoc() only considers a macro @@ -1004,7 +1004,7 @@ .getMacroInfo(); } if (!MacroInfo) { - return None; + return std::nullopt; } return DefinedMacro{ IdentifierInfo->getName(), MacroInfo, diff --git a/clang-tools-extra/clangd/SystemIncludeExtractor.cpp b/clang-tools-extra/clangd/SystemIncludeExtractor.cpp --- a/clang-tools-extra/clangd/SystemIncludeExtractor.cpp +++ b/clang-tools-extra/clangd/SystemIncludeExtractor.cpp @@ -126,11 +126,11 @@ } if (!SeenIncludes) { elog("System include extraction: start marker not found: {0}", Output); - return llvm::None; + return std::nullopt; } if (State == IncludesExtracting) { elog("System include extraction: end marker missing: {0}", Output); - return llvm::None; + return std::nullopt; } return std::move(Info); } @@ -152,7 +152,7 @@ Driver = *DriverProgram; } else { elog("System include extraction: driver {0} not found in PATH", Driver); - return llvm::None; + return std::nullopt; } } @@ -161,7 +161,7 @@ if (!QueryDriverRegex.match(Driver)) { vlog("System include extraction: not allowed driver {0}", Driver); - return llvm::None; + return std::nullopt; } llvm::SmallString<128> StdErrPath; @@ -170,12 +170,12 @@ elog("System include extraction: failed to create temporary file with " "error {0}", EC.message()); - return llvm::None; + return std::nullopt; } auto CleanUp = llvm::make_scope_exit( [&StdErrPath]() { llvm::sys::fs::remove(StdErrPath); }); - llvm::Optional Redirects[] = {{""}, {""}, StdErrPath.str()}; + std::optional Redirects[] = {{""}, {""}, StdErrPath.str()}; llvm::SmallVector Args = {Driver, "-E", "-x", Lang, "-", "-v"}; @@ -209,26 +209,26 @@ } std::string ErrMsg; - if (int RC = llvm::sys::ExecuteAndWait(Driver, Args, /*Env=*/llvm::None, + if (int RC = llvm::sys::ExecuteAndWait(Driver, Args, /*Env=*/std::nullopt, Redirects, /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg)) { elog("System include extraction: driver execution failed with return code: " "{0} - '{1}'. 
Args: [{2}]", llvm::to_string(RC), ErrMsg, printArgv(Args)); - return llvm::None; + return std::nullopt; } auto BufOrError = llvm::MemoryBuffer::getFile(StdErrPath); if (!BufOrError) { elog("System include extraction: failed to read {0} with error {1}", StdErrPath, BufOrError.getError().message()); - return llvm::None; + return std::nullopt; } llvm::Optional Info = parseDriverOutput(BufOrError->get()->getBuffer()); if (!Info) - return llvm::None; + return std::nullopt; log("System includes extractor: successfully executed {0}\n\tgot includes: " "\"{1}\"\n\tgot target: \"{2}\"", Driver, llvm::join(Info->SystemIncludes, ", "), Info->Target); diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp --- a/clang-tools-extra/clangd/TUScheduler.cpp +++ b/clang-tools-extra/clangd/TUScheduler.cpp @@ -140,7 +140,7 @@ llvm::Optional TUScheduler::getFileBeingProcessedInContext() { if (auto *File = Context::current().get(FileBeingProcessed)) return llvm::StringRef(*File); - return None; + return std::nullopt; } /// An LRU cache of idle ASTs. @@ -192,7 +192,7 @@ if (Existing == LRU.end()) { if (AccessMetric) AccessMetric->record(1, "miss"); - return None; + return std::nullopt; } if (AccessMetric) AccessMetric->record(1, "hit"); @@ -996,7 +996,7 @@ FileInputs.Version); Action(InputsAndAST{FileInputs, **AST}); }; - startTask(Name, std::move(Task), /*Update=*/None, Invalidation); + startTask(Name, std::move(Task), /*Update=*/std::nullopt, Invalidation); } /// To be called from ThreadCrashReporter's signal handler. @@ -1132,7 +1132,7 @@ std::lock_guard Lock(Mutex); PreambleRequests.push_back({std::move(Task), std::string(TaskName), steady_clock::now(), Context::current().clone(), - llvm::None, llvm::None, + std::nullopt, std::nullopt, TUScheduler::NoInvalidation, nullptr}); } PreambleCV.notify_all(); @@ -1464,12 +1464,12 @@ for (auto I = Requests.begin(), E = Requests.end(); I != E; ++I) { if (!isCancelled(I->Ctx)) { // Cancellations after the first read don't affect current scheduling. - if (I->Update == None) + if (I->Update == std::nullopt) break; continue; } // Cancelled reads are moved to the front of the queue and run immediately. - if (I->Update == None) { + if (I->Update == std::nullopt) { Request R = std::move(*I); Requests.erase(I); Requests.push_front(std::move(R)); @@ -1490,7 +1490,8 @@ // We debounce "maybe-unused" writes, sleeping in case they become dead. // But don't delay reads (including updates where diagnostics are needed). for (const auto &R : Requests) - if (R.Update == None || R.Update->Diagnostics == WantDiagnostics::Yes) + if (R.Update == std::nullopt || + R.Update->Diagnostics == WantDiagnostics::Yes) return Deadline::zero(); // Front request needs to be debounced, so determine when we're ready. 
Deadline D(Requests.front().AddTime + UpdateDebounce.compute(RebuildTimes)); diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -119,16 +119,16 @@ llvm::Optional toLSPLocation(const SymbolLocation &Loc, llvm::StringRef TUPath) { if (!Loc) - return None; + return std::nullopt; auto Uri = URI::parse(Loc.FileURI); if (!Uri) { elog("Could not parse URI {0}: {1}", Loc.FileURI, Uri.takeError()); - return None; + return std::nullopt; } auto U = URIForFile::fromURI(*Uri, TUPath); if (!U) { elog("Could not resolve URI {0}: {1}", Loc.FileURI, U.takeError()); - return None; + return std::nullopt; } Location LSPLoc; @@ -210,11 +210,11 @@ const auto &SM = AST.getSourceManager(); const FileEntry *F = SM.getFileEntryForID(SM.getFileID(Loc)); if (!F) - return None; + return std::nullopt; auto FilePath = getCanonicalPath(F, SM); if (!FilePath) { log("failed to get path!"); - return None; + return std::nullopt; } Location L; L.uri = URIForFile::canonicalize(*FilePath, TUPath); @@ -241,7 +241,7 @@ return File; } } - return llvm::None; + return std::nullopt; } // Macros are simple: there's no declaration/definition distinction. @@ -260,7 +260,7 @@ return Macro; } } - return llvm::None; + return std::nullopt; } // A wrapper around `Decl::getCanonicalDecl` to support cases where Clang's @@ -1209,7 +1209,7 @@ CharSourceRange::getCharRange(Tok->location(), Tok->endLocation())); return Result; } - return llvm::None; + return std::nullopt; } } // namespace @@ -1569,11 +1569,11 @@ const auto DeclRange = toHalfOpenFileRange(SM, Ctx.getLangOpts(), {BeginLoc, EndLoc}); if (!DeclRange) - return llvm::None; + return std::nullopt; auto FilePath = getCanonicalPath(SM.getFileEntryForID(SM.getFileID(NameLoc)), SM); if (!FilePath) - return llvm::None; // Not useful without a uri. + return std::nullopt; // Not useful without a uri. Position NameBegin = sourceLocToPosition(SM, NameLoc); Position NameEnd = sourceLocToPosition( @@ -1633,7 +1633,7 @@ auto Loc = symbolToLocation(S, TUPath); if (!Loc) { elog("Failed to convert symbol to hierarchy item: {0}", Loc.takeError()); - return llvm::None; + return std::nullopt; } HierarchyItem HI; HI.name = std::string(S.Name); @@ -2065,7 +2065,7 @@ superTypes(const TypeHierarchyItem &Item, const SymbolIndex *Index) { std::vector Results; if (!Item.data.parents) - return llvm::None; + return std::nullopt; if (Item.data.parents->empty()) return Results; LookupRequest Req; diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp --- a/clang-tools-extra/clangd/index/Background.cpp +++ b/clang-tools-extra/clangd/index/Background.cpp @@ -396,7 +396,7 @@ Rebuilder.loadedShard(LoadedShards); Rebuilder.doneLoading(); - auto FS = TFS.view(/*CWD=*/llvm::None); + auto FS = TFS.view(/*CWD=*/std::nullopt); llvm::DenseSet TUsToIndex; // We'll accept data from stale shards, but ensure the files get reindexed // soon. 
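// A minimal standalone sketch, not taken from this patch (the function and
// variable names below are hypothetical), of the substitution the hunks in this
// change apply throughout clangd: llvm::None becomes std::nullopt, and where the
// type is spelled out, llvm::Optional<T> becomes std::optional<T> together with
// an added #include <optional>.
#include <optional>
#include <string>

std::optional<std::string> fileExtension(const std::string &Name) {
  auto Dot = Name.rfind('.');
  if (Dot == std::string::npos || Dot + 1 == Name.size())
    return std::nullopt; // previously: return llvm::None;
  return Name.substr(Dot + 1);
}
// Usage: fileExtension("Foo.cpp").value_or("") yields "cpp"; plain "Foo" yields "".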
diff --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang-tools-extra/clangd/index/FileIndex.cpp --- a/clang-tools-extra/clangd/index/FileIndex.cpp +++ b/clang-tools-extra/clangd/index/FileIndex.cpp @@ -192,7 +192,7 @@ FileShardedIndex::getShard(llvm::StringRef Uri) const { auto It = Shards.find(Uri); if (It == Shards.end()) - return llvm::None; + return std::nullopt; IndexFileIn IF; IF.Sources = It->getValue().IG; diff --git a/clang-tools-extra/clangd/index/IndexAction.cpp b/clang-tools-extra/clangd/index/IndexAction.cpp --- a/clang-tools-extra/clangd/index/IndexAction.cpp +++ b/clang-tools-extra/clangd/index/IndexAction.cpp @@ -30,10 +30,10 @@ llvm::Optional toURI(Optional File) { if (!File) - return llvm::None; + return std::nullopt; auto AbsolutePath = File->getFileEntry().tryGetRealPathName(); if (AbsolutePath.empty()) - return llvm::None; + return std::nullopt; return URI::create(AbsolutePath).toString(); } diff --git a/clang-tools-extra/clangd/index/StdLib.cpp b/clang-tools-extra/clangd/index/StdLib.cpp --- a/clang-tools-extra/clangd/index/StdLib.cpp +++ b/clang-tools-extra/clangd/index/StdLib.cpp @@ -228,7 +228,7 @@ auto Clang = prepareCompilerInstance( std::move(CI), /*Preamble=*/nullptr, llvm::MemoryBuffer::getMemBuffer(HeaderSources, Input.getFile()), - TFS.view(/*CWD=*/llvm::None), IgnoreDiags); + TFS.view(/*CWD=*/std::nullopt), IgnoreDiags); if (!Clang) { elog("Standard Library Index: Couldn't build compiler instance"); return Symbols; @@ -296,7 +296,7 @@ if (!Config::current().Index.StandardLibrary) { dlog("No: disabled in config"); - return llvm::None; + return std::nullopt; } if (NewVersion <= OldVersion) { @@ -305,7 +305,7 @@ static_cast(NewVersion)) .getName(), OldVersion, NewVersion); - return llvm::None; + return std::nullopt; } // We'd like to index a standard library here if there is one. @@ -344,7 +344,7 @@ } } if (SearchPaths.empty()) - return llvm::None; + return std::nullopt; dlog("Found standard library in {0}", llvm::join(SearchPaths, ", ")); @@ -352,7 +352,8 @@ std::memory_order_acq_rel)) if (OldVersion >= NewVersion) { dlog("No: lost the race"); - return llvm::None; // Another thread won the race while we were checking. + return std::nullopt; // Another thread won the race while we were + // checking. } dlog("Yes, index stdlib!"); diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp --- a/clang-tools-extra/clangd/index/SymbolCollector.cpp +++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp @@ -148,7 +148,7 @@ return RelationKind::BaseOf; if (R.Roles & static_cast(index::SymbolRole::RelationOverrideOf)) return RelationKind::OverriddenBy; - return None; + return std::nullopt; } // Given a ref contained in enclosing decl `Enclosing`, return @@ -310,7 +310,7 @@ ++I; } // Unexpected, must not be a framework header. - return llvm::None; + return std::nullopt; } // Frameworks typically have an umbrella header of the same name, e.g. @@ -374,7 +374,7 @@ // Unexpected: must not be a proper framework header, don't cache the // failure. 
CachePathToFrameworkSpelling.erase(Res.first); - return llvm::None; + return std::nullopt; } auto DirKind = HS.getFileDirFlavor(FE); if (auto UmbrellaSpelling = @@ -441,7 +441,7 @@ const auto &SM = ASTCtx->getSourceManager(); auto *FE = SM.getFileEntryForID(SM.getFileID(TokLoc)); if (!FE) - return None; + return std::nullopt; SymbolLocation Result; Result.FileURI = HeaderFileURIs->toURI(FE).c_str(); diff --git a/clang-tools-extra/clangd/index/dex/PostingList.cpp b/clang-tools-extra/clangd/index/dex/PostingList.cpp --- a/clang-tools-extra/clangd/index/dex/PostingList.cpp +++ b/clang-tools-extra/clangd/index/dex/PostingList.cpp @@ -184,7 +184,7 @@ /// the stream is terminated, return None. llvm::Optional readVByte(llvm::ArrayRef &Bytes) { if (Bytes.front() == 0 || Bytes.empty()) - return llvm::None; + return std::nullopt; DocID Result = 0; bool HasNextByte = true; for (size_t Length = 0; HasNextByte && !Bytes.empty(); ++Length) { diff --git a/clang-tools-extra/clangd/refactor/InsertionPoint.cpp b/clang-tools-extra/clangd/refactor/InsertionPoint.cpp --- a/clang-tools-extra/clangd/refactor/InsertionPoint.cpp +++ b/clang-tools-extra/clangd/refactor/InsertionPoint.cpp @@ -59,7 +59,7 @@ } if (ReturnNext || (LastMatched && A.Direction == Anchor::Below)) return nullptr; - return llvm::None; + return std::nullopt; } SourceLocation beginLoc(const Decl &D) { diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -42,11 +42,11 @@ llvm::Optional filePath(const SymbolLocation &Loc, llvm::StringRef HintFilePath) { if (!Loc) - return None; + return std::nullopt; auto Path = URI::resolve(Loc.FileURI, HintFilePath); if (!Path) { elog("Could not resolve URI {0}: {1}", Loc.FileURI, Path.takeError()); - return None; + return std::nullopt; } return *Path; @@ -217,7 +217,7 @@ } // function-local symbols is safe to rename. if (RenameDecl.getParentFunctionOrMethod()) - return None; + return std::nullopt; if (isExcluded(RenameDecl)) return ReasonToReject::UnsupportedSymbol; @@ -239,7 +239,7 @@ IsMainFileOnly)) return ReasonToReject::NonIndexable; - return None; + return std::nullopt; } llvm::Error makeError(ReasonToReject Reason) { @@ -935,7 +935,7 @@ SPAN_ATTACH( Tracer, "error", "The number of lexed occurrences is less than indexed occurrences"); - return llvm::None; + return std::nullopt; } // Fast check for the special subset case. 
if (std::includes(Indexed.begin(), Indexed.end(), Lexed.begin(), Lexed.end())) @@ -962,12 +962,12 @@ if (HasMultiple) { vlog("The best near miss is not unique."); SPAN_ATTACH(Tracer, "error", "The best near miss is not unique"); - return llvm::None; + return std::nullopt; } if (Best.empty()) { vlog("Didn't find a near miss."); SPAN_ATTACH(Tracer, "error", "Didn't find a near miss"); - return llvm::None; + return std::nullopt; } std::vector Mapped; for (auto I : Best) diff --git a/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp b/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp --- a/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp @@ -60,7 +60,7 @@ SourceLocation CurLoc = FD->getEndLoc(); auto NextTok = Lexer::findNextToken(CurLoc, SM, LangOpts); if (!NextTok || !NextTok->is(tok::semi)) - return llvm::None; + return std::nullopt; return NextTok->getLocation(); } @@ -353,17 +353,17 @@ addInlineIfInHeader(const FunctionDecl *FD) { // This includes inline functions and constexpr functions. if (FD->isInlined() || llvm::isa(FD)) - return llvm::None; + return std::nullopt; // Primary template doesn't need inline. if (FD->isTemplated() && !FD->isFunctionTemplateSpecialization()) - return llvm::None; + return std::nullopt; const SourceManager &SM = FD->getASTContext().getSourceManager(); llvm::StringRef FileName = SM.getFilename(FD->getLocation()); // If it is not a header we don't need to mark function as "inline". if (!isHeaderFile(FileName, FD->getASTContext().getLangOpts())) - return llvm::None; + return std::nullopt; return tooling::Replacement(SM, FD->getInnerLocStart(), 0, "inline "); } diff --git a/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp b/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp --- a/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp @@ -93,7 +93,7 @@ // If TargetNS is not a prefix of CurrentContext, there's no way to reach // it. 
if (!CurrentContextNS.startswith(TargetNS)) - return llvm::None; + return std::nullopt; while (CurrentContextNS != TargetNS) { CurContext = CurContext->getParent(); diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp --- a/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ExpandAutoType.cpp @@ -118,7 +118,7 @@ getDeducedType(Inputs.AST->getASTContext(), AutoRange.getBegin()); // if we can't resolve the type, return an error message - if (DeducedType == llvm::None || (*DeducedType)->isUndeducedAutoType()) + if (DeducedType == std::nullopt || (*DeducedType)->isUndeducedAutoType()) return error("Could not deduce type for 'auto' type"); // if it's a lambda expression, return an error message diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp --- a/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp @@ -273,12 +273,12 @@ SM, LangOpts, Parent->Children.front()->ASTNode.getSourceRange())) SR.setBegin(BeginFileRange->getBegin()); else - return llvm::None; + return std::nullopt; if (auto EndFileRange = toHalfOpenFileRange( SM, LangOpts, Parent->Children.back()->ASTNode.getSourceRange())) SR.setEnd(EndFileRange->getEnd()); else - return llvm::None; + return std::nullopt; return SR; } @@ -317,22 +317,22 @@ ExtractionZone ExtZone; ExtZone.Parent = getParentOfRootStmts(CommonAnc); if (!ExtZone.Parent || ExtZone.Parent->Children.empty()) - return llvm::None; + return std::nullopt; ExtZone.EnclosingFunction = findEnclosingFunction(ExtZone.Parent); if (!ExtZone.EnclosingFunction) - return llvm::None; + return std::nullopt; // When there is a single RootStmt, we must check if it's valid for // extraction. if (ExtZone.Parent->Children.size() == 1 && !validSingleChild(ExtZone.getLastRootStmt(), ExtZone.EnclosingFunction)) - return llvm::None; + return std::nullopt; if (auto FuncRange = computeEnclosingFuncRange(ExtZone.EnclosingFunction, SM, LangOpts)) ExtZone.EnclosingFuncRange = *FuncRange; if (auto ZoneRange = findZoneRange(ExtZone.Parent, SM, LangOpts)) ExtZone.ZoneRange = *ZoneRange; if (ExtZone.EnclosingFuncRange.isInvalid() || ExtZone.ZoneRange.isInvalid()) - return llvm::None; + return std::nullopt; for (const Node *Child : ExtZone.Parent->Children) ExtZone.RootStmts.insert(Child->ASTNode.get()); diff --git a/clang-tools-extra/clangd/refactor/tweaks/ObjCMemberwiseInitializer.cpp b/clang-tools-extra/clangd/refactor/tweaks/ObjCMemberwiseInitializer.cpp --- a/clang-tools-extra/clangd/refactor/tweaks/ObjCMemberwiseInitializer.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/ObjCMemberwiseInitializer.cpp @@ -96,7 +96,7 @@ if (const auto *PD = dyn_cast(&D)) if (PD->isInstanceProperty()) return MethodParameter(*PD); - return llvm::None; + return std::nullopt; } }; diff --git a/clang-tools-extra/clangd/support/FileCache.cpp b/clang-tools-extra/clangd/support/FileCache.cpp --- a/clang-tools-extra/clangd/support/FileCache.cpp +++ b/clang-tools-extra/clangd/support/FileCache.cpp @@ -47,11 +47,11 @@ // stat is cheaper than opening the file. It's usually unchanged. 
assert(llvm::sys::path::is_absolute(Path)); - auto FS = TFS.view(/*CWD=*/llvm::None); + auto FS = TFS.view(/*CWD=*/std::nullopt); auto Stat = FS->status(Path); if (!Stat || !Stat->isRegularFile()) { if (Size != FileNotFound) // Allow "not found" value to be cached. - Parse(llvm::None); + Parse(std::nullopt); // Ensure the cache key won't match any future stat(). Size = FileNotFound; return; diff --git a/clang-tools-extra/clangd/support/ThreadsafeFS.cpp b/clang-tools-extra/clangd/support/ThreadsafeFS.cpp --- a/clang-tools-extra/clangd/support/ThreadsafeFS.cpp +++ b/clang-tools-extra/clangd/support/ThreadsafeFS.cpp @@ -74,7 +74,7 @@ llvm::IntrusiveRefCntPtr ThreadsafeFS::view(PathRef CWD) const { - auto FS = view(llvm::None); + auto FS = view(std::nullopt); if (auto EC = FS->setCurrentWorkingDirectory(CWD)) elog("VFS: failed to set CWD to {0}: {1}", CWD, EC.message()); return FS; diff --git a/clang-tools-extra/clangd/tool/Check.cpp b/clang-tools-extra/clangd/tool/Check.cpp --- a/clang-tools-extra/clangd/tool/Check.cpp +++ b/clang-tools-extra/clangd/tool/Check.cpp @@ -178,7 +178,7 @@ Inputs.Contents = *Contents; log("Imaginary source file contents:\n{0}", Inputs.Contents); } else { - if (auto Contents = TFS.view(llvm::None)->getBufferForFile(File)) { + if (auto Contents = TFS.view(std::nullopt)->getBufferForFile(File)) { Inputs.Contents = Contents->get()->getBuffer().str(); } else { elog("Couldn't read {0}: {1}", File, Contents.getError().message()); diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h --- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h @@ -13,14 +13,21 @@ #include "clang-include-cleaner/Record.h" #include "clang-include-cleaner/Types.h" +#include "clang/Format/Format.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/Support/MemoryBufferRef.h" #include namespace clang { class SourceLocation; class Decl; class FileEntry; +class HeaderSearch; +namespace tooling { +class Replacements; +struct IncludeStyle; +} // namespace tooling namespace include_cleaner { /// A UsedSymbolCB is a callback invoked for each symbol reference seen. @@ -47,6 +54,24 @@ llvm::ArrayRef MacroRefs, const PragmaIncludes *PI, const SourceManager &, UsedSymbolCB CB); +struct AnalysisResults { + std::vector Unused; + std::vector Missing; // Spellings, like "" +}; + +/// Determine which headers should be inserted or removed from the main file. +/// This exposes conclusions but not reasons: use lower-level walkUsed for that. +AnalysisResults analyze(llvm::ArrayRef ASTRoots, + llvm::ArrayRef MacroRefs, + const Includes &I, const PragmaIncludes *PI, + const SourceManager &SM, HeaderSearch &HS); + +/// Removes unused includes and inserts missing ones in the main file. +/// Returns the modified main-file code. +/// The FormatStyle must be C++ or ObjC (to support include ordering). 
+std::string fixIncludes(const AnalysisResults &Results, llvm::StringRef Code, + const format::FormatStyle &IncludeStyle); + } // namespace include_cleaner } // namespace clang diff --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp b/clang-tools-extra/include-cleaner/lib/Analysis.cpp --- a/clang-tools-extra/include-cleaner/lib/Analysis.cpp +++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp @@ -11,6 +11,10 @@ #include "clang-include-cleaner/Types.h" #include "clang/AST/ASTContext.h" #include "clang/Basic/SourceManager.h" +#include "clang/Format/Format.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Inclusions/HeaderIncludes.h" #include "clang/Tooling/Inclusions/StandardLibrary.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" @@ -42,4 +46,69 @@ } } +static std::string spellHeader(const Header &H, HeaderSearch &HS, + const FileEntry *Main) { + switch (H.kind()) { + case Header::Physical: { + bool IsSystem = false; + std::string Path = HS.suggestPathToFileForDiagnostics( + H.physical(), Main->tryGetRealPathName(), &IsSystem); + return IsSystem ? "<" + Path + ">" : "\"" + Path + "\""; + } + case Header::Standard: + return H.standard().name().str(); + case Header::Verbatim: + return H.verbatim().str(); + } + llvm_unreachable("Unknown Header kind"); +} + +AnalysisResults analyze(llvm::ArrayRef ASTRoots, + llvm::ArrayRef MacroRefs, + const Includes &Inc, const PragmaIncludes *PI, + const SourceManager &SM, HeaderSearch &HS) { + const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID()); + llvm::DenseSet Used; + llvm::StringSet<> Missing; + walkUsed(ASTRoots, MacroRefs, PI, SM, + [&](const SymbolReference &Ref, llvm::ArrayRef
Providers) { + bool Satisfied = false; + for (const Header &H : Providers) { + if (H.kind() == Header::Physical && H.physical() == MainFile) + Satisfied = true; + for (const Include *I : Inc.match(H)) { + Used.insert(I); + Satisfied = true; + } + } + if (!Satisfied && !Providers.empty() && + Ref.RT == RefType::Explicit) + Missing.insert(spellHeader(Providers.front(), HS, MainFile)); + }); + + AnalysisResults Results; + for (const Include &I : Inc.all()) + if (!Used.contains(&I)) + Results.Unused.push_back(&I); + for (llvm::StringRef S : Missing.keys()) + Results.Missing.push_back(S.str()); + llvm::sort(Results.Missing); + return Results; +} + +std::string fixIncludes(const AnalysisResults &Results, llvm::StringRef Code, + const format::FormatStyle &Style) { + assert(Style.isCpp() && "Only C++ style supports include insertions!"); + tooling::Replacements R; + // Encode insertions/deletions in the magic way clang-format understands. + for (const Include *I : Results.Unused) + cantFail(R.add(tooling::Replacement("input", UINT_MAX, 1, I->quote()))); + for (llvm::StringRef Spelled : Results.Missing) + cantFail(R.add(tooling::Replacement("input", UINT_MAX, 0, + ("#include " + Spelled).str()))); + // "cleanup" actually turns the UINT_MAX replacements into concrete edits. + auto Positioned = cantFail(format::cleanupAroundReplacements(Code, R, Style)); + return cantFail(tooling::applyAllReplacements(Code, Positioned)); +} + } // namespace clang::include_cleaner diff --git a/clang-tools-extra/include-cleaner/lib/CMakeLists.txt b/clang-tools-extra/include-cleaner/lib/CMakeLists.txt --- a/clang-tools-extra/include-cleaner/lib/CMakeLists.txt +++ b/clang-tools-extra/include-cleaner/lib/CMakeLists.txt @@ -14,7 +14,9 @@ PRIVATE clangAST clangBasic + clangFormat clangLex + clangToolingCore clangToolingInclusions clangToolingInclusionsStdlib ) diff --git a/clang-tools-extra/include-cleaner/test/Inputs/foobar.h b/clang-tools-extra/include-cleaner/test/Inputs/foobar.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/include-cleaner/test/Inputs/foobar.h @@ -0,0 +1,3 @@ +#pragma once +#include "bar.h" +#include "foo.h" diff --git a/clang-tools-extra/include-cleaner/test/tool.cpp b/clang-tools-extra/include-cleaner/test/tool.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/include-cleaner/test/tool.cpp @@ -0,0 +1,25 @@ +#include "foobar.h" + +int x = foo(); + +// RUN: clang-include-cleaner -print=changes %s -- -I%S/Inputs/ | FileCheck --check-prefix=CHANGE %s +// CHANGE: - "foobar.h" +// CHANGE-NEXT: + "foo.h" + +// RUN: clang-include-cleaner -remove=0 -print=changes %s -- -I%S/Inputs/ | FileCheck --check-prefix=INSERT %s +// INSERT-NOT: - "foobar.h" +// INSERT: + "foo.h" + +// RUN: clang-include-cleaner -insert=0 -print=changes %s -- -I%S/Inputs/ | FileCheck --check-prefix=REMOVE %s +// REMOVE: - "foobar.h" +// REMOVE-NOT: + "foo.h" + +// RUN: clang-include-cleaner -print %s -- -I%S/Inputs/ | FileCheck --match-full-lines --check-prefix=PRINT %s +// PRINT: #include "foo.h" +// PRINT-NOT: {{^}}#include "foobar.h"{{$}} + +// RUN: cp %s %t.cpp +// RUN: clang-include-cleaner -edit %t.cpp -- -I%S/Inputs/ +// RUN: FileCheck --match-full-lines --check-prefix=EDIT %s < %t.cpp +// EDIT: #include "foo.h" +// EDIT-NOT: {{^}}#include "foobar.h"{{$}} diff --git a/clang-tools-extra/include-cleaner/tool/CMakeLists.txt b/clang-tools-extra/include-cleaner/tool/CMakeLists.txt --- a/clang-tools-extra/include-cleaner/tool/CMakeLists.txt +++ b/clang-tools-extra/include-cleaner/tool/CMakeLists.txt @@ -6,6 +6,7 
@@ clangBasic clangFrontend clangLex + clangFormat clangSerialization clangTooling ) diff --git a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp --- a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp +++ b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "AnalysisInternal.h" +#include "clang-include-cleaner/Analysis.h" #include "clang-include-cleaner/Record.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendAction.h" @@ -46,7 +47,48 @@ cl::cat(IncludeCleaner), }; -class HTMLReportAction : public clang::ASTFrontendAction { +enum class PrintStyle { Changes, Final }; +cl::opt Print{ + "print", + cl::values( + clEnumValN(PrintStyle::Changes, "changes", "Print symbolic changes"), + clEnumValN(PrintStyle::Final, "", "Print final code")), + cl::ValueOptional, + cl::init(PrintStyle::Final), + cl::desc("Print the list of headers to insert and remove"), + cl::cat(IncludeCleaner), +}; + +cl::opt Edit{ + "edit", + cl::desc("Apply edits to analyzed source files"), + cl::cat(IncludeCleaner), +}; + +cl::opt Insert{ + "insert", + cl::desc("Allow header insertions"), + cl::init(true), +}; +cl::opt Remove{ + "remove", + cl::desc("Allow header removals"), + cl::init(true), +}; + +std::atomic Errors = ATOMIC_VAR_INIT(0); + +format::FormatStyle getStyle(llvm::StringRef Filename) { + auto S = format::getStyle(format::DefaultFormatStyle, Filename, + format::DefaultFallbackStyle); + if (!S || !S->isCpp()) { + consumeError(S.takeError()); + return format::getLLVMStyle(); + } + return std::move(*S); +} + +class Action : public clang::ASTFrontendAction { RecordedAST AST; RecordedPP PP; PragmaIncludes PI; @@ -64,12 +106,59 @@ } void EndSourceFile() override { + if (!HTMLReportPath.empty()) + writeHTML(); + + const auto &SM = getCompilerInstance().getSourceManager(); + auto &HS = getCompilerInstance().getPreprocessor().getHeaderSearchInfo(); + llvm::StringRef Path = + SM.getFileEntryForID(SM.getMainFileID())->tryGetRealPathName(); + assert(!Path.empty() && "Main file path not known?"); + llvm::StringRef Code = SM.getBufferData(SM.getMainFileID()); + + auto Results = + analyze(AST.Roots, PP.MacroReferences, PP.Includes, &PI, SM, HS); + if (!Insert) + Results.Missing.clear(); + if (!Remove) + Results.Unused.clear(); + std::string Final = fixIncludes(Results, Code, getStyle(Path)); + + if (Print.getNumOccurrences()) { + switch (Print) { + case PrintStyle::Changes: + for (const Include *I : Results.Unused) + llvm::outs() << "- " << I->quote() << "\n"; + for (const auto &I : Results.Missing) + llvm::outs() << "+ " << I << "\n"; + break; + case PrintStyle::Final: + llvm::outs() << Final; + break; + } + } + + if (Edit) { + if (auto Err = llvm::writeToOutput( + Path, [&](llvm::raw_ostream &OS) -> llvm::Error { + OS << Final; + return llvm::Error::success(); + })) { + llvm::errs() << "Failed to apply edits to " << Path << ": " + << toString(std::move(Err)) << "\n"; + ++Errors; + } + } + } + + void writeHTML() { std::error_code EC; llvm::raw_fd_ostream OS(HTMLReportPath, EC); if (EC) { llvm::errs() << "Unable to write HTML report to " << HTMLReportPath << ": " << EC.message() << "\n"; - exit(1); + ++Errors; + return; } writeHTMLReport( AST.Ctx->getSourceManager().getMainFileID(), PP.Includes, AST.Roots, @@ -93,20 +182,17 @@ return 1; } - std::unique_ptr Factory; - if (HTMLReportPath.getNumOccurrences()) { - 
if (OptionsParser->getSourcePathList().size() != 1) { - llvm::errs() << "-" << HTMLReportPath.ArgStr - << " requires a single input file"; + if (OptionsParser->getSourcePathList().size() != 1) { + std::vector IncompatibleFlags = {&HTMLReportPath, &Print}; + for (const auto *Flag : IncompatibleFlags) { + if (Flag->getNumOccurrences()) + llvm::errs() << "-" << Flag->ArgStr << " requires a single input file"; return 1; } - Factory = clang::tooling::newFrontendActionFactory(); - } else { - llvm::errs() << "Unimplemented\n"; - return 1; } - + auto Factory = clang::tooling::newFrontendActionFactory(); return clang::tooling::ClangTool(OptionsParser->getCompilations(), OptionsParser->getSourcePathList()) - .run(Factory.get()); + .run(Factory.get()) || + Errors != 0; } diff --git a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp --- a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp @@ -18,6 +18,7 @@ #include "clang/Tooling/Inclusions/StandardLibrary.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Testing/Support/Annotations.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -25,6 +26,7 @@ namespace clang::include_cleaner { namespace { +using testing::ElementsAre; using testing::Pair; using testing::UnorderedElementsAre; @@ -134,6 +136,83 @@ UnorderedElementsAre(Pair(Main.point(), UnorderedElementsAre(HdrFile)))); } +TEST(Analyze, Basic) { + TestInputs Inputs; + Inputs.Code = R"cpp( +#include "a.h" +#include "b.h" + +int x = a + c; +)cpp"; + Inputs.ExtraFiles["a.h"] = guard("int a;"); + Inputs.ExtraFiles["b.h"] = guard(R"cpp( + #include "c.h" + int b; + )cpp"); + Inputs.ExtraFiles["c.h"] = guard("int c;"); + + RecordedPP PP; + Inputs.MakeAction = [&PP] { + struct Hook : public SyntaxOnlyAction { + public: + Hook(RecordedPP &PP) : PP(PP) {} + bool BeginSourceFileAction(clang::CompilerInstance &CI) override { + CI.getPreprocessor().addPPCallbacks(PP.record(CI.getPreprocessor())); + return true; + } + + RecordedPP &PP; + }; + return std::make_unique(PP); + }; + + TestAST AST(Inputs); + auto Decls = AST.context().getTranslationUnitDecl()->decls(); + auto Results = + analyze(std::vector{Decls.begin(), Decls.end()}, + PP.MacroReferences, PP.Includes, /*PragmaIncludes=*/nullptr, + AST.sourceManager(), AST.preprocessor().getHeaderSearchInfo()); + + const Include *B = PP.Includes.atLine(3); + ASSERT_EQ(B->Spelled, "b.h"); + EXPECT_THAT(Results.Missing, ElementsAre("\"c.h\"")); + EXPECT_THAT(Results.Unused, ElementsAre(B)); +} + +TEST(FixIncludes, Basic) { + llvm::StringRef Code = R"cpp( +#include "a.h" +#include "b.h" +#include +)cpp"; + + Includes Inc; + Include I; + I.Spelled = "a.h"; + I.Line = 2; + Inc.add(I); + I.Spelled = "b.h"; + I.Line = 3; + Inc.add(I); + I.Spelled = "c.h"; + I.Line = 4; + I.Angled = true; + Inc.add(I); + + AnalysisResults Results; + Results.Missing.push_back("\"aa.h\""); + Results.Missing.push_back("\"ab.h\""); + Results.Missing.push_back(""); + Results.Unused.push_back(Inc.atLine(3)); + Results.Unused.push_back(Inc.atLine(4)); + + EXPECT_EQ(fixIncludes(Results, Code, format::getLLVMStyle()), R"cpp( +#include "a.h" +#include "aa.h" +#include "ab.h" +#include +)cpp"); +} } // namespace } // namespace clang::include_cleaner diff --git a/clang-tools-extra/include-cleaner/unittests/CMakeLists.txt 
b/clang-tools-extra/include-cleaner/unittests/CMakeLists.txt --- a/clang-tools-extra/include-cleaner/unittests/CMakeLists.txt +++ b/clang-tools-extra/include-cleaner/unittests/CMakeLists.txt @@ -22,6 +22,7 @@ clangAST clangBasic clangFrontend + clangFormat clangLex clangToolingInclusionsStdlib ) diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h @@ -222,7 +222,7 @@ Word KeyMask = Word(1) << (Key % WordBits); unsigned KeyWord = Key / WordBits; if ((HasValue[KeyWord] & KeyMask) == 0) - return llvm::None; + return std::nullopt; // Count the number of values since the checkpoint. Word BelowKeyMask = KeyMask - 1; unsigned CountSinceCheckpoint = diff --git a/clang-tools-extra/pseudo/lib/DirectiveTree.cpp b/clang-tools-extra/pseudo/lib/DirectiveTree.cpp --- a/clang-tools-extra/pseudo/lib/DirectiveTree.cpp +++ b/clang-tools-extra/pseudo/lib/DirectiveTree.cpp @@ -92,7 +92,7 @@ Tree->Chunks.push_back(std::move(Directive)); } } - return None; + return std::nullopt; } // Parse the rest of a conditional section, after seeing the If directive. @@ -292,7 +292,7 @@ case clang::tok::pp_else: return true; default: // #ifdef etc - return llvm::None; + return std::nullopt; } const auto &Tokens = Code.tokens(Dir.Tokens); @@ -301,11 +301,11 @@ const Token &Value = Name.nextNC(); // Does the condition consist of exactly one token? if (&Value >= Tokens.end() || &Value.nextNC() < Tokens.end()) - return llvm::None; + return std::nullopt; return llvm::StringSwitch>(Value.text()) .Cases("true", "1", true) .Cases("false", "0", false) - .Default(llvm::None); + .Default(std::nullopt); } const TokenStream &Code; diff --git a/clang-tools-extra/pseudo/lib/Forest.cpp b/clang-tools-extra/pseudo/lib/Forest.cpp --- a/clang-tools-extra/pseudo/lib/Forest.cpp +++ b/clang-tools-extra/pseudo/lib/Forest.cpp @@ -167,12 +167,12 @@ LineDec.Subsequent = "│ "; } Dump(Children[I], P->kind() == Sequence ? EndOfElement(I) : End, - llvm::None, LineDec); + std::nullopt, LineDec); } LineDec.Prefix.resize(OldPrefixSize); }; LineDecoration LineDec; - Dump(this, KEnd, llvm::None, LineDec); + Dump(this, KEnd, std::nullopt, LineDec); return Result; } diff --git a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp --- a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp @@ -51,7 +51,7 @@ [&](const GrammarTable::Nonterminal &X) { return X.Name < Name; }); if (It != T->Nonterminals.end() && It->Name == Name) return It - T->Nonterminals.begin(); - return llvm::None; + return std::nullopt; } std::string Grammar::dumpRule(RuleID RID) const { diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -184,6 +184,9 @@ - To match GCC, ``__ppc64__`` is no longer defined on PowerPC64 targets. Use ``__powerpc64__`` instead. +- ``-p`` is rejected for all targets which are not AIX or OpenBSD. ``-p`` led + to an ``-Wunused-command-line-argument`` warning in previous releases. + What's New in Clang |release|? 
============================== Some of the major new features and improvements to Clang are listed @@ -839,8 +842,8 @@ - Introduced the new function ``clang_CXXMethod_isMoveAssignmentOperator``, which identifies whether a method cursor is a move-assignment operator. -- ``clang_Cursor_getNumTemplateArguments``, ``clang_Cursor_getTemplateArgumentKind``, - ``clang_Cursor_getTemplateArgumentType``, ``clang_Cursor_getTemplateArgumentValue`` and +- ``clang_Cursor_getNumTemplateArguments``, ``clang_Cursor_getTemplateArgumentKind``, + ``clang_Cursor_getTemplateArgumentType``, ``clang_Cursor_getTemplateArgumentValue`` and ``clang_Cursor_getTemplateArgumentUnsignedValue`` now work on struct, class, and partial template specialization cursors in addition to function cursors. diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1437,6 +1437,8 @@ * ``-fno-honor-nans`` + * ``-fapprox-func`` + * ``-fno-math-errno`` * ``-ffinite-math-only`` @@ -1449,6 +1451,8 @@ * ``-fno-trapping-math`` + * ``-fno-rounding-math`` + * ``-ffp-contract=fast`` Note: ``-ffast-math`` causes ``crtfastmath.o`` to be linked with code. See @@ -1457,7 +1461,7 @@ .. option:: -fno-fast-math Disable fast-math mode. This options disables unsafe floating-point - optimizations by preventing the compiler from making any tranformations that + optimizations by preventing the compiler from making any transformations that could affect the results. This option implies: @@ -1466,7 +1470,7 @@ * ``-fhonor-nans`` - * ``-fmath-errno`` + * ``-fno-approx-func`` * ``-fno-finite-math-only`` @@ -1476,14 +1480,15 @@ * ``-fsigned-zeros`` - * ``-fno-trapping-math`` - * ``-ffp-contract=on`` - * ``-fdenormal-fp-math=ieee`` + Also, this option resets following options to their target-dependent defaults. + + * ``-f[no-]math-errno`` + * ``-fdenormal-fp-math=`` There is ambiguity about how ``-ffp-contract``, ``-ffast-math``, - and ``-fno-fast-math`` behave in combination. To keep the value of + and ``-fno-fast-math`` behave when combined. To keep the value of ``-ffp-contract`` consistent, we define this set of rules: * ``-ffast-math`` sets ``ffp-contract`` to ``fast``. @@ -1516,7 +1521,8 @@ * ``preserve-sign`` - the sign of a flushed-to-zero number is preserved in the sign of 0 * ``positive-zero`` - denormals are flushed to positive zero - Defaults to ``ieee``. + The default value depends on the target. For most targets, defaults to + ``ieee``. .. option:: -f[no-]strict-float-cast-overflow @@ -1525,6 +1531,7 @@ By default, Clang will not guarantee any particular result in that case. With the 'no-strict' option, Clang will saturate towards the smallest and largest representable integer values instead. NaNs will be converted to zero. + Defaults to ``-fstrict-float-cast-overflow``. .. option:: -f[no-]math-errno @@ -1572,11 +1579,19 @@ .. option:: -f[no-]honor-infinities + Allow floating-point optimizations that assume arguments and results are + not +-Inf. + Defaults to ``-fhonor-infinities``. + If both ``-fno-honor-infinities`` and ``-fno-honor-nans`` are used, has the same effect as specifying ``-ffinite-math-only``. .. option:: -f[no-]honor-nans + Allow floating-point optimizations that assume arguments and results are + not NaNs. + Defaults to ``-fhonor-nans``. + If both ``-fno-honor-infinities`` and ``-fno-honor-nans`` are used, has the same effect as specifying ``-ffinite-math-only``. @@ -1592,7 +1607,7 @@ .. 
option:: -f[no-]signed-zeros Allow optimizations that ignore the sign of floating point zeros. - Defaults to ``-fno-signed-zeros``. + Defaults to ``-fsigned-zeros``. .. option:: -f[no-]associative-math @@ -1608,24 +1623,48 @@ .. option:: -f[no-]unsafe-math-optimizations - Allow unsafe floating-point optimizations. Also implies: + Allow unsafe floating-point optimizations. + ``-funsafe-math-optimizations`` also implies: + * ``-fapprox-func`` * ``-fassociative-math`` * ``-freciprocal-math`` - * ``-fno-signed-zeroes`` - * ``-fno-trapping-math``. + * ``-fno-signed-zeros`` + * ``-fno-trapping-math`` + * ``-ffp-contract=fast`` + + ``-fno-unsafe-math-optimizations`` implies: + + * ``-fno-approx-func`` + * ``-fno-associative-math`` + * ``-fno-reciprocal-math`` + * ``-fsigned-zeros`` + * ``-ftrapping-math`` + * ``-ffp-contract=on`` + * ``-fdenormal-fp-math=ieee`` + + There is ambiguity about how ``-ffp-contract``, + ``-funsafe-math-optimizations``, and ``-fno-unsafe-math-optimizations`` + behave when combined. The explanation in :option:`-fno-fast-math` also applies + to these options. Defaults to ``-fno-unsafe-math-optimizations``. .. option:: -f[no-]finite-math-only Allow floating-point optimizations that assume arguments and results are - not NaNs or +-Inf. This defines the ``__FINITE_MATH_ONLY__`` preprocessor macro. - Also implies: + not NaNs or +-Inf. ``-ffinite-math-only`` defines the + ``__FINITE_MATH_ONLY__`` preprocessor macro. + ``-ffinite-math-only`` implies: * ``-fno-honor-infinities`` * ``-fno-honor-nans`` + ``-fno-finite-math-only`` implies: + + * ``-fhonor-infinities`` + * ``-fhonor-nans`` + Defaults to ``-fno-finite-math-only``. .. option:: -f[no-]rounding-math @@ -1685,7 +1724,7 @@ * ``double`` The compiler uses ``double`` as the floating-point evaluation method for all float expressions of type that is narrower than ``double``. * ``extended`` The compiler uses ``long double`` as the floating-point evaluation method for all float expressions of type that is narrower than ``long double``. -.. option:: -f[no-]protect-parens: +.. option:: -f[no-]protect-parens This option pertains to floating-point types, complex types with floating-point components, and vectors of these types. Some arithmetic @@ -1699,6 +1738,7 @@ additions in any order regardless of the parentheses. When enabled, this option forces the optimizer to honor the order of operations with respect to parentheses in all circumstances. + Defaults to ``-fno-protect-parens``. Note that floating-point contraction (option `-ffp-contract=`) is disabled when `-fprotect-parens` is enabled. Also note that in safe floating-point diff --git a/clang/include/clang/APINotes/Types.h b/clang/include/clang/APINotes/Types.h --- a/clang/include/clang/APINotes/Types.h +++ b/clang/include/clang/APINotes/Types.h @@ -72,7 +72,7 @@ llvm::Optional isSwiftPrivate() const { return SwiftPrivateSpecified ? llvm::Optional(SwiftPrivate) - : llvm::None; + : std::nullopt; } void setSwiftPrivate(llvm::Optional Private) { @@ -146,7 +146,7 @@ void setSwiftBridge(const llvm::Optional &SwiftType) { SwiftBridge = SwiftType ? llvm::Optional(std::string(*SwiftType)) - : llvm::None; + : std::nullopt; } const llvm::Optional &getNSErrorDomain() const { @@ -158,8 +158,8 @@ } void setNSErrorDomain(const llvm::Optional &Domain) { - NSErrorDomain = - Domain ? llvm::Optional(std::string(*Domain)) : llvm::None; + NSErrorDomain = Domain ?
llvm::Optional(std::string(*Domain)) + : std::nullopt; } friend bool operator==(const CommonTypeInfo &, const CommonTypeInfo &); @@ -220,7 +220,7 @@ return HasDefaultNullability ? llvm::Optional( static_cast(DefaultNullability)) - : llvm::None; + : std::nullopt; } /// Set the default nullability for properties and methods of this class. @@ -235,7 +235,7 @@ llvm::Optional getSwiftImportAsNonGeneric() const { return SwiftImportAsNonGenericSpecified ? llvm::Optional(SwiftImportAsNonGeneric) - : llvm::None; + : std::nullopt; } void setSwiftImportAsNonGeneric(llvm::Optional Value) { SwiftImportAsNonGenericSpecified = Value.has_value(); @@ -244,7 +244,7 @@ llvm::Optional getSwiftObjCMembers() const { return SwiftObjCMembersSpecified ? llvm::Optional(SwiftObjCMembers) - : llvm::None; + : std::nullopt; } void setSwiftObjCMembers(llvm::Optional Value) { SwiftObjCMembersSpecified = Value.has_value(); @@ -313,7 +313,7 @@ llvm::Optional getNullability() const { return NullabilityAudited ? llvm::Optional( static_cast(Nullable)) - : llvm::None; + : std::nullopt; } void setNullabilityAudited(NullabilityKind kind) { @@ -362,7 +362,7 @@ llvm::Optional getSwiftImportAsAccessors() const { return SwiftImportAsAccessorsSpecified ? llvm::Optional(SwiftImportAsAccessors) - : llvm::None; + : std::nullopt; } void setSwiftImportAsAccessors(llvm::Optional Value) { SwiftImportAsAccessorsSpecified = Value.has_value(); @@ -425,7 +425,7 @@ llvm::Optional isNoEscape() const { if (!NoEscapeSpecified) - return llvm::None; + return std::nullopt; return NoEscape; } void setNoEscape(llvm::Optional Value) { @@ -435,7 +435,7 @@ llvm::Optional getRetainCountConvention() const { if (!RawRetainCountConvention) - return llvm::None; + return std::nullopt; return static_cast(RawRetainCountConvention - 1); } void @@ -553,7 +553,7 @@ llvm::Optional getRetainCountConvention() const { if (!RawRetainCountConvention) - return llvm::None; + return std::nullopt; return static_cast(RawRetainCountConvention - 1); } void @@ -661,7 +661,7 @@ llvm::Optional isFlagEnum() const { if (HasFlagEnum) return IsFlagEnum; - return llvm::None; + return std::nullopt; } void setFlagEnum(llvm::Optional Value) { HasFlagEnum = Value.has_value(); diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -14,42 +14,30 @@ #ifndef LLVM_CLANG_AST_ASTCONTEXT_H #define LLVM_CLANG_AST_ASTCONTEXT_H -#include "clang/AST/ASTContextAllocate.h" #include "clang/AST/ASTFwd.h" #include "clang/AST/CanonicalType.h" #include "clang/AST/CommentCommandTraits.h" #include "clang/AST/ComparisonCategories.h" #include "clang/AST/Decl.h" -#include "clang/AST/DeclBase.h" #include "clang/AST/DeclarationName.h" #include "clang/AST/ExternalASTSource.h" #include "clang/AST/NestedNameSpecifier.h" #include "clang/AST/PrettyPrinter.h" #include "clang/AST/RawCommentList.h" #include "clang/AST/TemplateName.h" -#include "clang/AST/Type.h" -#include "clang/Basic/AddressSpaces.h" -#include "clang/Basic/AttrKinds.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/LangOptions.h" -#include "clang/Basic/Linkage.h" #include "clang/Basic/NoSanitizeList.h" -#include "clang/Basic/OperatorKinds.h" #include "clang/Basic/PartialDiagnostic.h" #include "clang/Basic/ProfileList.h" #include "clang/Basic/SourceLocation.h" -#include "clang/Basic/Specifiers.h" -#include "clang/Basic/TargetCXXABI.h" #include "clang/Basic/XRayLists.h" -#include 
"llvm/ADT/APSInt.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/PointerUnion.h" @@ -57,22 +45,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" -#include "llvm/ADT/Triple.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/Support/AlignOf.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/TypeSize.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include namespace llvm { @@ -2320,7 +2293,7 @@ Optional getTypeSizeInCharsIfKnown(QualType Ty) const { if (Ty->isIncompleteType() || Ty->isDependentType()) - return None; + return std::nullopt; return getTypeSizeInChars(Ty); } @@ -2810,8 +2783,6 @@ /// long double and double on AArch64 will return 0). int getFloatingTypeSemanticOrder(QualType LHS, QualType RHS) const; - unsigned getTargetAddressSpace(QualType T) const; - unsigned getTargetAddressSpace(LangAS AS) const; LangAS getLangASForBuiltinAddressSpace(unsigned AS) const; diff --git a/clang/include/clang/AST/ASTImporterSharedState.h b/clang/include/clang/AST/ASTImporterSharedState.h --- a/clang/include/clang/AST/ASTImporterSharedState.h +++ b/clang/include/clang/AST/ASTImporterSharedState.h @@ -70,7 +70,7 @@ if (Pos != ImportErrors.end()) return Pos->second; else - return None; + return std::nullopt; } void setImportDeclError(Decl *To, ASTImportError Error) { diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h --- a/clang/include/clang/AST/ASTNodeTraverser.h +++ b/clang/include/clang/AST/ASTNodeTraverser.h @@ -476,6 +476,8 @@ Visit(D->getAsmString()); } + void VisitTopLevelStmtDecl(const TopLevelStmtDecl *D) { Visit(D->getStmt()); } + void VisitCapturedDecl(const CapturedDecl *D) { Visit(D->getBody()); } void VisitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) { diff --git a/clang/include/clang/AST/CommentSema.h b/clang/include/clang/AST/CommentSema.h --- a/clang/include/clang/AST/CommentSema.h +++ b/clang/include/clang/AST/CommentSema.h @@ -80,7 +80,7 @@ ArrayRef copyArray(ArrayRef Source) { if (!Source.empty()) return Source.copy(Allocator); - return None; + return std::nullopt; } ParagraphComment *actOnParagraphComment( diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -4277,6 +4277,34 @@ static bool classofKind(Kind K) { return K == FileScopeAsm; } }; +/// A declaration that models statements at global scope. This declaration +/// supports incremental and interactive C/C++. +/// +/// \note This is used in libInterpreter, clang -cc1 -fincremental-extensions +/// and in tools such as clang-repl. 
+class TopLevelStmtDecl : public Decl { + friend class ASTDeclReader; + friend class ASTDeclWriter; + + Stmt *Statement = nullptr; + + TopLevelStmtDecl(DeclContext *DC, SourceLocation L, Stmt *S) + : Decl(TopLevelStmt, DC, L), Statement(S) {} + + virtual void anchor(); + +public: + static TopLevelStmtDecl *Create(ASTContext &C, Stmt *Statement); + static TopLevelStmtDecl *CreateDeserialized(ASTContext &C, unsigned ID); + + SourceRange getSourceRange() const override LLVM_READONLY; + Stmt *getStmt() { return Statement; } + const Stmt *getStmt() const { return Statement; } + + static bool classof(const Decl *D) { return classofKind(D->getKind()); } + static bool classofKind(Kind K) { return K == TopLevelStmt; } +}; + /// Represents a block literal declaration, which is like an /// unnamed FunctionDecl. For example: /// ^{ statement-body } or ^(int arg1, float arg2){ statement-body } diff --git a/clang/include/clang/AST/DeclFriend.h b/clang/include/clang/AST/DeclFriend.h --- a/clang/include/clang/AST/DeclFriend.h +++ b/clang/include/clang/AST/DeclFriend.h @@ -108,11 +108,10 @@ friend class ASTNodeImporter; friend TrailingObjects; - static FriendDecl *Create(ASTContext &C, DeclContext *DC, - SourceLocation L, FriendUnion Friend_, - SourceLocation FriendL, - ArrayRef FriendTypeTPLists - = None); + static FriendDecl * + Create(ASTContext &C, DeclContext *DC, SourceLocation L, FriendUnion Friend_, + SourceLocation FriendL, + ArrayRef FriendTypeTPLists = std::nullopt); static FriendDecl *CreateDeserialized(ASTContext &C, unsigned ID, unsigned FriendTypeNumTPLists); diff --git a/clang/include/clang/AST/DeclObjC.h b/clang/include/clang/AST/DeclObjC.h --- a/clang/include/clang/AST/DeclObjC.h +++ b/clang/include/clang/AST/DeclObjC.h @@ -389,9 +389,8 @@ /// Sets the method's parameters and selector source locations. /// If the method is implicit (not coming from source) \p SelLocs is /// ignored. - void setMethodParams(ASTContext &C, - ArrayRef Params, - ArrayRef SelLocs = llvm::None); + void setMethodParams(ASTContext &C, ArrayRef Params, + ArrayRef SelLocs = std::nullopt); // Iterator access to parameter types. struct GetTypeFn { diff --git a/clang/include/clang/AST/DeclOpenMP.h b/clang/include/clang/AST/DeclOpenMP.h --- a/clang/include/clang/AST/DeclOpenMP.h +++ b/clang/include/clang/AST/DeclOpenMP.h @@ -34,7 +34,7 @@ /// Get the clauses storage. 
MutableArrayRef getClauses() { if (!Data) - return llvm::None; + return std::nullopt; return Data->getClauses(); } @@ -90,7 +90,7 @@ ArrayRef clauses() const { if (!Data) - return llvm::None; + return std::nullopt; return Data->getClauses(); } }; diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -1245,14 +1245,11 @@ NumExpanded(NumExpanded.value_or(0)) {} public: - static TemplateTypeParmDecl *Create(const ASTContext &C, DeclContext *DC, - SourceLocation KeyLoc, - SourceLocation NameLoc, - unsigned D, unsigned P, - IdentifierInfo *Id, bool Typename, - bool ParameterPack, - bool HasTypeConstraint = false, - Optional NumExpanded = None); + static TemplateTypeParmDecl * + Create(const ASTContext &C, DeclContext *DC, SourceLocation KeyLoc, + SourceLocation NameLoc, unsigned D, unsigned P, IdentifierInfo *Id, + bool Typename, bool ParameterPack, bool HasTypeConstraint = false, + Optional NumExpanded = std::nullopt); static TemplateTypeParmDecl *CreateDeserialized(const ASTContext &C, unsigned ID); static TemplateTypeParmDecl *CreateDeserialized(const ASTContext &C, @@ -3453,7 +3450,7 @@ return TTP->getNumExpansionTemplateParameters(); } - return None; + return std::nullopt; } /// Internal helper used by Subst* nodes to retrieve the parameter list diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -2273,13 +2273,13 @@ /// If the result is not-None, it will never wrap a nullptr. Optional getArraySize() { if (!isArray()) - return None; + return std::nullopt; if (auto *Result = cast_or_null(getTrailingObjects()[arraySizeOffset()])) return Result; - return None; + return std::nullopt; } /// This might return None even if isArray() returns true, @@ -2287,13 +2287,13 @@ /// If the result is not-None, it will never wrap a nullptr. 
Optional getArraySize() const { if (!isArray()) - return None; + return std::nullopt; if (auto *Result = cast_or_null(getTrailingObjects()[arraySizeOffset()])) return Result; - return None; + return std::nullopt; } unsigned getNumPlacementArgs() const { @@ -4116,7 +4116,7 @@ if (NumExpansions) return NumExpansions - 1; - return None; + return std::nullopt; } SourceLocation getBeginLoc() const LLVM_READONLY { @@ -4201,11 +4201,11 @@ : Expr(SizeOfPackExprClass, Empty), Length(NumPartialArgs) {} public: - static SizeOfPackExpr *Create(ASTContext &Context, SourceLocation OperatorLoc, - NamedDecl *Pack, SourceLocation PackLoc, - SourceLocation RParenLoc, - Optional Length = None, - ArrayRef PartialArgs = None); + static SizeOfPackExpr * + Create(ASTContext &Context, SourceLocation OperatorLoc, NamedDecl *Pack, + SourceLocation PackLoc, SourceLocation RParenLoc, + Optional Length = std::nullopt, + ArrayRef PartialArgs = std::nullopt); static SizeOfPackExpr *CreateDeserialized(ASTContext &Context, unsigned NumPartialArgs); @@ -4316,7 +4316,7 @@ Optional getPackIndex() const { if (PackIndex == 0) - return None; + return std::nullopt; return PackIndex - 1; } @@ -4681,7 +4681,7 @@ Optional getNumExpansions() const { if (NumExpansions) return NumExpansions - 1; - return None; + return std::nullopt; } SourceLocation getBeginLoc() const LLVM_READONLY { diff --git a/clang/include/clang/AST/ExprObjC.h b/clang/include/clang/AST/ExprObjC.h --- a/clang/include/clang/AST/ExprObjC.h +++ b/clang/include/clang/AST/ExprObjC.h @@ -362,7 +362,8 @@ ObjCDictionaryElement getKeyValueElement(unsigned Index) const { assert((Index < NumElements) && "Arg access out of range!"); const KeyValuePair &KV = getTrailingObjects()[Index]; - ObjCDictionaryElement Result = { KV.Key, KV.Value, SourceLocation(), None }; + ObjCDictionaryElement Result = {KV.Key, KV.Value, SourceLocation(), + std::nullopt}; if (HasPackExpansions) { const ExpansionData &Expansion = getTrailingObjects()[Index]; diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -5838,14 +5838,14 @@ return const_component_lists_iterator( getUniqueDeclsRef(), getDeclNumListsRef(), getComponentListSizesRef(), getComponentsRef(), SupportsMapper, - SupportsMapper ? getUDMapperRefs() : llvm::None); + SupportsMapper ? getUDMapperRefs() : std::nullopt); } const_component_lists_iterator component_lists_end() const { return const_component_lists_iterator( ArrayRef(), ArrayRef(), ArrayRef(), MappableExprComponentListRef(getComponentsRef().end(), getComponentsRef().end()), - SupportsMapper, llvm::None); + SupportsMapper, std::nullopt); } const_component_lists_range component_lists() const { return {component_lists_begin(), component_lists_end()}; @@ -5858,7 +5858,7 @@ return const_component_lists_iterator( VD, getUniqueDeclsRef(), getDeclNumListsRef(), getComponentListSizesRef(), getComponentsRef(), SupportsMapper, - SupportsMapper ? getUDMapperRefs() : llvm::None); + SupportsMapper ? 
getUDMapperRefs() : std::nullopt); } const_component_lists_iterator decl_component_lists_end() const { return component_lists_end(); diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -1543,6 +1543,8 @@ DEF_TRAVERSE_DECL(FileScopeAsmDecl, { TRY_TO(TraverseStmt(D->getAsmString())); }) +DEF_TRAVERSE_DECL(TopLevelStmtDecl, { TRY_TO(TraverseStmt(D->getStmt())); }) + DEF_TRAVERSE_DECL(ImportDecl, {}) DEF_TRAVERSE_DECL(FriendDecl, { diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -277,7 +277,7 @@ /// Get the clauses storage. MutableArrayRef getClauses() { if (!Data) - return llvm::None; + return std::nullopt; return Data->getClauses(); } @@ -571,7 +571,7 @@ ArrayRef clauses() const { if (!Data) - return llvm::None; + return std::nullopt; return Data->getClauses(); } diff --git a/clang/include/clang/AST/TemplateBase.h b/clang/include/clang/AST/TemplateBase.h --- a/clang/include/clang/AST/TemplateBase.h +++ b/clang/include/clang/AST/TemplateBase.h @@ -233,7 +233,9 @@ TemplateArgument(TemplateName, bool) = delete; - static TemplateArgument getEmptyPack() { return TemplateArgument(None); } + static TemplateArgument getEmptyPack() { + return TemplateArgument(std::nullopt); + } /// Create a new template argument pack by copying the given set of /// template arguments. diff --git a/clang/include/clang/AST/TemplateName.h b/clang/include/clang/AST/TemplateName.h --- a/clang/include/clang/AST/TemplateName.h +++ b/clang/include/clang/AST/TemplateName.h @@ -397,7 +397,7 @@ Optional getPackIndex() const { if (Bits.Data == 0) - return None; + return std::nullopt; return Bits.Data - 1; } diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -5113,7 +5113,7 @@ Optional getPackIndex() const { if (SubstTemplateTypeParmTypeBits.PackIndex == 0) - return None; + return std::nullopt; return SubstTemplateTypeParmTypeBits.PackIndex - 1; } @@ -5855,7 +5855,7 @@ Optional getNumExpansions() const { if (PackExpansionTypeBits.NumExpansions) return PackExpansionTypeBits.NumExpansions - 1; - return None; + return std::nullopt; } bool isSugared() const { return false; } diff --git a/clang/include/clang/ASTMatchers/ASTMatchFinder.h b/clang/include/clang/ASTMatchers/ASTMatchFinder.h --- a/clang/include/clang/ASTMatchers/ASTMatchFinder.h +++ b/clang/include/clang/ASTMatchers/ASTMatchFinder.h @@ -290,7 +290,7 @@ } llvm::Optional getCheckTraversalKind() const override { - return llvm::None; + return std::nullopt; } SmallVector Nodes; diff --git a/clang/include/clang/ASTMatchers/ASTMatchersInternal.h b/clang/include/clang/ASTMatchers/ASTMatchersInternal.h --- a/clang/include/clang/ASTMatchers/ASTMatchersInternal.h +++ b/clang/include/clang/ASTMatchers/ASTMatchersInternal.h @@ -122,7 +122,7 @@ template )> struct VariadicFunction { - ResultT operator()() const { return Func(None); } + ResultT operator()() const { return Func(std::nullopt); } template ResultT operator()(const ArgT &Arg1, const ArgsT &... 
Args) const { @@ -352,7 +352,7 @@ BoundNodesTreeBuilder *Builder) const = 0; virtual llvm::Optional TraversalKind() const { - return llvm::None; + return std::nullopt; } }; @@ -1983,10 +1983,10 @@ inline Optional equivalentBinaryOperator(const CXXOperatorCallExpr &Node) { if (Node.getNumArgs() != 2) - return None; + return std::nullopt; switch (Node.getOperator()) { default: - return None; + return std::nullopt; case OO_ArrowStar: return BO_PtrMemI; case OO_Star: @@ -2065,10 +2065,10 @@ equivalentUnaryOperator(const CXXOperatorCallExpr &Node) { if (Node.getNumArgs() != 1 && Node.getOperator() != OO_PlusPlus && Node.getOperator() != OO_MinusMinus) - return None; + return std::nullopt; switch (Node.getOperator()) { default: - return None; + return std::nullopt; case OO_Plus: return UO_Plus; case OO_Minus: @@ -2084,13 +2084,13 @@ case OO_PlusPlus: { const auto *FD = Node.getDirectCallee(); if (!FD) - return None; + return std::nullopt; return FD->getNumParams() > 0 ? UO_PostInc : UO_PreInc; } case OO_MinusMinus: { const auto *FD = Node.getDirectCallee(); if (!FD) - return None; + return std::nullopt; return FD->getNumParams() > 0 ? UO_PostDec : UO_PreDec; } case OO_Coawait: @@ -2191,7 +2191,7 @@ if (!optBinaryOpcode) { auto optUnaryOpcode = equivalentUnaryOperator(Node); if (!optUnaryOpcode) - return None; + return std::nullopt; return UnaryOperator::getOpcodeStr(*optUnaryOpcode); } return BinaryOperator::getOpcodeStr(*optBinaryOpcode); @@ -2236,7 +2236,7 @@ if (!optBinaryOpcode) { auto optUnaryOpcode = equivalentUnaryOperator(Node); if (!optUnaryOpcode) - return None; + return std::nullopt; return UnaryOperator::getOpcodeStr(*optUnaryOpcode); } return BinaryOperator::getOpcodeStr(*optBinaryOpcode); diff --git a/clang/include/clang/Analysis/Analyses/PostOrderCFGView.h b/clang/include/clang/Analysis/Analyses/PostOrderCFGView.h --- a/clang/include/clang/Analysis/Analyses/PostOrderCFGView.h +++ b/clang/include/clang/Analysis/Analyses/PostOrderCFGView.h @@ -54,11 +54,12 @@ // occasionally hand out null pointers for pruned edges, so we catch those // here. if (!Block) - return std::make_pair(None, false); // if an edge is trivially false. + return std::make_pair(std::nullopt, + false); // if an edge is trivially false. if (VisitedBlockIDs.test(Block->getBlockID())) - return std::make_pair(None, false); + return std::make_pair(std::nullopt, false); VisitedBlockIDs.set(Block->getBlockID()); - return std::make_pair(None, true); + return std::make_pair(std::nullopt, true); } /// Check if the bit for a CFGBlock has been already set. diff --git a/clang/include/clang/Analysis/Analyses/ThreadSafetyTIL.h b/clang/include/clang/Analysis/Analyses/ThreadSafetyTIL.h --- a/clang/include/clang/Analysis/Analyses/ThreadSafetyTIL.h +++ b/clang/include/clang/Analysis/Analyses/ThreadSafetyTIL.h @@ -1464,7 +1464,7 @@ static bool classof(const SExpr *E) { return E->opcode() == COP_Return; } /// Return an empty list. 
- ArrayRef successors() { return None; } + ArrayRef successors() { return std::nullopt; } SExpr *returnValue() { return Retval; } const SExpr *returnValue() const { return Retval; } @@ -1490,7 +1490,7 @@ case COP_Branch: return cast(this)->successors(); case COP_Return: return cast(this)->successors(); default: - return None; + return std::nullopt; } } diff --git a/clang/include/clang/Analysis/AnyCall.h b/clang/include/clang/Analysis/AnyCall.h --- a/clang/include/clang/Analysis/AnyCall.h +++ b/clang/include/clang/Analysis/AnyCall.h @@ -123,7 +123,7 @@ } else if (const auto *CXCIE = dyn_cast(E)) { return AnyCall(CXCIE); } else { - return None; + return std::nullopt; } } @@ -136,13 +136,13 @@ } else if (const auto *MD = dyn_cast(D)) { return AnyCall(MD); } - return None; + return std::nullopt; } /// \returns formal parameters for direct calls (including virtual calls) ArrayRef parameters() const { if (!D) - return None; + return std::nullopt; if (const auto *FD = dyn_cast(D)) { return FD->parameters(); @@ -151,7 +151,7 @@ } else if (const auto *BD = dyn_cast(D)) { return BD->parameters(); } else { - return None; + return std::nullopt; } } diff --git a/clang/include/clang/Analysis/CFG.h b/clang/include/clang/Analysis/CFG.h --- a/clang/include/clang/Analysis/CFG.h +++ b/clang/include/clang/Analysis/CFG.h @@ -108,7 +108,7 @@ template Optional getAs() const { if (!T::isKind(*this)) - return None; + return std::nullopt; T t; CFGElement& e = t; e = *this; diff --git a/clang/include/clang/Analysis/ProgramPoint.h b/clang/include/clang/Analysis/ProgramPoint.h --- a/clang/include/clang/Analysis/ProgramPoint.h +++ b/clang/include/clang/Analysis/ProgramPoint.h @@ -149,7 +149,7 @@ template Optional getAs() const { if (!T::isKind(*this)) - return None; + return std::nullopt; T t; ProgramPoint& PP = t; PP = *this; diff --git a/clang/include/clang/Basic/DarwinSDKInfo.h b/clang/include/clang/Basic/DarwinSDKInfo.h --- a/clang/include/clang/Basic/DarwinSDKInfo.h +++ b/clang/include/clang/Basic/DarwinSDKInfo.h @@ -100,9 +100,9 @@ /// Returns the mapped key, or the appropriate Minimum / MaximumValue if /// they key is outside of the mapping bounds. If they key isn't mapped, but /// within the minimum and maximum bounds, None is returned. - Optional map(const VersionTuple &Key, - const VersionTuple &MinimumValue, - Optional MaximumValue) const; + std::optional + map(const VersionTuple &Key, const VersionTuple &MinimumValue, + std::optional MaximumValue) const; static Optional parseJSON(const llvm::json::Object &Obj, @@ -145,7 +145,7 @@ return Mapping->getSecond() ? &*Mapping->getSecond() : nullptr; } - static Optional + static std::optional parseDarwinSDKSettingsJSON(const llvm::json::Object *Obj); private: @@ -162,8 +162,8 @@ /// /// \returns an error if the SDKSettings.json file is invalid, None if the /// SDK has no SDKSettings.json, or a valid \c DarwinSDKInfo otherwise. 
-Expected> parseDarwinSDKInfo(llvm::vfs::FileSystem &VFS, - StringRef SDKRootPath); +Expected> +parseDarwinSDKInfo(llvm::vfs::FileSystem &VFS, StringRef SDKRootPath); } // end namespace clang diff --git a/clang/include/clang/Basic/DeclNodes.td b/clang/include/clang/Basic/DeclNodes.td --- a/clang/include/clang/Basic/DeclNodes.td +++ b/clang/include/clang/Basic/DeclNodes.td @@ -95,6 +95,7 @@ def Export : DeclNode, DeclContext; def ObjCPropertyImpl : DeclNode; def FileScopeAsm : DeclNode; +def TopLevelStmt : DeclNode; def AccessSpec : DeclNode; def Friend : DeclNode; def FriendTemplate : DeclNode; diff --git a/clang/include/clang/Basic/DirectoryEntry.h b/clang/include/clang/Basic/DirectoryEntry.h --- a/clang/include/clang/Basic/DirectoryEntry.h +++ b/clang/include/clang/Basic/DirectoryEntry.h @@ -272,7 +272,7 @@ OptionalDirectoryEntryRefDegradesToDirectoryEntryPtr & operator=(std::nullopt_t) { - Optional::operator=(None); + Optional::operator=(std::nullopt); return *this; } OptionalDirectoryEntryRefDegradesToDirectoryEntryPtr &operator=(DirectoryEntryRef Ref) { diff --git a/clang/include/clang/Basic/FileEntry.h b/clang/include/clang/Basic/FileEntry.h --- a/clang/include/clang/Basic/FileEntry.h +++ b/clang/include/clang/Basic/FileEntry.h @@ -315,7 +315,7 @@ : Optional(MaybeRef) {} OptionalFileEntryRefDegradesToFileEntryPtr &operator=(std::nullopt_t) { - Optional::operator=(None); + Optional::operator=(std::nullopt); return *this; } OptionalFileEntryRefDegradesToFileEntryPtr &operator=(FileEntryRef Ref) { diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -459,6 +459,11 @@ // on large _BitInts. BENIGN_VALUE_LANGOPT(MaxBitIntWidth, 32, 128, "Maximum width of a _BitInt") +LANGOPT(IncrementalExtensions, 1, 0, " True if we want to process statements" + "on the global scope, ignore EOF token and continue later on (thus " + "avoid tearing the Lexer and etc. down). Controlled by " + "-fincremental-extensions.") + #undef LANGOPT #undef COMPATIBLE_LANGOPT #undef BENIGN_LANGOPT diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -179,8 +179,9 @@ mutable unsigned IsBufferInvalid : 1; ContentCache() - : OrigEntry(None), ContentsEntry(nullptr), BufferOverridden(false), - IsFileVolatile(false), IsTransient(false), IsBufferInvalid(false) {} + : OrigEntry(std::nullopt), ContentsEntry(nullptr), + BufferOverridden(false), IsFileVolatile(false), IsTransient(false), + IsBufferInvalid(false) {} ContentCache(FileEntryRef Ent) : ContentCache(Ent, Ent) {} @@ -236,7 +237,7 @@ llvm::Optional getBufferIfLoaded() const { if (Buffer) return Buffer->getMemBufferRef(); - return None; + return std::nullopt; } /// Return a StringRef to the source buffer data, only if it has already @@ -244,7 +245,7 @@ llvm::Optional getBufferDataIfLoaded() const { if (Buffer) return Buffer->getBuffer(); - return None; + return std::nullopt; } /// Set the buffer. @@ -1025,7 +1026,7 @@ if (auto *Entry = getSLocEntryForFile(FID)) return Entry->getFile().getContentCache().getBufferOrNone( Diag, getFileManager(), Loc); - return None; + return std::nullopt; } /// Return the buffer for the specified FileID. 
@@ -1050,7 +1051,7 @@ Optional getFileEntryRefForID(FileID FID) const { if (auto *Entry = getSLocEntryForFile(FID)) return Entry->getFile().getContentCache().OrigEntry; - return None; + return std::nullopt; } /// Returns the filename for the provided FileID, unless it's a built-in diff --git a/clang/include/clang/Basic/TargetID.h b/clang/include/clang/Basic/TargetID.h --- a/clang/include/clang/Basic/TargetID.h +++ b/clang/include/clang/Basic/TargetID.h @@ -49,7 +49,7 @@ /// Get the conflicted pair of target IDs for a compilation or a bundled code /// object, assuming \p TargetIDs are canonicalized. If there is no conflicts, /// returns None. -llvm::Optional> +std::optional> getConflictTargetIDCombination(const std::set &TargetIDs); /// Check whether the provided target ID is compatible with the requested diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -232,7 +232,6 @@ unsigned char RegParmMax, SSERegParmMax; TargetCXXABI TheCXXABI; const LangASMap *AddrSpaceMap; - unsigned ProgramAddrSpace; mutable StringRef PlatformName; mutable VersionTuple PlatformMinVersion; @@ -822,9 +821,6 @@ return getTypeWidth(IntMaxType); } - /// Return the address space for functions for the given target. - unsigned getProgramAddressSpace() const { return ProgramAddrSpace; } - // Return the size of unwind_word for this target. virtual unsigned getUnwindWordWidth() const { return getPointerWidth(LangAS::Default); @@ -960,7 +956,7 @@ /// Returns target-specific min and max values VScale_Range. virtual Optional> getVScaleRange(const LangOptions &LangOpts) const { - return None; + return std::nullopt; } /// The __builtin_clz* and __builtin_ctz* built-in /// functions are specified to have undefined results for zero inputs, but @@ -1185,7 +1181,7 @@ /// Replace some escaped characters with another string based on /// target-specific rules virtual llvm::Optional handleAsmEscapedChar(char C) const { - return llvm::None; + return std::nullopt; } /// Returns a string of target-specific clobbers, in LLVM format. @@ -1203,7 +1199,9 @@ } /// Returns the target ID if supported. - virtual llvm::Optional getTargetID() const { return llvm::None; } + virtual llvm::Optional getTargetID() const { + return std::nullopt; + } const char *getDataLayoutString() const { assert(!DataLayoutString.empty() && "Uninitialized DataLayout!"); @@ -1438,7 +1436,9 @@ // Get the cache line size of a given cpu. This method switches over // the given cpu and returns "None" if the CPU is not found. - virtual Optional getCPUCacheLineSize() const { return None; } + virtual Optional getCPUCacheLineSize() const { + return std::nullopt; + } // Returns maximal number of args passed in registers. unsigned getRegParmMax() const { @@ -1655,7 +1655,7 @@ /// \returns Otherwise return None and no conversion will be emitted in the /// DWARF. 
virtual Optional getDWARFAddressSpace(unsigned AddressSpace) const { - return None; + return std::nullopt; } /// \returns The version of the SDK which was used during the compilation if @@ -1708,7 +1708,7 @@ virtual ArrayRef getGCCRegNames() const = 0; virtual ArrayRef getGCCRegAliases() const = 0; virtual ArrayRef getGCCAddlRegNames() const { - return None; + return std::nullopt; } private: diff --git a/clang/include/clang/Driver/Compilation.h b/clang/include/clang/Driver/Compilation.h --- a/clang/include/clang/Driver/Compilation.h +++ b/clang/include/clang/Driver/Compilation.h @@ -15,13 +15,13 @@ #include "clang/Driver/Util.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/Option/Option.h" #include #include #include #include +#include #include #include @@ -113,7 +113,7 @@ ArgStringMap FailureResultFiles; /// Optional redirection for stdin, stdout, stderr. - std::vector> Redirects; + std::vector> Redirects; /// Callback called after compilation job has been finished. /// Arguments of the callback are the compilation job as an instance of @@ -332,8 +332,8 @@ /// /// \param Redirects - array of optional paths. The array should have a size /// of three. The inferior process's stdin(0), stdout(1), and stderr(2) will - /// be redirected to the corresponding paths, if provided (not llvm::None). - void Redirect(ArrayRef> Redirects); + /// be redirected to the corresponding paths, if provided (not std::nullopt). + void Redirect(ArrayRef> Redirects); }; } // namespace driver diff --git a/clang/include/clang/Driver/Job.h b/clang/include/clang/Driver/Job.h --- a/clang/include/clang/Driver/Job.h +++ b/clang/include/clang/Driver/Job.h @@ -12,13 +12,13 @@ #include "clang/Basic/LLVM.h" #include "clang/Driver/InputInfo.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator.h" #include "llvm/Option/Option.h" #include "llvm/Support/Program.h" #include +#include #include #include #include @@ -142,10 +142,10 @@ std::vector Environment; /// Optional redirection for stdin, stdout, stderr. - std::vector> RedirectFiles; + std::vector> RedirectFiles; /// Information on executable run provided by OS. - mutable Optional ProcStat; + mutable std::optional ProcStat; /// When a response file is needed, we try to put most arguments in an /// exclusive file, while others remains as regular command line arguments. @@ -169,7 +169,7 @@ Command(const Action &Source, const Tool &Creator, ResponseFileSupport ResponseSupport, const char *Executable, const llvm::opt::ArgStringList &Arguments, ArrayRef Inputs, - ArrayRef Outputs = None); + ArrayRef Outputs = std::nullopt); // FIXME: This really shouldn't be copyable, but is currently copied in some // error handling in Driver::generateCompilationDiagnostics. Command(const Command &) = default; @@ -178,7 +178,7 @@ virtual void Print(llvm::raw_ostream &OS, const char *Terminator, bool Quote, CrashReportInfo *CrashInfo = nullptr) const; - virtual int Execute(ArrayRef> Redirects, + virtual int Execute(ArrayRef> Redirects, std::string *ErrMsg, bool *ExecutionFailed) const; /// getSource - Return the Action which caused the creation of this job. @@ -207,7 +207,8 @@ /// from the parent process will be used. 
virtual void setEnvironment(llvm::ArrayRef NewEnvironment); - void setRedirectFiles(const std::vector> &Redirects); + void + setRedirectFiles(const std::vector> &Redirects); void replaceArguments(llvm::opt::ArgStringList List) { Arguments = std::move(List); @@ -225,7 +226,7 @@ return OutputFilenames; } - Optional getProcessStatistics() const { + std::optional getProcessStatistics() const { return ProcStat; } @@ -240,12 +241,13 @@ CC1Command(const Action &Source, const Tool &Creator, ResponseFileSupport ResponseSupport, const char *Executable, const llvm::opt::ArgStringList &Arguments, - ArrayRef Inputs, ArrayRef Outputs = None); + ArrayRef Inputs, + ArrayRef Outputs = std::nullopt); void Print(llvm::raw_ostream &OS, const char *Terminator, bool Quote, CrashReportInfo *CrashInfo = nullptr) const override; - int Execute(ArrayRef> Redirects, std::string *ErrMsg, + int Execute(ArrayRef> Redirects, std::string *ErrMsg, bool *ExecutionFailed) const override; void setEnvironment(llvm::ArrayRef NewEnvironment) override; @@ -259,12 +261,12 @@ const char *Executable_, const llvm::opt::ArgStringList &Arguments_, ArrayRef Inputs, - ArrayRef Outputs = None); + ArrayRef Outputs = std::nullopt); void Print(llvm::raw_ostream &OS, const char *Terminator, bool Quote, CrashReportInfo *CrashInfo = nullptr) const override; - int Execute(ArrayRef> Redirects, std::string *ErrMsg, + int Execute(ArrayRef> Redirects, std::string *ErrMsg, bool *ExecutionFailed) const override; }; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2320,6 +2320,13 @@ HelpText<"Do not enforce -fmodules-decluse and private header restrictions for textual headers. " "This flag will be removed in a future Clang release.">; +def fincremental_extensions : + Flag<["-"], "fincremental-extensions">, + Group, Flags<[CC1Option]>, + HelpText<"Enable incremental processing extensions such as processing " "statements on the global scope.">, + MarshallingInfoFlag>; + def fvalidate_ast_input_files_content: Flag <["-"], "fvalidate-ast-input-files-content">, Group, Flags<[CC1Option]>, @@ -5076,15 +5083,9 @@ let Flags = [CC1Option, CC1AsOption, NoDriverOption] in { -def target_cpu : Separate<["-"], "target-cpu">, - HelpText<"Target a specific cpu type">, - MarshallingInfoString>; def tune_cpu : Separate<["-"], "tune-cpu">, HelpText<"Tune for a specific cpu type">, MarshallingInfoString>; -def target_feature : Separate<["-"], "target-feature">, - HelpText<"Target specific attributes">, - MarshallingInfoStringVector>; def target_abi : Separate<["-"], "target-abi">, HelpText<"Target a particular ABI type">, MarshallingInfoString>; @@ -5111,6 +5112,12 @@ let Flags = [CC1Option, CC1AsOption, FC1Option, NoDriverOption] in { +def target_cpu : Separate<["-"], "target-cpu">, + HelpText<"Target a specific cpu type">, + MarshallingInfoString>; +def target_feature : Separate<["-"], "target-feature">, + HelpText<"Target specific attributes">, + MarshallingInfoStringVector>; def triple : Separate<["-"], "triple">, HelpText<"Specify target triple (e.g. 
i686-apple-darwin9)">, MarshallingInfoString, "llvm::Triple::normalize(llvm::sys::getDefaultTargetTriple())">, @@ -6925,6 +6932,7 @@ def _SLASH_Bt : CLFlag<"Bt">; def _SLASH_Bt_plus : CLFlag<"Bt+">; def _SLASH_clr : CLJoined<"clr">; +def _SLASH_d1 : CLJoined<"d1">; def _SLASH_d2 : CLJoined<"d2">; def _SLASH_doc : CLJoined<"doc">; def _SLASH_experimental : CLJoined<"experimental:">; diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -181,9 +181,9 @@ EffectiveTriple = std::move(ET); } - mutable llvm::Optional cxxStdlibType; - mutable llvm::Optional runtimeLibType; - mutable llvm::Optional unwindLibType; + mutable std::optional cxxStdlibType; + mutable std::optional runtimeLibType; + mutable std::optional unwindLibType; protected: MultilibSet Multilibs; diff --git a/clang/include/clang/Frontend/ASTUnit.h b/clang/include/clang/Frontend/ASTUnit.h --- a/clang/include/clang/Frontend/ASTUnit.h +++ b/clang/include/clang/Frontend/ASTUnit.h @@ -823,7 +823,7 @@ IntrusiveRefCntPtr Diags, StringRef ResourceFilesPath, bool OnlyLocalDecls = false, CaptureDiagsKind CaptureDiagnostics = CaptureDiagsKind::None, - ArrayRef RemappedFiles = None, + ArrayRef RemappedFiles = std::nullopt, bool RemappedFilesKeepOriginalName = true, unsigned PrecompilePreambleAfterNParses = 0, TranslationUnitKind TUKind = TU_Complete, @@ -835,7 +835,7 @@ bool SingleFileParse = false, bool UserFilesAreVolatile = false, bool ForSerialization = false, bool RetainExcludedConditionalBlocks = false, - llvm::Optional ModuleFormat = llvm::None, + llvm::Optional ModuleFormat = std::nullopt, std::unique_ptr *ErrAST = nullptr, IntrusiveRefCntPtr VFS = nullptr); @@ -851,7 +851,7 @@ /// \returns True if a failure occurred that causes the ASTUnit not to /// contain any translation-unit information, false otherwise. bool Reparse(std::shared_ptr PCHContainerOps, - ArrayRef RemappedFiles = None, + ArrayRef RemappedFiles = std::nullopt, IntrusiveRefCntPtr VFS = nullptr); /// Free data that will be re-generated on the next parse. 
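For context on the -fincremental-extensions flag added to Options.td above: together with the TopLevelStmtDecl and ParseTopLevelStmtDecl changes further down in this patch, it lets the parser accept statements at the global scope instead of diagnosing them. A minimal sketch of such an input, assuming an invocation along the lines of "clang -cc1 -fincremental-extensions tu.cpp"; the file name and values are illustrative only, not taken from the patch:

// tu.cpp: only accepted when -fincremental-extensions is enabled.
int Counter = 0;   // an ordinary file-scope declaration, as before
Counter += 2;      // an expression statement at global scope; the parser
                   // wraps it in a TopLevelStmtDecl instead of rejecting it
++Counter;         // another top-level statement
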
diff --git a/clang/include/clang/Frontend/CommandLineSourceLoc.h b/clang/include/clang/Frontend/CommandLineSourceLoc.h --- a/clang/include/clang/Frontend/CommandLineSourceLoc.h +++ b/clang/include/clang/Frontend/CommandLineSourceLoc.h @@ -93,7 +93,7 @@ } auto Begin = ParsedSourceLocation::FromString(RangeSplit.first); if (Begin.FileName.empty()) - return None; + return std::nullopt; if (!HasEndLoc) { EndLine = Begin.Line; EndColumn = Begin.Column; diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h --- a/clang/include/clang/Frontend/CompilerInvocation.h +++ b/clang/include/clang/Frontend/CompilerInvocation.h @@ -294,6 +294,11 @@ const CompilerInvocation &CI, DiagnosticsEngine &Diags, IntrusiveRefCntPtr BaseFS); +IntrusiveRefCntPtr +createVFSFromOverlayFiles(ArrayRef VFSOverlayFiles, + DiagnosticsEngine &Diags, + IntrusiveRefCntPtr BaseFS); + } // namespace clang #endif // LLVM_CLANG_FRONTEND_COMPILERINVOCATION_H diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -241,9 +241,9 @@ InputKind getKind() const { return Kind; } bool isSystem() const { return IsSystem; } - bool isEmpty() const { return File.empty() && Buffer == None; } + bool isEmpty() const { return File.empty() && Buffer == std::nullopt; } bool isFile() const { return !isBuffer(); } - bool isBuffer() const { return Buffer != None; } + bool isBuffer() const { return Buffer != std::nullopt; } bool isPreprocessed() const { return Kind.isPreprocessed(); } bool isHeader() const { return Kind.isHeader(); } InputKind::HeaderUnitKind getHeaderUnitKind() const { diff --git a/clang/include/clang/Lex/DirectoryLookup.h b/clang/include/clang/Lex/DirectoryLookup.h --- a/clang/include/clang/Lex/DirectoryLookup.h +++ b/clang/include/clang/Lex/DirectoryLookup.h @@ -92,7 +92,7 @@ } Optional getDirRef() const { - return isNormalDir() ? Optional(u.Dir) : None; + return isNormalDir() ? Optional(u.Dir) : std::nullopt; } /// getFrameworkDir - Return the directory that this framework refers to. @@ -102,7 +102,7 @@ } Optional getFrameworkDirRef() const { - return isFramework() ? Optional(u.Dir) : None; + return isFramework() ? Optional(u.Dir) : std::nullopt; } /// getHeaderMap - Return the directory that this entry refers to. diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h --- a/clang/include/clang/Lex/ModuleMap.h +++ b/clang/include/clang/Lex/ModuleMap.h @@ -735,7 +735,7 @@ llvm::Optional getCachedModuleLoad(const IdentifierInfo &II) { auto I = CachedModuleLoads.find(&II); if (I == CachedModuleLoads.end()) - return None; + return std::nullopt; return I->second; } }; diff --git a/clang/include/clang/Lex/PreprocessingRecord.h b/clang/include/clang/Lex/PreprocessingRecord.h --- a/clang/include/clang/Lex/PreprocessingRecord.h +++ b/clang/include/clang/Lex/PreprocessingRecord.h @@ -293,7 +293,7 @@ /// entity with index \p Index came from file \p FID. virtual Optional isPreprocessedEntityInFileID(unsigned Index, FileID FID) { - return None; + return std::nullopt; } /// Read a preallocated skipped range from the external source. diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -283,10 +283,6 @@ /// Empty line handler. 
EmptylineHandler *Emptyline = nullptr; - /// True if we want to ignore EOF token and continue later on (thus - /// avoid tearing the Lexer and etc. down). - bool IncrementalProcessing = false; - public: /// The kind of translation unit we are processing. const TranslationUnitKind TUKind; @@ -761,7 +757,7 @@ getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const { if (auto *Info = getModuleInfo(PP, II)) return Info->ActiveModuleMacros; - return None; + return std::nullopt; } MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc, @@ -785,7 +781,7 @@ ArrayRef getOverriddenMacros() const { if (auto *Info = State.dyn_cast()) return Info->OverriddenMacros; - return None; + return std::nullopt; } void setOverriddenMacros(Preprocessor &PP, @@ -908,17 +904,17 @@ static MacroAnnotations makeDeprecation(SourceLocation Loc, std::string Msg) { return MacroAnnotations{MacroAnnotationInfo{Loc, std::move(Msg)}, - llvm::None, llvm::None}; + std::nullopt, std::nullopt}; } static MacroAnnotations makeRestrictExpansion(SourceLocation Loc, std::string Msg) { return MacroAnnotations{ - llvm::None, MacroAnnotationInfo{Loc, std::move(Msg)}, llvm::None}; + std::nullopt, MacroAnnotationInfo{Loc, std::move(Msg)}, std::nullopt}; } static MacroAnnotations makeFinal(SourceLocation Loc) { - return MacroAnnotations{llvm::None, llvm::None, Loc}; + return MacroAnnotations{std::nullopt, std::nullopt, Loc}; } }; @@ -1303,7 +1299,7 @@ auto I = LeafModuleMacros.find(II); if (I != LeafModuleMacros.end()) return I->second; - return None; + return std::nullopt; } /// Get the list of submodules that we're currently building. @@ -1778,11 +1774,14 @@ void recomputeCurLexerKind(); /// Returns true if incremental processing is enabled - bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; } + bool isIncrementalProcessingEnabled() const { + return getLangOpts().IncrementalExtensions; + } /// Enables the incremental processing void enableIncrementalProcessing(bool value = true) { - IncrementalProcessing = value; + // FIXME: Drop this interface. + const_cast(getLangOpts()).IncrementalExtensions = value; } /// Specify the point at which code-completion will be performed. diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -464,6 +464,9 @@ typedef Sema::FullExprArg FullExprArg; + /// A SmallVector of statements. + typedef SmallVector StmtVector; + // Parsing methods. /// Initialize - Warm up the parser. @@ -2071,10 +2074,7 @@ //===--------------------------------------------------------------------===// // C99 6.8: Statements and Blocks. - /// A SmallVector of statements, with stack size 32 (as that is the only one - /// used.) - typedef SmallVector StmtVector; - /// A SmallVector of expressions, with stack size 12 (the maximum used.) + /// A SmallVector of expressions. typedef SmallVector ExprVector; StmtResult @@ -2451,6 +2451,8 @@ ParsingDeclSpec &DS, llvm::function_ref FieldsCallback); + DeclGroupPtrTy ParseTopLevelStmtDecl(); + bool isDeclarationSpecifier(ImplicitTypenameContext AllowImplicitTypename, bool DisambiguatingWithExpression = false); bool isTypeSpecifierQualifier(); @@ -2472,10 +2474,13 @@ /// isDeclarationStatement - Disambiguates between a declaration or an /// expression statement, when parsing function bodies. 
+ /// + /// \param DisambiguatingWithExpression - True to indicate that the purpose of + /// this check is to disambiguate between an expression and a declaration. /// Returns true for declaration, false for expression. - bool isDeclarationStatement() { + bool isDeclarationStatement(bool DisambiguatingWithExpression = false) { if (getLangOpts().CPlusPlus) - return isCXXDeclarationStatement(); + return isCXXDeclarationStatement(DisambiguatingWithExpression); return isDeclarationSpecifier(ImplicitTypenameContext::No, true); } @@ -2542,7 +2547,7 @@ /// isCXXDeclarationStatement - C++-specialized function that disambiguates /// between a declaration or an expression statement, when parsing function /// bodies. Returns true for declaration, false for expression. - bool isCXXDeclarationStatement(); + bool isCXXDeclarationStatement(bool DisambiguatingWithExpression = false); /// isCXXSimpleDeclaration - C++-specialized function that disambiguates /// between a simple-declaration or an expression-statement. @@ -3061,7 +3066,8 @@ void ParseTypeQualifierListOpt( DeclSpec &DS, unsigned AttrReqs = AR_AllAttributesParsed, bool AtomicAllowed = true, bool IdentifierRequired = false, - Optional> CodeCompletionHandler = None); + Optional> CodeCompletionHandler = + std::nullopt); void ParseDirectDeclarator(Declarator &D); void ParseDecompositionDeclarator(Declarator &D); void ParseParenDeclarator(Declarator &D); diff --git a/clang/include/clang/Sema/CodeCompleteConsumer.h b/clang/include/clang/Sema/CodeCompleteConsumer.h --- a/clang/include/clang/Sema/CodeCompleteConsumer.h +++ b/clang/include/clang/Sema/CodeCompleteConsumer.h @@ -368,11 +368,11 @@ public: /// Construct a new code-completion context of the given kind. CodeCompletionContext(Kind CCKind) - : CCKind(CCKind), IsUsingDeclaration(false), SelIdents(None) {} + : CCKind(CCKind), IsUsingDeclaration(false), SelIdents(std::nullopt) {} /// Construct a new code-completion context of the given kind. CodeCompletionContext(Kind CCKind, QualType T, - ArrayRef SelIdents = None) + ArrayRef SelIdents = std::nullopt) : CCKind(CCKind), IsUsingDeclaration(false), SelIdents(SelIdents) { if (CCKind == CCC_DotMemberAccess || CCKind == CCC_ArrowMemberAccess || CCKind == CCC_ObjCPropertyAccess || CCKind == CCC_ObjCClassMessage || @@ -425,7 +425,7 @@ llvm::Optional getCXXScopeSpecifier() { if (ScopeSpecifier) return &*ScopeSpecifier; - return llvm::None; + return std::nullopt; } }; diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h --- a/clang/include/clang/Sema/Overload.h +++ b/clang/include/clang/Sema/Overload.h @@ -1146,8 +1146,9 @@ /// Add a new candidate with NumConversions conversion sequence slots /// to the overload set. 
- OverloadCandidate &addCandidate(unsigned NumConversions = 0, - ConversionSequenceList Conversions = None) { + OverloadCandidate & + addCandidate(unsigned NumConversions = 0, + ConversionSequenceList Conversions = std::nullopt) { assert((Conversions.empty() || Conversions.size() == NumConversions) && "preallocated conversion sequence has wrong length"); diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2886,12 +2886,10 @@ LookupResult &Previous); NamedDecl* ActOnTypedefNameDecl(Scope* S, DeclContext* DC, TypedefNameDecl *D, LookupResult &Previous, bool &Redeclaration); - NamedDecl *ActOnVariableDeclarator(Scope *S, Declarator &D, DeclContext *DC, - TypeSourceInfo *TInfo, - LookupResult &Previous, - MultiTemplateParamsArg TemplateParamLists, - bool &AddToScope, - ArrayRef Bindings = None); + NamedDecl *ActOnVariableDeclarator( + Scope *S, Declarator &D, DeclContext *DC, TypeSourceInfo *TInfo, + LookupResult &Previous, MultiTemplateParamsArg TemplateParamLists, + bool &AddToScope, ArrayRef Bindings = std::nullopt); NamedDecl * ActOnDecompositionDeclarator(Scope *S, Declarator &D, MultiTemplateParamsArg TemplateParamLists); @@ -3106,6 +3104,8 @@ SourceLocation AsmLoc, SourceLocation RParenLoc); + Decl *ActOnTopLevelStmtDecl(Stmt *Statement); + /// Handle a C++11 empty-declaration and attribute-declaration. Decl *ActOnEmptyDeclaration(Scope *S, const ParsedAttributesView &AttrList, SourceLocation SemiLoc); @@ -3928,16 +3928,14 @@ using ADLCallKind = CallExpr::ADLCallKind; - void AddOverloadCandidate(FunctionDecl *Function, DeclAccessPair FoundDecl, - ArrayRef Args, - OverloadCandidateSet &CandidateSet, - bool SuppressUserConversions = false, - bool PartialOverloading = false, - bool AllowExplicit = true, - bool AllowExplicitConversion = false, - ADLCallKind IsADLCandidate = ADLCallKind::NotADL, - ConversionSequenceList EarlyConversions = None, - OverloadCandidateParamOrder PO = {}); + void AddOverloadCandidate( + FunctionDecl *Function, DeclAccessPair FoundDecl, ArrayRef Args, + OverloadCandidateSet &CandidateSet, bool SuppressUserConversions = false, + bool PartialOverloading = false, bool AllowExplicit = true, + bool AllowExplicitConversion = false, + ADLCallKind IsADLCandidate = ADLCallKind::NotADL, + ConversionSequenceList EarlyConversions = std::nullopt, + OverloadCandidateParamOrder PO = {}); void AddFunctionCandidates(const UnresolvedSetImpl &Functions, ArrayRef Args, OverloadCandidateSet &CandidateSet, @@ -3952,16 +3950,15 @@ OverloadCandidateSet& CandidateSet, bool SuppressUserConversion = false, OverloadCandidateParamOrder PO = {}); - void AddMethodCandidate(CXXMethodDecl *Method, - DeclAccessPair FoundDecl, - CXXRecordDecl *ActingContext, QualType ObjectType, - Expr::Classification ObjectClassification, - ArrayRef Args, - OverloadCandidateSet& CandidateSet, - bool SuppressUserConversions = false, - bool PartialOverloading = false, - ConversionSequenceList EarlyConversions = None, - OverloadCandidateParamOrder PO = {}); + void + AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl, + CXXRecordDecl *ActingContext, QualType ObjectType, + Expr::Classification ObjectClassification, + ArrayRef Args, OverloadCandidateSet &CandidateSet, + bool SuppressUserConversions = false, + bool PartialOverloading = false, + ConversionSequenceList EarlyConversions = std::nullopt, + OverloadCandidateParamOrder PO = {}); void AddMethodTemplateCandidate(FunctionTemplateDecl 
*MethodTmpl, DeclAccessPair FoundDecl, CXXRecordDecl *ActingContext, @@ -5449,9 +5446,9 @@ /// referenced. Used when template instantiation instantiates a non-dependent /// type -- entities referenced by the type are now referenced. void MarkDeclarationsReferencedInType(SourceLocation Loc, QualType T); - void MarkDeclarationsReferencedInExpr(Expr *E, - bool SkipLocalVariables = false, - ArrayRef StopAt = None); + void MarkDeclarationsReferencedInExpr( + Expr *E, bool SkipLocalVariables = false, + ArrayRef StopAt = std::nullopt); /// Try to recover by turning the given expression into a /// call. Returns true if recovery was attempted or an error was @@ -5512,7 +5509,8 @@ DiagnoseEmptyLookup(Scope *S, CXXScopeSpec &SS, LookupResult &R, CorrectionCandidateCallback &CCC, TemplateArgumentListInfo *ExplicitTemplateArgs = nullptr, - ArrayRef Args = None, TypoExpr **Out = nullptr); + ArrayRef Args = std::nullopt, + TypoExpr **Out = nullptr); DeclResult LookupIvarInObjCMethod(LookupResult &Lookup, Scope *S, IdentifierInfo *II); @@ -7059,7 +7057,8 @@ /// Number lambda for linkage purposes if necessary. void handleLambdaNumbering( CXXRecordDecl *Class, CXXMethodDecl *Method, - Optional> Mangling = None); + Optional> Mangling = + std::nullopt); /// Endow the lambda scope info with the relevant properties. void buildLambdaScope(sema::LambdaScopeInfo *LSI, @@ -7078,7 +7077,7 @@ SourceLocation Loc, bool ByRef, SourceLocation EllipsisLoc, IdentifierInfo *Id, LambdaCaptureInitKind InitKind, Expr *&Init) { return ParsedType::make(buildLambdaInitCaptureInitialization( - Loc, ByRef, EllipsisLoc, None, Id, + Loc, ByRef, EllipsisLoc, std::nullopt, Id, InitKind != LambdaCaptureInitKind::CopyInit, Init)); } QualType buildLambdaInitCaptureInitialization( @@ -7542,8 +7541,9 @@ bool SetDelegatingInitializer(CXXConstructorDecl *Constructor, CXXCtorInitializer *Initializer); - bool SetCtorInitializers(CXXConstructorDecl *Constructor, bool AnyErrors, - ArrayRef Initializers = None); + bool SetCtorInitializers( + CXXConstructorDecl *Constructor, bool AnyErrors, + ArrayRef Initializers = std::nullopt); void SetIvarInitializers(ObjCImplementationDecl *ObjCImplementation); @@ -9572,7 +9572,7 @@ Sema &SemaRef, CodeSynthesisContext::SynthesisKind Kind, SourceLocation PointOfInstantiation, SourceRange InstantiationRange, Decl *Entity, NamedDecl *Template = nullptr, - ArrayRef TemplateArgs = None, + ArrayRef TemplateArgs = std::nullopt, sema::TemplateDeductionInfo *DeductionInfo = nullptr); InstantiatingTemplate(const InstantiatingTemplate&) = delete; @@ -10255,8 +10255,8 @@ ObjCInterfaceDecl *ID); Decl *ActOnAtEnd(Scope *S, SourceRange AtEnd, - ArrayRef allMethods = None, - ArrayRef allTUVars = None); + ArrayRef allMethods = std::nullopt, + ArrayRef allTUVars = std::nullopt); Decl *ActOnProperty(Scope *S, SourceLocation AtLoc, SourceLocation LParenLoc, @@ -11935,21 +11935,21 @@ SourceLocation ModifierLoc, SourceLocation ColonLoc, SourceLocation EndLoc, CXXScopeSpec &ReductionIdScopeSpec, const DeclarationNameInfo &ReductionId, - ArrayRef UnresolvedReductions = llvm::None); + ArrayRef UnresolvedReductions = std::nullopt); /// Called on well-formed 'task_reduction' clause. 
OMPClause *ActOnOpenMPTaskReductionClause( ArrayRef VarList, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc, CXXScopeSpec &ReductionIdScopeSpec, const DeclarationNameInfo &ReductionId, - ArrayRef UnresolvedReductions = llvm::None); + ArrayRef UnresolvedReductions = std::nullopt); /// Called on well-formed 'in_reduction' clause. OMPClause *ActOnOpenMPInReductionClause( ArrayRef VarList, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc, CXXScopeSpec &ReductionIdScopeSpec, const DeclarationNameInfo &ReductionId, - ArrayRef UnresolvedReductions = llvm::None); + ArrayRef UnresolvedReductions = std::nullopt); /// Called on well-formed 'linear' clause. OMPClause * ActOnOpenMPLinearClause(ArrayRef VarList, Expr *Step, @@ -12003,7 +12003,7 @@ OpenMPMapClauseKind MapType, bool IsMapTypeImplicit, SourceLocation MapLoc, SourceLocation ColonLoc, ArrayRef VarList, const OMPVarListLocTy &Locs, bool NoDiagnose = false, - ArrayRef UnresolvedMappers = llvm::None); + ArrayRef UnresolvedMappers = std::nullopt); /// Called on well-formed 'num_teams' clause. OMPClause *ActOnOpenMPNumTeamsClause(Expr *NumTeams, SourceLocation StartLoc, SourceLocation LParenLoc, @@ -12034,7 +12034,7 @@ CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo &MapperId, SourceLocation ColonLoc, ArrayRef VarList, const OMPVarListLocTy &Locs, - ArrayRef UnresolvedMappers = llvm::None); + ArrayRef UnresolvedMappers = std::nullopt); /// Called on well-formed 'from' clause. OMPClause * ActOnOpenMPFromClause(ArrayRef MotionModifiers, @@ -12042,7 +12042,7 @@ CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo &MapperId, SourceLocation ColonLoc, ArrayRef VarList, const OMPVarListLocTy &Locs, - ArrayRef UnresolvedMappers = llvm::None); + ArrayRef UnresolvedMappers = std::nullopt); /// Called on well-formed 'use_device_ptr' clause. OMPClause *ActOnOpenMPUseDevicePtrClause(ArrayRef VarList, const OMPVarListLocTy &Locs); @@ -12707,7 +12707,7 @@ } llvm::Optional getKnownValue() const { if (!HasKnownValue) - return None; + return std::nullopt; return KnownValue; } }; diff --git a/clang/include/clang/Sema/Template.h b/clang/include/clang/Sema/Template.h --- a/clang/include/clang/Sema/Template.h +++ b/clang/include/clang/Sema/Template.h @@ -571,6 +571,7 @@ // Decls which never appear inside a class or function. #define OBJCCONTAINER(DERIVED, BASE) #define FILESCOPEASM(DERIVED, BASE) +#define TOPLEVELSTMT(DERIVED, BASE) #define IMPORT(DERIVED, BASE) #define EXPORT(DERIVED, BASE) #define LINKAGESPEC(DERIVED, BASE) @@ -600,7 +601,7 @@ Decl *VisitCXXMethodDecl(CXXMethodDecl *D, TemplateParameterList *TemplateParams, Optional - ClassScopeSpecializationArgs = llvm::None, + ClassScopeSpecializationArgs = std::nullopt, RewriteKind RK = RewriteKind::None); Decl *VisitFunctionDecl(FunctionDecl *D, TemplateParameterList *TemplateParams, diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -41,7 +41,7 @@ /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. -const unsigned VERSION_MAJOR = 24; +const unsigned VERSION_MAJOR = 25; /// AST file minor version number supported by this version of /// Clang. @@ -397,6 +397,9 @@ /// Record code for the diagnostic options table. 
DIAGNOSTIC_OPTIONS, + /// Record code for the headers search paths. + HEADER_SEARCH_PATHS, + /// Record code for \#pragma diagnostic mappings. DIAG_PRAGMA_MAPPINGS, @@ -1315,6 +1318,9 @@ /// A FileScopeAsmDecl record. DECL_FILE_SCOPE_ASM, + /// A TopLevelStmtDecl record. + DECL_TOP_LEVEL_STMT_DECL, + /// A BlockDecl record. DECL_BLOCK, diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -164,6 +164,10 @@ /// Receives the header search options. /// + /// \param HSOpts The read header search options. The following fields are + /// missing and are reported in ReadHeaderSearchPaths(): + /// UserEntries, SystemHeaderPrefixes, VFSOverlayFiles. + /// /// \returns true to indicate the header search options are invalid, or false /// otherwise. virtual bool ReadHeaderSearchOptions(const HeaderSearchOptions &HSOpts, @@ -172,6 +176,20 @@ return false; } + /// Receives the header search paths. + /// + /// \param HSOpts The read header search paths. Only the following fields are + /// initialized: UserEntries, SystemHeaderPrefixes, + /// VFSOverlayFiles. The rest is reported in + /// ReadHeaderSearchOptions(). + /// + /// \returns true to indicate the header search paths are invalid, or false + /// otherwise. + virtual bool ReadHeaderSearchPaths(const HeaderSearchOptions &HSOpts, + bool Complain) { + return false; + } + /// Receives the preprocessor options. /// /// \param SuggestedPredefines Can be filled in with the set of predefines @@ -1247,18 +1265,8 @@ /// Reads a statement from the specified cursor. Stmt *ReadStmtFromStream(ModuleFile &F); - struct InputFileInfo { - std::string Filename; - uint64_t ContentHash; - off_t StoredSize; - time_t StoredTime; - bool Overridden; - bool Transient; - bool TopLevelModuleMap; - }; - - /// Reads the stored information about an input file. - InputFileInfo readInputFileInfo(ModuleFile &F, unsigned ID); + /// Retrieve the stored information about an input file. + serialization::InputFileInfo getInputFileInfo(ModuleFile &F, unsigned ID); /// Retrieve the file entry and 'overridden' bit for an input /// file in the given module file. @@ -1369,6 +1377,8 @@ ASTReaderListener &Listener); static bool ParseHeaderSearchOptions(const RecordData &Record, bool Complain, ASTReaderListener &Listener); + static bool ParseHeaderSearchPaths(const RecordData &Record, bool Complain, + ASTReaderListener &Listener); static bool ParsePreprocessorOptions(const RecordData &Record, bool Complain, ASTReaderListener &Listener, std::string &SuggestedPredefines); diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -59,6 +59,17 @@ MK_PrebuiltModule }; +/// The input file info that has been loaded from an AST file. +struct InputFileInfo { + std::string Filename; + uint64_t ContentHash; + off_t StoredSize; + time_t StoredTime; + bool Overridden; + bool Transient; + bool TopLevelModuleMap; +}; + /// The input file that has been loaded from this AST file, along with /// bools indicating whether this was an overridden buffer or if it was /// out-of-date or not-found. 
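To illustrate the ASTReaderListener split documented above (ReadHeaderSearchOptions() versus the new ReadHeaderSearchPaths()), here is a minimal sketch of a listener that consumes only the separately serialized search paths; the class name and the dumping behavior are made up for the example:

#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Serialization/ASTReader.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical listener: dumps the user header search entries recorded in an AST file.
class DumpSearchPathsListener : public clang::ASTReaderListener {
public:
  bool ReadHeaderSearchPaths(const clang::HeaderSearchOptions &HSOpts,
                             bool Complain) override {
    // Only UserEntries, SystemHeaderPrefixes and VFSOverlayFiles are populated
    // here; the remaining fields still arrive via ReadHeaderSearchOptions().
    for (const auto &Entry : HSOpts.UserEntries)
      llvm::errs() << Entry.Path << "\n";
    return false; // false: the paths are acceptable, keep loading the AST file
  }
};
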
@@ -94,7 +105,7 @@ OptionalFileEntryRefDegradesToFileEntryPtr getFile() const { if (auto *P = Val.getPointer()) return FileEntryRef(*P); - return None; + return std::nullopt; } bool isOverridden() const { return Val.getInt() == Overridden; } bool isOutOfDate() const { return Val.getInt() == OutOfDate; } @@ -235,6 +246,9 @@ /// The input files that have been loaded from this AST file. std::vector InputFilesLoaded; + /// The input file infos that have been loaded from this AST file. + std::vector InputFileInfosLoaded; + // All user input files reside at the index range [0, NumUserInputFiles), and // system input files reside at [NumUserInputFiles, InputFilesLoaded.size()). unsigned NumUserInputFiles = 0; diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h --- a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h +++ b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h @@ -629,14 +629,14 @@ void EmitBasicReport(const Decl *DeclWithIssue, const CheckerBase *Checker, StringRef BugName, StringRef BugCategory, StringRef BugStr, PathDiagnosticLocation Loc, - ArrayRef Ranges = None, - ArrayRef Fixits = None); + ArrayRef Ranges = std::nullopt, + ArrayRef Fixits = std::nullopt); void EmitBasicReport(const Decl *DeclWithIssue, CheckerNameRef CheckerName, StringRef BugName, StringRef BugCategory, StringRef BugStr, PathDiagnosticLocation Loc, - ArrayRef Ranges = None, - ArrayRef Fixits = None); + ArrayRef Ranges = std::nullopt, + ArrayRef Fixits = std::nullopt); private: llvm::StringMap> StrBugTypes; @@ -783,7 +783,7 @@ PathSensitiveBugReport &R) const { std::string Msg = Cb(BRC, R); if (Msg.empty()) - return None; + return std::nullopt; return std::move(Msg); } diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h @@ -65,13 +65,13 @@ /// name regardless the number of arguments. CallDescription(CallDescriptionFlags Flags, ArrayRef QualifiedName, - MaybeCount RequiredArgs = None, - MaybeCount RequiredParams = None); + MaybeCount RequiredArgs = std::nullopt, + MaybeCount RequiredParams = std::nullopt); /// Construct a CallDescription with default flags. CallDescription(ArrayRef QualifiedName, - MaybeCount RequiredArgs = None, - MaybeCount RequiredParams = None); + MaybeCount RequiredArgs = std::nullopt, + MaybeCount RequiredParams = std::nullopt); CallDescription(std::nullptr_t) = delete; diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h @@ -775,7 +775,7 @@ // For member operator calls argument 0 on the expression corresponds // to implicit this-parameter on the declaration. return (ASTArgumentIndex > 0) ? 
Optional(ASTArgumentIndex - 1) - : None; + : std::nullopt; } unsigned getASTArgumentIndex(unsigned CallArgumentIndex) const override { diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h @@ -90,7 +90,7 @@ MapTy::const_iterator I = Map.find(D); if (I != Map.end() && I->second.InlineChecked) return I->second.MayInline; - return None; + return std::nullopt; } void markVisitedBasicBlock(unsigned ID, const Decl* D, unsigned TotalIDs) { diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -98,7 +98,7 @@ return ArrayRef( Directives->value()); } - return None; + return std::nullopt; } /// \returns The error. diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h @@ -84,7 +84,7 @@ /// occurred, dependency file contents otherwise. llvm::Expected getDependencyFile(const std::vector &CommandLine, StringRef CWD, - llvm::Optional ModuleName = None); + llvm::Optional ModuleName = std::nullopt); /// Collect the full module dependency graph for the input, ignoring any /// modules which have already been seen. If \p ModuleName isn't empty, this @@ -105,13 +105,13 @@ getFullDependencies(const std::vector &CommandLine, StringRef CWD, const llvm::StringSet<> &AlreadySeen, LookupModuleOutputCallback LookupModuleOutput, - llvm::Optional ModuleName = None); + llvm::Optional ModuleName = std::nullopt); llvm::Expected getFullDependenciesLegacyDriverCommand( const std::vector &CommandLine, StringRef CWD, const llvm::StringSet<> &AlreadySeen, LookupModuleOutputCallback LookupModuleOutput, - llvm::Optional ModuleName = None); + llvm::Optional ModuleName = std::nullopt); private: DependencyScanningWorker Worker; diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h @@ -79,13 +79,14 @@ const std::vector &CommandLine, DependencyConsumer &DepConsumer, DiagnosticConsumer &DiagConsumer, - llvm::Optional ModuleName = None); + llvm::Optional ModuleName = std::nullopt); /// \returns A \c StringError with the diagnostic output if clang errors /// occurred, success otherwise. 
- llvm::Error computeDependencies(StringRef WorkingDirectory, - const std::vector &CommandLine, - DependencyConsumer &Consumer, - llvm::Optional ModuleName = None); + llvm::Error + computeDependencies(StringRef WorkingDirectory, + const std::vector &CommandLine, + DependencyConsumer &Consumer, + llvm::Optional ModuleName = std::nullopt); bool shouldEagerLoadModules() const { return EagerLoadModules; } diff --git a/clang/lib/APINotes/APINotesYAMLCompiler.cpp b/clang/lib/APINotes/APINotesYAMLCompiler.cpp --- a/clang/lib/APINotes/APINotesYAMLCompiler.cpp +++ b/clang/lib/APINotes/APINotesYAMLCompiler.cpp @@ -119,7 +119,7 @@ template <> struct MappingTraits { static void mapping(IO &IO, Param &P) { IO.mapRequired("Position", P.Position); - IO.mapOptional("Nullability", P.Nullability, llvm::None); + IO.mapOptional("Nullability", P.Nullability, std::nullopt); IO.mapOptional("RetainCountConvention", P.RetainCountConvention); IO.mapOptional("NoEscape", P.NoEscape); IO.mapOptional("Type", P.Type, StringRef("")); @@ -183,7 +183,7 @@ IO.mapRequired("MethodKind", M.Kind); IO.mapOptional("Parameters", M.Params); IO.mapOptional("Nullability", M.Nullability); - IO.mapOptional("NullabilityOfRet", M.NullabilityOfRet, llvm::None); + IO.mapOptional("NullabilityOfRet", M.NullabilityOfRet, std::nullopt); IO.mapOptional("RetainCountConvention", M.RetainCountConvention); IO.mapOptional("Availability", M.Availability.Mode, APIAvailability::Available); @@ -222,7 +222,7 @@ static void mapping(IO &IO, Property &P) { IO.mapRequired("Name", P.Name); IO.mapOptional("PropertyKind", P.Kind); - IO.mapOptional("Nullability", P.Nullability, llvm::None); + IO.mapOptional("Nullability", P.Nullability, std::nullopt); IO.mapOptional("Availability", P.Availability.Mode, APIAvailability::Available); IO.mapOptional("AvailabilityMsg", P.Availability.Msg, StringRef("")); @@ -303,7 +303,7 @@ IO.mapRequired("Name", F.Name); IO.mapOptional("Parameters", F.Params); IO.mapOptional("Nullability", F.Nullability); - IO.mapOptional("NullabilityOfRet", F.NullabilityOfRet, llvm::None); + IO.mapOptional("NullabilityOfRet", F.NullabilityOfRet, std::nullopt); IO.mapOptional("RetainCountConvention", F.RetainCountConvention); IO.mapOptional("Availability", F.Availability.Mode, APIAvailability::Available); @@ -336,7 +336,7 @@ template <> struct MappingTraits { static void mapping(IO &IO, GlobalVariable &GV) { IO.mapRequired("Name", GV.Name); - IO.mapOptional("Nullability", GV.Nullability, llvm::None); + IO.mapOptional("Nullability", GV.Nullability, std::nullopt); IO.mapOptional("Availability", GV.Availability.Mode, APIAvailability::Available); IO.mapOptional("AvailabilityMsg", GV.Availability.Msg, StringRef("")); @@ -549,7 +549,7 @@ TopLevelItems TopLevel; VersionedSeq SwiftVersions; - llvm::Optional SwiftInferImportAsMember = {llvm::None}; + llvm::Optional SwiftInferImportAsMember = {std::nullopt}; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump() /*const*/; diff --git a/clang/lib/ARCMigrate/Internals.h b/clang/lib/ARCMigrate/Internals.h --- a/clang/lib/ARCMigrate/Internals.h +++ b/clang/lib/ARCMigrate/Internals.h @@ -74,7 +74,7 @@ bool clearDiagnostic(ArrayRef IDs, SourceRange range); bool clearAllDiagnostics(SourceRange range) { - return clearDiagnostic(None, range); + return clearDiagnostic(std::nullopt, range); } bool clearDiagnostic(unsigned ID1, unsigned ID2, SourceRange range) { unsigned IDs[] = { ID1, ID2 }; diff --git a/clang/lib/ARCMigrate/ObjCMT.cpp b/clang/lib/ARCMigrate/ObjCMT.cpp --- 
a/clang/lib/ARCMigrate/ObjCMT.cpp +++ b/clang/lib/ARCMigrate/ObjCMT.cpp @@ -202,7 +202,7 @@ Consumers.push_back(WrapperFrontendAction::CreateASTConsumer(CI, InFile)); Consumers.push_back(std::make_unique( MigrateDir, ObjCMigAction, Remapper, CompInst->getFileManager(), PPRec, - CompInst->getPreprocessor(), false, None)); + CompInst->getPreprocessor(), false, std::nullopt)); return std::make_unique(std::move(Consumers)); } diff --git a/clang/lib/ARCMigrate/TransGCAttrs.cpp b/clang/lib/ARCMigrate/TransGCAttrs.cpp --- a/clang/lib/ARCMigrate/TransGCAttrs.cpp +++ b/clang/lib/ARCMigrate/TransGCAttrs.cpp @@ -46,7 +46,7 @@ if (!D || D->isImplicit()) return true; - SaveAndRestore Save(FullyMigratable, isMigratable(D)); + SaveAndRestore Save(FullyMigratable, isMigratable(D)); if (ObjCPropertyDecl *PropD = dyn_cast(D)) { lookForAttribute(PropD, PropD->getTypeSourceInfo()); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -771,9 +771,9 @@ if (auto *OrigFold = dyn_cast(IDC)) NewIDC = new (C) CXXFoldExpr( - OrigFold->getType(), /*Callee*/nullptr, SourceLocation(), NewIDC, + OrigFold->getType(), /*Callee*/ nullptr, SourceLocation(), NewIDC, BinaryOperatorKind::BO_LAnd, SourceLocation(), /*RHS=*/nullptr, - SourceLocation(), /*NumExpansions=*/None); + SourceLocation(), /*NumExpansions=*/std::nullopt); return NewIDC; } @@ -797,12 +797,13 @@ PEnd = Params->end(); P != PEnd; ++P) { if (const auto *TTP = dyn_cast(*P)) { - TemplateTypeParmDecl *NewTTP = TemplateTypeParmDecl::Create(*this, - getTranslationUnitDecl(), SourceLocation(), SourceLocation(), + TemplateTypeParmDecl *NewTTP = TemplateTypeParmDecl::Create( + *this, getTranslationUnitDecl(), SourceLocation(), SourceLocation(), TTP->getDepth(), TTP->getIndex(), nullptr, false, TTP->isParameterPack(), TTP->hasTypeConstraint(), - TTP->isExpandedParameterPack() ? - llvm::Optional(TTP->getNumExpansionParameters()) : None); + TTP->isExpandedParameterPack() + ? 
llvm::Optional(TTP->getNumExpansionParameters()) + : std::nullopt); if (const auto *TC = TTP->getTypeConstraint()) { QualType ParamAsArgument(NewTTP->getTypeForDecl(), 0); Expr *NewIDC = canonicalizeImmediatelyDeclaredConstraint( @@ -1139,7 +1140,7 @@ auto MergedIt = MergedDefModules.find(cast(Def->getCanonicalDecl())); if (MergedIt == MergedDefModules.end()) - return None; + return std::nullopt; return MergedIt->second; } @@ -1197,7 +1198,7 @@ ArrayRef ASTContext::getModuleInitializers(Module *M) { auto It = ModuleInitializers.find(M); if (It == ModuleInitializers.end()) - return None; + return std::nullopt; auto *Inits = It->second; Inits->resolve(*this); @@ -2743,7 +2744,7 @@ bool IsBitIntType = Field->getType()->isBitIntType(); if (!Field->getType()->isReferenceType() && !IsBitIntType && !Context.hasUniqueObjectRepresentations(Field->getType())) - return llvm::None; + return std::nullopt; int64_t FieldSizeInBits = Context.toBits(Context.getTypeSizeInChars(Field->getType())); @@ -2752,14 +2753,14 @@ if (IsBitIntType) { if ((unsigned)BitfieldSize > cast(Field->getType())->getNumBits()) - return llvm::None; + return std::nullopt; } else if (BitfieldSize > FieldSizeInBits) { - return llvm::None; + return std::nullopt; } FieldSizeInBits = BitfieldSize; } else if (IsBitIntType && !Context.hasUniqueObjectRepresentations(Field->getType())) { - return llvm::None; + return std::nullopt; } return FieldSizeInBits; } @@ -2777,11 +2778,11 @@ llvm::Optional SizeInBits = getSubobjectSizeInBits(Subobject, Context); if (!SizeInBits) - return llvm::None; + return std::nullopt; if (*SizeInBits != 0) { int64_t Offset = getSubobjectOffset(Subobject, Context, Layout); if (Offset != CurOffsetInBits) - return llvm::None; + return std::nullopt; CurOffsetInBits += *SizeInBits; } } @@ -2797,7 +2798,7 @@ int64_t CurOffsetInBits = 0; if (const auto *ClassDecl = dyn_cast(RD)) { if (ClassDecl->isDynamicClass()) - return llvm::None; + return std::nullopt; SmallVector Bases; for (const auto &Base : ClassDecl->bases()) { @@ -2814,7 +2815,7 @@ structSubobjectsHaveUniqueObjectRepresentations(Bases, CurOffsetInBits, Context, Layout); if (!OffsetAfterBases) - return llvm::None; + return std::nullopt; CurOffsetInBits = *OffsetAfterBases; } @@ -2822,7 +2823,7 @@ structSubobjectsHaveUniqueObjectRepresentations( RD->fields(), CurOffsetInBits, Context, Layout); if (!OffsetAfterFields) - return llvm::None; + return std::nullopt; CurOffsetInBits = *OffsetAfterFields; return CurOffsetInBits; @@ -5218,7 +5219,7 @@ if (const auto *TTP = dyn_cast(Param)) { QualType ArgType = getTypeDeclType(TTP); if (TTP->isParameterPack()) - ArgType = getPackExpansionType(ArgType, None); + ArgType = getPackExpansionType(ArgType, std::nullopt); Arg = TemplateArgument(ArgType); } else if (auto *NTTP = dyn_cast(Param)) { @@ -5235,8 +5236,8 @@ Expr::getValueKindForType(NTTP->getType()), NTTP->getLocation()); if (NTTP->isParameterPack()) - E = new (*this) PackExpansionExpr(DependentTy, E, NTTP->getLocation(), - None); + E = new (*this) + PackExpansionExpr(DependentTy, E, NTTP->getLocation(), std::nullopt); Arg = TemplateArgument(E); } else { auto *TTP = cast(Param); @@ -11923,7 +11924,7 @@ [](ASTContext &, const NamedDecl *ND) -> llvm::Optional { if (const auto *RD = dyn_cast(ND)) return RD->getDeviceLambdaManglingNumber(); - return llvm::None; + return std::nullopt; }, /*IsAux=*/true); case TargetCXXABI::Microsoft: @@ -12227,16 +12228,6 @@ return getTargetInfo().getNullPointerValue(AS); } -unsigned ASTContext::getTargetAddressSpace(QualType T) const { - 
// Return the address space for the type. If the type is a - // function type without an address space qualifier, the - // program address space is used. Otherwise, the target picks - // the best address space based on the type information - return T->isFunctionType() && !T.hasAddressSpace() - ? getTargetInfo().getProgramAddressSpace() - : getTargetAddressSpace(T.getAddressSpace()); -} - unsigned ASTContext::getTargetAddressSpace(LangAS AS) const { if (isTargetAddressSpace(AS)) return toTargetAddressSpace(AS); diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -223,7 +223,7 @@ template Expected> import(Optional From) { if (!From) - return None; + return std::nullopt; return import(*From); } @@ -8544,7 +8544,7 @@ auto *Owner = dyn_cast(F->getDeclContext()); if (!Owner) - return None; + return std::nullopt; unsigned Index = 0; for (const auto *D : Owner->decls()) { @@ -8557,7 +8557,7 @@ llvm_unreachable("Field was not found in its parent context."); - return None; + return std::nullopt; } ASTImporter::FoundDeclsTy @@ -10022,7 +10022,7 @@ if (Pos != ImportDeclErrors.end()) return Pos->second; else - return None; + return std::nullopt; } void ASTImporter::setImportDeclError(Decl *From, ASTImportError Error) { diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -2128,7 +2128,7 @@ const auto *Owner = dyn_cast(Anon->getDeclContext()); if (!Owner) - return None; + return std::nullopt; unsigned Index = 0; for (const auto *D : Owner->noload_decls()) { diff --git a/clang/lib/AST/AttrImpl.cpp b/clang/lib/AST/AttrImpl.cpp --- a/clang/lib/AST/AttrImpl.cpp +++ b/clang/lib/AST/AttrImpl.cpp @@ -151,7 +151,7 @@ llvm::Optional OMPDeclareTargetDeclAttr::getActiveAttr(const ValueDecl *VD) { if (!VD->hasAttrs()) - return llvm::None; + return std::nullopt; unsigned Level = 0; OMPDeclareTargetDeclAttr *FoundAttr = nullptr; for (auto *Attr : VD->specific_attrs()) { @@ -162,7 +162,7 @@ } if (FoundAttr) return FoundAttr; - return llvm::None; + return std::nullopt; } llvm::Optional @@ -170,7 +170,7 @@ llvm::Optional ActiveAttr = getActiveAttr(VD); if (ActiveAttr) return ActiveAttr.value()->getMapType(); - return llvm::None; + return std::nullopt; } llvm::Optional @@ -178,7 +178,7 @@ llvm::Optional ActiveAttr = getActiveAttr(VD); if (ActiveAttr) return ActiveAttr.value()->getDevType(); - return llvm::None; + return std::nullopt; } llvm::Optional @@ -186,7 +186,7 @@ llvm::Optional ActiveAttr = getActiveAttr(VD); if (ActiveAttr) return ActiveAttr.value()->getRange().getBegin(); - return llvm::None; + return std::nullopt; } namespace clang { diff --git a/clang/lib/AST/Comment.cpp b/clang/lib/AST/Comment.cpp --- a/clang/lib/AST/Comment.cpp +++ b/clang/lib/AST/Comment.cpp @@ -206,7 +206,7 @@ IsInstanceMethod = false; IsClassMethod = false; IsVariadic = false; - ParamVars = None; + ParamVars = std::nullopt; TemplateParameters = nullptr; if (!CommentDecl) { diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp --- a/clang/lib/AST/CommentParser.cpp +++ b/clang/lib/AST/CommentParser.cpp @@ -334,7 +334,7 @@ if (isTokBlockCommand()) { // Block command ahead. We can't nest block commands, so pretend that this // command has an empty argument. 
- ParagraphComment *Paragraph = S.actOnParagraphComment(None); + ParagraphComment *Paragraph = S.actOnParagraphComment(std::nullopt); if (PC) { S.actOnParamCommandFinish(PC, Paragraph); return PC; @@ -376,7 +376,7 @@ ParagraphComment *Paragraph; if (EmptyParagraph) - Paragraph = S.actOnParagraphComment(None); + Paragraph = S.actOnParagraphComment(std::nullopt); else { BlockContentComment *Block = parseParagraphOrBlockCommand(); // Since we have checked for a block command, we should have parsed a diff --git a/clang/lib/AST/ComparisonCategories.cpp b/clang/lib/AST/ComparisonCategories.cpp --- a/clang/lib/AST/ComparisonCategories.cpp +++ b/clang/lib/AST/ComparisonCategories.cpp @@ -37,7 +37,7 @@ return CCT::StrongOrdering; // TODO: Extend support for operator<=> to ObjC types. - return llvm::None; + return std::nullopt; } bool ComparisonCategoryInfo::ValueInfo::hasValidIntValue() const { diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -235,7 +235,7 @@ return getVisibilityFromAttr(A); } - return None; + return std::nullopt; } LinkageInfo LinkageComputer::getLVForType(const Type &T, @@ -1194,11 +1194,11 @@ const auto *TD = spec->getSpecializedTemplate()->getTemplatedDecl(); while (TD != nullptr) { auto Vis = getVisibilityOf(TD, kind); - if (Vis != None) + if (Vis != std::nullopt) return Vis; TD = TD->getPreviousDecl(); } - return None; + return std::nullopt; } // Use the most recent declaration. @@ -1219,7 +1219,7 @@ return getVisibilityOf(VTSD->getSpecializedTemplate()->getTemplatedDecl(), kind); - return None; + return std::nullopt; } // Also handle function template specializations. if (const auto *fn = dyn_cast(ND)) { @@ -1236,14 +1236,14 @@ if (InstantiatedFrom) return getVisibilityOf(InstantiatedFrom, kind); - return None; + return std::nullopt; } // The visibility of a template is stored in the templated decl. 
if (const auto *TD = dyn_cast(ND)) return getVisibilityOf(TD->getTemplatedDecl(), kind); - return None; + return std::nullopt; } Optional @@ -5130,8 +5130,9 @@ IndirectFieldDecl *IndirectFieldDecl::CreateDeserialized(ASTContext &C, unsigned ID) { - return new (C, ID) IndirectFieldDecl(C, nullptr, SourceLocation(), - DeclarationName(), QualType(), None); + return new (C, ID) + IndirectFieldDecl(C, nullptr, SourceLocation(), DeclarationName(), + QualType(), std::nullopt); } SourceRange EnumConstantDecl::getSourceRange() const { @@ -5236,6 +5237,29 @@ SourceLocation()); } +void TopLevelStmtDecl::anchor() {} + +TopLevelStmtDecl *TopLevelStmtDecl::Create(ASTContext &C, Stmt *Statement) { + assert(Statement); + assert(C.getLangOpts().IncrementalExtensions && + "Must be used only in incremental mode"); + + SourceLocation BeginLoc = Statement->getBeginLoc(); + DeclContext *DC = C.getTranslationUnitDecl(); + + return new (C, DC) TopLevelStmtDecl(DC, BeginLoc, Statement); +} + +TopLevelStmtDecl *TopLevelStmtDecl::CreateDeserialized(ASTContext &C, + unsigned ID) { + return new (C, ID) + TopLevelStmtDecl(/*DC=*/nullptr, SourceLocation(), /*S=*/nullptr); +} + +SourceRange TopLevelStmtDecl::getSourceRange() const { + return SourceRange(getLocation(), Statement->getEndLoc()); +} + void EmptyDecl::anchor() {} EmptyDecl *EmptyDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L) { @@ -5339,7 +5363,7 @@ ArrayRef ImportDecl::getIdentifierLocs() const { if (!isImportComplete()) - return None; + return std::nullopt; const auto *StoredLocs = getTrailingObjects(); return llvm::makeArrayRef(StoredLocs, diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -843,6 +843,7 @@ case LinkageSpec: case Export: case FileScopeAsm: + case TopLevelStmt: case StaticAssert: case ObjCPropertyImpl: case PragmaComment: diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -3139,7 +3139,8 @@ UsingPackDecl *UsingPackDecl::CreateDeserialized(ASTContext &C, unsigned ID, unsigned NumExpansions) { size_t Extra = additionalSizeToAlloc(NumExpansions); - auto *Result = new (C, ID, Extra) UsingPackDecl(nullptr, nullptr, None); + auto *Result = + new (C, ID, Extra) UsingPackDecl(nullptr, nullptr, std::nullopt); Result->NumExpansions = NumExpansions; auto *Trail = Result->getTrailingObjects(); for (unsigned I = 0; I != NumExpansions; ++I) @@ -3277,7 +3278,7 @@ size_t Extra = additionalSizeToAlloc(NumBindings); auto *Result = new (C, ID, Extra) DecompositionDecl(C, nullptr, SourceLocation(), SourceLocation(), - QualType(), nullptr, StorageClass(), None); + QualType(), nullptr, StorageClass(), std::nullopt); // Set up and clean out the bindings array. 
Result->NumBindings = NumBindings; auto *Trail = Result->getTrailingObjects(); diff --git a/clang/lib/AST/DeclObjC.cpp b/clang/lib/AST/DeclObjC.cpp --- a/clang/lib/AST/DeclObjC.cpp +++ b/clang/lib/AST/DeclObjC.cpp @@ -910,12 +910,12 @@ assert((!SelLocs.empty() || isImplicit()) && "No selector locs for non-implicit method"); if (isImplicit()) - return setParamsAndSelLocs(C, Params, llvm::None); + return setParamsAndSelLocs(C, Params, std::nullopt); setSelLocsKind(hasStandardSelectorLocs(getSelector(), SelLocs, Params, DeclEndLoc)); if (getSelLocsKind() != SelLoc_NonStandard) - return setParamsAndSelLocs(C, Params, llvm::None); + return setParamsAndSelLocs(C, Params, std::nullopt); setParamsAndSelLocs(C, Params, SelLocs); } diff --git a/clang/lib/AST/DeclOpenMP.cpp b/clang/lib/AST/DeclOpenMP.cpp --- a/clang/lib/AST/DeclOpenMP.cpp +++ b/clang/lib/AST/DeclOpenMP.cpp @@ -30,7 +30,7 @@ SourceLocation L, ArrayRef VL) { auto *D = OMPDeclarativeDirective::createDirective( - C, DC, llvm::None, VL.size(), L); + C, DC, std::nullopt, VL.size(), L); D->setVars(VL); return D; } diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp --- a/clang/lib/AST/DeclPrinter.cpp +++ b/clang/lib/AST/DeclPrinter.cpp @@ -72,6 +72,7 @@ void VisitLabelDecl(LabelDecl *D); void VisitParmVarDecl(ParmVarDecl *D); void VisitFileScopeAsmDecl(FileScopeAsmDecl *D); + void VisitTopLevelStmtDecl(TopLevelStmtDecl *D); void VisitImportDecl(ImportDecl *D); void VisitStaticAssertDecl(StaticAssertDecl *D); void VisitNamespaceDecl(NamespaceDecl *D); @@ -932,6 +933,11 @@ Out << ")"; } +void DeclPrinter::VisitTopLevelStmtDecl(TopLevelStmtDecl *D) { + assert(D->getStmt()); + D->getStmt()->printPretty(Out, nullptr, Policy, Indentation, "\n", &Context); +} + void DeclPrinter::VisitImportDecl(ImportDecl *D) { Out << "@import " << D->getImportedModule()->getFullModuleName() << ";\n"; diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp --- a/clang/lib/AST/DeclTemplate.cpp +++ b/clang/lib/AST/DeclTemplate.cpp @@ -656,9 +656,9 @@ TemplateTypeParmDecl * TemplateTypeParmDecl::CreateDeserialized(const ASTContext &C, unsigned ID) { - return new (C, ID) TemplateTypeParmDecl(nullptr, SourceLocation(), - SourceLocation(), nullptr, false, - false, None); + return new (C, ID) + TemplateTypeParmDecl(nullptr, SourceLocation(), SourceLocation(), nullptr, + false, false, std::nullopt); } TemplateTypeParmDecl * @@ -666,8 +666,8 @@ bool HasTypeConstraint) { return new (C, ID, additionalSizeToAlloc(HasTypeConstraint ? 1 : 0)) - TemplateTypeParmDecl(nullptr, SourceLocation(), SourceLocation(), - nullptr, false, HasTypeConstraint, None); + TemplateTypeParmDecl(nullptr, SourceLocation(), SourceLocation(), nullptr, + false, HasTypeConstraint, std::nullopt); } SourceLocation TemplateTypeParmDecl::getDefaultArgumentLoc() const { @@ -781,12 +781,12 @@ unsigned NumExpandedTypes, bool HasTypeConstraint) { auto *NTTP = - new (C, ID, additionalSizeToAlloc, - Expr *>( - NumExpandedTypes, HasTypeConstraint ? 1 : 0)) + new (C, ID, + additionalSizeToAlloc, Expr *>( + NumExpandedTypes, HasTypeConstraint ? 
1 : 0)) NonTypeTemplateParmDecl(nullptr, SourceLocation(), SourceLocation(), - 0, 0, nullptr, QualType(), nullptr, None, - None); + 0, 0, nullptr, QualType(), nullptr, + std::nullopt, std::nullopt); NTTP->NumExpandedTypes = NumExpandedTypes; return NTTP; } @@ -854,7 +854,7 @@ auto *TTP = new (C, ID, additionalSizeToAlloc(NumExpansions)) TemplateTemplateParmDecl(nullptr, SourceLocation(), 0, 0, nullptr, - nullptr, None); + nullptr, std::nullopt); TTP->NumExpandedParams = NumExpansions; return TTP; } diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -647,7 +647,7 @@ const NamedDecl *ND) -> llvm::Optional { if (const auto *RD = dyn_cast(ND)) return RD->getDeviceLambdaManglingNumber(); - return llvm::None; + return std::nullopt; }; std::unique_ptr Ctx{ItaniumMangleContext::create( @@ -4533,7 +4533,8 @@ OK_Ordinary) { BaseAndUpdaterExprs[0] = baseExpr; - InitListExpr *ILE = new (C) InitListExpr(C, lBraceLoc, None, rBraceLoc); + InitListExpr *ILE = + new (C) InitListExpr(C, lBraceLoc, std::nullopt, rBraceLoc); ILE->setType(baseExpr->getType()); BaseAndUpdaterExprs[1] = ILE; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -5680,7 +5680,7 @@ // meaningful dynamic type. (We consider objects of non-class type to have no // dynamic type.) if (!checkDynamicType(Info, E, This, AK, true)) - return None; + return std::nullopt; // Refuse to compute a dynamic type in the presence of virtual bases. This // shouldn't happen other than in constant-folding situations, since literal @@ -5692,7 +5692,7 @@ This.Designator.MostDerivedType->getAsCXXRecordDecl(); if (!Class || Class->getNumVBases()) { Info.FFDiag(E); - return None; + return std::nullopt; } // FIXME: For very deep class hierarchies, it might be beneficial to use a @@ -5725,7 +5725,7 @@ // 'This', so that object has not yet begun its period of construction and // any polymorphic operation on it results in undefined behavior. Info.FFDiag(E); - return None; + return std::nullopt; } /// Perform virtual dispatch. 
@@ -6753,13 +6753,13 @@ << PointerAsString(); if (Pointer.Base) NoteLValueLocation(Info, Pointer.Base); - return None; + return std::nullopt; } Optional Alloc = Info.lookupDynamicAlloc(DA); if (!Alloc) { Info.FFDiag(E, diag::note_constexpr_double_delete); - return None; + return std::nullopt; } QualType AllocType = Pointer.Base.getDynamicAllocType(); @@ -6767,7 +6767,7 @@ Info.FFDiag(E, diag::note_constexpr_new_delete_mismatch) << DeallocKind << (*Alloc)->getKind() << AllocType; NoteLValueLocation(Info, Pointer.Base); - return None; + return std::nullopt; } bool Subobject = false; @@ -6781,7 +6781,7 @@ if (Subobject) { Info.FFDiag(E, diag::note_constexpr_delete_subobject) << PointerAsString() << Pointer.Designator.isOnePastTheEnd(); - return None; + return std::nullopt; } return Alloc; @@ -7028,7 +7028,7 @@ CharUnits DstSize = Info.Ctx.getTypeSizeInChars(BCE->getType()); APValueToBufferConverter Converter(Info, DstSize, BCE); if (!Converter.visit(Src, BCE->getSubExpr()->getType())) - return None; + return std::nullopt; return Converter.Buffer; } }; @@ -7050,14 +7050,14 @@ Info.FFDiag(BCE->getBeginLoc(), diag::note_constexpr_bit_cast_unsupported_type) << Ty; - return None; + return std::nullopt; } std::nullopt_t unrepresentableValue(QualType Ty, const APSInt &Val) { Info.FFDiag(BCE->getBeginLoc(), diag::note_constexpr_bit_cast_unrepresentable_value) << Ty << toString(Val, /*Radix=*/10); - return None; + return std::nullopt; } Optional visit(const BuiltinType *T, CharUnits Offset, @@ -7097,7 +7097,7 @@ Info.FFDiag(BCE->getExprLoc(), diag::note_constexpr_bit_cast_indet_dest) << DisplayType << Info.Ctx.getLangOpts().CharIsSigned; - return None; + return std::nullopt; } return APValue::IndeterminateValue(); @@ -7152,7 +7152,7 @@ Optional SubObj = visitType( BS.getType(), Layout.getBaseClassOffset(BaseDecl) + Offset); if (!SubObj) - return None; + return std::nullopt; ResultVal.getStructBase(I) = *SubObj; } } @@ -7165,7 +7165,7 @@ if (FD->isBitField()) { Info.FFDiag(BCE->getBeginLoc(), diag::note_constexpr_bit_cast_unsupported_bitfield); - return None; + return std::nullopt; } uint64_t FieldOffsetBits = Layout.getFieldOffset(FieldIdx); @@ -7177,7 +7177,7 @@ QualType FieldTy = FD->getType(); Optional SubObj = visitType(FieldTy, FieldOffset); if (!SubObj) - return None; + return std::nullopt; ResultVal.getStructField(FieldIdx) = *SubObj; ++FieldIdx; } @@ -7205,7 +7205,7 @@ Optional ElementValue = visitType(Ty->getElementType(), Offset + I * ElementWidth); if (!ElementValue) - return None; + return std::nullopt; ArrayValue.getArrayInitializedElt(I) = std::move(*ElementValue); } @@ -7961,8 +7961,8 @@ bool VisitStmtExpr(const StmtExpr *E) { // We will have checked the full-expressions inside the statement expression // when they were completed, and don't need to check them again now. - llvm::SaveAndRestore NotCheckingForUB( - Info.CheckingForUndefinedBehavior, false); + llvm::SaveAndRestore NotCheckingForUB(Info.CheckingForUndefinedBehavior, + false); const CompoundStmt *CS = E->getSubStmt(); if (CS->body_empty()) @@ -10571,7 +10571,7 @@ } default: // FIXME: Implement the rest of the unary operators. - return llvm::None; + return std::nullopt; } } @@ -15943,7 +15943,7 @@ bool isEvaluated) const { if (isValueDependent()) { // Expression evaluator can't succeed on a dependent expression. 
- return None; + return std::nullopt; } APSInt Value; @@ -15951,11 +15951,11 @@ if (Ctx.getLangOpts().CPlusPlus11) { if (EvaluateCPlusPlus11IntegralConstantExpr(Ctx, this, &Value, Loc)) return Value; - return None; + return std::nullopt; } if (!isIntegerConstantExpr(Ctx, Loc)) - return None; + return std::nullopt; // The only possible side-effects here are due to UB discovered in the // evaluation (for instance, INT_MAX + 1). In such a case, we are still diff --git a/clang/lib/AST/ExternalASTSource.cpp b/clang/lib/AST/ExternalASTSource.cpp --- a/clang/lib/AST/ExternalASTSource.cpp +++ b/clang/lib/AST/ExternalASTSource.cpp @@ -32,7 +32,7 @@ llvm::Optional ExternalASTSource::getSourceDescriptor(unsigned ID) { - return None; + return std::nullopt; } ExternalASTSource::ExtKind diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp --- a/clang/lib/AST/FormatString.cpp +++ b/clang/lib/AST/FormatString.cpp @@ -740,7 +740,7 @@ switch (getKind()) { default: - return None; + return std::nullopt; case DArg: NewKind = dArg; break; @@ -1041,7 +1041,7 @@ } } - return None; + return std::nullopt; } bool FormatSpecifier::namedTypeToLengthModifier(QualType QT, diff --git a/clang/lib/AST/ItaniumCXXABI.cpp b/clang/lib/AST/ItaniumCXXABI.cpp --- a/clang/lib/AST/ItaniumCXXABI.cpp +++ b/clang/lib/AST/ItaniumCXXABI.cpp @@ -96,7 +96,7 @@ if (LHSTombstone || RHSTombstone) return LHSTombstone && RHSTombstone; - return None; + return std::nullopt; } template<> diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -6560,7 +6560,7 @@ return new ItaniumMangleContextImpl( Context, Diags, [](ASTContext &, const NamedDecl *) -> llvm::Optional { - return llvm::None; + return std::nullopt; }, IsAux); } diff --git a/clang/lib/AST/Linkage.h b/clang/lib/AST/Linkage.h --- a/clang/lib/AST/Linkage.h +++ b/clang/lib/AST/Linkage.h @@ -95,7 +95,7 @@ LVComputationKind Kind) const { auto Iter = CachedLinkageInfo.find(makeCacheKey(ND, Kind)); if (Iter == CachedLinkageInfo.end()) - return None; + return std::nullopt; return Iter->second; } diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -376,7 +376,7 @@ void mangleBits(llvm::APInt Number); void mangleTagTypeKind(TagTypeKind TK); void mangleArtificialTagType(TagTypeKind TK, StringRef UnqualifiedName, - ArrayRef NestedNames = None); + ArrayRef NestedNames = std::nullopt); void mangleAddressSpaceType(QualType T, Qualifiers Quals, SourceRange Range); void mangleType(QualType T, SourceRange Range, QualifierMangleMode QMM = QMM_Mangle); diff --git a/clang/lib/AST/NSAPI.cpp b/clang/lib/AST/NSAPI.cpp --- a/clang/lib/AST/NSAPI.cpp +++ b/clang/lib/AST/NSAPI.cpp @@ -149,7 +149,7 @@ return MK; } - return None; + return std::nullopt; } Selector NSAPI::getNSDictionarySelector( @@ -251,7 +251,7 @@ return MK; } - return None; + return std::nullopt; } Selector NSAPI::getNSSetSelector(NSSetMethodKind MK) const { @@ -308,7 +308,7 @@ return MK; } - return None; + return std::nullopt; } Selector NSAPI::getNSNumberLiteralSelector(NSNumberLiteralMethodKind MK, @@ -371,14 +371,14 @@ return MK; } - return None; + return std::nullopt; } Optional NSAPI::getNSNumberFactoryMethodKind(QualType T) const { const BuiltinType *BT = T->getAs(); if (!BT) - return None; + return std::nullopt; const TypedefType *TDT = T->getAs(); if (TDT) { @@ -496,7 +496,7 @@ break; } - return None; + 
return std::nullopt; } /// Returns true if \param T is a typedef of "BOOL" in objective-c. diff --git a/clang/lib/AST/ParentMap.cpp b/clang/lib/AST/ParentMap.cpp --- a/clang/lib/AST/ParentMap.cpp +++ b/clang/lib/AST/ParentMap.cpp @@ -33,9 +33,11 @@ switch (S->getStmtClass()) { case Stmt::PseudoObjectExprClass: { - assert(OVMode == OV_Transparent && "Should not appear alongside OVEs"); PseudoObjectExpr *POE = cast(S); + if (OVMode == OV_Opaque && M[POE->getSyntacticForm()]) + break; + // If we are rebuilding the map, clear out any existing state. if (M[POE->getSyntacticForm()]) for (Stmt *SubStmt : S->children()) diff --git a/clang/lib/AST/Stmt.cpp b/clang/lib/AST/Stmt.cpp --- a/clang/lib/AST/Stmt.cpp +++ b/clang/lib/AST/Stmt.cpp @@ -1003,7 +1003,7 @@ Optional IfStmt::getNondiscardedCase(const ASTContext &Ctx) { if (!isConstexpr() || getCond()->isValueDependent()) - return None; + return std::nullopt; return !getCond()->EvaluateKnownConstInt(Ctx) ? getElse() : getThen(); } @@ -1012,7 +1012,7 @@ if (Optional Result = const_cast(this)->getNondiscardedCase(Ctx)) return *Result; - return None; + return std::nullopt; } ForStmt::ForStmt(const ASTContext &C, Stmt *Init, Expr *Cond, VarDecl *condVar, diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -517,7 +517,7 @@ Stmt *AssociatedStmt, bool HasCancel) { auto *Dir = - createDirective(C, llvm::None, AssociatedStmt, + createDirective(C, std::nullopt, AssociatedStmt, /*NumChildren=*/0, StartLoc, EndLoc); Dir->setHasCancel(HasCancel); return Dir; @@ -550,7 +550,7 @@ SourceLocation StartLoc, SourceLocation EndLoc, Stmt *AssociatedStmt) { - return createDirective(C, llvm::None, AssociatedStmt, + return createDirective(C, std::nullopt, AssociatedStmt, /*NumChildren=*/0, StartLoc, EndLoc); } diff --git a/clang/lib/AST/TemplateBase.cpp b/clang/lib/AST/TemplateBase.cpp --- a/clang/lib/AST/TemplateBase.cpp +++ b/clang/lib/AST/TemplateBase.cpp @@ -276,7 +276,7 @@ if (TemplateArg.NumExpansions) return TemplateArg.NumExpansions - 1; - return None; + return std::nullopt; } QualType TemplateArgument::getNonTypeTemplateArgumentType() const { diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -1527,23 +1527,23 @@ // substitution to do. dcTypeParams = dcClassDecl->getTypeParamList(); if (!dcTypeParams) - return None; + return std::nullopt; } else { // If we are in neither a class nor a category, there's no // substitution to perform. dcCategoryDecl = dyn_cast(dc); if (!dcCategoryDecl) - return None; + return std::nullopt; // If the category does not have any type parameters, there's no // substitution to do. 
dcTypeParams = dcCategoryDecl->getTypeParamList(); if (!dcTypeParams) - return None; + return std::nullopt; dcClassDecl = dcCategoryDecl->getClassInterface(); if (!dcClassDecl) - return None; + return std::nullopt; } assert(dcTypeParams && "No substitutions to perform"); assert(dcClassDecl && "No class context"); @@ -4153,7 +4153,7 @@ Type = AT->getEquivalentType(); } - return None; + return std::nullopt; } bool Type::canHaveNullability(bool ResultIfUnknown) const { @@ -4294,7 +4294,7 @@ return NullabilityKind::Unspecified; if (getAttrKind() == attr::TypeNullableResult) return NullabilityKind::NullableResult; - return None; + return std::nullopt; } Optional AttributedType::stripOuterNullability(QualType &T) { @@ -4309,7 +4309,7 @@ } } - return None; + return std::nullopt; } bool Type::isBlockCompatibleObjCPointerType(ASTContext &ctx) const { diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -195,7 +195,7 @@ return; } - SaveAndRestore PHVal(HasEmptyPlaceHolder, PlaceHolder.empty()); + SaveAndRestore PHVal(HasEmptyPlaceHolder, PlaceHolder.empty()); printBefore(T, Quals, OS); OS << PlaceHolder; @@ -319,7 +319,7 @@ if (Policy.SuppressSpecifiers && T->isSpecifierType()) return; - SaveAndRestore PrevPHIsEmpty(HasEmptyPlaceHolder); + SaveAndRestore PrevPHIsEmpty(HasEmptyPlaceHolder); // Print qualifiers as appropriate. @@ -396,7 +396,7 @@ void TypePrinter::printPointerBefore(const PointerType *T, raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); printBefore(T->getPointeeType(), OS); // Handle things like 'int (*A)[4];' correctly. // FIXME: this should include vectors, but vectors use attributes I guess. @@ -407,7 +407,7 @@ void TypePrinter::printPointerAfter(const PointerType *T, raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); // Handle things like 'int (*A)[4];' correctly. // FIXME: this should include vectors, but vectors use attributes I guess. if (isa(T->getPointeeType())) @@ -417,14 +417,14 @@ void TypePrinter::printBlockPointerBefore(const BlockPointerType *T, raw_ostream &OS) { - SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); printBefore(T->getPointeeType(), OS); OS << '^'; } void TypePrinter::printBlockPointerAfter(const BlockPointerType *T, raw_ostream &OS) { - SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); printAfter(T->getPointeeType(), OS); } @@ -439,7 +439,7 @@ void TypePrinter::printLValueReferenceBefore(const LValueReferenceType *T, raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); QualType Inner = skipTopLevelReferences(T->getPointeeTypeAsWritten()); printBefore(Inner, OS); // Handle things like 'int (&A)[4];' correctly. 
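The TypePrinter hunks on either side of this point drop the explicit template argument from SaveAndRestore and rely on C++17 class template argument deduction instead. A self-contained sketch of why the two spellings are equivalent, using a simplified stand-in for llvm::SaveAndRestore (illustrative only, not the real class):

// Simplified stand-in for llvm::SaveAndRestore.
template <typename T> class SaveAndRestore {
  T &Ref;
  T Saved;
public:
  SaveAndRestore(T &X, const T &NewValue) : Ref(X), Saved(X) { Ref = NewValue; }
  ~SaveAndRestore() { Ref = Saved; }
};

bool HasEmptyPlaceHolder = true;

void demo() {
  // T is deduced as bool from the constructor arguments, so the explicit
  // SaveAndRestore<bool> spelling used before this patch is no longer needed.
  SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false);
  // HasEmptyPlaceHolder is false inside this scope and restored to true on exit.
}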
@@ -452,7 +452,7 @@ void TypePrinter::printLValueReferenceAfter(const LValueReferenceType *T, raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); QualType Inner = skipTopLevelReferences(T->getPointeeTypeAsWritten()); // Handle things like 'int (&A)[4];' correctly. // FIXME: this should include vectors, but vectors use attributes I guess. @@ -464,7 +464,7 @@ void TypePrinter::printRValueReferenceBefore(const RValueReferenceType *T, raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); QualType Inner = skipTopLevelReferences(T->getPointeeTypeAsWritten()); printBefore(Inner, OS); // Handle things like 'int (&&A)[4];' correctly. @@ -477,7 +477,7 @@ void TypePrinter::printRValueReferenceAfter(const RValueReferenceType *T, raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); QualType Inner = skipTopLevelReferences(T->getPointeeTypeAsWritten()); // Handle things like 'int (&&A)[4];' correctly. // FIXME: this should include vectors, but vectors use attributes I guess. @@ -489,7 +489,7 @@ void TypePrinter::printMemberPointerBefore(const MemberPointerType *T, raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); printBefore(T->getPointeeType(), OS); // Handle things like 'int (Cls::*A)[4];' correctly. // FIXME: this should include vectors, but vectors use attributes I guess. @@ -506,7 +506,7 @@ void TypePrinter::printMemberPointerAfter(const MemberPointerType *T, raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); // Handle things like 'int (Cls::*A)[4];' correctly. // FIXME: this should include vectors, but vectors use attributes I guess. if (isa(T->getPointeeType())) @@ -848,7 +848,7 @@ OS << '('; } else { // If needed for precedence reasons, wrap the inner part in grouping parens. - SaveAndRestore PrevPHIsEmpty(HasEmptyPlaceHolder, false); + SaveAndRestore PrevPHIsEmpty(HasEmptyPlaceHolder, false); printBefore(T->getReturnType(), OS); if (!PrevPHIsEmpty.get()) OS << '('; @@ -876,7 +876,7 @@ // If needed for precedence reasons, wrap the inner part in grouping parens. if (!HasEmptyPlaceHolder) OS << ')'; - SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); OS << '('; { @@ -1027,7 +1027,7 @@ void TypePrinter::printFunctionNoProtoBefore(const FunctionNoProtoType *T, raw_ostream &OS) { // If needed for precedence reasons, wrap the inner part in grouping parens. - SaveAndRestore PrevPHIsEmpty(HasEmptyPlaceHolder, false); + SaveAndRestore PrevPHIsEmpty(HasEmptyPlaceHolder, false); printBefore(T->getReturnType(), OS); if (!PrevPHIsEmpty.get()) OS << '('; @@ -1038,7 +1038,7 @@ // If needed for precedence reasons, wrap the inner part in grouping parens. 
if (!HasEmptyPlaceHolder) OS << ')'; - SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); OS << "()"; printFunctionAfter(T->getExtInfo(), OS); @@ -1676,7 +1676,7 @@ // If this is a calling convention attribute, don't print the implicit CC from // the modified type. - SaveAndRestore MaybeSuppressCC(InsideCCAttribute, T->isCallingConv()); + SaveAndRestore MaybeSuppressCC(InsideCCAttribute, T->isCallingConv()); printAfter(T->getModifiedType(), OS); diff --git a/clang/lib/ASTMatchers/ASTMatchFinder.cpp b/clang/lib/ASTMatchers/ASTMatchFinder.cpp --- a/clang/lib/ASTMatchers/ASTMatchFinder.cpp +++ b/clang/lib/ASTMatchers/ASTMatchFinder.cpp @@ -1687,7 +1687,7 @@ llvm::Optional MatchFinder::MatchCallback::getCheckTraversalKind() const { - return llvm::None; + return std::nullopt; } } // end namespace ast_matchers diff --git a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp --- a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp +++ b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp @@ -340,7 +340,8 @@ } llvm::Optional DynTypedMatcher::tryBind(StringRef ID) const { - if (!AllowBind) return llvm::None; + if (!AllowBind) + return std::nullopt; auto Result = *this; Result.Implementation = new IdDynMatcher(ID, std::move(Result.Implementation)); @@ -703,7 +704,7 @@ if (isTokenAtLoc(SM, LangOpts, MacroName, Loc)) return Loc; } - return llvm::None; + return std::nullopt; } std::shared_ptr createAndVerifyRegex(StringRef Regex, diff --git a/clang/lib/ASTMatchers/Dynamic/Marshallers.h b/clang/lib/ASTMatchers/Dynamic/Marshallers.h --- a/clang/lib/ASTMatchers/Dynamic/Marshallers.h +++ b/clang/lib/ASTMatchers/Dynamic/Marshallers.h @@ -72,7 +72,7 @@ } static llvm::Optional getBestGuess(const VariantValue &) { - return llvm::None; + return std::nullopt; } }; @@ -97,7 +97,7 @@ } static llvm::Optional getBestGuess(const VariantValue &) { - return llvm::None; + return std::nullopt; } }; @@ -116,7 +116,7 @@ } static llvm::Optional getBestGuess(const VariantValue &) { - return llvm::None; + return std::nullopt; } }; @@ -135,7 +135,7 @@ } static llvm::Optional getBestGuess(const VariantValue &) { - return llvm::None; + return std::nullopt; } }; @@ -154,7 +154,7 @@ } static llvm::Optional getBestGuess(const VariantValue &) { - return llvm::None; + return std::nullopt; } }; @@ -162,11 +162,11 @@ private: static Optional getAttrKind(llvm::StringRef AttrKind) { if (!AttrKind.consume_front("attr::")) - return llvm::None; + return std::nullopt; return llvm::StringSwitch>(AttrKind) #define ATTR(X) .Case(#X, attr::X) #include "clang/Basic/AttrList.inc" - .Default(llvm::None); + .Default(std::nullopt); } public: @@ -192,11 +192,11 @@ private: static Optional getCastKind(llvm::StringRef AttrKind) { if (!AttrKind.consume_front("CK_")) - return llvm::None; + return std::nullopt; return llvm::StringSwitch>(AttrKind) #define CAST_OPERATION(Name) .Case(#Name, CK_##Name) #include "clang/AST/OperationKinds.def" - .Default(llvm::None); + .Default(std::nullopt); } public: @@ -246,7 +246,7 @@ #define GEN_CLANG_CLAUSE_CLASS #define CLAUSE_CLASS(Enum, Str, Class) .Case(#Enum, llvm::omp::Clause::Enum) #include "llvm/Frontend/OpenMP/OMP.inc" - .Default(llvm::None); + .Default(std::nullopt); } public: @@ -271,13 +271,13 @@ static Optional getUnaryOrTypeTraitKind(llvm::StringRef ClauseKind) { if (!ClauseKind.consume_front("UETT_")) - return llvm::None; + return std::nullopt; return llvm::StringSwitch>(ClauseKind) #define 
UNARY_EXPR_OR_TYPE_TRAIT(Spelling, Name, Key) .Case(#Name, UETT_##Name) #define CXX11_UNARY_EXPR_OR_TYPE_TRAIT(Spelling, Name, Key) \ .Case(#Name, UETT_##Name) #include "clang/Basic/TokenKinds.def" - .Default(llvm::None); + .Default(std::nullopt); } public: @@ -1060,7 +1060,7 @@ BuildReturnTypeVector::build(RetTypes); return std::make_unique( matcherMarshall0, reinterpret_cast(Func), - MatcherName, RetTypes, None); + MatcherName, RetTypes, std::nullopt); } /// 1-arg overload diff --git a/clang/lib/ASTMatchers/Dynamic/Marshallers.cpp b/clang/lib/ASTMatchers/Dynamic/Marshallers.cpp --- a/clang/lib/ASTMatchers/Dynamic/Marshallers.cpp +++ b/clang/lib/ASTMatchers/Dynamic/Marshallers.cpp @@ -56,7 +56,7 @@ if (!Res.empty()) return Res.str(); } - return llvm::None; + return std::nullopt; } llvm::Optional @@ -69,7 +69,7 @@ if (Value.isString()) return ::getBestGuess(Value.getString(), llvm::makeArrayRef(Allowed), "attr::"); - return llvm::None; + return std::nullopt; } llvm::Optional @@ -82,7 +82,7 @@ if (Value.isString()) return ::getBestGuess(Value.getString(), llvm::makeArrayRef(Allowed), "CK_"); - return llvm::None; + return std::nullopt; } llvm::Optional @@ -96,7 +96,7 @@ if (Value.isString()) return ::getBestGuess(Value.getString(), llvm::makeArrayRef(Allowed), "OMPC_"); - return llvm::None; + return std::nullopt; } llvm::Optional @@ -110,7 +110,7 @@ if (Value.isString()) return ::getBestGuess(Value.getString(), llvm::makeArrayRef(Allowed), "UETT_"); - return llvm::None; + return std::nullopt; } static constexpr std::pair @@ -127,7 +127,7 @@ if (Flag == StringFlag.first) return StringFlag.second; } - return llvm::None; + return std::nullopt; } static llvm::Optional @@ -136,7 +136,7 @@ if (Flag.edit_distance(StringFlag.first) < 3) return StringFlag.first; } - return llvm::None; + return std::nullopt; } llvm::Optional @@ -150,7 +150,7 @@ getRegexFlag(OrFlag.trim())) Flag = Flag.value_or(llvm::Regex::NoFlags) | *NextFlag; else - return None; + return std::nullopt; } return Flag; } @@ -159,7 +159,7 @@ clang::ast_matchers::dynamic::internal::ArgTypeTraits< llvm::Regex::RegexFlags>::getBestGuess(const VariantValue &Value) { if (!Value.isString()) - return llvm::None; + return std::nullopt; SmallVector Split; llvm::StringRef(Value.getString()).split(Split, '|', -1, false); for (llvm::StringRef &Flag : Split) { @@ -167,9 +167,9 @@ getCloseRegexMatch(Flag.trim())) Flag = *BestGuess; else - return None; + return std::nullopt; } if (Split.empty()) - return None; + return std::nullopt; return llvm::join(Split, " | "); } diff --git a/clang/lib/ASTMatchers/Dynamic/Parser.cpp b/clang/lib/ASTMatchers/Dynamic/Parser.cpp --- a/clang/lib/ASTMatchers/Dynamic/Parser.cpp +++ b/clang/lib/ASTMatchers/Dynamic/Parser.cpp @@ -910,10 +910,10 @@ Diagnostics *Error) { VariantValue Value; if (!parseExpression(Code, S, NamedValues, &Value, Error)) - return llvm::None; + return std::nullopt; if (!Value.isMatcher()) { Error->addError(SourceRange(), Error->ET_ParserNotAMatcher); - return llvm::None; + return std::nullopt; } llvm::Optional Result = Value.getMatcher().getSingleMatcher(); diff --git a/clang/lib/ASTMatchers/Dynamic/VariantValue.cpp b/clang/lib/ASTMatchers/Dynamic/VariantValue.cpp --- a/clang/lib/ASTMatchers/Dynamic/VariantValue.cpp +++ b/clang/lib/ASTMatchers/Dynamic/VariantValue.cpp @@ -75,11 +75,11 @@ // Abort if any of the inner matchers can't be converted to // Matcher. 
if (!InnerMatcher.Value) - return llvm::None; + return std::nullopt; llvm::Optional Inner = InnerMatcher.Value->getTypedMatcher(*this); if (!Inner) - return llvm::None; + return std::nullopt; DynMatchers.push_back(*Inner); } return DynTypedMatcher::constructVariadic(Op, NodeKind, DynMatchers); @@ -105,7 +105,7 @@ bool Ignore; if (Ops.canConstructFrom(Matcher, Ignore)) return Matcher; - return llvm::None; + return std::nullopt; } bool isConvertibleTo(ASTNodeKind Kind, unsigned *Specificity) const override { @@ -126,7 +126,7 @@ llvm::Optional getSingleMatcher() const override { if (Matchers.size() != 1) - return llvm::None; + return std::nullopt; return Matchers[0]; } @@ -162,7 +162,7 @@ // We only succeed if we found exactly one, or if we found an exact match. if (Found && (FoundIsExact || NumFound == 1)) return *Found; - return llvm::None; + return std::nullopt; } bool isConvertibleTo(ASTNodeKind Kind, unsigned *Specificity) const override { @@ -190,7 +190,7 @@ : Op(Op), Args(std::move(Args)) {} llvm::Optional getSingleMatcher() const override { - return llvm::None; + return std::nullopt; } std::string getTypeAsString() const override { diff --git a/clang/lib/Analysis/AnalysisDeclContext.cpp b/clang/lib/Analysis/AnalysisDeclContext.cpp --- a/clang/lib/Analysis/AnalysisDeclContext.cpp +++ b/clang/lib/Analysis/AnalysisDeclContext.cpp @@ -231,8 +231,7 @@ CFG *AnalysisDeclContext::getUnoptimizedCFG() { if (!builtCompleteCFG) { - SaveAndRestore NotPrune(cfgBuildOptions.PruneTriviallyFalseEdges, - false); + SaveAndRestore NotPrune(cfgBuildOptions.PruneTriviallyFalseEdges, false); completeCFG = CFG::buildCFG(D, getBody(), &D->getASTContext(), cfgBuildOptions); // Even when the cfg is not successfully built, we don't diff --git a/clang/lib/Analysis/BodyFarm.cpp b/clang/lib/Analysis/BodyFarm.cpp --- a/clang/lib/Analysis/BodyFarm.cpp +++ b/clang/lib/Analysis/BodyFarm.cpp @@ -541,7 +541,7 @@ CallExpr *CE = CallExpr::Create( /*ASTContext=*/C, /*StmtClass=*/M.makeLvalueToRvalue(/*Expr=*/Block), - /*Args=*/None, + /*Args=*/std::nullopt, /*QualType=*/C.VoidTy, /*ExprValueType=*/VK_PRValue, /*SourceLocation=*/SourceLocation(), FPOptionsOverride()); @@ -609,7 +609,7 @@ ASTMaker M(C); DeclRefExpr *DR = M.makeDeclRefExpr(PV); ImplicitCastExpr *ICE = M.makeLvalueToRvalue(DR, Ty); - CallExpr *CE = CallExpr::Create(C, ICE, None, C.VoidTy, VK_PRValue, + CallExpr *CE = CallExpr::Create(C, ICE, std::nullopt, C.VoidTy, VK_PRValue, SourceLocation(), FPOptionsOverride()); return CE; } diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp --- a/clang/lib/Analysis/CFG.cpp +++ b/clang/lib/Analysis/CFG.cpp @@ -1044,14 +1044,14 @@ return llvm::APInt(Context->getTypeSize(Context->IntTy), !Value); default: assert(false && "Unexpected unary operator!"); - return llvm::None; + return std::nullopt; } } } else if (const auto *IntLiteral = dyn_cast(E->IgnoreParens())) return IntLiteral->getValue(); - return llvm::None; + return std::nullopt; } TryResult analyzeLogicOperatorCondition(BinaryOperatorKind Relation, @@ -3073,7 +3073,7 @@ // Save local scope position because in case of condition variable ScopePos // won't be restored when traversing AST. - SaveAndRestore save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); // Create local scope for C++17 if init-stmt if one exists. 
if (Stmt *Init = I->getInit()) @@ -3098,7 +3098,7 @@ CFGBlock *ElseBlock = Succ; if (Stmt *Else = I->getElse()) { - SaveAndRestore sv(Succ); + SaveAndRestore sv(Succ); // NULL out Block so that the recursive call to Visit will // create a new basic block. @@ -3124,7 +3124,7 @@ { Stmt *Then = I->getThen(); assert(Then); - SaveAndRestore sv(Succ); + SaveAndRestore sv(Succ); Block = nullptr; // If branch is not a compound statement create implicit scope @@ -3274,7 +3274,7 @@ // Save local scope position because in case of exception variable ScopePos // won't be restored when traversing AST. - SaveAndRestore save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); addStmt(ES->getBlock()); CFGBlock *SEHExceptBlock = Block; @@ -3364,13 +3364,13 @@ Succ = SEHTrySuccessor; // Save the current "__try" context. - SaveAndRestore SaveTry(TryTerminatedBlock, NewTryTerminatedBlock); + SaveAndRestore SaveTry(TryTerminatedBlock, NewTryTerminatedBlock); cfg->addTryDispatchBlock(TryTerminatedBlock); // Save the current value for the __leave target. // All __leaves should go to the code following the __try // (FIXME: or if the __try has a __finally, to the __finally.) - SaveAndRestore save_break(SEHLeaveJumpTarget); + SaveAndRestore save_break(SEHLeaveJumpTarget); SEHLeaveJumpTarget = JumpTarget(SEHTrySuccessor, ScopePos); assert(Terminator->getTryBlock() && "__try must contain a non-NULL body"); @@ -3493,7 +3493,7 @@ // Save local scope position because in case of condition variable ScopePos // won't be restored when traversing AST. - SaveAndRestore save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); // Create local scope for init statement and possible condition variable. // Add destructor for init statement and condition variable. @@ -3521,7 +3521,7 @@ // Save the current value for the break targets. // All breaks should go to the code following the loop. - SaveAndRestore save_break(BreakJumpTarget); + SaveAndRestore save_break(BreakJumpTarget); BreakJumpTarget = JumpTarget(LoopSuccessor, ScopePos); CFGBlock *BodyBlock = nullptr, *TransitionBlock = nullptr; @@ -3531,8 +3531,8 @@ assert(F->getBody()); // Save the current values for Block, Succ, continue and break targets. - SaveAndRestore save_Block(Block), save_Succ(Succ); - SaveAndRestore save_continue(ContinueJumpTarget); + SaveAndRestore save_Block(Block), save_Succ(Succ); + SaveAndRestore save_continue(ContinueJumpTarget); // Create an empty block to represent the transition block for looping back // to the head of the loop. If we have increment code, it will @@ -3587,7 +3587,7 @@ do { Expr *C = F->getCond(); - SaveAndRestore save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); // Specially handle logical operators, which have a slightly // more optimal CFG representation. @@ -3653,7 +3653,7 @@ // If the loop contains initialization, create a new block for those // statements. This block can also contain statements that precede the loop. if (Stmt *I = F->getInit()) { - SaveAndRestore save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); ScopePos = LoopBeginScopePos; Block = createBlock(); return addStmt(I); @@ -3756,9 +3756,9 @@ // Now create the true branch. { // Save the current values for Succ, continue and break targets. 
- SaveAndRestore save_Block(Block), save_Succ(Succ); - SaveAndRestore save_continue(ContinueJumpTarget), - save_break(BreakJumpTarget); + SaveAndRestore save_Block(Block), save_Succ(Succ); + SaveAndRestore save_continue(ContinueJumpTarget), + save_break(BreakJumpTarget); // Add an intermediate block between the BodyBlock and the // EntryConditionBlock to represent the "loop back" transition, for looping @@ -3852,7 +3852,7 @@ // Save local scope position because in case of condition variable ScopePos // won't be restored when traversing AST. - SaveAndRestore save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); // Create local scope for possible condition variable. // Store scope position for continue statement. @@ -3881,9 +3881,9 @@ assert(W->getBody()); // Save the current values for Block, Succ, continue and break targets. - SaveAndRestore save_Block(Block), save_Succ(Succ); - SaveAndRestore save_continue(ContinueJumpTarget), - save_break(BreakJumpTarget); + SaveAndRestore save_Block(Block), save_Succ(Succ); + SaveAndRestore save_continue(ContinueJumpTarget), + save_break(BreakJumpTarget); // Create an empty block to represent the transition block for looping back // to the head of the loop. @@ -4009,7 +4009,7 @@ // Save local scope position because in case of exception variable ScopePos // won't be restored when traversing AST. - SaveAndRestore save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); if (CS->getCatchBody()) addStmt(CS->getCatchBody()); @@ -4104,7 +4104,7 @@ Succ = TrySuccessor; // Save the current "try" context. - SaveAndRestore SaveTry(TryTerminatedBlock, NewTryTerminatedBlock); + SaveAndRestore SaveTry(TryTerminatedBlock, NewTryTerminatedBlock); cfg->addTryDispatchBlock(TryTerminatedBlock); assert(Terminator->getTryBody() && "try must contain a non-NULL body"); @@ -4207,8 +4207,8 @@ assert(D->getBody()); // Save the current values for Block, Succ, and continue and break targets - SaveAndRestore save_Block(Block), save_Succ(Succ); - SaveAndRestore save_continue(ContinueJumpTarget), + SaveAndRestore save_Block(Block), save_Succ(Succ); + SaveAndRestore save_continue(ContinueJumpTarget), save_break(BreakJumpTarget); // All continues within this loop should go to the condition block @@ -4326,7 +4326,7 @@ // Save local scope position because in case of condition variable ScopePos // won't be restored when traversing AST. - SaveAndRestore save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); // Create local scope for C++17 switch init-stmt if one exists. if (Stmt *Init = Terminator->getInit()) @@ -4346,9 +4346,9 @@ } else SwitchSuccessor = Succ; // Save the current "switch" context. - SaveAndRestore save_switch(SwitchTerminatedBlock), - save_default(DefaultCaseBlock); - SaveAndRestore save_break(BreakJumpTarget); + SaveAndRestore save_switch(SwitchTerminatedBlock), + save_default(DefaultCaseBlock); + SaveAndRestore save_break(BreakJumpTarget); // Set the "default" case to be the block after the switch statement. If the // switch statement contains a "default:", this value will be overwritten with @@ -4371,15 +4371,13 @@ // For pruning unreachable case statements, save the current state // for tracking the condition value. - SaveAndRestore save_switchExclusivelyCovered(switchExclusivelyCovered, - false); + SaveAndRestore save_switchExclusivelyCovered(switchExclusivelyCovered, false); // Determine if the switch condition can be explicitly evaluated. 
assert(Terminator->getCond() && "switch condition must be non-NULL"); Expr::EvalResult result; bool b = tryEvaluate(Terminator->getCond(), result); - SaveAndRestore save_switchCond(switchCond, - b ? &result : nullptr); + SaveAndRestore save_switchCond(switchCond, b ? &result : nullptr); // If body is not a compound statement create implicit scope // and add destructors. @@ -4606,7 +4604,7 @@ Succ = TrySuccessor; // Save the current "try" context. - SaveAndRestore SaveTry(TryTerminatedBlock, NewTryTerminatedBlock); + SaveAndRestore SaveTry(TryTerminatedBlock, NewTryTerminatedBlock); cfg->addTryDispatchBlock(TryTerminatedBlock); assert(Terminator->getTryBlock() && "try must contain a non-NULL body"); @@ -4620,7 +4618,7 @@ // Save local scope position because in case of exception variable ScopePos // won't be restored when traversing AST. - SaveAndRestore save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); // Create local scope for possible exception variable. // Store scope position. Add implicit destructor. @@ -4672,7 +4670,7 @@ // } // Save local scope position before the addition of the implicit variables. - SaveAndRestore save_scope_pos(ScopePos); + SaveAndRestore save_scope_pos(ScopePos); // Create local scopes and destructors for range, begin and end variables. if (Stmt *Range = S->getRangeStmt()) @@ -4697,7 +4695,7 @@ // Save the current value for the break targets. // All breaks should go to the code following the loop. - SaveAndRestore save_break(BreakJumpTarget); + SaveAndRestore save_break(BreakJumpTarget); BreakJumpTarget = JumpTarget(LoopSuccessor, ScopePos); // The block for the __begin != __end expression. @@ -4730,8 +4728,8 @@ assert(S->getBody()); // Save the current values for Block, Succ, and continue targets. - SaveAndRestore save_Block(Block), save_Succ(Succ); - SaveAndRestore save_continue(ContinueJumpTarget); + SaveAndRestore save_Block(Block), save_Succ(Succ); + SaveAndRestore save_continue(ContinueJumpTarget); // Generate increment code in its own basic block. This is the target of // continue statements. diff --git a/clang/lib/Analysis/CalledOnceCheck.cpp b/clang/lib/Analysis/CalledOnceCheck.cpp --- a/clang/lib/Analysis/CalledOnceCheck.cpp +++ b/clang/lib/Analysis/CalledOnceCheck.cpp @@ -513,7 +513,7 @@ if (const Stmt *Terminator = Conditional->getTerminatorStmt()) { return NotCalledClarifier{Conditional, SuccWithoutCall}.Visit(Terminator); } - return llvm::None; + return std::nullopt; } llvm::Optional VisitIfStmt(const IfStmt *If) { @@ -563,7 +563,7 @@ llvm::Optional VisitBinaryOperator(const BinaryOperator *) { // We don't want to report on short-curcuit logical operations. - return llvm::None; + return std::nullopt; } llvm::Optional VisitStmt(const Stmt *Terminator) { @@ -1008,7 +1008,7 @@ return A->getCompletionHandlerIndex().getASTIndex() == ParamIndex; } - return llvm::None; + return std::nullopt; } /// Return true if the specified selector represents init method. 
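The same replacement also applies to default arguments: parameters of type llvm::Optional<T> that previously defaulted to llvm::None now default to std::nullopt, as in the UncheckedOptionalAccessModel hunks below. A small illustrative sketch with made-up names:

#include <optional>

// A helper taking an optional filter; callers may omit the argument entirely.
int countUnlessIgnored(int Value, std::optional<int> Ignorable = std::nullopt) {
  if (Ignorable && *Ignorable == Value)
    return 0; // skip values the caller asked to ignore
  return 1;
}

int demoDefaults() {
  return countUnlessIgnored(42) + countUnlessIgnored(7, /*Ignorable=*/7); // 1 + 0
}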
@@ -1644,7 +1644,7 @@ return getIndex(*Parameter); } - return llvm::None; + return std::nullopt; } llvm::Optional getIndex(const ParmVarDecl &Parameter) const { @@ -1662,7 +1662,7 @@ return It - TrackedParams.begin(); } - return llvm::None; + return std::nullopt; } const ParmVarDecl *getParameter(unsigned Index) const { diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp --- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp +++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp @@ -56,7 +56,7 @@ auto isOptionalMemberCallWithName( llvm::StringRef MemberName, - llvm::Optional Ignorable = llvm::None) { + llvm::Optional Ignorable = std::nullopt) { auto Exception = unless(Ignorable ? expr(anyOf(*Ignorable, cxxThisExpr())) : cxxThisExpr()); return cxxMemberCallExpr( @@ -66,7 +66,7 @@ auto isOptionalOperatorCallWithName( llvm::StringRef operator_name, - llvm::Optional Ignorable = llvm::None) { + llvm::Optional Ignorable = std::nullopt) { return cxxOperatorCallExpr( hasOverloadedOperatorName(operator_name), callee(cxxMethodDecl(ofClass(optionalClass()))), @@ -540,7 +540,7 @@ cxxOperatorCallExpr(anyOf(hasOverloadedOperatorName("->"), hasOverloadedOperatorName("*")), unless(hasArgument(0, expr(hasOptionalType()))))))); - return llvm::None; + return std::nullopt; } StatementMatcher diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -414,7 +414,7 @@ ForwardDataflowWorklist Worklist(CFCtx.getCFG(), &POV); std::vector> BlockStates( - CFCtx.getCFG().size(), llvm::None); + CFCtx.getCFG().size(), std::nullopt); // The entry basic block doesn't contain statements so it can be skipped. const CFGBlock &Entry = CFCtx.getCFG().getEntry(); diff --git a/clang/lib/Analysis/MacroExpansionContext.cpp b/clang/lib/Analysis/MacroExpansionContext.cpp --- a/clang/lib/Analysis/MacroExpansionContext.cpp +++ b/clang/lib/Analysis/MacroExpansionContext.cpp @@ -99,11 +99,11 @@ Optional MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const { if (MacroExpansionLoc.isMacroID()) - return llvm::None; + return std::nullopt; // If there was no macro expansion at that location, return None. if (ExpansionRanges.find_as(MacroExpansionLoc) == ExpansionRanges.end()) - return llvm::None; + return std::nullopt; // There was macro expansion, but resulted in no tokens, return empty string. 
const auto It = ExpandedTokens.find_as(MacroExpansionLoc); @@ -117,11 +117,11 @@ Optional MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const { if (MacroExpansionLoc.isMacroID()) - return llvm::None; + return std::nullopt; const auto It = ExpansionRanges.find_as(MacroExpansionLoc); if (It == ExpansionRanges.end()) - return llvm::None; + return std::nullopt; assert(It->getFirst() != It->getSecond() && "Every macro expansion must cover a non-empty range."); diff --git a/clang/lib/Analysis/PathDiagnostic.cpp b/clang/lib/Analysis/PathDiagnostic.cpp --- a/clang/lib/Analysis/PathDiagnostic.cpp +++ b/clang/lib/Analysis/PathDiagnostic.cpp @@ -239,7 +239,7 @@ FullSourceLoc YEL = Y.getEndLocation().asLocation(); if (XEL != YEL) return XEL.isBeforeInTranslationUnitThan(YEL); - return None; + return std::nullopt; } static Optional compareMacro(const PathDiagnosticMacroPiece &X, @@ -305,7 +305,7 @@ case PathDiagnosticPiece::Event: case PathDiagnosticPiece::Note: case PathDiagnosticPiece::PopUp: - return None; + return std::nullopt; } llvm_unreachable("all cases handled"); } @@ -323,7 +323,7 @@ return b.value(); } - return None; + return std::nullopt; } static bool compareCrossTUSourceLocs(FullSourceLoc XL, FullSourceLoc YL) { @@ -367,7 +367,7 @@ return X.getShortDescription() < Y.getShortDescription(); auto CompareDecls = [&XL](const Decl *D1, const Decl *D2) -> Optional { if (D1 == D2) - return None; + return std::nullopt; if (!D1) return true; if (!D2) @@ -379,7 +379,7 @@ return compareCrossTUSourceLocs(FullSourceLoc(D1L, SM), FullSourceLoc(D2L, SM)); } - return None; + return std::nullopt; }; if (auto Result = CompareDecls(X.getDeclWithIssue(), Y.getDeclWithIssue())) return *Result; diff --git a/clang/lib/Analysis/RetainSummaryManager.cpp b/clang/lib/Analysis/RetainSummaryManager.cpp --- a/clang/lib/Analysis/RetainSummaryManager.cpp +++ b/clang/lib/Analysis/RetainSummaryManager.cpp @@ -71,7 +71,7 @@ if (isOneOf()) { if (!TrackObjCAndCFObjects) - return None; + return std::nullopt; K = ObjKind::CF; } else if (isOneOf()) { if (!TrackObjCAndCFObjects) - return None; + return std::nullopt; if (isOneOf() && !cocoa::isCocoaObjectRef(QT)) - return None; + return std::nullopt; K = ObjKind::ObjC; } else if (isOneOf()) { if (!TrackOSObjects) - return None; + return std::nullopt; K = ObjKind::OS; } else if (isOneOfhasAttr()) return K; - return None; + return std::nullopt; } template @@ -724,7 +724,7 @@ IdentifierInfo *II = FD->getIdentifier(); if (!II) - return None; + return std::nullopt; StringRef FName = II->getName(); FName = FName.substr(FName.find_first_not_of('_')); @@ -741,7 +741,7 @@ FName == "CMBufferQueueDequeueIfDataReadyAndRetain") { // Part of: . // These are not retain. They just return something and retain it. 
- return None; + return std::nullopt; } if (CE->getNumArgs() == 1 && (cocoa::isRefType(ResultTy, "CF", FName) || @@ -781,7 +781,7 @@ return BehaviorSummary::NoOp; } - return None; + return std::nullopt; } const RetainSummary * @@ -885,7 +885,7 @@ if (auto RE = getRetEffectFromAnnotations(RetTy, PD)) return RE; - return None; + return std::nullopt; } /// \return Whether the chain of typedefs starting from @c QT diff --git a/clang/lib/Analysis/UninitializedValues.cpp b/clang/lib/Analysis/UninitializedValues.cpp --- a/clang/lib/Analysis/UninitializedValues.cpp +++ b/clang/lib/Analysis/UninitializedValues.cpp @@ -90,7 +90,7 @@ Optional DeclToIndex::getValueIndex(const VarDecl *d) const { llvm::DenseMap::const_iterator I = map.find(d); if (I == map.end()) - return None; + return std::nullopt; return I->second; } diff --git a/clang/lib/Basic/DarwinSDKInfo.cpp b/clang/lib/Basic/DarwinSDKInfo.cpp --- a/clang/lib/Basic/DarwinSDKInfo.cpp +++ b/clang/lib/Basic/DarwinSDKInfo.cpp @@ -14,9 +14,9 @@ using namespace clang; -Optional DarwinSDKInfo::RelatedTargetVersionMapping::map( +std::optional DarwinSDKInfo::RelatedTargetVersionMapping::map( const VersionTuple &Key, const VersionTuple &MinimumValue, - Optional MaximumValue) const { + std::optional MaximumValue) const { if (Key < MinimumKeyVersion) return MinimumValue; if (Key > MaximumKeyVersion) @@ -30,7 +30,7 @@ if (Key.getMinor()) return map(VersionTuple(Key.getMajor()), MinimumValue, MaximumValue); // If this a major only key, return None for a missing entry. - return None; + return std::nullopt; } Optional @@ -45,7 +45,7 @@ llvm::VersionTuple KeyVersion; llvm::VersionTuple ValueVersion; if (KeyVersion.tryParse(KV.getFirst()) || ValueVersion.tryParse(*Val)) - return None; + return std::nullopt; Mapping[KeyVersion.normalize()] = ValueVersion; if (KeyVersion < Min) Min = KeyVersion; @@ -56,7 +56,7 @@ } } if (Mapping.empty()) - return None; + return std::nullopt; return RelatedTargetVersionMapping( Min, Max, MinValue, MaximumDeploymentTarget, std::move(Mapping)); } @@ -65,22 +65,22 @@ StringRef Key) { auto Value = Obj.getString(Key); if (!Value) - return None; + return std::nullopt; VersionTuple Version; if (Version.tryParse(*Value)) - return None; + return std::nullopt; return Version; } -Optional +std::optional DarwinSDKInfo::parseDarwinSDKSettingsJSON(const llvm::json::Object *Obj) { auto Version = getVersionKey(*Obj, "Version"); if (!Version) - return None; + return std::nullopt; auto MaximumDeploymentVersion = getVersionKey(*Obj, "MaximumDeploymentTarget"); if (!MaximumDeploymentVersion) - return None; + return std::nullopt; llvm::DenseMap> VersionMappings; if (const auto *VM = Obj->getObject("VersionMap")) { @@ -107,7 +107,7 @@ auto VersionMap = RelatedTargetVersionMapping::parseJSON( *Mapping, *MaximumDeploymentVersion); if (!VersionMap) - return None; + return std::nullopt; VersionMappings[OSEnvPair::macOStoMacCatalystPair().Value] = std::move(VersionMap); } @@ -115,7 +115,7 @@ auto VersionMap = RelatedTargetVersionMapping::parseJSON( *Mapping, *MaximumDeploymentVersion); if (!VersionMap) - return None; + return std::nullopt; VersionMappings[OSEnvPair::macCatalystToMacOSPair().Value] = std::move(VersionMap); } @@ -126,7 +126,7 @@ std::move(VersionMappings)); } -Expected> +Expected> clang::parseDarwinSDKInfo(llvm::vfs::FileSystem &VFS, StringRef SDKRootPath) { llvm::SmallString<256> Filepath = SDKRootPath; llvm::sys::path::append(Filepath, "SDKSettings.json"); @@ -134,7 +134,7 @@ VFS.getBufferForFile(Filepath); if (!File) { // If the file 
couldn't be read, assume it just doesn't exist. - return None; + return std::nullopt; } Expected Result = llvm::json::parse(File.get()->getBuffer()); diff --git a/clang/lib/Basic/DiagnosticIDs.cpp b/clang/lib/Basic/DiagnosticIDs.cpp --- a/clang/lib/Basic/DiagnosticIDs.cpp +++ b/clang/lib/Basic/DiagnosticIDs.cpp @@ -639,14 +639,14 @@ const auto *Found = llvm::partition_point( OptionTable, [=](const WarningOption &O) { return O.getName() < Name; }); if (Found == std::end(OptionTable) || Found->getName() != Name) - return llvm::None; + return std::nullopt; return static_cast(Found - OptionTable); } llvm::Optional DiagnosticIDs::getGroupForDiag(unsigned DiagID) { if (const StaticDiagInfoRec *Info = GetDiagInfo(DiagID)) return static_cast(Info->getOptionGroupIndex()); - return llvm::None; + return std::nullopt; } /// getWarningOptionForDiag - Return the lowest-level warning option that diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp --- a/clang/lib/Basic/FileManager.cpp +++ b/clang/lib/Basic/FileManager.cpp @@ -474,7 +474,7 @@ // Stat of the file and return nullptr if it doesn't exist. llvm::vfs::Status Status; if (getStatValue(VF.getName(), Status, /*isFile=*/true, /*F=*/nullptr)) - return None; + return std::nullopt; if (!SeenBypassFileEntries) SeenBypassFileEntries = std::make_unique< diff --git a/clang/lib/Basic/ProfileList.cpp b/clang/lib/Basic/ProfileList.cpp --- a/clang/lib/Basic/ProfileList.cpp +++ b/clang/lib/Basic/ProfileList.cpp @@ -111,7 +111,7 @@ return Forbid; if (SCL->inSection(Section, Prefix, Query)) return Allow; - return None; + return std::nullopt; } llvm::Optional @@ -125,7 +125,7 @@ return Forbid; if (SCL->inSection(Section, "fun", FunctionName)) return Allow; - return None; + return std::nullopt; } llvm::Optional @@ -145,5 +145,5 @@ return Forbid; if (SCL->inSection(Section, "src", FileName)) return Allow; - return None; + return std::nullopt; } diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -105,11 +105,11 @@ // Lazily create the Buffer for ContentCaches that wrap files. If we already // computed it, just return what we have. if (IsBufferInvalid) - return None; + return std::nullopt; if (Buffer) return Buffer->getMemBufferRef(); if (!ContentsEntry) - return None; + return std::nullopt; // Start with the assumption that the buffer is invalid to simplify early // return paths. @@ -131,7 +131,7 @@ Diag.Report(Loc, diag::err_cannot_open_file) << ContentsEntry->getName() << BufferOrError.getError().message(); - return None; + return std::nullopt; } Buffer = std::move(*BufferOrError); @@ -153,7 +153,7 @@ Diag.Report(Loc, diag::err_file_too_large) << ContentsEntry->getName(); - return None; + return std::nullopt; } // Unless this is a named pipe (in which case we can handle a mismatch), @@ -168,7 +168,7 @@ Diag.Report(Loc, diag::err_file_modified) << ContentsEntry->getName(); - return None; + return std::nullopt; } // If the buffer is valid, check to see if it has a UTF Byte Order Mark @@ -180,7 +180,7 @@ if (InvalidBOM) { Diag.Report(Loc, diag::err_unsupported_bom) << InvalidBOM << ContentsEntry->getName(); - return None; + return std::nullopt; } // Buffer has been validated. @@ -720,7 +720,7 @@ // If the file can't be found in the FS, give up. 
if (!BypassFile) - return None; + return std::nullopt; (void)getOrCreateContentCache(*BypassFile); return BypassFile; @@ -735,7 +735,7 @@ if (const SrcMgr::SLocEntry *Entry = getSLocEntryForFile(FID)) if (Entry->getFile().getContentCache().OrigEntry) return Entry->getFile().getName(); - return None; + return std::nullopt; } StringRef SourceManager::getBufferData(FileID FID, bool *Invalid) const { @@ -749,7 +749,7 @@ SourceManager::getBufferDataIfLoaded(FileID FID) const { if (const SrcMgr::SLocEntry *Entry = getSLocEntryForFile(FID)) return Entry->getFile().getContentCache().getBufferDataIfLoaded(); - return None; + return std::nullopt; } llvm::Optional SourceManager::getBufferDataOrNone(FileID FID) const { @@ -757,7 +757,7 @@ if (auto B = Entry->getFile().getContentCache().getBufferOrNone( Diag, getFileManager(), SourceLocation())) return B->getBuffer(); - return None; + return std::nullopt; } //===----------------------------------------------------------------------===// @@ -1309,7 +1309,7 @@ Buf += N / 8 + 1; unsigned char Byte = Word; switch (Byte) { - case 'r': + case '\r': // If this is \r\n, skip both characters. if (*Buf == '\n') { ++Buf; @@ -2230,7 +2230,7 @@ DumpSLocEntry(ID, LoadedSLocEntryTable[Index], NextStart); NextStart = LoadedSLocEntryTable[Index].getOffset(); } else { - NextStart = None; + NextStart = std::nullopt; } } } diff --git a/clang/lib/Basic/TargetID.cpp b/clang/lib/Basic/TargetID.cpp --- a/clang/lib/Basic/TargetID.cpp +++ b/clang/lib/Basic/TargetID.cpp @@ -73,7 +73,7 @@ auto Split = TargetID.split(':'); Processor = Split.first; if (Processor.empty()) - return llvm::None; + return std::nullopt; auto Features = Split.second; if (Features.empty()) @@ -88,12 +88,12 @@ auto Sign = Splits.first.back(); auto Feature = Splits.first.drop_back(); if (Sign != '+' && Sign != '-') - return llvm::None; + return std::nullopt; bool IsOn = Sign == '+'; auto Loc = FeatureMap->find(Feature); // Each feature can only show up at most once in target ID. if (Loc != FeatureMap->end()) - return llvm::None; + return std::nullopt; (*FeatureMap)[Feature] = IsOn; Features = Splits.second; } @@ -107,11 +107,11 @@ parseTargetIDWithFormatCheckingOnly(TargetID, FeatureMap); if (!OptionalProcessor) - return llvm::None; + return std::nullopt; llvm::StringRef Processor = getCanonicalProcessorName(T, *OptionalProcessor); if (Processor.empty()) - return llvm::None; + return std::nullopt; llvm::SmallSet AllFeatures; for (auto &&F : getAllPossibleTargetIDFeatures(T, Processor)) @@ -119,7 +119,7 @@ for (auto &&F : *FeatureMap) if (!AllFeatures.count(F.first())) - return llvm::None; + return std::nullopt; return Processor; } @@ -140,7 +140,7 @@ // For a specific processor, a feature either shows up in all target IDs, or // does not show up in any target IDs. Otherwise the target ID combination // is invalid. -llvm::Optional> +std::optional> getConflictTargetIDCombination(const std::set &TargetIDs) { struct Info { llvm::StringRef TargetID; @@ -161,7 +161,7 @@ return std::make_pair(Loc->second.TargetID, ID); } } - return llvm::None; + return std::nullopt; } bool isCompatibleTargetID(llvm::StringRef Provided, llvm::StringRef Requested) { diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -155,8 +155,6 @@ MaxOpenCLWorkGroupSize = 1024; MaxBitIntWidth.reset(); - - ProgramAddrSpace = 0; } // Out of line virtual dtor for TargetInfo. 
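In the per-target hooks that follow, `return std::nullopt;` is used in functions whose return type is ArrayRef<T> rather than std::optional<T>; this relies on ArrayRef being constructible from std::nullopt_t as an empty range. A minimal stand-in (not LLVM's actual ArrayRef) showing the shape of that conversion:

#include <cstddef>
#include <optional>

template <typename T> class MiniArrayRef {
  const T *Data = nullptr;
  std::size_t Length = 0;
public:
  MiniArrayRef() = default;
  /*implicit*/ MiniArrayRef(std::nullopt_t) {} // empty range, no elements
  std::size_t size() const { return Length; }
  const T *data() const { return Data; }
};

MiniArrayRef<const char *> getRegAliases() {
  return std::nullopt; // same shape as the getGCCRegAliases hunks below
}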
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -506,7 +506,7 @@ if (hasFeature("sve")) return std::pair(1, 16); - return None; + return std::nullopt; } bool AArch64TargetInfo::hasFeature(StringRef Feature) const { diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -119,7 +119,7 @@ ArrayRef getGCCRegNames() const override; ArrayRef getGCCRegAliases() const override { - return None; + return std::nullopt; } /// Accepted register names: (n, m is unsigned integer, n < m) @@ -402,7 +402,7 @@ } else if (AddressSpace == Local) { return DWARF_Local; } else { - return None; + return std::nullopt; } } @@ -452,7 +452,7 @@ Optional getTargetID() const override { if (!isAMDGCN(getTriple())) - return llvm::None; + return std::nullopt; // When -target-cpu is not set, we assume generic code that it is valid // for all GPU and use an empty string as target ID to represent that. if (GPUKind == llvm::AMDGPU::GK_NONE) diff --git a/clang/lib/Basic/Targets/ARC.h b/clang/lib/Basic/Targets/ARC.h --- a/clang/lib/Basic/Targets/ARC.h +++ b/clang/lib/Basic/Targets/ARC.h @@ -40,7 +40,9 @@ void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; - ArrayRef getTargetBuiltins() const override { return None; } + ArrayRef getTargetBuiltins() const override { + return std::nullopt; + } BuiltinVaListKind getBuiltinVaListKind() const override { return TargetInfo::VoidPtrBuiltinVaList; @@ -58,7 +60,7 @@ } ArrayRef getGCCRegAliases() const override { - return None; + return std::nullopt; } bool validateAsmConstraint(const char *&Name, diff --git a/clang/lib/Basic/Targets/AVR.h b/clang/lib/Basic/Targets/AVR.h --- a/clang/lib/Basic/Targets/AVR.h +++ b/clang/lib/Basic/Targets/AVR.h @@ -55,14 +55,15 @@ Int16Type = SignedInt; Char32Type = UnsignedLong; SigAtomicType = SignedChar; - ProgramAddrSpace = 1; resetDataLayout("e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"); } void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; - ArrayRef getTargetBuiltins() const override { return None; } + ArrayRef getTargetBuiltins() const override { + return std::nullopt; + } BuiltinVaListKind getBuiltinVaListKind() const override { return TargetInfo::VoidPtrBuiltinVaList; @@ -79,7 +80,7 @@ } ArrayRef getGCCRegAliases() const override { - return None; + return std::nullopt; } ArrayRef getGCCAddlRegNames() const override { diff --git a/clang/lib/Basic/Targets/BPF.h b/clang/lib/Basic/Targets/BPF.h --- a/clang/lib/Basic/Targets/BPF.h +++ b/clang/lib/Basic/Targets/BPF.h @@ -68,7 +68,9 @@ } bool isValidGCCRegisterName(StringRef Name) const override { return true; } - ArrayRef getGCCRegNames() const override { return None; } + ArrayRef getGCCRegNames() const override { + return std::nullopt; + } bool validateAsmConstraint(const char *&Name, TargetInfo::ConstraintInfo &Info) const override { @@ -85,7 +87,7 @@ } ArrayRef getGCCRegAliases() const override { - return None; + return std::nullopt; } bool allowDebugInfoForExternalRef() const override { return true; } diff --git a/clang/lib/Basic/Targets/DirectX.h b/clang/lib/Basic/Targets/DirectX.h --- a/clang/lib/Basic/Targets/DirectX.h +++ b/clang/lib/Basic/Targets/DirectX.h @@ -70,11 +70,15 @@ return Feature == "directx"; } - ArrayRef getTargetBuiltins() const override { return None; } + ArrayRef 
getTargetBuiltins() const override { + return std::nullopt; + } const char *getClobbers() const override { return ""; } - ArrayRef getGCCRegNames() const override { return None; } + ArrayRef getGCCRegNames() const override { + return std::nullopt; + } bool validateAsmConstraint(const char *&Name, TargetInfo::ConstraintInfo &info) const override { @@ -82,7 +86,7 @@ } ArrayRef getGCCRegAliases() const override { - return None; + return std::nullopt; } BuiltinVaListKind getBuiltinVaListKind() const override { diff --git a/clang/lib/Basic/Targets/Lanai.h b/clang/lib/Basic/Targets/Lanai.h --- a/clang/lib/Basic/Targets/Lanai.h +++ b/clang/lib/Basic/Targets/Lanai.h @@ -78,7 +78,9 @@ return TargetInfo::VoidPtrBuiltinVaList; } - ArrayRef getTargetBuiltins() const override { return None; } + ArrayRef getTargetBuiltins() const override { + return std::nullopt; + } bool validateAsmConstraint(const char *&Name, TargetInfo::ConstraintInfo &info) const override { diff --git a/clang/lib/Basic/Targets/Le64.h b/clang/lib/Basic/Targets/Le64.h --- a/clang/lib/Basic/Targets/Le64.h +++ b/clang/lib/Basic/Targets/Le64.h @@ -43,10 +43,12 @@ const char *getClobbers() const override { return ""; } - ArrayRef getGCCRegNames() const override { return None; } + ArrayRef getGCCRegNames() const override { + return std::nullopt; + } ArrayRef getGCCRegAliases() const override { - return None; + return std::nullopt; } bool validateAsmConstraint(const char *&Name, diff --git a/clang/lib/Basic/Targets/M68k.cpp b/clang/lib/Basic/Targets/M68k.cpp --- a/clang/lib/Basic/Targets/M68k.cpp +++ b/clang/lib/Basic/Targets/M68k.cpp @@ -117,7 +117,7 @@ ArrayRef M68kTargetInfo::getTargetBuiltins() const { // FIXME: Implement. - return None; + return std::nullopt; } bool M68kTargetInfo::hasFeature(StringRef Feature) const { @@ -136,7 +136,7 @@ ArrayRef M68kTargetInfo::getGCCRegAliases() const { // No aliases. - return None; + return std::nullopt; } bool M68kTargetInfo::validateAsmConstraint( @@ -209,7 +209,7 @@ C = 'd'; break; default: - return llvm::None; + return std::nullopt; } return std::string(1, C); diff --git a/clang/lib/Basic/Targets/MSP430.h b/clang/lib/Basic/Targets/MSP430.h --- a/clang/lib/Basic/Targets/MSP430.h +++ b/clang/lib/Basic/Targets/MSP430.h @@ -52,7 +52,7 @@ ArrayRef getTargetBuiltins() const override { // FIXME: Implement. - return None; + return std::nullopt; } bool allowsLargerPreferedTypeAlignment() const override { return false; } diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -87,7 +87,7 @@ ArrayRef getGCCRegAliases() const override { // No aliases. 
- return None; + return std::nullopt; } bool validateAsmConstraint(const char *&Name, @@ -162,7 +162,7 @@ getDWARFAddressSpace(unsigned AddressSpace) const override { if (AddressSpace >= std::size(NVPTXDWARFAddrSpaceMap) || NVPTXDWARFAddrSpaceMap[AddressSpace] < 0) - return llvm::None; + return std::nullopt; return NVPTXDWARFAddrSpaceMap[AddressSpace]; } diff --git a/clang/lib/Basic/Targets/PNaCl.h b/clang/lib/Basic/Targets/PNaCl.h --- a/clang/lib/Basic/Targets/PNaCl.h +++ b/clang/lib/Basic/Targets/PNaCl.h @@ -52,7 +52,9 @@ return Feature == "pnacl"; } - ArrayRef getTargetBuiltins() const override { return None; } + ArrayRef getTargetBuiltins() const override { + return std::nullopt; + } BuiltinVaListKind getBuiltinVaListKind() const override { return TargetInfo::PNaClABIBuiltinVaList; diff --git a/clang/lib/Basic/Targets/PNaCl.cpp b/clang/lib/Basic/Targets/PNaCl.cpp --- a/clang/lib/Basic/Targets/PNaCl.cpp +++ b/clang/lib/Basic/Targets/PNaCl.cpp @@ -16,10 +16,12 @@ using namespace clang; using namespace clang::targets; -ArrayRef PNaClTargetInfo::getGCCRegNames() const { return None; } +ArrayRef PNaClTargetInfo::getGCCRegNames() const { + return std::nullopt; +} ArrayRef PNaClTargetInfo::getGCCRegAliases() const { - return None; + return std::nullopt; } void PNaClTargetInfo::getArchDefines(const LangOptions &Opts, diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -264,7 +264,7 @@ MaxVLen / llvm::RISCV::RVVBitsPerBlock); } - return None; + return std::nullopt; } /// Return true if has this feature, need to sync with handleTargetFeatures. @@ -276,7 +276,7 @@ .Case("riscv64", Is64Bit) .Case("32bit", !Is64Bit) .Case("64bit", Is64Bit) - .Default(None); + .Default(std::nullopt); if (Result) return Result.value(); diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -104,11 +104,15 @@ // memcpy as per section 3 of the SPIR spec. bool useFP16ConversionIntrinsics() const override { return false; } - ArrayRef getTargetBuiltins() const override { return None; } + ArrayRef getTargetBuiltins() const override { + return std::nullopt; + } const char *getClobbers() const override { return ""; } - ArrayRef getGCCRegNames() const override { return None; } + ArrayRef getGCCRegNames() const override { + return std::nullopt; + } bool validateAsmConstraint(const char *&Name, TargetInfo::ConstraintInfo &info) const override { @@ -116,7 +120,7 @@ } ArrayRef getGCCRegAliases() const override { - return None; + return std::nullopt; } BuiltinVaListKind getBuiltinVaListKind() const override { diff --git a/clang/lib/Basic/Targets/Sparc.h b/clang/lib/Basic/Targets/Sparc.h --- a/clang/lib/Basic/Targets/Sparc.h +++ b/clang/lib/Basic/Targets/Sparc.h @@ -50,7 +50,7 @@ ArrayRef getTargetBuiltins() const override { // FIXME: Implement! - return None; + return std::nullopt; } BuiltinVaListKind getBuiltinVaListKind() const override { return TargetInfo::VoidPtrBuiltinVaList; diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h --- a/clang/lib/Basic/Targets/SystemZ.h +++ b/clang/lib/Basic/Targets/SystemZ.h @@ -71,7 +71,7 @@ ArrayRef getGCCRegAliases() const override { // No aliases. 
- return None; + return std::nullopt; } ArrayRef getGCCAddlRegNames() const override; diff --git a/clang/lib/Basic/Targets/TCE.h b/clang/lib/Basic/Targets/TCE.h --- a/clang/lib/Basic/Targets/TCE.h +++ b/clang/lib/Basic/Targets/TCE.h @@ -92,7 +92,9 @@ bool hasFeature(StringRef Feature) const override { return Feature == "tce"; } - ArrayRef getTargetBuiltins() const override { return None; } + ArrayRef getTargetBuiltins() const override { + return std::nullopt; + } const char *getClobbers() const override { return ""; } @@ -100,7 +102,9 @@ return TargetInfo::VoidPtrBuiltinVaList; } - ArrayRef getGCCRegNames() const override { return None; } + ArrayRef getGCCRegNames() const override { + return std::nullopt; + } bool validateAsmConstraint(const char *&Name, TargetInfo::ConstraintInfo &info) const override { @@ -108,7 +112,7 @@ } ArrayRef getGCCRegAliases() const override { - return None; + return std::nullopt; } }; diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h --- a/clang/lib/Basic/Targets/WebAssembly.h +++ b/clang/lib/Basic/Targets/WebAssembly.h @@ -96,10 +96,10 @@ return VoidPtrBuiltinVaList; } - ArrayRef getGCCRegNames() const final { return None; } + ArrayRef getGCCRegNames() const final { return std::nullopt; } ArrayRef getGCCRegAliases() const final { - return None; + return std::nullopt; } bool validateAsmConstraint(const char *&Name, diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -192,7 +192,7 @@ ArrayRef getGCCRegNames() const override; ArrayRef getGCCRegAliases() const override { - return None; + return std::nullopt; } ArrayRef getGCCAddlRegNames() const override; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -1453,7 +1453,7 @@ // The following currently have unknown cache line sizes (but they are probably all 64): // Core case CK_None: - return None; + return std::nullopt; } llvm_unreachable("Unknown CPU kind"); } diff --git a/clang/lib/Basic/Targets/XCore.h b/clang/lib/Basic/Targets/XCore.h --- a/clang/lib/Basic/Targets/XCore.h +++ b/clang/lib/Basic/Targets/XCore.h @@ -61,7 +61,7 @@ } ArrayRef getGCCRegAliases() const override { - return None; + return std::nullopt; } bool validateAsmConstraint(const char *&Name, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -92,6 +92,7 @@ #include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include +#include using namespace clang; using namespace llvm; @@ -312,7 +313,7 @@ } } -static Optional +static std::optional getCodeModel(const CodeGenOptions &CodeGenOpts) { unsigned CodeModel = llvm::StringSwitch(CodeGenOpts.CodeModel) .Case("tiny", llvm::CodeModel::Tiny) @@ -324,7 +325,7 @@ .Default(~0u); assert(CodeModel != ~0u && "invalid code model!"); if (CodeModel == ~1u) - return None; + return std::nullopt; return static_cast(CodeModel); } @@ -511,7 +512,7 @@ static Optional getGCOVOptions(const CodeGenOptions &CodeGenOpts, const LangOptions &LangOpts) { if (!CodeGenOpts.EmitGcovArcs && !CodeGenOpts.EmitGcovNotes) - return None; + return std::nullopt; // Not using 'GCOVOptions::getDefault' allows us to avoid exiting if // LLVM's -default-gcov-version flag is set to something invalid. 
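// Illustrative sketch (not from the patch itself): the hunks above apply one
// mechanical rewrite -- a function whose return type is llvm::Optional<T> or
// std::optional<T> now spells its empty result as std::nullopt instead of
// llvm::None, including StringSwitch .Default() sentinels. Minimal
// self-contained example with hypothetical names (parsePointerWidth is not a
// real Clang API), assuming a tree where the migration has landed:
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include <optional>

static std::optional<unsigned> parsePointerWidth(llvm::StringRef Value) {
  return llvm::StringSwitch<std::optional<unsigned>>(Value)
      .Case("32bit", 32u)
      .Case("64bit", 64u)
      .Default(std::nullopt); // was: .Default(None)
}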
GCOVOptions Options; @@ -529,7 +530,7 @@ getInstrProfOptions(const CodeGenOptions &CodeGenOpts, const LangOptions &LangOpts) { if (!CodeGenOpts.hasProfileClangInstr()) - return None; + return std::nullopt; InstrProfOptions Options; Options.NoRedZone = CodeGenOpts.DisableRedZone; Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput; @@ -572,7 +573,7 @@ return; } - Optional CM = getCodeModel(CodeGenOpts); + std::optional CM = getCodeModel(CodeGenOpts); std::string FeaturesStr = llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ","); llvm::Reloc::Model RM = CodeGenOpts.RelocationModel; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1180,7 +1180,7 @@ using MSVCIntrin = CodeGenFunction::MSVCIntrin; switch (BuiltinID) { default: - return None; + return std::nullopt; case clang::ARM::BI_BitScanForward: case clang::ARM::BI_BitScanForward64: return MSVCIntrin::_BitScanForward; @@ -1326,7 +1326,7 @@ using MSVCIntrin = CodeGenFunction::MSVCIntrin; switch (BuiltinID) { default: - return None; + return std::nullopt; case clang::AArch64::BI_BitScanForward: case clang::AArch64::BI_BitScanForward64: return MSVCIntrin::_BitScanForward; @@ -1480,7 +1480,7 @@ using MSVCIntrin = CodeGenFunction::MSVCIntrin; switch (BuiltinID) { default: - return None; + return std::nullopt; case clang::X86::BI_BitScanForward: case clang::X86::BI_BitScanForward64: return MSVCIntrin::_BitScanForward; @@ -1715,7 +1715,7 @@ SanitizerHandler::InvalidBuiltin, {EmitCheckSourceLocation(E->getExprLoc()), llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)}, - None); + std::nullopt); return ArgValue; } @@ -16780,7 +16780,7 @@ APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1)); LD->setMetadata(llvm::LLVMContext::MD_range, RNode); LD->setMetadata(llvm::LLVMContext::MD_invariant_load, - llvm::MDNode::get(CGF.getLLVMContext(), None)); + llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); return LD; } @@ -16797,7 +16797,7 @@ auto *LD = CGF.Builder.CreateLoad( Address(Cast, CGF.Int32Ty, CharUnits::fromQuantity(4))); LD->setMetadata(llvm::LLVMContext::MD_invariant_load, - llvm::MDNode::get(CGF.getLLVMContext(), None)); + llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); return LD; } } // namespace diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -40,6 +40,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/Transforms/Utils/Local.h" +#include using namespace clang; using namespace CodeGen; @@ -112,7 +113,7 @@ // variadic type. 
return arrangeLLVMFunctionInfo(FTNP->getReturnType().getUnqualifiedType(), /*instanceMethod=*/false, - /*chainCall=*/false, None, + /*chainCall=*/false, std::nullopt, FTNP->getExtInfo(), {}, RequiredArgs(0)); } @@ -459,7 +460,8 @@ if (CanQual noProto = FTy.getAs()) { return arrangeLLVMFunctionInfo( noProto->getReturnType(), /*instanceMethod=*/false, - /*chainCall=*/false, None, noProto->getExtInfo(), {},RequiredArgs::All); + /*chainCall=*/false, std::nullopt, noProto->getExtInfo(), {}, + RequiredArgs::All); } return arrangeFreeFunctionType(FTy.castAs()); @@ -710,7 +712,7 @@ const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() { return arrangeLLVMFunctionInfo( getContext().VoidTy, /*instanceMethod=*/false, /*chainCall=*/false, - None, FunctionType::ExtInfo(), {}, RequiredArgs::All); + std::nullopt, FunctionType::ExtInfo(), {}, RequiredArgs::All); } const CGFunctionInfo & @@ -1635,7 +1637,7 @@ // sret things on win32 aren't void, they return the sret pointer. QualType ret = FI.getReturnType(); llvm::Type *ty = ConvertType(ret); - unsigned addressSpace = Context.getTargetAddressSpace(ret); + unsigned addressSpace = CGM.getTypes().getTargetAddressSpace(ret); resultType = llvm::PointerType::get(ty, addressSpace); } else { resultType = llvm::Type::getVoidTy(getLLVMContext()); @@ -1659,7 +1661,7 @@ if (IRFunctionArgs.hasSRetArg()) { QualType Ret = FI.getReturnType(); llvm::Type *Ty = ConvertType(Ret); - unsigned AddressSpace = Context.getTargetAddressSpace(Ret); + unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(Ret); ArgTypes[IRFunctionArgs.getSRetArgNo()] = llvm::PointerType::get(Ty, AddressSpace); } @@ -2205,7 +2207,7 @@ HasOptnone = TargetDecl->hasAttr(); if (auto *AllocSize = TargetDecl->getAttr()) { - Optional NumElemsParam; + std::optional NumElemsParam; if (AllocSize->getNumElemsParam().isValid()) NumElemsParam = AllocSize->getNumElemsParam().getLLVMIndex(); FuncAttrs.addAllocSizeAttr(AllocSize->getElemSizeParam().getLLVMIndex(), @@ -2385,7 +2387,7 @@ if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) RetAttrs.addDereferenceableAttr( getMinimumObjectSize(PTy).getQuantity()); - if (getContext().getTargetAddressSpace(PTy) == 0 && + if (getTypes().getTargetAddressSpace(PTy) == 0 && !CodeGenOpts.NullPointerIsValid) RetAttrs.addAttribute(llvm::Attribute::NonNull); if (PTy->isObjectType()) { @@ -2434,7 +2436,7 @@ FI.arg_begin()->type.castAs()->getPointeeType(); if (!CodeGenOpts.NullPointerIsValid && - getContext().getTargetAddressSpace(FI.arg_begin()->type) == 0) { + getTypes().getTargetAddressSpace(FI.arg_begin()->type) == 0) { Attrs.addAttribute(llvm::Attribute::NonNull); Attrs.addDereferenceableAttr(getMinimumObjectSize(ThisTy).getQuantity()); } else { @@ -2561,7 +2563,7 @@ if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) Attrs.addDereferenceableAttr( getMinimumObjectSize(PTy).getQuantity()); - if (getContext().getTargetAddressSpace(PTy) == 0 && + if (getTypes().getTargetAddressSpace(PTy) == 0 && !CodeGenOpts.NullPointerIsValid) Attrs.addAttribute(llvm::Attribute::NonNull); if (PTy->isObjectType()) { @@ -2883,7 +2885,7 @@ llvm::Align Alignment = CGM.getNaturalTypeAlignment(ETy).getAsAlign(); AI->addAttrs(llvm::AttrBuilder(getLLVMContext()).addAlignmentAttr(Alignment)); - if (!getContext().getTargetAddressSpace(ETy) && + if (!getTypes().getTargetAddressSpace(ETy) && !CGM.getCodeGenOpts().NullPointerIsValid) AI->addAttr(llvm::Attribute::NonNull); } @@ -4138,7 +4140,7 @@ EmitCheckSourceLocation(ArgLoc), EmitCheckSourceLocation(AttrLoc), 
llvm::ConstantInt::get(Int32Ty, ArgNo + 1), }; - EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, None); + EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, std::nullopt); } // Check if the call is going to use the inalloca convention. This needs to @@ -4481,7 +4483,7 @@ llvm::CallInst * CodeGenFunction::EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const llvm::Twine &name) { - return EmitNounwindRuntimeCall(callee, None, name); + return EmitNounwindRuntimeCall(callee, std::nullopt, name); } /// Emits a call to the given nounwind runtime function. @@ -4498,7 +4500,7 @@ /// runtime function. llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee, const llvm::Twine &name) { - return EmitRuntimeCall(callee, None, name); + return EmitRuntimeCall(callee, std::nullopt, name); } // Calls which may throw must have operand bundles indicating which funclet @@ -4562,7 +4564,7 @@ llvm::CallBase * CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, const Twine &name) { - return EmitRuntimeCallOrInvoke(callee, None, name); + return EmitRuntimeCallOrInvoke(callee, std::nullopt, name); } /// Emits a call or invoke instruction to the given runtime function. diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -1813,7 +1813,7 @@ public: SanitizeDtorCleanupBuilder(ASTContext &Context, EHScopeStack &EHStack, const CXXDestructorDecl *DD) - : Context(Context), EHStack(EHStack), DD(DD), StartIndex(llvm::None) {} + : Context(Context), EHStack(EHStack), DD(DD), StartIndex(std::nullopt) {} void PushCleanupForField(const FieldDecl *Field) { if (Field->isZeroSize(Context)) return; @@ -1824,7 +1824,7 @@ } else if (StartIndex) { EHStack.pushCleanup( NormalAndEHCleanup, DD, StartIndex.value(), FieldIndex); - StartIndex = None; + StartIndex = std::nullopt; } } void End() { diff --git a/clang/lib/CodeGen/CGCleanup.cpp b/clang/lib/CodeGen/CGCleanup.cpp --- a/clang/lib/CodeGen/CGCleanup.cpp +++ b/clang/lib/CodeGen/CGCleanup.cpp @@ -556,7 +556,7 @@ Entry->replaceAllUsesWith(Pred); // Merge the blocks. - Pred->getInstList().splice(Pred->end(), Entry->getInstList()); + Pred->splice(Pred->end(), Entry); // Kill the entry block. Entry->eraseFromParent(); @@ -1016,8 +1016,7 @@ // throwing cleanups. For funclet EH personalities, the cleanupendpad models // program termination when cleanups throw. 
bool PushedTerminate = false; - SaveAndRestore RestoreCurrentFuncletPad( - CurrentFuncletPad); + SaveAndRestore RestoreCurrentFuncletPad(CurrentFuncletPad); llvm::CleanupPadInst *CPI = nullptr; const EHPersonality &Personality = EHPersonality::get(*this); @@ -1336,7 +1335,8 @@ CGF.getBundlesForFunclet(SehCppScope.getCallee()); if (CGF.CurrentFuncletPad) BundleList.emplace_back("funclet", CGF.CurrentFuncletPad); - CGF.Builder.CreateInvoke(SehCppScope, Cont, InvokeDest, None, BundleList); + CGF.Builder.CreateInvoke(SehCppScope, Cont, InvokeDest, std::nullopt, + BundleList); CGF.EmitBlock(Cont); } diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -351,12 +351,12 @@ if (!CGM.getCodeGenOpts().EmitCodeView && CGM.getCodeGenOpts().DwarfVersion < 5) - return None; + return std::nullopt; SourceManager &SM = CGM.getContext().getSourceManager(); Optional MemBuffer = SM.getBufferOrNone(FID); if (!MemBuffer) - return None; + return std::nullopt; auto Data = llvm::arrayRefFromStringRef(MemBuffer->getBuffer()); switch (CGM.getCodeGenOpts().getDebugSrcHash()) { @@ -376,13 +376,13 @@ Optional CGDebugInfo::getSource(const SourceManager &SM, FileID FID) { if (!CGM.getCodeGenOpts().EmbedSource) - return None; + return std::nullopt; bool SourceInvalid = false; StringRef Source = SM.getBufferData(FID, &SourceInvalid); if (SourceInvalid) - return None; + return std::nullopt; return Source; } @@ -1149,7 +1149,7 @@ uint64_t Size = CGM.getContext().getTypeSize(Ty); auto Align = getTypeAlignIfRequired(Ty, CGM.getContext()); Optional DWARFAddressSpace = CGM.getTarget().getDWARFAddressSpace( - CGM.getContext().getTargetAddressSpace(PointeeTy)); + CGM.getTypes().getTargetAddressSpace(PointeeTy)); SmallVector Annots; auto *BTFAttrTy = dyn_cast(PointeeTy); @@ -2130,7 +2130,7 @@ ->getTemplateParameters(); return {{TList, FD->getTemplateSpecializationArgs()->asArray()}}; } - return None; + return std::nullopt; } Optional CGDebugInfo::GetTemplateArgs(const VarDecl *VD) const { @@ -2139,7 +2139,7 @@ // there are arguments. auto *TS = dyn_cast(VD); if (!TS) - return None; + return std::nullopt; VarTemplateDecl *T = TS->getSpecializedTemplate(); const TemplateParameterList *TList = T->getTemplateParameters(); auto TA = TS->getTemplateArgs().asArray(); @@ -2156,7 +2156,7 @@ const TemplateArgumentList &TAList = TSpecial->getTemplateArgs(); return {{TPList, TAList.asArray()}}; } - return None; + return std::nullopt; } llvm::DINodeArray @@ -2355,7 +2355,7 @@ return; llvm::MDNode *node; if (AllocatedTy->isVoidType()) - node = llvm::MDNode::get(CGM.getLLVMContext(), None); + node = llvm::MDNode::get(CGM.getLLVMContext(), std::nullopt); else node = getOrCreateType(AllocatedTy, getOrCreateFile(Loc)); @@ -3971,7 +3971,8 @@ !CGM.getCodeGenOpts().EmitCodeView)) // Create fake but valid subroutine type. Otherwise -verify would fail, and // subprogram DIE will miss DW_AT_decl_file and DW_AT_decl_line fields. 
- return DBuilder.createSubroutineType(DBuilder.getOrCreateTypeArray(None)); + return DBuilder.createSubroutineType( + DBuilder.getOrCreateTypeArray(std::nullopt)); if (const auto *Method = dyn_cast(D)) return getOrCreateMethodType(Method, F); @@ -4454,7 +4455,7 @@ auto Align = getDeclAlignIfRequired(VD, CGM.getContext()); - unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(VD->getType()); + unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(VD->getType()); AppendAddressSpaceXDeref(AddressSpace, Expr); // If this is implicit parameter of CXXThis or ObjCSelf kind, then give it an @@ -4615,7 +4616,7 @@ return nullptr; auto Align = getDeclAlignIfRequired(BD, CGM.getContext()); - unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(BD->getType()); + unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(BD->getType()); SmallVector Expr; AppendAddressSpaceXDeref(AddressSpace, Expr); @@ -4685,11 +4686,11 @@ if (auto *DD = dyn_cast(VD)) for (auto *B : DD->bindings()) { - EmitDeclare(B, Storage, llvm::None, Builder, + EmitDeclare(B, Storage, std::nullopt, Builder, VD->getType()->isReferenceType()); } - return EmitDeclare(VD, Storage, llvm::None, Builder, UsePointerValue); + return EmitDeclare(VD, Storage, std::nullopt, Builder, UsePointerValue); } void CGDebugInfo::EmitLabel(const LabelDecl *D, CGBuilderTy &Builder) { @@ -5294,8 +5295,7 @@ auto Align = getDeclAlignIfRequired(D, CGM.getContext()); SmallVector Expr; - unsigned AddressSpace = - CGM.getContext().getTargetAddressSpace(D->getType()); + unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(D->getType()); if (CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) { if (D->hasAttr()) AddressSpace = diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -90,6 +90,7 @@ case Decl::Export: case Decl::ObjCPropertyImpl: case Decl::FileScopeAsm: + case Decl::TopLevelStmt: case Decl::Friend: case Decl::FriendTemplate: case Decl::Block: @@ -2713,5 +2714,5 @@ std::max(UserAlign, NaturalAlign.getQuantity())); } } - return llvm::None; + return std::nullopt; } diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -195,7 +195,7 @@ // For example, in the above CUDA code, the static local variable s has a // "shared" address space qualifier, but the constructor of StructWithCtor // expects "this" in the "generic" address space. - unsigned ExpectedAddrSpace = getContext().getTargetAddressSpace(T); + unsigned ExpectedAddrSpace = getTypes().getTargetAddressSpace(T); unsigned ActualAddrSpace = GV->getAddressSpace(); llvm::Constant *DeclPtr = GV; if (ActualAddrSpace != ExpectedAddrSpace) { diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp --- a/clang/lib/CodeGen/CGException.cpp +++ b/clang/lib/CodeGen/CGException.cpp @@ -1223,8 +1223,7 @@ // Wasm uses Windows-style EH instructions, but merges all catch clauses into // one big catchpad. So we save the old funclet pad here before we traverse // each catch handler. - SaveAndRestore RestoreCurrentFuncletPad( - CurrentFuncletPad); + SaveAndRestore RestoreCurrentFuncletPad(CurrentFuncletPad); llvm::BasicBlock *WasmCatchStartBlock = nullptr; if (EHPersonality::get(*this).isWasmPersonality()) { auto *CatchSwitch = @@ -1257,8 +1256,7 @@ RunCleanupsScope CatchScope(*this); // Initialize the catch variable and set up the cleanups. 
- SaveAndRestore RestoreCurrentFuncletPad( - CurrentFuncletPad); + SaveAndRestore RestoreCurrentFuncletPad(CurrentFuncletPad); CGM.getCXXABI().emitBeginCatch(*this, C); // Emit the PGO counter increment. @@ -1582,8 +1580,7 @@ // Create the cleanuppad using the current parent pad as its token. Use 'none' // if this is a top-level terminate scope, which is the common case. - SaveAndRestore RestoreCurrentFuncletPad( - CurrentFuncletPad); + SaveAndRestore RestoreCurrentFuncletPad(CurrentFuncletPad); llvm::Value *ParentPad = CurrentFuncletPad; if (!ParentPad) ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext()); diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2747,7 +2747,7 @@ getContext().getDeclAlign(VD)); llvm::Type *VarTy = getTypes().ConvertTypeForMem(VD->getType()); auto *PTy = llvm::PointerType::get( - VarTy, getContext().getTargetAddressSpace(VD->getType())); + VarTy, getTypes().getTargetAddressSpace(VD->getType())); Addr = Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, PTy, VarTy); } else { // Should we be using the alignment of the constant pointer we emitted? @@ -3061,10 +3061,9 @@ // Format the type name as if for a diagnostic, including quotes and // optionally an 'aka'. SmallString<32> Buffer; - CGM.getDiags().ConvertArgToString(DiagnosticsEngine::ak_qualtype, - (intptr_t)T.getAsOpaquePtr(), - StringRef(), StringRef(), None, Buffer, - None); + CGM.getDiags().ConvertArgToString( + DiagnosticsEngine::ak_qualtype, (intptr_t)T.getAsOpaquePtr(), StringRef(), + StringRef(), std::nullopt, Buffer, std::nullopt); llvm::Constant *Components[] = { Builder.getInt16(TypeKind), Builder.getInt16(TypeInfo), @@ -3535,7 +3534,7 @@ EmitCheck(std::make_pair(static_cast(Builder.getFalse()), SanitizerKind::Unreachable), SanitizerHandler::BuiltinUnreachable, - EmitCheckSourceLocation(Loc), None); + EmitCheckSourceLocation(Loc), std::nullopt); } Builder.CreateUnreachable(); } @@ -4575,7 +4574,7 @@ const Expr *Operand) { if (auto *ThrowExpr = dyn_cast(Operand->IgnoreParens())) { CGF.EmitCXXThrowExpr(ThrowExpr, /*KeepInsertionPoint*/false); - return None; + return std::nullopt; } return CGF.EmitLValue(Operand); @@ -4610,7 +4609,7 @@ return CGF.EmitLValue(Live); } } - return llvm::None; + return std::nullopt; } struct ConditionalInfo { llvm::BasicBlock *lhsBlock, *rhsBlock; @@ -4624,8 +4623,8 @@ const AbstractConditionalOperator *E, const FuncTy &BranchGenFunc) { ConditionalInfo Info{CGF.createBasicBlock("cond.true"), - CGF.createBasicBlock("cond.false"), llvm::None, - llvm::None}; + CGF.createBasicBlock("cond.false"), std::nullopt, + std::nullopt}; llvm::BasicBlock *endBlock = CGF.createBasicBlock("cond.end"); CodeGenFunction::ConditionalEvaluation eval(CGF); diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -305,7 +305,7 @@ // Try to decompose it into smaller constants. 
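// Illustrative sketch (not from the patch itself): the CGCleanup/CGException
// hunks above drop the explicit template argument from SaveAndRestore because
// C++17 class template argument deduction infers it from the guarded variable.
// A simplified stand-in (llvm/Support/SaveAndRestore.h provides the real class)
// showing why SaveAndRestore Guard(Flag) is equivalent to SaveAndRestore<bool>:
#include <utility>

template <typename T> class SaveAndRestoreSketch {
  T &Slot;
  T Saved;

public:
  explicit SaveAndRestoreSketch(T &X) : Slot(X), Saved(X) {}
  SaveAndRestoreSketch(T &X, T NewValue) : Slot(X), Saved(X) {
    X = std::move(NewValue);
  }
  ~SaveAndRestoreSketch() { Slot = std::move(Saved); } // restore on scope exit
};

static void withFlagSet(bool &Flag) {
  SaveAndRestoreSketch Guard(Flag, true); // deduced as SaveAndRestoreSketch<bool>
  // ... Flag is true in here and reverts when Guard goes out of scope ...
}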
if (!split(LastAtOrBeforePosIndex, Pos)) - return None; + return std::nullopt; } } diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -156,12 +156,12 @@ const Expr *E) { const Expr *Base = E->IgnoreImpCasts(); if (E == Base) - return llvm::None; + return std::nullopt; QualType BaseTy = Base->getType(); if (!Ctx.isPromotableIntegerType(BaseTy) || Ctx.getTypeSize(BaseTy) >= Ctx.getTypeSize(E->getType())) - return llvm::None; + return std::nullopt; return BaseTy; } diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp --- a/clang/lib/CodeGen/CGLoopInfo.cpp +++ b/clang/lib/CodeGen/CGLoopInfo.cpp @@ -86,7 +86,7 @@ if (Attrs.UnrollEnable == LoopAttributes::Disable) Enabled = false; else if (Attrs.UnrollEnable == LoopAttributes::Full) - Enabled = None; + Enabled = std::nullopt; else if (Attrs.UnrollEnable != LoopAttributes::Unspecified || Attrs.UnrollCount != 0) Enabled = true; @@ -496,7 +496,7 @@ !EndLoc && !Attrs.MustProgress) return; - TempLoopID = MDNode::getTemporary(Header->getContext(), None); + TempLoopID = MDNode::getTemporary(Header->getContext(), std::nullopt); } void LoopInfo::finish() { diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -140,7 +140,7 @@ llvm::Value *Ptr = EmitLoadOfScalar(LV, E->getBeginLoc()); cast(Ptr)->setMetadata( CGM.getModule().getMDKindID("invariant.load"), - llvm::MDNode::get(getLLVMContext(), None)); + llvm::MDNode::get(getLLVMContext(), std::nullopt)); return Builder.CreateBitCast(Ptr, ConvertType(E->getType())); } @@ -381,7 +381,7 @@ bool isClassMessage) { auto &CGM = CGF.CGM; if (!CGM.getCodeGenOpts().ObjCConvertMessagesToRuntimeCalls) - return None; + return std::nullopt; auto &Runtime = CGM.getLangOpts().ObjCRuntime; switch (Sel.getMethodFamily()) { @@ -402,7 +402,7 @@ if (isa(arg)) return CGF.EmitObjCAllocWithZone(Receiver, CGF.ConvertType(ResultType)); - return None; + return std::nullopt; } } break; @@ -433,7 +433,7 @@ default: break; } - return None; + return std::nullopt; } CodeGen::RValue CGObjCRuntime::GeneratePossiblySpecializedMessageSend( @@ -526,32 +526,32 @@ tryEmitSpecializedAllocInit(CodeGenFunction &CGF, const ObjCMessageExpr *OME) { auto &Runtime = CGF.getLangOpts().ObjCRuntime; if (!Runtime.shouldUseRuntimeFunctionForCombinedAllocInit()) - return None; + return std::nullopt; // Match the exact pattern '[[MyClass alloc] init]'. Selector Sel = OME->getSelector(); if (OME->getReceiverKind() != ObjCMessageExpr::Instance || !OME->getType()->isObjCObjectPointerType() || !Sel.isUnarySelector() || Sel.getNameForSlot(0) != "init") - return None; + return std::nullopt; // Okay, this is '[receiver init]', check if 'receiver' is '[cls alloc]' // with 'cls' a Class. 
auto *SubOME = dyn_cast(OME->getInstanceReceiver()->IgnoreParenCasts()); if (!SubOME) - return None; + return std::nullopt; Selector SubSel = SubOME->getSelector(); if (!SubOME->getType()->isObjCObjectPointerType() || !SubSel.isUnarySelector() || SubSel.getNameForSlot(0) != "alloc") - return None; + return std::nullopt; llvm::Value *Receiver = nullptr; switch (SubOME->getReceiverKind()) { case ObjCMessageExpr::Instance: if (!SubOME->getInstanceReceiver()->getType()->isObjCClassType()) - return None; + return std::nullopt; Receiver = CGF.EmitScalarExpr(SubOME->getInstanceReceiver()); break; @@ -565,7 +565,7 @@ } case ObjCMessageExpr::SuperInstance: case ObjCMessageExpr::SuperClass: - return None; + return std::nullopt; } return CGF.EmitObjCAllocInit(Receiver, CGF.ConvertType(OME->getType())); @@ -2343,7 +2343,7 @@ CGM.getObjCEntrypoints().objc_retainBlock); call->setMetadata("clang.arc.copy_on_escape", - llvm::MDNode::get(Builder.getContext(), None)); + llvm::MDNode::get(Builder.getContext(), std::nullopt)); } return result; @@ -2385,7 +2385,8 @@ // Call the marker asm if we made one, which we do only at -O0. if (marker) - CGF.Builder.CreateCall(marker, None, CGF.getBundlesForFunclet(marker)); + CGF.Builder.CreateCall(marker, std::nullopt, + CGF.getBundlesForFunclet(marker)); } static llvm::Value *emitOptimizedARCReturnCall(llvm::Value *value, @@ -2471,7 +2472,7 @@ if (precise == ARCImpreciseLifetime) { call->setMetadata("clang.imprecise_release", - llvm::MDNode::get(Builder.getContext(), None)); + llvm::MDNode::get(Builder.getContext(), std::nullopt)); } } @@ -2869,7 +2870,7 @@ if (precise == ARCImpreciseLifetime) { call->setMetadata("clang.imprecise_release", - llvm::MDNode::get(Builder.getContext(), None)); + llvm::MDNode::get(Builder.getContext(), std::nullopt)); } } diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp --- a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -71,7 +71,7 @@ FTy = llvm::FunctionType::get(RetTy, ArgTys, false); } else { - FTy = llvm::FunctionType::get(RetTy, None, false); + FTy = llvm::FunctionType::get(RetTy, std::nullopt, false); } } diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -7232,7 +7232,7 @@ if (IsIvarOffsetKnownIdempotent(CGF, Ivar)) cast(IvarOffsetValue) ->setMetadata(CGM.getModule().getMDKindID("invariant.load"), - llvm::MDNode::get(VMContext, None)); + llvm::MDNode::get(VMContext, std::nullopt)); } // This could be 32bit int or 64bit integer depending on the architecture. 
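// Illustrative sketch (not from the patch itself): the
// MDNode::get(..., std::nullopt) calls above (invariant.load,
// clang.imprecise_release, clang.arc.copy_on_escape) build an empty metadata
// tuple, i.e. !{} in the IR; std::nullopt converts to an empty
// ArrayRef<Metadata *>, and the marker's mere presence carries the meaning.
// Hypothetical helper making the same call:
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include <optional>

static void markInvariantLoad(llvm::LoadInst *LI) {
  llvm::LLVMContext &Ctx = LI->getContext();
  LI->setMetadata(llvm::LLVMContext::MD_invariant_load,
                  llvm::MDNode::get(Ctx, std::nullopt)); // attaches !{}
}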
@@ -7632,7 +7632,7 @@ llvm::LoadInst* LI = CGF.Builder.CreateLoad(Addr); LI->setMetadata(CGM.getModule().getMDKindID("invariant.load"), - llvm::MDNode::get(VMContext, None)); + llvm::MDNode::get(VMContext, std::nullopt)); return LI; } diff --git a/clang/lib/CodeGen/CGObjCRuntime.cpp b/clang/lib/CodeGen/CGObjCRuntime.cpp --- a/clang/lib/CodeGen/CGObjCRuntime.cpp +++ b/clang/lib/CodeGen/CGObjCRuntime.cpp @@ -230,7 +230,7 @@ CGF.EmitBlock(Handler.Block); CodeGenFunction::LexicalScope Cleanups(CGF, Handler.Body->getSourceRange()); - SaveAndRestore RevertAfterScope(CGF.CurrentFuncletPad); + SaveAndRestore RevertAfterScope(CGF.CurrentFuncletPad); if (useFunclets) { llvm::Instruction *CPICandidate = Handler.Block->getFirstNonPHI(); if (auto *CPI = dyn_cast_or_null(CPICandidate)) { diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -376,7 +376,7 @@ /// Emits \p Callee function call with arguments \p Args with location \p Loc. void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, - ArrayRef Args = llvm::None) const; + ArrayRef Args = std::nullopt) const; /// Emits address of the word in a memory where current thread id is /// stored. @@ -1516,7 +1516,7 @@ virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, - ArrayRef Args = llvm::None) const; + ArrayRef Args = std::nullopt) const; /// Emits OpenMP-specific function prolog. /// Required for device constructs. diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -5261,7 +5261,7 @@ }; RegionCodeGenTy RCG(CodeGen); CommonActionTy Action( - nullptr, llvm::None, + nullptr, std::nullopt, OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait : OMPRTL___kmpc_end_reduce), @@ -5383,7 +5383,7 @@ ThreadId, // i32 Lock // kmp_critical_name *& }; - CommonActionTy Action(nullptr, llvm::None, + CommonActionTy Action(nullptr, std::nullopt, OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_end_reduce), EndArgs); @@ -7209,7 +7209,7 @@ const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, ArrayRef - OverlappedElements = llvm::None) const { + OverlappedElements = std::nullopt) const { // The following summarizes what has to be generated for each map and the // types below. The generated information is expressed in this order: // base pointer, section pointer, size, flags @@ -7996,7 +7996,7 @@ // for map(to: lambda): using user specified map type. return getMapTypeBits( I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), - /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(), + /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(), /*AddPtrFlag=*/false, /*AddIsTargetParamFlag=*/false, /*isNonContiguous=*/false); @@ -8140,7 +8140,7 @@ for (const auto L : C->component_lists()) { const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), - C->getMapTypeModifiers(), llvm::None, + C->getMapTypeModifiers(), std::nullopt, /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), E); ++EI; @@ -8156,7 +8156,7 @@ Kind = Present; const auto *EI = C->getVarRefs().begin(); for (const auto L : C->component_lists()) { - InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None, + InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt, C->getMotionModifiers(), /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), *EI); ++EI; @@ -8172,9 +8172,10 @@ Kind = Present; const auto *EI = C->getVarRefs().begin(); for (const auto L : C->component_lists()) { - InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None, - C->getMotionModifiers(), /*ReturnDevicePointer=*/false, - C->isImplicit(), std::get<2>(L), *EI); + InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, + std::nullopt, C->getMotionModifiers(), + /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), + *EI); ++EI; } } @@ -8220,8 +8221,8 @@ // processed. Nonetheless, generateInfoForComponentList must be // called to take the pointer into account for the calculation of // the range of the partial struct. - InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None, - llvm::None, /*ReturnDevicePointer=*/false, IsImplicit, + InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt, + std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr, IsDevAddr); DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr); } else { @@ -8889,7 +8890,7 @@ ArrayRef OverlappedComponents = Pair.getSecond(); generateInfoForComponentList( - MapType, MapModifiers, llvm::None, Components, CombinedInfo, + MapType, MapModifiers, std::nullopt, Components, CombinedInfo, PartialStruct, IsFirstComponentList, IsImplicit, Mapper, /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); IsFirstComponentList = false; @@ -8906,7 +8907,7 @@ L; auto It = OverlappedData.find(&L); if (It == OverlappedData.end()) - generateInfoForComponentList(MapType, MapModifiers, llvm::None, + generateInfoForComponentList(MapType, MapModifiers, std::nullopt, Components, CombinedInfo, PartialStruct, IsFirstComponentList, IsImplicit, Mapper, /*ForDeviceAddr=*/false, VD, VarRef); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -324,7 +324,7 @@ /// translating these arguments to correct target-specific arguments. void emitOutlinedFunctionCall( CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, - ArrayRef Args = llvm::None) const override; + ArrayRef Args = std::nullopt) const override; /// Emits OpenMP-specific function prolog. /// Required for device constructs. 
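// Illustrative sketch (not from the patch itself): the default arguments
// rewritten above (e.g. ArrayRef<llvm::Value *> Args = std::nullopt) rely on
// ArrayRef accepting std::nullopt_t and producing an empty array, so call
// sites that pass nothing keep working. Minimal illustration with a
// hypothetical function:
#include "llvm/ADT/ArrayRef.h"
#include <cassert>
#include <optional>

static size_t countOutlinedArgs(llvm::ArrayRef<int> Args = std::nullopt) {
  return Args.size();
}

static void demoDefaults() {
  assert(countOutlinedArgs() == 0); // default: empty ArrayRef
  int Vals[] = {1, 2, 3};
  assert(countOutlinedArgs(Vals) == 3);
}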
@@ -412,7 +412,7 @@ using EscapedParamsTy = llvm::SmallPtrSet; struct FunctionData { DeclToAddrMapTy LocalVarData; - llvm::Optional SecondaryLocalVarData = llvm::None; + llvm::Optional SecondaryLocalVarData = std::nullopt; EscapedParamsTy EscapedParameters; llvm::SmallVector EscapedVariableLengthDecls; llvm::SmallVector, 4> diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -1028,7 +1028,7 @@ getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions); if (!LastPrivatesReductions.empty()) { GlobalizedRD = ::buildRecordForGlobalizedVars( - CGM.getContext(), llvm::None, LastPrivatesReductions, + CGM.getContext(), std::nullopt, LastPrivatesReductions, MappedDeclsFields, WarpSize); } } else if (!LastPrivatesReductions.empty()) { @@ -3005,7 +3005,7 @@ ++Cnt; } const RecordDecl *TeamReductionRec = ::buildRecordForGlobalizedVars( - CGM.getContext(), PrivatesReductions, llvm::None, VarFieldMap, + CGM.getContext(), PrivatesReductions, std::nullopt, VarFieldMap, C.getLangOpts().OpenMPCUDAReductionBufNum); TeamsReductions.push_back(TeamReductionRec); if (!KernelTeamsReductionPtr) { @@ -3077,7 +3077,7 @@ llvm::Value *EndArgs[] = {ThreadId}; RegionCodeGenTy RCG(CodeGen); NVPTXActionTy Action( - nullptr, llvm::None, + nullptr, std::nullopt, OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_nvptx_end_reduce_nowait), EndArgs); @@ -3133,7 +3133,7 @@ const Type *NonQualTy = QC.strip(NativeParamType); QualType NativePointeeTy = cast(NonQualTy)->getPointeeType(); unsigned NativePointeeAddrSpace = - CGF.getContext().getTargetAddressSpace(NativePointeeTy); + CGF.getTypes().getTargetAddressSpace(NativePointeeTy); QualType TargetTy = TargetParam->getType(); llvm::Value *TargetAddr = CGF.EmitLoadOfScalar( LocalAddr, /*Volatile=*/false, TargetTy, SourceLocation()); @@ -3358,7 +3358,7 @@ Data.insert(std::make_pair(VD, MappedVarData())); } if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) { - CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None); + CheckVarsEscapingDeclContext VarChecker(CGF, std::nullopt); VarChecker.Visit(Body); I->getSecond().SecondaryLocalVarData.emplace(); DeclToAddrMapTy &Data = *I->getSecond().SecondaryLocalVarData; @@ -3709,10 +3709,10 @@ llvm::Function *F = M->getFunction(LocSize); if (!F) { F = llvm::Function::Create( - llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false), + llvm::FunctionType::get(CGF.Int32Ty, std::nullopt, false), llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule()); } - return Bld.CreateCall(F, llvm::None, "nvptx_num_threads"); + return Bld.CreateCall(F, std::nullopt, "nvptx_num_threads"); } llvm::Value *CGOpenMPRuntimeGPU::getGPUThreadID(CodeGenFunction &CGF) { diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -721,11 +721,10 @@ break; } } - SaveAndRestore save_nomerge(InNoMergeAttributedStmt, nomerge); - SaveAndRestore save_noinline(InNoInlineAttributedStmt, noinline); - SaveAndRestore save_alwaysinline(InAlwaysInlineAttributedStmt, - alwaysinline); - SaveAndRestore save_musttail(MustTailCall, musttail); + SaveAndRestore save_nomerge(InNoMergeAttributedStmt, nomerge); + SaveAndRestore save_noinline(InNoInlineAttributedStmt, noinline); + SaveAndRestore save_alwaysinline(InAlwaysInlineAttributedStmt, alwaysinline); + SaveAndRestore save_musttail(MustTailCall, 
musttail); EmitStmt(S.getSubStmt(), S.getAttrs()); } @@ -1874,7 +1873,7 @@ getLikelihoodWeights(ArrayRef Likelihoods) { // Are there enough branches to weight them? if (Likelihoods.size() <= 1) - return None; + return std::nullopt; uint64_t NumUnlikely = 0; uint64_t NumNone = 0; @@ -1895,7 +1894,7 @@ // Is there a likelihood attribute used? if (NumUnlikely == 0 && NumLikely == 0) - return None; + return std::nullopt; // When multiple cases share the same code they can be combined during // optimization. In that case the weights of the branch will be the sum of diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -74,7 +74,7 @@ public: OMPLexicalScope( CodeGenFunction &CGF, const OMPExecutableDirective &S, - const llvm::Optional CapturedRegion = llvm::None, + const llvm::Optional CapturedRegion = std::nullopt, const bool EmitPreInitStmt = true) : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), InlinedShareds(CGF) { @@ -114,7 +114,7 @@ public: OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) - : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None, + : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt, EmitPreInitStmt(S)) {} }; @@ -129,7 +129,7 @@ public: OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) - : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None, + : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt, EmitPreInitStmt(S)) {} }; @@ -446,7 +446,7 @@ FunctionDecl *DebugFunctionDecl = nullptr; if (!FO.UIntPtrCastRequired) { FunctionProtoType::ExtProtoInfo EPI; - QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI); + QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI); DebugFunctionDecl = FunctionDecl::Create( Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(), SourceLocation(), DeclarationName(), FunctionTy, @@ -4945,7 +4945,7 @@ llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, Data.NumberOfParts); - OMPLexicalScope Scope(*this, S, llvm::None, + OMPLexicalScope Scope(*this, S, std::nullopt, !isOpenMPParallelDirective(S.getDirectiveKind()) && !isOpenMPSimdDirective(S.getDirectiveKind())); TaskGen(*this, OutlinedFn, Data); @@ -5322,7 +5322,7 @@ if (const auto *FlushClause = S.getSingleClause()) return llvm::makeArrayRef(FlushClause->varlist_begin(), FlushClause->varlist_end()); - return llvm::None; + return std::nullopt; }(), S.getBeginLoc(), AO); } @@ -5991,7 +5991,7 @@ case llvm::AtomicOrdering::Acquire: case llvm::AtomicOrdering::AcquireRelease: case llvm::AtomicOrdering::SequentiallyConsistent: - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc, llvm::AtomicOrdering::Acquire); break; case llvm::AtomicOrdering::Monotonic: @@ -6020,7 +6020,7 @@ case llvm::AtomicOrdering::Release: case llvm::AtomicOrdering::AcquireRelease: case llvm::AtomicOrdering::SequentiallyConsistent: - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc, llvm::AtomicOrdering::Release); break; case llvm::AtomicOrdering::Acquire: @@ -6211,7 +6211,7 @@ case llvm::AtomicOrdering::Release: case llvm::AtomicOrdering::AcquireRelease: case llvm::AtomicOrdering::SequentiallyConsistent: - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + 
CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc, llvm::AtomicOrdering::Release); break; case llvm::AtomicOrdering::Acquire: @@ -6326,17 +6326,17 @@ // operation is also an acquire flush. switch (AO) { case llvm::AtomicOrdering::Release: - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc, llvm::AtomicOrdering::Release); break; case llvm::AtomicOrdering::Acquire: - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc, llvm::AtomicOrdering::Acquire); break; case llvm::AtomicOrdering::AcquireRelease: case llvm::AtomicOrdering::SequentiallyConsistent: CGF.CGM.getOpenMPRuntime().emitFlush( - CGF, llvm::None, Loc, llvm::AtomicOrdering::AcquireRelease); + CGF, std::nullopt, Loc, llvm::AtomicOrdering::AcquireRelease); break; case llvm::AtomicOrdering::Monotonic: break; @@ -7775,7 +7775,7 @@ }; auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); - OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false); + OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false); CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); } diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -698,7 +698,7 @@ if (Pair.first == Hash) return Pair.second; } - return None; + return std::nullopt; } void BackendConsumer::UnsupportedDiagHandler( diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3213,7 +3213,7 @@ /// This function may clear the current insertion point; callers should use /// EnsureInsertPoint if they wish to subsequently generate code without first /// calling EmitBlock, EmitBranch, or EmitStmt. - void EmitStmt(const Stmt *S, ArrayRef Attrs = None); + void EmitStmt(const Stmt *S, ArrayRef Attrs = std::nullopt); /// EmitSimpleStmt - Try to emit a "simple" statement which does not /// necessarily require an insertion point or debug information; typically @@ -3241,10 +3241,10 @@ void EmitIfStmt(const IfStmt &S); void EmitWhileStmt(const WhileStmt &S, - ArrayRef Attrs = None); - void EmitDoStmt(const DoStmt &S, ArrayRef Attrs = None); + ArrayRef Attrs = std::nullopt); + void EmitDoStmt(const DoStmt &S, ArrayRef Attrs = std::nullopt); void EmitForStmt(const ForStmt &S, - ArrayRef Attrs = None); + ArrayRef Attrs = std::nullopt); void EmitReturnStmt(const ReturnStmt &S); void EmitDeclStmt(const DeclStmt &S); void EmitBreakStmt(const BreakStmt &S); @@ -3321,7 +3321,7 @@ llvm::Value *ParentFP); void EmitCXXForRangeStmt(const CXXForRangeStmt &S, - ArrayRef Attrs = None); + ArrayRef Attrs = std::nullopt); /// Controls insertion of cancellation exit blocks in worksharing constructs. 
class OMPCancelStackRAII { diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -1467,7 +1467,7 @@ llvm::Value *IsFalse = Builder.getFalse(); EmitCheck(std::make_pair(IsFalse, SanitizerKind::Return), SanitizerHandler::MissingReturn, - EmitCheckSourceLocation(FD->getLocation()), None); + EmitCheckSourceLocation(FD->getLocation()), std::nullopt); } else if (ShouldEmitUnreachable) { if (CGM.getCodeGenOpts().OptimizationLevel == 0) EmitTrapCall(llvm::Intrinsic::trap); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -591,6 +591,11 @@ llvm::DenseMap RTTIProxyMap; + // Helps squashing blocks of TopLevelStmtDecl into a single llvm::Function + // when used with -fincremental-extensions. + std::pair, const TopLevelStmtDecl *> + GlobalTopLevelStmtBlockInFlight; + public: CodeGenModule(ASTContext &C, IntrusiveRefCntPtr FS, const HeaderSearchOptions &headersearchopts, @@ -715,7 +720,8 @@ llvm::MDNode *getNoObjCARCExceptionsMetadata() { if (!NoObjCARCExceptionsMetadata) - NoObjCARCExceptionsMetadata = llvm::MDNode::get(getLLVMContext(), None); + NoObjCARCExceptionsMetadata = + llvm::MDNode::get(getLLVMContext(), std::nullopt); return NoObjCARCExceptionsMetadata; } @@ -1079,7 +1085,8 @@ llvm::Constant *getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID); - llvm::Function *getIntrinsic(unsigned IID, ArrayRef Tys = None); + llvm::Function *getIntrinsic(unsigned IID, + ArrayRef Tys = std::nullopt); /// Emit code for a single top level declaration. void EmitTopLevelDecl(Decl *D); @@ -1590,6 +1597,7 @@ void EmitDeclContext(const DeclContext *DC); void EmitLinkageSpec(const LinkageSpecDecl *D); + void EmitTopLevelStmt(const TopLevelStmtDecl *D); /// Emit the function that initializes C++ thread_local variables. void EmitCXXThreadLocalInitFunc(); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -516,6 +516,14 @@ applyGlobalValReplacements(); applyReplacements(); emitMultiVersionFunctions(); + + if (Context.getLangOpts().IncrementalExtensions && + GlobalTopLevelStmtBlockInFlight.first) { + const TopLevelStmtDecl *TLSD = GlobalTopLevelStmtBlockInFlight.second; + GlobalTopLevelStmtBlockInFlight.first->FinishFunction(TLSD->getEndLoc()); + GlobalTopLevelStmtBlockInFlight = {nullptr, nullptr}; + } + if (CXX20ModuleInits && Primary && Primary->isInterfaceOrPartition()) EmitCXXModuleInitFunc(Primary); else @@ -1641,7 +1649,7 @@ // The LTO linker doesn't seem to like it when we set an alignment // on appending variables. Take it off as a workaround. - list->setAlignment(llvm::None); + list->setAlignment(std::nullopt); Fns.clear(); } @@ -3196,7 +3204,7 @@ // See if there is already something with the target's name in the module. 
llvm::GlobalValue *Entry = GetGlobalValue(AA->getAliasee()); if (Entry) { - unsigned AS = getContext().getTargetAddressSpace(VD->getType()); + unsigned AS = getTypes().getTargetAddressSpace(VD->getType()); auto Ptr = llvm::ConstantExpr::getBitCast(Entry, DeclTy->getPointerTo(AS)); return ConstantAddress(Ptr, DeclTy, Alignment); } @@ -3761,7 +3769,7 @@ if (getTarget().supportsIFunc()) { ResolverType = llvm::FunctionType::get( llvm::PointerType::get(DeclTy, - Context.getTargetAddressSpace(FD->getType())), + getTypes().getTargetAddressSpace(FD->getType())), false); } else { @@ -3899,8 +3907,8 @@ // cpu_dispatch will be emitted in this translation unit. if (getTarget().supportsIFunc() && !FD->isCPUSpecificMultiVersion()) { llvm::Type *ResolverType = llvm::FunctionType::get( - llvm::PointerType::get( - DeclTy, getContext().getTargetAddressSpace(FD->getType())), + llvm::PointerType::get(DeclTy, + getTypes().getTargetAddressSpace(FD->getType())), false); llvm::Constant *Resolver = GetOrCreateLLVMFunction( MangledName + ".resolver", ResolverType, GlobalDecl{}, @@ -6150,6 +6158,39 @@ EmitDeclContext(LSD); } +void CodeGenModule::EmitTopLevelStmt(const TopLevelStmtDecl *D) { + std::unique_ptr &CurCGF = + GlobalTopLevelStmtBlockInFlight.first; + + // We emitted a top-level stmt but after it there is initialization. + // Stop squashing the top-level stmts into a single function. + if (CurCGF && CXXGlobalInits.back() != CurCGF->CurFn) { + CurCGF->FinishFunction(D->getEndLoc()); + CurCGF = nullptr; + } + + if (!CurCGF) { + // void __stmts__N(void) + // FIXME: Ask the ABI name mangler to pick a name. + std::string Name = "__stmts__" + llvm::utostr(CXXGlobalInits.size()); + FunctionArgList Args; + QualType RetTy = getContext().VoidTy; + const CGFunctionInfo &FnInfo = + getTypes().arrangeBuiltinFunctionDeclaration(RetTy, Args); + llvm::FunctionType *FnTy = getTypes().GetFunctionType(FnInfo); + llvm::Function *Fn = llvm::Function::Create( + FnTy, llvm::GlobalValue::InternalLinkage, Name, &getModule()); + + CurCGF.reset(new CodeGenFunction(*this)); + GlobalTopLevelStmtBlockInFlight.second = D; + CurCGF->StartFunction(GlobalDecl(), RetTy, Fn, FnInfo, Args, + D->getBeginLoc(), D->getBeginLoc()); + CXXGlobalInits.push_back(Fn); + } + + CurCGF->EmitStmt(D->getStmt()); +} + void CodeGenModule::EmitDeclContext(const DeclContext *DC) { for (auto *I : DC->decls()) { // Unlike other DeclContexts, the contents of an ObjCImplDecl at TU scope @@ -6359,6 +6400,10 @@ break; } + case Decl::TopLevelStmt: + EmitTopLevelStmt(cast(D)); + break; + case Decl::Import: { auto *Import = cast(D); diff --git a/clang/lib/CodeGen/CodeGenPGO.h b/clang/lib/CodeGen/CodeGenPGO.h --- a/clang/lib/CodeGen/CodeGenPGO.h +++ b/clang/lib/CodeGen/CodeGenPGO.h @@ -61,10 +61,10 @@ /// true and put the value in Count; else return false. 
Optional getStmtCount(const Stmt *S) const { if (!StmtCountMap) - return None; + return std::nullopt; auto I = StmtCountMap->find(S); if (I == StmtCountMap->end()) - return None; + return std::nullopt; return I->second; } diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h --- a/clang/lib/CodeGen/CodeGenTypes.h +++ b/clang/lib/CodeGen/CodeGenTypes.h @@ -305,7 +305,7 @@ bool isRecordBeingLaidOut(const Type *Ty) const { return RecordsBeingLaidOut.count(Ty); } - + unsigned getTargetAddressSpace(QualType T) const; }; } // end namespace CodeGen diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -655,7 +655,7 @@ const ReferenceType *RTy = cast(Ty); QualType ETy = RTy->getPointeeType(); llvm::Type *PointeeType = ConvertTypeForMem(ETy); - unsigned AS = Context.getTargetAddressSpace(ETy); + unsigned AS = getTargetAddressSpace(ETy); ResultType = llvm::PointerType::get(PointeeType, AS); break; } @@ -665,7 +665,7 @@ llvm::Type *PointeeType = ConvertTypeForMem(ETy); if (PointeeType->isVoidTy()) PointeeType = llvm::Type::getInt8Ty(getLLVMContext()); - unsigned AS = Context.getTargetAddressSpace(ETy); + unsigned AS = getTargetAddressSpace(ETy); ResultType = llvm::PointerType::get(PointeeType, AS); break; } @@ -958,3 +958,13 @@ bool CodeGenTypes::isZeroInitializable(const RecordDecl *RD) { return getCGRecordLayout(RD).isZeroInitializable(); } + +unsigned CodeGenTypes::getTargetAddressSpace(QualType T) const { + // Return the address space for the type. If the type is a + // function type without an address space qualifier, the + // program address space is used. Otherwise, the target picks + // the best address space based on the type information + return T->isFunctionType() && !T.hasAddressSpace() + ? getDataLayout().getProgramAddressSpace() + : getContext().getTargetAddressSpace(T.getAddressSpace()); +} diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -330,7 +330,7 @@ auto Mapping = FileIDMapping.find(SM.getFileID(Loc)); if (Mapping != FileIDMapping.end()) return Mapping->second.first; - return None; + return std::nullopt; } /// This shrinks the skipped range if it spans a line that contains a @@ -355,7 +355,7 @@ } if (SR.isInSourceOrder()) return SR; - return None; + return std::nullopt; } /// Gather all the regions that were skipped by the preprocessor @@ -527,7 +527,7 @@ if (MappingRegions.empty()) return; - CoverageMappingWriter Writer(FileIDMapping, None, MappingRegions); + CoverageMappingWriter Writer(FileIDMapping, std::nullopt, MappingRegions); Writer.write(OS); } }; @@ -583,9 +583,10 @@ /// /// Returns the index on the stack where the region was pushed. This can be /// used with popRegions to exit a "scope", ending the region that was pushed. - size_t pushRegion(Counter Count, Optional StartLoc = None, - Optional EndLoc = None, - Optional FalseCount = None) { + size_t pushRegion(Counter Count, + Optional StartLoc = std::nullopt, + Optional EndLoc = std::nullopt, + Optional FalseCount = std::nullopt) { if (StartLoc && !FalseCount) { MostRecentLocation = *StartLoc; @@ -917,10 +918,10 @@ // If the start and end locations of the gap are both within the same macro // file, the range may not be in source order. 
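// Illustrative sketch (not from the patch itself): the new
// CodeGenTypes::getTargetAddressSpace helper above exists because, on targets
// whose data layout declares a non-zero program address space (the AVR layout
// string "e-P1-p:16:8-..." earlier in this patch declares P1), a function type
// without an explicit qualifier must be lowered as a pointer into that space
// rather than address space 0. Hypothetical helper showing where the number
// comes from:
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

static llvm::PointerType *functionPtrTypeFor(llvm::LLVMContext &Ctx,
                                             const llvm::DataLayout &DL) {
  auto *FnTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx),
                                       /*isVarArg=*/false);
  // 1 for the AVR layout above, 0 for most targets.
  return llvm::PointerType::get(FnTy, DL.getProgramAddressSpace());
}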
if (AfterLoc.isMacroID() || BeforeLoc.isMacroID()) - return None; + return std::nullopt; if (!SM.isWrittenInSameFile(AfterLoc, BeforeLoc) || !SpellingRegion(SM, AfterLoc, BeforeLoc).isInSourceOrder()) - return None; + return std::nullopt; return {{AfterLoc, BeforeLoc}}; } diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -1250,7 +1250,7 @@ llvm::FunctionCallee Fn = CGM.CreateRuntimeFunction(FTy, "__cxa_rethrow"); if (isNoReturn) - CGF.EmitNoreturnRuntimeCallOrInvoke(Fn, None); + CGF.EmitNoreturnRuntimeCallOrInvoke(Fn, std::nullopt); else CGF.EmitRuntimeCallOrInvoke(Fn); } diff --git a/clang/lib/CodeGen/ModuleBuilder.cpp b/clang/lib/CodeGen/ModuleBuilder.cpp --- a/clang/lib/CodeGen/ModuleBuilder.cpp +++ b/clang/lib/CodeGen/ModuleBuilder.cpp @@ -179,6 +179,7 @@ } bool HandleTopLevelDecl(DeclGroupRef DG) override { + // FIXME: Why not return false and abort parsing? if (Diags.hasErrorOccurred()) return true; diff --git a/clang/lib/CodeGen/SanitizerMetadata.cpp b/clang/lib/CodeGen/SanitizerMetadata.cpp --- a/clang/lib/CodeGen/SanitizerMetadata.cpp +++ b/clang/lib/CodeGen/SanitizerMetadata.cpp @@ -104,5 +104,5 @@ void SanitizerMetadata::disableSanitizerForInstruction(llvm::Instruction *I) { I->setMetadata(llvm::LLVMContext::MD_nosanitize, - llvm::MDNode::get(CGM.getLLVMContext(), None)); + llvm::MDNode::get(CGM.getLLVMContext(), std::nullopt)); } diff --git a/clang/lib/CrossTU/CrossTranslationUnit.cpp b/clang/lib/CrossTU/CrossTranslationUnit.cpp --- a/clang/lib/CrossTU/CrossTranslationUnit.cpp +++ b/clang/lib/CrossTU/CrossTranslationUnit.cpp @@ -797,7 +797,7 @@ CrossTranslationUnitContext::getMacroExpansionContextForSourceLocation( const clang::SourceLocation &ToLoc) const { // FIXME: Implement: Record such a context for every imported ASTUnit; lookup. - return llvm::None; + return std::nullopt; } bool CrossTranslationUnitContext::isImportedAsNew(const Decl *ToDecl) const { diff --git a/clang/lib/DirectoryWatcher/DirectoryScanner.cpp b/clang/lib/DirectoryWatcher/DirectoryScanner.cpp --- a/clang/lib/DirectoryWatcher/DirectoryScanner.cpp +++ b/clang/lib/DirectoryWatcher/DirectoryScanner.cpp @@ -18,7 +18,7 @@ sys::fs::file_status Status; std::error_code EC = status(Path, Status); if (EC) - return None; + return std::nullopt; return Status; } diff --git a/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp b/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp --- a/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp +++ b/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp @@ -75,7 +75,7 @@ static llvm::Optional create() { int InotifyPollingStopperFDs[2]; if (pipe2(InotifyPollingStopperFDs, O_CLOEXEC) == -1) - return llvm::None; + return std::nullopt; return SemaphorePipe(InotifyPollingStopperFDs); } }; diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp --- a/clang/lib/Driver/Compilation.cpp +++ b/clang/lib/Driver/Compilation.cpp @@ -297,7 +297,7 @@ TCArgs.clear(); // Redirect stdout/stderr to /dev/null. - Redirects = {None, {""}, {""}}; + Redirects = {std::nullopt, {""}, {""}}; // Temporary files added by diagnostics should be kept. 
ForceKeepTempFiles = true; @@ -307,6 +307,6 @@ return getDriver().SysRoot; } -void Compilation::Redirect(ArrayRef> Redirects) { +void Compilation::Redirect(ArrayRef> Redirects) { this->Redirects = Redirects; } diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -106,8 +106,8 @@ using namespace clang; using namespace llvm::opt; -static llvm::Optional -getOffloadTargetTriple(const Driver &D, const ArgList &Args) { +static std::optional getOffloadTargetTriple(const Driver &D, + const ArgList &Args) { auto OffloadTargets = Args.getAllArgValues(options::OPT_offload_EQ); // Offload compilation flow does not support multiple targets for now. We // need the HIPActionBuilder (and possibly the CudaActionBuilder{,Base}too) @@ -115,17 +115,17 @@ switch (OffloadTargets.size()) { default: D.Diag(diag::err_drv_only_one_offload_target_supported); - return llvm::None; + return std::nullopt; case 0: D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << ""; - return llvm::None; + return std::nullopt; case 1: break; } return llvm::Triple(OffloadTargets[0]); } -static llvm::Optional +static std::optional getNVIDIAOffloadTargetTriple(const Driver &D, const ArgList &Args, const llvm::Triple &HostTriple) { if (!Args.hasArg(options::OPT_offload_EQ)) { @@ -138,19 +138,19 @@ if (Args.hasArg(options::OPT_emit_llvm)) return TT; D.Diag(diag::err_drv_cuda_offload_only_emit_bc); - return llvm::None; + return std::nullopt; } D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str(); - return llvm::None; + return std::nullopt; } -static llvm::Optional +static std::optional getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) { if (!Args.hasArg(options::OPT_offload_EQ)) { return llvm::Triple("amdgcn-amd-amdhsa"); // Default HIP triple. } auto TT = getOffloadTargetTriple(D, Args); if (!TT) - return llvm::None; + return std::nullopt; if (TT->getArch() == llvm::Triple::amdgcn && TT->getVendor() == llvm::Triple::AMD && TT->getOS() == llvm::Triple::AMDHSA) @@ -158,7 +158,7 @@ if (TT->getArch() == llvm::Triple::spirv64) return TT; D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str(); - return llvm::None; + return std::nullopt; } // static @@ -234,14 +234,14 @@ void Driver::setDriverMode(StringRef Value) { static const std::string OptName = getOpts().getOption(options::OPT_driver_mode).getPrefixedName(); - if (auto M = llvm::StringSwitch>(Value) + if (auto M = llvm::StringSwitch>(Value) .Case("gcc", GCCMode) .Case("g++", GXXMode) .Case("cpp", CPPMode) .Case("cl", CLMode) .Case("flang", FlangMode) .Case("dxc", DXCMode) - .Default(None)) + .Default(std::nullopt)) Mode = *M; else Diag(diag::err_drv_unsupported_option_argument) << OptName << Value; @@ -1211,9 +1211,6 @@ // FIXME: This stuff needs to go into the Compilation, not the driver. bool CCCPrintPhases; - // Silence driver warnings if requested - Diags.setIgnoreAllWarnings(Args.hasArg(options::OPT_w)); - // -canonical-prefixes, -no-canonical-prefixes are used very early in main. Args.ClaimAllArgs(options::OPT_canonical_prefixes); Args.ClaimAllArgs(options::OPT_no_canonical_prefixes); @@ -1599,7 +1596,7 @@ NewLLDInvocation.replaceArguments(std::move(ArgList)); // Redirect stdout/stderr to /dev/null. 
- NewLLDInvocation.Execute({None, {""}, {""}}, nullptr, nullptr); + NewLLDInvocation.Execute({std::nullopt, {""}, {""}}, nullptr, nullptr); Diag(clang::diag::note_drv_command_failed_diag_msg) << BugReporMsg; Diag(clang::diag::note_drv_command_failed_diag_msg) << TmpName; Diag(clang::diag::note_drv_command_failed_diag_msg) @@ -2991,7 +2988,7 @@ /// option is invalid. virtual StringRef getCanonicalOffloadArch(StringRef Arch) = 0; - virtual llvm::Optional> + virtual std::optional> getConflictOffloadArchCombination(const std::set &GpuArchs) = 0; bool initialize() override { @@ -3128,10 +3125,10 @@ return CudaArchToString(Arch); } - llvm::Optional> + std::optional> getConflictOffloadArchCombination( const std::set &GpuArchs) override { - return llvm::None; + return std::nullopt; } ActionBuilderReturnCode @@ -3247,7 +3244,7 @@ // Bundle code objects except --no-gpu-output is specified for device // only compilation. Bundle other type of output files only if // --gpu-bundle-output is specified for device only compilation. - Optional BundleOutput; + std::optional BundleOutput; public: HIPActionBuilder(Compilation &C, DerivedArgList &Args, @@ -3278,7 +3275,7 @@ return Args.MakeArgStringRef(CanId); }; - llvm::Optional> + std::optional> getConflictOffloadArchCombination( const std::set &GpuArchs) override { return getConflictTargetIDCombination(GpuArchs); @@ -4210,11 +4207,11 @@ /// Checks if the set offloading architectures does not conflict. Returns the /// incompatible pair if a conflict occurs. -static llvm::Optional> +static std::optional> getConflictOffloadArchCombination(const llvm::DenseSet &Archs, Action::OffloadKind Kind) { if (Kind != Action::OFK_HIP) - return None; + return std::nullopt; std::set ArchSet; llvm::copy(Archs, std::inserter(ArchSet, ArchSet.begin())); @@ -4659,7 +4656,7 @@ if (CCPrintProcessStats) { C.setPostCallback([=](const Command &Cmd, int Res) { - Optional ProcStat = + std::optional ProcStat = Cmd.getProcessStatistics(); if (!ProcStat) return; @@ -5713,7 +5710,7 @@ std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const { // Search for Name in a list of paths. auto SearchPaths = [&](const llvm::SmallVectorImpl &P) - -> llvm::Optional { + -> std::optional { // Respect a limited subset of the '-Bprefix' functionality in GCC by // attempting to use this prefix when looking for file paths. for (const auto &Dir : P) { @@ -5724,7 +5721,7 @@ if (llvm::sys::fs::exists(Twine(P))) return std::string(P); } - return None; + return std::nullopt; }; if (auto P = SearchPaths(PrefixDirs)) diff --git a/clang/lib/Driver/Job.cpp b/clang/lib/Driver/Job.cpp --- a/clang/lib/Driver/Job.cpp +++ b/clang/lib/Driver/Job.cpp @@ -302,7 +302,7 @@ } void Command::setRedirectFiles( - const std::vector> &Redirects) { + const std::vector> &Redirects) { RedirectFiles = Redirects; } @@ -314,7 +314,7 @@ } } -int Command::Execute(ArrayRef> Redirects, +int Command::Execute(ArrayRef> Redirects, std::string *ErrMsg, bool *ExecutionFailed) const { PrintFileNames(); @@ -347,7 +347,7 @@ } } - Optional> Env; + std::optional> Env; std::vector ArgvVectorStorage; if (!Environment.empty()) { assert(Environment.back() == nullptr && @@ -360,12 +360,12 @@ // Use Job-specific redirect files if they are present. 
if (!RedirectFiles.empty()) { - std::vector> RedirectFilesOptional; + std::vector> RedirectFilesOptional; for (const auto &Ele : RedirectFiles) if (Ele) - RedirectFilesOptional.push_back(Optional(*Ele)); + RedirectFilesOptional.push_back(std::optional(*Ele)); else - RedirectFilesOptional.push_back(None); + RedirectFilesOptional.push_back(std::nullopt); return llvm::sys::ExecuteAndWait(Executable, Args, Env, makeArrayRef(RedirectFilesOptional), @@ -395,7 +395,7 @@ Command::Print(OS, Terminator, Quote, CrashInfo); } -int CC1Command::Execute(ArrayRef> Redirects, +int CC1Command::Execute(ArrayRef> Redirects, std::string *ErrMsg, bool *ExecutionFailed) const { // FIXME: Currently, if there're more than one job, we disable // -fintegrate-cc1. If we're no longer a integrated-cc1 job, fallback to @@ -452,7 +452,7 @@ OS << " || (exit 0)" << Terminator; } -int ForceSuccessCommand::Execute(ArrayRef> Redirects, +int ForceSuccessCommand::Execute(ArrayRef> Redirects, std::string *ErrMsg, bool *ExecutionFailed) const { int Status = Command::Execute(Redirects, ErrMsg, ExecutionFailed); diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp --- a/clang/lib/Driver/OffloadBundler.cpp +++ b/clang/lib/Driver/OffloadBundler.cpp @@ -202,7 +202,7 @@ /// Read the marker of the next bundled to be read in the file. The bundle /// name is returned if there is one in the file, or `None` if there are no /// more bundles to be read. - virtual Expected> + virtual Expected> ReadBundleStart(MemoryBuffer &Input) = 0; /// Read the marker that closes the current bundle. @@ -245,7 +245,8 @@ Error forEachBundle(MemoryBuffer &Input, std::function Func) { while (true) { - Expected> CurTripleOrErr = ReadBundleStart(Input); + Expected> CurTripleOrErr = + ReadBundleStart(Input); if (!CurTripleOrErr) return CurTripleOrErr.takeError(); @@ -415,9 +416,10 @@ return Error::success(); } - Expected> ReadBundleStart(MemoryBuffer &Input) final { + Expected> + ReadBundleStart(MemoryBuffer &Input) final { if (NextBundleInfo == BundlesInfo.end()) - return None; + return std::nullopt; CurBundleInfo = NextBundleInfo++; return CurBundleInfo->first(); } @@ -501,7 +503,7 @@ } // Creates temporary file with given contents. - Expected Create(Optional> Contents) { + Expected Create(std::optional> Contents) { SmallString<128u> File; if (std::error_code EC = sys::fs::createTemporaryFile("clang-offload-bundler", "tmp", File)) @@ -538,14 +540,15 @@ /// Return bundle name (-) if the provided section is an offload /// section. - static Expected> IsOffloadSection(SectionRef CurSection) { + static Expected> + IsOffloadSection(SectionRef CurSection) { Expected NameOrErr = CurSection.getName(); if (!NameOrErr) return NameOrErr.takeError(); // If it does not start with the reserved suffix, just skip this section. if (!NameOrErr->startswith(OFFLOAD_BUNDLER_MAGIC_STR)) - return None; + return std::nullopt; // Return the triple that is right after the reserved prefix. return NameOrErr->substr(sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1); @@ -576,21 +579,22 @@ Error ReadHeader(MemoryBuffer &Input) final { return Error::success(); } - Expected> ReadBundleStart(MemoryBuffer &Input) final { + Expected> + ReadBundleStart(MemoryBuffer &Input) final { while (NextSection != Obj->section_end()) { CurrentSection = NextSection; ++NextSection; // Check if the current section name starts with the reserved prefix. If // so, return the triple. 
- Expected> TripleOrErr = + Expected> TripleOrErr = IsOffloadSection(*CurrentSection); if (!TripleOrErr) return TripleOrErr.takeError(); if (*TripleOrErr) return **TripleOrErr; } - return None; + return std::nullopt; } Error ReadBundleEnd(MemoryBuffer &Input) final { return Error::success(); } @@ -733,13 +737,14 @@ protected: Error ReadHeader(MemoryBuffer &Input) final { return Error::success(); } - Expected> ReadBundleStart(MemoryBuffer &Input) final { + Expected> + ReadBundleStart(MemoryBuffer &Input) final { StringRef FC = Input.getBuffer(); // Find start of the bundle. ReadChars = FC.find(BundleStartString, ReadChars); if (ReadChars == FC.npos) - return None; + return std::nullopt; // Get position of the triple. size_t TripleStart = ReadChars = ReadChars + BundleStartString.size(); @@ -747,7 +752,7 @@ // Get position that closes the triple. size_t TripleEnd = ReadChars = FC.find("\n", ReadChars); if (TripleEnd == FC.npos) - return None; + return std::nullopt; // Next time we read after the new line. ++ReadChars; @@ -989,7 +994,8 @@ // assume the file is meant for the host target. bool FoundHostBundle = false; while (!Worklist.empty()) { - Expected> CurTripleOrErr = FH->ReadBundleStart(Input); + Expected> CurTripleOrErr = + FH->ReadBundleStart(Input); if (!CurTripleOrErr) return CurTripleOrErr.takeError(); @@ -1180,12 +1186,12 @@ if (Error ReadErr = FileHandler.get()->ReadHeader(*CodeObjectBuffer)) return ReadErr; - Expected> CurBundleIDOrErr = + Expected> CurBundleIDOrErr = FileHandler->ReadBundleStart(*CodeObjectBuffer); if (!CurBundleIDOrErr) return CurBundleIDOrErr.takeError(); - Optional OptionalCurBundleID = *CurBundleIDOrErr; + std::optional OptionalCurBundleID = *CurBundleIDOrErr; // No device code in this child, skip. if (!OptionalCurBundleID) continue; @@ -1245,7 +1251,7 @@ if (Error Err = FileHandler.get()->ReadBundleEnd(*CodeObjectBuffer)) return Err; - Expected> NextTripleOrErr = + Expected> NextTripleOrErr = FileHandler->ReadBundleStart(*CodeObjectBuffer); if (!NextTripleOrErr) return NextTripleOrErr.takeError(); diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -226,7 +226,8 @@ auto ExpCommand = std::make_unique( JA, *this, ResponseFileSupport::None(), CreateExportListExec, CreateExportCmdArgs, Inputs, Output); - ExpCommand->setRedirectFiles({None, std::string(ExportList), None}); + ExpCommand->setRedirectFiles( + {std::nullopt, std::string(ExportList), std::nullopt}); C.addCommand(std::move(ExpCommand)); CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-bE:") + ExportList)); } diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -113,9 +113,9 @@ /// The struct type returned by getParsedTargetID. 
struct ParsedTargetIDType { - Optional OptionalTargetID; - Optional OptionalGPUArch; - Optional> OptionalFeatures; + std::optional OptionalTargetID; + std::optional OptionalGPUArch; + std::optional> OptionalFeatures; }; /// Get target ID, GPU arch, and target ID features if the target ID is diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -747,12 +747,12 @@ AMDGPUToolChain::getParsedTargetID(const llvm::opt::ArgList &DriverArgs) const { StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); if (TargetID.empty()) - return {None, None, None}; + return {std::nullopt, std::nullopt, std::nullopt}; llvm::StringMap FeatureMap; auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap); if (!OptionalGpuArch) - return {TargetID.str(), None, None}; + return {TargetID.str(), std::nullopt, std::nullopt}; return {TargetID.str(), OptionalGpuArch->str(), FeatureMap}; } @@ -778,7 +778,7 @@ llvm::sys::fs::createTemporaryFile("print-system-gpus", "" /* No Suffix */, OutputFile); llvm::FileRemover OutputRemover(OutputFile.c_str()); - llvm::Optional Redirects[] = { + std::optional Redirects[] = { {""}, OutputFile.str(), {""}, diff --git a/clang/lib/Driver/ToolChains/AVR.h b/clang/lib/Driver/ToolChains/AVR.h --- a/clang/lib/Driver/ToolChains/AVR.h +++ b/clang/lib/Driver/ToolChains/AVR.h @@ -31,7 +31,7 @@ llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadKind) const override; - llvm::Optional findAVRLibcInstallation() const; + std::optional findAVRLibcInstallation() const; StringRef getGCCInstallPath() const { return GCCInstallPath; } std::string getCompilerRT(const llvm::opt::ArgList &Args, StringRef Component, FileType Type) const override; diff --git a/clang/lib/Driver/ToolChains/AVR.cpp b/clang/lib/Driver/ToolChains/AVR.cpp --- a/clang/lib/Driver/ToolChains/AVR.cpp +++ b/clang/lib/Driver/ToolChains/AVR.cpp @@ -341,18 +341,18 @@ return ""; } -llvm::Optional GetMCUFamilyName(StringRef MCUName) { +std::optional GetMCUFamilyName(StringRef MCUName) { for (const auto &MCU : MCUInfo) if (MCU.Name == MCUName) - return Optional(MCU.Family); - return None; + return std::optional(MCU.Family); + return std::nullopt; } -llvm::Optional GetMCUSectionAddressData(StringRef MCUName) { +std::optional GetMCUSectionAddressData(StringRef MCUName) { for (const auto &MCU : MCUInfo) if (MCU.Name == MCUName && MCU.DataAddr > 0) - return Optional(MCU.DataAddr); - return None; + return std::optional(MCU.DataAddr); + return std::nullopt; } const StringRef PossibleAVRLibcLocations[] = { @@ -388,7 +388,7 @@ return; // Omit if there is no avr-libc installed. - Optional AVRLibcRoot = findAVRLibcInstallation(); + std::optional AVRLibcRoot = findAVRLibcInstallation(); if (!AVRLibcRoot) return; @@ -443,9 +443,9 @@ // Compute information about the target AVR. std::string CPU = getCPUName(D, Args, getToolChain().getTriple()); - llvm::Optional FamilyName = GetMCUFamilyName(CPU); - llvm::Optional AVRLibcRoot = TC.findAVRLibcInstallation(); - llvm::Optional SectionAddressData = GetMCUSectionAddressData(CPU); + std::optional FamilyName = GetMCUFamilyName(CPU); + std::optional AVRLibcRoot = TC.findAVRLibcInstallation(); + std::optional SectionAddressData = GetMCUSectionAddressData(CPU); // Compute the linker program path, and use GNU "avr-ld" as default. 
const Arg *A = Args.getLastArg(options::OPT_fuse_ld_EQ); @@ -562,7 +562,7 @@ CmdArgs, Inputs, Output)); } -llvm::Optional AVRToolChain::findAVRLibcInstallation() const { +std::optional AVRToolChain::findAVRLibcInstallation() const { // Search avr-libc installation according to avr-gcc installation. std::string GCCParent(GCCInstallation.getParentLibPath()); std::string Path(GCCParent + "/avr"); @@ -580,5 +580,5 @@ return Path; } - return llvm::None; + return std::nullopt; } diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -442,8 +442,7 @@ bool KernelOrKext = Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext); arm::FloatABI ABI = arm::getARMFloatABI(D, Triple, Args); - llvm::Optional> WaCPU, WaFPU, WaHDiv, - WaArch; + std::optional> WaCPU, WaFPU, WaHDiv, WaArch; // This vector will accumulate features from the architecture // extension suffixes on -mcpu and -march (e.g. the 'bar' in diff --git a/clang/lib/Driver/ToolChains/Arch/CSKY.h b/clang/lib/Driver/ToolChains/Arch/CSKY.h --- a/clang/lib/Driver/ToolChains/Arch/CSKY.h +++ b/clang/lib/Driver/ToolChains/Arch/CSKY.h @@ -35,9 +35,9 @@ llvm::opt::ArgStringList &CmdArgs, std::vector &Features); -llvm::Optional getCSKYArchName(const Driver &D, - const llvm::opt::ArgList &Args, - const llvm::Triple &Triple); +std::optional getCSKYArchName(const Driver &D, + const llvm::opt::ArgList &Args, + const llvm::Triple &Triple); } // end namespace csky } // namespace tools diff --git a/clang/lib/Driver/ToolChains/Arch/CSKY.cpp b/clang/lib/Driver/ToolChains/Arch/CSKY.cpp --- a/clang/lib/Driver/ToolChains/Arch/CSKY.cpp +++ b/clang/lib/Driver/ToolChains/Arch/CSKY.cpp @@ -25,7 +25,7 @@ using namespace clang; using namespace llvm::opt; -llvm::Optional +std::optional csky::getCSKYArchName(const Driver &D, const ArgList &Args, const llvm::Triple &Triple) { if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) { @@ -33,21 +33,21 @@ if (ArchKind == llvm::CSKY::ArchKind::INVALID) { D.Diag(clang::diag::err_drv_invalid_arch_name) << A->getAsString(Args); - return llvm::None; + return std::nullopt; } - return llvm::Optional(A->getValue()); + return std::optional(A->getValue()); } if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mcpu_EQ)) { llvm::CSKY::ArchKind ArchKind = llvm::CSKY::parseCPUArch(A->getValue()); if (ArchKind == llvm::CSKY::ArchKind::INVALID) { D.Diag(clang::diag::err_drv_clang_unsupported) << A->getAsString(Args); - return llvm::None; + return std::nullopt; } - return llvm::Optional(llvm::CSKY::getArchName(ArchKind)); + return std::optional(llvm::CSKY::getArchName(ArchKind)); } - return llvm::Optional("ck810"); + return std::optional("ck810"); } csky::FloatABI csky::getCSKYFloatABI(const Driver &D, const ArgList &Args) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -292,98 +292,6 @@ } } -static void getWebAssemblyTargetFeatures(const ArgList &Args, - std::vector &Features) { - handleTargetFeaturesGroup(Args, Features, options::OPT_m_wasm_Features_Group); -} - -static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple, - const ArgList &Args, ArgStringList &CmdArgs, - bool ForAS, bool IsAux = false) { - std::vector Features; - switch (Triple.getArch()) { - default: - break; - case llvm::Triple::mips: - case llvm::Triple::mipsel: - case 
llvm::Triple::mips64: - case llvm::Triple::mips64el: - mips::getMIPSTargetFeatures(D, Triple, Args, Features); - break; - - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: - arm::getARMTargetFeatures(D, Triple, Args, Features, ForAS); - break; - - case llvm::Triple::ppc: - case llvm::Triple::ppcle: - case llvm::Triple::ppc64: - case llvm::Triple::ppc64le: - ppc::getPPCTargetFeatures(D, Triple, Args, Features); - break; - case llvm::Triple::riscv32: - case llvm::Triple::riscv64: - riscv::getRISCVTargetFeatures(D, Triple, Args, Features); - break; - case llvm::Triple::systemz: - systemz::getSystemZTargetFeatures(D, Args, Features); - break; - case llvm::Triple::aarch64: - case llvm::Triple::aarch64_32: - case llvm::Triple::aarch64_be: - aarch64::getAArch64TargetFeatures(D, Triple, Args, Features, ForAS); - break; - case llvm::Triple::x86: - case llvm::Triple::x86_64: - x86::getX86TargetFeatures(D, Triple, Args, Features); - break; - case llvm::Triple::hexagon: - hexagon::getHexagonTargetFeatures(D, Args, Features); - break; - case llvm::Triple::wasm32: - case llvm::Triple::wasm64: - getWebAssemblyTargetFeatures(Args, Features); - break; - case llvm::Triple::sparc: - case llvm::Triple::sparcel: - case llvm::Triple::sparcv9: - sparc::getSparcTargetFeatures(D, Args, Features); - break; - case llvm::Triple::r600: - case llvm::Triple::amdgcn: - amdgpu::getAMDGPUTargetFeatures(D, Triple, Args, Features); - break; - case llvm::Triple::nvptx: - case llvm::Triple::nvptx64: - NVPTX::getNVPTXTargetFeatures(D, Triple, Args, Features); - break; - case llvm::Triple::m68k: - m68k::getM68kTargetFeatures(D, Triple, Args, Features); - break; - case llvm::Triple::msp430: - msp430::getMSP430TargetFeatures(D, Args, Features); - break; - case llvm::Triple::ve: - ve::getVETargetFeatures(D, Args, Features); - break; - case llvm::Triple::csky: - csky::getCSKYTargetFeatures(D, Triple, Args, CmdArgs, Features); - break; - case llvm::Triple::loongarch32: - case llvm::Triple::loongarch64: - loongarch::getLoongArchTargetFeatures(D, Triple, Args, Features); - break; - } - - for (auto Feature : unifyTargetFeatures(Features)) { - CmdArgs.push_back(IsAux ? 
"-aux-target-feature" : "-target-feature"); - CmdArgs.push_back(Feature.data()); - } -} - static bool shouldUseExceptionTablesForObjCExceptions(const ObjCRuntime &runtime, const llvm::Triple &Triple) { @@ -6306,6 +6214,12 @@ << A->getAsString(Args) << TripleStr; } } + if (Arg *A = Args.getLastArgNoClaim(options::OPT_p)) { + if (!TC.getTriple().isOSAIX() && !TC.getTriple().isOSOpenBSD()) { + D.Diag(diag::err_drv_unsupported_opt_for_target) + << A->getAsString(Args) << TripleStr; + } + } if (Args.getLastArg(options::OPT_fapple_kext) || (Args.hasArg(options::OPT_mkernel) && types::isCXX(InputType))) @@ -8247,7 +8161,7 @@ C.addCommand(std::make_unique( JA, *this, ResponseFileSupport::None(), TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), - CmdArgs, None, Output)); + CmdArgs, std::nullopt, Output)); } void OffloadBundler::ConstructJobMultipleOutputs( @@ -8331,7 +8245,7 @@ C.addCommand(std::make_unique( JA, *this, ResponseFileSupport::None(), TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), - CmdArgs, None, Outputs)); + CmdArgs, std::nullopt, Outputs)); } void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h --- a/clang/lib/Driver/ToolChains/CommonArgs.h +++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -174,6 +174,11 @@ std::string getCPUName(const Driver &D, const llvm::opt::ArgList &Args, const llvm::Triple &T, bool FromAs = false); +void getTargetFeatures(const Driver &D, const llvm::Triple &Triple, + const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs, bool ForAS, + bool IsAux = false); + /// Iterate \p Args and convert -mxxx to +xxx and -mno-xxx to -xxx and /// append it to \p Features. 
/// diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -9,6 +9,8 @@ #include "CommonArgs.h" #include "Arch/AArch64.h" #include "Arch/ARM.h" +#include "Arch/CSKY.h" +#include "Arch/LoongArch.h" #include "Arch/M68k.h" #include "Arch/Mips.h" #include "Arch/PPC.h" @@ -19,6 +21,7 @@ #include "Arch/X86.h" #include "HIPAMD.h" #include "Hexagon.h" +#include "MSP430.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/ObjCRuntime.h" @@ -470,6 +473,96 @@ } } +static void getWebAssemblyTargetFeatures(const ArgList &Args, + std::vector &Features) { + handleTargetFeaturesGroup(Args, Features, options::OPT_m_wasm_Features_Group); +} + +void tools::getTargetFeatures(const Driver &D, const llvm::Triple &Triple, + const ArgList &Args, ArgStringList &CmdArgs, + bool ForAS, bool IsAux) { + std::vector Features; + switch (Triple.getArch()) { + default: + break; + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + mips::getMIPSTargetFeatures(D, Triple, Args, Features); + break; + case llvm::Triple::arm: + case llvm::Triple::armeb: + case llvm::Triple::thumb: + case llvm::Triple::thumbeb: + arm::getARMTargetFeatures(D, Triple, Args, Features, ForAS); + break; + case llvm::Triple::ppc: + case llvm::Triple::ppcle: + case llvm::Triple::ppc64: + case llvm::Triple::ppc64le: + ppc::getPPCTargetFeatures(D, Triple, Args, Features); + break; + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + riscv::getRISCVTargetFeatures(D, Triple, Args, Features); + break; + case llvm::Triple::systemz: + systemz::getSystemZTargetFeatures(D, Args, Features); + break; + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_32: + case llvm::Triple::aarch64_be: + aarch64::getAArch64TargetFeatures(D, Triple, Args, Features, ForAS); + break; + case llvm::Triple::x86: + case llvm::Triple::x86_64: + x86::getX86TargetFeatures(D, Triple, Args, Features); + break; + case llvm::Triple::hexagon: + hexagon::getHexagonTargetFeatures(D, Args, Features); + break; + case llvm::Triple::wasm32: + case llvm::Triple::wasm64: + getWebAssemblyTargetFeatures(Args, Features); + break; + case llvm::Triple::sparc: + case llvm::Triple::sparcel: + case llvm::Triple::sparcv9: + sparc::getSparcTargetFeatures(D, Args, Features); + break; + case llvm::Triple::r600: + case llvm::Triple::amdgcn: + amdgpu::getAMDGPUTargetFeatures(D, Triple, Args, Features); + break; + case llvm::Triple::nvptx: + case llvm::Triple::nvptx64: + NVPTX::getNVPTXTargetFeatures(D, Triple, Args, Features); + break; + case llvm::Triple::m68k: + m68k::getM68kTargetFeatures(D, Triple, Args, Features); + break; + case llvm::Triple::msp430: + msp430::getMSP430TargetFeatures(D, Args, Features); + break; + case llvm::Triple::ve: + ve::getVETargetFeatures(D, Args, Features); + break; + case llvm::Triple::csky: + csky::getCSKYTargetFeatures(D, Triple, Args, CmdArgs, Features); + break; + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: + loongarch::getLoongArchTargetFeatures(D, Triple, Args, Features); + break; + } + + for (auto Feature : unifyTargetFeatures(Features)) { + CmdArgs.push_back(IsAux ? 
"-aux-target-feature" : "-target-feature"); + CmdArgs.push_back(Feature.data()); + } +} + llvm::StringRef tools::getLTOParallelism(const ArgList &Args, const Driver &D) { Arg *LtoJobsArg = Args.getLastArg(options::OPT_flto_jobs_EQ); if (!LtoJobsArg) @@ -941,10 +1034,7 @@ SharedRuntimes.push_back("ubsan_standalone"); } if (SanArgs.needsScudoRt() && SanArgs.linkRuntimes()) { - if (SanArgs.requiresMinimalRuntime()) - SharedRuntimes.push_back("scudo_minimal"); - else - SharedRuntimes.push_back("scudo"); + SharedRuntimes.push_back("scudo_standalone"); } if (SanArgs.needsTsanRt() && SanArgs.linkRuntimes()) SharedRuntimes.push_back("tsan"); @@ -1041,15 +1131,9 @@ RequiredSymbols.push_back("__sanitizer_stats_register"); } if (!SanArgs.needsSharedRt() && SanArgs.needsScudoRt() && SanArgs.linkRuntimes()) { - if (SanArgs.requiresMinimalRuntime()) { - StaticRuntimes.push_back("scudo_minimal"); - if (SanArgs.linkCXXRuntimes()) - StaticRuntimes.push_back("scudo_cxx_minimal"); - } else { - StaticRuntimes.push_back("scudo"); - if (SanArgs.linkCXXRuntimes()) - StaticRuntimes.push_back("scudo_cxx"); - } + StaticRuntimes.push_back("scudo_standalone"); + if (SanArgs.linkCXXRuntimes()) + StaticRuntimes.push_back("scudo_standalone_cxx"); } } diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -81,7 +81,7 @@ // None otherwise. auto StartsWithWords = [](llvm::StringRef Line, - const SmallVector words) -> llvm::Optional { + const SmallVector words) -> std::optional { for (StringRef word : words) { if (!Line.consume_front(word)) return {}; diff --git a/clang/lib/Driver/ToolChains/Darwin.h b/clang/lib/Driver/ToolChains/Darwin.h --- a/clang/lib/Driver/ToolChains/Darwin.h +++ b/clang/lib/Driver/ToolChains/Darwin.h @@ -148,7 +148,7 @@ mutable std::unique_ptr VerifyDebug; /// The version of the linker known to be available in the tool chain. - mutable Optional LinkerVersion; + mutable std::optional LinkerVersion; public: MachO(const Driver &D, const llvm::Triple &Triple, @@ -318,10 +318,10 @@ mutable VersionTuple OSTargetVersion; /// The information about the darwin SDK that was used. - mutable Optional SDKInfo; + mutable std::optional SDKInfo; /// The target variant triple that was specified (if any). - mutable Optional TargetVariantTriple; + mutable std::optional TargetVariantTriple; CudaInstallationDetector CudaInstallation; RocmInstallationDetector RocmInstallation; diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -550,8 +550,9 @@ const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("touch")); CmdArgs.push_back(Output.getFilename()); - C.addCommand(std::make_unique( - JA, *this, ResponseFileSupport::None(), Exec, CmdArgs, None, Output)); + C.addCommand(std::make_unique(JA, *this, + ResponseFileSupport::None(), Exec, + CmdArgs, std::nullopt, Output)); return; } @@ -1554,7 +1555,7 @@ /// Returns true if the simulator environment can be inferred from the arch. 
bool canInferSimulatorFromArch() const { return InferSimulatorFromArch; } - const Optional &getTargetVariantTriple() const { + const std::optional &getTargetVariantTriple() const { return TargetVariantTriple; } @@ -1605,7 +1606,7 @@ void setEnvironment(llvm::Triple::EnvironmentType EnvType, const VersionTuple &OSVersion, - const Optional &SDKInfo) { + const std::optional &SDKInfo) { switch (EnvType) { case llvm::Triple::Simulator: Environment = DarwinEnvironmentKind::Simulator; @@ -1618,7 +1619,7 @@ if (const auto *MacCatalystToMacOSMapping = SDKInfo->getVersionMapping( DarwinSDKInfo::OSEnvPair::macCatalystToMacOSPair())) { if (auto MacOSVersion = MacCatalystToMacOSMapping->map( - OSVersion, NativeTargetVersion, None)) { + OSVersion, NativeTargetVersion, std::nullopt)) { NativeTargetVersion = *MacOSVersion; } } @@ -1642,8 +1643,8 @@ static DarwinPlatform createFromTarget(const llvm::Triple &TT, StringRef OSVersion, Arg *A, - Optional TargetVariantTriple, - const Optional &SDKInfo) { + std::optional TargetVariantTriple, + const std::optional &SDKInfo) { DarwinPlatform Result(TargetArg, getPlatformFromOS(TT.getOS()), OSVersion, A); VersionTuple OsVersion = TT.getOSVersion(); @@ -1656,7 +1657,7 @@ static DarwinPlatform createFromMTargetOS(llvm::Triple::OSType OS, VersionTuple OSVersion, llvm::Triple::EnvironmentType Environment, Arg *A, - const Optional &SDKInfo) { + const std::optional &SDKInfo) { DarwinPlatform Result(MTargetOSArg, getPlatformFromOS(OS), OSVersion.getAsString(), A); Result.InferSimulatorFromArch = false; @@ -1738,12 +1739,12 @@ bool HasOSVersion = true, InferSimulatorFromArch = true; Arg *Argument; StringRef EnvVarName; - Optional TargetVariantTriple; + std::optional TargetVariantTriple; }; /// Returns the deployment target that's specified using the -m-version-min /// argument. -Optional +std::optional getDeploymentTargetFromOSVersionArg(DerivedArgList &Args, const Driver &TheDriver) { Arg *macOSVersion = Args.getLastArg(options::OPT_mmacos_version_min_EQ); @@ -1790,12 +1791,12 @@ Darwin::WatchOS, WatchOSVersion, WatchOSVersion->getOption().getID() == options::OPT_mwatchos_simulator_version_min_EQ); - return None; + return std::nullopt; } /// Returns the deployment target that's specified using the /// OS_DEPLOYMENT_TARGET environment variable. -Optional +std::optional getDeploymentTargetFromEnvironmentVariables(const Driver &TheDriver, const llvm::Triple &Triple) { std::string Targets[Darwin::LastDarwinPlatform + 1]; @@ -1845,7 +1846,7 @@ (Darwin::DarwinPlatformKind)Target.index(), EnvVars[Target.index()], Target.value()); } - return None; + return std::nullopt; } /// Returns the SDK name without the optional prefix that ends with a '.' or an @@ -1860,16 +1861,16 @@ /// Tries to infer the deployment target from the SDK specified by -isysroot /// (or SDKROOT). Uses the version specified in the SDKSettings.json file if /// it's available. 
-Optional +std::optional inferDeploymentTargetFromSDK(DerivedArgList &Args, - const Optional &SDKInfo) { + const std::optional &SDKInfo) { const Arg *A = Args.getLastArg(options::OPT_isysroot); if (!A) - return None; + return std::nullopt; StringRef isysroot = A->getValue(); StringRef SDK = Darwin::getSDKName(isysroot); if (!SDK.size()) - return None; + return std::nullopt; std::string Version; if (SDKInfo) { @@ -1884,10 +1885,10 @@ Version = std::string(SDK.slice(StartVer, EndVer + 1)); } if (Version.empty()) - return None; + return std::nullopt; auto CreatePlatformFromSDKName = - [&](StringRef SDK) -> Optional { + [&](StringRef SDK) -> std::optional { if (SDK.startswith("iPhoneOS") || SDK.startswith("iPhoneSimulator")) return DarwinPlatform::createFromSDK( Darwin::IPhoneOS, Version, @@ -1905,7 +1906,7 @@ /*IsSimulator=*/SDK.startswith("AppleTVSimulator")); else if (SDK.startswith("DriverKit")) return DarwinPlatform::createFromSDK(Darwin::DriverKit, Version); - return None; + return std::nullopt; }; if (auto Result = CreatePlatformFromSDKName(SDK)) return Result; @@ -1957,7 +1958,7 @@ } /// Tries to infer the target OS from the -arch. -Optional +std::optional inferDeploymentTargetFromArch(DerivedArgList &Args, const Darwin &Toolchain, const llvm::Triple &Triple, const Driver &TheDriver) { @@ -1974,22 +1975,22 @@ MachOArchName != "armv7em") OSTy = llvm::Triple::MacOSX; if (OSTy == llvm::Triple::UnknownOS) - return None; + return std::nullopt; return DarwinPlatform::createFromArch(OSTy, getOSVersion(OSTy, Triple, TheDriver)); } /// Returns the deployment target that's specified using the -target option. -Optional getDeploymentTargetFromTargetArg( +std::optional getDeploymentTargetFromTargetArg( DerivedArgList &Args, const llvm::Triple &Triple, const Driver &TheDriver, - const Optional &SDKInfo) { + const std::optional &SDKInfo) { if (!Args.hasArg(options::OPT_target)) - return None; + return std::nullopt; if (Triple.getOS() == llvm::Triple::Darwin || Triple.getOS() == llvm::Triple::UnknownOS) - return None; + return std::nullopt; std::string OSVersion = getOSVersion(Triple.getOS(), Triple, TheDriver); - Optional TargetVariantTriple; + std::optional TargetVariantTriple; for (const Arg *A : Args.filtered(options::OPT_darwin_target_variant)) { llvm::Triple TVT(A->getValue()); // Find a matching - target variant triple that can be used. @@ -2020,13 +2021,12 @@ } /// Returns the deployment target that's specified using the -mtargetos option. 
-Optional -getDeploymentTargetFromMTargetOSArg(DerivedArgList &Args, - const Driver &TheDriver, - const Optional &SDKInfo) { +std::optional getDeploymentTargetFromMTargetOSArg( + DerivedArgList &Args, const Driver &TheDriver, + const std::optional &SDKInfo) { auto *A = Args.getLastArg(options::OPT_mtargetos_EQ); if (!A) - return None; + return std::nullopt; llvm::Triple TT(llvm::Twine("unknown-apple-") + A->getValue()); switch (TT.getOS()) { case llvm::Triple::MacOSX: @@ -2037,31 +2037,31 @@ default: TheDriver.Diag(diag::err_drv_invalid_os_in_arg) << TT.getOSName() << A->getAsString(Args); - return None; + return std::nullopt; } VersionTuple Version = TT.getOSVersion(); if (!Version.getMajor()) { TheDriver.Diag(diag::err_drv_invalid_version_number) << A->getAsString(Args); - return None; + return std::nullopt; } return DarwinPlatform::createFromMTargetOS(TT.getOS(), Version, TT.getEnvironment(), A, SDKInfo); } -Optional parseSDKSettings(llvm::vfs::FileSystem &VFS, - const ArgList &Args, - const Driver &TheDriver) { +std::optional parseSDKSettings(llvm::vfs::FileSystem &VFS, + const ArgList &Args, + const Driver &TheDriver) { const Arg *A = Args.getLastArg(options::OPT_isysroot); if (!A) - return None; + return std::nullopt; StringRef isysroot = A->getValue(); auto SDKInfoOrErr = parseDarwinSDKInfo(VFS, isysroot); if (!SDKInfoOrErr) { llvm::consumeError(SDKInfoOrErr.takeError()); TheDriver.Diag(diag::warn_drv_darwin_sdk_invalid_settings); - return None; + return std::nullopt; } return *SDKInfoOrErr; } @@ -2095,7 +2095,7 @@ SDKInfo = parseSDKSettings(getVFS(), Args, getDriver()); // The OS and the version can be specified using the -target argument. - Optional OSTarget = + std::optional OSTarget = getDeploymentTargetFromTargetArg(Args, getTriple(), getDriver(), SDKInfo); if (OSTarget) { // Disallow mixing -target and -mtargetos=. @@ -2105,7 +2105,7 @@ getDriver().Diag(diag::err_drv_cannot_mix_options) << TargetArgStr << MTargetOSArgStr; } - Optional OSVersionArgTarget = + std::optional OSVersionArgTarget = getDeploymentTargetFromOSVersionArg(Args, getDriver()); if (OSVersionArgTarget) { unsigned TargetMajor, TargetMinor, TargetMicro; @@ -2140,7 +2140,7 @@ SDKInfo))) { // The OS target can be specified using the -mtargetos= argument. // Disallow mixing -mtargetos= and -mversion-min=. - Optional OSVersionArgTarget = + std::optional OSVersionArgTarget = getDeploymentTargetFromOSVersionArg(Args, getDriver()); if (OSVersionArgTarget) { std::string MTargetOSArgStr = OSTarget->getAsString(Args, Opts); @@ -2158,7 +2158,7 @@ getDeploymentTargetFromEnvironmentVariables(getDriver(), getTriple()); if (OSTarget) { // Don't infer simulator from the arch when the SDK is also specified. - Optional SDKTarget = + std::optional SDKTarget = inferDeploymentTargetFromSDK(Args, SDKInfo); if (SDKTarget) OSTarget->setEnvironment(SDKTarget->getEnvironment()); @@ -2828,8 +2828,9 @@ if (isTargetMacCatalyst()) { if (const auto *MacOStoMacCatalystMapping = SDKInfo->getVersionMapping( DarwinSDKInfo::OSEnvPair::macOStoMacCatalystPair())) { - Optional SDKVersion = MacOStoMacCatalystMapping->map( - SDKInfo->getVersion(), minimumMacCatalystDeploymentTarget(), None); + std::optional SDKVersion = MacOStoMacCatalystMapping->map( + SDKInfo->getVersion(), minimumMacCatalystDeploymentTarget(), + std::nullopt); EmitTargetSDKVersionArg( SDKVersion ? 
*SDKVersion : minimumMacCatalystDeploymentTarget()); } @@ -2848,9 +2849,10 @@ } else if (const auto *MacOStoMacCatalystMapping = SDKInfo->getVersionMapping( DarwinSDKInfo::OSEnvPair::macOStoMacCatalystPair())) { - if (Optional SDKVersion = MacOStoMacCatalystMapping->map( - SDKInfo->getVersion(), minimumMacCatalystDeploymentTarget(), - None)) { + if (std::optional SDKVersion = + MacOStoMacCatalystMapping->map( + SDKInfo->getVersion(), minimumMacCatalystDeploymentTarget(), + std::nullopt)) { std::string Arg; llvm::raw_string_ostream OS(Arg); OS << "-darwin-target-variant-sdk-version=" << *SDKVersion; @@ -3089,14 +3091,14 @@ if (TargetPlatform == IPhoneOS && TargetEnvironment == MacCatalyst) { // Mac Catalyst programs must use the appropriate iOS SDK version // that corresponds to the macOS SDK version used for the compilation. - Optional iOSSDKVersion; + std::optional iOSSDKVersion; if (SDKInfo) { if (const auto *MacOStoMacCatalystMapping = SDKInfo->getVersionMapping( DarwinSDKInfo::OSEnvPair::macOStoMacCatalystPair())) { iOSSDKVersion = MacOStoMacCatalystMapping->map( SDKInfo->getVersion().withoutBuild(), - minimumMacCatalystDeploymentTarget(), None); + minimumMacCatalystDeploymentTarget(), std::nullopt); } } CmdArgs.push_back(Args.MakeArgString( diff --git a/clang/lib/Driver/ToolChains/Flang.h b/clang/lib/Driver/ToolChains/Flang.h --- a/clang/lib/Driver/ToolChains/Flang.h +++ b/clang/lib/Driver/ToolChains/Flang.h @@ -48,6 +48,14 @@ void addPicOptions(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + /// Extract target options from the driver arguments and add them to + /// the command arguments. + /// + /// \param [in] Args The list of input driver arguments + /// \param [out] CmdArgs The list of output command arguments + void addTargetOptions(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + /// Extract other compilation options from the driver arguments and add them /// to the command arguments. /// diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -80,6 +80,32 @@ } } +void Flang::addTargetOptions(const ArgList &Args, + ArgStringList &CmdArgs) const { + const ToolChain &TC = getToolChain(); + const llvm::Triple &Triple = TC.getEffectiveTriple(); + const Driver &D = TC.getDriver(); + + std::string CPU = getCPUName(D, Args, Triple); + if (!CPU.empty()) { + CmdArgs.push_back("-target-cpu"); + CmdArgs.push_back(Args.MakeArgString(CPU)); + } + + // Add the target features. + switch (TC.getArch()) { + default: + break; + case llvm::Triple::aarch64: + [[fallthrough]]; + case llvm::Triple::x86_64: + getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false); + break; + } + + // TODO: Add target specific flags, ABI, mtune option etc. +} + static void addFloatingPointOptions(const Driver &D, const ArgList &Args, ArgStringList &CmdArgs) { StringRef FPContract; @@ -243,6 +269,9 @@ // Floating point related options addFloatingPointOptions(D, Args, CmdArgs); + // Add target args, features, etc. + addTargetOptions(Args, CmdArgs); + // Add other compile options addOtherOptions(Args, CmdArgs); diff --git a/clang/lib/Driver/ToolChains/Gnu.h b/clang/lib/Driver/ToolChains/Gnu.h --- a/clang/lib/Driver/ToolChains/Gnu.h +++ b/clang/lib/Driver/ToolChains/Gnu.h @@ -27,7 +27,7 @@ /// On Biarch systems, this corresponds to the default multilib when /// targeting the non-default multilib. Otherwise, it is empty. 
- llvm::Optional BiarchSibling; + std::optional BiarchSibling; }; bool findMIPSMultilibs(const Driver &D, const llvm::Triple &TargetTriple, @@ -201,7 +201,7 @@ Multilib SelectedMultilib; /// On Biarch systems, this corresponds to the default multilib when /// targeting the non-default multilib. Otherwise, it is empty. - llvm::Optional BiarchSibling; + std::optional BiarchSibling; GCCVersion Version; @@ -218,7 +218,7 @@ public: explicit GCCInstallationDetector(const Driver &D) : IsValid(false), D(D) {} void init(const llvm::Triple &TargetTriple, const llvm::opt::ArgList &Args, - ArrayRef ExtraTripleAliases = None); + ArrayRef ExtraTripleAliases = std::nullopt); /// Check whether we detected a valid GCC install. bool isValid() const { return IsValid; } diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -1623,7 +1623,8 @@ FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS()); tools::csky::FloatABI TheFloatABI = tools::csky::getCSKYFloatABI(D, Args); - llvm::Optional Res = tools::csky::getCSKYArchName(D, Args, TargetTriple); + std::optional Res = + tools::csky::getCSKYArchName(D, Args, TargetTriple); if (!Res) return; diff --git a/clang/lib/Driver/ToolChains/HLSL.h b/clang/lib/Driver/ToolChains/HLSL.h --- a/clang/lib/Driver/ToolChains/HLSL.h +++ b/clang/lib/Driver/ToolChains/HLSL.h @@ -29,8 +29,7 @@ llvm::opt::DerivedArgList * TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const override; - static llvm::Optional - parseTargetProfile(StringRef TargetProfile); + static std::optional parseTargetProfile(StringRef TargetProfile); }; } // end namespace toolchains diff --git a/clang/lib/Driver/ToolChains/HLSL.cpp b/clang/lib/Driver/ToolChains/HLSL.cpp --- a/clang/lib/Driver/ToolChains/HLSL.cpp +++ b/clang/lib/Driver/ToolChains/HLSL.cpp @@ -64,12 +64,12 @@ return false; } -llvm::Optional tryParseProfile(StringRef Profile) { +std::optional tryParseProfile(StringRef Profile) { // [ps|vs|gs|hs|ds|cs|ms|as]_[major]_[minor] SmallVector Parts; Profile.split(Parts, "_"); if (Parts.size() != 3) - return None; + return std::nullopt; Triple::EnvironmentType Kind = StringSwitch(Parts[0]) @@ -84,17 +84,17 @@ .Case("as", Triple::EnvironmentType::Amplification) .Default(Triple::EnvironmentType::UnknownEnvironment); if (Kind == Triple::EnvironmentType::UnknownEnvironment) - return None; + return std::nullopt; unsigned long long Major = 0; if (llvm::getAsUnsignedInteger(Parts[1], 0, Major)) - return None; + return std::nullopt; unsigned long long Minor = 0; if (Parts[2] == "x" && Kind == Triple::EnvironmentType::Library) Minor = OfflineLibMinor; else if (llvm::getAsUnsignedInteger(Parts[2], 0, Minor)) - return None; + return std::nullopt; // dxil-unknown-shadermodel-hull llvm::Triple T; @@ -105,7 +105,7 @@ if (isLegalShaderModel(T)) return T.getTriple(); else - return None; + return std::nullopt; } bool isLegalValidatorVersion(StringRef ValVersionStr, const Driver &D) { @@ -138,7 +138,7 @@ const ArgList &Args) : ToolChain(D, Triple, Args) {} -llvm::Optional +std::optional clang::driver::toolchains::HLSLToolChain::parseTargetProfile( StringRef TargetProfile) { return tryParseProfile(TargetProfile); diff --git a/clang/lib/Driver/ToolChains/Hexagon.h b/clang/lib/Driver/ToolChains/Hexagon.h --- a/clang/lib/Driver/ToolChains/Hexagon.h +++ b/clang/lib/Driver/ToolChains/Hexagon.h @@ -107,8 +107,8 @@ static StringRef 
GetDefaultCPU(); static StringRef GetTargetCPUVersion(const llvm::opt::ArgList &Args); - static Optional getSmallDataThreshold( - const llvm::opt::ArgList &Args); + static std::optional + getSmallDataThreshold(const llvm::opt::ArgList &Args); }; } // end namespace toolchains diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -120,16 +120,17 @@ HvxVerNum = 0; // Handle HVX floating point flags. - auto checkFlagHvxVersion = [&](auto FlagOn, auto FlagOff, - unsigned MinVerNum) -> Optional { - // Return an Optional: + auto checkFlagHvxVersion = + [&](auto FlagOn, auto FlagOff, + unsigned MinVerNum) -> std::optional { + // Return an std::optional: // - None indicates a verification failure, or that the flag was not // present in Args. // - Otherwise the returned value is that name of the feature to add // to Features. Arg *A = Args.getLastArg(FlagOn, FlagOff); if (!A) - return None; + return std::nullopt; StringRef OptName = A->getOption().getName(); if (A->getOption().matches(FlagOff)) @@ -137,12 +138,12 @@ if (!HasHVX) { D.Diag(diag::err_drv_needs_hvx) << withMinus(OptName); - return None; + return std::nullopt; } if (HvxVerNum < MinVerNum) { D.Diag(diag::err_drv_needs_hvx_version) << withMinus(OptName) << ("v" + std::to_string(HvxVerNum)); - return None; + return std::nullopt; } return makeFeature(OptName, true); }; @@ -519,8 +520,8 @@ return InstalledDir; } -Optional HexagonToolChain::getSmallDataThreshold( - const ArgList &Args) { +std::optional +HexagonToolChain::getSmallDataThreshold(const ArgList &Args) { StringRef Gn = ""; if (Arg *A = Args.getLastArg(options::OPT_G)) { Gn = A->getValue(); @@ -533,7 +534,7 @@ if (!Gn.getAsInteger(10, G)) return G; - return None; + return std::nullopt; } std::string HexagonToolChain::getCompilerRTPath() const { diff --git a/clang/lib/Driver/ToolChains/MSVC.h b/clang/lib/Driver/ToolChains/MSVC.h --- a/clang/lib/Driver/ToolChains/MSVC.h +++ b/clang/lib/Driver/ToolChains/MSVC.h @@ -133,7 +133,7 @@ Tool *buildLinker() const override; Tool *buildAssembler() const override; private: - llvm::Optional WinSdkDir, WinSdkVersion, WinSysRoot; + std::optional WinSdkDir, WinSdkVersion, WinSysRoot; std::string VCToolChainPath; llvm::ToolsetLayout VSLayout = llvm::ToolsetLayout::OlderVS; CudaInstallationDetector CudaInstallation; diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -410,7 +410,7 @@ if (getDriver().getInstalledDir() != getDriver().Dir) getProgramPaths().push_back(getDriver().Dir); - Optional VCToolsDir, VCToolsVersion; + std::optional VCToolsDir, VCToolsVersion; if (Arg *A = Args.getLastArg(options::OPT__SLASH_vctoolsdir)) VCToolsDir = A->getValue(); if (Arg *A = Args.getLastArg(options::OPT__SLASH_vctoolsversion)) diff --git a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp --- a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp +++ b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp @@ -157,7 +157,7 @@ Rule.match(File, &Matches); // Returned matches are always in stable order. if (Matches.size() != 4) - return None; + return std::nullopt; return path::convert_to_slash( (Matches[1].drop_front(Matches[1].rfind('/') + 1) + "/" + @@ -172,7 +172,7 @@ } // Couldn't determine a include name, use full path instead. 
- return None; + return std::nullopt; } struct LocationFileChecker { diff --git a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp --- a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp +++ b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp @@ -60,7 +60,7 @@ /// the semantic version representation of \p V. Optional serializeSemanticVersion(const VersionTuple &V) { if (V.empty()) - return None; + return std::nullopt; Object Version; Version["major"] = V.getMajor(); @@ -146,7 +146,7 @@ /// an \c Array containing the formatted availability information. Optional serializeAvailability(const AvailabilitySet &Availabilities) { if (Availabilities.isDefault()) - return None; + return std::nullopt; Array AvailabilityArray; @@ -232,7 +232,7 @@ /// formatted lines. Optional serializeDocComment(const DocComment &Comment) { if (Comment.empty()) - return None; + return std::nullopt; Object DocComment; Array LinesArray; @@ -284,7 +284,7 @@ /// declaration fragments array. Optional serializeDeclarationFragments(const DeclarationFragments &DF) { if (DF.getFragments().empty()) - return None; + return std::nullopt; Array Fragments; for (const auto &F : DF.getFragments()) { @@ -412,7 +412,7 @@ std::true_type) { const auto &FS = Record.Signature; if (FS.empty()) - return None; + return std::nullopt; Object Signature; serializeArray(Signature, "returns", @@ -436,7 +436,7 @@ template Optional serializeFunctionSignatureMixinImpl(const RecordTy &Record, std::false_type) { - return None; + return std::nullopt; } /// Serialize the function signature field, as specified by the @@ -501,7 +501,7 @@ Optional SymbolGraphSerializer::serializeAPIRecord(const RecordTy &Record) const { if (shouldSkip(Record)) - return None; + return std::nullopt; Object Obj; serializeObject(Obj, "identifier", diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -152,7 +152,7 @@ static llvm::Optional getRawStringDelimiter(StringRef TokenText) { if (TokenText.size() < 5 // The smallest raw string possible is 'R"()"'. || !TokenText.startswith("R\"") || !TokenText.endswith("\"")) { - return None; + return std::nullopt; } // A raw string starts with 'R"(' and delimiter is ascii and has @@ -160,15 +160,15 @@ // 19 bytes. size_t LParenPos = TokenText.substr(0, 19).find_first_of('('); if (LParenPos == StringRef::npos) - return None; + return std::nullopt; StringRef Delimiter = TokenText.substr(2, LParenPos - 2); // Check that the string ends in ')Delimiter"'. 
size_t RParenPos = TokenText.size() - Delimiter.size() - 2; if (TokenText[RParenPos] != ')') - return None; + return std::nullopt; if (!TokenText.substr(RParenPos + 1).startswith(Delimiter)) - return None; + return std::nullopt; return Delimiter; } @@ -209,7 +209,7 @@ RawStringFormatStyleManager::getDelimiterStyle(StringRef Delimiter) const { auto It = DelimiterStyle.find(Delimiter); if (It == DelimiterStyle.end()) - return None; + return std::nullopt; return It->second; } @@ -218,7 +218,7 @@ StringRef EnclosingFunction) const { auto It = EnclosingFunctionStyle.find(EnclosingFunction); if (It == EnclosingFunctionStyle.end()) - return None; + return std::nullopt; return It->second; } @@ -2071,17 +2071,17 @@ ContinuationIndenter::getRawStringStyle(const FormatToken &Current, const LineState &State) { if (!Current.isStringLiteral()) - return None; + return std::nullopt; auto Delimiter = getRawStringDelimiter(Current.TokenText); if (!Delimiter) - return None; + return std::nullopt; auto RawStringStyle = RawStringFormats.getDelimiterStyle(*Delimiter); if (!RawStringStyle && Delimiter->empty()) { RawStringStyle = RawStringFormats.getEnclosingFunctionStyle( getEnclosingFunctionName(Current)); } if (!RawStringStyle) - return None; + return std::nullopt; RawStringStyle->ColumnLimit = getColumnLimit(State); return RawStringStyle; } diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -1874,10 +1874,10 @@ llvm::Optional FormatStyle::FormatStyleSet::Get(FormatStyle::LanguageKind Language) const { if (!Styles) - return None; + return std::nullopt; auto It = Styles->find(Language); if (It == Styles->end()) - return None; + return std::nullopt; FormatStyle Style = It->second; Style.StyleSet = *this; return Style; @@ -3333,7 +3333,7 @@ // Make header insertion replacements insert new headers into correct blocks. 
tooling::Replacements NewReplaces = fixCppIncludeInsertions(Code, Replaces, Style); - return processReplacements(Cleanup, Code, NewReplaces, Style); + return cantFail(processReplacements(Cleanup, Code, NewReplaces, Style)); } namespace internal { diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -957,7 +957,8 @@ if (Style.AlignTrailingComments.Kind == FormatStyle::TCAS_Leave) { auto OriginalSpaces = Changes[i].OriginalWhitespaceRange.getEnd().getRawEncoding() - - Changes[i].OriginalWhitespaceRange.getBegin().getRawEncoding(); + Changes[i].OriginalWhitespaceRange.getBegin().getRawEncoding() - + Changes[i].Tok->NewlinesBefore; unsigned RestoredLineLength = Changes[i].StartOfTokenColumn + Changes[i].TokenLength + OriginalSpaces; // If leaving comments makes the line exceed the column limit, give up to diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -87,6 +87,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/Timer.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" @@ -524,6 +525,7 @@ IntrusiveRefCntPtr &Target; unsigned &Counter; bool InitializedLanguage = false; + bool InitializedHeaderSearchPaths = false; public: ASTInfoCollector(Preprocessor &PP, ASTContext *Context, @@ -550,7 +552,34 @@ bool ReadHeaderSearchOptions(const HeaderSearchOptions &HSOpts, StringRef SpecificModuleCachePath, bool Complain) override { + // Preserve previously set header search paths. + llvm::SaveAndRestore X(this->HSOpts.UserEntries); + llvm::SaveAndRestore Y(this->HSOpts.SystemHeaderPrefixes); + llvm::SaveAndRestore Z(this->HSOpts.VFSOverlayFiles); + this->HSOpts = HSOpts; + + return false; + } + + bool ReadHeaderSearchPaths(const HeaderSearchOptions &HSOpts, + bool Complain) override { + if (InitializedHeaderSearchPaths) + return false; + + this->HSOpts.UserEntries = HSOpts.UserEntries; + this->HSOpts.SystemHeaderPrefixes = HSOpts.SystemHeaderPrefixes; + this->HSOpts.VFSOverlayFiles = HSOpts.VFSOverlayFiles; + + // Initialize the FileManager. We can't do this in update(), since that + // performs the initialization too late (once both target and language + // options are read). 
+ PP.getFileManager().setVirtualFileSystem(createVFSFromOverlayFiles( + HSOpts.VFSOverlayFiles, PP.getDiagnostics(), + PP.getFileManager().getVirtualFileSystemPtr())); + + InitializedHeaderSearchPaths = true; + return false; } @@ -2217,7 +2246,7 @@ [&FileMgr](StringRef Filename) -> Optional { if (auto Status = FileMgr.getVirtualFileSystem().status(Filename)) return Status->getUniqueID(); - return None; + return std::nullopt; }; auto hasSameUniqueID = [getUniqueID](StringRef LHS, StringRef RHS) { diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -1289,7 +1289,7 @@ else if (Filename == "module.private.modulemap") llvm::sys::path::append(PublicFilename, "module.modulemap"); else - return None; + return std::nullopt; return FileMgr.getOptionalFileRef(PublicFilename); } diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -176,7 +176,7 @@ DiagnosticsEngine &Diags) { if (Args.hasArg(Opt)) return true; - return None; + return std::nullopt; } static Optional normalizeSimpleNegativeFlag(OptSpecifier Opt, unsigned, @@ -184,7 +184,7 @@ DiagnosticsEngine &) { if (Args.hasArg(Opt)) return false; - return None; + return std::nullopt; } /// The tblgen-erated code passes in a fifth parameter of an arbitrary type, but @@ -209,7 +209,7 @@ DiagnosticsEngine &) -> Optional { if (Args.hasArg(Opt)) return Value; - return None; + return std::nullopt; }; } @@ -227,7 +227,7 @@ if (const Arg *A = Args.getLastArg(Opt, OtherOpt)) { return A->getOption().matches(Opt) ? Value : OtherValue; } - return None; + return std::nullopt; }; } @@ -276,7 +276,7 @@ if (Name == Table.Table[I].Name) return Table.Table[I]; - return None; + return std::nullopt; } static Optional @@ -285,7 +285,7 @@ if (Value == Table.Table[I].Value) return Table.Table[I]; - return None; + return std::nullopt; } static llvm::Optional normalizeSimpleEnum(OptSpecifier Opt, @@ -297,7 +297,7 @@ auto *Arg = Args.getLastArg(Opt); if (!Arg) - return None; + return std::nullopt; StringRef ArgValue = Arg->getValue(); if (auto MaybeEnumVal = findValueTableByName(Table, ArgValue)) @@ -305,7 +305,7 @@ Diags.Report(diag::err_drv_invalid_value) << Arg->getAsString(Args) << ArgValue; - return None; + return std::nullopt; } static void denormalizeSimpleEnumImpl(SmallVectorImpl &Args, @@ -339,7 +339,7 @@ DiagnosticsEngine &Diags) { auto *Arg = Args.getLastArg(Opt); if (!Arg) - return None; + return std::nullopt; return std::string(Arg->getValue()); } @@ -349,12 +349,12 @@ DiagnosticsEngine &Diags) { auto *Arg = Args.getLastArg(Opt); if (!Arg) - return None; + return std::nullopt; IntTy Res; if (StringRef(Arg->getValue()).getAsInteger(0, Res)) { Diags.Report(diag::err_drv_invalid_int_value) << Arg->getAsString(Args) << Arg->getValue(); - return None; + return std::nullopt; } return Res; } @@ -402,7 +402,7 @@ DiagnosticsEngine &Diags) { auto *Arg = Args.getLastArg(Opt); if (!Arg) - return None; + return std::nullopt; return llvm::Triple::normalize(Arg->getValue()); } @@ -1064,11 +1064,12 @@ static void initOption(AnalyzerOptions::ConfigTable &Config, DiagnosticsEngine *Diags, bool &OptionField, StringRef Name, bool DefaultVal) { - auto PossiblyInvalidVal = llvm::StringSwitch>( - getStringOption(Config, Name, (DefaultVal ? 
"true" : "false"))) - .Case("true", true) - .Case("false", false) - .Default(None); + auto PossiblyInvalidVal = + llvm::StringSwitch>( + getStringOption(Config, Name, (DefaultVal ? "true" : "false"))) + .Case("true", true) + .Case("false", false) + .Default(std::nullopt); if (!PossiblyInvalidVal) { if (Diags) @@ -1382,10 +1383,10 @@ DebugInfoVal = "unused-types"; break; case codegenoptions::NoDebugInfo: // default value - DebugInfoVal = None; + DebugInfoVal = std::nullopt; break; case codegenoptions::LocTrackingOnly: // implied value - DebugInfoVal = None; + DebugInfoVal = std::nullopt; break; } if (DebugInfoVal) @@ -2504,7 +2505,7 @@ if (ActionOpt.second == Opt.getID()) return ActionOpt.first; - return None; + return std::nullopt; } /// Maps frontend action to command line option. @@ -2514,7 +2515,7 @@ if (ActionOpt.first == ProgramAction) return OptSpecifier(ActionOpt.second); - return None; + return std::nullopt; } static void GenerateFrontendArgs(const FrontendOptions &Opts, @@ -2996,8 +2997,8 @@ auto End = Opts.UserEntries.end(); // Add -I..., -F..., and -index-header-map options in order. - for (; It < End && - Matches(*It, {frontend::IndexHeaderMap, frontend::Angled}, None, true); + for (; It < End && Matches(*It, {frontend::IndexHeaderMap, frontend::Angled}, + std::nullopt, true); ++It) { OptSpecifier Opt = [It, Matches]() { if (Matches(*It, frontend::IndexHeaderMap, true, true)) @@ -3035,7 +3036,8 @@ GenerateArg(Args, OPT_idirafter, It->Path, SA); for (; It < End && Matches(*It, {frontend::Quoted}, false, true); ++It) GenerateArg(Args, OPT_iquote, It->Path, SA); - for (; It < End && Matches(*It, {frontend::System}, false, None); ++It) + for (; It < End && Matches(*It, {frontend::System}, false, std::nullopt); + ++It) GenerateArg(Args, It->IgnoreSysRoot ? OPT_isystem : OPT_iwithsysroot, It->Path, SA); for (; It < End && Matches(*It, {frontend::System}, true, true); ++It) @@ -4737,12 +4739,19 @@ clang::createVFSFromCompilerInvocation( const CompilerInvocation &CI, DiagnosticsEngine &Diags, IntrusiveRefCntPtr BaseFS) { - if (CI.getHeaderSearchOpts().VFSOverlayFiles.empty()) + return createVFSFromOverlayFiles(CI.getHeaderSearchOpts().VFSOverlayFiles, + Diags, std::move(BaseFS)); +} + +IntrusiveRefCntPtr clang::createVFSFromOverlayFiles( + ArrayRef VFSOverlayFiles, DiagnosticsEngine &Diags, + IntrusiveRefCntPtr BaseFS) { + if (VFSOverlayFiles.empty()) return BaseFS; IntrusiveRefCntPtr Result = BaseFS; // earlier vfs files are on the bottom - for (const auto &File : CI.getHeaderSearchOpts().VFSOverlayFiles) { + for (const auto &File : VFSOverlayFiles) { llvm::ErrorOr> Buffer = Result->getBufferForFile(File); if (!Buffer) { diff --git a/clang/lib/Frontend/DiagnosticRenderer.cpp b/clang/lib/Frontend/DiagnosticRenderer.cpp --- a/clang/lib/Frontend/DiagnosticRenderer.cpp +++ b/clang/lib/Frontend/DiagnosticRenderer.cpp @@ -148,7 +148,7 @@ void DiagnosticRenderer::emitBasicNote(StringRef Message) { emitDiagnosticMessage(FullSourceLoc(), PresumedLoc(), DiagnosticsEngine::Note, - Message, None, DiagOrStoredDiag()); + Message, std::nullopt, DiagOrStoredDiag()); } /// Prints an include stack when appropriate for a particular @@ -453,7 +453,7 @@ Message << "expanded from macro '" << MacroName << "'"; emitDiagnostic(SpellingLoc, DiagnosticsEngine::Note, Message.str(), - SpellingRanges, None); + SpellingRanges, std::nullopt); } /// Check that the macro argument location of Loc starts with ArgumentLoc. 
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -335,7 +335,7 @@ return std::error_code(); // Resolve all lazy header directives to header files. - ModMap.resolveHeaderDirectives(Module, /*File=*/llvm::None); + ModMap.resolveHeaderDirectives(Module, /*File=*/std::nullopt); // If any headers are missing, we can't build this module. In most cases, // diagnostics for this should have already been produced; we only get here diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -926,12 +926,13 @@ static llvm::Optional> findLinesForRange(const CharSourceRange &R, FileID FID, const SourceManager &SM) { - if (!R.isValid()) return None; + if (!R.isValid()) + return std::nullopt; SourceLocation Begin = R.getBegin(); SourceLocation End = R.getEnd(); if (SM.getFileID(Begin) != FID || SM.getFileID(End) != FID) - return None; + return std::nullopt; return std::make_pair(SM.getExpansionLineNumber(Begin), SM.getExpansionLineNumber(End)); diff --git a/clang/lib/Index/IndexingContext.h b/clang/lib/Index/IndexingContext.h --- a/clang/lib/Index/IndexingContext.h +++ b/clang/lib/Index/IndexingContext.h @@ -68,20 +68,18 @@ static bool isTemplateImplicitInstantiation(const Decl *D); bool handleDecl(const Decl *D, SymbolRoleSet Roles = SymbolRoleSet(), - ArrayRef Relations = None); + ArrayRef Relations = std::nullopt); bool handleDecl(const Decl *D, SourceLocation Loc, SymbolRoleSet Roles = SymbolRoleSet(), - ArrayRef Relations = None, + ArrayRef Relations = std::nullopt, const DeclContext *DC = nullptr); bool handleReference(const NamedDecl *D, SourceLocation Loc, - const NamedDecl *Parent, - const DeclContext *DC, + const NamedDecl *Parent, const DeclContext *DC, SymbolRoleSet Roles = SymbolRoleSet(), - ArrayRef Relations = None, - const Expr *RefE = nullptr, - const Decl *RefD = nullptr); + ArrayRef Relations = std::nullopt, + const Expr *RefE = nullptr, const Decl *RefD = nullptr); void handleMacroDefined(const IdentifierInfo &Name, SourceLocation Loc, const MacroInfo &MI); @@ -97,7 +95,7 @@ bool indexDecl(const Decl *D); void indexTagDecl(const TagDecl *D, - ArrayRef Relations = None); + ArrayRef Relations = std::nullopt); void indexTypeSourceInfo(TypeSourceInfo *TInfo, const NamedDecl *Parent, const DeclContext *DC = nullptr, diff --git a/clang/lib/Interpreter/IncrementalParser.cpp b/clang/lib/Interpreter/IncrementalParser.cpp --- a/clang/lib/Interpreter/IncrementalParser.cpp +++ b/clang/lib/Interpreter/IncrementalParser.cpp @@ -101,7 +101,6 @@ CompletionConsumer = &CI.getCodeCompletionConsumer(); Preprocessor &PP = CI.getPreprocessor(); - PP.enableIncrementalProcessing(); PP.EnterMainSourceFile(); if (!CI.hasSema()) @@ -174,9 +173,6 @@ Sema::ModuleImportState ImportState; for (bool AtEOF = P->ParseFirstTopLevelDecl(ADecl, ImportState); !AtEOF; AtEOF = P->ParseTopLevelDecl(ADecl, ImportState)) { - // If we got a null return and something *was* parsed, ignore it. This - // is due to a top-level semicolon, an action override, or a parse error - // skipping something. if (ADecl && !Consumer->HandleTopLevelDecl(ADecl.get())) return llvm::make_error("Parsing failed. 
" "The consumer rejected a decl", diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -138,13 +138,11 @@ // specified. By prepending we allow users to override the default // action and use other actions in incremental mode. // FIXME: Print proper driver diagnostics if the driver flags are wrong. - ClangArgv.insert(ClangArgv.begin() + 1, "-c"); - - if (!llvm::is_contained(ClangArgv, " -x")) { - // We do C++ by default; append right after argv[0] if no "-x" given - ClangArgv.push_back("-x"); - ClangArgv.push_back("c++"); - } + // We do C++ by default; append right after argv[0] if no "-x" given + ClangArgv.insert(ClangArgv.end(), "-xc++"); + ClangArgv.insert(ClangArgv.end(), "-Xclang"); + ClangArgv.insert(ClangArgv.end(), "-fincremental-extensions"); + ClangArgv.insert(ClangArgv.end(), "-c"); // Put a dummy C++ file on to ensure there's at least one compile job for the // driver to construct. diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -530,7 +530,7 @@ if (Tok.isNot(tok::raw_identifier)) { if (!Tok.is(tok::eod)) skipLine(First, End); - return None; + return std::nullopt; } bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning; diff --git a/clang/lib/Lex/HeaderMap.cpp b/clang/lib/Lex/HeaderMap.cpp --- a/clang/lib/Lex/HeaderMap.cpp +++ b/clang/lib/Lex/HeaderMap.cpp @@ -151,7 +151,7 @@ // Check for invalid index. if (StrTabIdx >= FileBuffer->getBufferSize()) - return None; + return std::nullopt; const char *Data = FileBuffer->getBufferStart() + StrTabIdx; unsigned MaxLen = FileBuffer->getBufferSize() - StrTabIdx; @@ -159,7 +159,7 @@ // Check whether the buffer is null-terminated. if (Len == MaxLen && Data[Len - 1]) - return None; + return std::nullopt; return StringRef(Data, Len); } diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -432,14 +432,14 @@ Diags.Report(IncludeLoc, diag::err_cannot_open_file) << FileName << EC.message(); } - return None; + return std::nullopt; } // If there is a module that corresponds to this header, suggest it. if (!findUsableModuleForHeader( &File->getFileEntry(), Dir ? Dir : File->getFileEntry().getDir(), RequestingModule, SuggestedModule, IsSystemHeaderDir)) - return None; + return std::nullopt; return *File; } @@ -487,7 +487,7 @@ SmallString<1024> Path; StringRef Dest = HM->lookupFilename(Filename, Path); if (Dest.empty()) - return None; + return std::nullopt; IsInHeaderMap = true; @@ -522,7 +522,7 @@ // function as part of the regular logic that applies to include search paths. // The case where the target file **does not exist** is handled here: HS.noteLookupUsage(HS.searchDirIdx(*this), IncludeLoc); - return None; + return std::nullopt; } /// Given a framework directory, find the top-most framework directory. @@ -595,7 +595,7 @@ // Framework names must have a '/' in the filename. size_t SlashPos = Filename.find('/'); if (SlashPos == StringRef::npos) - return None; + return std::nullopt; // Find out if this is the home for the specified framework, by checking // HeaderSearch. Possible answers are yes/no and unknown. @@ -604,7 +604,7 @@ // If it is known and in some other directory, fail. 
if (CacheEntry.Directory && CacheEntry.Directory != getFrameworkDirRef()) - return None; + return std::nullopt; // Otherwise, construct the path to this framework dir. @@ -628,7 +628,7 @@ // If the framework dir doesn't exist, we fail. auto Dir = FileMgr.getDirectory(FrameworkName); if (!Dir) - return None; + return std::nullopt; // Otherwise, if it does, remember that this is the right direntry for this // framework. @@ -711,17 +711,17 @@ if (!HS.findUsableModuleForFrameworkHeader( &File->getFileEntry(), FrameworkPath, RequestingModule, SuggestedModule, IsSystem)) - return None; + return std::nullopt; } else { if (!HS.findUsableModuleForHeader(&File->getFileEntry(), getDir(), RequestingModule, SuggestedModule, IsSystem)) - return None; + return std::nullopt; } } if (File) return *File; - return None; + return std::nullopt; } void HeaderSearch::cacheLookupSuccess(LookupFileCacheInfo &CacheLookup, @@ -880,7 +880,7 @@ // If this was an #include_next "/absolute/file", fail. if (FromDir) - return None; + return std::nullopt; if (SearchPath) SearchPath->clear(); @@ -1166,7 +1166,7 @@ // Otherwise, didn't find it. Remember we didn't find this. CacheLookup.HitIt = search_dir_end(); - return None; + return std::nullopt; } /// LookupSubframeworkHeader - Look up a subframework for the specified @@ -1184,7 +1184,7 @@ // FIXME: Should we permit '\' on Windows? size_t SlashPos = Filename.find('/'); if (SlashPos == StringRef::npos) - return None; + return std::nullopt; // Look up the base framework name of the ContextFileEnt. StringRef ContextName = ContextFileEnt->getName(); @@ -1195,7 +1195,7 @@ if (FrameworkPos == StringRef::npos || (ContextName[FrameworkPos + DotFrameworkLen] != '/' && ContextName[FrameworkPos + DotFrameworkLen] != '\\')) - return None; + return std::nullopt; SmallString<1024> FrameworkName(ContextName.data(), ContextName.data() + FrameworkPos + @@ -1215,7 +1215,7 @@ CacheLookup.first().size() == FrameworkName.size() && memcmp(CacheLookup.first().data(), &FrameworkName[0], CacheLookup.first().size()) != 0) - return None; + return std::nullopt; // Cache subframework. if (!CacheLookup.second.Directory) { @@ -1224,7 +1224,7 @@ // If the framework dir doesn't exist, we fail. auto Dir = FileMgr.getOptionalDirectoryRef(FrameworkName); if (!Dir) - return None; + return std::nullopt; // Otherwise, if it does, remember that this is the right direntry for this // framework. @@ -1262,7 +1262,7 @@ File = FileMgr.getOptionalFileRef(HeadersFilename, /*OpenFile=*/true); if (!File) - return None; + return std::nullopt; } // This file is a system header or C++ unfriendly if the old file is. @@ -1277,7 +1277,7 @@ if (!findUsableModuleForFrameworkHeader(&File->getFileEntry(), FrameworkName, RequestingModule, SuggestedModule, /*IsSystem*/ false)) - return None; + return std::nullopt; return *File; } diff --git a/clang/lib/Lex/InitHeaderSearch.cpp b/clang/lib/Lex/InitHeaderSearch.cpp --- a/clang/lib/Lex/InitHeaderSearch.cpp +++ b/clang/lib/Lex/InitHeaderSearch.cpp @@ -62,14 +62,14 @@ /// if used. /// Returns true if the path exists, false if it was ignored. bool AddPath(const Twine &Path, IncludeDirGroup Group, bool isFramework, - Optional UserEntryIdx = None); + Optional UserEntryIdx = std::nullopt); /// Add the specified path to the specified group list, without performing any /// sysroot remapping. /// Returns true if the path exists, false if it was ignored. 
bool AddUnmappedPath(const Twine &Path, IncludeDirGroup Group, bool isFramework, - Optional UserEntryIdx = None); + Optional UserEntryIdx = std::nullopt); /// Add the specified prefix to the system header prefix list. void AddSystemHeaderPrefix(StringRef Prefix, bool IsSystemHeader) { diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -1261,7 +1261,7 @@ const LangOptions &LangOpts) { if (Loc.isMacroID()) { if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc)) - return None; + return std::nullopt; } Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts); @@ -1272,7 +1272,7 @@ bool InvalidTemp = false; StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp); if (InvalidTemp) - return None; + return std::nullopt; const char *TokenBegin = File.data() + LocInfo.second; @@ -3216,7 +3216,7 @@ if (!LangOpts.CPlusPlus && !LangOpts.C99) { if (Diagnose) Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89); - return llvm::None; + return std::nullopt; } const char *CurPtr = StartPtr + CharSize; @@ -3244,13 +3244,13 @@ if (Diagnose) Diag(BufferPtr, diag::warn_delimited_ucn_incomplete) << StringRef(KindLoc, 1); - return llvm::None; + return std::nullopt; } if (CodePoint & 0xF000'0000) { if (Diagnose) Diag(KindLoc, diag::err_escape_too_large) << 0; - return llvm::None; + return std::nullopt; } CodePoint <<= 4; @@ -3264,13 +3264,13 @@ Diag(StartPtr, FoundEndDelimiter ? diag::warn_delimited_ucn_empty : diag::warn_ucn_escape_no_digits) << StringRef(KindLoc, 1); - return llvm::None; + return std::nullopt; } if (Delimited && Kind == 'U') { if (Diagnose) Diag(StartPtr, diag::err_hex_escape_no_digits) << StringRef(KindLoc, 1); - return llvm::None; + return std::nullopt; } if (!Delimited && Count != NumHexDigits) { @@ -3283,7 +3283,7 @@ << FixItHint::CreateReplacement(URange, "u"); } } - return llvm::None; + return std::nullopt; } if (Delimited && PP) { @@ -3321,7 +3321,7 @@ if (C != '{') { if (Diagnose) Diag(StartPtr, diag::warn_ucn_escape_incomplete); - return llvm::None; + return std::nullopt; } CurPtr += CharSize; const char *StartName = CurPtr; @@ -3345,7 +3345,7 @@ Diag(StartPtr, FoundEndDelimiter ? diag::warn_delimited_ucn_empty : diag::warn_delimited_ucn_incomplete) << StringRef(KindLoc, 1); - return llvm::None; + return std::nullopt; } StringRef Name(Buffer.data(), Buffer.size()); @@ -3367,7 +3367,7 @@ // When finding a match using Unicode loose matching rules // recover after having emitted a diagnostic. if (!LooseMatch) - return llvm::None; + return std::nullopt; // We do not offer misspelled character names suggestions here // as the set of what would be a valid suggestion depends on context, // and we should not make invalid suggestions. diff --git a/clang/lib/Lex/MacroArgs.cpp b/clang/lib/Lex/MacroArgs.cpp --- a/clang/lib/Lex/MacroArgs.cpp +++ b/clang/lib/Lex/MacroArgs.cpp @@ -169,7 +169,7 @@ std::vector &Result = PreExpArgTokens[Arg]; if (!Result.empty()) return Result; - SaveAndRestore PreExpandingMacroArgs(PP.InMacroArgPreExpansion, true); + SaveAndRestore PreExpandingMacroArgs(PP.InMacroArgPreExpansion, true); const Token *AT = getUnexpArgument(Arg); unsigned NumToks = getArgLength(AT)+1; // Include the EOF. 
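The SaveAndRestore guards touched in the preprocessor hunks above scope a temporary override of a flag and restore the old value on exit; the new code (as in the ASTUnit.cpp hunk earlier) relies on C++17 class template argument deduction, so the element type no longer needs to be spelled out. A minimal stand-alone sketch of that idiom; the class below is a simplified analogue written for illustration, not llvm::SaveAndRestore itself:

// Simplified stand-in for llvm::SaveAndRestore: saves the current value on
// construction, optionally installs a new value, and restores the saved value
// when the guard goes out of scope.
template <typename T> class SaveAndRestore {
  T &Ref;
  T Saved;

public:
  explicit SaveAndRestore(T &X) : Ref(X), Saved(X) {}
  SaveAndRestore(T &X, const T &NewValue) : Ref(X), Saved(X) { Ref = NewValue; }
  ~SaveAndRestore() { Ref = Saved; }
};

bool InMacroArgPreExpansion = false;

void preExpandMacroArgs() {
  // The element type (bool) is deduced from the flag, so no explicit template
  // argument is needed.
  SaveAndRestore Guard(InMacroArgPreExpansion, true);
  // InMacroArgPreExpansion is true inside this scope...
} // ...and restored to false when Guard is destroyed.
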
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -189,7 +189,7 @@ expectedToOptional(SourceMgr.getFileManager().getFileRef(Filename)); if (!File || (Header.Size && File->getSize() != *Header.Size) || (Header.ModTime && File->getModificationTime() != *Header.ModTime)) - return None; + return std::nullopt; return *File; }; @@ -247,7 +247,7 @@ << Header.FileName << M->getFullModuleName(); NeedsFramework = true; } - return None; + return std::nullopt; } return NormalHdrFile; @@ -482,7 +482,7 @@ if (RequestingModule) { resolveUses(RequestingModule, /*Complain=*/false); - resolveHeaderDirectives(RequestingModule, /*File=*/llvm::None); + resolveHeaderDirectives(RequestingModule, /*File=*/std::nullopt); } bool Excluded = false; @@ -690,7 +690,7 @@ if (findOrCreateModuleForHeaderInUmbrellaDir(File)) return Headers.find(File)->second; - return None; + return std::nullopt; } ArrayRef @@ -699,7 +699,7 @@ resolveHeaderDirectives(File); auto It = Headers.find(File); if (It == Headers.end()) - return None; + return std::nullopt; return It->second; } @@ -1262,7 +1262,7 @@ Optional ModuleMap::getContainingModuleMapFile(const Module *Module) const { if (Module->DefinitionLoc.isInvalid()) - return None; + return std::nullopt; return SourceMgr.getFileEntryRefForID( SourceMgr.getFileID(Module->DefinitionLoc)); @@ -1303,9 +1303,16 @@ // Canonicalize the directory. StringRef CanonicalDir = FM.getCanonicalName(*DirEntry); if (CanonicalDir != Dir) { - bool Done = llvm::sys::path::replace_path_prefix(Path, Dir, CanonicalDir); - (void)Done; - assert(Done && "Path should always start with Dir"); + auto CanonicalDirEntry = FM.getDirectory(CanonicalDir); + // Only use the canonicalized path if it resolves to the same entry as the + // original. This is not true if there's a VFS overlay on top of a FS where + // the directory is a symlink. The overlay would not remap the target path + // of the symlink to the same directory entry in that case. + if (CanonicalDirEntry && *CanonicalDirEntry == *DirEntry) { + bool Done = llvm::sys::path::replace_path_prefix(Path, Dir, CanonicalDir); + (void)Done; + assert(Done && "Path should always start with Dir"); + } } // In theory, the filename component should also be canonicalized if it diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -307,7 +307,7 @@ if (SimilarStr) { return SimilarStr->first; } else { - return None; + return std::nullopt; } } @@ -491,8 +491,7 @@ // lookup pointer. assert(!SkippingExcludedConditionalBlock && "calling SkipExcludedConditionalBlock recursively"); - llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, - true); + llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true); ++NumSkipped; assert(!CurTokenLexer && CurPPLexer && "Lexing a macro, not a file?"); @@ -1078,7 +1077,7 @@ } // Otherwise, we really couldn't find the file. 
- return None; + return std::nullopt; } //===----------------------------------------------------------------------===// @@ -2021,7 +2020,7 @@ return File; if (SuppressIncludeNotFoundError) - return None; + return std::nullopt; // If the file could not be located and it was included via angle // brackets, we can attempt a lookup as though it were a quoted path to @@ -2096,7 +2095,7 @@ << CacheEntry.Directory->getName(); } - return None; + return std::nullopt; } /// Handle either a #include-like directive or an import declaration that names diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp --- a/clang/lib/Lex/PPExpressions.cpp +++ b/clang/lib/Lex/PPExpressions.cpp @@ -869,7 +869,7 @@ /// to "!defined(X)" return X in IfNDefMacro. Preprocessor::DirectiveEvalResult Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { - SaveAndRestore PPDir(ParsingIfOrElifDirective, true); + SaveAndRestore PPDir(ParsingIfOrElifDirective, true); // Save the current state of 'DisableMacroExpansion' and reset it to false. If // 'DisableMacroExpansion' is true, then we must be in a macro argument list // in which case a directive is undefined behavior. We want macros to be able diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -285,7 +285,8 @@ // Dump module macros. llvm::DenseSet Active; - for (auto *MM : State ? State->getActiveModuleMacros(*this, II) : None) + for (auto *MM : + State ? State->getActiveModuleMacros(*this, II) : std::nullopt) Active.insert(MM); llvm::DenseSet Visited; llvm::SmallVector Worklist(Leaf.begin(), Leaf.end()); diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -2421,8 +2421,8 @@ // Recover as if it were an explicit specialization. TemplateParameterLists FakedParamLists; FakedParamLists.push_back(Actions.ActOnTemplateParameterList( - 0, SourceLocation(), TemplateInfo.TemplateLoc, LAngleLoc, None, - LAngleLoc, nullptr)); + 0, SourceLocation(), TemplateInfo.TemplateLoc, LAngleLoc, + std::nullopt, LAngleLoc, nullptr)); ThisDecl = Actions.ActOnTemplateDeclarator(getCurScope(), FakedParamLists, D); @@ -5381,6 +5381,25 @@ } } +Parser::DeclGroupPtrTy Parser::ParseTopLevelStmtDecl() { + assert(PP.isIncrementalProcessingEnabled() && "Not in incremental mode"); + + // Parse a top-level-stmt. + Parser::StmtVector Stmts; + ParsedStmtContext SubStmtCtx = ParsedStmtContext(); + StmtResult R = ParseStatementOrDeclaration(Stmts, SubStmtCtx); + if (!R.isUsable()) + return nullptr; + + SmallVector DeclsInGroup; + DeclsInGroup.push_back(Actions.ActOnTopLevelStmtDecl(R.get())); + // Currently happens for things like -fms-extensions and use `__if_exists`. + for (Stmt *S : Stmts) + DeclsInGroup.push_back(Actions.ActOnTopLevelStmtDecl(S)); + + return Actions.BuildDeclaratorGroup(DeclsInGroup); +} + /// isDeclarationSpecifier() - Return true if the current token is part of a /// declaration specifier. /// diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -1999,8 +1999,8 @@ // "template<>", so that we treat this construct as a class // template specialization. 
FakedParamLists.push_back(Actions.ActOnTemplateParameterList( - 0, SourceLocation(), TemplateInfo.TemplateLoc, LAngleLoc, None, - LAngleLoc, nullptr)); + 0, SourceLocation(), TemplateInfo.TemplateLoc, LAngleLoc, + std::nullopt, LAngleLoc, nullptr)); TemplateParams = &FakedParamLists; } } diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -3612,8 +3612,8 @@ /*NumExceptions=*/0, /*NoexceptExpr=*/nullptr, /*ExceptionSpecTokens=*/nullptr, - /*DeclsInPrototype=*/None, CaretLoc, - CaretLoc, ParamInfo), + /*DeclsInPrototype=*/std::nullopt, + CaretLoc, CaretLoc, ParamInfo), CaretLoc); MaybeParseGNUAttributes(ParamInfo); @@ -3702,11 +3702,11 @@ if (Tok.is(tok::code_completion)) { cutOffParsing(); Actions.CodeCompleteAvailabilityPlatformName(); - return None; + return std::nullopt; } if (Tok.isNot(tok::identifier)) { Diag(Tok, diag::err_avail_query_expected_platform_name); - return None; + return std::nullopt; } IdentifierLoc *PlatformIdentifier = ParseIdentifierLoc(); @@ -3714,7 +3714,7 @@ VersionTuple Version = ParseVersionTuple(VersionRange); if (Version.empty()) - return None; + return std::nullopt; StringRef GivenPlatform = PlatformIdentifier->Ident->getName(); StringRef Platform = @@ -3724,7 +3724,7 @@ Diag(PlatformIdentifier->Loc, diag::err_avail_query_unrecognized_platform_name) << GivenPlatform; - return None; + return std::nullopt; } return AvailabilitySpec(Version, Platform, PlatformIdentifier->Loc, diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -1443,8 +1443,8 @@ DynamicExceptions.size(), NoexceptExpr.isUsable() ? NoexceptExpr.get() : nullptr, /*ExceptionSpecTokens*/ nullptr, - /*DeclsInPrototype=*/None, LParenLoc, FunLocalRangeEnd, D, - TrailingReturnType, TrailingReturnTypeLoc, &DS), + /*DeclsInPrototype=*/std::nullopt, LParenLoc, FunLocalRangeEnd, + D, TrailingReturnType, TrailingReturnTypeLoc, &DS), std::move(Attr), DeclEndLoc); }; diff --git a/clang/lib/Parse/ParseInit.cpp b/clang/lib/Parse/ParseInit.cpp --- a/clang/lib/Parse/ParseInit.cpp +++ b/clang/lib/Parse/ParseInit.cpp @@ -451,7 +451,7 @@ if (!getLangOpts().CPlusPlus) Diag(LBraceLoc, diag::ext_gnu_empty_initializer); // Match the '}'. - return Actions.ActOnInitList(LBraceLoc, None, ConsumeBrace()); + return Actions.ActOnInitList(LBraceLoc, std::nullopt, ConsumeBrace()); } // Enter an appropriate expression evaluation context for an initializer list. diff --git a/clang/lib/Parse/ParseObjc.cpp b/clang/lib/Parse/ParseObjc.cpp --- a/clang/lib/Parse/ParseObjc.cpp +++ b/clang/lib/Parse/ParseObjc.cpp @@ -3201,14 +3201,14 @@ if (Tok.is(tok::code_completion)) { cutOffParsing(); if (SuperLoc.isValid()) - Actions.CodeCompleteObjCSuperMessage(getCurScope(), SuperLoc, None, - false); + Actions.CodeCompleteObjCSuperMessage(getCurScope(), SuperLoc, + std::nullopt, false); else if (ReceiverType) - Actions.CodeCompleteObjCClassMessage(getCurScope(), ReceiverType, None, - false); + Actions.CodeCompleteObjCClassMessage(getCurScope(), ReceiverType, + std::nullopt, false); else Actions.CodeCompleteObjCInstanceMessage(getCurScope(), ReceiverExpr, - None, false); + std::nullopt, false); return ExprError(); } @@ -3540,9 +3540,8 @@ // We have a valid expression. Collect it in a vector so we can // build the argument list. 
- ObjCDictionaryElement Element = { - KeyExpr.get(), ValueExpr.get(), EllipsisLoc, None - }; + ObjCDictionaryElement Element = {KeyExpr.get(), ValueExpr.get(), + EllipsisLoc, std::nullopt}; Elements.push_back(Element); if (!TryConsumeToken(tok::comma) && Tok.isNot(tok::r_brace)) diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -1790,7 +1790,7 @@ BalancedDelimiterTracker T(P, tok::l_paren, tok::annot_pragma_openmp_end); if (T.expectAndConsume(diag::err_expected_lparen_after, getOpenMPClauseName(Kind).data())) - return llvm::None; + return std::nullopt; unsigned Type = getOpenMPSimpleClauseType( Kind, Tok.isAnnotation() ? "" : P.getPreprocessor().getSpelling(Tok), @@ -2961,7 +2961,7 @@ DKind == OMPD_target_exit_data) { Actions.ActOnOpenMPRegionStart(DKind, getCurScope()); AssociatedStmt = (Sema::CompoundScopeRAII(Actions), - Actions.ActOnCompoundStmt(Loc, Loc, llvm::None, + Actions.ActOnCompoundStmt(Loc, Loc, std::nullopt, /*isStmtExpr=*/false)); AssociatedStmt = Actions.ActOnOpenMPRegionEnd(AssociatedStmt, Clauses); } diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp --- a/clang/lib/Parse/ParsePragma.cpp +++ b/clang/lib/Parse/ParsePragma.cpp @@ -3203,7 +3203,7 @@ .Case("reassociate", TokFPAnnotValue::Reassociate) .Case("exceptions", TokFPAnnotValue::Exceptions) .Case("eval_method", TokFPAnnotValue::EvalMethod) - .Default(None); + .Default(std::nullopt); if (!FlagKind) { PP.Diag(Tok.getLocation(), diag::err_pragma_fp_invalid_option) << /*MissingOption=*/false << OptionInfo; @@ -3236,7 +3236,7 @@ .Case("on", LangOptions::FPModeKind::FPM_On) .Case("off", LangOptions::FPModeKind::FPM_Off) .Case("fast", LangOptions::FPModeKind::FPM_Fast) - .Default(llvm::None); + .Default(std::nullopt); if (!AnnotValue->ContractValue) { PP.Diag(Tok.getLocation(), diag::err_pragma_fp_invalid_argument) << PP.getSpelling(Tok) << OptionInfo->getName() << *FlagKind; @@ -3248,7 +3248,7 @@ II->getName()) .Case("on", LangOptions::FPModeKind::FPM_On) .Case("off", LangOptions::FPModeKind::FPM_Off) - .Default(llvm::None); + .Default(std::nullopt); if (!AnnotValue->ReassociateValue) { PP.Diag(Tok.getLocation(), diag::err_pragma_fp_invalid_argument) << PP.getSpelling(Tok) << OptionInfo->getName() << *FlagKind; @@ -3261,7 +3261,7 @@ .Case("ignore", LangOptions::FPE_Ignore) .Case("maytrap", LangOptions::FPE_MayTrap) .Case("strict", LangOptions::FPE_Strict) - .Default(llvm::None); + .Default(std::nullopt); if (!AnnotValue->ExceptionsValue) { PP.Diag(Tok.getLocation(), diag::err_pragma_fp_invalid_argument) << PP.getSpelling(Tok) << OptionInfo->getName() << *FlagKind; @@ -3274,7 +3274,7 @@ .Case("source", LangOptions::FPEvalMethodKind::FEM_Source) .Case("double", LangOptions::FPEvalMethodKind::FEM_Double) .Case("extended", LangOptions::FPEvalMethodKind::FEM_Extended) - .Default(llvm::None); + .Default(std::nullopt); if (!AnnotValue->EvalMethodValue) { PP.Diag(Tok.getLocation(), diag::err_pragma_fp_invalid_argument) << PP.getSpelling(Tok) << OptionInfo->getName() << *FlagKind; diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -2451,7 +2451,8 @@ // If the function body could not be parsed, make a bogus compoundstmt. 
if (FnBody.isInvalid()) { Sema::CompoundScopeRAII CompoundScope(Actions); - FnBody = Actions.ActOnCompoundStmt(LBraceLoc, LBraceLoc, None, false); + FnBody = + Actions.ActOnCompoundStmt(LBraceLoc, LBraceLoc, std::nullopt, false); } BodyScope.Exit(); @@ -2488,7 +2489,8 @@ // compound statement as the body. if (FnBody.isInvalid()) { Sema::CompoundScopeRAII CompoundScope(Actions); - FnBody = Actions.ActOnCompoundStmt(LBraceLoc, LBraceLoc, None, false); + FnBody = + Actions.ActOnCompoundStmt(LBraceLoc, LBraceLoc, std::nullopt, false); } BodyScope.Exit(); diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp --- a/clang/lib/Parse/ParseTemplate.cpp +++ b/clang/lib/Parse/ParseTemplate.cpp @@ -341,8 +341,8 @@ // Recover as if it were an explicit specialization. TemplateParameterLists FakedParamLists; FakedParamLists.push_back(Actions.ActOnTemplateParameterList( - 0, SourceLocation(), TemplateInfo.TemplateLoc, LAngleLoc, None, - LAngleLoc, nullptr)); + 0, SourceLocation(), TemplateInfo.TemplateLoc, LAngleLoc, + std::nullopt, LAngleLoc, nullptr)); return ParseFunctionDefinition( DeclaratorInfo, ParsedTemplateInfo(&FakedParamLists, diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp --- a/clang/lib/Parse/ParseTentative.cpp +++ b/clang/lib/Parse/ParseTentative.cpp @@ -46,7 +46,10 @@ /// 'using' 'namespace' '::'[opt] nested-name-specifier[opt] /// namespace-name ';' /// -bool Parser::isCXXDeclarationStatement() { +bool Parser::isCXXDeclarationStatement( + bool DisambiguatingWithExpression /*=false*/) { + assert(getLangOpts().CPlusPlus && "Must be called for C++ only."); + switch (Tok.getKind()) { // asm-definition case tok::kw_asm: @@ -59,6 +62,42 @@ case tok::kw_static_assert: case tok::kw__Static_assert: return true; + case tok::identifier: { + if (DisambiguatingWithExpression) { + RevertingTentativeParsingAction TPA(*this); + // Parse the C++ scope specifier. + CXXScopeSpec SS; + ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr, + /*ObjectHasErrors=*/false, + /*EnteringContext=*/true); + + switch (Tok.getKind()) { + case tok::identifier: { + IdentifierInfo *II = Tok.getIdentifierInfo(); + bool isDeductionGuide = + Actions.isDeductionGuideName(getCurScope(), *II, Tok.getLocation(), + /*Template=*/nullptr); + if (Actions.isCurrentClassName(*II, getCurScope(), &SS) || + isDeductionGuide) { + if (isConstructorDeclarator(/*Unqualified=*/SS.isEmpty(), + isDeductionGuide, + DeclSpec::FriendSpecified::No)) + return true; + } + break; + } + case tok::kw_operator: + return true; + case tok::annot_cxxscope: // Check if this is a dtor. + if (NextToken().is(tok::tilde)) + return true; + break; + default: + break; + } + } + } + [[fallthrough]]; // simple-declaration default: return isCXXSimpleDeclaration(/*AllowForRangeDecl=*/false); diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -707,8 +707,7 @@ // Late template parsing can begin. Actions.SetLateTemplateParser(LateTemplateParserCallback, nullptr, this); - if (!PP.isIncrementalProcessingEnabled()) - Actions.ActOnEndOfTranslationUnit(); + Actions.ActOnEndOfTranslationUnit(); //else don't tell Sema that we ended parsing: more input might come. return true; @@ -918,7 +917,7 @@ if (CurParsedObjCImpl) { // Code-complete Objective-C methods even without leading '-'/'+' prefix. 
Actions.CodeCompleteObjCMethodDecl(getCurScope(), - /*IsInstanceMethod=*/None, + /*IsInstanceMethod=*/std::nullopt, /*ReturnType=*/nullptr); } Actions.CodeCompleteOrdinaryName( @@ -1029,8 +1028,13 @@ ConsumeToken(); return nullptr; } + if (PP.isIncrementalProcessingEnabled() && + !isDeclarationStatement(/*DisambiguatingWithExpression=*/true)) + return ParseTopLevelStmtDecl(); + // We can't tell whether this is a function-definition or declaration yet. - return ParseDeclarationOrFunctionDefinition(Attrs, DeclSpecAttrs, DS); + if (!SingleDecl) + return ParseDeclarationOrFunctionDefinition(Attrs, DeclSpecAttrs, DS); } // This routine returns a DeclGroup, if the thing we parsed only contains a diff --git a/clang/lib/Sema/Scope.cpp b/clang/lib/Sema/Scope.cpp --- a/clang/lib/Sema/Scope.cpp +++ b/clang/lib/Sema/Scope.cpp @@ -91,7 +91,7 @@ UsingDirectives.clear(); Entity = nullptr; ErrorTrap.reset(); - NRVO = None; + NRVO = std::nullopt; } bool Scope::containedInPrototypeScope() const { diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -2475,7 +2475,7 @@ if (IsMemExpr && !E.isTypeDependent()) { Sema::TentativeAnalysisScope Trap(*this); ExprResult R = BuildCallToMemberFunction(nullptr, &E, SourceLocation(), - None, SourceLocation()); + std::nullopt, SourceLocation()); if (R.isUsable()) { ZeroArgCallReturnTy = R.get()->getType(); return true; @@ -2625,7 +2625,7 @@ // FIXME: Try this before emitting the fixit, and suppress diagnostics // while doing so. - E = BuildCallExpr(nullptr, E.get(), Range.getEnd(), None, + E = BuildCallExpr(nullptr, E.get(), Range.getEnd(), std::nullopt, Range.getEnd().getLocWithOffset(1)); return true; } diff --git a/clang/lib/Sema/SemaAttr.cpp b/clang/lib/Sema/SemaAttr.cpp --- a/clang/lib/Sema/SemaAttr.cpp +++ b/clang/lib/Sema/SemaAttr.cpp @@ -890,7 +890,7 @@ using namespace attr; switch (Rule) { default: - return None; + return std::nullopt; #define ATTR_MATCH_RULE(Value, Spelling, IsAbstract) #define ATTR_MATCH_SUB_RULE(Value, Spelling, IsAbstract, Parent, IsNegated) \ case Value: \ diff --git a/clang/lib/Sema/SemaAvailability.cpp b/clang/lib/Sema/SemaAvailability.cpp --- a/clang/lib/Sema/SemaAvailability.cpp +++ b/clang/lib/Sema/SemaAvailability.cpp @@ -253,7 +253,7 @@ if (!Name.empty() && (Name.front() == '-' || Name.front() == '+')) Name = Name.drop_front(1); if (Name.empty()) - return None; + return std::nullopt; Name.split(SlotNames, ':'); unsigned NumParams; if (Name.back() == ':') { @@ -263,7 +263,7 @@ } else { if (SlotNames.size() != 1) // Not a valid method name, just a colon-separated string. - return None; + return std::nullopt; NumParams = 0; } // Verify all slot names are valid. @@ -272,7 +272,7 @@ if (S.empty()) continue; if (!isValidAsciiIdentifier(S, AllowDollar)) - return None; + return std::nullopt; } return NumParams; } @@ -286,14 +286,14 @@ return AttributeInsertion::createInsertionAfter(D); if (const auto *MD = dyn_cast(D)) { if (MD->hasBody()) - return None; + return std::nullopt; return AttributeInsertion::createInsertionAfter(D); } if (const auto *TD = dyn_cast(D)) { SourceLocation Loc = Lexer::getLocForEndOfToken(TD->getInnerLocStart(), 0, SM, LangOpts); if (Loc.isInvalid()) - return None; + return std::nullopt; // Insert after the 'struct'/whatever keyword. 
return AttributeInsertion::createInsertionAfter(Loc); } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1066,7 +1066,7 @@ ? DABAttr->argIndices_begin()[Index] : Index - DABIndices + FD->getNumParams(); if (NewIndex >= TheCall->getNumArgs()) - return llvm::None; + return std::nullopt; return NewIndex; }; @@ -1074,12 +1074,12 @@ [&](unsigned Index) -> Optional { Optional IndexOptional = TranslateIndex(Index); if (!IndexOptional) - return llvm::None; + return std::nullopt; unsigned NewIndex = *IndexOptional; Expr::EvalResult Result; Expr *SizeArg = TheCall->getArg(NewIndex); if (!SizeArg->EvaluateAsInt(Result, getASTContext())) - return llvm::None; + return std::nullopt; llvm::APSInt Integer = Result.Val.getInt(); Integer.setIsUnsigned(true); return Integer; @@ -1099,16 +1099,16 @@ Optional IndexOptional = TranslateIndex(Index); if (!IndexOptional) - return llvm::None; + return std::nullopt; unsigned NewIndex = *IndexOptional; if (NewIndex >= TheCall->getNumArgs()) - return llvm::None; + return std::nullopt; const Expr *ObjArg = TheCall->getArg(NewIndex); uint64_t Result; if (!ObjArg->tryEvaluateObjectSize(Result, getASTContext(), BOSType)) - return llvm::None; + return std::nullopt; // Get the object size in the target's size_t width. return llvm::APSInt::getUnsigned(Result).extOrTrunc(SizeTypeWidth); @@ -1117,13 +1117,13 @@ auto ComputeStrLenArgument = [&](unsigned Index) -> Optional { Optional IndexOptional = TranslateIndex(Index); if (!IndexOptional) - return llvm::None; + return std::nullopt; unsigned NewIndex = *IndexOptional; const Expr *ObjArg = TheCall->getArg(NewIndex); uint64_t Result; if (!ObjArg->tryEvaluateStrLen(Result, getASTContext())) - return llvm::None; + return std::nullopt; // Add 1 for null byte. 
return llvm::APSInt::getUnsigned(Result + 1).extOrTrunc(SizeTypeWidth); }; @@ -9129,7 +9129,7 @@ EmitFormatDiagnostic(Sema &S, bool inFunctionCall, const Expr *ArgumentExpr, const PartialDiagnostic &PDiag, SourceLocation StringLoc, bool IsStringLocation, Range StringRange, - ArrayRef Fixit = None); + ArrayRef Fixit = std::nullopt); protected: bool HandleInvalidConversionSpecifier(unsigned argIndex, SourceLocation Loc, @@ -9156,7 +9156,7 @@ template void EmitFormatDiagnostic(PartialDiagnostic PDiag, SourceLocation StringLoc, bool IsStringLocation, Range StringRange, - ArrayRef Fixit = None); + ArrayRef Fixit = std::nullopt); }; } // namespace @@ -12739,7 +12739,7 @@ if (R & EQ) return StringRef("'std::strong_ordering::equal'"); if (R & LTFlag) return StringRef("'std::strong_ordering::less'"); if (R & GTFlag) return StringRef("'std::strong_ordering::greater'"); - return llvm::None; + return std::nullopt; } ComparisonResult TrueFlag, FalseFlag; @@ -12764,7 +12764,7 @@ return StringRef("true"); if (R & FalseFlag) return StringRef("false"); - return llvm::None; + return std::nullopt; } }; } @@ -15584,8 +15584,8 @@ void Sema::CheckCompletedExpr(Expr *E, SourceLocation CheckLoc, bool IsConstexpr) { - llvm::SaveAndRestore ConstantContext( - isConstantEvaluatedOverride, IsConstexpr || isa(E)); + llvm::SaveAndRestore ConstantContext(isConstantEvaluatedOverride, + IsConstexpr || isa(E)); CheckImplicitConversions(E, CheckLoc); if (!E->isInstantiationDependent()) CheckUnsequencedOperations(E); @@ -15751,12 +15751,12 @@ QualType PointeeType = PtrE->getType()->getPointeeType(); if (!PointeeType->isConstantSizeType()) - return llvm::None; + return std::nullopt; auto P = getBaseAlignmentAndOffsetFromPtr(PtrE, Ctx); if (!P) - return llvm::None; + return std::nullopt; CharUnits EltSize = Ctx.getTypeSizeInChars(PointeeType); if (Optional IdxRes = IntE->getIntegerConstantExpr(Ctx)) { @@ -15859,7 +15859,7 @@ break; } } - return llvm::None; + return std::nullopt; } /// This helper function takes a pointer expression and returns the alignment of @@ -15924,7 +15924,7 @@ break; } } - return llvm::None; + return std::nullopt; } static CharUnits getPresumedAlignmentOfPointer(const Expr *E, Sema &S) { @@ -16491,7 +16491,7 @@ Message->getReceiverInterface(), NSAPI::ClassId_NSMutableArray); if (!IsMutableArray) { - return None; + return std::nullopt; } Selector Sel = Message->getSelector(); @@ -16499,7 +16499,7 @@ Optional MKOpt = S.NSAPIObj->getNSArrayMethodKind(Sel); if (!MKOpt) { - return None; + return std::nullopt; } NSAPI::NSArrayMethodKind MK = *MKOpt; @@ -16513,10 +16513,10 @@ return 1; default: - return None; + return std::nullopt; } - return None; + return std::nullopt; } static @@ -16526,7 +16526,7 @@ Message->getReceiverInterface(), NSAPI::ClassId_NSMutableDictionary); if (!IsMutableDictionary) { - return None; + return std::nullopt; } Selector Sel = Message->getSelector(); @@ -16534,7 +16534,7 @@ Optional MKOpt = S.NSAPIObj->getNSDictionaryMethodKind(Sel); if (!MKOpt) { - return None; + return std::nullopt; } NSAPI::NSDictionaryMethodKind MK = *MKOpt; @@ -16546,10 +16546,10 @@ return 0; default: - return None; + return std::nullopt; } - return None; + return std::nullopt; } static Optional GetNSSetArgumentIndex(Sema &S, ObjCMessageExpr *Message) { @@ -16561,14 +16561,14 @@ Message->getReceiverInterface(), NSAPI::ClassId_NSMutableOrderedSet); if (!IsMutableSet && !IsMutableOrderedSet) { - return None; + return std::nullopt; } Selector Sel = Message->getSelector(); Optional MKOpt = 
S.NSAPIObj->getNSSetMethodKind(Sel); if (!MKOpt) { - return None; + return std::nullopt; } NSAPI::NSSetMethodKind MK = *MKOpt; @@ -16583,7 +16583,7 @@ return 1; } - return None; + return std::nullopt; } void Sema::CheckObjCCircularContainer(ObjCMessageExpr *Message) { diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -2855,13 +2855,12 @@ FunctionTypeLoc &Block, FunctionProtoTypeLoc &BlockProto, bool SuppressBlockName = false, bool SuppressBlock = false, - Optional> ObjCSubsts = None); + Optional> ObjCSubsts = std::nullopt); -static std::string -FormatFunctionParameter(const PrintingPolicy &Policy, - const DeclaratorDecl *Param, bool SuppressName = false, - bool SuppressBlock = false, - Optional> ObjCSubsts = None) { +static std::string FormatFunctionParameter( + const PrintingPolicy &Policy, const DeclaratorDecl *Param, + bool SuppressName = false, bool SuppressBlock = false, + Optional> ObjCSubsts = std::nullopt) { // Params are unavailable in FunctionTypeLoc if the FunctionType is invalid. // It would be better to pass in the param Type, which is usually available. // But this case is rare, so just pretend we fell back to int as elsewhere. @@ -4480,7 +4479,8 @@ 0) { ParsedType T = DS.getRepAsType(); if (!T.get().isNull() && T.get()->isObjCObjectOrInterfaceType()) - AddClassMessageCompletions(*this, S, T, None, false, false, Results); + AddClassMessageCompletions(*this, S, T, std::nullopt, false, false, + Results); } // Note that we intentionally suppress macro results here, since we do not @@ -4837,7 +4837,7 @@ if (E.isInvalid()) CodeCompleteExpression(S, PreferredType); else if (getLangOpts().ObjC) - CodeCompleteObjCInstanceMessage(S, E.get(), None, false); + CodeCompleteObjCInstanceMessage(S, E.get(), std::nullopt, false); } /// The set of properties that have already been added, referenced by @@ -5734,7 +5734,7 @@ Results.EnterNewScope(); - bool CompletionSucceded = DoCompletion(Base, IsArrow, None); + bool CompletionSucceded = DoCompletion(Base, IsArrow, std::nullopt); if (CodeCompleter->includeFixIts()) { const CharSourceRange OpRange = CharSourceRange::getTokenRange(OpLoc, OpLoc); @@ -6219,7 +6219,7 @@ } } if (!DesignatedFieldName) - return llvm::None; + return std::nullopt; // Find the index within the class's fields. // (Probing getParamDecl() directly would be quadratic in number of fields). 
@@ -7573,7 +7573,8 @@ Results.EnterNewScope(); VisitedSelectorSet Selectors; - AddObjCMethods(Class, true, MK_ZeroArgSelector, None, CurContext, Selectors, + AddObjCMethods(Class, true, MK_ZeroArgSelector, std::nullopt, CurContext, + Selectors, /*AllowSameLength=*/true, Results); Results.ExitScope(); HandleCodeCompleteResults(this, CodeCompleter, Results.getCompletionContext(), @@ -7599,7 +7600,8 @@ Results.EnterNewScope(); VisitedSelectorSet Selectors; - AddObjCMethods(Class, true, MK_OneArgSelector, None, CurContext, Selectors, + AddObjCMethods(Class, true, MK_OneArgSelector, std::nullopt, CurContext, + Selectors, /*AllowSameLength=*/true, Results); Results.ExitScope(); @@ -7895,7 +7897,8 @@ if (Iface->getSuperClass()) { Results.AddResult(Result("super")); - AddSuperSendCompletion(*this, /*NeedSuperKeyword=*/true, None, Results); + AddSuperSendCompletion(*this, /*NeedSuperKeyword=*/true, std::nullopt, + Results); } if (getLangOpts().CPlusPlus11) diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -575,7 +575,7 @@ /*Pattern=*/nullptr, /*ForConstraintInstantiation=*/true); if (SetupConstraintScope(FD, TemplateArgs, MLTAL, Scope)) - return llvm::None; + return std::nullopt; return MLTAL; } @@ -1154,11 +1154,11 @@ assert(E.size() != 0); auto Conjunction = fromConstraintExpr(S, D, E[0]); if (!Conjunction) - return None; + return std::nullopt; for (unsigned I = 1; I < E.size(); ++I) { auto Next = fromConstraintExpr(S, D, E[I]); if (!Next) - return None; + return std::nullopt; *Conjunction = NormalizedConstraint(S.Context, std::move(*Conjunction), std::move(*Next), CCK_Conjunction); } @@ -1183,10 +1183,10 @@ if (LogicalBinOp BO = E) { auto LHS = fromConstraintExpr(S, D, BO.getLHS()); if (!LHS) - return None; + return std::nullopt; auto RHS = fromConstraintExpr(S, D, BO.getRHS()); if (!RHS) - return None; + return std::nullopt; return NormalizedConstraint(S.Context, std::move(*LHS), std::move(*RHS), BO.isAnd() ? CCK_Conjunction : CCK_Disjunction); @@ -1210,14 +1210,14 @@ SubNF = S.getNormalizedAssociatedConstraints(CD, {CD->getConstraintExpr()}); if (!SubNF) - return None; + return std::nullopt; } Optional New; New.emplace(S.Context, *SubNF); if (substituteParameterMappings(S, *New, CSE)) - return None; + return std::nullopt; return New; } diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -339,7 +339,7 @@ // EvaluateBinaryTypeTrait(BTT_IsConvertible, ...) which is at the moment // a private function in SemaExprCXX.cpp - ExprResult AddressExpr = buildMemberCall(S, E, Loc, "address", None); + ExprResult AddressExpr = buildMemberCall(S, E, Loc, "address", std::nullopt); if (AddressExpr.isInvalid()) return nullptr; @@ -395,8 +395,8 @@ return Result.get(); }; - CallExpr *AwaitReady = - cast_or_null(BuildSubExpr(ACT::ACT_Ready, "await_ready", None)); + CallExpr *AwaitReady = cast_or_null( + BuildSubExpr(ACT::ACT_Ready, "await_ready", std::nullopt)); if (!AwaitReady) return Calls; if (!AwaitReady->getType()->isDependentType()) { @@ -457,7 +457,7 @@ } } - BuildSubExpr(ACT::ACT_Resume, "await_resume", None); + BuildSubExpr(ACT::ACT_Resume, "await_resume", std::nullopt); // Make sure the awaiter object gets a chance to be cleaned up. 
S.Cleanup.setExprNeedsCleanups(true); @@ -705,8 +705,8 @@ SourceLocation Loc = Fn->getLocation(); // Build the initial suspend point auto buildSuspends = [&](StringRef Name) mutable -> StmtResult { - ExprResult Operand = - buildPromiseCall(*this, ScopeInfo->CoroutinePromise, Loc, Name, None); + ExprResult Operand = buildPromiseCall(*this, ScopeInfo->CoroutinePromise, + Loc, Name, std::nullopt); if (Operand.isInvalid()) return StmtError(); ExprResult Suspend = @@ -997,7 +997,7 @@ PC = buildPromiseCall(*this, Promise, Loc, "return_value", E); } else { E = MakeFullDiscardedValueExpr(E).get(); - PC = buildPromiseCall(*this, Promise, Loc, "return_void", None); + PC = buildPromiseCall(*this, Promise, Loc, "return_void", std::nullopt); } if (PC.isInvalid()) return StmtError(); @@ -1670,8 +1670,8 @@ if (!S.getLangOpts().CXXExceptions) return true; - ExprResult UnhandledException = buildPromiseCall(S, Fn.CoroutinePromise, Loc, - "unhandled_exception", None); + ExprResult UnhandledException = buildPromiseCall( + S, Fn.CoroutinePromise, Loc, "unhandled_exception", std::nullopt); UnhandledException = S.ActOnFinishFullExpr(UnhandledException.get(), Loc, /*DiscardedValue*/ false); if (UnhandledException.isInvalid()) @@ -1694,8 +1694,8 @@ // [dcl.fct.def.coroutine]p7 // The expression promise.get_return_object() is used to initialize the // returned reference or prvalue result object of a call to a coroutine. - ExprResult ReturnObject = - buildPromiseCall(S, Fn.CoroutinePromise, Loc, "get_return_object", None); + ExprResult ReturnObject = buildPromiseCall(S, Fn.CoroutinePromise, Loc, + "get_return_object", std::nullopt); if (ReturnObject.isInvalid()) return false; diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2225,7 +2225,7 @@ }; SmallVector DeclDiags; auto addDiag = [&DeclDiags](SourceLocation Loc, PartialDiagnostic PD) { - DeclDiags.push_back(LocAndDiag{Loc, None, std::move(PD)}); + DeclDiags.push_back(LocAndDiag{Loc, std::nullopt, std::move(PD)}); }; auto addDiagWithPrev = [&DeclDiags](SourceLocation Loc, SourceLocation PreviousDeclLoc, @@ -13608,8 +13608,8 @@ InitializationKind Kind = InitializationKind::CreateDefault(Var->getLocation()); - InitializationSequence InitSeq(*this, Entity, Kind, None); - ExprResult Init = InitSeq.Perform(*this, Entity, Kind, None); + InitializationSequence InitSeq(*this, Entity, Kind, std::nullopt); + ExprResult Init = InitSeq.Perform(*this, Entity, Kind, std::nullopt); if (Init.get()) { Var->setInit(MaybeCreateExprWithCleanups(Init.get())); @@ -15760,8 +15760,8 @@ /*NumExceptions=*/0, /*NoexceptExpr=*/nullptr, /*ExceptionSpecTokens=*/nullptr, - /*DeclsInPrototype=*/None, Loc, - Loc, D), + /*DeclsInPrototype=*/std::nullopt, + Loc, Loc, D), std::move(DS.getAttributes()), SourceLocation()); D.SetIdentifier(&II, Loc); @@ -19550,6 +19550,12 @@ return New; } +Decl *Sema::ActOnTopLevelStmtDecl(Stmt *Statement) { + auto *New = TopLevelStmtDecl::Create(Context, Statement); + Context.getTranslationUnitDecl()->addDecl(New); + return New; +} + void Sema::ActOnPragmaRedefineExtname(IdentifierInfo* Name, IdentifierInfo* AliasName, SourceLocation PragmaLoc, diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -2676,7 +2676,7 @@ if (IOSToWatchOSMapping) { if (auto MappedVersion = IOSToWatchOSMapping->map( - Version, MinimumWatchOSVersion, None)) { + Version, MinimumWatchOSVersion, 
std::nullopt)) { return MappedVersion.value(); } } @@ -2731,8 +2731,8 @@ return Version; if (IOSToTvOSMapping) { - if (auto MappedVersion = - IOSToTvOSMapping->map(Version, VersionTuple(0, 0), None)) { + if (auto MappedVersion = IOSToTvOSMapping->map( + Version, VersionTuple(0, 0), std::nullopt)) { return *MappedVersion; } } @@ -2795,24 +2795,25 @@ // attributes that are inferred from 'ios'. NewII = &S.Context.Idents.get("maccatalyst"); auto RemapMacOSVersion = - [&](const VersionTuple &V) -> Optional { + [&](const VersionTuple &V) -> std::optional { if (V.empty()) - return None; + return std::nullopt; // API_TO_BE_DEPRECATED is 100000. if (V.getMajor() == 100000) return VersionTuple(100000); // The minimum iosmac version is 13.1 - return MacOStoMacCatalystMapping->map(V, VersionTuple(13, 1), None); + return MacOStoMacCatalystMapping->map(V, VersionTuple(13, 1), + std::nullopt); }; - Optional NewIntroduced = - RemapMacOSVersion(Introduced.Version), - NewDeprecated = - RemapMacOSVersion(Deprecated.Version), - NewObsoleted = - RemapMacOSVersion(Obsoleted.Version); + std::optional NewIntroduced = + RemapMacOSVersion(Introduced.Version), + NewDeprecated = + RemapMacOSVersion(Deprecated.Version), + NewObsoleted = + RemapMacOSVersion(Obsoleted.Version); if (NewIntroduced || NewDeprecated || NewObsoleted) { auto VersionOrEmptyVersion = - [](const Optional &V) -> VersionTuple { + [](const std::optional &V) -> VersionTuple { return V ? *V : VersionTuple(); }; AvailabilityAttr *NewAttr = S.mergeAvailabilityAttr( diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -1265,7 +1265,7 @@ if (E.isInvalid()) return true; - E = S.BuildCallExpr(nullptr, E.get(), Loc, None, Loc); + E = S.BuildCallExpr(nullptr, E.get(), Loc, std::nullopt, Loc); } else { // Otherwise, the initializer is get(e), where get is looked up // in the associated namespaces. @@ -4750,8 +4750,8 @@ case IIK_Default: { InitializationKind InitKind = InitializationKind::CreateDefault(Constructor->getLocation()); - InitializationSequence InitSeq(SemaRef, InitEntity, InitKind, None); - BaseInit = InitSeq.Perform(SemaRef, InitEntity, InitKind, None); + InitializationSequence InitSeq(SemaRef, InitEntity, InitKind, std::nullopt); + BaseInit = InitSeq.Perform(SemaRef, InitEntity, InitKind, std::nullopt); break; } @@ -4915,9 +4915,9 @@ InitializationKind InitKind = InitializationKind::CreateDefault(Loc); - InitializationSequence InitSeq(SemaRef, InitEntity, InitKind, None); + InitializationSequence InitSeq(SemaRef, InitEntity, InitKind, std::nullopt); ExprResult MemberInit = - InitSeq.Perform(SemaRef, InitEntity, InitKind, None); + InitSeq.Perform(SemaRef, InitEntity, InitKind, std::nullopt); MemberInit = SemaRef.MaybeCreateExprWithCleanups(MemberInit); if (MemberInit.isInvalid()) @@ -10774,7 +10774,7 @@ EPI.Variadic = false; EPI.TypeQuals = Qualifiers(); EPI.RefQualifier = RQ_None; - return Context.getFunctionType(Context.VoidTy, None, EPI); + return Context.getFunctionType(Context.VoidTy, std::nullopt, EPI); } static void extendLeft(SourceRange &R, SourceRange Before) { @@ -10950,7 +10950,8 @@ // of the errors above fired) and with the conversion type as the // return type. if (D.isInvalidType()) - R = Context.getFunctionType(ConvType, None, Proto->getExtProtoInfo()); + R = Context.getFunctionType(ConvType, std::nullopt, + Proto->getExtProtoInfo()); // C++0x explicit conversion operators. 
if (DS.hasExplicitSpecifier() && !getLangOpts().CPlusPlus20) @@ -13538,7 +13539,7 @@ DefaultCon->setAccess(AS_public); DefaultCon->setDefaulted(); - setupImplicitSpecialMemberType(DefaultCon, Context.VoidTy, None); + setupImplicitSpecialMemberType(DefaultCon, Context.VoidTy, std::nullopt); if (getLangOpts().CUDA) inferCUDATargetForImplicitSpecialMember(ClassDecl, CXXDefaultConstructor, @@ -13818,7 +13819,7 @@ Destructor->setAccess(AS_public); Destructor->setDefaulted(); - setupImplicitSpecialMemberType(Destructor, Context.VoidTy, None); + setupImplicitSpecialMemberType(Destructor, Context.VoidTy, std::nullopt); if (getLangOpts().CUDA) inferCUDATargetForImplicitSpecialMember(ClassDecl, CXXDestructor, @@ -13979,7 +13980,8 @@ FunctionProtoType::ExtProtoInfo EPI = DtorType->getExtProtoInfo(); EPI.ExceptionSpec.Type = EST_Unevaluated; EPI.ExceptionSpec.SourceDecl = Destructor; - Destructor->setType(Context.getFunctionType(Context.VoidTy, None, EPI)); + Destructor->setType( + Context.getFunctionType(Context.VoidTy, std::nullopt, EPI)); // FIXME: If the destructor has a body that could throw, and the newly created // spec doesn't allow exceptions, we should emit a warning, because this @@ -15264,7 +15266,8 @@ : CopyConstructor->getLocation(); Sema::CompoundScopeRAII CompoundScope(*this); CopyConstructor->setBody( - ActOnCompoundStmt(Loc, Loc, None, /*isStmtExpr=*/false).getAs()); + ActOnCompoundStmt(Loc, Loc, std::nullopt, /*isStmtExpr=*/false) + .getAs()); CopyConstructor->markUsed(Context); } @@ -15389,8 +15392,9 @@ ? MoveConstructor->getEndLoc() : MoveConstructor->getLocation(); Sema::CompoundScopeRAII CompoundScope(*this); - MoveConstructor->setBody(ActOnCompoundStmt( - Loc, Loc, None, /*isStmtExpr=*/ false).getAs()); + MoveConstructor->setBody( + ActOnCompoundStmt(Loc, Loc, std::nullopt, /*isStmtExpr=*/false) + .getAs()); MoveConstructor->markUsed(Context); } @@ -18188,9 +18192,9 @@ InitializationKind InitKind = InitializationKind::CreateDefault(ObjCImplementation->getLocation()); - InitializationSequence InitSeq(*this, InitEntity, InitKind, None); + InitializationSequence InitSeq(*this, InitEntity, InitKind, std::nullopt); ExprResult MemberInit = - InitSeq.Perform(*this, InitEntity, InitKind, None); + InitSeq.Perform(*this, InitEntity, InitKind, std::nullopt); MemberInit = MaybeCreateExprWithCleanups(MemberInit); // Note, MemberInit could actually come back empty if no initialization // is required (e.g., because it would call a trivial default constructor) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1058,7 +1058,7 @@ return ExprError(); ExprResult Call = BuildCallExpr(TUScope, TrapFn.get(), E->getBeginLoc(), - None, E->getEndLoc()); + std::nullopt, E->getEndLoc()); if (Call.isInvalid()) return ExprError(); @@ -1963,8 +1963,8 @@ TemplateArgument Arg(Lit); TemplateArgumentLocInfo ArgInfo(Lit); ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo)); - return BuildLiteralOperatorCall(R, OpNameInfo, None, StringTokLocs.back(), - &ExplicitArgs); + return BuildLiteralOperatorCall(R, OpNameInfo, std::nullopt, + StringTokLocs.back(), &ExplicitArgs); } case LOLR_StringTemplatePack: { @@ -1984,8 +1984,8 @@ TemplateArgumentLocInfo ArgInfo; ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo)); } - return BuildLiteralOperatorCall(R, OpNameInfo, None, StringTokLocs.back(), - &ExplicitArgs); + return BuildLiteralOperatorCall(R, OpNameInfo, std::nullopt, + StringTokLocs.back(), &ExplicitArgs); } 
case LOLR_Raw: case LOLR_ErrorNoDiagnostic: @@ -2624,7 +2624,7 @@ // a template name, but we happen to have always already looked up the name // before we get here if it must be a template name. if (DiagnoseEmptyLookup(S, SS, R, CCC ? *CCC : DefaultValidator, nullptr, - None, &TE)) { + std::nullopt, &TE)) { if (TE && KeywordReplacement) { auto &State = getTypoExprState(TE); auto BestTC = State.Consumer->getNextCorrection(); @@ -3855,7 +3855,7 @@ TemplateArgumentLocInfo ArgInfo; ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo)); } - return BuildLiteralOperatorCall(R, OpNameInfo, None, TokLoc, + return BuildLiteralOperatorCall(R, OpNameInfo, std::nullopt, TokLoc, &ExplicitArgs); } case LOLR_StringTemplatePack: @@ -16245,7 +16245,7 @@ FunctionProtoType::ExtProtoInfo EPI; EPI.HasTrailingReturn = false; EPI.TypeQuals.addConst(); - T = Context.getFunctionType(Context.DependentTy, None, EPI); + T = Context.getFunctionType(Context.DependentTy, std::nullopt, EPI); Sig = Context.getTrivialTypeSourceInfo(T); } @@ -16395,10 +16395,10 @@ if (isa(FTy)) { FunctionProtoType::ExtProtoInfo EPI; EPI.ExtInfo = Ext; - BlockTy = Context.getFunctionType(RetTy, None, EPI); + BlockTy = Context.getFunctionType(RetTy, std::nullopt, EPI); - // Otherwise, if we don't need to change anything about the function type, - // preserve its sugar structure. + // Otherwise, if we don't need to change anything about the function type, + // preserve its sugar structure. } else if (FTy->getReturnType() == RetTy && (!NoReturn || FTy->getNoReturnAttr())) { BlockTy = BSI->FunctionType; @@ -16416,7 +16416,7 @@ } else { FunctionProtoType::ExtProtoInfo EPI; EPI.ExtInfo = FunctionType::ExtInfo().withNoReturn(NoReturn); - BlockTy = Context.getFunctionType(RetTy, None, EPI); + BlockTy = Context.getFunctionType(RetTy, std::nullopt, EPI); } DiagnoseUnusedParameters(BD->parameters()); @@ -17697,7 +17697,7 @@ /// Prevent sema calls during the tree transform from adding pointers that /// are already in the sets. - llvm::SaveAndRestore DisableIITracking( + llvm::SaveAndRestore DisableIITracking( SemaRef.RebuildingImmediateInvocation, true); /// Prevent diagnostic during tree transfrom as they are duplicates @@ -19968,7 +19968,7 @@ bool Sema::DiagRuntimeBehavior(SourceLocation Loc, const Stmt *Statement, const PartialDiagnostic &PD) { return DiagRuntimeBehavior( - Loc, Statement ? llvm::makeArrayRef(Statement) : llvm::None, PD); + Loc, Statement ? 
llvm::makeArrayRef(Statement) : std::nullopt, PD); } bool Sema::CheckCallReturnType(QualType ReturnType, SourceLocation Loc, @@ -20892,7 +20892,7 @@ }); } if (Spec == AvailSpecs.end()) - return None; + return std::nullopt; return Spec->getVersion(); }; diff --git a/clang/lib/Sema/SemaExprObjC.cpp b/clang/lib/Sema/SemaExprObjC.cpp --- a/clang/lib/Sema/SemaExprObjC.cpp +++ b/clang/lib/Sema/SemaExprObjC.cpp @@ -298,7 +298,7 @@ &CX.Idents.get("value"), NumberType, /*TInfo=*/nullptr, SC_None, nullptr); - Method->setMethodParams(S.Context, value, None); + Method->setMethodParams(S.Context, value, std::nullopt); } if (!validateBoxingMethod(S, Loc, S.NSNumberDecl, Sel, Method)) @@ -577,7 +577,7 @@ Context.getPointerType(ConstCharType), /*TInfo=*/nullptr, SC_None, nullptr); - M->setMethodParams(Context, value, None); + M->setMethodParams(Context, value, std::nullopt); BoxingMethod = M; } @@ -705,7 +705,7 @@ SC_None, nullptr); Params.push_back(type); - M->setMethodParams(Context, Params, None); + M->setMethodParams(Context, Params, std::nullopt); BoxingMethod = M; } @@ -833,7 +833,7 @@ /*TInfo=*/nullptr, SC_None, nullptr); Params.push_back(cnt); - Method->setMethodParams(Context, Params, None); + Method->setMethodParams(Context, Params, std::nullopt); } if (!validateBoxingMethod(*this, Loc, NSArrayDecl, Sel, Method)) @@ -1003,7 +1003,7 @@ /*TInfo=*/nullptr, SC_None, nullptr); Params.push_back(cnt); - Method->setMethodParams(Context, Params, None); + Method->setMethodParams(Context, Params, std::nullopt); } if (!validateBoxingMethod(*this, SR.getBegin(), NSDictionaryDecl, Sel, @@ -4377,11 +4377,9 @@ Diag(RelatedClass->getBeginLoc(), diag::note_declared_at); Diag(TDNDecl->getBeginLoc(), diag::note_declared_at); - ExprResult msg = - BuildInstanceMessageImplicit(SrcExpr, SrcType, - InstanceMethod->getLocation(), - InstanceMethod->getSelector(), - InstanceMethod, None); + ExprResult msg = BuildInstanceMessageImplicit( + SrcExpr, SrcType, InstanceMethod->getLocation(), + InstanceMethod->getSelector(), InstanceMethod, std::nullopt); SrcExpr = msg.get(); } return true; diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -503,7 +503,7 @@ true); MultiExprArg SubInit; Expr *InitExpr; - InitListExpr DummyInitList(SemaRef.Context, Loc, None, Loc); + InitListExpr DummyInitList(SemaRef.Context, Loc, std::nullopt, Loc); // C++ [dcl.init.aggr]p7: // If there are fewer initializer-clauses in the list than there are @@ -522,8 +522,10 @@ // // Only do this if we're initializing a class type, to avoid filling in // the initializer list where possible. - InitExpr = VerifyOnly ? &DummyInitList : new (SemaRef.Context) - InitListExpr(SemaRef.Context, Loc, None, Loc); + InitExpr = VerifyOnly + ? 
&DummyInitList + : new (SemaRef.Context) + InitListExpr(SemaRef.Context, Loc, std::nullopt, Loc); InitExpr->setType(SemaRef.Context.VoidTy); SubInit = InitExpr; Kind = InitializationKind::CreateCopy(Loc, Loc); @@ -3120,10 +3122,8 @@ InitListChecker::createInitListExpr(QualType CurrentObjectType, SourceRange InitRange, unsigned ExpectedNumInits) { - InitListExpr *Result - = new (SemaRef.Context) InitListExpr(SemaRef.Context, - InitRange.getBegin(), None, - InitRange.getEnd()); + InitListExpr *Result = new (SemaRef.Context) InitListExpr( + SemaRef.Context, InitRange.getBegin(), std::nullopt, InitRange.getEnd()); QualType ResultType = CurrentObjectType; if (!ResultType->isArrayType()) @@ -5237,7 +5237,7 @@ // constructor for T is called (and the initialization is ill-formed if // T has no accessible default constructor); if (DestType->isRecordType() && S.getLangOpts().CPlusPlus) { - TryConstructorInitialization(S, Entity, Kind, None, DestType, + TryConstructorInitialization(S, Entity, Kind, std::nullopt, DestType, Entity.getType(), Sequence); return; } diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp --- a/clang/lib/Sema/SemaLambda.cpp +++ b/clang/lib/Sema/SemaLambda.cpp @@ -953,8 +953,8 @@ QualType DefaultTypeForNoTrailingReturn = getLangOpts().CPlusPlus14 ? Context.getAutoDeductType() : Context.DependentTy; - QualType MethodTy = - Context.getFunctionType(DefaultTypeForNoTrailingReturn, None, EPI); + QualType MethodTy = Context.getFunctionType(DefaultTypeForNoTrailingReturn, + std::nullopt, EPI); MethodTyInfo = Context.getTrivialTypeSourceInfo(MethodTy); ExplicitParams = false; ExplicitResultType = false; @@ -1411,7 +1411,7 @@ ConvExtInfo.TypeQuals.addConst(); ConvExtInfo.ExceptionSpec.Type = EST_BasicNoexcept; QualType ConvTy = - S.Context.getFunctionType(PtrToFunctionTy, None, ConvExtInfo); + S.Context.getFunctionType(PtrToFunctionTy, std::nullopt, ConvExtInfo); SourceLocation Loc = IntroducerRange.getBegin(); DeclarationName ConversionName @@ -1588,7 +1588,8 @@ /*IsVariadic=*/false, /*IsCXXMethod=*/true)); ConversionEPI.TypeQuals = Qualifiers(); ConversionEPI.TypeQuals.addConst(); - QualType ConvTy = S.Context.getFunctionType(BlockPtrTy, None, ConversionEPI); + QualType ConvTy = + S.Context.getFunctionType(BlockPtrTy, std::nullopt, ConversionEPI); SourceLocation Loc = IntroducerRange.getBegin(); DeclarationName Name diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -1179,9 +1179,8 @@ FunctionProtoType::ExtProtoInfo EPI = ConvProto->getExtProtoInfo(); EPI.ExtInfo = EPI.ExtInfo.withCallingConv(CC_C); EPI.ExceptionSpec = EST_None; - QualType ExpectedType - = R.getSema().Context.getFunctionType(R.getLookupName().getCXXNameType(), - None, EPI); + QualType ExpectedType = R.getSema().Context.getFunctionType( + R.getLookupName().getCXXNameType(), std::nullopt, EPI); // Perform template argument deduction against the type that we would // expect the function to have. diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -836,7 +836,7 @@ if (isa(D)) return UnnamedDeclKind::UsingDirective; // Everything else either introduces one or more names or is ill-formed. 
- return llvm::None; + return std::nullopt; } unsigned getUnnamedDeclDiag(UnnamedDeclKind UDK, bool InBlock) { diff --git a/clang/lib/Sema/SemaObjCProperty.cpp b/clang/lib/Sema/SemaObjCProperty.cpp --- a/clang/lib/Sema/SemaObjCProperty.cpp +++ b/clang/lib/Sema/SemaObjCProperty.cpp @@ -2571,7 +2571,7 @@ /*TInfo=*/nullptr, SC_None, nullptr); - SetterMethod->setMethodParams(Context, Argument, None); + SetterMethod->setMethodParams(Context, Argument, std::nullopt); AddPropertyAttrs(*this, SetterMethod, property); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -1119,14 +1119,14 @@ const SharingMapTy &StackElem = getTopOfStack(); auto I = StackElem.UsesAllocatorsDecls.find(D); if (I == StackElem.UsesAllocatorsDecls.end()) - return None; + return std::nullopt; return I->getSecond(); } Optional isUsesAllocatorsDecl(const Decl *D) const { const SharingMapTy &StackElem = getTopOfStack(); auto I = StackElem.UsesAllocatorsDecls.find(D); if (I == StackElem.UsesAllocatorsDecls.end()) - return None; + return std::nullopt; return I->getSecond(); } @@ -7293,14 +7293,14 @@ unsigned NumAppendArgs, SourceRange SR) { if (!DG || DG.get().isNull()) - return None; + return std::nullopt; const int VariantId = 1; // Must be applied only to single decl. if (!DG.get().isSingleDecl()) { Diag(SR.getBegin(), diag::err_omp_single_decl_in_declare_simd_variant) << VariantId << SR; - return None; + return std::nullopt; } Decl *ADecl = DG.get().getSingleDecl(); if (auto *FTD = dyn_cast(ADecl)) @@ -7311,7 +7311,7 @@ if (!FD) { Diag(ADecl->getLocation(), diag::err_omp_function_expected) << VariantId << SR; - return None; + return std::nullopt; } auto &&HasMultiVersionAttributes = [](const FunctionDecl *FD) { @@ -7323,7 +7323,7 @@ if (HasMultiVersionAttributes(FD)) { Diag(FD->getLocation(), diag::err_omp_declare_variant_incompat_attributes) << SR; - return None; + return std::nullopt; } // Allow #pragma omp declare variant only if the function is not used. @@ -7341,7 +7341,7 @@ // The VariantRef must point to function. if (!VariantRef) { Diag(SR.getBegin(), diag::err_omp_function_expected) << VariantId; - return None; + return std::nullopt; } auto ShouldDelayChecks = [](Expr *&E, bool) { @@ -7376,7 +7376,7 @@ return true; }; if (TI.anyScoreOrCondition(HandleNonConstantScoresAndConditions)) - return None; + return std::nullopt; QualType AdjustedFnType = FD->getType(); if (NumAppendArgs) { @@ -7384,7 +7384,7 @@ if (!PTy) { Diag(FD->getLocation(), diag::err_omp_declare_variant_prototype_required) << SR; - return None; + return std::nullopt; } // Adjust the function type to account for an extra omp_interop_t for each // specified in the append_args clause. @@ -7397,12 +7397,12 @@ } if (!TD) { Diag(SR.getBegin(), diag::err_omp_interop_type_not_found) << SR; - return None; + return std::nullopt; } QualType InteropType = Context.getTypeDeclType(TD); if (PTy->isVariadic()) { Diag(FD->getLocation(), diag::err_omp_append_args_with_varargs) << SR; - return None; + return std::nullopt; } llvm::SmallVector Params; Params.append(PTy->param_type_begin(), PTy->param_type_end()); @@ -7432,7 +7432,7 @@ if (!ER.isUsable()) { Diag(VariantRef->getExprLoc(), diag::err_omp_function_expected) << VariantId << VariantRef->getSourceRange(); - return None; + return std::nullopt; } VariantRef = ER.get(); } else { @@ -7452,12 +7452,12 @@ << VariantRef->getType() << ((Method && !Method->isStatic()) ? FnPtrType : FD->getType()) << (NumAppendArgs ? 
1 : 0) << VariantRef->getSourceRange(); - return None; + return std::nullopt; } VariantRefCast = PerformImplicitConversion( VariantRef, FnPtrType.getUnqualifiedType(), AA_Converting); if (!VariantRefCast.isUsable()) - return None; + return std::nullopt; } // Drop previously built artificial addr_of unary op for member functions. if (Method && !Method->isStatic()) { @@ -7473,7 +7473,7 @@ !ER.get()->IgnoreParenImpCasts()->getType()->isFunctionType()) { Diag(VariantRef->getExprLoc(), diag::err_omp_function_expected) << VariantId << VariantRef->getSourceRange(); - return None; + return std::nullopt; } // The VariantRef must point to function. @@ -7481,20 +7481,20 @@ if (!DRE) { Diag(VariantRef->getExprLoc(), diag::err_omp_function_expected) << VariantId << VariantRef->getSourceRange(); - return None; + return std::nullopt; } auto *NewFD = dyn_cast_or_null(DRE->getDecl()); if (!NewFD) { Diag(VariantRef->getExprLoc(), diag::err_omp_function_expected) << VariantId << VariantRef->getSourceRange(); - return None; + return std::nullopt; } if (FD->getCanonicalDecl() == NewFD->getCanonicalDecl()) { Diag(VariantRef->getExprLoc(), diag::err_omp_declare_variant_same_base_function) << VariantRef->getSourceRange(); - return None; + return std::nullopt; } // Check if function types are compatible in C. @@ -7506,7 +7506,7 @@ diag::err_omp_declare_variant_incompat_types) << NewFD->getType() << FD->getType() << (NumAppendArgs ? 1 : 0) << VariantRef->getSourceRange(); - return None; + return std::nullopt; } if (NewType->isFunctionProtoType()) { if (FD->getType()->isFunctionNoProtoType()) @@ -7524,7 +7524,7 @@ SourceRange SR = NewFD->specific_attr_begin()->getRange(); Diag(SR.getBegin(), diag::note_omp_marked_declare_variant_here) << SR; - return None; + return std::nullopt; } enum DoesntSupport { @@ -7540,38 +7540,38 @@ if (CXXFD->isVirtual()) { Diag(FD->getLocation(), diag::err_omp_declare_variant_doesnt_support) << VirtFuncs; - return None; + return std::nullopt; } if (isa(FD)) { Diag(FD->getLocation(), diag::err_omp_declare_variant_doesnt_support) << Constructors; - return None; + return std::nullopt; } if (isa(FD)) { Diag(FD->getLocation(), diag::err_omp_declare_variant_doesnt_support) << Destructors; - return None; + return std::nullopt; } } if (FD->isDeleted()) { Diag(FD->getLocation(), diag::err_omp_declare_variant_doesnt_support) << DeletedFuncs; - return None; + return std::nullopt; } if (FD->isDefaulted()) { Diag(FD->getLocation(), diag::err_omp_declare_variant_doesnt_support) << DefaultedFuncs; - return None; + return std::nullopt; } if (FD->isConstexpr()) { Diag(FD->getLocation(), diag::err_omp_declare_variant_doesnt_support) << (NewFD->isConsteval() ? ConstevalFuncs : ConstexprFuncs); - return None; + return std::nullopt; } // Check general compatibility. @@ -7587,7 +7587,7 @@ << FD->getLocation()), /*TemplatesSupported=*/true, /*ConstexprSupported=*/false, /*CLinkageMayDiffer=*/true)) - return None; + return std::nullopt; return std::make_pair(FD, cast(DRE)); } @@ -8091,7 +8091,7 @@ DepDecl = LoopStmtChecker.getDepDecl(); return LoopStmtChecker.getBaseLoopId(); } - return llvm::None; + return std::nullopt; } bool OpenMPIterationSpaceChecker::checkAndSetInit(Stmt *S, bool EmitDiags) { @@ -8232,10 +8232,10 @@ (Opcode == BO_LT || Opcode == BO_GT), SR, OpLoc); } else if (IneqCondIsCanonical && Opcode == BO_NE) { return setUB(const_cast(getInitLCDecl(LHS) == LCDecl ? 
RHS : LHS), - /*LessOp=*/llvm::None, + /*LessOp=*/std::nullopt, /*StrictOp=*/true, SR, OpLoc); } - return llvm::None; + return std::nullopt; }; llvm::Optional Res; if (auto *RBO = dyn_cast(S)) { @@ -16623,7 +16623,7 @@ static std::string getListOfPossibleValues(OpenMPClauseKind K, unsigned First, unsigned Last, - ArrayRef Exclude = llvm::None) { + ArrayRef Exclude = std::nullopt) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); unsigned Skipped = Exclude.size(); @@ -21551,7 +21551,7 @@ CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo MapperId, ArrayRef UnresolvedMappers, OpenMPMapClauseKind MapType = OMPC_MAP_unknown, - ArrayRef Modifiers = None, + ArrayRef Modifiers = std::nullopt, bool IsMapTypeImplicit = false, bool NoDiagnose = false) { // We only expect mappable expressions in 'to', 'from', and 'map' clauses. assert((CKind == OMPC_map || CKind == OMPC_to || CKind == OMPC_from) && diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -890,7 +890,7 @@ return static_cast(Data)->CallArgIndex; default: - return llvm::None; + return std::nullopt; } } @@ -5192,7 +5192,7 @@ } if (CT->getSize().ugt(e)) { // Need an init from empty {}, is there one? - InitListExpr EmptyList(S.Context, From->getEndLoc(), None, + InitListExpr EmptyList(S.Context, From->getEndLoc(), std::nullopt, From->getEndLoc()); EmptyList.setType(S.Context.VoidTy); DfltElt = TryListConversion( @@ -7013,7 +7013,7 @@ } else { AddMethodCandidate(cast(Decl), FoundDecl, ActingContext, ObjectType, ObjectClassification, Args, CandidateSet, - SuppressUserConversions, false, None, PO); + SuppressUserConversions, false, std::nullopt, PO); } } @@ -7628,7 +7628,7 @@ } if (EnableIfAttr *FailedAttr = - CheckEnableIf(Conversion, CandidateSet.getLocation(), None)) { + CheckEnableIf(Conversion, CandidateSet.getLocation(), std::nullopt)) { Candidate.Viable = false; Candidate.FailureKind = ovl_fail_enable_if; Candidate.DeductionFailure.Data = FailedAttr; @@ -7799,7 +7799,7 @@ } if (EnableIfAttr *FailedAttr = - CheckEnableIf(Conversion, CandidateSet.getLocation(), None)) { + CheckEnableIf(Conversion, CandidateSet.getLocation(), std::nullopt)) { Candidate.Viable = false; Candidate.FailureKind = ovl_fail_enable_if; Candidate.DeductionFailure.Data = FailedAttr; @@ -7841,10 +7841,10 @@ continue; AddOverloadCandidate(FD, F.getPair(), FunctionArgs, CandidateSet); if (CandidateSet.getRewriteInfo().shouldAddReversed(*this, Args, FD)) - AddOverloadCandidate(FD, F.getPair(), - {FunctionArgs[1], FunctionArgs[0]}, CandidateSet, - false, false, true, false, ADLCallKind::NotADL, - None, OverloadCandidateParamOrder::Reversed); + AddOverloadCandidate( + FD, F.getPair(), {FunctionArgs[1], FunctionArgs[0]}, CandidateSet, + false, false, true, false, ADLCallKind::NotADL, std::nullopt, + OverloadCandidateParamOrder::Reversed); } } } @@ -9602,7 +9602,8 @@ FD, FoundDecl, {Args[1], Args[0]}, CandidateSet, /*SuppressUserConversions=*/false, PartialOverloading, /*AllowExplicit=*/true, /*AllowExplicitConversion=*/false, - ADLCallKind::UsesADL, None, OverloadCandidateParamOrder::Reversed); + ADLCallKind::UsesADL, std::nullopt, + OverloadCandidateParamOrder::Reversed); } } else { auto *FTD = cast(*I); @@ -9737,7 +9738,7 @@ static Optional getImplicitObjectParamType(ASTContext &Context, const FunctionDecl *F) { if (!isa(F) || isa(F)) - return llvm::None; + return std::nullopt; auto *M = cast(F); // Static member functions' object parameters match all 
types. @@ -12643,11 +12644,11 @@ FD2->getAssociatedConstraints(AC2); bool AtLeastAsConstrained1, AtLeastAsConstrained2; if (IsAtLeastAsConstrained(FD1, AC1, FD2, AC2, AtLeastAsConstrained1)) - return None; + return std::nullopt; if (IsAtLeastAsConstrained(FD2, AC2, FD1, AC1, AtLeastAsConstrained2)) - return None; + return std::nullopt; if (AtLeastAsConstrained1 == AtLeastAsConstrained2) - return None; + return std::nullopt; return AtLeastAsConstrained1; }; @@ -15125,7 +15126,8 @@ for (LookupResult::iterator Oper = R.begin(), OperEnd = R.end(); Oper != OperEnd; ++Oper) { AddMethodCandidate(Oper.getPair(), Base->getType(), Base->Classify(Context), - None, CandidateSet, /*SuppressUserConversion=*/false); + std::nullopt, CandidateSet, + /*SuppressUserConversion=*/false); } bool HadMultipleCandidates = (CandidateSet.size() > 1); @@ -15315,7 +15317,8 @@ *CallExpr = ExprError(); return FRS_DiagnosticIssued; } - *CallExpr = BuildCallExpr(S, MemberRef.get(), Loc, None, Loc, nullptr); + *CallExpr = + BuildCallExpr(S, MemberRef.get(), Loc, std::nullopt, Loc, nullptr); if (CallExpr->isInvalid()) { *CallExpr = ExprError(); return FRS_DiagnosticIssued; diff --git a/clang/lib/Sema/SemaPseudoObject.cpp b/clang/lib/Sema/SemaPseudoObject.cpp --- a/clang/lib/Sema/SemaPseudoObject.cpp +++ b/clang/lib/Sema/SemaPseudoObject.cpp @@ -737,11 +737,11 @@ assert(InstanceReceiver || RefExpr->isSuperReceiver()); msg = S.BuildInstanceMessageImplicit(InstanceReceiver, receiverType, GenericLoc, Getter->getSelector(), - Getter, None); + Getter, std::nullopt); } else { msg = S.BuildClassMessageImplicit(receiverType, RefExpr->isSuperReceiver(), - GenericLoc, Getter->getSelector(), - Getter, None); + GenericLoc, Getter->getSelector(), Getter, + std::nullopt); } return msg; } @@ -1200,7 +1200,7 @@ /*TInfo=*/nullptr, SC_None, nullptr); - AtIndexGetter->setMethodParams(S.Context, Argument, None); + AtIndexGetter->setMethodParams(S.Context, Argument, std::nullopt); } if (!AtIndexGetter) { @@ -1316,7 +1316,7 @@ SC_None, nullptr); Params.push_back(key); - AtIndexSetter->setMethodParams(S.Context, Params, None); + AtIndexSetter->setMethodParams(S.Context, Params, std::nullopt); } if (!AtIndexSetter) { diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -382,7 +382,7 @@ // type of the left operand could be used for SFINAE, so technically it is // *used*. if (DiagID != diag::warn_unused_comma_left_operand || !isSFINAEContext()) - DiagIfReachable(Loc, S ? llvm::makeArrayRef(S) : llvm::None, + DiagIfReachable(Loc, S ? llvm::makeArrayRef(S) : std::nullopt, PDiag(DiagID) << R1 << R2); } diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -1207,7 +1207,7 @@ ImmediatelyDeclaredConstraint.get(), BO_LAnd, EllipsisLoc, /*RHS=*/nullptr, /*RParenLoc=*/SourceLocation(), - /*NumExpansions=*/None); + /*NumExpansions=*/std::nullopt); } /// Attach a type-constraint to a template parameter. @@ -2378,8 +2378,9 @@ /*Depth*/ 0, Depth1IndexAdjustment + TTP->getIndex(), TTP->getIdentifier(), TTP->wasDeclaredWithTypename(), TTP->isParameterPack(), TTP->hasTypeConstraint(), - TTP->isExpandedParameterPack() ? - llvm::Optional(TTP->getNumExpansionParameters()) : None); + TTP->isExpandedParameterPack() + ? 
llvm::Optional(TTP->getNumExpansionParameters()) + : std::nullopt); if (const auto *TC = TTP->getTypeConstraint()) SemaRef.SubstTypeConstraint(NewTTP, TC, Args, /*EvaluateConstraint*/ true); @@ -2632,7 +2633,7 @@ // additional function template derived as above from a hypothetical // constructor C(). if (!AddedAny) - Transform.buildSimpleDeductionGuide(None); + Transform.buildSimpleDeductionGuide(std::nullopt); // -- An additional function template derived as above from a hypothetical // constructor C(C), called the copy deduction candidate. @@ -3508,7 +3509,7 @@ // Fabricate an empty template parameter list for the invented header. return TemplateParameterList::Create(Context, SourceLocation(), - SourceLocation(), None, + SourceLocation(), std::nullopt, SourceLocation(), nullptr); } @@ -5294,7 +5295,7 @@ MultiLevelTemplateArgumentList TemplateArgLists(Template, SugaredConverted, /*Final=*/true); for (unsigned i = 0, e = Param->getDepth(); i != e; ++i) - TemplateArgLists.addOuterTemplateArguments(None); + TemplateArgLists.addOuterTemplateArguments(std::nullopt); bool ForLambdaCallOperator = false; if (const auto *Rec = dyn_cast(Template->getDeclContext())) @@ -5346,7 +5347,7 @@ MultiLevelTemplateArgumentList TemplateArgLists(Template, SugaredConverted, /*Final=*/true); for (unsigned i = 0, e = Param->getDepth(); i != e; ++i) - TemplateArgLists.addOuterTemplateArguments(None); + TemplateArgLists.addOuterTemplateArguments(std::nullopt); Sema::ContextRAII SavedContext(SemaRef, Template->getDeclContext()); EnterExpressionEvaluationContext ConstantEvaluated( @@ -5395,7 +5396,7 @@ MultiLevelTemplateArgumentList TemplateArgLists(Template, SugaredConverted, /*Final=*/true); for (unsigned i = 0, e = Param->getDepth(); i != e; ++i) - TemplateArgLists.addOuterTemplateArguments(None); + TemplateArgLists.addOuterTemplateArguments(std::nullopt); Sema::ContextRAII SavedContext(SemaRef, Template->getDeclContext()); // Substitute into the nested-name-specifier first, diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -122,7 +122,7 @@ HandleDefaultTempArgIntoTempTempParam(const TemplateTemplateParmDecl *TTP, MultiLevelTemplateArgumentList &Result) { for (unsigned I = 0, N = TTP->getDepth() + 1; I != N; ++I) - Result.addOuterTemplateArguments(None); + Result.addOuterTemplateArguments(std::nullopt); return Response::Done(); } @@ -523,8 +523,8 @@ : InstantiatingTemplate( SemaRef, CodeSynthesisContext::RequirementInstantiation, PointOfInstantiation, InstantiationRange, /*Entity=*/nullptr, - /*Template=*/nullptr, /*TemplateArgs=*/None, &DeductionInfo) {} - + /*Template=*/nullptr, /*TemplateArgs=*/std::nullopt, &DeductionInfo) { +} Sema::InstantiatingTemplate::InstantiatingTemplate( Sema &SemaRef, SourceLocation PointOfInstantiation, @@ -533,7 +533,7 @@ : InstantiatingTemplate( SemaRef, CodeSynthesisContext::NestedRequirementConstraintsCheck, PointOfInstantiation, InstantiationRange, /*Entity=*/nullptr, - /*Template=*/nullptr, /*TemplateArgs=*/None) {} + /*Template=*/nullptr, /*TemplateArgs=*/std::nullopt) {} Sema::InstantiatingTemplate::InstantiatingTemplate( Sema &SemaRef, SourceLocation PointOfInstantiation, const RequiresExpr *RE, @@ -541,7 +541,8 @@ : InstantiatingTemplate( SemaRef, CodeSynthesisContext::RequirementParameterInstantiation, PointOfInstantiation, InstantiationRange, /*Entity=*/nullptr, - /*Template=*/nullptr, /*TemplateArgs=*/None, 
&DeductionInfo) {} + /*Template=*/nullptr, /*TemplateArgs=*/std::nullopt, &DeductionInfo) { +} Sema::InstantiatingTemplate::InstantiatingTemplate( Sema &SemaRef, SourceLocation PointOfInstantiation, @@ -1031,7 +1032,7 @@ case CodeSynthesisContext::ConstraintNormalization: case CodeSynthesisContext::NestedRequirementConstraintsCheck: // This is a template instantiation, so there is no SFINAE. - return None; + return std::nullopt; case CodeSynthesisContext::DefaultTemplateArgumentInstantiation: case CodeSynthesisContext::PriorTemplateArgumentSubstitution: @@ -1061,7 +1062,7 @@ case CodeSynthesisContext::BuildingBuiltinDumpStructCall: // This happens in a context unrelated to template instantiation, so // there is no SFINAE. - return None; + return std::nullopt; case CodeSynthesisContext::ExceptionSpecEvaluation: // FIXME: This should not be treated as a SFINAE context, because @@ -1079,7 +1080,7 @@ return Optional(nullptr); } - return None; + return std::nullopt; } //===----------------------------------------------------------------------===/ @@ -1135,7 +1136,7 @@ Optional getPackIndex(TemplateArgument Pack) { int Index = getSema().ArgumentPackSubstitutionIndex; if (Index == -1) - return None; + return std::nullopt; return Pack.pack_size() - 1 - Index; } diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -2787,8 +2787,9 @@ } Decl *TemplateDeclInstantiator::VisitParmVarDecl(ParmVarDecl *D) { - return SemaRef.SubstParmVarDecl(D, TemplateArgs, /*indexAdjustment*/ 0, None, - /*ExpectParameterPack=*/ false); + return SemaRef.SubstParmVarDecl(D, TemplateArgs, /*indexAdjustment*/ 0, + std::nullopt, + /*ExpectParameterPack=*/false); } Decl *TemplateDeclInstantiator::VisitTemplateTypeParmDecl( @@ -4043,7 +4044,7 @@ Decl *R; if (auto *MD = dyn_cast(Spaceship)) { R = Instantiator.VisitCXXMethodDecl( - MD, nullptr, None, + MD, nullptr, std::nullopt, TemplateDeclInstantiator::RewriteKind::RewriteSpaceshipAsEqualEqual); } else { assert(Spaceship->getFriendObjectKind() && diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp --- a/clang/lib/Sema/SemaTemplateVariadic.cpp +++ b/clang/lib/Sema/SemaTemplateVariadic.cpp @@ -612,7 +612,8 @@ if (!TSInfo) return true; - TypeSourceInfo *TSResult = CheckPackExpansion(TSInfo, EllipsisLoc, None); + TypeSourceInfo *TSResult = + CheckPackExpansion(TSInfo, EllipsisLoc, std::nullopt); if (!TSResult) return true; @@ -659,7 +660,7 @@ } ExprResult Sema::ActOnPackExpansion(Expr *Pattern, SourceLocation EllipsisLoc) { - return CheckPackExpansion(Pattern, EllipsisLoc, None); + return CheckPackExpansion(Pattern, EllipsisLoc, std::nullopt); } ExprResult Sema::CheckPackExpansion(Expr *Pattern, SourceLocation EllipsisLoc, @@ -831,7 +832,7 @@ for (auto [I, _] : Unexpanded) { if (const auto *TTP = I.dyn_cast()) { if (setResultPos({TTP->getDepth(), TTP->getIndex()})) - return None; + return std::nullopt; } else if (const auto *STP = I.dyn_cast()) { setResultSz(STP->getNumArgs()); @@ -848,10 +849,10 @@ if (!DAP) // The pattern refers to an unexpanded pack. We're not ready to expand // this pack yet. 
- return None; + return std::nullopt; setResultSz(DAP->size()); } else if (setResultPos(getDepthAndIndex(ND))) { - return None; + return std::nullopt; } } } @@ -1132,7 +1133,7 @@ if (auto *Subst = Arg.getAsType()->getAs()) Pack = Subst->getArgumentPack(); else - return None; + return std::nullopt; break; case TemplateArgument::Expression: @@ -1142,10 +1143,10 @@ else if (auto *Subst = dyn_cast(Arg.getAsExpr())) { for (VarDecl *PD : *Subst) if (PD->isParameterPack()) - return None; + return std::nullopt; return Subst->getNumExpansions(); } else - return None; + return std::nullopt; break; case TemplateArgument::Template: @@ -1153,7 +1154,7 @@ Arg.getAsTemplate().getAsSubstTemplateTemplateParmPack()) Pack = Subst->getArgumentPack(); else - return None; + return std::nullopt; break; case TemplateArgument::Declaration: @@ -1162,7 +1163,7 @@ case TemplateArgument::Integral: case TemplateArgument::Pack: case TemplateArgument::Null: - return None; + return std::nullopt; } // Check that no argument in the pack is itself a pack expansion. @@ -1170,7 +1171,7 @@ // There's no point recursing in this case; we would have already // expanded this pack expansion into the enclosing pack if we could. if (Elem.isPackExpansion()) - return None; + return std::nullopt; } return Pack.pack_size(); } @@ -1251,7 +1252,7 @@ } return BuildCXXFoldExpr(ULE, LParenLoc, LHS, Opc, EllipsisLoc, RHS, RParenLoc, - None); + std::nullopt); } ExprResult Sema::BuildCXXFoldExpr(UnresolvedLookupExpr *Callee, diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -761,7 +761,7 @@ /*NumExceptions=*/0, /*NoexceptExpr=*/nullptr, /*ExceptionSpecTokens=*/nullptr, - /*DeclsInPrototype=*/None, loc, loc, declarator)); + /*DeclsInPrototype=*/std::nullopt, loc, loc, declarator)); // For consistency, make sure the state still has us as processing // the decl spec. @@ -5846,7 +5846,8 @@ << T << D.getSourceRange(); D.setEllipsisLoc(SourceLocation()); } else { - T = Context.getPackExpansionType(T, None, /*ExpectPackInType=*/false); + T = Context.getPackExpansionType(T, std::nullopt, + /*ExpectPackInType=*/false); } break; case DeclaratorContext::TemplateParam: @@ -5859,7 +5860,7 @@ // parameter packs in the type of the non-type template parameter, then // it expands those parameter packs. if (T->containsUnexpandedParameterPack()) - T = Context.getPackExpansionType(T, None); + T = Context.getPackExpansionType(T, std::nullopt); else S.Diag(D.getEllipsisLoc(), LangOpts.CPlusPlus11 diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -3234,8 +3234,9 @@ ExprResult RebuildCXXScalarValueInitExpr(TypeSourceInfo *TSInfo, SourceLocation LParenLoc, SourceLocation RParenLoc) { - return getSema().BuildCXXTypeConstructExpr( - TSInfo, LParenLoc, None, RParenLoc, /*ListInitialization=*/false); + return getSema().BuildCXXTypeConstructExpr(TSInfo, LParenLoc, std::nullopt, + RParenLoc, + /*ListInitialization=*/false); } /// Build a new C++ "new" expression. @@ -4008,13 +4009,13 @@ // Revert value-initialization back to empty parens. if (CXXScalarValueInitExpr *VIE = dyn_cast(Init)) { SourceRange Parens = VIE->getSourceRange(); - return getDerived().RebuildParenListExpr(Parens.getBegin(), None, + return getDerived().RebuildParenListExpr(Parens.getBegin(), std::nullopt, Parens.getEnd()); } // FIXME: We shouldn't build ImplicitValueInitExprs for direct-initialization. 
if (isa(Init)) - return getDerived().RebuildParenListExpr(SourceLocation(), None, + return getDerived().RebuildParenListExpr(SourceLocation(), std::nullopt, SourceLocation()); // Revert initialization by constructor back to a parenthesized or braced list @@ -5853,7 +5854,8 @@ "transformation."); } else { NewParm = getDerived().TransformFunctionTypeParam( - OldParm, indexAdjustment, None, /*ExpectParameterPack=*/ false); + OldParm, indexAdjustment, std::nullopt, + /*ExpectParameterPack=*/false); } if (!NewParm) @@ -5903,8 +5905,8 @@ return true; if (NewType->containsUnexpandedParameterPack()) { - NewType = - getSema().getASTContext().getPackExpansionType(NewType, None); + NewType = getSema().getASTContext().getPackExpansionType( + NewType, std::nullopt); if (NewType.isNull()) return true; @@ -13179,7 +13181,7 @@ if (Expand) { for (unsigned I = 0; I != *NumExpansions; ++I) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), I); - SubstInitCapture(SourceLocation(), None); + SubstInitCapture(SourceLocation(), std::nullopt); } } if (!Expand || RetainExpansion) { @@ -13188,7 +13190,7 @@ Result.EllipsisLoc = ExpansionTL.getEllipsisLoc(); } } else { - SubstInitCapture(SourceLocation(), None); + SubstInitCapture(SourceLocation(), std::nullopt); } } @@ -13742,9 +13744,9 @@ auto *Pack = E->getPack(); if (auto *TTPD = dyn_cast(Pack)) { ArgStorage = getSema().Context.getPackExpansionType( - getSema().Context.getTypeDeclType(TTPD), None); + getSema().Context.getTypeDeclType(TTPD), std::nullopt); } else if (auto *TTPD = dyn_cast(Pack)) { - ArgStorage = TemplateArgument(TemplateName(TTPD), None); + ArgStorage = TemplateArgument(TemplateName(TTPD), std::nullopt); } else { auto *VD = cast(Pack); ExprResult DRE = getSema().BuildDeclRefExpr( @@ -13753,8 +13755,9 @@ E->getPackLoc()); if (DRE.isInvalid()) return ExprError(); - ArgStorage = new (getSema().Context) PackExpansionExpr( - getSema().Context.DependentTy, DRE.get(), E->getPackLoc(), None); + ArgStorage = new (getSema().Context) + PackExpansionExpr(getSema().Context.DependentTy, DRE.get(), + E->getPackLoc(), std::nullopt); } PackArgs = ArgStorage; } @@ -13766,9 +13769,9 @@ getDerived().TransformDecl(E->getPackLoc(), E->getPack())); if (!Pack) return ExprError(); - return getDerived().RebuildSizeOfPackExpr(E->getOperatorLoc(), Pack, - E->getPackLoc(), - E->getRParenLoc(), None, None); + return getDerived().RebuildSizeOfPackExpr( + E->getOperatorLoc(), Pack, E->getPackLoc(), E->getRParenLoc(), + std::nullopt, std::nullopt); } // Try to compute the result without performing a partial substitution. @@ -13802,7 +13805,7 @@ if (!NumExpansions) { // No: we must be in an alias template expansion, and we're going to need // to actually expand the packs. - Result = None; + Result = std::nullopt; break; } @@ -13812,9 +13815,9 @@ // Common case: we could determine the number of expansions without // substituting. 
if (Result) - return getDerived().RebuildSizeOfPackExpr(E->getOperatorLoc(), E->getPack(), - E->getPackLoc(), - E->getRParenLoc(), *Result, None); + return getDerived().RebuildSizeOfPackExpr( + E->getOperatorLoc(), E->getPack(), E->getPackLoc(), E->getRParenLoc(), + *Result, std::nullopt); TemplateArgumentListInfo TransformedPackArgs(E->getPackLoc(), E->getPackLoc()); @@ -13839,13 +13842,13 @@ } if (PartialSubstitution) - return getDerived().RebuildSizeOfPackExpr(E->getOperatorLoc(), E->getPack(), - E->getPackLoc(), - E->getRParenLoc(), None, Args); + return getDerived().RebuildSizeOfPackExpr( + E->getOperatorLoc(), E->getPack(), E->getPackLoc(), E->getRParenLoc(), + std::nullopt, Args); return getDerived().RebuildSizeOfPackExpr(E->getOperatorLoc(), E->getPack(), E->getPackLoc(), E->getRParenLoc(), - Args.size(), None); + Args.size(), std::nullopt); } template @@ -14184,9 +14187,8 @@ if (Value.get() != OrigElement.Value) ArgChanged = true; - ObjCDictionaryElement Element = { - Key.get(), Value.get(), SourceLocation(), None - }; + ObjCDictionaryElement Element = {Key.get(), Value.get(), SourceLocation(), + std::nullopt}; Elements.push_back(Element); } diff --git a/clang/lib/Serialization/ASTCommon.cpp b/clang/lib/Serialization/ASTCommon.cpp --- a/clang/lib/Serialization/ASTCommon.cpp +++ b/clang/lib/Serialization/ASTCommon.cpp @@ -412,6 +412,7 @@ case Decl::PragmaComment: case Decl::PragmaDetectMismatch: case Decl::FileScopeAsm: + case Decl::TopLevelStmt: case Decl::AccessSpec: case Decl::Friend: case Decl::FriendTemplate: diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -2266,8 +2266,15 @@ return false; } -ASTReader::InputFileInfo -ASTReader::readInputFileInfo(ModuleFile &F, unsigned ID) { +InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) { + // If this ID is bogus, just return an empty input file. + if (ID == 0 || ID > F.InputFileInfosLoaded.size()) + return InputFileInfo(); + + // If we've already loaded this input file, return it. + if (!F.InputFileInfosLoaded[ID - 1].Filename.empty()) + return F.InputFileInfosLoaded[ID - 1]; + // Go find this input file. BitstreamCursor &Cursor = F.InputFilesCursor; SavedStreamPosition SavedPosition(Cursor); @@ -2320,6 +2327,9 @@ } R.ContentHash = (static_cast(Record[1]) << 32) | static_cast(Record[0]); + + // Note that we've loaded this input file info. 
+ F.InputFileInfosLoaded[ID - 1] = R; return R; } @@ -2344,7 +2354,7 @@ consumeError(std::move(Err)); } - InputFileInfo FI = readInputFileInfo(F, ID); + InputFileInfo FI = getInputFileInfo(F, ID); off_t StoredSize = FI.StoredSize; time_t StoredTime = FI.StoredTime; bool Overridden = FI.Overridden; @@ -2400,8 +2410,8 @@ Content, None, } Kind; - llvm::Optional Old = llvm::None; - llvm::Optional New = llvm::None; + llvm::Optional Old = std::nullopt; + llvm::Optional New = std::nullopt; }; auto HasInputFileChanged = [&]() { if (StoredSize != File->getSize()) @@ -2691,7 +2701,7 @@ : NumUserInputs; for (unsigned I = 0; I < N; ++I) { bool IsSystem = I >= NumUserInputs; - InputFileInfo FI = readInputFileInfo(F, I+1); + InputFileInfo FI = getInputFileInfo(F, I + 1); Listener->visitInputFile(FI.Filename, IsSystem, FI.Overridden, F.Kind == MK_ExplicitModule || F.Kind == MK_PrebuiltModule); @@ -2968,6 +2978,7 @@ F.InputFileOffsets = (const llvm::support::unaligned_uint64_t *)Blob.data(); F.InputFilesLoaded.resize(NumInputs); + F.InputFileInfosLoaded.resize(NumInputs); F.NumUserInputFiles = NumUserInputs; break; } @@ -2983,7 +2994,7 @@ for (unsigned I = 0; I < FileCount; ++I) { size_t ID = endian::readNext(D); - InputFileInfo IFI = readInputFileInfo(F, ID); + InputFileInfo IFI = getInputFileInfo(F, ID); if (llvm::ErrorOr File = PP.getFileManager().getFile(IFI.Filename)) PP.getIncludedFiles().insert(*File); @@ -3955,13 +3966,13 @@ PP.getHeaderSearchInfo().lookupModule(F.ModuleName, F.ImportLoc); auto &Map = PP.getHeaderSearchInfo().getModuleMap(); Optional ModMap = - M ? Map.getModuleMapFileForUniquing(M) : None; + M ? Map.getModuleMapFileForUniquing(M) : std::nullopt; // Don't emit module relocation error if we have -fno-validate-pch if (!bool(PP.getPreprocessorOpts().DisablePCHOrModuleValidation & DisableValidationForModuleKind::Module) && !ModMap) { if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities)) { - if (auto ASTFE = M ? M->getASTFile() : None) { + if (auto ASTFE = M ? M->getASTFile() : std::nullopt) { // This module was defined by an imported (explicit) module. Diag(diag::err_module_file_conflict) << F.ModuleName << F.FileName << ASTFE->getName(); @@ -4246,8 +4257,7 @@ SmallVectorImpl *Imported) { llvm::TimeTraceScope scope("ReadAST", FileName); - llvm::SaveAndRestore - SetCurImportLocRAII(CurrentImportLoc, ImportLoc); + llvm::SaveAndRestore SetCurImportLocRAII(CurrentImportLoc, ImportLoc); llvm::SaveAndRestore> SetCurModuleKindRAII( CurrentDeserializingModuleKind, Type); @@ -4795,6 +4805,13 @@ Result = OutOfDate; // Don't return early. Read the signature. 
break; } + case HEADER_SEARCH_PATHS: { + bool Complain = (ClientLoadCapabilities & ARR_ConfigurationMismatch) == 0; + if (!AllowCompatibleConfigurationMismatch && + ParseHeaderSearchPaths(Record, Complain, *Listener)) + Result = ConfigurationMismatch; + break; + } case DIAG_PRAGMA_MAPPINGS: if (!F) break; @@ -5897,6 +5914,28 @@ unsigned Idx = 0; HSOpts.Sysroot = ReadString(Record, Idx); + HSOpts.ResourceDir = ReadString(Record, Idx); + HSOpts.ModuleCachePath = ReadString(Record, Idx); + HSOpts.ModuleUserBuildPath = ReadString(Record, Idx); + HSOpts.DisableModuleHash = Record[Idx++]; + HSOpts.ImplicitModuleMaps = Record[Idx++]; + HSOpts.ModuleMapFileHomeIsCwd = Record[Idx++]; + HSOpts.EnablePrebuiltImplicitModules = Record[Idx++]; + HSOpts.UseBuiltinIncludes = Record[Idx++]; + HSOpts.UseStandardSystemIncludes = Record[Idx++]; + HSOpts.UseStandardCXXIncludes = Record[Idx++]; + HSOpts.UseLibcxx = Record[Idx++]; + std::string SpecificModuleCachePath = ReadString(Record, Idx); + + return Listener.ReadHeaderSearchOptions(HSOpts, SpecificModuleCachePath, + Complain); +} + +bool ASTReader::ParseHeaderSearchPaths(const RecordData &Record, bool Complain, + ASTReaderListener &Listener) { + HeaderSearchOptions HSOpts; + unsigned Idx = 0; + // Include entries. for (unsigned N = Record[Idx++]; N; --N) { std::string Path = ReadString(Record, Idx); @@ -5915,21 +5954,13 @@ HSOpts.SystemHeaderPrefixes.emplace_back(std::move(Prefix), IsSystemHeader); } - HSOpts.ResourceDir = ReadString(Record, Idx); - HSOpts.ModuleCachePath = ReadString(Record, Idx); - HSOpts.ModuleUserBuildPath = ReadString(Record, Idx); - HSOpts.DisableModuleHash = Record[Idx++]; - HSOpts.ImplicitModuleMaps = Record[Idx++]; - HSOpts.ModuleMapFileHomeIsCwd = Record[Idx++]; - HSOpts.EnablePrebuiltImplicitModules = Record[Idx++]; - HSOpts.UseBuiltinIncludes = Record[Idx++]; - HSOpts.UseStandardSystemIncludes = Record[Idx++]; - HSOpts.UseStandardCXXIncludes = Record[Idx++]; - HSOpts.UseLibcxx = Record[Idx++]; - std::string SpecificModuleCachePath = ReadString(Record, Idx); + // VFS overlay files. + for (unsigned N = Record[Idx++]; N; --N) { + std::string VFSOverlayFile = ReadString(Record, Idx); + HSOpts.VFSOverlayFiles.emplace_back(std::move(VFSOverlayFile)); + } - return Listener.ReadHeaderSearchOptions(HSOpts, SpecificModuleCachePath, - Complain); + return Listener.ReadHeaderSearchPaths(HSOpts, Complain); } bool ASTReader::ParsePreprocessorOptions(const RecordData &Record, @@ -6448,7 +6479,8 @@ #define TYPE_BIT_CODE(CLASS_ID, CODE_ID, CODE_VALUE) \ case TYPE_##CODE_ID: return Type::CLASS_ID; #include "clang/Serialization/TypeBitCodes.def" - default: return llvm::None; + default: + return std::nullopt; } } @@ -8747,7 +8779,7 @@ llvm::sys::path::parent_path(MF.FileName), FileName, MF.Signature); } - return None; + return std::nullopt; } ExternalASTSource::ExtKind ASTReader::hasExternalDefinitions(const Decl *FD) { @@ -9217,9 +9249,8 @@ llvm::function_ref Visitor) { unsigned NumInputs = MF.InputFilesLoaded.size(); for (unsigned I = 0; I < NumInputs; ++I) { - InputFileInfo IFI = readInputFileInfo(MF, I + 1); + InputFileInfo IFI = getInputFileInfo(MF, I + 1); if (IFI.TopLevelModuleMap) - // FIXME: This unnecessarily re-reads the InputFileInfo. 
if (auto FE = getInputFile(MF, I + 1).getFile()) Visitor(*FE); } diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -402,6 +402,7 @@ void VisitLinkageSpecDecl(LinkageSpecDecl *D); void VisitExportDecl(ExportDecl *D); void VisitFileScopeAsmDecl(FileScopeAsmDecl *AD); + void VisitTopLevelStmtDecl(TopLevelStmtDecl *D); void VisitImportDecl(ImportDecl *D); void VisitAccessSpecDecl(AccessSpecDecl *D); void VisitFriendDecl(FriendDecl *D); @@ -1678,6 +1679,11 @@ AD->setRParenLoc(readSourceLocation()); } +void ASTDeclReader::VisitTopLevelStmtDecl(TopLevelStmtDecl *D) { + VisitDecl(D); + D->Statement = Record.readStmt(); +} + void ASTDeclReader::VisitBlockDecl(BlockDecl *BD) { VisitDecl(BD); BD->setBody(cast_or_null(Record.readStmt())); @@ -3022,8 +3028,8 @@ return false; } - if (isa(D)) + if (isa(D)) return true; if (isa(D)) @@ -3829,6 +3835,9 @@ case DECL_FILE_SCOPE_ASM: D = FileScopeAsmDecl::CreateDeserialized(Context, ID); break; + case DECL_TOP_LEVEL_STMT_DECL: + D = TopLevelStmtDecl::CreateDeserialized(Context, ID); + break; case DECL_BLOCK: D = BlockDecl::CreateDeserialized(Context, ID); break; @@ -3966,8 +3975,7 @@ // Guard variable to avoid recursively redoing the process of passing // decls to consumer. - SaveAndRestore GuardPassingDeclsToConsumer(PassingDeclsToConsumer, - true); + SaveAndRestore GuardPassingDeclsToConsumer(PassingDeclsToConsumer, true); // Ensure that we've loaded all potentially-interesting declarations // that need to be eagerly loaded. diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -161,12 +161,14 @@ namespace { -std::set GetAffectingModuleMaps(const HeaderSearch &HS, +std::set GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) { std::set ModuleMaps{}; std::set ProcessedModules; SmallVector ModulesToProcess{RootModule}; + const HeaderSearch &HS = PP.getHeaderSearchInfo(); + SmallVector FilesByUID; HS.getFileMgr().GetUniqueIDMapping(FilesByUID); @@ -190,30 +192,36 @@ } } - while (!ModulesToProcess.empty()) { - auto *CurrentModule = ModulesToProcess.pop_back_val(); - ProcessedModules.insert(CurrentModule); + const ModuleMap &MM = HS.getModuleMap(); + SourceManager &SourceMgr = PP.getSourceManager(); - Optional ModuleMapFile = - HS.getModuleMap().getModuleMapFileForUniquing(CurrentModule); - if (!ModuleMapFile) { - continue; + auto ForIncludeChain = [&](FileEntryRef F, + llvm::function_ref CB) { + CB(F); + FileID FID = SourceMgr.translateFile(F); + SourceLocation Loc = SourceMgr.getIncludeLoc(FID); + while (Loc.isValid()) { + FID = SourceMgr.getFileID(Loc); + CB(*SourceMgr.getFileEntryRefForID(FID)); + Loc = SourceMgr.getIncludeLoc(FID); } + }; - ModuleMaps.insert(*ModuleMapFile); - - for (auto *ImportedModule : (CurrentModule)->Imports) { - if (!ImportedModule || - ProcessedModules.find(ImportedModule) != ProcessedModules.end()) { - continue; - } - ModulesToProcess.push_back(ImportedModule); - } + auto ProcessModuleOnce = [&](const Module *M) { + for (const Module *Mod = M; Mod; Mod = Mod->Parent) + if (ProcessedModules.insert(Mod).second) + if (auto ModuleMapFile = MM.getModuleMapFileForUniquing(Mod)) + ForIncludeChain(*ModuleMapFile, [&](FileEntryRef F) { + ModuleMaps.insert(F); + }); + }; + for (const Module *CurrentModule : ModulesToProcess) { + 
ProcessModuleOnce(CurrentModule); + for (const Module *ImportedModule : CurrentModule->Imports) + ProcessModuleOnce(ImportedModule); for (const Module *UndeclaredModule : CurrentModule->UndeclaredUses) - if (UndeclaredModule && - ProcessedModules.find(UndeclaredModule) == ProcessedModules.end()) - ModulesToProcess.push_back(UndeclaredModule); + ProcessModuleOnce(UndeclaredModule); } return ModuleMaps; @@ -1036,6 +1044,7 @@ RECORD(SIGNATURE); RECORD(AST_BLOCK_HASH); RECORD(DIAGNOSTIC_OPTIONS); + RECORD(HEADER_SEARCH_PATHS); RECORD(DIAG_PRAGMA_MAPPINGS); #undef RECORD @@ -1166,6 +1175,35 @@ Stream.EmitRecord(DIAGNOSTIC_OPTIONS, Record); Record.clear(); + // Header search paths. + Record.clear(); + const HeaderSearchOptions &HSOpts = + PP.getHeaderSearchInfo().getHeaderSearchOpts(); + + // Include entries. + Record.push_back(HSOpts.UserEntries.size()); + for (unsigned I = 0, N = HSOpts.UserEntries.size(); I != N; ++I) { + const HeaderSearchOptions::Entry &Entry = HSOpts.UserEntries[I]; + AddString(Entry.Path, Record); + Record.push_back(static_cast(Entry.Group)); + Record.push_back(Entry.IsFramework); + Record.push_back(Entry.IgnoreSysRoot); + } + + // System header prefixes. + Record.push_back(HSOpts.SystemHeaderPrefixes.size()); + for (unsigned I = 0, N = HSOpts.SystemHeaderPrefixes.size(); I != N; ++I) { + AddString(HSOpts.SystemHeaderPrefixes[I].Prefix, Record); + Record.push_back(HSOpts.SystemHeaderPrefixes[I].IsSystemHeader); + } + + // VFS overlay files. + Record.push_back(HSOpts.VFSOverlayFiles.size()); + for (StringRef VFSOverlayFile : HSOpts.VFSOverlayFiles) + AddString(VFSOverlayFile, Record); + + Stream.EmitRecord(HEADER_SEARCH_PATHS, Record); + // Write out the diagnostic/pragma mappings. WritePragmaDiagnosticMappings(Diags, /* isModule = */ WritingModule); @@ -1391,27 +1429,10 @@ // Header search options. Record.clear(); - const HeaderSearchOptions &HSOpts - = PP.getHeaderSearchInfo().getHeaderSearchOpts(); - AddString(HSOpts.Sysroot, Record); - - // Include entries. - Record.push_back(HSOpts.UserEntries.size()); - for (unsigned I = 0, N = HSOpts.UserEntries.size(); I != N; ++I) { - const HeaderSearchOptions::Entry &Entry = HSOpts.UserEntries[I]; - AddString(Entry.Path, Record); - Record.push_back(static_cast(Entry.Group)); - Record.push_back(Entry.IsFramework); - Record.push_back(Entry.IgnoreSysRoot); - } - - // System header prefixes. - Record.push_back(HSOpts.SystemHeaderPrefixes.size()); - for (unsigned I = 0, N = HSOpts.SystemHeaderPrefixes.size(); I != N; ++I) { - AddString(HSOpts.SystemHeaderPrefixes[I].Prefix, Record); - Record.push_back(HSOpts.SystemHeaderPrefixes[I].IsSystemHeader); - } + const HeaderSearchOptions &HSOpts = + PP.getHeaderSearchInfo().getHeaderSearchOpts(); + AddString(HSOpts.Sysroot, Record); AddString(HSOpts.ResourceDir, Record); AddString(HSOpts.ModuleCachePath, Record); AddString(HSOpts.ModuleUserBuildPath, Record); @@ -1605,7 +1626,7 @@ Entry.IsTransient, Entry.IsTopLevelModuleMap}; - EmitRecordWithPath(IFAbbrevCode, Record, Entry.File.getName()); + EmitRecordWithPath(IFAbbrevCode, Record, Entry.File.getNameAsRequested()); } // Emit content hash for this file. @@ -1857,7 +1878,7 @@ // headers list when emitting resolved headers in the first loop below. // FIXME: It'd be preferable to avoid doing this if we were given // sufficient stat information in the module map. 
- HS.getModuleMap().resolveHeaderDirectives(M, /*File=*/llvm::None); + HS.getModuleMap().resolveHeaderDirectives(M, /*File=*/std::nullopt); // If the file didn't exist, we can still create a module if we were given // enough information in the module map. @@ -4554,8 +4575,7 @@ if (!WritingModule) return; - auto AffectingModuleMaps = - GetAffectingModuleMaps(PP->getHeaderSearchInfo(), WritingModule); + auto AffectingModuleMaps = GetAffectingModuleMaps(*PP, WritingModule); unsigned FileIDAdjustment = 0; unsigned OffsetAdjustment = 0; diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -124,6 +124,7 @@ void VisitLinkageSpecDecl(LinkageSpecDecl *D); void VisitExportDecl(ExportDecl *D); void VisitFileScopeAsmDecl(FileScopeAsmDecl *D); + void VisitTopLevelStmtDecl(TopLevelStmtDecl *D); void VisitImportDecl(ImportDecl *D); void VisitAccessSpecDecl(AccessSpecDecl *D); void VisitFriendDecl(FriendDecl *D); @@ -204,7 +205,7 @@ return Common->PartialSpecializations; } ArrayRef getPartialSpecializations(FunctionTemplateDecl::Common *) { - return None; + return std::nullopt; } template @@ -1171,6 +1172,12 @@ Code = serialization::DECL_FILE_SCOPE_ASM; } +void ASTDeclWriter::VisitTopLevelStmtDecl(TopLevelStmtDecl *D) { + VisitDecl(D); + Record.AddStmt(D->getStmt()); + Code = serialization::DECL_TOP_LEVEL_STMT_DECL; +} + void ASTDeclWriter::VisitEmptyDecl(EmptyDecl *D) { VisitDecl(D); Code = serialization::DECL_EMPTY; @@ -2418,7 +2425,7 @@ // File scoped assembly or obj-c or OMP declare target implementation must be // seen. - if (isa(D)) + if (isa(D)) return true; if (WritingModule && isPartOfPerModuleInitializer(D)) { diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp --- a/clang/lib/Serialization/ModuleManager.cpp +++ b/clang/lib/Serialization/ModuleManager.cpp @@ -445,7 +445,7 @@ bool ModuleManager::lookupModuleFile(StringRef FileName, off_t ExpectedSize, time_t ExpectedModTime, Optional &File) { - File = None; + File = std::nullopt; if (FileName == "-") return false; diff --git a/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp --- a/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp @@ -390,7 +390,7 @@ case kCFNumberCGFloatType: // FIXME: We need a way to map from names to Type*. 
default: - return None; + return std::nullopt; } return Ctx.getTypeSize(T); diff --git a/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp @@ -38,17 +38,17 @@ llvm::DenseSet &S; bool TraverseObjCAtFinallyStmt(ObjCAtFinallyStmt *S) { - SaveAndRestore inFinally(inEH, true); + SaveAndRestore inFinally(inEH, true); return ::RecursiveASTVisitor::TraverseObjCAtFinallyStmt(S); } bool TraverseObjCAtCatchStmt(ObjCAtCatchStmt *S) { - SaveAndRestore inCatch(inEH, true); + SaveAndRestore inCatch(inEH, true); return ::RecursiveASTVisitor::TraverseObjCAtCatchStmt(S); } bool TraverseCXXCatchStmt(CXXCatchStmt *S) { - SaveAndRestore inCatch(inEH, true); + SaveAndRestore inCatch(inEH, true); return TraverseStmt(S->getHandlerBlock()); } diff --git a/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp @@ -58,9 +58,9 @@ // Optional parameter `ExprVal` for expression value to be marked interesting. ExplodedNode *reportBug(llvm::StringRef Msg, CheckerContext &C, - Optional ExprVal = None) const; + Optional ExprVal = std::nullopt) const; ExplodedNode *reportBug(llvm::StringRef Msg, BugReporter &BR, ExplodedNode *N, - Optional ExprVal = None) const; + Optional ExprVal = std::nullopt) const; template void printAndReport(CheckerContext &C, T What) const; const Expr *getArgExpr(const CallExpr *CE, CheckerContext &C) const; @@ -477,7 +477,7 @@ const StringLiteral *SL = *SLPtr; return std::string(SL->getBytes()); } - return None; + return std::nullopt; } Optional VisitSymExpr(const SymExpr *S) { return lookup(S); } @@ -490,7 +490,7 @@ std::to_string(S->getRHS().getLimitedValue()) + (S->getRHS().isUnsigned() ? 
"U" : "")) .str(); - return None; + return std::nullopt; } Optional VisitSymSymExpr(const SymSymExpr *S) { @@ -501,7 +501,7 @@ return (*Str1 + " " + BinaryOperator::getOpcodeStr(S->getOpcode()) + " " + *Str2) .str(); - return None; + return std::nullopt; } Optional VisitUnarySymExpr(const UnarySymExpr *S) { @@ -509,7 +509,7 @@ return Str; if (Optional Str = Visit(S->getOperand())) return (UnaryOperator::getOpcodeStr(S->getOpcode()) + *Str).str(); - return None; + return std::nullopt; } Optional VisitSymbolCast(const SymbolCast *S) { @@ -517,7 +517,7 @@ return Str; if (Optional Str = Visit(S->getOperand())) return (Twine("(") + S->getType().getAsString() + ")" + *Str).str(); - return None; + return std::nullopt; } }; } // namespace diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -127,7 +127,7 @@ Optional getPointeeOf(const CheckerContext &C, SVal Arg) { if (auto LValue = Arg.getAs()) return getPointeeOf(C, *LValue); - return None; + return std::nullopt; } /// Given a pointer, return the SVal of its pointee or if it is tainted, @@ -147,7 +147,7 @@ if (isStdin(Arg, C.getASTContext())) return Arg; - return None; + return std::nullopt; } bool isTaintedOrPointsToTainted(const Expr *E, const ProgramStateRef &State, @@ -161,7 +161,8 @@ class ArgSet { public: ArgSet() = default; - ArgSet(ArgVecTy &&DiscreteArgs, Optional VariadicIndex = None) + ArgSet(ArgVecTy &&DiscreteArgs, + Optional VariadicIndex = std::nullopt) : DiscreteArgs(std::move(DiscreteArgs)), VariadicIndex(std::move(VariadicIndex)) {} @@ -202,7 +203,7 @@ GenericTaintRule() = default; GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst, - Optional SinkMsg = None) + Optional SinkMsg = std::nullopt) : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)), PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)), SinkMsg(SinkMsg) {} @@ -211,7 +212,7 @@ /// Make a rule that reports a warning if taint reaches any of \p FilterArgs /// arguments. static GenericTaintRule Sink(ArgSet &&SinkArgs, - Optional Msg = None) { + Optional Msg = std::nullopt) { return {std::move(SinkArgs), {}, {}, {}, Msg}; } @@ -234,7 +235,7 @@ /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted. static GenericTaintRule SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs, ArgSet &&DstArgs, - Optional Msg = None) { + Optional Msg = std::nullopt) { return { std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg}; } @@ -487,8 +488,10 @@ bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst; Optional JustVarIndex = P.VarIndex; - ArgSet SrcDesc(std::move(P.SrcArgs), IsSrcVariadic ? JustVarIndex : None); - ArgSet DstDesc(std::move(P.DstArgs), IsDstVariadic ? JustVarIndex : None); + ArgSet SrcDesc(std::move(P.SrcArgs), + IsSrcVariadic ? JustVarIndex : std::nullopt); + ArgSet DstDesc(std::move(P.DstArgs), + IsDstVariadic ? JustVarIndex : std::nullopt); consumeRulesFromConfig( P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules); diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -220,10 +220,10 @@ /// Update the RefState to reflect the new memory allocation. 
/// The optional \p RetVal parameter specifies the newly allocated pointer /// value; if unspecified, the value of expression \p E is used. -static ProgramStateRef MallocUpdateRefState(CheckerContext &C, const Expr *E, - ProgramStateRef State, - AllocationFamily Family, - Optional RetVal = None); +static ProgramStateRef +MallocUpdateRefState(CheckerContext &C, const Expr *E, ProgramStateRef State, + AllocationFamily Family, + Optional RetVal = std::nullopt); //===----------------------------------------------------------------------===// // The modeling of memory reallocation. @@ -468,7 +468,8 @@ /// if unspecified, the value of expression \p E is used. [[nodiscard]] static ProgramStateRef ProcessZeroAllocCheck(const CallEvent &Call, const unsigned IndexOfSizeArg, - ProgramStateRef State, Optional RetVal = None); + ProgramStateRef State, + Optional RetVal = std::nullopt); /// Model functions with the ownership_returns attribute. /// @@ -1154,21 +1155,21 @@ // Fall back to normal malloc behavior on platforms where we don't // know M_ZERO. - return None; + return std::nullopt; } // We treat the last argument as the flags argument, and callers fall-back to // normal malloc on a None return. This works for the FreeBSD kernel malloc // as well as Linux kmalloc. if (Call.getNumArgs() < 2) - return None; + return std::nullopt; const Expr *FlagsEx = Call.getArgExpr(Call.getNumArgs() - 1); const SVal V = C.getSVal(FlagsEx); if (!isa(V)) { // The case where 'V' can be a location can only be due to a bad header, // so in this case bail out. - return None; + return std::nullopt; } NonLoc Flags = V.castAs(); @@ -1180,7 +1181,7 @@ Flags, ZeroFlag, FlagsEx->getType()); if (MaskedFlagsUC.isUnknownOrUndef()) - return None; + return std::nullopt; DefinedSVal MaskedFlags = MaskedFlagsUC.castAs(); // Check if maskedFlags is non-zero. 
@@ -1194,7 +1195,7 @@ AF_Malloc); } - return None; + return std::nullopt; } SVal MallocChecker::evalMulForBufferSize(CheckerContext &C, const Expr *Blocks, @@ -1650,7 +1651,7 @@ if (S.getNameForSlot(i).equals("freeWhenDone")) return !Call.getArgSVal(i).isZeroConstant(); - return None; + return std::nullopt; } void MallocChecker::checkPostObjCMessage(const ObjCMethodCall &Call, @@ -2071,7 +2072,7 @@ case AF_IfNameIndex: { if (ChecksEnabled[CK_MallocChecker]) return CK_MallocChecker; - return None; + return std::nullopt; } case AF_CXXNew: case AF_CXXNewArray: { @@ -2083,12 +2084,12 @@ if (ChecksEnabled[CK_NewDeleteChecker]) return CK_NewDeleteChecker; } - return None; + return std::nullopt; } case AF_InnerBuffer: { if (ChecksEnabled[CK_InnerPointerChecker]) return CK_InnerPointerChecker; - return None; + return std::nullopt; } case AF_None: { llvm_unreachable("no family"); diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp @@ -420,7 +420,7 @@ return RefVal::makeNotOwned(RE.getObjKind(), ResultTy); } - return None; + return std::nullopt; } static bool isPointerToObject(QualType QT) { diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp --- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp @@ -171,7 +171,7 @@ SymbolRef &Sym, Optional> CE) { if (!CE) - return None; + return std::nullopt; for (unsigned Idx = 0; Idx < (*CE)->getNumArgs(); Idx++) if (const MemRegion *MR = (*CE)->getArgSVal(Idx).getAsRegion()) @@ -179,25 +179,25 @@ if (CurrSt->getSVal(MR, TR->getValueType()).getAsSymbol() == Sym) return Idx; - return None; + return std::nullopt; } static Optional findMetaClassAlloc(const Expr *Callee) { if (const auto *ME = dyn_cast(Callee)) { if (ME->getMemberDecl()->getNameAsString() != "alloc") - return None; + return std::nullopt; const Expr *This = ME->getBase()->IgnoreParenImpCasts(); if (const auto *DRE = dyn_cast(This)) { const ValueDecl *VD = DRE->getDecl(); if (VD->getNameAsString() != "metaClass") - return None; + return std::nullopt; if (const auto *RD = dyn_cast(VD->getDeclContext())) return RD->getNameAsString(); } } - return None; + return std::nullopt; } static std::string findAllocatedObjectName(const Stmt *S, QualType QT) { @@ -607,7 +607,7 @@ return std::string(VR->getDecl()->getName()); // Once we support more storage locations for bindings, // this would need to be improved. 
- return None; + return std::nullopt; } using Bindings = llvm::SmallVector, 4>; diff --git a/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp @@ -74,7 +74,7 @@ CheckerContext &C) { auto ReturnDV = ReturnV.getAs(); if (!ReturnDV) - return None; + return std::nullopt; if (ExpectedValue) return C.getState()->isNull(*ReturnDV).isConstrainedTrue(); diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -1095,13 +1095,13 @@ StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD, CheckerContext &C) const { if (!FD) - return None; + return std::nullopt; initFunctionSummaries(C); auto FSMI = FunctionSummaryMap.find(FD->getCanonicalDecl()); if (FSMI == FunctionSummaryMap.end()) - return None; + return std::nullopt; return FSMI->second; } @@ -1110,7 +1110,7 @@ CheckerContext &C) const { const FunctionDecl *FD = dyn_cast_or_null(Call.getDecl()); if (!FD) - return None; + return std::nullopt; return findFunctionSummary(FD, C); } @@ -1135,7 +1135,7 @@ IdentifierInfo &II = ACtx.Idents.get(Name); auto LookupRes = ACtx.getTranslationUnitDecl()->lookup(&II); if (LookupRes.empty()) - return None; + return std::nullopt; // Prioritze typedef declarations. // This is needed in case of C struct typedefs. E.g.: @@ -1153,7 +1153,7 @@ for (Decl *D : LookupRes) if (auto *TD = dyn_cast(D)) return ACtx.getTypeDeclType(TD).getCanonicalType(); - return None; + return std::nullopt; } } lookupTy(ACtx); @@ -1170,7 +1170,7 @@ Optional operator()(Optional Ty) { if (Ty) return operator()(*Ty); - return None; + return std::nullopt; } } getRestrictTy(ACtx); class GetPointerTy { @@ -1182,13 +1182,13 @@ Optional operator()(Optional Ty) { if (Ty) return operator()(*Ty); - return None; + return std::nullopt; } } getPointerTy(ACtx); class { public: Optional operator()(Optional Ty) { - return Ty ? Optional(Ty->withConst()) : None; + return Ty ? Optional(Ty->withConst()) : std::nullopt; } QualType operator()(QualType Ty) { return Ty.withConst(); } } getConstTy; @@ -1204,7 +1204,7 @@ if (Ty) { return operator()(*Ty); } - return None; + return std::nullopt; } } getMaxValue(BVF); diff --git a/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedPointee.cpp b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedPointee.cpp --- a/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedPointee.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedPointee.cpp @@ -234,7 +234,7 @@ // The region we'd like to acquire. const auto *R = V.getAsRegion()->getAs(); if (!R) - return None; + return std::nullopt; VisitedRegions.insert(R); @@ -245,7 +245,7 @@ R = Tmp->getAs(); if (!R) - return None; + return std::nullopt; // We found a cyclic pointer, like int *ptr = (int *)&ptr. 
if (!VisitedRegions.insert(R).second) diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp @@ -51,13 +51,13 @@ const Type *T = Base->getType().getTypePtrOrNull(); if (!T) - return llvm::None; + return std::nullopt; const CXXRecordDecl *R = T->getAsCXXRecordDecl(); if (!R) - return llvm::None; + return std::nullopt; if (!R->hasDefinition()) - return llvm::None; + return std::nullopt; return hasPublicRefAndDeref(R) ? R : nullptr; } @@ -67,7 +67,7 @@ R = R->getDefinition(); if (!R) - return llvm::None; + return std::nullopt; if (hasPublicRefAndDeref(R)) return true; @@ -90,7 +90,7 @@ bool BasesResult = R->lookupInBases(isRefCountableBase, Paths, /*LookupInDependent =*/true); if (AnyInconclusiveBase) - return llvm::None; + return std::nullopt; return BasesResult; } @@ -119,7 +119,7 @@ llvm::Optional IsRefCountable = isRefCountable(Class); if (!IsRefCountable) - return llvm::None; + return std::nullopt; return (*IsRefCountable); } diff --git a/clang/lib/StaticAnalyzer/Checkers/Yaml.h b/clang/lib/StaticAnalyzer/Checkers/Yaml.h --- a/clang/lib/StaticAnalyzer/Checkers/Yaml.h +++ b/clang/lib/StaticAnalyzer/Checkers/Yaml.h @@ -28,7 +28,7 @@ llvm::Optional getConfiguration(CheckerManager &Mgr, Checker *Chk, StringRef Option, StringRef ConfigFile) { if (ConfigFile.trim().empty()) - return None; + return std::nullopt; llvm::vfs::FileSystem *FS = llvm::vfs::getRealFileSystem().get(); llvm::ErrorOr> Buffer = @@ -38,7 +38,7 @@ Mgr.reportInvalidCheckerOptionValue(Chk, Option, "a valid filename instead of '" + std::string(ConfigFile) + "'"); - return None; + return std::nullopt; } llvm::yaml::Input Input(Buffer.get()->getBuffer()); @@ -48,7 +48,7 @@ if (std::error_code ec = Input.error()) { Mgr.reportInvalidCheckerOptionValue(Chk, Option, "a valid yaml file: " + ec.message()); - return None; + return std::nullopt; } return Config; diff --git a/clang/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp b/clang/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp --- a/clang/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp +++ b/clang/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp @@ -64,19 +64,18 @@ ExplorationStrategyKind AnalyzerOptions::getExplorationStrategy() const { auto K = - llvm::StringSwitch>( - ExplorationStrategy) + llvm::StringSwitch>( + ExplorationStrategy) .Case("dfs", ExplorationStrategyKind::DFS) .Case("bfs", ExplorationStrategyKind::BFS) - .Case("unexplored_first", - ExplorationStrategyKind::UnexploredFirst) + .Case("unexplored_first", ExplorationStrategyKind::UnexploredFirst) .Case("unexplored_first_queue", ExplorationStrategyKind::UnexploredFirstQueue) .Case("unexplored_first_location_queue", ExplorationStrategyKind::UnexploredFirstLocationQueue) .Case("bfs_block_dfs_contents", ExplorationStrategyKind::BFSBlockDFSContents) - .Default(None); + .Default(std::nullopt); assert(K && "User mode is invalid."); return K.value(); } @@ -87,19 +86,19 @@ .Case("none", CTUPhase1InliningKind::None) .Case("small", CTUPhase1InliningKind::Small) .Case("all", CTUPhase1InliningKind::All) - .Default(None); + .Default(std::nullopt); assert(K && "CTU inlining mode is invalid."); return K.value(); } IPAKind AnalyzerOptions::getIPAMode() const { auto K = llvm::StringSwitch>(IPAMode) - .Case("none", IPAK_None) - .Case("basic-inlining", IPAK_BasicInlining) - .Case("inlining", IPAK_Inlining) - .Case("dynamic", 
IPAK_DynamicDispatch) - .Case("dynamic-bifurcate", IPAK_DynamicDispatchBifurcate) - .Default(None); + .Case("none", IPAK_None) + .Case("basic-inlining", IPAK_BasicInlining) + .Case("inlining", IPAK_Inlining) + .Case("dynamic", IPAK_DynamicDispatch) + .Case("dynamic-bifurcate", IPAK_DynamicDispatchBifurcate) + .Default(std::nullopt); assert(K && "IPA Mode is invalid."); return K.value(); @@ -111,14 +110,13 @@ if (getIPAMode() < IPAK_Inlining) return false; - auto K = - llvm::StringSwitch>( - CXXMemberInliningMode) - .Case("constructors", CIMK_Constructors) - .Case("destructors", CIMK_Destructors) - .Case("methods", CIMK_MemberFunctions) - .Case("none", CIMK_None) - .Default(None); + auto K = llvm::StringSwitch>( + CXXMemberInliningMode) + .Case("constructors", CIMK_Constructors) + .Case("destructors", CIMK_Destructors) + .Case("methods", CIMK_MemberFunctions) + .Case("none", CIMK_None) + .Default(std::nullopt); assert(K && "Invalid c++ member function inlining mode."); @@ -162,12 +160,12 @@ bool AnalyzerOptions::getCheckerBooleanOption(StringRef CheckerName, StringRef OptionName, bool SearchInParents) const { - auto Ret = llvm::StringSwitch>( - getCheckerStringOption(CheckerName, OptionName, - SearchInParents)) - .Case("true", true) - .Case("false", false) - .Default(None); + auto Ret = + llvm::StringSwitch>( + getCheckerStringOption(CheckerName, OptionName, SearchInParents)) + .Case("true", true) + .Case("false", false) + .Default(std::nullopt); assert(Ret && "This option should be either 'true' or 'false', and should've been " diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp --- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp @@ -1576,11 +1576,11 @@ FileID FID = SM.getFileID(ExpansionRange.getBegin()); if (FID != SM.getFileID(ExpansionRange.getEnd())) - return None; + return std::nullopt; Optional Buffer = SM.getBufferOrNone(FID); if (!Buffer) - return None; + return std::nullopt; unsigned BeginOffset = SM.getFileOffset(ExpansionRange.getBegin()); unsigned EndOffset = SM.getFileOffset(ExpansionRange.getEnd()); @@ -1591,7 +1591,7 @@ // SourceRange is covering a large or small amount of space in the user's // editor. if (Snippet.find_first_of("\r\n") != StringRef::npos) - return None; + return std::nullopt; // This isn't Unicode-aware, but it doesn't need to be. return Snippet.size(); @@ -2332,25 +2332,25 @@ "BugReport::getInterestingnessKind currently can only handle 2 different " "tracking kinds! Please define what tracking kind should we return here " "when the kind of getAsRegion() and getAsSymbol() is different!"); - return None; + return std::nullopt; } Optional PathSensitiveBugReport::getInterestingnessKind(SymbolRef sym) const { if (!sym) - return None; + return std::nullopt; // We don't currently consider metadata symbols to be interesting // even if we know their region is interesting. Is that correct behavior? 
auto It = InterestingSymbols.find(sym); if (It == InterestingSymbols.end()) - return None; + return std::nullopt; return It->getSecond(); } Optional PathSensitiveBugReport::getInterestingnessKind(const MemRegion *R) const { if (!R) - return None; + return std::nullopt; R = R->getBaseRegion(); auto It = InterestingRegions.find(R); @@ -2359,7 +2359,7 @@ if (const auto *SR = dyn_cast(R)) return getInterestingnessKind(SR->getSymbol()); - return None; + return std::nullopt; } bool PathSensitiveBugReport::isInteresting(SVal V) const { diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp --- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -227,7 +227,7 @@ if (auto FieldL = State->getSVal(ME, LCtx).getAs()) return State->getRawSVal(*FieldL, FD->getType()); - return None; + return std::nullopt; } static Optional @@ -236,7 +236,7 @@ if (Optional V = getSValForVar(CondVarExpr, N)) if (auto CI = V->getAs()) return &CI->getValue(); - return None; + return std::nullopt; } static bool isVarAnInterestingCondition(const Expr *CondVarExpr, @@ -627,11 +627,11 @@ int depth /* = 0 */) { if (depth == DEREFERENCE_LIMIT) // Limit the recursion depth. - return None; + return std::nullopt; if (const auto *RDX = dyn_cast(RD)) if (!RDX->hasDefinition()) - return None; + return std::nullopt; // Recursively examine the base classes. // Note that following base classes does not increase the recursion depth. @@ -669,7 +669,7 @@ return Out; } - return None; + return std::nullopt; } PathDiagnosticPieceRef @@ -933,7 +933,7 @@ ProgramStateRef State = N->getState(); auto *LCtx = N->getLocationContext(); if (!S) - return None; + return std::nullopt; if (const auto *DS = dyn_cast(S)) { if (const auto *VD = dyn_cast(DS->getSingleDecl())) @@ -948,7 +948,7 @@ return RHS->getBeginLoc(); } } - return None; + return std::nullopt; } }; diff --git a/clang/lib/StaticAnalyzer/Core/CallDescription.cpp b/clang/lib/StaticAnalyzer/Core/CallDescription.cpp --- a/clang/lib/StaticAnalyzer/Core/CallDescription.cpp +++ b/clang/lib/StaticAnalyzer/Core/CallDescription.cpp @@ -32,7 +32,7 @@ return RequiredParams; if (RequiredArgs) return RequiredArgs; - return None; + return std::nullopt; } ento::CallDescription::CallDescription(CallDescriptionFlags Flags, diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp --- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp +++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp @@ -519,7 +519,7 @@ CallEvent::getReturnValueUnderConstruction() const { const auto *CC = getConstructionContext(); if (!CC) - return None; + return std::nullopt; EvalCallOptions CallOpts; ExprEngine &Engine = getState()->getStateManager().getOwningEngine(); @@ -532,7 +532,7 @@ ArrayRef AnyFunctionCall::parameters() const { const FunctionDecl *D = getDecl(); if (!D) - return None; + return std::nullopt; return D->parameters(); } @@ -857,7 +857,7 @@ ArrayRef BlockCall::parameters() const { const BlockDecl *D = getDecl(); if (!D) - return None; + return std::nullopt; return D->parameters(); } @@ -946,7 +946,7 @@ ArrayRef ObjCMethodCall::parameters() const { const ObjCMethodDecl *D = getDecl(); if (!D) - return None; + return std::nullopt; return D->parameters(); } diff --git a/clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp b/clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp --- a/clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp +++ 
b/clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp @@ -114,10 +114,10 @@ const Preprocessor &PP) { const auto *MacroII = PP.getIdentifierInfo(Macro); if (!MacroII) - return llvm::None; + return std::nullopt; const MacroInfo *MI = PP.getMacroInfo(MacroII); if (!MI) - return llvm::None; + return std::nullopt; // Filter out parens. std::vector FilteredTokens; @@ -131,12 +131,12 @@ // FIXME: EOF macro token coming from a PCH file on macOS while marked as // literal, doesn't contain any literal data if (!T.isLiteral() || !T.getLiteralData()) - return llvm::None; + return std::nullopt; StringRef ValueStr = StringRef(T.getLiteralData(), T.getLength()); llvm::APInt IntValue; constexpr unsigned AutoSenseRadix = 0; if (ValueStr.getAsInteger(AutoSenseRadix, IntValue)) - return llvm::None; + return std::nullopt; // Parse an optional minus sign. size_t Size = FilteredTokens.size(); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -2712,7 +2712,7 @@ // If the condition is still unknown, give up. if (X.isUnknownOrUndef()) - return None; + return std::nullopt; DefinedSVal V = X.castAs(); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp @@ -333,7 +333,7 @@ // Return early if we are unable to reliably foresee // the future stack frame. if (!FutureSFC) - return None; + return std::nullopt; // This should be equivalent to Caller->getDecl() for now, but // FutureSFC->getDecl() is likely to support better stuff (like @@ -342,7 +342,7 @@ // FIXME: Support for variadic arguments is not implemented here yet. if (CallEvent::isVariadic(CalleeD)) - return None; + return std::nullopt; // Operator arguments do not correspond to operator parameters // because this-argument is implemented as a normal argument in @@ -350,7 +350,7 @@ const TypedValueRegion *TVR = Caller->getParameterLocation( *Caller->getAdjustedParameterIndex(Idx), BldrCtx->blockCount()); if (!TVR) - return None; + return std::nullopt; return loc::MemRegionVal(TVR); }; diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp @@ -391,9 +391,8 @@ // result onto the work list. 
// CEENode -> Dst -> WorkList NodeBuilderContext Ctx(Engine, calleeCtx->getCallSiteBlock(), CEENode); - SaveAndRestore NBCSave(currBldrCtx, - &Ctx); - SaveAndRestore CBISave(currStmtIdx, calleeCtx->getIndex()); + SaveAndRestore NBCSave(currBldrCtx, &Ctx); + SaveAndRestore CBISave(currStmtIdx, calleeCtx->getIndex()); CallEventRef<> UpdatedCall = Call.cloneWithState(CEEState); diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp --- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp +++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -1041,7 +1041,7 @@ T = getContext().VoidTy; if (!T->getAs()) { FunctionProtoType::ExtProtoInfo Ext; - T = getContext().getFunctionType(T, None, Ext); + T = getContext().getFunctionType(T, std::nullopt, Ext); } T = getContext().getBlockPointerType(T); diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -1095,7 +1095,7 @@ // This case is: A != B != 0 -> diseqiality check. return false; default: - return llvm::None; + return std::nullopt; } } @@ -1139,7 +1139,7 @@ if (End) { return *End; } - return llvm::None; + return std::nullopt; } template @@ -1365,7 +1365,7 @@ llvm::Optional convert(const Range &Origin, APSIntType To) { if (To.testInRange(Origin.From(), false) != APSIntType::RTR_Within || To.testInRange(Origin.To(), false) != APSIntType::RTR_Within) { - return llvm::None; + return std::nullopt; } return Range(ValueFactory.Convert(To, Origin.From()), ValueFactory.Convert(To, Origin.To())); @@ -1455,13 +1455,13 @@ // Do not negate if the type cannot be meaningfully negated. if (!T->isUnsignedIntegerOrEnumerationType() && !T->isSignedIntegerOrEnumerationType()) - return llvm::None; + return std::nullopt; if (SymbolRef NegatedSym = F()) if (const RangeSet *NegatedRange = getConstraint(State, NegatedSym)) return RangeFactory.negate(*NegatedRange); - return llvm::None; + return std::nullopt; } Optional getRangeForNegatedUnarySym(const UnarySymExpr *USE) { @@ -1511,7 +1511,7 @@ // We currently do not support <=> (C++20). if (!BinaryOperator::isComparisonOp(CurrentOP) || (CurrentOP == BO_Cmp)) - return llvm::None; + return std::nullopt; static const OperatorRelationsTable CmpOpTable{}; @@ -1581,14 +1581,14 @@ : getFalseRange(T); } - return llvm::None; + return std::nullopt; } Optional getRangeForEqualities(const SymSymExpr *Sym) { Optional Equality = meansEquality(Sym); if (!Equality) - return llvm::None; + return std::nullopt; if (Optional AreEqual = EquivalenceClass::areEqual(State, Sym->getLHS(), Sym->getRHS())) { @@ -1602,7 +1602,7 @@ return getFalseRange(Sym->getType()); } - return llvm::None; + return std::nullopt; } RangeSet getTrueRange(QualType T) { @@ -2106,7 +2106,7 @@ if (!Constraint.containsZero()) return true; - return llvm::None; + return std::nullopt; } ProgramStateRef State; @@ -2523,7 +2523,7 @@ return false; // It is not clear. 
- return llvm::None; + return std::nullopt; } [[nodiscard]] ProgramStateRef diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -1541,11 +1541,11 @@ const SubRegion *R, bool AllowSubregionBindings) { Optional V = B.getDefaultBinding(R); if (!V) - return None; + return std::nullopt; Optional LCV = V->getAs(); if (!LCV) - return None; + return std::nullopt; // If the LCV is for a subregion, the types might not match, and we shouldn't // reuse the binding. @@ -1554,7 +1554,7 @@ !RegionTy->isVoidPointerType()) { QualType SourceRegionTy = LCV->getRegion()->getValueType(); if (!SVB.getContext().hasSameUnqualifiedType(RegionTy, SourceRegionTy)) - return None; + return std::nullopt; } if (!AllowSubregionBindings) { @@ -1564,7 +1564,7 @@ collectSubRegionBindings(Bindings, SVB, *B.lookup(R->getBaseRegion()), R, /*IncludeAllDefaultBindings=*/true); if (Bindings.size() > 1) - return None; + return std::nullopt; } return *LCV; @@ -1717,7 +1717,7 @@ // account. return UnknownVal(); } - return None; + return std::nullopt; } Optional RegionStoreManager::getConstantValFromConstArrayInitializer( @@ -1730,7 +1730,7 @@ std::tie(SValOffsets, Base) = getElementRegionOffsetsWithBase(R); const VarRegion *VR = dyn_cast(Base); if (!VR) - return None; + return std::nullopt; assert(!SValOffsets.empty() && "getElementRegionOffsets guarantees the " "offsets vector is not empty."); @@ -1741,7 +1741,7 @@ if (!VD->getType().isConstQualified() && !R->getElementType().isConstQualified() && (!B.isMainAnalysis() || !VD->hasGlobalStorage())) - return None; + return std::nullopt; // Array's declaration should have `ConstantArrayType` type, because only this // type contains an array extent. It may happen that array type can be of @@ -1756,13 +1756,13 @@ // NOTE: If `Init` is non-null, then a new `VD` is non-null for sure. So check // `Init` for null only and don't worry about the replaced `VD`. if (!Init) - return None; + return std::nullopt; // Array's declaration should have ConstantArrayType type, because only this // type contains an array extent. const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(VD->getType()); if (!CAT) - return None; + return std::nullopt; // Get array extents. SmallVector Extents = getConstantArrayExtents(CAT); @@ -1774,7 +1774,7 @@ // auto x = ptr[4][2]; // UB // FIXME: Should return UndefinedVal. if (SValOffsets.size() != Extents.size()) - return None; + return std::nullopt; SmallVector ConcreteOffsets; if (Optional V = convertOffsetsFromSvalToUnsigneds(SValOffsets, Extents, @@ -1796,7 +1796,7 @@ // FIXME: Handle CompoundLiteralExpr. - return None; + return std::nullopt; } /// Returns an SVal, if possible, for the specified position of an @@ -1908,7 +1908,7 @@ } } } - return None; + return std::nullopt; } SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B, @@ -2042,7 +2042,7 @@ llvm_unreachable("Unknown default value"); } - return None; + return std::nullopt; } SVal RegionStoreManager::getLazyBinding(const SubRegion *LazyBindingRegion, @@ -2439,16 +2439,16 @@ // If we don't know the size, create a lazyCompoundVal instead. if (!CAT) - return None; + return std::nullopt; QualType Ty = CAT->getElementType(); if (!(Ty->isScalarType() || Ty->isReferenceType())) - return None; + return std::nullopt; // If the array is too big, create a LCV instead. 
uint64_t ArrSize = CAT->getSize().getLimitedValue(); if (ArrSize > SmallArrayLimit) - return None; + return std::nullopt; RegionBindingsRef NewB = B; @@ -2578,7 +2578,7 @@ if (const CXXRecordDecl *Class = dyn_cast(RD)) if (Class->getNumBases() != 0 || Class->getNumVBases() != 0) - return None; + return std::nullopt; for (const auto *FD : RD->fields()) { if (FD->isUnnamedBitfield()) @@ -2587,7 +2587,7 @@ // If there are too many fields, or if any of the fields are aggregates, // just use the LCV as a default binding. if (Fields.size() == SmallStructLimit) - return None; + return std::nullopt; QualType Ty = FD->getType(); @@ -2597,7 +2597,7 @@ continue; if (!(Ty->isScalarType() || Ty->isReferenceType())) - return None; + return std::nullopt; Fields.push_back(FD); } diff --git a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp --- a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp @@ -301,7 +301,7 @@ SValBuilder::getCastedMemRegionVal(const MemRegion *R, QualType Ty) { if (auto OptR = StateMgr.getStoreManager().castRegion(R, Ty)) return loc::MemRegionVal(*OptR); - return None; + return std::nullopt; } /// Return a memory region for the 'this' object reference. @@ -391,7 +391,7 @@ const Expr *SE = CE->getSubExpr(); Optional Val = getConstantVal(SE); if (!Val) - return None; + return std::nullopt; return evalCast(*Val, CE->getType(), SE->getType()); } } @@ -403,7 +403,7 @@ default: { // Don't try to come up with a value for materialized temporaries. if (E->isGLValue()) - return None; + return std::nullopt; ASTContext &Ctx = getContext(); Expr::EvalResult Result; @@ -414,7 +414,7 @@ if (E->isNullPointerConstant(Ctx, Expr::NPC_ValueDependentIsNotNull)) return makeNullWithType(E->getType()); - return None; + return std::nullopt; } } } diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp --- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -361,35 +361,35 @@ // rearrange additive operations but rearrange comparisons only if // option is set. if (!SVB.getAnalyzerOptions().ShouldAggressivelySimplifyBinaryOperation) - return None; + return std::nullopt; SymbolRef LSym = Lhs.getAsSymbol(); if (!LSym) - return None; + return std::nullopt; if (BinaryOperator::isComparisonOp(Op)) { SingleTy = LSym->getType(); if (ResultTy != SVB.getConditionType()) - return None; + return std::nullopt; // Initialize SingleTy later with a symbol's type. } else if (BinaryOperator::isAdditiveOp(Op)) { SingleTy = ResultTy; if (LSym->getType() != SingleTy) - return None; + return std::nullopt; } else { // Don't rearrange other operations. - return None; + return std::nullopt; } assert(!SingleTy.isNull() && "We should have figured out the type by now!"); // Rearrange signed symbolic expressions only if (!SingleTy->isSignedIntegerOrEnumerationType()) - return None; + return std::nullopt; SymbolRef RSym = Rhs.getAsSymbol(); if (!RSym || RSym->getType() != SingleTy) - return None; + return std::nullopt; BasicValueFactory &BV = State->getBasicVals(); llvm::APSInt LInt, RInt; @@ -397,7 +397,7 @@ std::tie(RSym, RInt) = decomposeSymbol(RSym, BV); if (!shouldRearrange(State, Op, LSym, LInt, SingleTy) || !shouldRearrange(State, Op, RSym, RInt, SingleTy)) - return None; + return std::nullopt; // We know that no overflows can occur anymore. 
return doRearrangeUnchecked(State, Op, LSym, LInt, RSym, RInt); diff --git a/clang/lib/StaticAnalyzer/Core/Store.cpp b/clang/lib/StaticAnalyzer/Core/Store.cpp --- a/clang/lib/StaticAnalyzer/Core/Store.cpp +++ b/clang/lib/StaticAnalyzer/Core/Store.cpp @@ -89,7 +89,7 @@ // We don't know what to make of it. Return a NULL region, which // will be interpreted as UnknownVal. - return None; + return std::nullopt; } // Now assume we are casting from pointer to pointer. Other cases should @@ -175,7 +175,7 @@ // If we cannot compute a raw offset, throw up our hands and return // a NULL MemRegion*. if (!baseR) - return None; + return std::nullopt; CharUnits off = rawOff.getOffset(); @@ -390,7 +390,7 @@ // We failed if the region we ended up with has perfect type info. if (isa(MR)) - return None; + return std::nullopt; return UnknownVal(); } diff --git a/clang/lib/Support/RISCVVIntrinsicUtils.cpp b/clang/lib/Support/RISCVVIntrinsicUtils.cpp --- a/clang/lib/Support/RISCVVIntrinsicUtils.cpp +++ b/clang/lib/Support/RISCVVIntrinsicUtils.cpp @@ -65,7 +65,7 @@ } // Illegal vscale result would be less than 1 if (Log2ScaleResult < 0) - return llvm::None; + return std::nullopt; return 1 << Log2ScaleResult; } @@ -433,7 +433,7 @@ uint32_t Log2EEW; if (ComplexTT.second.getAsInteger(10, Log2EEW)) { llvm_unreachable("Invalid Log2EEW value!"); - return None; + return std::nullopt; } switch (Log2EEW) { case 3: @@ -450,13 +450,13 @@ break; default: llvm_unreachable("Invalid Log2EEW value, should be [3-6]"); - return None; + return std::nullopt; } } else if (ComplexTT.first == "FixedSEW") { uint32_t NewSEW; if (ComplexTT.second.getAsInteger(10, NewSEW)) { llvm_unreachable("Invalid FixedSEW value!"); - return None; + return std::nullopt; } switch (NewSEW) { case 8: @@ -473,13 +473,13 @@ break; default: llvm_unreachable("Invalid FixedSEW value, should be 8, 16, 32 or 64"); - return None; + return std::nullopt; } } else if (ComplexTT.first == "LFixedLog2LMUL") { int32_t Log2LMUL; if (ComplexTT.second.getAsInteger(10, Log2LMUL)) { llvm_unreachable("Invalid LFixedLog2LMUL value!"); - return None; + return std::nullopt; } switch (Log2LMUL) { case -3: @@ -505,13 +505,13 @@ break; default: llvm_unreachable("Invalid LFixedLog2LMUL value, should be [-3, 3]"); - return None; + return std::nullopt; } } else if (ComplexTT.first == "SFixedLog2LMUL") { int32_t Log2LMUL; if (ComplexTT.second.getAsInteger(10, Log2LMUL)) { llvm_unreachable("Invalid SFixedLog2LMUL value!"); - return None; + return std::nullopt; } switch (Log2LMUL) { case -3: @@ -537,7 +537,7 @@ break; default: llvm_unreachable("Invalid LFixedLog2LMUL value, should be [-3, 3]"); - return None; + return std::nullopt; } } else { @@ -788,13 +788,13 @@ ArrayRef Prototype) { // LMUL x NF must be less than or equal to 8. if ((Log2LMUL >= 1) && (1 << Log2LMUL) * NF > 8) - return llvm::None; + return std::nullopt; RVVTypes Types; for (const PrototypeDescriptor &Proto : Prototype) { auto T = computeType(BT, Log2LMUL, Proto); if (!T) - return llvm::None; + return std::nullopt; // Record legal type index Types.push_back(T.value()); } @@ -823,7 +823,7 @@ return &(It->second); if (IllegalTypes.count(Idx)) - return llvm::None; + return std::nullopt; // Compute type and record the result. RVVType T(BT, Log2LMUL, Proto); @@ -835,7 +835,7 @@ } // Record illegal type index. 
IllegalTypes.insert(Idx); - return llvm::None; + return std::nullopt; } //===----------------------------------------------------------------------===// diff --git a/clang/lib/Tooling/ASTDiff/ASTDiff.cpp b/clang/lib/Tooling/ASTDiff/ASTDiff.cpp --- a/clang/lib/Tooling/ASTDiff/ASTDiff.cpp +++ b/clang/lib/Tooling/ASTDiff/ASTDiff.cpp @@ -691,7 +691,7 @@ if (ND->getDeclName().isIdentifier()) return ND->getQualifiedNameAsString(); } - return llvm::None; + return std::nullopt; } llvm::Optional Node::getIdentifier() const { @@ -699,7 +699,7 @@ if (ND->getDeclName().isIdentifier()) return ND->getName(); } - return llvm::None; + return std::nullopt; } namespace { diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -135,8 +135,8 @@ DiagOpts.ShowCarets = false; // Don't write out diagnostic file. DiagOpts.DiagnosticSerializationFile.clear(); - // Don't treat warnings as errors. - DiagOpts.Warnings.push_back("no-error"); + // Don't emit warnings as errors (and all other warnings too). + DiagOpts.IgnoreWarnings = true; } /// A clang tool that runs the preprocessor in a mode that's optimized for @@ -147,7 +147,7 @@ StringRef WorkingDirectory, DependencyConsumer &Consumer, llvm::IntrusiveRefCntPtr DepFS, ScanningOutputFormat Format, bool OptimizeArgs, bool EagerLoadModules, - bool DisableFree, llvm::Optional ModuleName = None) + bool DisableFree, llvm::Optional ModuleName = std::nullopt) : WorkingDirectory(WorkingDirectory), Consumer(Consumer), DepFS(std::move(DepFS)), Format(Format), OptimizeArgs(OptimizeArgs), EagerLoadModules(EagerLoadModules), DisableFree(DisableFree), @@ -219,7 +219,7 @@ if (llvm::ErrorOr Entry = LocalDepFS->getOrCreateFileSystemEntry(File.getName())) return Entry->getDirectiveTokens(); - return None; + return std::nullopt; }; } diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -451,9 +451,9 @@ MDC.ScanInstance.getASTReader()->visitTopLevelModuleMaps( *MF, [&](FileEntryRef FE) { - if (FE.getName().endswith("__inferred_module.map")) + if (FE.getNameAsRequested().endswith("__inferred_module.map")) return; - MD.ModuleMapFileDeps.emplace_back(FE.getName()); + MD.ModuleMapFileDeps.emplace_back(FE.getNameAsRequested()); }); CompilerInvocation CI = MDC.makeInvocationForModuleBuildWithoutOutputs( @@ -580,23 +580,25 @@ return true; } -static StringRef makeAbsolute(CompilerInstance &CI, StringRef Path, - SmallVectorImpl &Storage) { - if (llvm::sys::path::is_absolute(Path)) +static StringRef makeAbsoluteAndPreferred(CompilerInstance &CI, StringRef Path, + SmallVectorImpl &Storage) { + if (llvm::sys::path::is_absolute(Path) && + !llvm::sys::path::is_style_windows(llvm::sys::path::Style::native)) return Path; Storage.assign(Path.begin(), Path.end()); CI.getFileManager().makeAbsolutePath(Storage); + llvm::sys::path::make_preferred(Storage); return StringRef(Storage.data(), Storage.size()); } void ModuleDepCollector::addFileDep(StringRef Path) { llvm::SmallString<256> Storage; - Path = makeAbsolute(ScanInstance, Path, Storage); + Path = makeAbsoluteAndPreferred(ScanInstance, Path, Storage); FileDeps.push_back(std::string(Path)); } void 
ModuleDepCollector::addFileDep(ModuleDeps &MD, StringRef Path) { llvm::SmallString<256> Storage; - Path = makeAbsolute(ScanInstance, Path, Storage); + Path = makeAbsoluteAndPreferred(ScanInstance, Path, Storage); MD.FileDeps.insert(Path); } diff --git a/clang/lib/Tooling/Inclusions/HeaderAnalysis.cpp b/clang/lib/Tooling/Inclusions/HeaderAnalysis.cpp --- a/clang/lib/Tooling/Inclusions/HeaderAnalysis.cpp +++ b/clang/lib/Tooling/Inclusions/HeaderAnalysis.cpp @@ -68,14 +68,14 @@ llvm::Optional parseIWYUPragma(const char *Text) { // Skip the comment start, // or /*. if (Text[0] != '/' || (Text[1] != '/' && Text[1] != '*')) - return llvm::None; + return std::nullopt; bool BlockComment = Text[1] == '*'; Text += 2; // Per spec, direcitves are whitespace- and case-sensitive. constexpr llvm::StringLiteral IWYUPragma = " IWYU pragma: "; if (strncmp(Text, IWYUPragma.data(), IWYUPragma.size())) - return llvm::None; + return std::nullopt; Text += IWYUPragma.size(); const char *End = Text; while (*End != 0 && *End != '\n') diff --git a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp --- a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp +++ b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp @@ -58,7 +58,7 @@ // (second) raw_identifier name is checked. bool checkAndConsumeDirectiveWithName( Lexer &Lex, StringRef Name, Token &Tok, - llvm::Optional RawIDName = llvm::None) { + llvm::Optional RawIDName = std::nullopt) { bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) && @@ -352,7 +352,7 @@ for (const auto &Inc : It->second) if ((IsAngled && StringRef(Inc.Name).startswith("<")) || (!IsAngled && StringRef(Inc.Name).startswith("\""))) - return llvm::None; + return std::nullopt; std::string Quoted = std::string(llvm::formatv(IsAngled ? "<{0}>" : "\"{0}\"", IncludeName)); StringRef QuotedName = Quoted; diff --git a/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp b/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp --- a/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp +++ b/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp @@ -81,7 +81,7 @@ ensureInitialized(); auto It = HeaderIDs->find(Name); if (It == HeaderIDs->end()) - return llvm::None; + return std::nullopt; return Header(It->second); } llvm::StringRef Header::name() const { return HeaderNames[ID]; } @@ -95,7 +95,7 @@ if (It != NSSymbols->end()) return Symbol(It->second); } - return llvm::None; + return std::nullopt; } Header Symbol::header() const { return Header(SymbolHeaderIDs[ID]); } llvm::SmallVector
Symbol::headers() const { @@ -137,7 +137,7 @@ } NSSymbolMap *Symbols = namespaceSymbols(cast_or_null(DC)); if (!Symbols) - return llvm::None; + return std::nullopt; llvm::StringRef Name = [&]() -> llvm::StringRef { for (const auto *SymDC : llvm::reverse(IntermediateDecl)) { @@ -153,11 +153,11 @@ return ""; }(); if (Name.empty()) - return llvm::None; + return std::nullopt; auto It = Symbols->find(Name); if (It == Symbols->end()) - return llvm::None; + return std::nullopt; return Symbol(It->second); } diff --git a/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp b/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp --- a/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp +++ b/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp @@ -207,7 +207,7 @@ Type = foldType(*Type); // The contract is to store None instead of TY_INVALID. if (Type == types::TY_INVALID) - Type = llvm::None; + Type = std::nullopt; } // Produce a CompileCommand for \p filename, based on this one. @@ -291,7 +291,7 @@ if (Opt.matches(driver::options::OPT_x)) return types::lookupTypeForTypeSpecifier(Arg.getValue()); } - return None; + return std::nullopt; } // Try to interpret the argument as '-std='. @@ -299,7 +299,7 @@ using namespace driver::options; if (Arg.getOption().matches(ClangCLMode ? OPT__SLASH_std : OPT_std_EQ)) return LangStandard::getLangKind(Arg.getValue()); - return None; + return std::nullopt; } }; diff --git a/clang/lib/Tooling/Refactoring/ASTSelection.cpp b/clang/lib/Tooling/Refactoring/ASTSelection.cpp --- a/clang/lib/Tooling/Refactoring/ASTSelection.cpp +++ b/clang/lib/Tooling/Refactoring/ASTSelection.cpp @@ -55,7 +55,7 @@ SelectedASTNode Result = std::move(SelectionStack.back()); SelectionStack.pop_back(); if (Result.Children.empty()) - return None; + return std::nullopt; return std::move(Result); } @@ -63,14 +63,14 @@ // Avoid traversing the semantic expressions. They should be handled by // looking through the appropriate opaque expressions in order to build // a meaningful selection tree. - llvm::SaveAndRestore LookThrough(LookThroughOpaqueValueExprs, true); + llvm::SaveAndRestore LookThrough(LookThroughOpaqueValueExprs, true); return TraverseStmt(E->getSyntacticForm()); } bool TraverseOpaqueValueExpr(OpaqueValueExpr *E) { if (!LookThroughOpaqueValueExprs) return true; - llvm::SaveAndRestore LookThrough(LookThroughOpaqueValueExprs, false); + llvm::SaveAndRestore LookThrough(LookThroughOpaqueValueExprs, false); return TraverseStmt(E->getSourceExpr()); } @@ -380,17 +380,17 @@ const SelectedASTNode &ASTSelection) { // Code range is selected when the selection range is not empty. if (SelectionRange.getBegin() == SelectionRange.getEnd()) - return None; + return std::nullopt; llvm::SmallVector ContainSelection; findDeepestWithKind(ASTSelection, ContainSelection, SourceSelectionKind::ContainsSelection); // We are looking for a selection in one body of code, so let's focus on // one matching result. 
if (ContainSelection.size() != 1) - return None; + return std::nullopt; SelectedNodeWithParents &Selected = ContainSelection[0]; if (!Selected.Node.get().Node.get()) - return None; + return std::nullopt; const Stmt *CodeRangeStmt = Selected.Node.get().Node.get(); if (!isa(CodeRangeStmt)) { Selected.canonicalize(); diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -457,7 +457,7 @@ // Mapping an empty range is ambiguous in case of empty mappings at either end // of the range, bail out in that case. if (Expanded.empty()) - return llvm::None; + return std::nullopt; const syntax::Token *First = &Expanded.front(); const syntax::Token *Last = &Expanded.back(); auto [FirstSpelled, FirstMapping] = spelledForExpandedToken(First); @@ -466,7 +466,7 @@ FileID FID = SourceMgr->getFileID(FirstSpelled->location()); // FIXME: Handle multi-file changes by trying to map onto a common root. if (FID != SourceMgr->getFileID(LastSpelled->location())) - return llvm::None; + return std::nullopt; const MarkedFile &File = Files.find(FID)->second; @@ -485,7 +485,7 @@ SourceRange Range = spelledForExpandedSlow( First->location(), Last->location(), Prev, Next, FID, *SourceMgr); if (Range.isInvalid()) - return llvm::None; + return std::nullopt; return getTokensCovering(File.SpelledTokens, Range, *SourceMgr); } @@ -494,9 +494,9 @@ unsigned FirstExpanded = Expanded.begin() - ExpandedTokens.data(); unsigned LastExpanded = Expanded.end() - ExpandedTokens.data(); if (FirstMapping && FirstExpanded != FirstMapping->BeginExpanded) - return llvm::None; + return std::nullopt; if (LastMapping && LastMapping->EndExpanded != LastExpanded) - return llvm::None; + return std::nullopt; return llvm::makeArrayRef( FirstMapping ? File.SpelledTokens.data() + FirstMapping->BeginSpelled : FirstSpelled, @@ -543,7 +543,7 @@ return M.BeginSpelled < SpelledIndex; }); if (M == File.Mappings.end() || M->BeginSpelled != SpelledIndex) - return llvm::None; + return std::nullopt; return makeExpansion(File, *M); } @@ -806,7 +806,7 @@ // In the simplest case, skips spelled tokens until finding one that produced // the NextExpanded token, and creates an empty mapping for them. // If Drain is provided, skips remaining tokens from that file instead. - void discard(llvm::Optional Drain = llvm::None) { + void discard(llvm::Optional Drain = std::nullopt) { SourceLocation Target = Drain ? SM.getLocForEndOfFile(*Drain) : SM.getExpansionLoc( diff --git a/clang/lib/Tooling/Transformer/Parsing.cpp b/clang/lib/Tooling/Transformer/Parsing.cpp --- a/clang/lib/Tooling/Transformer/Parsing.cpp +++ b/clang/lib/Tooling/Transformer/Parsing.cpp @@ -124,7 +124,7 @@ llvm::StringRef Key) { auto it = Map.find(Key); if (it == Map.end()) - return llvm::None; + return std::nullopt; return it->second; } @@ -157,7 +157,7 @@ if (State.Input.empty() || State.Input.front() != c) return makeParseError(State, ("expected char not found: " + llvm::Twine(c)).str()); - return makeParseProgress(advance(State, 1), llvm::None); + return makeParseProgress(advance(State, 1), std::nullopt); } // Parses an identitifer "token" -- handles preceding whitespace. 
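
[Editor's note — illustrative only, not part of the patch. The hunks above and below all apply the same mechanical migration: replacing llvm::None with std::nullopt wherever a function returning llvm::Optional bails out, and in defaulted Optional parameters. A minimal C++ sketch of that pattern, using std::optional directly and hypothetical names (nameOf, report), under the assumption that llvm::Optional<T> is interchangeable with std::optional<T> at this stage of the migration:

#include <optional>
#include <string>

// Returning "no value": was `return None;` / `return llvm::None;`,
// now `return std::nullopt;`.
std::optional<std::string> nameOf(const char *Id) {
  if (!Id)
    return std::nullopt;   // previously: return None;
  return std::string(Id);
}

// Defaulted optional parameters change the same way (sketch of a declaration):
//   void report(StringRef Msg, Optional<SVal> V = None);   // before
void report(const std::string &Msg,
            std::optional<int> V = std::nullopt);           // after

End of editorial note.]
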
diff --git a/clang/lib/Tooling/Transformer/SourceCode.cpp b/clang/lib/Tooling/Transformer/SourceCode.cpp --- a/clang/lib/Tooling/Transformer/SourceCode.cpp +++ b/clang/lib/Tooling/Transformer/SourceCode.cpp @@ -96,7 +96,7 @@ CharSourceRange Range = Lexer::makeFileCharRange(EditRange, SM, LangOpts); bool IsInvalid = llvm::errorToBool(validateEditRange(Range, SM)); if (IsInvalid) - return llvm::None; + return std::nullopt; return Range; } diff --git a/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp b/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp --- a/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp +++ b/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp @@ -76,7 +76,7 @@ const ASTContext &Context) { StringRef Text = getText(E, Context); if (Text.empty()) - return llvm::None; + return std::nullopt; if (mayEverNeedParens(E)) return ("(" + Text + ")").str(); return Text.str(); @@ -90,13 +90,13 @@ StringRef Text = getText(*Op->getSubExpr()->IgnoreParenImpCasts(), Context); if (Text.empty()) - return llvm::None; + return std::nullopt; return Text.str(); } StringRef Text = getText(E, Context); if (Text.empty()) - return llvm::None; + return std::nullopt; // Add leading '*'. if (needParensAfterUnaryOperator(E)) return ("*(" + Text + ")").str(); @@ -113,13 +113,13 @@ StringRef Text = getText(*Op->getSubExpr()->IgnoreParenImpCasts(), Context); if (Text.empty()) - return llvm::None; + return std::nullopt; return Text.str(); } // Add leading '&'. StringRef Text = getText(E, Context); if (Text.empty()) - return llvm::None; + return std::nullopt; if (needParensAfterUnaryOperator(E)) { return ("&(" + Text + ")").str(); } @@ -136,7 +136,7 @@ const Expr *SubExpr = Op->getSubExpr()->IgnoreParenImpCasts(); StringRef DerefText = getText(*SubExpr, Context); if (DerefText.empty()) - return llvm::None; + return std::nullopt; if (needParensBeforeDotOrArrow(*SubExpr)) return ("(" + DerefText + ")->").str(); return (DerefText + "->").str(); @@ -145,7 +145,7 @@ // Add following '.'. StringRef Text = getText(E, Context); if (Text.empty()) - return llvm::None; + return std::nullopt; if (needParensBeforeDotOrArrow(E)) { return ("(" + Text + ").").str(); } @@ -162,7 +162,7 @@ const Expr *SubExpr = Op->getSubExpr()->IgnoreParenImpCasts(); StringRef DerefText = getText(*SubExpr, Context); if (DerefText.empty()) - return llvm::None; + return std::nullopt; if (needParensBeforeDotOrArrow(*SubExpr)) return ("(" + DerefText + ").").str(); return (DerefText + ".").str(); @@ -171,7 +171,7 @@ // Add following '->'. 
StringRef Text = getText(E, Context); if (Text.empty()) - return llvm::None; + return std::nullopt; if (needParensBeforeDotOrArrow(E)) return ("(" + Text + ")->").str(); return (Text + "->").str(); diff --git a/clang/test/ASTMerge/codegen-body/test.c b/clang/test/ASTMerge/codegen-body/test.c --- a/clang/test/ASTMerge/codegen-body/test.c +++ b/clang/test/ASTMerge/codegen-body/test.c @@ -1,4 +1,4 @@ -// UNSUPPORTED: powerpc64-ibm-aix +// UNSUPPORTED: target=powerpc64-ibm-aix{{.*}} // RUN: %clang_cc1 -emit-pch -o %t.1.ast %S/Inputs/body1.c // RUN: %clang_cc1 -emit-pch -o %t.2.ast %S/Inputs/body2.c // RUN: %clang_cc1 -emit-obj -o /dev/null -ast-merge %t.1.ast -ast-merge %t.2.ast %s diff --git a/clang/test/Analysis/cfref_PR2519.c b/clang/test/Analysis/cfref_PR2519.c --- a/clang/test/Analysis/cfref_PR2519.c +++ b/clang/test/Analysis/cfref_PR2519.c @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: %clang_analyze_cc1 -analyzer-checker=core,osx.cocoa.RetainCount,alpha.core -verify %s // expected-no-diagnostics diff --git a/clang/test/CXX/drs/dr3xx.cpp b/clang/test/CXX/drs/dr3xx.cpp --- a/clang/test/CXX/drs/dr3xx.cpp +++ b/clang/test/CXX/drs/dr3xx.cpp @@ -905,7 +905,7 @@ protected: using A::bar; // #dr360-bar-using-decl public: - using A::baz; // #dr360-baz-using-decl + using A::baz; }; int main() { diff --git a/clang/test/ClangScanDeps/diagnostic-pragmas.c b/clang/test/ClangScanDeps/diagnostic-pragmas.c new file mode 100644 --- /dev/null +++ b/clang/test/ClangScanDeps/diagnostic-pragmas.c @@ -0,0 +1,35 @@ +// Test scanning deps does not have more errors than the regular compilation. + +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json + +// Check the regular compilation does not fail. +// RUN: %clang -fsyntax-only %t/test.c -I %t/include -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache -Wnon-modular-include-in-module -Werror=non-modular-include-in-module + +// And now scanning deps should succeed too. 
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 + +//--- cdb.json.template +[ + { + "directory": "DIR", + "command": "clang -fsyntax-only DIR/test.c -I DIR/include -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache -Wnon-modular-include-in-module -Werror=non-modular-include-in-module", + "file": "DIR/test.c" + }, +] + +//--- include/nonmodular.h +// empty + +//--- include/modular-includer.h +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnon-modular-include-in-module" +#include +#pragma clang diagnostic pop + +//--- include/module.modulemap +module ModularIncluder { header "modular-includer.h" } + +//--- test.c +#include diff --git a/clang/test/ClangScanDeps/modules-extern-submodule.c b/clang/test/ClangScanDeps/modules-extern-submodule.c new file mode 100644 --- /dev/null +++ b/clang/test/ClangScanDeps/modules-extern-submodule.c @@ -0,0 +1,128 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t + +//--- tu.m +@import first; + +//--- first/first/module.modulemap +module first { header "first.h" } +//--- first/first/first.h +#include + +//--- second/second/module.modulemap +module second { extern module sub "sub.modulemap" } +//--- second/second/sub.modulemap +module second.sub { header "sub.h" } +//--- second/second/sub.h +@import third; + +//--- third/module.modulemap +module third {} + +//--- cdb.json.template +[{ + "file": "DIR/tu.c", + "directory": "DIR", + "command": "clang -I DIR/first -I DIR/second -I DIR/third -fmodules -fmodules-cache-path=DIR/cache -c DIR/tu.m -o DIR/tu.o" +}] + +// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json +// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full > %t/result.json +// RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t + +// CHECK: { +// CHECK-NEXT: "modules": [ +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "module-name": "second" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/first/first/module.modulemap", +// CHECK-NEXT: "command-line": [ +// CHECK-NEXT: "-cc1", +// CHECK: "-fmodule-map-file=[[PREFIX]]/second/second/module.modulemap" +// CHECK-NOT: "-fmodule-map-file=[[PREFIX]]/second/second/sub.modulemap" +// CHECK-NOT: "-fmodule-map-file=[[PREFIX]]/third/module.modulemap" +// CHECK: "-fmodule-file=second=[[PREFIX]]/cache/{{.*}}/second-{{.*}}.pcm" +// CHECK: ], +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/first/first/first.h", +// CHECK-NEXT: "[[PREFIX]]/first/first/module.modulemap", +// CHECK-NEXT: "[[PREFIX]]/second/second/module.modulemap", +// CHECK-NEXT: "[[PREFIX]]/second/second/sub.modulemap" +// CHECK-NEXT: ], +// CHECK-NEXT: "name": "first" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "module-name": "third" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/second/second/module.modulemap", +// CHECK-NEXT: "command-line": [ +// CHECK-NEXT: "-cc1", +// CHECK: "-fmodule-map-file=[[PREFIX]]/third/module.modulemap", +// CHECK: "-fmodule-file=third=[[PREFIX]]/cache/{{.*}}/third-{{.*}}.pcm", +// CHECK: ], +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/second/second/module.modulemap", +// CHECK-NEXT: "[[PREFIX]]/second/second/sub.h", +// CHECK-NEXT: 
"[[PREFIX]]/second/second/sub.modulemap", +// CHECK-NEXT: "[[PREFIX]]/third/module.modulemap" +// CHECK-NEXT: ], +// CHECK-NEXT: "name": "second" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [], +// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/third/module.modulemap", +// CHECK-NEXT: "command-line": [ +// CHECK-NEXT: "-cc1", +// CHECK-NOT: "-fmodule-map-file= +// CHECK-NOT: "-fmodule-file=third= +// CHECK: ], +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/third/module.modulemap" +// CHECK-NEXT: ], +// CHECK-NEXT: "name": "third" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "translation-units": [ +// CHECK-NEXT: { +// CHECK-NEXT: "commands": [ +// CHECK-NEXT: { +// CHECK-NEXT: "clang-context-hash": "{{.*}}", +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "module-name": "first" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "command-line": [ +// CHECK-NEXT: "-cc1", +// CHECK: "-fmodule-map-file=[[PREFIX]]/first/first/module.modulemap", +// CHECK: "-fmodule-file=first=[[PREFIX]]/cache/{{.*}}/first-{{.*}}.pcm", +// CHECK: ], +// CHECK-NEXT: "executable": "clang", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/tu.m" +// CHECK-NEXT: ], +// CHECK-NEXT: "input-file": "[[PREFIX]]/tu.c" +// CHECK-NEXT: } +// CHECK: ] +// CHECK: } +// CHECK: ] +// CHECK: } + +// RUN: %deps-to-rsp %t/result.json --module-name=third > %t/third.cc1.rsp +// RUN: %deps-to-rsp %t/result.json --module-name=second > %t/second.cc1.rsp +// RUN: %deps-to-rsp %t/result.json --module-name=first > %t/first.cc1.rsp +// RUN: %clang @%t/third.cc1.rsp +// RUN: %clang @%t/second.cc1.rsp +// RUN: %clang @%t/first.cc1.rsp diff --git a/clang/test/ClangScanDeps/modules-extern-unrelated.m b/clang/test/ClangScanDeps/modules-extern-unrelated.m new file mode 100644 --- /dev/null +++ b/clang/test/ClangScanDeps/modules-extern-unrelated.m @@ -0,0 +1,135 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t + +//--- tu.m +@import zeroth; + +//--- zeroth/module.modulemap +module zeroth { header "zeroth.h" } +//--- zeroth/zeroth.h +@import first; +#include "second.h" + +//--- first/module.modulemap +module first {} +module first_other { header "first_other.h" } +//--- first/first_other.h + +//--- second/module.modulemap +extern module second "second.modulemap" +//--- second/second.modulemap +module second { header "second.h" } +//--- second/second.h +#include "first_other.h" + +//--- cdb.json.template +[{ + "directory": "DIR", + "file": "DIR/tu.m", + "command": "clang -fmodules -fmodules-cache-path=DIR/cache -I DIR/zeroth -I DIR/first -I DIR/second -c DIR/tu.m -o DIR/tu.o" +}] + +// RUN: sed -e "s|DIR|%/t|g" -e "s|INPUTS|%/S/Inputs|g" %t/cdb.json.template > %t/cdb.json +// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full > %t/result.json +// RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t + +// CHECK: { +// CHECK-NEXT: "modules": [ +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [], +// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/first/module.modulemap", +// CHECK-NEXT: "command-line": [ +// CHECK: ], +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/first/module.modulemap" +// CHECK-NEXT: ], +// CHECK-NEXT: "name": "first" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [], +// CHECK-NEXT: "clang-modulemap-file": 
"[[PREFIX]]/first/module.modulemap", +// CHECK-NEXT: "command-line": [ +// CHECK: ], +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/first/first_other.h", +// CHECK-NEXT: "[[PREFIX]]/first/module.modulemap" +// CHECK-NEXT: ], +// CHECK-NEXT: "name": "first_other" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "module-name": "first_other" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/second/second.modulemap", +// CHECK-NEXT: "command-line": [ +// CHECK: ], +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/first/module.modulemap", +// CHECK-NEXT: "[[PREFIX]]/second/second.h", +// CHECK-NEXT: "[[PREFIX]]/second/second.modulemap" +// CHECK-NEXT: ], +// CHECK-NEXT: "name": "second" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "module-name": "first" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "module-name": "second" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/zeroth/module.modulemap", +// CHECK-NEXT: "command-line": [ +// CHECK: ], +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/first/module.modulemap", +// CHECK-NEXT: "[[PREFIX]]/second/module.modulemap", +// CHECK-NEXT: "[[PREFIX]]/second/second.modulemap", +// CHECK-NEXT: "[[PREFIX]]/zeroth/module.modulemap", +// CHECK-NEXT: "[[PREFIX]]/zeroth/zeroth.h" +// CHECK-NEXT: ], +// CHECK-NEXT: "name": "zeroth" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "translation-units": [ +// CHECK-NEXT: { +// CHECK-NEXT: "commands": [ +// CHECK-NEXT: { +// CHECK-NEXT: "clang-context-hash": "{{.*}}", +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "module-name": "zeroth" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "command-line": [ +// CHECK: ], +// CHECK-NEXT: "executable": "clang", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/tu.m" +// CHECK-NEXT: ], +// CHECK-NEXT: "input-file": "[[PREFIX]]/tu.m" +// CHECK-NEXT: } +// CHECK: ] +// CHECK: } +// CHECK: ] +// CHECK: } + +// RUN: %deps-to-rsp --module-name=first %t/result.json > %t/first.cc1.rsp +// RUN: %deps-to-rsp --module-name=first_other %t/result.json > %t/first_other.cc1.rsp +// RUN: %deps-to-rsp --module-name=second %t/result.json > %t/second.cc1.rsp +// RUN: %deps-to-rsp --module-name=zeroth %t/result.json > %t/zeroth.cc1.rsp +// RUN: %clang @%t/first.cc1.rsp +// RUN: %clang @%t/first_other.cc1.rsp +// RUN: %clang @%t/second.cc1.rsp +// RUN: %clang @%t/zeroth.cc1.rsp diff --git a/clang/test/ClangScanDeps/modules-file-name-as-requested.m b/clang/test/ClangScanDeps/modules-file-name-as-requested.m new file mode 100644 --- /dev/null +++ b/clang/test/ClangScanDeps/modules-file-name-as-requested.m @@ -0,0 +1,64 @@ +// This test checks that the module map paths we're reporting are the as-requested +// paths (as opposed to the paths files resolve to after going through VFS overlays). 
+ +// RUN: rm -rf %t +// RUN: split-file %s %t + +//--- real/module.modulemap +framework module FW { header "Header.h" } +//--- real/Header.h +//--- overlay.json.template +{ + "case-sensitive": "false", + "version": "0", + "roots": [ + { + "contents": [ + { + "external-contents" : "DIR/real/Header.h", + "name" : "Header.h", + "type" : "file" + } + ], + "name": "DIR/frameworks/FW.framework/Headers", + "type": "directory" + }, + { + "contents": [ + { + "external-contents": "DIR/real/module.modulemap", + "name": "module.modulemap", + "type": "file" + } + ], + "name": "DIR/frameworks/FW.framework/Modules", + "type": "directory" + } + ] +} + +//--- modules/module.modulemap +module Importer { header "header.h" } +//--- modules/header.h +#include + +//--- cdb.json.template +[{ + "file": "DIR/tu.m", + "directory": "DIR", + "command": "clang -fmodules -fmodules-cache-path=DIR/cache -Werror=non-modular-include-in-module -ivfsoverlay DIR/overlay.json -F DIR/frameworks -I DIR/modules -c DIR/tu.m -o DIR/tu.o" +}] + +//--- tu.m +@import Importer; + +// RUN: sed -e "s|DIR|%/t|g" %t/overlay.json.template > %t/overlay.json +// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json +// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full > %t/result.json + +// RUN: %deps-to-rsp %t/result.json --module-name=FW > %t/FW.cc1.rsp +// RUN: %deps-to-rsp %t/result.json --module-name=Importer > %t/Importer.cc1.rsp +// RUN: %deps-to-rsp %t/result.json --tu-index=0 > %t/tu.rsp +// RUN: %clang @%t/FW.cc1.rsp +// RUN: %clang @%t/Importer.cc1.rsp +// RUN: %clang @%t/tu.rsp diff --git a/clang/test/ClangScanDeps/modules-full-by-mod-name.cpp b/clang/test/ClangScanDeps/modules-full-by-mod-name.cpp --- a/clang/test/ClangScanDeps/modules-full-by-mod-name.cpp +++ b/clang/test/ClangScanDeps/modules-full-by-mod-name.cpp @@ -1,4 +1,4 @@ -// UNSUPPORTED: powerpc64-ibm-aix +// UNSUPPORTED: target=powerpc64-ibm-aix{{.*}} // RUN: rm -rf %t.dir // RUN: rm -rf %t.cdb // RUN: mkdir -p %t.dir diff --git a/clang/test/ClangScanDeps/modules-no-undeclared-includes.c b/clang/test/ClangScanDeps/modules-no-undeclared-includes.c --- a/clang/test/ClangScanDeps/modules-no-undeclared-includes.c +++ b/clang/test/ClangScanDeps/modules-no-undeclared-includes.c @@ -1,6 +1,6 @@ // Unsupported on AIX because we don't support the requisite "__clangast" // section in XCOFF yet. -// UNSUPPORTED: aix +// UNSUPPORTED: target={{.*}}-aix{{.*}} // RUN: rm -rf %t && mkdir %t // RUN: split-file %s %t diff --git a/clang/test/ClangScanDeps/modules-pch-common-submodule.c b/clang/test/ClangScanDeps/modules-pch-common-submodule.c --- a/clang/test/ClangScanDeps/modules-pch-common-submodule.c +++ b/clang/test/ClangScanDeps/modules-pch-common-submodule.c @@ -1,6 +1,6 @@ // Unsupported on AIX because we don't support the requisite "__clangast" // section in XCOFF yet. -// UNSUPPORTED: aix +// UNSUPPORTED: target={{.*}}-aix{{.*}} // Check that when depending on a precompiled module, we depend on the // **top-level** module. Submodules don't have some information present (for diff --git a/clang/test/ClangScanDeps/modules-pch-common-via-submodule.c b/clang/test/ClangScanDeps/modules-pch-common-via-submodule.c --- a/clang/test/ClangScanDeps/modules-pch-common-via-submodule.c +++ b/clang/test/ClangScanDeps/modules-pch-common-via-submodule.c @@ -1,6 +1,6 @@ // Unsupported on AIX because we don't support the requisite "__clangast" // section in XCOFF yet. 
-// UNSUPPORTED: aix +// UNSUPPORTED: target={{.*}}-aix{{.*}} // Check that we discover dependency on a precompiled module when it's imported // by a **submodule** instead of a top-level module. diff --git a/clang/test/ClangScanDeps/modules-pch-dangling.c b/clang/test/ClangScanDeps/modules-pch-dangling.c --- a/clang/test/ClangScanDeps/modules-pch-dangling.c +++ b/clang/test/ClangScanDeps/modules-pch-dangling.c @@ -1,6 +1,6 @@ // Unsupported on AIX because we don't support the requisite "__clangast" // section in XCOFF yet. -// UNSUPPORTED: aix +// UNSUPPORTED: target={{.*}}-aix{{.*}} // This test checks that the dependency scanner can handle larger amount of // explicitly built modules retrieved from the PCH. diff --git a/clang/test/ClangScanDeps/modules-pch.c b/clang/test/ClangScanDeps/modules-pch.c --- a/clang/test/ClangScanDeps/modules-pch.c +++ b/clang/test/ClangScanDeps/modules-pch.c @@ -1,6 +1,6 @@ // Unsupported on AIX because we don't support the requisite "__clangast" // section in XCOFF yet. -// UNSUPPORTED: aix +// UNSUPPORTED: target={{.*}}-aix{{.*}} // RUN: rm -rf %t && mkdir %t // RUN: cp %S/Inputs/modules-pch/* %t diff --git a/clang/test/ClangScanDeps/modules-symlink-dir-vfs.c b/clang/test/ClangScanDeps/modules-symlink-dir-vfs.c new file mode 100644 --- /dev/null +++ b/clang/test/ClangScanDeps/modules-symlink-dir-vfs.c @@ -0,0 +1,90 @@ +// This test checks that we're not canonicalizing framework directories that +// play a role in VFS remapping. This could lead header search to fail when +// building that module. + +// RUN: rm -rf %t +// RUN: split-file %s %t + +// REQUIRES: shell + +// RUN: mkdir -p %t/frameworks-symlink +// RUN: ln -s %t/frameworks/FW.framework %t/frameworks-symlink/FW.framework + +// RUN: mkdir -p %t/copy +// RUN: cp %t/frameworks/FW.framework/Headers/FW.h %t/copy +// RUN: cp %t/frameworks/FW.framework/Headers/Header.h %t/copy + +//--- frameworks/FW.framework/Modules/module.modulemap +framework module FW { umbrella header "FW.h" } +//--- frameworks/FW.framework/Headers/FW.h +#import +//--- frameworks/FW.framework/Headers/Header.h +// empty + +//--- tu.m +@import FW; + +//--- overlay.json.template +{ + "version": 0, + "case-sensitive": "false", + "roots": [ + { + "contents": [ + { + "external-contents": "DIR/copy/Header.h", + "name": "Header.h", + "type": "file" + }, + { + "external-contents": "DIR/copy/FW.h", + "name": "FW.h", + "type": "file" + } + ], + "name": "DIR/frameworks-symlink/FW.framework/Headers", + "type": "directory" + } + ] +} + +//--- cdb.json.template +[{ + "directory": "DIR", + "file": "DIR/tu.m", + "command": "clang -fmodules -fmodules-cache-path=DIR/cache -ivfsoverlay DIR/overlay.json -F DIR/frameworks-symlink -c DIR/tu.m -o DIR/tu.o -Werror=non-modular-include-in-framework-module" +}] + +// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json +// RUN: sed -e "s|DIR|%/t|g" %t/overlay.json.template > %t/overlay.json + +// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full > %t/result.json +// RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t + +// CHECK: { +// CHECK-NEXT: "modules": [ +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [], +// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/frameworks-symlink/FW.framework/Modules/module.modulemap", +// CHECK-NEXT: "command-line": [ +// CHECK-NEXT: "-cc1", +// CHECK: "-emit-module", +// CHECK-NEXT: "-x", +// CHECK-NEXT: "objective-c", +// CHECK-NEXT: "[[PREFIX]]/frameworks-symlink/FW.framework/Modules/module.modulemap", +// 
CHECK: ], +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/copy/FW.h", +// CHECK-NEXT: "[[PREFIX]]/copy/Header.h", +// CHECK-NEXT: "[[PREFIX]]/frameworks-symlink/FW.framework/Modules/module.modulemap" +// CHECK-NEXT: ], +// CHECK-NEXT: "name": "FW" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "translation-units": [ +// CHECK: ] +// CHECK: } + +// RUN: %deps-to-rsp %t/result.json --module-name=FW > %t/FW.cc1.rsp +// RUN: %clang @%t/FW.cc1.rsp diff --git a/clang/test/ClangScanDeps/modules-symlink.c b/clang/test/ClangScanDeps/modules-symlink.c --- a/clang/test/ClangScanDeps/modules-symlink.c +++ b/clang/test/ClangScanDeps/modules-symlink.c @@ -2,7 +2,7 @@ // RUN: split-file %s %t // Unsupported on AIX because we don't support the requisite "__clangast" // section in XCOFF yet. -// UNSUPPORTED: system-windows, aix +// UNSUPPORTED: system-windows, target={{.*}}-aix{{.*}} //--- cdb_pch.json [ diff --git a/clang/test/ClangScanDeps/modules-transitive.c b/clang/test/ClangScanDeps/modules-transitive.c new file mode 100644 --- /dev/null +++ b/clang/test/ClangScanDeps/modules-transitive.c @@ -0,0 +1,58 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t + +//--- tu.m +#include "first.h" + +//--- first/module.modulemap +module first { header "first.h" } +//--- first/first.h +#include "second.h" + +//--- second/module.modulemap +module second { header "second.h" } +//--- second/second.h +#include "third.h" + +//--- third/module.modulemap +module third { header "third.h" } +//--- third/third.h +// empty + +//--- cdb.json.template +[{ + "file": "DIR/tu.c", + "directory": "DIR", + "command": "clang -I DIR/first -I DIR/second -I DIR/third -fmodules -fmodules-cache-path=DIR/cache -c DIR/tu.m -o DIR/tu.o" +}] + +// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json +// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full > %t/result.json +// RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t + +// CHECK: { +// CHECK-NEXT: "modules": [ +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "module-name": "second" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/first/module.modulemap", +// CHECK-NEXT: "command-line": [ +// CHECK-NEXT: "-cc1", +// CHECK-NOT: "-fmodule-map-file=[[PREFIX]]/third/module.modulemap" +// CHECK: "-fmodule-map-file=[[PREFIX]]/second/module.modulemap" +// CHECK-NOT: "-fmodule-map-file=[[PREFIX]]/third/module.modulemap" +// CHECK: ], +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/first/first.h" +// CHECK-NEXT: "[[PREFIX]]/first/module.modulemap" +// CHECK-NEXT: "[[PREFIX]]/second/module.modulemap" +// CHECK-NEXT: ], +// CHECK-NEXT: "name": "first" +// CHECK-NEXT: } +// CHECK: ] +// CHECK: } diff --git a/clang/test/ClangScanDeps/resource_directory.c b/clang/test/ClangScanDeps/resource_directory.c --- a/clang/test/ClangScanDeps/resource_directory.c +++ b/clang/test/ClangScanDeps/resource_directory.c @@ -1,4 +1,4 @@ -// UNSUPPORTED: powerpc64-ibm-aix +// UNSUPPORTED: target=powerpc64-ibm-aix{{.*}} // REQUIRES: shell // RUN: rm -rf %t && mkdir %t diff --git a/clang/test/CodeGen/cfstring2.c b/clang/test/CodeGen/cfstring2.c --- a/clang/test/CodeGen/cfstring2.c +++ b/clang/test/CodeGen/cfstring2.c @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: %clang_cc1 
-emit-llvm %s -o %t typedef const struct __CFString * CFStringRef; diff --git a/clang/test/CodeGen/thinlto-backend-option.ll b/clang/test/CodeGen/thinlto-backend-option.ll --- a/clang/test/CodeGen/thinlto-backend-option.ll +++ b/clang/test/CodeGen/thinlto-backend-option.ll @@ -6,7 +6,7 @@ ; scenario independent of any particular backend options that may exist now or ; in the future. -; XFAIL: aix +; XFAIL: target={{.*}}-aix{{.*}} ; RUN: %clang -flto=thin -c -o %t.o %s ; RUN: llvm-lto -thinlto -o %t %t.o diff --git a/clang/test/CodeGen/thinlto-emit-llvm.c b/clang/test/CodeGen/thinlto-emit-llvm.c --- a/clang/test/CodeGen/thinlto-emit-llvm.c +++ b/clang/test/CodeGen/thinlto-emit-llvm.c @@ -1,4 +1,4 @@ -// XFAIL: aix +// XFAIL: target={{.*}}-aix{{.*}} // Test to ensure -emit-llvm and -emit-llvm-bc work when invoking the // ThinLTO backend path. diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -426,6 +426,8 @@ // RUN: /Bt \ // RUN: /Bt+ \ // RUN: /clr:pure \ +// RUN: /d1import_no_registry \ +// RUN: /d1nodatetime \ // RUN: /d2FH4 \ // RUN: /docname \ // RUN: /experimental:external \ diff --git a/clang/test/Driver/clang-offload-bundler-asserts-on.c b/clang/test/Driver/clang-offload-bundler-asserts-on.c --- a/clang/test/Driver/clang-offload-bundler-asserts-on.c +++ b/clang/test/Driver/clang-offload-bundler-asserts-on.c @@ -1,6 +1,6 @@ // REQUIRES: x86-registered-target // REQUIRES: asserts -// UNSUPPORTED: darwin, aix +// UNSUPPORTED: darwin, target={{.*}}-aix{{.*}} // Generate the file we can bundle. // RUN: %clang -O0 -target %itanium_abi_triple %s -c -o %t.o diff --git a/clang/test/Driver/clang-offload-bundler.c b/clang/test/Driver/clang-offload-bundler.c --- a/clang/test/Driver/clang-offload-bundler.c +++ b/clang/test/Driver/clang-offload-bundler.c @@ -1,6 +1,6 @@ // REQUIRES: x86-registered-target // REQUIRES: powerpc-registered-target -// UNSUPPORTED: darwin, aix +// UNSUPPORTED: darwin, target={{.*}}-aix{{.*}} // // Generate all the types of files we can bundle. diff --git a/clang/test/Driver/experimental-library-flag.cpp b/clang/test/Driver/experimental-library-flag.cpp --- a/clang/test/Driver/experimental-library-flag.cpp +++ b/clang/test/Driver/experimental-library-flag.cpp @@ -3,7 +3,7 @@ // XFAIL: target={{.*-windows.*}}, target={{.*-(ps4|ps5)}} // For some reason, this fails with a core dump on AIX. This needs to be investigated. 
-// UNSUPPORTED: aix +// UNSUPPORTED: target={{.*}}-aix{{.*}} // RUN: %clangxx -fexperimental-library -stdlib=libc++ -### %s 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-LIBCXX %s // RUN: %clangxx -fexperimental-library -stdlib=libstdc++ -### %s 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-LIBSTDCXX %s diff --git a/clang/test/Driver/fat-archive-unbundle-ext.c b/clang/test/Driver/fat-archive-unbundle-ext.c --- a/clang/test/Driver/fat-archive-unbundle-ext.c +++ b/clang/test/Driver/fat-archive-unbundle-ext.c @@ -1,5 +1,5 @@ // REQUIRES: x86-registered-target -// UNSUPPORTED: target={{.*-windows.*}}, darwin, aix +// UNSUPPORTED: target={{.*-windows.*}}, darwin, target={{.*}}-aix{{.*}} // Generate dummy fat object // RUN: %clang -O0 -target %itanium_abi_triple %s -c -o %t.host.o diff --git a/clang/test/Driver/fuchsia.c b/clang/test/Driver/fuchsia.c --- a/clang/test/Driver/fuchsia.c +++ b/clang/test/Driver/fuchsia.c @@ -183,7 +183,7 @@ // CHECK-SCUDO-X86: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" // CHECK-SCUDO-X86: "-fsanitize=safe-stack,scudo" // CHECK-SCUDO-X86: "-pie" -// CHECK-SCUDO-X86: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-fuchsia{{/|\\\\}}libclang_rt.scudo.so" +// CHECK-SCUDO-X86: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}fuchsia{{/|\\\\}}libclang_rt.scudo_standalone-x86_64.so" // RUN: %clang -### %s --target=aarch64-unknown-fuchsia \ // RUN: -fsanitize=scudo 2>&1 \ @@ -193,7 +193,7 @@ // CHECK-SCUDO-AARCH64: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" // CHECK-SCUDO-AARCH64: "-fsanitize=shadow-call-stack,scudo" // CHECK-SCUDO-AARCH64: "-pie" -// CHECK-SCUDO-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aarch64-unknown-fuchsia{{/|\\\\}}libclang_rt.scudo.so" +// CHECK-SCUDO-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}fuchsia{{/|\\\\}}libclang_rt.scudo_standalone-aarch64.so" // RUN: %clang -### %s --target=x86_64-unknown-fuchsia \ // RUN: -fsanitize=scudo -fPIC -shared 2>&1 \ @@ -202,7 +202,7 @@ // RUN: | FileCheck %s -check-prefix=CHECK-SCUDO-SHARED // CHECK-SCUDO-SHARED: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" // CHECK-SCUDO-SHARED: "-fsanitize=safe-stack,scudo" -// CHECK-SCUDO-SHARED: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-fuchsia{{/|\\\\}}libclang_rt.scudo.so" +// CHECK-SCUDO-SHARED: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}fuchsia{{/|\\\\}}libclang_rt.scudo_standalone-x86_64.so" // RUN: %clang -### %s --target=aarch64-unknown-fuchsia \ // RUN: -fsanitize=leak 2>&1 \ diff --git a/clang/test/Driver/hip-version.hip b/clang/test/Driver/hip-version.hip --- a/clang/test/Driver/hip-version.hip +++ b/clang/test/Driver/hip-version.hip @@ -1,6 +1,6 @@ // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target -// UNSUPPORTED: aix +// UNSUPPORTED: target={{.*}}-aix{{.*}} // RUN: %clang -v --rocm-path=%S/Inputs/rocm 2>&1 \ // RUN: | FileCheck -check-prefixes=FOUND %s diff --git a/clang/test/Driver/memtag-stack.c b/clang/test/Driver/memtag-stack.c --- a/clang/test/Driver/memtag-stack.c +++ b/clang/test/Driver/memtag-stack.c @@ -1,7 +1,7 @@ -// RUN: %clang -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag -mllvm -stack-safety-print=1 %s -S -o - 2>&1 | FileCheck %s --check-prefix=CHECK-NO-SAFETY -// RUN: %clang -O1 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag -mllvm -stack-safety-print=1 %s -S -o - 2>&1 | FileCheck %s --check-prefix=CHECK-SAFETY -// RUN: %clang -O2 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag -mllvm -stack-safety-print=1 %s -S -o - 2>&1 | FileCheck %s 
--check-prefix=CHECK-SAFETY -// RUN: %clang -O3 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag -mllvm -stack-safety-print=1 %s -S -o - 2>&1 | FileCheck %s --check-prefix=CHECK-SAFETY +// RUN: %clang -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag-stack -mllvm -stack-safety-print=1 %s -S -o - 2>&1 | FileCheck %s --check-prefix=CHECK-NO-SAFETY +// RUN: %clang -O1 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag-stack -mllvm -stack-safety-print=1 %s -S -o - 2>&1 | FileCheck %s --check-prefix=CHECK-SAFETY +// RUN: %clang -O2 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag-stack -mllvm -stack-safety-print=1 %s -S -o - 2>&1 | FileCheck %s --check-prefix=CHECK-SAFETY +// RUN: %clang -O3 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag-stack -mllvm -stack-safety-print=1 %s -S -o - 2>&1 | FileCheck %s --check-prefix=CHECK-SAFETY // REQUIRES: aarch64-registered-target diff --git a/clang/test/Driver/memtag_lto.c b/clang/test/Driver/memtag-stack_lto.c rename from clang/test/Driver/memtag_lto.c rename to clang/test/Driver/memtag-stack_lto.c --- a/clang/test/Driver/memtag_lto.c +++ b/clang/test/Driver/memtag-stack_lto.c @@ -33,14 +33,14 @@ // RUN: rm -f %t* // -O0: both are unsafe. -// RUN: %clang -O0 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag -mllvm -stack-safety-print %s -S -o - 2>&1 | FileCheck %s +// RUN: %clang -O0 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag-stack -mllvm -stack-safety-print %s -S -o - 2>&1 | FileCheck %s // No LTO: just one is safe. -// RUN: %clang -O1 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag -mllvm -stack-safety-print %s -S -o /dev/null 2>&1 | FileCheck %s -check-prefixes=SSI,XUNSAFE,YSAFE +// RUN: %clang -O1 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag-stack -mllvm -stack-safety-print %s -S -o /dev/null 2>&1 | FileCheck %s -check-prefixes=SSI,XUNSAFE,YSAFE // Full LTO: both are safe. -// RUN: %clang -O1 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag -c %s -Xclang -opaque-pointers -flto=full -o %t.ltonewpm1.bc -// RUN: %clang -O1 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag -c -DBUILD2 %s -Xclang -opaque-pointers -flto=full -o %t.ltonewpm2.bc +// RUN: %clang -O1 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag-stack -c %s -Xclang -opaque-pointers -flto=full -o %t.ltonewpm1.bc +// RUN: %clang -O1 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag-stack -c -DBUILD2 %s -Xclang -opaque-pointers -flto=full -o %t.ltonewpm2.bc // RUN: llvm-lto2 run -lto-opaque-pointers -o %t.ltonewpm %t.ltonewpm1.bc %t.ltonewpm2.bc -save-temps -stack-safety-print -thinlto-threads 1 -O1 \ // RUN: -r %t.ltonewpm1.bc,fn,plx \ // RUN: -r %t.ltonewpm1.bc,use,lx \ @@ -50,8 +50,8 @@ // RUN: -r %t.ltonewpm2.bc,z, 2>&1 | FileCheck %s -check-prefixes=SSI,XSAFE,YSAFE // Thin LTO: both are safe. 
-// RUN: %clang -O1 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag -c %s -Xclang -opaque-pointers -flto=thin -o %t.thinltonewpm1.bc -// RUN: %clang -O1 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag -c -DBUILD2 %s -Xclang -opaque-pointers -flto=thin -o %t.thinltonewpm2.bc +// RUN: %clang -O1 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag-stack -c %s -Xclang -opaque-pointers -flto=thin -o %t.thinltonewpm1.bc +// RUN: %clang -O1 -target aarch64-unknown-linux -march=armv8+memtag -fsanitize=memtag-stack -c -DBUILD2 %s -Xclang -opaque-pointers -flto=thin -o %t.thinltonewpm2.bc // RUN: llvm-lto2 run -lto-opaque-pointers -o %t.thinltonewpm %t.thinltonewpm1.bc %t.thinltonewpm2.bc -save-temps -stack-safety-print -thinlto-threads 1 -O1 \ // RUN: -r %t.thinltonewpm1.bc,fn,plx \ // RUN: -r %t.thinltonewpm1.bc,use,lx \ diff --git a/clang/test/Driver/p.c b/clang/test/Driver/p.c new file mode 100644 --- /dev/null +++ b/clang/test/Driver/p.c @@ -0,0 +1,7 @@ +/// For most targets -p is legacy. We used to report -Wunused-command-line-argument. +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -c -p %s 2>&1 | FileCheck %s --check-prefix=ERR + +// RUN: %clang -### --target=x86_64-unknown-openbsd -c -p %s 2>&1 | FileCheck %s --implicit-check-not=error: +// RUN: %clang -### --target=powerpc64-ibm-aix -c -p %s 2>&1 | FileCheck %s --implicit-check-not=error: + +// ERR: error: unsupported option '-p' for target {{.*}} diff --git a/clang/test/Driver/sanitizer-ld.c b/clang/test/Driver/sanitizer-ld.c --- a/clang/test/Driver/sanitizer-ld.c +++ b/clang/test/Driver/sanitizer-ld.c @@ -852,23 +852,12 @@ // RUN: | FileCheck --check-prefix=CHECK-SCUDO-LINUX %s // CHECK-SCUDO-LINUX: "{{.*}}ld{{(.exe)?}}" // CHECK-SCUDO-LINUX: "-pie" -// CHECK-SCUDO-LINUX: "--whole-archive" "{{.*}}libclang_rt.scudo-i386.a" "--no-whole-archive" +// CHECK-SCUDO-LINUX: "--whole-archive" "{{.*}}libclang_rt.scudo_standalone-i386.a" "--no-whole-archive" // CHECK-SCUDO-LINUX-NOT: "-lstdc++" // CHECK-SCUDO-LINUX: "-lpthread" // CHECK-SCUDO-LINUX: "-ldl" // CHECK-SCUDO-LINUX: "-lresolv" -// RUN: %clang -fsanitize=scudo -fsanitize-minimal-runtime -### %s 2>&1 \ -// RUN: --target=i386-unknown-linux -fuse-ld=ld \ -// RUN: -resource-dir=%S/Inputs/resource_dir \ -// RUN: --sysroot=%S/Inputs/basic_linux_tree \ -// RUN: | FileCheck --check-prefix=CHECK-SCUDO-MINIMAL-LINUX %s -// CHECK-SCUDO-MINIMAL-LINUX: "{{.*}}ld{{(.exe)?}}" -// CHECK-SCUDO-MINIMAL-LINUX: "-pie" -// CHECK-SCUDO-MINIMAL-LINUX: "--whole-archive" "{{.*}}libclang_rt.scudo_minimal-i386.a" "--no-whole-archive" -// CHECK-SCUDO-MINIMAL-LINUX: "-lpthread" -// CHECK-SCUDO-MINIMAL-LINUX: "-lresolv" - // RUN: %clang -### %s -o %t.so -shared 2>&1 \ // RUN: --target=i386-unknown-linux -fuse-ld=ld -fsanitize=scudo -shared-libsan \ // RUN: -resource-dir=%S/Inputs/resource_dir \ @@ -877,8 +866,8 @@ // // CHECK-SCUDO-SHARED-LINUX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" // CHECK-SCUDO-SHARED-LINUX-NOT: "-lc" -// CHECK-SCUDO-SHARED-LINUX-NOT: libclang_rt.scudo-i386.a" -// CHECK-SCUDO-SHARED-LINUX: libclang_rt.scudo-i386.so" +// CHECK-SCUDO-SHARED-LINUX-NOT: libclang_rt.scudo_standalone-i386.a" +// CHECK-SCUDO-SHARED-LINUX: libclang_rt.scudo_standalone-i386.so" // CHECK-SCUDO-SHARED-LINUX-NOT: "-lpthread" // CHECK-SCUDO-SHARED-LINUX-NOT: "-lrt" // CHECK-SCUDO-SHARED-LINUX-NOT: "-ldl" @@ -896,7 +885,7 @@ // CHECK-SCUDO-ANDROID: "-pie" // CHECK-SCUDO-ANDROID-NOT: "-lpthread" // CHECK-SCUDO-ANDROID-NOT: "-lresolv" -// CHECK-SCUDO-ANDROID: 
libclang_rt.scudo-arm-android.so" +// CHECK-SCUDO-ANDROID: libclang_rt.scudo_standalone-arm-android.so" // CHECK-SCUDO-ANDROID-NOT: "-lpthread" // CHECK-SCUDO-ANDROID-NOT: "-lresolv" @@ -907,7 +896,7 @@ // RUN: | FileCheck --check-prefix=CHECK-SCUDO-ANDROID-STATIC %s // CHECK-SCUDO-ANDROID-STATIC: "{{(.*[^.0-9A-Z_a-z])?}}ld.lld{{(.exe)?}}" // CHECK-SCUDO-ANDROID-STATIC: "-pie" -// CHECK-SCUDO-ANDROID-STATIC: "--whole-archive" "{{.*}}libclang_rt.scudo-arm-android.a" "--no-whole-archive" +// CHECK-SCUDO-ANDROID-STATIC: "--whole-archive" "{{.*}}libclang_rt.scudo_standalone-arm-android.a" "--no-whole-archive" // CHECK-SCUDO-ANDROID-STATIC-NOT: "-lstdc++" // CHECK-SCUDO-ANDROID-STATIC-NOT: "-lpthread" // CHECK-SCUDO-ANDROID-STATIC-NOT: "-lrt" diff --git a/clang/test/Driver/thinlto_backend.c b/clang/test/Driver/thinlto_backend.c --- a/clang/test/Driver/thinlto_backend.c +++ b/clang/test/Driver/thinlto_backend.c @@ -1,4 +1,4 @@ -// XFAIL: aix +// XFAIL: target={{.*}}-aix{{.*}} // RUN: %clang -O2 %s -flto=thin -c -o %t.o // RUN: llvm-lto -thinlto -o %t %t.o diff --git a/clang/test/Import/forward-declared-objc-class/test.m b/clang/test/Import/forward-declared-objc-class/test.m --- a/clang/test/Import/forward-declared-objc-class/test.m +++ b/clang/test/Import/forward-declared-objc-class/test.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: clang-import-test -x objective-c++ -import %S/Inputs/S1.m --import %S/Inputs/S2.m --import %S/Inputs/S3.m -expression %s void expr() { MyClass *c = [MyClass fromInteger:3]; diff --git a/clang/test/Import/objc-arc/test-cleanup-object.m b/clang/test/Import/objc-arc/test-cleanup-object.m --- a/clang/test/Import/objc-arc/test-cleanup-object.m +++ b/clang/test/Import/objc-arc/test-cleanup-object.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: clang-import-test -x objective-c -objc-arc -import %S/Inputs/cleanup-objects.m -dump-ast -expression %s | FileCheck %s // CHECK: FunctionDecl {{.*}} getObj ' diff --git a/clang/test/Import/objc-autoreleasepool/test.m b/clang/test/Import/objc-autoreleasepool/test.m --- a/clang/test/Import/objc-autoreleasepool/test.m +++ b/clang/test/Import/objc-autoreleasepool/test.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: clang-import-test -dump-ast -x objective-c++ -import %S/Inputs/F.m -expression %s | FileCheck %s // CHECK: ObjCAutoreleasePoolStmt diff --git a/clang/test/Import/objc-definitions-in-expression/test.m b/clang/test/Import/objc-definitions-in-expression/test.m --- a/clang/test/Import/objc-definitions-in-expression/test.m +++ b/clang/test/Import/objc-definitions-in-expression/test.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: clang-import-test -x objective-c++ -import %S/Inputs/S.m -expression %s @class D; diff --git a/clang/test/Import/objc-method/test.m b/clang/test/Import/objc-method/test.m --- a/clang/test/Import/objc-method/test.m +++ b/clang/test/Import/objc-method/test.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: clang-import-test -x objective-c++ -import %S/Inputs/S.m -expression %s void expr() { C *c; diff --git a/clang/test/Import/objc-param-decl/test.m b/clang/test/Import/objc-param-decl/test.m --- a/clang/test/Import/objc-param-decl/test.m +++ b/clang/test/Import/objc-param-decl/test.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, 
target={{.*}}-aix{{.*}} // RUN: clang-import-test -dump-ast -x objective-c++ -import %S/Inputs/S.m -expression %s | FileCheck %s // CHECK: ObjCTypeParamDecl diff --git a/clang/test/Import/objc-try-catch/test.m b/clang/test/Import/objc-try-catch/test.m --- a/clang/test/Import/objc-try-catch/test.m +++ b/clang/test/Import/objc-try-catch/test.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: clang-import-test -x objective-c++ -Xcc -fobjc-exceptions -dump-ast -import %S/Inputs/F.m -expression %s | FileCheck %s // FIXME: Seems that Objective-C try/catch crash codegen on Windows. Reenable once this is fixed. diff --git a/clang/test/Integration/thinlto_profile_sample_accurate.c b/clang/test/Integration/thinlto_profile_sample_accurate.c --- a/clang/test/Integration/thinlto_profile_sample_accurate.c +++ b/clang/test/Integration/thinlto_profile_sample_accurate.c @@ -1,4 +1,4 @@ -// XFAIL: aix +// XFAIL: target={{.*}}-aix{{.*}} // Test to ensure -emit-llvm profile-sample-accurate is honored in ThinLTO. // RUN: %clang -O2 %s -flto=thin -fprofile-sample-accurate -c -o %t.o diff --git a/clang/test/Interpreter/disambiguate-decl-stmt.cpp b/clang/test/Interpreter/disambiguate-decl-stmt.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Interpreter/disambiguate-decl-stmt.cpp @@ -0,0 +1,56 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -fincremental-extensions -std=c++20 %s +// RUN: %clang_cc1 -fsyntax-only -DMS -fms-extensions -verify -fincremental-extensions -std=c++20 %s + +// expected-no-diagnostics + +extern "C" int printf(const char*,...); + +// Decls which are hard to disambiguate + +// ParseStatementOrDeclaration returns multiple statements. +#ifdef MS +int g_bFlag = 1; +__if_exists(::g_bFlag) { + printf("Entering __if_exists\n"); + printf("g_bFlag = %d\n", g_bFlag); +} +#endif // MS + +// Operators. 
+struct S1 { operator int(); }; +S1::operator int() { return 0; } + +// Dtors +using I = int; +I x = 10; +x.I::~I(); +x = 20; + +// Ctors + +// Deduction guide +template struct A { A(); A(T); }; +A() -> A; + +struct S2 { S2(); }; +S2::S2() = default; + +namespace N { struct S { S(); }; } +N::S::S() { printf("N::S::S()\n"); } +N::S s; + +namespace Ns {namespace Ns { void Ns(); void Fs();}} +void Ns::Ns::Ns() { printf("void Ns::Ns::Ns()\n"); } +void Ns::Ns::Fs() {} + +Ns::Ns::Fs(); +Ns::Ns::Ns(); + +struct Attrs1 { Attrs1(); }; +Attrs1::Attrs1() __attribute((pure)) = default; + +struct Attrs2 { Attrs2(); }; +__attribute((pure)) Attrs2::Attrs2() = default; + +// Extra semicolon +namespace N {}; diff --git a/clang/test/Interpreter/execute-stmts.cpp b/clang/test/Interpreter/execute-stmts.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Interpreter/execute-stmts.cpp @@ -0,0 +1,38 @@ +// REQUIRES: host-supports-jit +// UNSUPPORTED: system-aix +// RUN: cat %s | clang-repl -Xcc -Xclang -Xcc -verify | FileCheck %s +// RUN: %clang_cc1 -verify -fincremental-extensions -emit-llvm -o - %s \ +// RUN: | FileCheck --check-prefix=CODEGEN-CHECK %s + +// expected-no-diagnostics + +//CODEGEN-CHECK-COUNT-2: define internal void @__stmts__ +//CODEGEN-CHECK-NOT: define internal void @__stmts__ + + +extern "C" int printf(const char*,...); + +template T call() { printf("called\n"); return T(); } +call(); +// CHECK: called + +int i = 1; +++i; +printf("i = %d\n", i); +// CHECK: i = 2 + +namespace Ns { void f(){ i++; } } +Ns::f(); + +void g() { ++i; } +g(); +::g(); + +printf("i = %d\n", i); +// CHECK-NEXT: i = 5 + +for (; i > 4; --i) printf("i = %d\n", i); +// CHECK-NEXT: i = 5 + +int j = i; printf("j = %d\n", j); +// CHECK-NEXT: j = 4 diff --git a/clang/test/Interpreter/stmt-serialization.cpp b/clang/test/Interpreter/stmt-serialization.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Interpreter/stmt-serialization.cpp @@ -0,0 +1,19 @@ +// RUN: rm -rf %t +// RUN: %clang_cc1 -std=c++20 -fincremental-extensions -fmodules-cache-path=%t \ +// RUN: -x c++ %s -verify +// expected-no-diagnostics + +#pragma clang module build TopLevelStmt +module TopLevelStmt { module Statements {} } +#pragma clang module contents + +#pragma clang module begin TopLevelStmt.Statements +extern "C" int printf(const char*,...); +int i = 0; +i++; +#pragma clang module end /*TopLevelStmt.Statements*/ +#pragma clang module endbuild /*TopLevelStmt*/ + +#pragma clang module import TopLevelStmt.Statements + +printf("Value of i is '%d'", i); diff --git a/clang/test/Modules/DebugInfoNamespace.cpp b/clang/test/Modules/DebugInfoNamespace.cpp --- a/clang/test/Modules/DebugInfoNamespace.cpp +++ b/clang/test/Modules/DebugInfoNamespace.cpp @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: %clang_cc1 -x objective-c++ -std=c++11 -debug-info-kind=standalone \ // RUN: -dwarf-ext-refs -fmodules \ diff --git a/clang/test/Modules/DebugInfoSubmoduleImport.c b/clang/test/Modules/DebugInfoSubmoduleImport.c --- a/clang/test/Modules/DebugInfoSubmoduleImport.c +++ b/clang/test/Modules/DebugInfoSubmoduleImport.c @@ -1,4 +1,4 @@ -// XFAIL: -aix +// XFAIL: target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: %clang_cc1 -fmodules -fmodule-format=obj -debug-info-kind=limited -dwarf-ext-refs \ // RUN: -fimplicit-module-maps -x c -fmodules-cache-path=%t -I %S/Inputs \ diff --git a/clang/test/Modules/DebugInfoTransitiveImport.m b/clang/test/Modules/DebugInfoTransitiveImport.m --- 
a/clang/test/Modules/DebugInfoTransitiveImport.m +++ b/clang/test/Modules/DebugInfoTransitiveImport.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: %clang_cc1 -fmodules -fmodule-format=obj -debug-info-kind=limited -dwarf-ext-refs \ // RUN: -fimplicit-module-maps -fmodules-cache-path=%t -I %S/Inputs \ diff --git a/clang/test/Modules/ExtDebugInfo.cpp b/clang/test/Modules/ExtDebugInfo.cpp --- a/clang/test/Modules/ExtDebugInfo.cpp +++ b/clang/test/Modules/ExtDebugInfo.cpp @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // Test that only forward declarations are emitted for types defined in modules. diff --git a/clang/test/Modules/ExtDebugInfo.m b/clang/test/Modules/ExtDebugInfo.m --- a/clang/test/Modules/ExtDebugInfo.m +++ b/clang/test/Modules/ExtDebugInfo.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // Test that only forward declarations are emitted for types defined in modules. diff --git a/clang/test/Modules/ModuleDebugInfo.cpp b/clang/test/Modules/ModuleDebugInfo.cpp --- a/clang/test/Modules/ModuleDebugInfo.cpp +++ b/clang/test/Modules/ModuleDebugInfo.cpp @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // Test that (the same) debug info is emitted for an Objective-C++ // module and a C++ precompiled header. diff --git a/clang/test/Modules/ModuleDebugInfo.m b/clang/test/Modules/ModuleDebugInfo.m --- a/clang/test/Modules/ModuleDebugInfo.m +++ b/clang/test/Modules/ModuleDebugInfo.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // Test that debug info is emitted for an Objective-C module and // a precompiled header. diff --git a/clang/test/Modules/ModuleDebugInfoDwoId.cpp b/clang/test/Modules/ModuleDebugInfoDwoId.cpp --- a/clang/test/Modules/ModuleDebugInfoDwoId.cpp +++ b/clang/test/Modules/ModuleDebugInfoDwoId.cpp @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // Tests that dwoIds in modules match the dwoIDs in the main file. 
// REQUIRES: asserts diff --git a/clang/test/Modules/ModuleModuleDebugInfo.cpp b/clang/test/Modules/ModuleModuleDebugInfo.cpp --- a/clang/test/Modules/ModuleModuleDebugInfo.cpp +++ b/clang/test/Modules/ModuleModuleDebugInfo.cpp @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: %clang_cc1 -x objective-c++ -std=c++11 -debug-info-kind=standalone \ diff --git a/clang/test/Modules/autolink.m b/clang/test/Modules/autolink.m --- a/clang/test/Modules/autolink.m +++ b/clang/test/Modules/autolink.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: %clang_cc1 -emit-pch -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -o %t.pch -I %S/Inputs -x objective-c-header %S/Inputs/autolink-sub3.pch // RUN: %clang_cc1 -emit-llvm -o - -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -I %S/Inputs -include-pch %t.pch %s | FileCheck %s diff --git a/clang/test/Modules/autolinkTBD.m b/clang/test/Modules/autolinkTBD.m --- a/clang/test/Modules/autolinkTBD.m +++ b/clang/test/Modules/autolinkTBD.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: %clang_cc1 -emit-llvm -o - -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs %s | FileCheck %s // RUN: %clang_cc1 -emit-llvm -fno-autolink -o - -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs %s | FileCheck --check-prefix=CHECK-AUTOLINK-DISABLED %s diff --git a/clang/test/Modules/builtins.m b/clang/test/Modules/builtins.m --- a/clang/test/Modules/builtins.m +++ b/clang/test/Modules/builtins.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: %clang_cc1 -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -I %S/Inputs %s -verify // RUN: %clang_cc1 -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -I %S/Inputs -x c %s -verify diff --git a/clang/test/Modules/clang_module_file_info.m b/clang/test/Modules/clang_module_file_info.m --- a/clang/test/Modules/clang_module_file_info.m +++ b/clang/test/Modules/clang_module_file_info.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} @import DependsOnModule; // RUN: rm -rf %t %t-obj diff --git a/clang/test/Modules/cxx-irgen.cpp b/clang/test/Modules/cxx-irgen.cpp --- a/clang/test/Modules/cxx-irgen.cpp +++ b/clang/test/Modules/cxx-irgen.cpp @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -x objective-c++ -std=c++11 -fmodules-cache-path=%t -I %S/Inputs -triple %itanium_abi_triple -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -x objective-c++ -std=c++11 -fmodules-cache-path=%t -I %S/Inputs -triple %itanium_abi_triple -disable-llvm-passes -emit-llvm -debug-info-kind=limited -o - %s | FileCheck %s diff --git a/clang/test/Modules/debug-info-moduleimport-in-module.m b/clang/test/Modules/debug-info-moduleimport-in-module.m --- a/clang/test/Modules/debug-info-moduleimport-in-module.m +++ b/clang/test/Modules/debug-info-moduleimport-in-module.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // Test that an @import inside a module is not represented in the debug info. 
// REQUIRES: asserts diff --git a/clang/test/Modules/debug-info-moduleimport.m b/clang/test/Modules/debug-info-moduleimport.m --- a/clang/test/Modules/debug-info-moduleimport.m +++ b/clang/test/Modules/debug-info-moduleimport.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: %clang_cc1 -debug-info-kind=limited -fmodules \ // RUN: -DGREETING="Hello World" -UNDEBUG \ diff --git a/clang/test/Modules/direct-module-import.m b/clang/test/Modules/direct-module-import.m --- a/clang/test/Modules/direct-module-import.m +++ b/clang/test/Modules/direct-module-import.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: %clang_cc1 -no-opaque-pointers -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -include Module/Module.h %s -emit-llvm -o - | FileCheck %s diff --git a/clang/test/Modules/merge-anon-record-definition-in-objc.m b/clang/test/Modules/merge-anon-record-definition-in-objc.m --- a/clang/test/Modules/merge-anon-record-definition-in-objc.m +++ b/clang/test/Modules/merge-anon-record-definition-in-objc.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: split-file %s %t // RUN: %clang_cc1 -fsyntax-only -F%t/Frameworks %t/test.m -Wno-objc-property-implementation -Wno-incomplete-implementation \ diff --git a/clang/test/Modules/merge-extension-ivars.m b/clang/test/Modules/merge-extension-ivars.m --- a/clang/test/Modules/merge-extension-ivars.m +++ b/clang/test/Modules/merge-extension-ivars.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: split-file %s %t // RUN: %clang_cc1 -emit-llvm -o %t/test-compatible-extensions.ll -fobjc-runtime=macosx-10.9 -F%t/Frameworks %t/test-compatible-extensions.m \ diff --git a/clang/test/Modules/merge-objc-interface-visibility.m b/clang/test/Modules/merge-objc-interface-visibility.m --- a/clang/test/Modules/merge-objc-interface-visibility.m +++ b/clang/test/Modules/merge-objc-interface-visibility.m @@ -4,7 +4,7 @@ // RUN: -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/modules.cache // RUN: %clang_cc1 -emit-llvm -o %t/test.bc -F%t/Frameworks %t/test.m -DHIDDEN_FIRST=0 \ // RUN: -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/modules.cache -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // Test a case when Objective-C interface is imported both as hidden and as visible. 
diff --git a/clang/test/Modules/merge-objc-interface.m b/clang/test/Modules/merge-objc-interface.m --- a/clang/test/Modules/merge-objc-interface.m +++ b/clang/test/Modules/merge-objc-interface.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: split-file %s %t // RUN: %clang_cc1 -emit-llvm -o %t/test.bc -F%t/Frameworks %t/test.m \ diff --git a/clang/test/Modules/merge-objc-protocol-visibility.m b/clang/test/Modules/merge-objc-protocol-visibility.m --- a/clang/test/Modules/merge-objc-protocol-visibility.m +++ b/clang/test/Modules/merge-objc-protocol-visibility.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -aix +// UNSUPPORTED: target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: split-file %s %t // RUN: %clang_cc1 -emit-llvm -o %t/test.bc -F%t/Frameworks %t/test.m -Werror=objc-method-access -DHIDDEN_FIRST=1 \ diff --git a/clang/test/Modules/merge-record-definition-nonmodular.m b/clang/test/Modules/merge-record-definition-nonmodular.m --- a/clang/test/Modules/merge-record-definition-nonmodular.m +++ b/clang/test/Modules/merge-record-definition-nonmodular.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: mkdir %t // RUN: %clang_cc1 -emit-llvm -o %t/test.bc -F%S/Inputs/merge-record-definition %s \ diff --git a/clang/test/Modules/merge-record-definition-visibility.m b/clang/test/Modules/merge-record-definition-visibility.m --- a/clang/test/Modules/merge-record-definition-visibility.m +++ b/clang/test/Modules/merge-record-definition-visibility.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: mkdir %t // RUN: %clang_cc1 -emit-llvm -o %t/test.bc -F%S/Inputs/merge-record-definition %s \ diff --git a/clang/test/Modules/merge-record-definition.m b/clang/test/Modules/merge-record-definition.m --- a/clang/test/Modules/merge-record-definition.m +++ b/clang/test/Modules/merge-record-definition.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: mkdir %t // RUN: %clang_cc1 -emit-llvm -o %t/test.bc -F%S/Inputs/merge-record-definition %s \ diff --git a/clang/test/Modules/module-debuginfo-prefix.m b/clang/test/Modules/module-debuginfo-prefix.m --- a/clang/test/Modules/module-debuginfo-prefix.m +++ b/clang/test/Modules/module-debuginfo-prefix.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // REQUIRES: asserts // Modules: diff --git a/clang/test/Modules/module-file-home-is-cwd.m b/clang/test/Modules/module-file-home-is-cwd.m --- a/clang/test/Modules/module-file-home-is-cwd.m +++ b/clang/test/Modules/module-file-home-is-cwd.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: cd %S // RUN: %clang_cc1 -x objective-c -fmodules -fno-implicit-modules \ // RUN: -fmodule-file-home-is-cwd -fmodule-name=libA -emit-module \ diff --git a/clang/test/Modules/module_file_info.m b/clang/test/Modules/module_file_info.m --- a/clang/test/Modules/module_file_info.m +++ b/clang/test/Modules/module_file_info.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} @import DependsOnModule; // RUN: rm -rf %t %t-obj diff --git a/clang/test/Modules/objc-initializer.m b/clang/test/Modules/objc-initializer.m --- a/clang/test/Modules/objc-initializer.m +++ b/clang/test/Modules/objc-initializer.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: 
-zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: %clang_cc1 -no-opaque-pointers -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -I %S/Inputs/objc-initializer %s -emit-llvm -o - -fobjc-arc | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -I %S/Inputs/objc-initializer %s -emit-llvm -o - -fobjc-arc -DIMPORT_TOP | FileCheck %s diff --git a/clang/test/Modules/pch-used.m b/clang/test/Modules/pch-used.m --- a/clang/test/Modules/pch-used.m +++ b/clang/test/Modules/pch-used.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: mkdir %t // RUN: %clang_cc1 -x objective-c-header -emit-pch %S/Inputs/pch-used.h -o %t/pch-used.h.pch -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache -O0 -isystem %S/Inputs/System/usr/include diff --git a/clang/test/Modules/redecl-ivars.m b/clang/test/Modules/redecl-ivars.m --- a/clang/test/Modules/redecl-ivars.m +++ b/clang/test/Modules/redecl-ivars.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: split-file %s %t // RUN: %clang_cc1 -fsyntax-only -fobjc-runtime=macosx-10.9 -verify -I%t/include %t/test-mismatch-in-extension.m diff --git a/clang/test/Modules/use-exportas-for-link.m b/clang/test/Modules/use-exportas-for-link.m --- a/clang/test/Modules/use-exportas-for-link.m +++ b/clang/test/Modules/use-exportas-for-link.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: %clang_cc1 -emit-llvm -o - -fmodules-cache-path=%t -DA -fmodules -fimplicit-module-maps -F %S/Inputs/exportas-link %s | FileCheck --check-prefix=CHECK_A %s // CHECK_A: !llvm.linker.options = !{![[MODULE:[0-9]+]]} diff --git a/clang/test/PCH/debug-info-pch-path.c b/clang/test/PCH/debug-info-pch-path.c --- a/clang/test/PCH/debug-info-pch-path.c +++ b/clang/test/PCH/debug-info-pch-path.c @@ -1,6 +1,6 @@ // Unsupported on AIX because we don't support the requisite "__clangast" // section in XCOFF yet. -// UNSUPPORTED: aix +// UNSUPPORTED: target={{.*}}-aix{{.*}} // RUN: rm -rf %t // RUN: mkdir %t diff --git a/clang/test/PCH/externally-retained.m b/clang/test/PCH/externally-retained.m --- a/clang/test/PCH/externally-retained.m +++ b/clang/test/PCH/externally-retained.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // Test for assertion failure due to objc_externally_retained on a function. // Without PCH diff --git a/clang/test/PCH/irgen-rdar13114142.mm b/clang/test/PCH/irgen-rdar13114142.mm --- a/clang/test/PCH/irgen-rdar13114142.mm +++ b/clang/test/PCH/irgen-rdar13114142.mm @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: %clang_cc1 %s -triple %itanium_abi_triple -emit-pch -o %t.pch // RUN: %clang_cc1 %s -triple %itanium_abi_triple -emit-llvm -include-pch %t.pch -o - | FileCheck %s diff --git a/clang/test/PCH/objc_container.m b/clang/test/PCH/objc_container.m --- a/clang/test/PCH/objc_container.m +++ b/clang/test/PCH/objc_container.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // Test this without pch. 
// RUN: %clang_cc1 -include %S/objc_container.h -fsyntax-only -verify %s diff --git a/clang/test/PCH/objc_literals.m b/clang/test/PCH/objc_literals.m --- a/clang/test/PCH/objc_literals.m +++ b/clang/test/PCH/objc_literals.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: %clang_cc1 -emit-pch -o %t %s // RUN: %clang_cc1 -include-pch %t -verify %s // RUN: %clang_cc1 -include-pch %t -ast-print %s | FileCheck -check-prefix=CHECK-PRINT %s diff --git a/clang/test/PCH/objc_literals.mm b/clang/test/PCH/objc_literals.mm --- a/clang/test/PCH/objc_literals.mm +++ b/clang/test/PCH/objc_literals.mm @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // RUN: %clang_cc1 -triple %itanium_abi_triple -emit-pch -x objective-c++ -std=c++0x -o %t %s // RUN: %clang_cc1 -triple %itanium_abi_triple -include-pch %t -x objective-c++ -std=c++0x -verify %s // RUN: %clang_cc1 -triple %itanium_abi_triple -include-pch %t -x objective-c++ -std=c++0x -ast-print %s | FileCheck -check-prefix=CHECK-PRINT %s diff --git a/clang/test/PCH/objcxx-ivar-class.mm b/clang/test/PCH/objcxx-ivar-class.mm --- a/clang/test/PCH/objcxx-ivar-class.mm +++ b/clang/test/PCH/objcxx-ivar-class.mm @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // Test this without pch. // RUN: %clang_cc1 -no-opaque-pointers -include %S/objcxx-ivar-class.h -triple %itanium_abi_triple %s -emit-llvm -o - | FileCheck %s diff --git a/clang/test/PCH/pending-ids.m b/clang/test/PCH/pending-ids.m --- a/clang/test/PCH/pending-ids.m +++ b/clang/test/PCH/pending-ids.m @@ -1,4 +1,4 @@ -// UNSUPPORTED: -zos, -aix +// UNSUPPORTED: -zos, target={{.*}}-aix{{.*}} // Test for rdar://10278815 // Without PCH diff --git a/clang/test/SemaObjC/arc-repeated-weak.mm b/clang/test/SemaObjC/arc-repeated-weak.mm --- a/clang/test/SemaObjC/arc-repeated-weak.mm +++ b/clang/test/SemaObjC/arc-repeated-weak.mm @@ -290,6 +290,18 @@ } while(0); } +struct S { + int a; + id b; +}; + +@interface C +@property S p; +@end + +void test_list_init(C *c) { + c.p = {0, c.p.b}; +} @interface Test (Methods) @end diff --git a/clang/tools/clang-format/git-clang-format b/clang/tools/clang-format/git-clang-format --- a/clang/tools/clang-format/git-clang-format +++ b/clang/tools/clang-format/git-clang-format @@ -88,7 +88,7 @@ 'c', 'h', # C 'm', # ObjC 'mm', # ObjC++ - 'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx', # C++ + 'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx', 'inc', # C++ 'ccm', 'cppm', 'cxxm', 'c++m', # C++ Modules 'cu', 'cuh', # CUDA # Other languages that clang-format supports diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1237,7 +1237,7 @@ if (sys::fs::exists(Path)) return static_cast(Path); - return None; + return std::nullopt; } Optional findFromSearchPaths(StringRef Name, StringRef Root, @@ -1245,7 +1245,7 @@ for (StringRef Dir : SearchPaths) if (Optional File = findFile(Dir, Root, Name)) return File; - return None; + return std::nullopt; } Optional searchLibraryBaseName(StringRef Name, StringRef Root, @@ -1256,7 +1256,7 @@ if (Optional File = findFile(Dir, Root, "lib" + Name + ".a")) return File; } - return None; + return std::nullopt; } /// Search for static libraries in the linker's library path given input like diff --git 
a/clang/tools/clang-refactor/ClangRefactor.cpp b/clang/tools/clang-refactor/ClangRefactor.cpp --- a/clang/tools/clang-refactor/ClangRefactor.cpp +++ b/clang/tools/clang-refactor/ClangRefactor.cpp @@ -200,7 +200,7 @@ Value = CLOpt.getValue(); return; } - Value = None; + Value = std::nullopt; if (Opt.isRequired()) MissingRequiredOptions.push_back(&Opt); } diff --git a/clang/tools/clang-refactor/TestSupport.cpp b/clang/tools/clang-refactor/TestSupport.cpp --- a/clang/tools/clang-refactor/TestSupport.cpp +++ b/clang/tools/clang-refactor/TestSupport.cpp @@ -298,7 +298,7 @@ if (!ErrOrFile) { llvm::errs() << "error: -selection=test:" << Filename << " : could not open the given file"; - return None; + return std::nullopt; } StringRef Source = ErrOrFile.get()->getBuffer(); @@ -340,7 +340,7 @@ // Allow CHECK: comments to contain range= commands. if (!RangeRegex.match(Comment, &Matches) || Comment.contains("CHECK")) { if (DetectMistypedCommand()) - return None; + return std::nullopt; continue; } unsigned Offset = Tok.getEndLoc().getRawEncoding(); @@ -359,7 +359,7 @@ SmallVector EndLocMatches; if (!EndLocRegex.match(Matches[3], &EndLocMatches)) { if (DetectMistypedCommand()) - return None; + return std::nullopt; continue; } unsigned EndLineOffset = 0, EndColumn = 0; @@ -380,7 +380,7 @@ if (GroupedRanges.empty()) { llvm::errs() << "error: -selection=test:" << Filename << ": no 'range' commands"; - return None; + return std::nullopt; } TestSelectionRangesInFile TestRanges = {Filename.str(), {}}; diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -82,7 +82,7 @@ "" /*no-suffix*/, ErrorFile); llvm::FileRemover OutputRemover(OutputFile.c_str()); llvm::FileRemover ErrorRemover(ErrorFile.c_str()); - llvm::Optional Redirects[] = { + std::optional Redirects[] = { {""}, // Stdin OutputFile.str(), ErrorFile.str(), diff --git a/clang/tools/diagtool/FindDiagnosticID.cpp b/clang/tools/diagtool/FindDiagnosticID.cpp --- a/clang/tools/diagtool/FindDiagnosticID.cpp +++ b/clang/tools/diagtool/FindDiagnosticID.cpp @@ -33,7 +33,7 @@ if (DiagName == Name) return Diag; } - return None; + return std::nullopt; } int FindDiagnosticID::run(unsigned int argc, char **argv, diff --git a/clang/tools/driver/cc1_main.cpp b/clang/tools/driver/cc1_main.cpp --- a/clang/tools/driver/cc1_main.cpp +++ b/clang/tools/driver/cc1_main.cpp @@ -177,7 +177,8 @@ // the target machine will handle the mcpu printing llvm::TargetOptions Options; std::unique_ptr TheTargetMachine( - TheTarget->createTargetMachine(TargetStr, "", "+cpuhelp", Options, None)); + TheTarget->createTargetMachine(TargetStr, "", "+cpuhelp", Options, + std::nullopt)); return 0; } diff --git a/clang/tools/driver/cc1gen_reproducer_main.cpp b/clang/tools/driver/cc1gen_reproducer_main.cpp --- a/clang/tools/driver/cc1gen_reproducer_main.cpp +++ b/clang/tools/driver/cc1gen_reproducer_main.cpp @@ -134,7 +134,7 @@ } } - return None; + return std::nullopt; } std::string GetExecutablePath(const char *Argv0, bool CanonicalPrefixes); diff --git a/clang/tools/driver/driver.cpp b/clang/tools/driver/driver.cpp --- a/clang/tools/driver/driver.cpp +++ b/clang/tools/driver/driver.cpp @@ -497,7 +497,7 @@ .Case("crash", Driver::ReproLevel::OnCrash) .Case("error", Driver::ReproLevel::OnError) .Case("always", Driver::ReproLevel::Always) - .Default(None); + .Default(std::nullopt); if (!Level) { llvm::errs() << "Unknown value for " << 
A->getSpelling() << ": '" << A->getValue() << "'\n"; diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -604,12 +604,12 @@ if (RegionOfInterest.isValid()) { SourceRange Range = getFullCursorExtent(Cursor, AU->getSourceManager()); if (Range.isInvalid()) - return None; + return std::nullopt; switch (CompareRegionOfInterest(Range)) { case RangeBefore: // This declaration comes before the region of interest; skip it. - return None; + return std::nullopt; case RangeAfter: // This declaration comes after the region of interest; we're done. @@ -628,8 +628,8 @@ // FIXME: Eventually remove. This part of a hack to support proper // iteration over all Decls contained lexically within an ObjC container. - SaveAndRestore DI_saved(DI_current, &I); - SaveAndRestore DE_saved(DE_current, E); + SaveAndRestore DI_saved(DI_current, &I); + SaveAndRestore DE_saved(DE_current, E); for (; I != E; ++I) { Decl *D = *I; @@ -658,7 +658,7 @@ // we passed the region-of-interest. if (auto *ivarD = dyn_cast(D)) { if (ivarD->getSynthesize()) - return None; + return std::nullopt; } // FIXME: ObjCClassRef/ObjCProtocolRef for forward class/protocol @@ -676,12 +676,12 @@ const Optional V = shouldVisitCursor(Cursor); if (!V) - return None; + return std::nullopt; if (!V.value()) return false; if (Visit(Cursor, true)) return true; - return None; + return std::nullopt; } bool CursorVisitor::VisitTranslationUnitDecl(TranslationUnitDecl *D) { @@ -3797,8 +3797,10 @@ } // Configure the diagnostics. + std::unique_ptr DiagOpts = CreateAndPopulateDiagOpts( + llvm::makeArrayRef(command_line_args, num_command_line_args)); IntrusiveRefCntPtr Diags( - CompilerInstance::createDiagnostics(new DiagnosticOptions)); + CompilerInstance::createDiagnostics(DiagOpts.release())); if (options & CXTranslationUnit_KeepGoing) Diags->setFatalsAsError(true); @@ -3879,7 +3881,7 @@ LibclangInvocationReporter InvocationReporter( *CXXIdx, LibclangInvocationReporter::OperationKind::ParseOperation, - options, llvm::makeArrayRef(*Args), /*InvocationArgs=*/None, + options, llvm::makeArrayRef(*Args), /*InvocationArgs=*/std::nullopt, unsaved_files); std::unique_ptr Unit(ASTUnit::LoadFromCommandLine( Args->data(), Args->data() + Args->size(), @@ -6694,6 +6696,7 @@ case Decl::Export: case Decl::ObjCPropertyImpl: case Decl::FileScopeAsm: + case Decl::TopLevelStmt: case Decl::StaticAssert: case Decl::Block: case Decl::Captured: diff --git a/clang/tools/libclang/CXType.cpp b/clang/tools/libclang/CXType.cpp --- a/clang/tools/libclang/CXType.cpp +++ b/clang/tools/libclang/CXType.cpp @@ -193,13 +193,13 @@ return TemplateDecl->getTemplateArgs().asArray(); } - return None; + return std::nullopt; } static Optional TemplateArgumentToQualType(const TemplateArgument &A) { if (A.getKind() == TemplateArgument::Type) return A.getAsType(); - return None; + return std::nullopt; } static Optional @@ -216,7 +216,7 @@ return TemplateArgumentToQualType(A); current++; } - return None; + return std::nullopt; } CXType clang_getCursorType(CXCursor C) { diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -4818,7 +4818,7 @@ ASSERT_EQ(Subst->getPackIndex(), PackIndex); }; auto tests = [&](ASTContext &Ctx) { - testType(Ctx, "void", None); + testType(Ctx, "void", std::nullopt); testType(Ctx, "char", 3); testType(Ctx, "float", 2); testType(Ctx, "int", 1); 
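A large share of the hunks above (ClangLinkerWrapper's findFile/findFromSearchPaths, clang-refactor, the libclang cursor visitor, and the tool drivers) are the same mechanical migration: code that used to signal "no value" with llvm::None now returns std::nullopt. A minimal standalone sketch of the resulting idiom, using plain std::optional and illustrative names rather than the linker wrapper's real API:

```cpp
#include <filesystem>
#include <optional>
#include <string>
#include <vector>

// Sketch only: return std::nullopt (not llvm::None) when nothing is found.
std::optional<std::string> findFileSketch(const std::string &dir,
                                          const std::string &name) {
  std::filesystem::path p = std::filesystem::path(dir) / name;
  if (std::filesystem::exists(p))
    return p.string();   // found: wrap the value
  return std::nullopt;   // not found: empty optional
}

std::optional<std::string>
findFromSearchPathsSketch(const std::string &name,
                          const std::vector<std::string> &searchPaths) {
  for (const std::string &dir : searchPaths)
    if (std::optional<std::string> file = findFileSketch(dir, name))
      return file;
  return std::nullopt;
}

int main() {
  auto hit = findFromSearchPathsSketch("a.out", {"/tmp", "/usr/bin"});
  return hit ? 0 : 1;
}
```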
diff --git a/clang/unittests/Analysis/FlowSensitive/MultiVarConstantPropagationTest.cpp b/clang/unittests/Analysis/FlowSensitive/MultiVarConstantPropagationTest.cpp --- a/clang/unittests/Analysis/FlowSensitive/MultiVarConstantPropagationTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/MultiVarConstantPropagationTest.cpp @@ -61,9 +61,10 @@ // When `None`, the lattice is either at top or bottom, based on `State`. llvm::Optional Value; - constexpr ValueLattice() : State(ValueState::Undefined), Value(llvm::None) {} + constexpr ValueLattice() + : State(ValueState::Undefined), Value(std::nullopt) {} constexpr ValueLattice(int64_t V) : State(ValueState::Defined), Value(V) {} - constexpr ValueLattice(ValueState S) : State(S), Value(llvm::None) {} + constexpr ValueLattice(ValueState S) : State(S), Value(std::nullopt) {} static constexpr ValueLattice bottom() { return ValueLattice(ValueState::Undefined); diff --git a/clang/unittests/Analysis/FlowSensitive/SingleVarConstantPropagationTest.cpp b/clang/unittests/Analysis/FlowSensitive/SingleVarConstantPropagationTest.cpp --- a/clang/unittests/Analysis/FlowSensitive/SingleVarConstantPropagationTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/SingleVarConstantPropagationTest.cpp @@ -63,7 +63,9 @@ // `None` is "bottom". llvm::Optional Data; - static constexpr ConstantPropagationLattice bottom() { return {llvm::None}; } + static constexpr ConstantPropagationLattice bottom() { + return {std::nullopt}; + } static constexpr ConstantPropagationLattice top() { return {VarValue{nullptr, 0}}; } diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -3849,7 +3849,7 @@ EXPECT_FALSE(Env.flowConditionImplies(FooVal)); EXPECT_FALSE(Env.flowConditionImplies(Env.makeNot(FooVal))); }, - {TransferOptions{/*.ContextSensitiveOpts=*/llvm::None}}); + {TransferOptions{/*.ContextSensitiveOpts=*/std::nullopt}}); } TEST(TransferTest, ContextSensitiveDepthZero) { diff --git a/clang/unittests/Basic/DarwinSDKInfoTest.cpp b/clang/unittests/Basic/DarwinSDKInfoTest.cpp --- a/clang/unittests/Basic/DarwinSDKInfoTest.cpp +++ b/clang/unittests/Basic/DarwinSDKInfoTest.cpp @@ -23,23 +23,26 @@ EXPECT_EQ(Mapping->getMinimumValue(), VersionTuple(1)); // Exact mapping. - EXPECT_EQ(Mapping->map(VersionTuple(3), VersionTuple(0, 1), None), + EXPECT_EQ(Mapping->map(VersionTuple(3), VersionTuple(0, 1), std::nullopt), VersionTuple(1)); - EXPECT_EQ(Mapping->map(VersionTuple(3, 0), VersionTuple(0, 1), None), + EXPECT_EQ(Mapping->map(VersionTuple(3, 0), VersionTuple(0, 1), std::nullopt), VersionTuple(1)); - EXPECT_EQ(Mapping->map(VersionTuple(3, 0, 0), VersionTuple(0, 1), None), - VersionTuple(1)); - EXPECT_EQ(Mapping->map(VersionTuple(3, 1), VersionTuple(0, 1), None), - VersionTuple(1, 2)); - EXPECT_EQ(Mapping->map(VersionTuple(3, 1, 0), VersionTuple(0, 1), None), + EXPECT_EQ( + Mapping->map(VersionTuple(3, 0, 0), VersionTuple(0, 1), std::nullopt), + VersionTuple(1)); + EXPECT_EQ(Mapping->map(VersionTuple(3, 1), VersionTuple(0, 1), std::nullopt), VersionTuple(1, 2)); + EXPECT_EQ( + Mapping->map(VersionTuple(3, 1, 0), VersionTuple(0, 1), std::nullopt), + VersionTuple(1, 2)); // Missing mapping - fallback to major. 
- EXPECT_EQ(Mapping->map(VersionTuple(3, 0, 1), VersionTuple(0, 1), None), - VersionTuple(1)); + EXPECT_EQ( + Mapping->map(VersionTuple(3, 0, 1), VersionTuple(0, 1), std::nullopt), + VersionTuple(1)); // Minimum - EXPECT_EQ(Mapping->map(VersionTuple(2), VersionTuple(0, 1), None), + EXPECT_EQ(Mapping->map(VersionTuple(2), VersionTuple(0, 1), std::nullopt), VersionTuple(0, 1)); // Maximum @@ -57,7 +60,7 @@ EXPECT_TRUE(Mapping); EXPECT_EQ( Mapping->map(VersionTuple(4), VersionTuple(0, 1), VersionTuple(100)), - None); + std::nullopt); } TEST(DarwinSDKInfo, VersionMappingParseEmpty) { @@ -95,22 +98,23 @@ ASSERT_TRUE(Mapping); // Verify that the macOS versions that are present in the map are translated // directly to their corresponding Mac Catalyst versions. - EXPECT_EQ(*Mapping->map(VersionTuple(10, 15), VersionTuple(), None), + EXPECT_EQ(*Mapping->map(VersionTuple(10, 15), VersionTuple(), std::nullopt), VersionTuple(13, 1)); - EXPECT_EQ(*Mapping->map(VersionTuple(11, 0), VersionTuple(), None), + EXPECT_EQ(*Mapping->map(VersionTuple(11, 0), VersionTuple(), std::nullopt), VersionTuple(14, 0)); - EXPECT_EQ(*Mapping->map(VersionTuple(11, 2), VersionTuple(), None), + EXPECT_EQ(*Mapping->map(VersionTuple(11, 2), VersionTuple(), std::nullopt), VersionTuple(14, 2)); // Verify that a macOS version that's not present in the map is translated // like the nearest major OS version. - EXPECT_EQ(*Mapping->map(VersionTuple(11, 1), VersionTuple(), None), + EXPECT_EQ(*Mapping->map(VersionTuple(11, 1), VersionTuple(), std::nullopt), VersionTuple(14, 0)); // Verify that the macOS versions that are outside of the mapped version // range map to the min/max values passed to the `map` call. - EXPECT_EQ(*Mapping->map(VersionTuple(10, 14), VersionTuple(99, 99), None), - VersionTuple(99, 99)); + EXPECT_EQ( + *Mapping->map(VersionTuple(10, 14), VersionTuple(99, 99), std::nullopt), + VersionTuple(99, 99)); EXPECT_EQ( *Mapping->map(VersionTuple(11, 5), VersionTuple(), VersionTuple(99, 99)), VersionTuple(99, 99)); @@ -143,22 +147,23 @@ // Verify that the iOS versions that are present in the map are translated // directly to their corresponding tvOS versions. - EXPECT_EQ(*Mapping->map(VersionTuple(10, 0), VersionTuple(), None), + EXPECT_EQ(*Mapping->map(VersionTuple(10, 0), VersionTuple(), std::nullopt), VersionTuple(10, 0)); - EXPECT_EQ(*Mapping->map(VersionTuple(10, 3, 1), VersionTuple(), None), + EXPECT_EQ(*Mapping->map(VersionTuple(10, 3, 1), VersionTuple(), std::nullopt), VersionTuple(10, 2)); - EXPECT_EQ(*Mapping->map(VersionTuple(11, 0), VersionTuple(), None), + EXPECT_EQ(*Mapping->map(VersionTuple(11, 0), VersionTuple(), std::nullopt), VersionTuple(11, 0)); // Verify that an iOS version that's not present in the map is translated // like the nearest major OS version. - EXPECT_EQ(*Mapping->map(VersionTuple(10, 1), VersionTuple(), None), + EXPECT_EQ(*Mapping->map(VersionTuple(10, 1), VersionTuple(), std::nullopt), VersionTuple(10, 0)); // Verify that the iOS versions that are outside of the mapped version // range map to the min/max values passed to the `map` call. 
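For the DarwinSDKInfo tests above and below, the comments spell out the lookup policy being exercised: an exact version key maps directly, a version missing from the table falls back to its major release's entry, and versions outside the mapped range clamp to the minimum/maximum arguments of map(). A rough, self-contained sketch of that policy (hypothetical names and types, not the DarwinSDKInfo API; assumes a non-empty table):

```cpp
#include <map>
#include <optional>
#include <utility>

using Version = std::pair<int, int>; // {major, minor}

// Sketch only: exact match, then major-version fallback, then range clamping.
std::optional<Version> mapVersionSketch(const std::map<Version, Version> &table,
                                        Version v, Version minimum,
                                        std::optional<Version> maximum) {
  if (auto it = table.find(v); it != table.end())
    return it->second;                        // exact mapping
  if (auto it = table.find({v.first, 0}); it != table.end())
    return it->second;                        // fall back to the major version
  if (v < table.begin()->first)
    return minimum;                           // below the mapped range
  return maximum;                             // above it (std::nullopt if none)
}

int main() {
  std::map<Version, Version> macOSToCatalyst{
      {{10, 15}, {13, 1}}, {{11, 0}, {14, 0}}, {{11, 2}, {14, 2}}};
  auto r = mapVersionSketch(macOSToCatalyst, {11, 1}, {0, 1}, std::nullopt);
  return (r && *r == Version{14, 0}) ? 0 : 1; // 11.1 behaves like 11.0
}
```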
- EXPECT_EQ(*Mapping->map(VersionTuple(9, 0), VersionTuple(99, 99), None), - VersionTuple(99, 99)); + EXPECT_EQ( + *Mapping->map(VersionTuple(9, 0), VersionTuple(99, 99), std::nullopt), + VersionTuple(99, 99)); EXPECT_EQ( *Mapping->map(VersionTuple(13, 0), VersionTuple(), VersionTuple(99, 99)), VersionTuple(99, 99)); diff --git a/clang/unittests/Basic/FileEntryTest.cpp b/clang/unittests/Basic/FileEntryTest.cpp --- a/clang/unittests/Basic/FileEntryTest.cpp +++ b/clang/unittests/Basic/FileEntryTest.cpp @@ -93,7 +93,7 @@ OptionalFileEntryRefDegradesToFileEntryPtr M0; OptionalFileEntryRefDegradesToFileEntryPtr M1 = Refs.addFile("1"); OptionalFileEntryRefDegradesToFileEntryPtr M2 = Refs.addFile("2"); - OptionalFileEntryRefDegradesToFileEntryPtr M0Also = None; + OptionalFileEntryRefDegradesToFileEntryPtr M0Also = std::nullopt; OptionalFileEntryRefDegradesToFileEntryPtr M1Also = Refs.addFileAlias("1-also", *M1); diff --git a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp --- a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp +++ b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp @@ -183,7 +183,7 @@ return true; if (!UnexpectedInitial.empty() || !UnexpectedNonInitial.empty()) return false; - return llvm::None; + return std::nullopt; } // This method is used by tests. diff --git a/clang/unittests/Format/FormatTestComments.cpp b/clang/unittests/Format/FormatTestComments.cpp --- a/clang/unittests/Format/FormatTestComments.cpp +++ b/clang/unittests/Format/FormatTestComments.cpp @@ -3062,6 +3062,61 @@ "int d;// comment\n", Style)); + EXPECT_EQ("// do not touch\n" + "int a; // any comments\n" + "\n" + " // comment\n" + "// comment\n" + "\n" + "// comment", + format("// do not touch\n" + "int a; // any comments\n" + "\n" + " // comment\n" + "// comment\n" + "\n" + "// comment", + Style)); + + EXPECT_EQ("// do not touch\n" + "int a; // any comments\n" + "\n" + " // comment\n" + "// comment\n" + "\n" + "// comment", + format("// do not touch\n" + "int a; // any comments\n" + "\n" + "\n" + " // comment\n" + "// comment\n" + "\n" + "\n" + "// comment", + Style)); + + // Allow to keep 2 empty lines + Style.MaxEmptyLinesToKeep = 2; + EXPECT_EQ("// do not touch\n" + "int a; // any comments\n" + "\n" + "\n" + " // comment\n" + "// comment\n" + "\n" + "// comment", + format("// do not touch\n" + "int a; // any comments\n" + "\n" + "\n" + " // comment\n" + "// comment\n" + "\n" + "// comment", + Style)); + Style.MaxEmptyLinesToKeep = 1; + // Just format comments normally when leaving exceeds the column limit Style.ColumnLimit = 35; EXPECT_EQ("int foo = 12345; // comment\n" diff --git a/clang/unittests/Frontend/ASTUnitTest.cpp b/clang/unittests/Frontend/ASTUnitTest.cpp --- a/clang/unittests/Frontend/ASTUnitTest.cpp +++ b/clang/unittests/Frontend/ASTUnitTest.cpp @@ -168,9 +168,9 @@ ASTUnit *AST = ASTUnit::LoadFromCommandLine( &Args[0], &Args[4], PCHContainerOps, Diags, "", false, - CaptureDiagsKind::All, None, true, 0, TU_Complete, false, false, false, - SkipFunctionBodiesScope::None, false, true, false, false, None, &ErrUnit, - nullptr); + CaptureDiagsKind::All, std::nullopt, true, 0, TU_Complete, false, false, + false, SkipFunctionBodiesScope::None, false, true, false, false, + std::nullopt, &ErrUnit, nullptr); ASSERT_EQ(AST, nullptr); ASSERT_NE(ErrUnit, nullptr); diff --git a/clang/unittests/Interpreter/InterpreterTest.cpp b/clang/unittests/Interpreter/InterpreterTest.cpp --- a/clang/unittests/Interpreter/InterpreterTest.cpp 
+++ b/clang/unittests/Interpreter/InterpreterTest.cpp @@ -124,14 +124,8 @@ auto *PTU1 = R1->TUPart; EXPECT_EQ(2U, DeclsSize(PTU1)); - // FIXME: Add support for wrapping and running statements. auto R2 = Interp->Parse("var1++; printf(\"var1 value %d\\n\", var1);"); - EXPECT_FALSE(!!R2); - using ::testing::HasSubstr; - EXPECT_THAT(DiagnosticsOS.str(), - HasSubstr("error: unknown type name 'var1'")); - auto Err = R2.takeError(); - EXPECT_EQ("Parsing failed.", llvm::toString(std::move(Err))); + EXPECT_TRUE(!!R2); } TEST(InterpreterTest, UndoCommand) { diff --git a/clang/unittests/Lex/HeaderSearchTest.cpp b/clang/unittests/Lex/HeaderSearchTest.cpp --- a/clang/unittests/Lex/HeaderSearchTest.cpp +++ b/clang/unittests/Lex/HeaderSearchTest.cpp @@ -38,8 +38,9 @@ } void addSearchDir(llvm::StringRef Dir) { - VFS->addFile(Dir, 0, llvm::MemoryBuffer::getMemBuffer(""), /*User=*/None, - /*Group=*/None, llvm::sys::fs::file_type::directory_file); + VFS->addFile( + Dir, 0, llvm::MemoryBuffer::getMemBuffer(""), /*User=*/std::nullopt, + /*Group=*/std::nullopt, llvm::sys::fs::file_type::directory_file); auto DE = FileMgr.getOptionalDirectoryRef(Dir); assert(DE); auto DL = DirectoryLookup(*DE, SrcMgr::C_User, /*isFramework=*/false); @@ -47,8 +48,9 @@ } void addSystemFrameworkSearchDir(llvm::StringRef Dir) { - VFS->addFile(Dir, 0, llvm::MemoryBuffer::getMemBuffer(""), /*User=*/None, - /*Group=*/None, llvm::sys::fs::file_type::directory_file); + VFS->addFile( + Dir, 0, llvm::MemoryBuffer::getMemBuffer(""), /*User=*/std::nullopt, + /*Group=*/std::nullopt, llvm::sys::fs::file_type::directory_file); auto DE = FileMgr.getOptionalDirectoryRef(Dir); assert(DE); auto DL = DirectoryLookup(*DE, SrcMgr::C_System, /*isFramework=*/true); @@ -58,7 +60,8 @@ void addHeaderMap(llvm::StringRef Filename, std::unique_ptr Buf, bool isAngled = false) { - VFS->addFile(Filename, 0, std::move(Buf), /*User=*/None, /*Group=*/None, + VFS->addFile(Filename, 0, std::move(Buf), /*User=*/std::nullopt, + /*Group=*/std::nullopt, llvm::sys::fs::file_type::regular_file); auto FE = FileMgr.getFile(Filename, true); assert(FE); @@ -197,9 +200,10 @@ TEST_F(HeaderSearchTest, HeaderFrameworkLookup) { std::string HeaderPath = "/tmp/Frameworks/Foo.framework/Headers/Foo.h"; addSystemFrameworkSearchDir("/tmp/Frameworks"); - VFS->addFile( - HeaderPath, 0, llvm::MemoryBuffer::getMemBufferCopy("", HeaderPath), - /*User=*/None, /*Group=*/None, llvm::sys::fs::file_type::regular_file); + VFS->addFile(HeaderPath, 0, + llvm::MemoryBuffer::getMemBufferCopy("", HeaderPath), + /*User=*/std::nullopt, /*Group=*/std::nullopt, + llvm::sys::fs::file_type::regular_file); bool IsFrameworkFound = false; auto FoundFile = Search.LookupFile( @@ -267,7 +271,8 @@ VFS->addFile( HeaderDirName + HeaderName, 0, llvm::MemoryBuffer::getMemBufferCopy("", HeaderDirName + HeaderName), - /*User=*/None, /*Group=*/None, llvm::sys::fs::file_type::regular_file); + /*User=*/std::nullopt, /*Group=*/std::nullopt, + llvm::sys::fs::file_type::regular_file); bool IsMapped = false; auto FoundFile = Search.LookupFile( diff --git a/clang/unittests/Serialization/SourceLocationEncodingTest.cpp b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp --- a/clang/unittests/Serialization/SourceLocationEncodingTest.cpp +++ b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp @@ -21,7 +21,7 @@ // If ExpectedEncoded is provided, verify the encoded value too. // Loc is the raw (in-memory) form of SourceLocation. 
void roundTrip(SourceLocation::UIntTy Loc, - llvm::Optional ExpectedEncoded = llvm::None) { + llvm::Optional ExpectedEncoded = std::nullopt) { uint64_t ActualEncoded = SourceLocationEncoding::encode(SourceLocation::getFromRawEncoding(Loc)); if (ExpectedEncoded) diff --git a/clang/unittests/StaticAnalyzer/CallDescriptionTest.cpp b/clang/unittests/StaticAnalyzer/CallDescriptionTest.cpp --- a/clang/unittests/StaticAnalyzer/CallDescriptionTest.cpp +++ b/clang/unittests/StaticAnalyzer/CallDescriptionTest.cpp @@ -153,7 +153,7 @@ TEST(CallDescription, LackOfRequiredArguments) { EXPECT_TRUE(tooling::runToolOnCode( std::unique_ptr(new CallDescriptionAction<>({ - {{"foo", None}, true}, + {{"foo", std::nullopt}, true}, {{"foo", 2}, false}, })), "void foo(int); void foo(int, int); void bar() { foo(1); }")); diff --git a/clang/unittests/Tooling/ASTSelectionTest.cpp b/clang/unittests/Tooling/ASTSelectionTest.cpp --- a/clang/unittests/Tooling/ASTSelectionTest.cpp +++ b/clang/unittests/Tooling/ASTSelectionTest.cpp @@ -149,13 +149,13 @@ TEST(ASTSelectionFinder, CursorNoSelection) { findSelectedASTNodes( - " void f() { }", {1, 1}, None, + " void f() { }", {1, 1}, std::nullopt, [](Optional Node) { EXPECT_FALSE(Node); }); } TEST(ASTSelectionFinder, CursorAtStartOfFunction) { findSelectedASTNodes( - "void f() { }", {1, 1}, None, [](Optional Node) { + "void f() { }", {1, 1}, std::nullopt, [](Optional Node) { EXPECT_TRUE(Node); checkNode(*Node, SourceSelectionKind::None, /*NumChildren=*/1); @@ -525,15 +525,15 @@ @ end )"; // Just after '@ end' - findSelectedASTNodes(Source, {5, 6}, None, - [](Optional Node) { - EXPECT_TRUE(Node); - EXPECT_EQ(Node->Children.size(), 1u); - checkNode( - Node->Children[0], - SourceSelectionKind::ContainsSelection); - }, - SelectionFinderVisitor::Lang_OBJC); + findSelectedASTNodes( + Source, {5, 6}, std::nullopt, + [](Optional Node) { + EXPECT_TRUE(Node); + EXPECT_EQ(Node->Children.size(), 1u); + checkNode( + Node->Children[0], SourceSelectionKind::ContainsSelection); + }, + SelectionFinderVisitor::Lang_OBJC); } const SelectedASTNode &checkFnBody(const Optional &Node, @@ -688,7 +688,7 @@ )"; // No selection range. 
findSelectedASTNodesWithRange( - Source, {2, 2}, None, + Source, {2, 2}, std::nullopt, [](SourceRange SelectionRange, Optional Node) { EXPECT_TRUE(Node); Optional SelectedCode = diff --git a/clang/unittests/Tooling/HeaderAnalysisTest.cpp b/clang/unittests/Tooling/HeaderAnalysisTest.cpp --- a/clang/unittests/Tooling/HeaderAnalysisTest.cpp +++ b/clang/unittests/Tooling/HeaderAnalysisTest.cpp @@ -66,11 +66,11 @@ EXPECT_THAT(parseIWYUPragma("// IWYU pragma: keep me\netc"), ValueIs(Eq("keep me"))); EXPECT_THAT(parseIWYUPragma("/* IWYU pragma: keep */"), ValueIs(Eq("keep"))); - EXPECT_EQ(parseIWYUPragma("// IWYU pragma: keep"), llvm::None) + EXPECT_EQ(parseIWYUPragma("// IWYU pragma: keep"), std::nullopt) << "Prefix is sensitive to whitespace"; - EXPECT_EQ(parseIWYUPragma("// IWYU pragma:keep"), llvm::None) + EXPECT_EQ(parseIWYUPragma("// IWYU pragma:keep"), std::nullopt) << "Prefix is sensitive to whitespace"; - EXPECT_EQ(parseIWYUPragma("/\n* IWYU pragma: keep */"), llvm::None) + EXPECT_EQ(parseIWYUPragma("/\n* IWYU pragma: keep */"), std::nullopt) << "Must start with /* or //"; } diff --git a/clang/unittests/Tooling/SourceCodeBuildersTest.cpp b/clang/unittests/Tooling/SourceCodeBuildersTest.cpp --- a/clang/unittests/Tooling/SourceCodeBuildersTest.cpp +++ b/clang/unittests/Tooling/SourceCodeBuildersTest.cpp @@ -71,14 +71,14 @@ {"-Wno-unused-value"}); if (AstUnit == nullptr) { ADD_FAILURE() << "AST construction failed"; - return llvm::None; + return std::nullopt; } ASTContext &Context = AstUnit->getASTContext(); auto Matches = ast_matchers::match(wrapMatcher(Matcher), Context); // We expect a single, exact match for the statement. if (Matches.size() != 1) { ADD_FAILURE() << "Wrong number of matches: " << Matches.size(); - return llvm::None; + return std::nullopt; } return TestMatch{std::move(AstUnit), MatchResult(Matches[0], &Context)}; } diff --git a/clang/unittests/Tooling/StandardLibraryTest.cpp b/clang/unittests/Tooling/StandardLibraryTest.cpp --- a/clang/unittests/Tooling/StandardLibraryTest.cpp +++ b/clang/unittests/Tooling/StandardLibraryTest.cpp @@ -94,13 +94,13 @@ stdlib::Recognizer Recognizer; - EXPECT_EQ(Recognizer(&VectorNonstd), llvm::None); + EXPECT_EQ(Recognizer(&VectorNonstd), std::nullopt); EXPECT_EQ(Recognizer(Vec), stdlib::Symbol::named("std::", "vector")); EXPECT_EQ(Recognizer(Nest), stdlib::Symbol::named("std::", "vector")); EXPECT_EQ(Recognizer(Clock), stdlib::Symbol::named("std::chrono::", "system_clock")); EXPECT_EQ(Recognizer(CDivT), stdlib::Symbol::named("", "div_t")); - EXPECT_EQ(Recognizer(Sec), llvm::None); + EXPECT_EQ(Recognizer(Sec), std::nullopt); } } // namespace diff --git a/clang/unittests/Tooling/StencilTest.cpp b/clang/unittests/Tooling/StencilTest.cpp --- a/clang/unittests/Tooling/StencilTest.cpp +++ b/clang/unittests/Tooling/StencilTest.cpp @@ -76,14 +76,14 @@ wrapSnippet(ExtraPreface, StatementCode), {"-Wno-unused-value"}); if (AstUnit == nullptr) { ADD_FAILURE() << "AST construction failed"; - return llvm::None; + return std::nullopt; } ASTContext &Context = AstUnit->getASTContext(); auto Matches = ast_matchers::match(wrapMatcher(Matcher), Context); // We expect a single, exact match for the statement. 
if (Matches.size() != 1) { ADD_FAILURE() << "Wrong number of matches: " << Matches.size(); - return llvm::None; + return std::nullopt; } return TestMatch{std::move(AstUnit), MatchResult(Matches[0], &Context)}; } diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -661,10 +661,10 @@ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")), ValueIs(SameRange(findSpelled("split B").drop_front()))); // Ranges not fully covering macro invocations should fail. - EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); - EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None); + EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), std::nullopt); + EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), std::nullopt); EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")), - llvm::None); + std::nullopt); // Recursive macro invocations. recordTokens(R"cpp( @@ -731,7 +731,7 @@ ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7) )cpp"); // Should fail, spans multiple arguments. - EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); + EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), std::nullopt); EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2 a3")), ValueIs(SameRange(findSpelled("ID ( a2 ) a3")))); EXPECT_THAT( @@ -742,7 +742,8 @@ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a4 a5 a6 a7")), ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )")))); // Should fail, spans multiple invocations. - EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4")), llvm::None); + EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4")), + std::nullopt); // https://github.com/clangd/clangd/issues/1289 recordTokens(R"cpp( @@ -750,7 +751,7 @@ #define INDIRECT FOO(y) INDIRECT // expands to foo(y) )cpp"); - EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("y")), llvm::None); + EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("y")), std::nullopt); recordTokens(R"cpp( #define FOO(X) a X b @@ -776,7 +777,7 @@ )cpp"); EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")), ValueIs(SameRange(findSpelled("good")))); - EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), llvm::None); + EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt); recordTokens(R"cpp( #define PREV prev @@ -787,7 +788,7 @@ )cpp"); EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")), ValueIs(SameRange(findSpelled("good")))); - EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), llvm::None); + EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt); recordTokens(R"cpp( #define ID(X) X @@ -798,7 +799,7 @@ )cpp"); EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")), ValueIs(SameRange(findSpelled("good")))); - EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), llvm::None); + EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt); } TEST_F(TokenBufferTest, ExpandedTokensForRange) { @@ -856,7 +857,7 @@ SameRange(findExpanded("1 + 2 + 3"))))); // Only the first spelled token should be found. 
for (const auto &T : ID1.drop_front()) - EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); + EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt); llvm::ArrayRef ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )"); EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()), @@ -864,7 +865,7 @@ SameRange(findExpanded("2 + 3 + 4"))))); // Only the first spelled token should be found. for (const auto &T : ID2.drop_front()) - EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); + EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt); EXPECT_THAT(Buffer.expansionsOverlapping(llvm::makeArrayRef( findSpelled("1 + 2").data(), findSpelled("4").data())), @@ -886,7 +887,7 @@ SameRange(findExpanded("int a").take_front(0))))); // Only the first spelled token should be found. for (const auto &T : DefineFoo.drop_front()) - EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); + EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt); llvm::ArrayRef PragmaOnce = findSpelled("# pragma once"); EXPECT_THAT( @@ -895,7 +896,7 @@ SameRange(findExpanded("int b").take_front(0))))); // Only the first spelled token should be found. for (const auto &T : PragmaOnce.drop_front()) - EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None); + EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt); EXPECT_THAT( Buffer.expansionsOverlapping(findSpelled("FOO ; # pragma")), diff --git a/clang/unittests/Tooling/TransformerTest.cpp b/clang/unittests/Tooling/TransformerTest.cpp --- a/clang/unittests/Tooling/TransformerTest.cpp +++ b/clang/unittests/Tooling/TransformerTest.cpp @@ -110,18 +110,18 @@ "clang-tool", std::make_shared(), FileContents)) { llvm::errs() << "Running tool failed.\n"; - return None; + return std::nullopt; } if (ErrorCount != 0) { llvm::errs() << "Generating changes failed.\n"; - return None; + return std::nullopt; } auto ChangedCode = applyAtomicChanges("input.cc", Code, Changes, ApplyChangesSpec()); if (!ChangedCode) { llvm::errs() << "Applying changes failed: " << llvm::toString(ChangedCode.takeError()) << "\n"; - return None; + return std::nullopt; } return *ChangedCode; } diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -1651,7 +1651,7 @@ std::make_unique(Arg1.first.getElementSizeInBits())); ST.addExpander("MaskExpand", std::make_unique(Arg1.first.getNumElements())); - ST.evaluate(DI->getArg(2), Elts, None); + ST.evaluate(DI->getArg(2), Elts, std::nullopt); std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second; for (auto &E : Elts) { diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake --- a/compiler-rt/cmake/base-config-ix.cmake +++ b/compiler-rt/cmake/base-config-ix.cmake @@ -236,9 +236,10 @@ if(WIN32) test_target_arch(arm "" "" "") else() + test_target_arch(armv4t "" "-march=armv4t" "-mfloat-abi=soft") + test_target_arch(armv6m "" "-march=armv6m" "-mfloat-abi=soft") test_target_arch(arm "" "-march=armv7-a" "-mfloat-abi=soft") test_target_arch(armhf "" "-march=armv7-a" "-mfloat-abi=hard") - test_target_arch(armv6m "" "-march=armv6m" "-mfloat-abi=soft") endif() elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "avr") test_target_arch(avr "__AVR__" "--target=avr") diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake --- a/compiler-rt/cmake/builtin-config-ix.cmake +++ b/compiler-rt/cmake/builtin-config-ix.cmake @@ -45,7 +45,7 @@ ") set(ARM64 aarch64) -set(ARM32 arm 
armhf armv6m armv7m armv7em armv7 armv7s armv7k armv8m.main armv8.1m.main) +set(ARM32 arm armhf armv4t armv6m armv7m armv7em armv7 armv7s armv7k armv8m.main armv8.1m.main) set(AVR avr) set(HEXAGON hexagon) set(X86 i386) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -619,9 +619,6 @@ list_intersect(CFI_SUPPORTED_ARCH ALL_CFI_SUPPORTED_ARCH SANITIZER_COMMON_SUPPORTED_ARCH) - list_intersect(SCUDO_SUPPORTED_ARCH - ALL_SCUDO_SUPPORTED_ARCH - SANITIZER_COMMON_SUPPORTED_ARCH) list_intersect(SCUDO_STANDALONE_SUPPORTED_ARCH ALL_SCUDO_STANDALONE_SUPPORTED_ARCH SANITIZER_COMMON_SUPPORTED_ARCH) @@ -661,7 +658,6 @@ filter_available_targets(SAFESTACK_SUPPORTED_ARCH ${ALL_SAFESTACK_SUPPORTED_ARCH}) filter_available_targets(CFI_SUPPORTED_ARCH ${ALL_CFI_SUPPORTED_ARCH}) - filter_available_targets(SCUDO_SUPPORTED_ARCH ${ALL_SCUDO_SUPPORTED_ARCH}) filter_available_targets(SCUDO_STANDALONE_SUPPORTED_ARCH ${ALL_SCUDO_STANDALONE_SUPPORTED_ARCH}) filter_available_targets(XRAY_SUPPORTED_ARCH ${ALL_XRAY_SUPPORTED_ARCH}) filter_available_targets(SHADOWCALLSTACK_SUPPORTED_ARCH @@ -701,7 +697,7 @@ endif() message(STATUS "Compiler-RT supported architectures: ${COMPILER_RT_SUPPORTED_ARCH}") -set(ALL_SANITIZERS asan;dfsan;msan;hwasan;tsan;safestack;cfi;scudo;ubsan_minimal;gwp_asan) +set(ALL_SANITIZERS asan;dfsan;msan;hwasan;tsan;safestack;cfi;scudo_standalone;ubsan_minimal;gwp_asan) set(COMPILER_RT_SANITIZERS_TO_BUILD all CACHE STRING "sanitizers to build if supported on the target (all;${ALL_SANITIZERS})") list_replace(COMPILER_RT_SANITIZERS_TO_BUILD all "${ALL_SANITIZERS}") @@ -823,20 +819,16 @@ endif() #TODO(kostyak): add back Android & Fuchsia when the code settles a bit. -if (SCUDO_STANDALONE_SUPPORTED_ARCH AND OS_NAME MATCHES "Linux" AND +if (SCUDO_STANDALONE_SUPPORTED_ARCH AND + COMPILER_RT_BUILD_SANITIZERS AND + "scudo_standalone" IN_LIST COMPILER_RT_SANITIZERS_TO_BUILD AND + OS_NAME MATCHES "Linux" AND COMPILER_RT_HAS_AUXV) set(COMPILER_RT_HAS_SCUDO_STANDALONE TRUE) else() set(COMPILER_RT_HAS_SCUDO_STANDALONE FALSE) endif() -if (COMPILER_RT_HAS_SANITIZER_COMMON AND SCUDO_SUPPORTED_ARCH AND - OS_NAME MATCHES "Linux|Fuchsia") - set(COMPILER_RT_HAS_SCUDO TRUE) -else() - set(COMPILER_RT_HAS_SCUDO FALSE) -endif() - if (COMPILER_RT_HAS_SANITIZER_COMMON AND XRAY_SUPPORTED_ARCH AND OS_NAME MATCHES "Darwin|Linux|FreeBSD|NetBSD|Fuchsia") set(COMPILER_RT_HAS_XRAY TRUE) @@ -869,7 +861,10 @@ # calling malloc on first use. # TODO(hctim): Enable this on Android again. Looks like it's causing a SIGSEGV # for Scudo and GWP-ASan, further testing needed. 
-if (GWP_ASAN_SUPPORTED_ARCH AND COMPILER_RT_BUILD_GWP_ASAN AND +if (GWP_ASAN_SUPPORTED_ARCH AND + COMPILER_RT_BUILD_GWP_ASAN AND + COMPILER_RT_BUILD_SANITIZERS AND + "gwp_asan" IN_LIST COMPILER_RT_SANITIZERS_TO_BUILD AND OS_NAME MATCHES "Linux") set(COMPILER_RT_HAS_GWP_ASAN TRUE) else() diff --git a/compiler-rt/lib/CMakeLists.txt b/compiler-rt/lib/CMakeLists.txt --- a/compiler-rt/lib/CMakeLists.txt +++ b/compiler-rt/lib/CMakeLists.txt @@ -24,12 +24,13 @@ function(compiler_rt_build_runtime runtime) string(TOUPPER ${runtime} runtime_uppercase) if(COMPILER_RT_HAS_${runtime_uppercase}) - add_subdirectory(${runtime}) if(${runtime} STREQUAL tsan) add_subdirectory(tsan/dd) endif() - if(${runtime} STREQUAL scudo) + if(${runtime} STREQUAL scudo_standalone) add_subdirectory(scudo/standalone) + else() + add_subdirectory(${runtime}) endif() endif() endfunction() diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -375,7 +375,9 @@ set(i386_SOURCES ${GENERIC_SOURCES} ${x86_ARCH_SOURCES}) endif () # if (NOT MSVC) -set(arm_SOURCES + +# builtin support for Targets that have Arm state or have Thumb2 +set(arm_or_thumb2_base_SOURCES arm/fp_mode.c arm/bswapdi2.S arm/bswapsi2.S @@ -385,6 +387,13 @@ arm/divmodsi4.S arm/divsi3.S arm/modsi3.S + arm/udivmodsi4.S + arm/udivsi3.S + arm/umodsi3.S + ${GENERIC_SOURCES} +) + +set(arm_sync_SOURCES arm/sync_fetch_and_add_4.S arm/sync_fetch_and_add_8.S arm/sync_fetch_and_and_4.S @@ -405,13 +414,11 @@ arm/sync_fetch_and_umin_8.S arm/sync_fetch_and_xor_4.S arm/sync_fetch_and_xor_8.S - arm/udivmodsi4.S - arm/udivsi3.S - arm/umodsi3.S - ${GENERIC_SOURCES} ) -set(thumb1_SOURCES +# builtin support for Thumb-only targets with very limited Thumb2 technology, +# such as v6-m and v8-m.baseline +set(thumb1_base_SOURCES arm/divsi3.S arm/udivsi3.S arm/comparesf2.S @@ -490,6 +497,8 @@ set(arm_Thumb1_icache_SOURCES arm/sync_synchronize.S ) + +# thumb1 calling into Arm to cover support set(arm_Thumb1_SOURCES ${arm_Thumb1_JT_SOURCES} ${arm_Thumb1_SjLj_EH_SOURCES} @@ -498,6 +507,13 @@ ${arm_Thumb1_icache_SOURCES} ) +# base functionality for Arm Targets prior to Arm v7-a and Armv6-m such as v6, +# v5t, v4t +set(arm_min_SOURCES + ${arm_or_thumb2_base_SOURCES} + ${arm_EABI_SOURCES} +) + if(MINGW) set(arm_SOURCES arm/aeabi_idivmod.S @@ -505,18 +521,24 @@ arm/aeabi_uidivmod.S arm/aeabi_uldivmod.S arm/chkstk.S - ${arm_SOURCES} + ${arm_or_thumb2_base_SOURCES} + ${arm_sync_SOURCES} + ) + + set(thumb1_SOURCES + ${thumb1_base_SOURCES} ) elseif(NOT WIN32) # TODO the EABI sources should only be added to EABI targets set(arm_SOURCES - ${arm_SOURCES} + ${arm_or_thumb2_base_SOURCES} + ${arm_sync_SOURCES} ${arm_EABI_SOURCES} ${arm_Thumb1_SOURCES} ) set(thumb1_SOURCES - ${thumb1_SOURCES} + ${thumb1_base_SOURCES} ${arm_EABI_SOURCES} ) endif() @@ -563,6 +585,7 @@ ) endif() +set(armv4t_SOURCES ${arm_min_SOURCES}) set(armhf_SOURCES ${arm_SOURCES}) set(armv7_SOURCES ${arm_SOURCES}) set(armv7s_SOURCES ${arm_SOURCES}) diff --git a/compiler-rt/test/CMakeLists.txt b/compiler-rt/test/CMakeLists.txt --- a/compiler-rt/test/CMakeLists.txt +++ b/compiler-rt/test/CMakeLists.txt @@ -50,7 +50,11 @@ function(compiler_rt_test_runtime runtime) string(TOUPPER ${runtime} runtime_uppercase) if(COMPILER_RT_HAS_${runtime_uppercase}) - add_subdirectory(${runtime}) + if (${runtime} STREQUAL scudo_standalone) + add_subdirectory(scudo/standalone) + else() + add_subdirectory(${runtime}) + endif() 
foreach(directory ${ARGN}) add_subdirectory(${directory}) endforeach() diff --git a/compiler-rt/test/scudo/CMakeLists.txt b/compiler-rt/test/scudo/CMakeLists.txt deleted file mode 100644 --- a/compiler-rt/test/scudo/CMakeLists.txt +++ /dev/null @@ -1,35 +0,0 @@ -set(SCUDO_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(SCUDO_LIT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) - -set(SCUDO_TESTSUITES) - -set(SCUDO_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS}) -if(NOT COMPILER_RT_STANDALONE_BUILD) - list(APPEND SCUDO_TEST_DEPS scudo) -endif() - -configure_lit_site_cfg( - ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in - ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py - ) - -set(SCUDO_TEST_ARCH ${SCUDO_SUPPORTED_ARCH}) -foreach(arch ${SCUDO_TEST_ARCH}) - set(SCUDO_TEST_TARGET_ARCH ${arch}) - string(TOLOWER "-${arch}" SCUDO_TEST_CONFIG_SUFFIX) - get_test_cc_for_arch(${arch} SCUDO_TEST_TARGET_CC SCUDO_TEST_TARGET_CFLAGS) - string(TOUPPER ${arch} ARCH_UPPER_CASE) - set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}Config) - - configure_lit_site_cfg( - ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in - ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py) - list(APPEND SCUDO_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) -endforeach() - -add_subdirectory(standalone) - -add_lit_testsuite(check-scudo "Running the Scudo Hardened Allocator tests" - ${SCUDO_TESTSUITES} - DEPENDS ${SCUDO_TEST_DEPS}) -set_target_properties(check-scudo PROPERTIES FOLDER "Compiler-RT Misc") diff --git a/compiler-rt/test/scudo/standalone/CMakeLists.txt b/compiler-rt/test/scudo/standalone/CMakeLists.txt --- a/compiler-rt/test/scudo/standalone/CMakeLists.txt +++ b/compiler-rt/test/scudo/standalone/CMakeLists.txt @@ -10,12 +10,12 @@ ${CMAKE_CURRENT_BINARY_DIR}/unit/gwp_asan/lit.site.cfg.py) list(APPEND SCUDO_STANDALONE_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/unit/gwp_asan) endif() -endif() -add_lit_testsuite(check-scudo_standalone - "Running Scudo Standalone tests" - ${SCUDO_STANDALONE_TESTSUITES} - DEPENDS ${SCUDO_STANDALONE_TEST_DEPS}) + add_lit_testsuite(check-scudo_standalone + "Running Scudo Standalone tests" + ${SCUDO_STANDALONE_TESTSUITES} + DEPENDS ${SCUDO_STANDALONE_TEST_DEPS}) -set_target_properties(check-scudo_standalone - PROPERTIES FOLDER "Compiler-RT Tests") + set_target_properties(check-scudo_standalone + PROPERTIES FOLDER "Compiler-RT Tests") +endif() diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -240,6 +240,16 @@ * The legacy extension intrinsic functions `IZEXT` and `JZEXT` are supported; `ZEXT` has different behavior with various older compilers, so it is not supported. +* f18 doesn't impose a limit on the number of continuation lines + allowed for a single statement. +* When a type-bound procedure declaration statement has neither interface + nor attributes, the "::" before the bindings is optional, even + if a binding has renaming with "=> proc". + The colons are not necessary for an unambiguous parse, C768 + notwithstanding. +* A type-bound procedure binding can be passed as an actual + argument corresponding to a dummy procedure and can be used as + the target of a procedure pointer assignment statement. ### Extensions supported when enabled by options @@ -350,6 +360,19 @@ pointer-valued function reference. No other Fortran compiler seems to handle this correctly for `ASSOCIATE`, though NAG gets it right for `SELECT TYPE`. 
+* The standard doesn't explicitly require that a named constant that + appears as part of a complex-literal-constant be a scalar, but + most compilers emit an error when an array appears. + f18 supports them with a portability warning. +* f18 does not enforce a blanket prohibition against generic + interfaces containing a mixture of functions and subroutines. + Apart from some contexts in which the standard requires all of + a particular generic interface to have only all functions or + all subroutines as its specific procedures, we allow both to + appear, unlike several other Fortran compilers. + This is especially desirable when two generics of the same + name are combined due to USE association and the mixture may + be inadvertent. ## Behavior in cases where the standard is ambiguous or indefinite @@ -490,3 +513,10 @@ application codes that expect exterior symbols whose names match components to be visible in a derived-type definition's default initialization expressions, and so f18 follows that precedent. + +* 19.3.1p1 "Within its scope, a local identifier of an entity of class (1) + or class (4) shall not be the same as a global identifier used in that scope..." + is read so as to allow the name of a module, submodule, main program, + or `BLOCK DATA` subprogram to also be the name of an local entity in its + scope, with a portability warning, since that global name is not actually + capable of being "used" in its scope. diff --git a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp --- a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp +++ b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp @@ -187,41 +187,50 @@ } void OpenMPCounterVisitor::Post(const OmpProcBindClause::Type &c) { - clauseDetails += "type=" + OmpProcBindClause::EnumToString(c) + ";"; + clauseDetails += + "type=" + std::string{OmpProcBindClause::EnumToString(c)} + ";"; } void OpenMPCounterVisitor::Post(const OmpDefaultClause::Type &c) { - clauseDetails += "type=" + OmpDefaultClause::EnumToString(c) + ";"; + clauseDetails += + "type=" + std::string{OmpDefaultClause::EnumToString(c)} + ";"; } void OpenMPCounterVisitor::Post( const OmpDefaultmapClause::ImplicitBehavior &c) { clauseDetails += - "implicit_behavior=" + OmpDefaultmapClause::EnumToString(c) + ";"; + "implicit_behavior=" + std::string{OmpDefaultmapClause::EnumToString(c)} + + ";"; } void OpenMPCounterVisitor::Post( const OmpDefaultmapClause::VariableCategory &c) { clauseDetails += - "variable_category=" + OmpDefaultmapClause::EnumToString(c) + ";"; + "variable_category=" + std::string{OmpDefaultmapClause::EnumToString(c)} + + ";"; } void OpenMPCounterVisitor::Post(const OmpScheduleModifierType::ModType &c) { - clauseDetails += "modifier=" + OmpScheduleModifierType::EnumToString(c) + ";"; + clauseDetails += + "modifier=" + std::string{OmpScheduleModifierType::EnumToString(c)} + ";"; } void OpenMPCounterVisitor::Post(const OmpLinearModifier::Type &c) { - clauseDetails += "modifier=" + OmpLinearModifier::EnumToString(c) + ";"; + clauseDetails += + "modifier=" + std::string{OmpLinearModifier::EnumToString(c)} + ";"; } void OpenMPCounterVisitor::Post(const OmpDependenceType::Type &c) { - clauseDetails += "type=" + OmpDependenceType::EnumToString(c) + ";"; + clauseDetails += + "type=" + std::string{OmpDependenceType::EnumToString(c)} + ";"; } void OpenMPCounterVisitor::Post(const OmpMapType::Type &c) { - clauseDetails += "type=" + OmpMapType::EnumToString(c) + ";"; + clauseDetails += 
"type=" + std::string{OmpMapType::EnumToString(c)} + ";"; } void OpenMPCounterVisitor::Post(const OmpScheduleClause::ScheduleType &c) { - clauseDetails += "type=" + OmpScheduleClause::EnumToString(c) + ";"; + clauseDetails += + "type=" + std::string{OmpScheduleClause::EnumToString(c)} + ";"; } void OpenMPCounterVisitor::Post(const OmpIfClause::DirectiveNameModifier &c) { - clauseDetails += "name_modifier=" + OmpIfClause::EnumToString(c) + ";"; + clauseDetails += + "name_modifier=" + std::string{OmpIfClause::EnumToString(c)} + ";"; } void OpenMPCounterVisitor::Post(const OmpCancelType::Type &c) { - clauseDetails += "type=" + OmpCancelType::EnumToString(c) + ";"; + clauseDetails += "type=" + std::string{OmpCancelType::EnumToString(c)} + ";"; } void OpenMPCounterVisitor::Post(const OmpClause &c) { PostClauseCommon(normalize_clause_name(c.source.ToString())); diff --git a/flang/include/flang/Common/enum-class.h b/flang/include/flang/Common/enum-class.h new file mode 100644 --- /dev/null +++ b/flang/include/flang/Common/enum-class.h @@ -0,0 +1,73 @@ +//===-- include/flang/Common/enum-class.h -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// The macro +// ENUM_CLASS(className, enum1, enum2, ..., enumN) +// defines +// enum class className { enum1, enum2, ... , enumN }; +// as well as the introspective utilities +// static constexpr std::size_t className_enumSize{N}; +// static inline const std::string &EnumToString(className); + +#ifndef FORTRAN_COMMON_ENUM_CLASS_H_ +#define FORTRAN_COMMON_ENUM_CLASS_H_ + +#include +#include + +namespace Fortran::common { + +constexpr std::size_t CountEnumNames(const char *p) { + std::size_t n{0}; + std::size_t any{0}; + for (; *p; ++p) { + if (*p == ',') { + n += any; + any = 0; + } else if (*p != ' ') { + any = 1; + } + } + return n + any; +} + +template +constexpr std::array EnumNames(const char *p) { + std::array result{""}; + std::size_t at{0}; + const char *start{nullptr}; + for (; *p; ++p) { + if (*p == ',' || *p == ' ') { + if (start) { + result[at++] = + std::string_view{start, static_cast(p - start)}; + start = nullptr; + } + } else if (!start) { + start = p; + } + } + if (start) { + result[at] = std::string_view{start, static_cast(p - start)}; + } + return result; +} + +#define ENUM_CLASS(NAME, ...) 
\ + enum class NAME { __VA_ARGS__ }; \ + [[maybe_unused]] static constexpr std::size_t NAME##_enumSize{ \ + ::Fortran::common::CountEnumNames(#__VA_ARGS__)}; \ + [[maybe_unused]] static inline std::string_view EnumToString(NAME e) { \ + static const constexpr char vaArgs[]{#__VA_ARGS__}; \ + static const constexpr auto names{ \ + ::Fortran::common::EnumNames(vaArgs)}; \ + return names[static_cast(e)]; \ + } + +} // namespace Fortran::common +#endif // FORTRAN_COMMON_ENUM_CLASS_H_ diff --git a/flang/include/flang/Common/enum-set.h b/flang/include/flang/Common/enum-set.h --- a/flang/include/flang/Common/enum-set.h +++ b/flang/include/flang/Common/enum-set.h @@ -207,7 +207,7 @@ template STREAM &Dump( - STREAM &o, const std::string &EnumToString(enumerationType)) const { + STREAM &o, std::string_view EnumToString(enumerationType)) const { char sep{'{'}; IterateOverMembers([&](auto e) { o << sep << EnumToString(e); diff --git a/flang/include/flang/Common/idioms.h b/flang/include/flang/Common/idioms.h --- a/flang/include/flang/Common/idioms.h +++ b/flang/include/flang/Common/idioms.h @@ -23,6 +23,7 @@ #error g++ >= 7.2 is required #endif +#include "enum-class.h" #include "visit.h" #include #include @@ -125,32 +126,6 @@ const std::size_t value; }; -#define ENUM_CLASS(NAME, ...) \ - enum class NAME { __VA_ARGS__ }; \ - [[maybe_unused]] static constexpr std::size_t NAME##_enumSize{[] { \ - enum { __VA_ARGS__ }; \ - return Fortran::common::ListItemCount{__VA_ARGS__}.value; \ - }()}; \ - struct NAME##_struct { \ - NAME##_struct(const NAME##_struct &) = delete; \ - NAME##_struct &operator=(const NAME##_struct &) = delete; \ - static NAME##_struct &instance() { \ - static NAME##_struct s; \ - return s; \ - } \ - std::array _enumNames; \ -\ - private: \ - NAME##_struct() { \ - Fortran::common::BuildIndexToString( \ - #__VA_ARGS__, _enumNames.data(), NAME##_enumSize); \ - } \ - ~NAME##_struct() {} \ - }; \ - [[maybe_unused]] static inline const std::string &EnumToString(NAME e) { \ - return NAME##_struct::instance()._enumNames[static_cast(e)]; \ - } - // Check that a pointer is non-null and dereference it #define DEREF(p) Fortran::common::Deref(p, __FILE__, __LINE__) diff --git a/flang/include/flang/Evaluate/check-expression.h b/flang/include/flang/Evaluate/check-expression.h --- a/flang/include/flang/Evaluate/check-expression.h +++ b/flang/include/flang/Evaluate/check-expression.h @@ -108,5 +108,8 @@ template bool IsErrorExpr(const A &); extern template bool IsErrorExpr(const Expr &); +std::optional CheckStatementFunction( + const Symbol &, const Expr &, FoldingContext &); + } // namespace Fortran::evaluate #endif diff --git a/flang/include/flang/Evaluate/expression.h b/flang/include/flang/Evaluate/expression.h --- a/flang/include/flang/Evaluate/expression.h +++ b/flang/include/flang/Evaluate/expression.h @@ -473,20 +473,20 @@ public: using Result = Type; using Base = ArrayConstructorValues; - CLASS_BOILERPLATE(ArrayConstructor) - ArrayConstructor(Expr &&len, Base &&v) - : Base{std::move(v)}, length_{std::move(len)} {} - template - explicit ArrayConstructor(const A &prototype) - : length_{prototype.LEN().value()} {} + DEFAULT_CONSTRUCTORS_AND_ASSIGNMENTS(ArrayConstructor) + explicit ArrayConstructor(Base &&values) : Base{std::move(values)} {} + template explicit ArrayConstructor(const Expr &) {} + ArrayConstructor &set_LEN(Expr &&); bool operator==(const ArrayConstructor &) const; static constexpr Result result() { return Result{}; } static constexpr DynamicType GetType() { return Result::GetType(); } 
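Earlier in this patch, the new flang/include/flang/Common/enum-class.h re-implements the ENUM_CLASS machinery (previously in idioms.h) so that the enumerator names are parsed out of the stringized __VA_ARGS__ with constexpr helpers and EnumToString returns a std::string_view. A self-contained sketch of the same technique, with simplified names (not the f18 header verbatim):

```cpp
#include <array>
#include <cstddef>
#include <iostream>
#include <string_view>

// Count comma-separated enumerator names in a stringized argument list.
constexpr std::size_t CountNames(const char *p) {
  std::size_t n = 0, any = 0;
  for (; *p; ++p) {
    if (*p == ',') { n += any; any = 0; }
    else if (*p != ' ') { any = 1; }
  }
  return n + any;
}

// Split the same string into string_views, one per enumerator name.
template <std::size_t N>
constexpr std::array<std::string_view, N> SplitNames(const char *p) {
  std::array<std::string_view, N> result{};
  std::size_t at = 0;
  const char *start = nullptr;
  for (; *p; ++p) {
    if (*p == ',' || *p == ' ') {
      if (start) {
        result[at++] = std::string_view{start, static_cast<std::size_t>(p - start)};
        start = nullptr;
      }
    } else if (!start) {
      start = p;
    }
  }
  if (start)
    result[at] = std::string_view{start, static_cast<std::size_t>(p - start)};
  return result;
}

#define ENUM_CLASS_SKETCH(NAME, ...) \
  enum class NAME { __VA_ARGS__ }; \
  constexpr std::size_t NAME##_enumSize{CountNames(#__VA_ARGS__)}; \
  inline std::string_view EnumToString(NAME e) { \
    static constexpr char args[]{#__VA_ARGS__}; \
    static constexpr auto names{SplitNames<NAME##_enumSize>(args)}; \
    return names[static_cast<std::size_t>(e)]; \
  }

ENUM_CLASS_SKETCH(Color, Red, Green, Blue)

int main() {
  static_assert(Color_enumSize == 3);
  std::cout << EnumToString(Color::Green) << "\n"; // prints "Green"
}
```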
llvm::raw_ostream &AsFortran(llvm::raw_ostream &) const; - const Expr &LEN() const { return length_.value(); } + const Expr *LEN() const { + return length_ ? &length_->value() : nullptr; + } private: - common::CopyableIndirection> length_; + std::optional>> length_; }; template <> diff --git a/flang/include/flang/Evaluate/type.h b/flang/include/flang/Evaluate/type.h --- a/flang/include/flang/Evaluate/type.h +++ b/flang/include/flang/Evaluate/type.h @@ -373,7 +373,7 @@ static constexpr TypeCategory category{CATEGORY}; constexpr bool operator==(const SomeKind &) const { return true; } static std::string AsFortran() { - return "Some"s + common::EnumToString(category); + return "Some"s + std::string{common::EnumToString(category)}; } }; diff --git a/flang/include/flang/Frontend/TargetOptions.h b/flang/include/flang/Frontend/TargetOptions.h --- a/flang/include/flang/Frontend/TargetOptions.h +++ b/flang/include/flang/Frontend/TargetOptions.h @@ -22,18 +22,18 @@ namespace Fortran::frontend { -/// Options for controlling the target. Currently this is just a placeholder. -/// In the future, we will use this to specify various target options that -/// will affect the generated code e.g.: -/// * CPU to tune the code for -/// * available CPU/hardware extensions -/// * target specific features to enable/disable -/// * options for accelerators (e.g. GPUs) -/// * (...) +/// Options for controlling the target. class TargetOptions { public: /// The name of the target triple to compile for. std::string triple; + + /// If given, the name of the target CPU to generate code for. + std::string cpu; + + /// The list of target specific features to enable or disable, as written on + /// the command line. + std::vector featuresAsWritten; }; } // end namespace Fortran::frontend diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -193,7 +193,7 @@ /// Generate the type from a category and kind and length parameters. virtual mlir::Type genType(Fortran::common::TypeCategory tc, int kind, - llvm::ArrayRef lenParameters = llvm::None) = 0; + llvm::ArrayRef lenParameters = std::nullopt) = 0; /// Generate the type from a DerivedTypeSpec. virtual mlir::Type genType(const Fortran::semantics::DerivedTypeSpec &) = 0; /// Generate the type from a Variable diff --git a/flang/include/flang/Lower/BoxAnalyzer.h b/flang/include/flang/Lower/BoxAnalyzer.h --- a/flang/include/flang/Lower/BoxAnalyzer.h +++ b/flang/include/flang/Lower/BoxAnalyzer.h @@ -79,7 +79,7 @@ llvm::Optional charLen() const { if (auto *l = std::get_if(&len)) return {*l}; - return llvm::None; + return std::nullopt; } static constexpr bool staticSize() { return false; } @@ -323,7 +323,7 @@ [](const ScalarStaticChar &x) -> A { return {x.charLen()}; }, [](const StaticArrayStaticChar &x) -> A { return {x.charLen()}; }, [](const DynamicArrayStaticChar &x) -> A { return {x.charLen()}; }, - [](const auto &) -> A { return llvm::None; }); + [](const auto &) -> A { return std::nullopt; }); } llvm::Optional getCharLenExpr() const { @@ -331,7 +331,7 @@ return match([](const ScalarDynamicChar &x) { return x.charLen(); }, [](const StaticArrayDynamicChar &x) { return x.charLen(); }, [](const DynamicArrayDynamicChar &x) { return x.charLen(); }, - [](const auto &) -> A { return llvm::None; }); + [](const auto &) -> A { return std::nullopt; }); } /// Is the origin of this array the default of vector of `1`? 
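The flang/include/flang/Evaluate/expression.h hunk a little above changes ArrayConstructor so the LEN expression is no longer mandatory: the length is stored in a std::optional and LEN() now returns a pointer that is null until set_LEN() installs a value. A tiny sketch of that accessor shape (illustrative types, not flang's Expr machinery):

```cpp
#include <optional>
#include <string>
#include <utility>

class CharArrayCtorSketch {
  std::optional<std::string> length_; // stand-in for the LEN expression
public:
  CharArrayCtorSketch &set_LEN(std::string &&len) {
    length_ = std::move(len);
    return *this;
  }
  // Callers must now check for a null pointer instead of assuming a length.
  const std::string *LEN() const { return length_ ? &*length_ : nullptr; }
};

int main() {
  CharArrayCtorSketch a;
  bool before = (a.LEN() == nullptr); // true: no length yet
  a.set_LEN("n");
  bool after = (a.LEN() != nullptr);  // true: length installed
  return (before && after) ? 0 : 1;
}
```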
@@ -480,7 +480,7 @@ return 0; return *asInt; } - return llvm::None; + return std::nullopt; } // Get the `SomeExpr` that describes the CHARACTER's LEN. @@ -503,7 +503,7 @@ if (Fortran::semantics::MaybeSubscriptIntExpr expr = charExpr->LEN()) return {Fortran::evaluate::AsGenericExpr(std::move(*expr))}; - return llvm::None; + return std::nullopt; } VT box; diff --git a/flang/include/flang/Lower/ComponentPath.h b/flang/include/flang/Lower/ComponentPath.h --- a/flang/include/flang/Lower/ComponentPath.h +++ b/flang/include/flang/Lower/ComponentPath.h @@ -51,7 +51,7 @@ bool hasExtendCoorRef() const { return extendCoorRef.has_value(); } ExtendRefFunc getExtendCoorRef() const; - void resetExtendCoorRef() { extendCoorRef = llvm::None; } + void resetExtendCoorRef() { extendCoorRef = std::nullopt; } void resetPC(); llvm::SmallVector reversePath; @@ -68,7 +68,7 @@ /// This optional continuation allows the generation of those dereferences. /// These accesses are always on Fortran entities of record types, which are /// implicitly in-memory objects. - llvm::Optional extendCoorRef = llvm::None; + llvm::Optional extendCoorRef = std::nullopt; private: void setPC(bool isImplicit); diff --git a/flang/include/flang/Lower/IterationSpace.h b/flang/include/flang/Lower/IterationSpace.h --- a/flang/include/flang/Lower/IterationSpace.h +++ b/flang/include/flang/Lower/IterationSpace.h @@ -469,7 +469,7 @@ assert(i < lhsBases.size()); if (lhsBases[counter]) return findBinding(*lhsBases[counter]); - return llvm::None; + return std::nullopt; } /// Return the outermost loop in this FORALL nest. diff --git a/flang/include/flang/Optimizer/Builder/Character.h b/flang/include/flang/Optimizer/Builder/Character.h --- a/flang/include/flang/Optimizer/Builder/Character.h +++ b/flang/include/flang/Optimizer/Builder/Character.h @@ -176,6 +176,9 @@ /// to the number of characters per the Fortran KIND. mlir::Value readLengthFromBox(mlir::Value box); + /// Same as readLengthFromBox but the CharacterType is provided. + mlir::Value readLengthFromBox(mlir::Value box, fir::CharacterType charTy); + private: /// FIXME: the implementation also needs a clean-up now that /// CharBoxValue are better propagated. diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h --- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h +++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h @@ -385,14 +385,14 @@ /// Create an IfOp with no "else" region, and no result values. /// Usage: genIfThen(loc, cdt).genThen(lambda).end(); IfBuilder genIfThen(mlir::Location loc, mlir::Value cdt) { - auto op = create(loc, llvm::None, cdt, false); + auto op = create(loc, std::nullopt, cdt, false); return IfBuilder(op, *this); } /// Create an IfOp with an "else" region, and no result values. /// Usage: genIfThenElse(loc, cdt).genThen(lambda).genElse(lambda).end(); IfBuilder genIfThenElse(mlir::Location loc, mlir::Value cdt) { - auto op = create(loc, llvm::None, cdt, true); + auto op = create(loc, std::nullopt, cdt, true); return IfBuilder(op, *this); } diff --git a/flang/include/flang/Optimizer/Support/KindMapping.h b/flang/include/flang/Optimizer/Support/KindMapping.h --- a/flang/include/flang/Optimizer/Support/KindMapping.h +++ b/flang/include/flang/Optimizer/Support/KindMapping.h @@ -66,7 +66,7 @@ /// of 6 KindTy must be passed. The kinds must be the given in the following /// order: CHARACTER, COMPLEX, DOUBLE PRECISION, INTEGER, LOGICAL, and REAL. 
explicit KindMapping(mlir::MLIRContext *context, llvm::StringRef map, - llvm::ArrayRef defs = llvm::None); + llvm::ArrayRef defs = std::nullopt); explicit KindMapping(mlir::MLIRContext *context, llvm::StringRef map, llvm::StringRef defs) : KindMapping{context, map, toDefaultKinds(defs)} {} diff --git a/flang/include/flang/Parser/characters.h b/flang/include/flang/Parser/characters.h --- a/flang/include/flang/Parser/characters.h +++ b/flang/include/flang/Parser/characters.h @@ -65,7 +65,7 @@ return IsUpperCaseLetter(ch) ? ch - 'A' + 'a' : ch; } -inline std::string ToLowerCaseLetters(const std::string &str) { +inline std::string ToLowerCaseLetters(std::string_view str) { std::string lowered{str}; for (char &ch : lowered) { ch = ToLowerCaseLetter(ch); @@ -81,7 +81,7 @@ return IsLowerCaseLetter(ch) ? ch - 'a' + 'A' : ch; } -inline std::string ToUpperCaseLetters(const std::string &str) { +inline std::string ToUpperCaseLetters(std::string_view str) { std::string raised{str}; for (char &ch : raised) { ch = ToUpperCaseLetter(ch); diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -37,7 +37,7 @@ static constexpr const char *GetNodeName(const T &) { return N; } #define NODE_ENUM(T, E) \ static std::string GetNodeName(const T::E &x) { \ - return #E " = "s + T::EnumToString(x); \ + return #E " = "s + std::string{T::EnumToString(x)}; \ } #define NODE(T1, T2) NODE_NAME(T1::T2, #T2) NODE_NAME(bool, "bool") diff --git a/flang/include/flang/Parser/message.h b/flang/include/flang/Parser/message.h --- a/flang/include/flang/Parser/message.h +++ b/flang/include/flang/Parser/message.h @@ -96,9 +96,9 @@ // The construction of a MessageFormattedText uses a MessageFixedText // as a vsnprintf() formatting string that is applied to the -// following arguments. CharBlock and std::string argument -// values are also supported; they are automatically converted into -// char pointers that are suitable for '%s' formatting. +// following arguments. CharBlock, std::string, and std::string_view +// argument values are also supported; they are automatically converted +// into char pointers that are suitable for '%s' formatting. class MessageFormattedText { public: template @@ -128,10 +128,6 @@ static_assert(!std::is_class_v>); return x; } - template A Convert(A &x) { - static_assert(!std::is_class_v>); - return x; - } template common::IfNoLvalue Convert(A &&x) { static_assert(!std::is_class_v>); return std::move(x); @@ -139,8 +135,9 @@ const char *Convert(const char *s) { return s; } const char *Convert(char *s) { return s; } const char *Convert(const std::string &); - const char *Convert(std::string &); const char *Convert(std::string &&); + const char *Convert(const std::string_view &); + const char *Convert(std::string_view &&); const char *Convert(CharBlock); std::intmax_t Convert(std::int64_t x) { return x; } std::uintmax_t Convert(std::uint64_t x) { return x; } diff --git a/flang/include/flang/Semantics/expression.h b/flang/include/flang/Semantics/expression.h --- a/flang/include/flang/Semantics/expression.h +++ b/flang/include/flang/Semantics/expression.h @@ -142,12 +142,6 @@ // its INTEGER kind type parameter. std::optional IsImpliedDo(parser::CharBlock) const; - // Allows a whole assumed-size array to appear for the lifetime of - // the returned value. 
- common::Restorer AllowWholeAssumedSizeArray() { - return common::ScopedSet(isWholeAssumedSizeArrayOk_, true); - } - common::Restorer DoNotUseSavedTypedExprs() { return common::ScopedSet(useSavedTypedExprs_, false); } @@ -243,6 +237,7 @@ MaybeExpr Analyze(const parser::StructureConstructor &); MaybeExpr Analyze(const parser::InitialDataTarget &); MaybeExpr Analyze(const parser::NullInit &); + MaybeExpr Analyze(const parser::StmtFunctionStmt &); void Analyze(const parser::CallStmt &); const Assignment *Analyze(const parser::AssignmentStmt &); @@ -255,6 +250,17 @@ int IntegerTypeSpecKind(const parser::IntegerTypeSpec &); private: + // Allows a whole assumed-size array to appear for the lifetime of + // the returned value. + common::Restorer AllowWholeAssumedSizeArray() { + return common::ScopedSet(isWholeAssumedSizeArrayOk_, true); + } + + // Allows an Expr to be a null pointer. + common::Restorer AllowNullPointer() { + return common::ScopedSet(isNullPointerOk_, true); + } + MaybeExpr Analyze(const parser::IntLiteralConstant &, bool negated = false); MaybeExpr Analyze(const parser::RealLiteralConstant &); MaybeExpr Analyze(const parser::ComplexPart &); @@ -349,7 +355,8 @@ std::pair ResolveGeneric(const Symbol &, const ActualArguments &, const AdjustActuals &, bool isSubroutine, bool mightBeStructureConstructor = false); - void EmitGenericResolutionError(const Symbol &, bool dueToNullActuals); + void EmitGenericResolutionError( + const Symbol &, bool dueToNullActuals, bool isSubroutine); const Symbol &AccessSpecific( const Symbol &originalGeneric, const Symbol &specific); std::optional GetCalleeAndArguments(const parser::Name &, @@ -369,14 +376,17 @@ return evaluate::Fold(foldingContext_, std::move(expr)); } bool CheckIsValidForwardReference(const semantics::DerivedTypeSpec &); + MaybeExpr AnalyzeComplex(MaybeExpr &&re, MaybeExpr &&im, const char *what); semantics::SemanticsContext &context_; FoldingContext &foldingContext_{context_.foldingContext()}; std::map impliedDos_; // values are INTEGER kinds bool isWholeAssumedSizeArrayOk_{false}; + bool isNullPointerOk_{false}; bool useSavedTypedExprs_{true}; bool inWhereBody_{false}; bool inDataStmtConstant_{false}; + bool inStmtFunctionDefinition_{false}; friend class ArgumentAnalyzer; }; diff --git a/flang/include/flang/Semantics/scope.h b/flang/include/flang/Semantics/scope.h --- a/flang/include/flang/Semantics/scope.h +++ b/flang/include/flang/Semantics/scope.h @@ -117,6 +117,7 @@ const Scope *GetDerivedTypeParent() const; const Scope &GetDerivedTypeBase() const; inline std::optional GetName() const; + // Returns true if this scope contains, or is, another scope. 
bool Contains(const Scope &) const; /// Make a scope nested in this one Scope &MakeScope(Kind kind, Symbol *symbol = nullptr); diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -168,10 +168,12 @@ return messages_.Say(std::move(msg)); } template - void SayWithDecl(const Symbol &symbol, const parser::CharBlock &at, - parser::MessageFixedText &&msg, A &&...args) { + parser::Message &SayWithDecl(const Symbol &symbol, + const parser::CharBlock &at, parser::MessageFixedText &&msg, + A &&...args) { auto &message{Say(at, std::move(msg), args...)}; evaluate::AttachDeclaration(&message, symbol); + return message; } const Scope &FindScope(parser::CharBlock) const; diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -523,9 +523,9 @@ class Symbol { public: ENUM_CLASS(Flag, - Function, // symbol is a function + Function, // symbol is a function or statement function Subroutine, // symbol is a subroutine - StmtFunction, // symbol is a statement function (Function is set too) + StmtFunction, // symbol is a statement function or result Implicit, // symbol is implicitly typed ImplicitOrError, // symbol must be implicitly typed or it's an error ModFile, // symbol came from .mod file @@ -698,7 +698,7 @@ Details details_; Symbol() {} // only created in class Symbols - const std::string GetDetailsName() const; + std::string GetDetailsName() const; friend llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Symbol &); friend llvm::raw_ostream &DumpForUnparse( llvm::raw_ostream &, const Symbol &, bool); diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -53,6 +53,7 @@ const Symbol *FindSubprogram(const Symbol &); const Symbol *FindFunctionResult(const Symbol &); const Symbol *FindOverriddenBinding(const Symbol &); +const Symbol *FindGlobal(const Symbol &); const DeclTypeSpec *FindParentTypeSpec(const DerivedTypeSpec &); const DeclTypeSpec *FindParentTypeSpec(const DeclTypeSpec &); @@ -84,8 +85,13 @@ bool IsGenericDefinedOp(const Symbol &); bool IsDefinedOperator(SourceName); std::string MakeOpName(SourceName); + +// Returns true if maybeAncestor exists and is a proper ancestor of a +// descendent scope (or symbol owner). Will be false, unlike Scope::Contains(), +// if maybeAncestor *is* the descendent. bool DoesScopeContain(const Scope *maybeAncestor, const Scope &maybeDescendent); bool DoesScopeContain(const Scope *, const Symbol &); + bool IsUseAssociated(const Symbol &, const Scope &); bool IsHostAssociated(const Symbol &, const Scope &); bool IsHostAssociatedIntoSubprogram(const Symbol &, const Scope &); @@ -181,8 +187,9 @@ bool IsAssumedType(const Symbol &); bool IsPolymorphic(const Symbol &); bool IsPolymorphicAllocatable(const Symbol &); -// Return an error if component symbol is not accessible from scope (7.5.4.8(2)) -std::optional CheckAccessibleComponent( + +// Return an error if a symbol is not accessible from a scope +std::optional CheckAccessibleSymbol( const semantics::Scope &, const Symbol &); // Analysis of image control statements @@ -609,11 +616,6 @@ // procedure. 
bool HasDefinedIo( GenericKind::DefinedIo, const DerivedTypeSpec &, const Scope * = nullptr); -// Seeks out an allocatable or pointer ultimate component that is not -// nested in a nonallocatable/nonpointer component with a specific -// defined I/O procedure. -const Symbol *FindUnsafeIoDirectComponent( - GenericKind::DefinedIo, const DerivedTypeSpec &, const Scope * = nullptr); // Some intrinsic operators have more than one name (e.g. `operator(.eq.)` and // `operator(==)`). GetAllNames() returns them all, including symbolName. diff --git a/flang/lib/Common/idioms.cpp b/flang/lib/Common/idioms.cpp --- a/flang/lib/Common/idioms.cpp +++ b/flang/lib/Common/idioms.cpp @@ -10,7 +10,6 @@ #include #include #include -#include namespace Fortran::common { @@ -24,22 +23,4 @@ std::abort(); } -// Converts the comma separated list of enumerators into tokens which are then -// stored into the provided array of strings. This is intended for use from the -// expansion of ENUM_CLASS. -void BuildIndexToString( - const char *commaSeparated, std::string enumNames[], int enumSize) { - std::string input(commaSeparated); - std::regex reg("\\s*,\\s*"); - - std::sregex_token_iterator iter(input.begin(), input.end(), reg, -1); - std::sregex_token_iterator end; - int index = 0; - while (iter != end) { - enumNames[index] = *iter; - iter++; - index++; - } - CHECK(index == enumSize); -} } // namespace Fortran::common diff --git a/flang/lib/Evaluate/characteristics.cpp b/flang/lib/Evaluate/characteristics.cpp --- a/flang/lib/Evaluate/characteristics.cpp +++ b/flang/lib/Evaluate/characteristics.cpp @@ -921,7 +921,7 @@ if (whyNot) { *whyNot = "function results have distinct constant extents"; } - } else if (!ifaceTypeShape->type().IsTkCompatibleWith( + } else if (!ifaceTypeShape->type().IsTkLenCompatibleWith( actualTypeShape->type())) { if (whyNot) { *whyNot = "function results have incompatible types: "s + @@ -1000,7 +1000,7 @@ auto sep{": "s}; *whyNot = "incompatible procedure attributes"; differences.IterateOverMembers([&](Attr x) { - *whyNot += sep + EnumToString(x); + *whyNot += sep + std::string{EnumToString(x)}; sep = ", "; }); } diff --git a/flang/lib/Evaluate/check-expression.cpp b/flang/lib/Evaluate/check-expression.cpp --- a/flang/lib/Evaluate/check-expression.cpp +++ b/flang/lib/Evaluate/check-expression.cpp @@ -871,4 +871,83 @@ template bool IsErrorExpr(const Expr &); +// C1577 +// TODO: Also check C1579 & C1582 here +class StmtFunctionChecker + : public AnyTraverse> { +public: + using Result = std::optional; + using Base = AnyTraverse; + StmtFunctionChecker(const Symbol &sf, FoldingContext &context) + : Base{*this}, sf_{sf}, context_{context} {} + using Base::operator(); + + template Result operator()(const ArrayConstructor &) const { + return parser::Message{sf_.name(), + "Statement function '%s' should not contain an array constructor"_port_en_US, + sf_.name()}; + } + Result operator()(const StructureConstructor &) const { + return parser::Message{sf_.name(), + "Statement function '%s' should not contain a structure constructor"_port_en_US, + sf_.name()}; + } + Result operator()(const TypeParamInquiry &) const { + return parser::Message{sf_.name(), + "Statement function '%s' should not contain a type parameter inquiry"_port_en_US, + sf_.name()}; + } + Result operator()(const ProcedureDesignator &proc) const { + if (const Symbol * symbol{proc.GetSymbol()}) { + const Symbol &ultimate{symbol->GetUltimate()}; + if (const auto *subp{ + ultimate.detailsIf()}) { + if (subp->stmtFunction() && &ultimate.owner() == 
&sf_.owner()) { + if (ultimate.name().begin() > sf_.name().begin()) { + return parser::Message{sf_.name(), + "Statement function '%s' may not reference another statement function '%s' that is defined later"_err_en_US, + sf_.name(), ultimate.name()}; + } + } + } + if (auto chars{ + characteristics::Procedure::Characterize(proc, context_)}) { + if (!chars->CanBeCalledViaImplicitInterface()) { + return parser::Message(sf_.name(), + "Statement function '%s' should not reference function '%s' that requires an explicit interface"_port_en_US, + sf_.name(), symbol->name()); + } + } + } + if (proc.Rank() > 0) { + return parser::Message(sf_.name(), + "Statement function '%s' should not reference a function that returns an array"_port_en_US, + sf_.name()); + } + return std::nullopt; + } + Result operator()(const ActualArgument &arg) const { + if (const auto *expr{arg.UnwrapExpr()}) { + if (auto result{(*this)(*expr)}) { + return result; + } + if (expr->Rank() > 0 && !UnwrapWholeSymbolOrComponentDataRef(*expr)) { + return parser::Message(sf_.name(), + "Statement function '%s' should not pass an array argument that is not a whole array"_port_en_US, + sf_.name()); + } + } + return std::nullopt; + } + +private: + const Symbol &sf_; + FoldingContext &context_; +}; + +std::optional CheckStatementFunction( + const Symbol &sf, const Expr &expr, FoldingContext &context) { + return StmtFunctionChecker{sf, context}(expr); +} + } // namespace Fortran::evaluate diff --git a/flang/lib/Evaluate/expression.cpp b/flang/lib/Evaluate/expression.cpp --- a/flang/lib/Evaluate/expression.cpp +++ b/flang/lib/Evaluate/expression.cpp @@ -35,7 +35,13 @@ [](const Constant &c) -> T { return AsExpr(Constant{c.LEN()}); }, - [](const ArrayConstructor &a) -> T { return a.LEN(); }, + [](const ArrayConstructor &a) -> T { + if (const auto *len{a.LEN()}) { + return T{*len}; + } else { + return std::nullopt; + } + }, [](const Parentheses &x) { return x.left().LEN(); }, [](const Convert &x) { return common::visit( @@ -142,6 +148,13 @@ return values_ == that.values_; } +template +auto ArrayConstructor>::set_LEN( + Expr &&len) -> ArrayConstructor & { + length_.emplace(std::move(len)); + return *this; +} + template bool ArrayConstructor>::operator==( const ArrayConstructor &that) const { diff --git a/flang/lib/Evaluate/fold-implementation.h b/flang/lib/Evaluate/fold-implementation.h --- a/flang/lib/Evaluate/fold-implementation.h +++ b/flang/lib/Evaluate/fold-implementation.h @@ -1171,10 +1171,12 @@ return Expr{Constant{array.GetType().GetDerivedTypeSpec(), std::move(elements_), ConstantSubscripts{n}}}; } else if constexpr (T::category == TypeCategory::Character) { - auto length{Fold(context_, common::Clone(array.LEN()))}; - if (std::optional lengthValue{ToInt64(length)}) { - return Expr{Constant{ - *lengthValue, std::move(elements_), ConstantSubscripts{n}}}; + if (const auto *len{array.LEN()}) { + auto length{Fold(context_, common::Clone(*len))}; + if (std::optional lengthValue{ToInt64(length)}) { + return Expr{Constant{ + *lengthValue, std::move(elements_), ConstantSubscripts{n}}}; + } } } else { return Expr{ @@ -1371,12 +1373,13 @@ template ArrayConstructor ArrayConstructorFromMold( const A &prototype, std::optional> &&length) { + ArrayConstructor result{prototype}; if constexpr (RESULT::category == TypeCategory::Character) { - return ArrayConstructor{ - std::move(length.value()), ArrayConstructorValues{}}; - } else { - return ArrayConstructor{prototype}; + if (length) { + result.set_LEN(std::move(*length)); + } } + return result; } // 
array * array case diff --git a/flang/lib/Evaluate/formatting.cpp b/flang/lib/Evaluate/formatting.cpp --- a/flang/lib/Evaluate/formatting.cpp +++ b/flang/lib/Evaluate/formatting.cpp @@ -421,7 +421,10 @@ llvm::raw_ostream & ArrayConstructor>::AsFortran( llvm::raw_ostream &o) const { - o << '[' << GetType().AsFortran(LEN().AsFortran()) << "::"; + o << '['; + if (const auto *len{LEN()}) { + o << GetType().AsFortran(len->AsFortran()) << "::"; + } EmitArray(o, *this); return o << ']'; } diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -2631,8 +2631,8 @@ *targetProcDesignator, context); targetName = targetProcDesignator->GetName(); } else if (targetSymbol) { - // proc that's not a call if (IsProcedure(*targetSymbol)) { + // proc that's not a call targetProc = characteristics::Procedure::Characterize( *targetSymbol, context); } @@ -2693,6 +2693,11 @@ for (SymbolRef ref : symbols) { msg = AttachDeclaration(msg, *ref); } + } else if (HasVectorSubscript(*targetExpr) || + ExtractCoarrayRef(*targetExpr)) { + context.messages().Say(targetArg->sourceLocation(), + "TARGET= argument '%s' may not have a vector subscript or coindexing"_err_en_US, + targetExpr->AsFortran()); } if (const auto pointerType{pointerArg->GetType()}) { if (const auto targetType{targetArg->GetType()}) { diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -1272,6 +1272,9 @@ // reference an IMPURE procedure or a VOLATILE variable if (const auto &expr{symbol.get().stmtFunction()}) { for (const SymbolRef &ref : evaluate::CollectSymbols(*expr)) { + if (&*ref == &symbol) { + return false; // error recovery, recursion is caught elsewhere + } if (IsFunction(*ref) && !IsPureProcedure(*ref)) { return false; } @@ -1338,7 +1341,12 @@ bool IsProcedure(const Symbol &symbol) { return common::visit(common::visitors{ - [](const SubprogramDetails &) { return true; }, + [&symbol](const SubprogramDetails &) { + const Scope *scope{symbol.scope()}; + // Main programs & BLOCK DATA are not procedures. 
+ return !scope || + scope->kind() == Scope::Kind::Subprogram; + }, [](const SubprogramNameDetails &) { return true; }, [](const ProcEntityDetails &) { return true; }, [](const GenericDetails &) { return true; }, diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -139,7 +139,7 @@ .Case("ropi", llvm::Reloc::ROPI) .Case("rwpi", llvm::Reloc::RWPI) .Case("ropi-rwpi", llvm::Reloc::ROPI_RWPI) - .Default(llvm::None); + .Default(std::nullopt); if (RM.has_value()) opts.setRelocationModel(*RM); else @@ -171,6 +171,14 @@ if (const llvm::opt::Arg *a = args.getLastArg(clang::driver::options::OPT_triple)) opts.triple = a->getValue(); + + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_target_cpu)) + opts.cpu = a->getValue(); + + for (const llvm::opt::Arg *currentArg : + args.filtered(clang::driver::options::OPT_target_feature)) + opts.featuresAsWritten.emplace_back(currentArg->getValue()); } // Tweak the frontend configuration based on the frontend action diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -591,7 +591,8 @@ void CodeGenAction::setUpTargetMachine() { CompilerInstance &ci = this->getInstance(); - const std::string &theTriple = ci.getInvocation().getTargetOpts().triple; + const TargetOptions &targetOpts = ci.getInvocation().getTargetOpts(); + const std::string &theTriple = targetOpts.triple; // Create `Target` std::string error; @@ -602,11 +603,13 @@ // Create `TargetMachine` const auto &CGOpts = ci.getInvocation().getCodeGenOpts(); llvm::CodeGenOpt::Level OptLevel = getCGOptLevel(CGOpts); + std::string featuresStr = llvm::join(targetOpts.featuresAsWritten.begin(), + targetOpts.featuresAsWritten.end(), ","); tm.reset(theTarget->createTargetMachine( - theTriple, /*CPU=*/"", - /*Features=*/"", llvm::TargetOptions(), + theTriple, /*CPU=*/targetOpts.cpu, + /*Features=*/featuresStr, llvm::TargetOptions(), /*Reloc::Model=*/CGOpts.getRelocationModel(), - /*CodeModel::Model=*/llvm::None, OptLevel)); + /*CodeModel::Model=*/std::nullopt, OptLevel)); assert(tm && "Failed to create TargetMachine"); } diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -692,19 +692,19 @@ baseAddrTy = boxType.getEleTy(); // Allocate and set a variable to hold the address. // It will be set to null in setUnallocatedStatus. - mutableProperties.addr = - builder.allocateLocal(loc, baseAddrTy, name + ".addr", "", - /*shape=*/llvm::None, /*typeparams=*/llvm::None); + mutableProperties.addr = builder.allocateLocal( + loc, baseAddrTy, name + ".addr", "", + /*shape=*/std::nullopt, /*typeparams=*/std::nullopt); // Allocate variables to hold lower bounds and extents. 
int rank = sym.Rank(); mlir::Type idxTy = builder.getIndexType(); for (decltype(rank) i = 0; i < rank; ++i) { - mlir::Value lboundVar = - builder.allocateLocal(loc, idxTy, name + ".lb" + std::to_string(i), "", - /*shape=*/llvm::None, /*typeparams=*/llvm::None); - mlir::Value extentVar = - builder.allocateLocal(loc, idxTy, name + ".ext" + std::to_string(i), "", - /*shape=*/llvm::None, /*typeparams=*/llvm::None); + mlir::Value lboundVar = builder.allocateLocal( + loc, idxTy, name + ".lb" + std::to_string(i), "", + /*shape=*/std::nullopt, /*typeparams=*/std::nullopt); + mlir::Value extentVar = builder.allocateLocal( + loc, idxTy, name + ".ext" + std::to_string(i), "", + /*shape=*/std::nullopt, /*typeparams=*/std::nullopt); mutableProperties.lbounds.emplace_back(lboundVar); mutableProperties.extents.emplace_back(extentVar); } @@ -721,8 +721,8 @@ if (fir::isa_char(eleTy) && nonDeferredParams.empty()) { mlir::Value lenVar = builder.allocateLocal(loc, builder.getCharacterLengthType(), - name + ".len", "", /*shape=*/llvm::None, - /*typeparams=*/llvm::None); + name + ".len", "", /*shape=*/std::nullopt, + /*typeparams=*/std::nullopt); mutableProperties.deferredParams.emplace_back(lenVar); } return mutableProperties; diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -32,6 +32,7 @@ #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/Character.h" #include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/Runtime/Assign.h" #include "flang/Optimizer/Builder/Runtime/Character.h" #include "flang/Optimizer/Builder/Runtime/Derived.h" #include "flang/Optimizer/Builder/Runtime/EnvironmentDefaults.h" @@ -602,7 +603,7 @@ mlir::Type genType(Fortran::common::TypeCategory tc) override final { return Fortran::lower::getFIRType( &getMLIRContext(), tc, bridge.getDefaultKinds().GetDefaultKind(tc), - llvm::None); + std::nullopt); } bool createHostAssociateVarClone( @@ -959,8 +960,8 @@ assert(falseTarget && "missing conditional branch false block"); mlir::Location loc = toLocation(); mlir::Value bcc = builder->createConvert(loc, builder->getI1Type(), cond); - builder->create(loc, bcc, trueTarget, llvm::None, - falseTarget, llvm::None); + builder->create(loc, bcc, trueTarget, std::nullopt, + falseTarget, std::nullopt); } void genFIRConditionalBranch(mlir::Value cond, Fortran::lower::pft::Evaluation *trueTarget, @@ -1100,7 +1101,7 @@ assert(stmt.typedCall && "Call was not analyzed"); mlir::Value res{}; if (bridge.getLoweringOptions().getLowerToHighLevelFIR()) { - llvm::Optional resultType = llvm::None; + llvm::Optional resultType = std::nullopt; if (stmt.typedCall->hasAlternateReturns()) resultType = builder->getIndexType(); auto hlfirRes = Fortran::lower::convertCallToHLFIR( @@ -2236,11 +2237,6 @@ int kind = Fortran::evaluate::ToInt64(intrinsic->kind()).value_or(kind); llvm::SmallVector params; - if (intrinsic->category() == - Fortran::common::TypeCategory::Character || - intrinsic->category() == - Fortran::common::TypeCategory::Derived) - TODO(loc, "typeSpec with length parameters"); ty = genType(intrinsic->category(), kind, params); } else { const Fortran::semantics::DerivedTypeSpec *derived = @@ -2304,12 +2300,24 @@ exactValue = builder->create( loc, fir::ReferenceType::get(attr.getType()), fir::getBase(selector)); + const Fortran::semantics::IntrinsicTypeSpec *intrinsic = + typeSpec->declTypeSpec->AsIntrinsic(); + if (intrinsic->category() == + 
Fortran::common::TypeCategory::Character) { + auto charTy = attr.getType().dyn_cast(); + mlir::Value charLen = + fir::factory::CharacterExprHelper(*builder, loc) + .readLengthFromBox(fir::getBase(selector), charTy); + addAssocEntitySymbol(fir::CharBoxValue(exactValue, charLen)); + } else { + addAssocEntitySymbol(exactValue); + } } else if (std::holds_alternative( typeSpec->u)) { exactValue = builder->create( loc, fir::BoxType::get(attr.getType()), fir::getBase(selector)); + addAssocEntitySymbol(exactValue); } - addAssocEntitySymbol(exactValue); } else if (std::holds_alternative( guard.u)) { // CLASS IS @@ -2501,8 +2509,8 @@ void genArrayAssignment( const Fortran::evaluate::Assignment &assign, Fortran::lower::StatementContext &localStmtCtx, - llvm::Optional> lbounds = llvm::None, - llvm::Optional> ubounds = llvm::None) { + llvm::Optional> lbounds = std::nullopt, + llvm::Optional> ubounds = std::nullopt) { Fortran::lower::StatementContext &stmtCtx = explicitIterationSpace() @@ -2626,8 +2634,13 @@ // Assignment to polymorphic allocatables may require changing the // variable dynamic type (See Fortran 2018 10.2.1.3 p3). if (lhsType->IsPolymorphic() && - Fortran::lower::isWholeAllocatable(assign.lhs)) - TODO(loc, "assignment to polymorphic allocatable"); + Fortran::lower::isWholeAllocatable(assign.lhs)) { + mlir::Value lhs = genExprMutableBox(loc, assign.lhs).getAddr(); + mlir::Value rhs = + fir::getBase(genExprBox(loc, assign.rhs, stmtCtx)); + fir::runtime::genAssign(*builder, loc, lhs, rhs); + return; + } // Note: No ad-hoc handling for pointers is required here. The // target will be assigned as per 2018 10.2.1.3 p2. genExprAddr @@ -2662,7 +2675,7 @@ "LEN parameters"); lhsRealloc = fir::factory::genReallocIfNeeded( *builder, loc, *lhsMutableBox, - /*shape=*/llvm::None, lengthParams); + /*shape=*/std::nullopt, lengthParams); return lhsRealloc->newValue; } return genExprAddr(assign.lhs, stmtCtx); @@ -2703,7 +2716,7 @@ if (lhsIsWholeAllocatable) fir::factory::finalizeRealloc( *builder, loc, lhsMutableBox.value(), - /*lbounds=*/llvm::None, /*takeLboundsIfRealloc=*/false, + /*lbounds=*/std::nullopt, /*takeLboundsIfRealloc=*/false, lhsRealloc.value()); }, @@ -3345,7 +3358,7 @@ mlir::func::FuncOp func = fir::FirOpBuilder::createFunction( mlir::UnknownLoc::get(context), getModuleOp(), fir::NameUniquer::doGenerated("Sham"), - mlir::FunctionType::get(context, llvm::None, llvm::None)); + mlir::FunctionType::get(context, std::nullopt, std::nullopt)); func.addEntryBlock(); builder = new fir::FirOpBuilder(func, bridge.getKindMap()); assert(builder && "FirOpBuilder did not instantiate"); diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp --- a/flang/lib/Lower/CallInterface.cpp +++ b/flang/lib/Lower/CallInterface.cpp @@ -683,7 +683,7 @@ &result = procedure.functionResult) if (const auto *resultTypeAndShape = result->GetTypeAndShape()) return resultTypeAndShape->type(); - return llvm::None; + return std::nullopt; } static bool mustPassLengthWithDummyProcedure( @@ -1060,15 +1060,15 @@ getConverter().getFoldingContext(), toEvExpr(*expr))); return std::nullopt; } - void - addFirOperand(mlir::Type type, int entityPosition, Property p, - llvm::ArrayRef attributes = llvm::None) { + void addFirOperand( + mlir::Type type, int entityPosition, Property p, + llvm::ArrayRef attributes = std::nullopt) { interface.inputs.emplace_back( FirPlaceHolder{type, entityPosition, p, attributes}); } void addFirResult(mlir::Type type, int entityPosition, Property p, - llvm::ArrayRef attributes = 
llvm::None) { + llvm::ArrayRef attributes = std::nullopt) { interface.outputs.emplace_back( FirPlaceHolder{type, entityPosition, p, attributes}); } diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -290,7 +290,8 @@ auto *context = builder.getContext(); if (snd.isa() && fst.getType().isa()) { - auto funcTy = mlir::FunctionType::get(context, llvm::None, llvm::None); + auto funcTy = + mlir::FunctionType::get(context, std::nullopt, std::nullopt); auto boxProcTy = builder.getBoxProcType(funcTy); if (mlir::Value host = argumentHostAssocs(converter, fst)) { cast = builder.create( @@ -462,7 +463,7 @@ loc, getConverter(), *expr, getSymMap(), getStmtCtx())); } else { // Optional dummy argument for which there is no actual argument. - loweredActuals.emplace_back(llvm::None); + loweredActuals.emplace_back(std::nullopt); } llvm::SmallVector exprAssociations; @@ -553,7 +554,7 @@ for (auto associate : exprAssociations) builder.create(loc, associate); if (!resultFirBase) - return llvm::None; // subroutine call. + return std::nullopt; // subroutine call. if (fir::isa_trivial(resultFirBase.getType())) return hlfir::EntityWithAttributes{resultFirBase}; return hlfir::genDeclare(loc, builder, result, "tmp.funcresult", diff --git a/flang/lib/Lower/ConvertConstant.cpp b/flang/lib/Lower/ConvertConstant.cpp --- a/flang/lib/Lower/ConvertConstant.cpp +++ b/flang/lib/Lower/ConvertConstant.cpp @@ -126,8 +126,8 @@ auto attrTc = TC == Fortran::common::TypeCategory::Logical ? Fortran::common::TypeCategory::Integer : TC; - attributeElementType = Fortran::lower::getFIRType(builder.getContext(), - attrTc, KIND, llvm::None); + attributeElementType = Fortran::lower::getFIRType( + builder.getContext(), attrTc, KIND, std::nullopt); for (auto element : constant.values()) attributes.push_back( convertToAttribute(builder, element, attributeElementType)); @@ -198,8 +198,8 @@ fir::FirOpBuilder &builder, mlir::Location loc, const Fortran::evaluate::Scalar> &value) { if constexpr (TC == Fortran::common::TypeCategory::Integer) { - mlir::Type ty = - Fortran::lower::getFIRType(builder.getContext(), TC, KIND, llvm::None); + mlir::Type ty = Fortran::lower::getFIRType(builder.getContext(), TC, KIND, + std::nullopt); if (KIND == 16) { auto bigInt = llvm::APInt(ty.getIntOrFloatBitWidth(), value.SignedDecimal(), 10); @@ -272,7 +272,7 @@ mlir::NamedAttribute sizeAttr(sizeTag, builder.getI64IntegerAttr(len)); llvm::SmallVector attrs = {dataAttr, sizeAttr}; return builder.create( - loc, llvm::ArrayRef{type}, llvm::None, attrs); + loc, llvm::ArrayRef{type}, std::nullopt, attrs); } } diff --git a/flang/lib/Lower/ConvertExpr.cpp b/flang/lib/Lower/ConvertExpr.cpp --- a/flang/lib/Lower/ConvertExpr.cpp +++ b/flang/lib/Lower/ConvertExpr.cpp @@ -554,7 +554,7 @@ mlir::Type boxType = box.getType(); assert(boxType.isa() && "argument must be a fir.box"); mlir::Value emptyBox = - fir::factory::createUnallocatedBox(builder, loc, boxType, llvm::None); + fir::factory::createUnallocatedBox(builder, loc, boxType, std::nullopt); auto safeToReadBox = builder.create(loc, isPresent, box, emptyBox); return fir::substBase(exv, safeToReadBox); @@ -1086,8 +1086,9 @@ }, [&](const fir::MutableBoxValue &toBox) { if (toBox.isPointer()) { - Fortran::lower::associateMutableBox( - converter, loc, toBox, expr, /*lbounds=*/llvm::None, stmtCtx); + Fortran::lower::associateMutableBox(converter, loc, toBox, expr, + /*lbounds=*/std::nullopt, + stmtCtx); return; } // For 
allocatable components, a deep copy is needed. @@ -1929,7 +1930,7 @@ const Fortran::evaluate::ProcedureRef &procRef, llvm::Optional resultType, llvm::Optional intrinsic = - llvm::None) { + std::nullopt) { llvm::SmallVector operands; std::string name = @@ -1947,7 +1948,7 @@ operands.emplace_back(optionalArg, isPresent); }; auto prepareOtherArg = [&](const Fortran::lower::SomeExpr &expr) { - operands.emplace_back(genval(expr), llvm::None); + operands.emplace_back(genval(expr), std::nullopt); }; Fortran::lower::prepareCustomIntrinsicArgument( procRef, *intrinsic, resultType, prepareOptionalArg, prepareOtherArg, @@ -2492,7 +2493,7 @@ if (!actualArgIsVariable && !needsCopy) // Actual argument is not a variable. Make sure a variable address is // not passed. - return {genTempExtAddr(expr), llvm::None}; + return {genTempExtAddr(expr), std::nullopt}; ExtValue baseAddr; if (arg.isOptional() && Fortran::evaluate::MayBePassedAsAbsentOptional( expr, converter.getFoldingContext())) { @@ -2532,18 +2533,19 @@ ExtValue box = genBoxArg(expr); if (needsCopy) return {genCopyIn(box, arg, copyOutPairs, - /*restrictCopyAtRuntime=*/llvm::None, byValue), - llvm::None}; + /*restrictCopyAtRuntime=*/std::nullopt, byValue), + std::nullopt}; // Contiguous: just use the box we created above! // This gets "unboxed" below, if needed. - return {box, llvm::None}; + return {box, std::nullopt}; } // Actual argument is a non-optional, non-pointer, non-allocatable // scalar. ExtValue actualArg = genExtAddr(expr); if (needsCopy) - return {createInMemoryScalarCopy(builder, loc, actualArg), llvm::None}; - return {actualArg, llvm::None}; + return {createInMemoryScalarCopy(builder, loc, actualArg), + std::nullopt}; + return {actualArg, std::nullopt}; }(); // Scalar and contiguous expressions may be lowered to a fir.box, // either to account for potential polymorphism, or because lowering @@ -2640,7 +2642,8 @@ /*nonDeferredParams=*/mlir::ValueRange{}, /*mutableProperties=*/{}); Fortran::lower::associateMutableBox(converter, loc, pointer, *expr, - /*lbounds=*/llvm::None, stmtCtx); + /*lbounds=*/std::nullopt, + stmtCtx); caller.placeInput(arg, irBox); continue; } @@ -3453,7 +3456,7 @@ builder.createConvert(loc, fir::HeapType::get(seqTy), load); mlir::Value shapeOp = builder.genShape(loc, shape); return builder.create( - loc, seqTy, castTo, shapeOp, /*slice=*/mlir::Value{}, llvm::None); + loc, seqTy, castTo, shapeOp, /*slice=*/mlir::Value{}, std::nullopt); }; // Custom lowering of the element store to deal with the extra indirection // to the lazy allocated buffer. @@ -4000,7 +4003,7 @@ auto addr = builder->create(loc, eleRefTy, tmp, shape, /*slice=*/mlir::Value{}, indices, - /*typeParams=*/llvm::None); + /*typeParams=*/std::nullopt); auto load = builder->create(loc, addr); return builder->createConvert(loc, i1Ty, load); }; @@ -4314,14 +4317,14 @@ mlir::Value temp = !seqTy.hasDynamicExtents() ? 
builder.create(loc, type) : builder.create( - loc, type, ".array.expr", llvm::None, shape); + loc, type, ".array.expr", std::nullopt, shape); fir::FirOpBuilder *bldr = &converter.getFirOpBuilder(); stmtCtx.attachCleanup( [bldr, loc, temp]() { bldr->create(loc, temp); }); mlir::Value shapeOp = genShapeOp(shape); return builder.create(loc, seqTy, temp, shapeOp, /*slice=*/mlir::Value{}, - llvm::None); + std::nullopt); } static fir::ShapeOp genShapeOp(mlir::Location loc, fir::FirOpBuilder &builder, @@ -4431,7 +4434,7 @@ const Fortran::evaluate::ProcedureRef &procRef, llvm::Optional retTy, llvm::Optional intrinsic = - llvm::None) { + std::nullopt) { llvm::SmallVector operands; std::string name = @@ -4461,7 +4464,7 @@ }; auto prepareOtherArg = [&](const Fortran::lower::SomeExpr &expr) { PushSemantics(ConstituentSemantics::RefTransparent); - operands.emplace_back(genElementalArgument(expr), llvm::None); + operands.emplace_back(genElementalArgument(expr), std::nullopt); }; Fortran::lower::prepareCustomIntrinsicArgument( procRef, *intrinsic, retTy, prepareOptionalArg, prepareOtherArg, @@ -4790,7 +4793,7 @@ if (x.hasAlternateReturns()) fir::emitFatalError(getLoc(), "array procedure reference with alt-return"); - return genProcRef(x, llvm::None); + return genProcRef(x, std::nullopt); } template CC genScalarAndForwardValue(const A &x) { @@ -6198,7 +6201,7 @@ mlir::Value initBuffSz = builder.createIntegerConstant(loc, idxTy, clInitialBufferSize); mem = builder.create( - loc, eleTy, /*typeparams=*/llvm::None, initBuffSz); + loc, eleTy, /*typeparams=*/std::nullopt, initBuffSz); builder.create(loc, initBuffSz, buffSize); } } else { @@ -7400,11 +7403,11 @@ } if (esp.loopCleanup) { (*esp.loopCleanup)(builder); - esp.loopCleanup = llvm::None; + esp.loopCleanup = std::nullopt; } esp.initialArgs.clear(); esp.innerArgs.clear(); - esp.outerLoop = llvm::None; + esp.outerLoop = std::nullopt; esp.resetBindings(); esp.incrementCounter(); } diff --git a/flang/lib/Lower/ConvertExprToHLFIR.cpp b/flang/lib/Lower/ConvertExprToHLFIR.cpp --- a/flang/lib/Lower/ConvertExprToHLFIR.cpp +++ b/flang/lib/Lower/ConvertExprToHLFIR.cpp @@ -20,6 +20,8 @@ #include "flang/Lower/IntrinsicCall.h" #include "flang/Lower/StatementContext.h" #include "flang/Lower/SymbolMap.h" +#include "flang/Optimizer/Builder/Complex.h" +#include "flang/Optimizer/Builder/Runtime/Character.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" @@ -250,22 +252,18 @@ //===--------------------------------------------------------------------===// template -struct BinaryOp { - static hlfir::EntityWithAttributes gen(mlir::Location loc, - fir::FirOpBuilder &builder, - hlfir::Entity lhs, hlfir::Entity rhs) { - TODO(loc, "binary op implementation in HLFIR"); - } -}; +struct BinaryOp {}; #undef GENBIN #define GENBIN(GenBinEvOp, GenBinTyCat, GenBinFirOp) \ template \ struct BinaryOp>> { \ + using Op = Fortran::evaluate::GenBinEvOp>; \ static hlfir::EntityWithAttributes gen(mlir::Location loc, \ fir::FirOpBuilder &builder, \ - hlfir::Entity lhs, \ + const Op &, hlfir::Entity lhs, \ hlfir::Entity rhs) { \ return hlfir::EntityWithAttributes{ \ builder.create(loc, lhs, rhs)}; \ @@ -287,11 +285,12 @@ template struct BinaryOp>> { + using Op = Fortran::evaluate::Power>; static hlfir::EntityWithAttributes gen(mlir::Location loc, - fir::FirOpBuilder &builder, + fir::FirOpBuilder &builder, const Op &, hlfir::Entity lhs, hlfir::Entity rhs) { mlir::Type ty = Fortran::lower::getFIRType(builder.getContext(), TC, KIND, - /*params=*/llvm::None); + 
/*params=*/std::nullopt); return hlfir::EntityWithAttributes{ Fortran::lower::genPow(builder, loc, ty, lhs, rhs)}; } @@ -300,16 +299,314 @@ template struct BinaryOp< Fortran::evaluate::RealToIntPower>> { + using Op = + Fortran::evaluate::RealToIntPower>; static hlfir::EntityWithAttributes gen(mlir::Location loc, - fir::FirOpBuilder &builder, + fir::FirOpBuilder &builder, const Op &, hlfir::Entity lhs, hlfir::Entity rhs) { mlir::Type ty = Fortran::lower::getFIRType(builder.getContext(), TC, KIND, - /*params=*/llvm::None); + /*params=*/std::nullopt); return hlfir::EntityWithAttributes{ Fortran::lower::genPow(builder, loc, ty, lhs, rhs)}; } }; +template +struct BinaryOp< + Fortran::evaluate::Extremum>> { + using Op = Fortran::evaluate::Extremum>; + static hlfir::EntityWithAttributes gen(mlir::Location loc, + fir::FirOpBuilder &builder, + const Op &op, hlfir::Entity lhs, + hlfir::Entity rhs) { + // evaluate::Extremum is only created by the front-end when building + // compiler generated expressions (like when folding LEN() or shape/bounds + // inquiries). MIN and MAX are represented as evaluate::ProcedureRef and are + // not going through here. So far the frontend does not generate character + // Extremum so there is no way to test it. + if constexpr (TC == Fortran::common::TypeCategory::Character) { + fir::emitFatalError(loc, "Fortran::evaluate::Extremum are unexpected"); + } + llvm::SmallVector args{lhs, rhs}; + fir::ExtendedValue res = op.ordering == Fortran::evaluate::Ordering::Greater + ? Fortran::lower::genMax(builder, loc, args) + : Fortran::lower::genMin(builder, loc, args); + return hlfir::EntityWithAttributes{fir::getBase(res)}; + } +}; + +/// Convert parser's INTEGER relational operators to MLIR. +static mlir::arith::CmpIPredicate +translateRelational(Fortran::common::RelationalOperator rop) { + switch (rop) { + case Fortran::common::RelationalOperator::LT: + return mlir::arith::CmpIPredicate::slt; + case Fortran::common::RelationalOperator::LE: + return mlir::arith::CmpIPredicate::sle; + case Fortran::common::RelationalOperator::EQ: + return mlir::arith::CmpIPredicate::eq; + case Fortran::common::RelationalOperator::NE: + return mlir::arith::CmpIPredicate::ne; + case Fortran::common::RelationalOperator::GT: + return mlir::arith::CmpIPredicate::sgt; + case Fortran::common::RelationalOperator::GE: + return mlir::arith::CmpIPredicate::sge; + } + llvm_unreachable("unhandled INTEGER relational operator"); +} + +/// Convert parser's REAL relational operators to MLIR. +/// The choice of order (O prefix) vs unorder (U prefix) follows Fortran 2018 +/// requirements in the IEEE context (table 17.1 of F2018). This choice is +/// also applied in other contexts because it is easier and in line with +/// other Fortran compilers. +/// FIXME: The signaling/quiet aspect of the table 17.1 requirement is not +/// fully enforced. FIR and LLVM `fcmp` instructions do not give any guarantee +/// whether the comparison will signal or not in case of quiet NaN argument. 
+static mlir::arith::CmpFPredicate +translateFloatRelational(Fortran::common::RelationalOperator rop) { + switch (rop) { + case Fortran::common::RelationalOperator::LT: + return mlir::arith::CmpFPredicate::OLT; + case Fortran::common::RelationalOperator::LE: + return mlir::arith::CmpFPredicate::OLE; + case Fortran::common::RelationalOperator::EQ: + return mlir::arith::CmpFPredicate::OEQ; + case Fortran::common::RelationalOperator::NE: + return mlir::arith::CmpFPredicate::UNE; + case Fortran::common::RelationalOperator::GT: + return mlir::arith::CmpFPredicate::OGT; + case Fortran::common::RelationalOperator::GE: + return mlir::arith::CmpFPredicate::OGE; + } + llvm_unreachable("unhandled REAL relational operator"); +} + +template +struct BinaryOp>> { + using Op = Fortran::evaluate::Relational< + Fortran::evaluate::Type>; + static hlfir::EntityWithAttributes gen(mlir::Location loc, + fir::FirOpBuilder &builder, + const Op &op, hlfir::Entity lhs, + hlfir::Entity rhs) { + auto cmp = builder.create( + loc, translateRelational(op.opr), lhs, rhs); + return hlfir::EntityWithAttributes{cmp}; + } +}; + +template +struct BinaryOp>> { + using Op = Fortran::evaluate::Relational< + Fortran::evaluate::Type>; + static hlfir::EntityWithAttributes gen(mlir::Location loc, + fir::FirOpBuilder &builder, + const Op &op, hlfir::Entity lhs, + hlfir::Entity rhs) { + auto cmp = builder.create( + loc, translateFloatRelational(op.opr), lhs, rhs); + return hlfir::EntityWithAttributes{cmp}; + } +}; + +template +struct BinaryOp>> { + using Op = Fortran::evaluate::Relational< + Fortran::evaluate::Type>; + static hlfir::EntityWithAttributes gen(mlir::Location loc, + fir::FirOpBuilder &builder, + const Op &op, hlfir::Entity lhs, + hlfir::Entity rhs) { + auto cmp = builder.create( + loc, translateFloatRelational(op.opr), lhs, rhs); + return hlfir::EntityWithAttributes{cmp}; + } +}; + +template +struct BinaryOp>> { + using Op = Fortran::evaluate::Relational< + Fortran::evaluate::Type>; + static hlfir::EntityWithAttributes gen(mlir::Location loc, + fir::FirOpBuilder &builder, + const Op &op, hlfir::Entity lhs, + hlfir::Entity rhs) { + auto [lhsExv, lhsCleanUp] = + hlfir::translateToExtendedValue(loc, builder, lhs); + auto [rhsExv, rhsCleanUp] = + hlfir::translateToExtendedValue(loc, builder, rhs); + auto cmp = fir::runtime::genCharCompare( + builder, loc, translateRelational(op.opr), lhsExv, rhsExv); + if (lhsCleanUp) + lhsCleanUp.value()(); + if (rhsCleanUp) + rhsCleanUp.value()(); + return hlfir::EntityWithAttributes{cmp}; + } +}; + +template +struct BinaryOp> { + using Op = Fortran::evaluate::LogicalOperation; + static hlfir::EntityWithAttributes gen(mlir::Location loc, + fir::FirOpBuilder &builder, + const Op &op, hlfir::Entity lhs, + hlfir::Entity rhs) { + mlir::Type i1Type = builder.getI1Type(); + mlir::Value i1Lhs = builder.createConvert(loc, i1Type, lhs); + mlir::Value i1Rhs = builder.createConvert(loc, i1Type, rhs); + switch (op.logicalOperator) { + case Fortran::evaluate::LogicalOperator::And: + return hlfir::EntityWithAttributes{ + builder.create(loc, i1Lhs, i1Rhs)}; + case Fortran::evaluate::LogicalOperator::Or: + return hlfir::EntityWithAttributes{ + builder.create(loc, i1Lhs, i1Rhs)}; + case Fortran::evaluate::LogicalOperator::Eqv: + return hlfir::EntityWithAttributes{builder.create( + loc, mlir::arith::CmpIPredicate::eq, i1Lhs, i1Rhs)}; + case Fortran::evaluate::LogicalOperator::Neqv: + return hlfir::EntityWithAttributes{builder.create( + loc, mlir::arith::CmpIPredicate::ne, i1Lhs, i1Rhs)}; + case 
Fortran::evaluate::LogicalOperator::Not: + // lib/evaluate expression for .NOT. is Fortran::evaluate::Not. + llvm_unreachable(".NOT. is not a binary operator"); + } + llvm_unreachable("unhandled logical operation"); + } +}; + +template +struct BinaryOp> { + using Op = Fortran::evaluate::ComplexConstructor; + static hlfir::EntityWithAttributes gen(mlir::Location loc, + fir::FirOpBuilder &builder, const Op &, + hlfir::Entity lhs, hlfir::Entity rhs) { + mlir::Value res = + fir::factory::Complex{builder, loc}.createComplex(KIND, lhs, rhs); + return hlfir::EntityWithAttributes{res}; + } +}; + +template +struct BinaryOp> { + using Op = Fortran::evaluate::SetLength; + static hlfir::EntityWithAttributes gen(mlir::Location loc, + fir::FirOpBuilder &, const Op &, + hlfir::Entity, hlfir::Entity) { + TODO(loc, "SetLength lowering to HLFIR"); + } +}; + +//===--------------------------------------------------------------------===// +// Unary Operation implementation +//===--------------------------------------------------------------------===// + +template +struct UnaryOp {}; + +template +struct UnaryOp> { + using Op = Fortran::evaluate::Not; + static hlfir::EntityWithAttributes gen(mlir::Location loc, + fir::FirOpBuilder &builder, const Op &, + hlfir::Entity lhs) { + mlir::Value one = builder.createBool(loc, true); + mlir::Value val = builder.createConvert(loc, builder.getI1Type(), lhs); + return hlfir::EntityWithAttributes{ + builder.create(loc, val, one)}; + } +}; + +template +struct UnaryOp>> { + using Op = Fortran::evaluate::Negate< + Fortran::evaluate::Type>; + static hlfir::EntityWithAttributes gen(mlir::Location loc, + fir::FirOpBuilder &builder, const Op &, + hlfir::Entity lhs) { + // Like LLVM, integer negation is the binary op "0 - value" + mlir::Type type = Fortran::lower::getFIRType( + builder.getContext(), Fortran::common::TypeCategory::Integer, KIND, + /*params=*/std::nullopt); + mlir::Value zero = builder.createIntegerConstant(loc, type, 0); + return hlfir::EntityWithAttributes{ + builder.create(loc, zero, lhs)}; + } +}; + +template +struct UnaryOp>> { + using Op = Fortran::evaluate::Negate< + Fortran::evaluate::Type>; + static hlfir::EntityWithAttributes gen(mlir::Location loc, + fir::FirOpBuilder &builder, const Op &, + hlfir::Entity lhs) { + return hlfir::EntityWithAttributes{ + builder.create(loc, lhs)}; + } +}; + +template +struct UnaryOp>> { + using Op = Fortran::evaluate::Negate< + Fortran::evaluate::Type>; + static hlfir::EntityWithAttributes gen(mlir::Location loc, + fir::FirOpBuilder &builder, const Op &, + hlfir::Entity lhs) { + return hlfir::EntityWithAttributes{builder.create(loc, lhs)}; + } +}; + +template +struct UnaryOp> { + using Op = Fortran::evaluate::ComplexComponent; + static hlfir::EntityWithAttributes gen(mlir::Location loc, + fir::FirOpBuilder &builder, + const Op &op, hlfir::Entity lhs) { + mlir::Value res = fir::factory::Complex{builder, loc}.extractComplexPart( + lhs, op.isImaginaryPart); + return hlfir::EntityWithAttributes{res}; + } +}; + +template +struct UnaryOp> { + using Op = Fortran::evaluate::Parentheses; + static hlfir::EntityWithAttributes + gen(mlir::Location loc, fir::FirOpBuilder &, const Op &, hlfir::Entity) { + TODO(loc, "Parentheses lowering to HLFIR"); + } +}; + +template +struct UnaryOp< + Fortran::evaluate::Convert, TC2>> { + using Op = + Fortran::evaluate::Convert, TC2>; + static hlfir::EntityWithAttributes gen(mlir::Location loc, + fir::FirOpBuilder &builder, const Op &, + hlfir::Entity lhs) { + if constexpr (TC1 == 
Fortran::common::TypeCategory::Character && + TC2 == TC1) { + TODO(loc, "character conversion in HLFIR"); + } + mlir::Type type = Fortran::lower::getFIRType(builder.getContext(), TC1, + KIND, /*params=*/std::nullopt); + mlir::Value res = builder.convertWithSemantics(loc, type, lhs); + return hlfir::EntityWithAttributes{res}; + } +}; + /// Lower Expr to HLFIR. class HlfirBuilder { public: @@ -388,18 +685,15 @@ TODO(getLoc(), "lowering ArrayCtor to HLFIR"); } - template - hlfir::EntityWithAttributes - gen(const Fortran::evaluate::Convert, TC2> - &convert) { - TODO(getLoc(), "lowering convert to HLFIR"); - } - template hlfir::EntityWithAttributes gen(const Fortran::evaluate::Operation &op) { - TODO(getLoc(), "lowering unary op to HLFIR"); + auto &builder = getBuilder(); + mlir::Location loc = getLoc(); + if (op.Rank() != 0) + TODO(loc, "elemental operations in HLFIR"); + auto left = hlfir::loadTrivialScalar(loc, builder, gen(op.left())); + return UnaryOp::gen(loc, builder, op.derived(), left); } template @@ -411,7 +705,7 @@ TODO(loc, "elemental operations in HLFIR"); auto left = hlfir::loadTrivialScalar(loc, builder, gen(op.left())); auto right = hlfir::loadTrivialScalar(loc, builder, gen(op.right())); - return BinaryOp::gen(loc, builder, left, right); + return BinaryOp::gen(loc, builder, op.derived(), left, right); } template diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -198,8 +198,9 @@ fir::FirOpBuilder &builder = converter.getFirOpBuilder(); if (Fortran::evaluate::UnwrapExpr( initialTarget)) - return fir::factory::createUnallocatedBox(builder, loc, boxType, - /*nonDeferredParams=*/llvm::None); + return fir::factory::createUnallocatedBox( + builder, loc, boxType, + /*nonDeferredParams=*/std::nullopt); // Pointer initial data target, and NULL(mold). for (const auto &sym : Fortran::evaluate::CollectSymbols(initialTarget)) { // Length parameters processing will need care in global initializer @@ -343,7 +344,7 @@ // need to be disassociated, but for sanity and simplicity, do it in // global constructor since this has no runtime cost. componentValue = fir::factory::createUnallocatedBox( - builder, loc, componentTy, llvm::None); + builder, loc, componentTy, std::nullopt); } else if (hasDefaultInitialization(component)) { // Component type has default initialization. 
componentValue = genDefaultInitializerValue(converter, loc, component, @@ -471,7 +472,7 @@ Fortran::lower::createGlobalInitialization( builder, global, [&](fir::FirOpBuilder &b) { mlir::Value box = - fir::factory::createUnallocatedBox(b, loc, symTy, llvm::None); + fir::factory::createUnallocatedBox(b, loc, symTy, std::nullopt); b.create(loc, box); }); } @@ -842,7 +843,7 @@ fir::SequenceType::Shape shape(1, size); auto seqTy = fir::SequenceType::get(shape, i8Ty); mlir::Value local = - builder.allocateLocal(loc, seqTy, aggName, "", llvm::None, llvm::None, + builder.allocateLocal(loc, seqTy, aggName, "", std::nullopt, std::nullopt, /*target=*/false); insertAggregateStore(storeMap, var, local); } @@ -1347,8 +1348,8 @@ Fortran::lower::SymMap &symMap, const Fortran::semantics::Symbol &sym, mlir::Value base, mlir::Value len = {}, - llvm::ArrayRef shape = llvm::None, - llvm::ArrayRef lbounds = llvm::None, + llvm::ArrayRef shape = std::nullopt, + llvm::ArrayRef lbounds = std::nullopt, bool force = false) { if (converter.getLoweringOptions().getLowerToHighLevelFIR()) { fir::FirOpBuilder &builder = converter.getFirOpBuilder(); diff --git a/flang/lib/Lower/HostAssociations.cpp b/flang/lib/Lower/HostAssociations.cpp --- a/flang/lib/Lower/HostAssociations.cpp +++ b/flang/lib/Lower/HostAssociations.cpp @@ -321,15 +321,15 @@ .genThen([&]() { fir::factory::associateMutableBox(builder, loc, boxInTuple, args.hostValue, - /*lbounds=*/llvm::None); + /*lbounds=*/std::nullopt); }) .genElse([&]() { fir::factory::disassociateMutableBox(builder, loc, boxInTuple); }) .end(); } else { - fir::factory::associateMutableBox(builder, loc, boxInTuple, - args.hostValue, /*lbounds=*/llvm::None); + fir::factory::associateMutableBox( + builder, loc, boxInTuple, args.hostValue, /*lbounds=*/std::nullopt); } } @@ -360,7 +360,7 @@ } if (canReadCapturedBoxValue(converter, sym)) { - fir::BoxValue boxValue(box, lbounds, /*explicitParams=*/llvm::None); + fir::BoxValue boxValue(box, lbounds, /*explicitParams=*/std::nullopt); args.symMap.addSymbol(sym, fir::factory::readBoxValue(builder, loc, boxValue)); } else { @@ -380,7 +380,7 @@ box = builder.create(loc, isPresent, box, absentBox); } - fir::BoxValue boxValue(box, lbounds, /*explicitParams=*/llvm::None); + fir::BoxValue boxValue(box, lbounds, /*explicitParams=*/std::nullopt); args.symMap.addSymbol(sym, boxValue); } } diff --git a/flang/lib/Lower/IO.cpp b/flang/lib/Lower/IO.cpp --- a/flang/lib/Lower/IO.cpp +++ b/flang/lib/Lower/IO.cpp @@ -352,7 +352,7 @@ descAddr = builder.createTemporary(loc, boxType); fir::MutableBoxValue box = fir::MutableBoxValue(descAddr, {}, {}); fir::factory::associateMutableBox(builder, loc, box, exv, - /*lbounds=*/llvm::None); + /*lbounds=*/std::nullopt); } descAddr = builder.createConvert(loc, descRefTy, descAddr); list = builder.create(loc, listTy, list, descAddr, @@ -1348,7 +1348,7 @@ return varBox; if (fir::factory::CharacterExprHelper::isArray(varAddr.getType())) return varBox; - return llvm::None; + return std::nullopt; } template @@ -1362,14 +1362,14 @@ if (auto *unit = getIOControl(stmt)) if (auto *var = std::get_if(&unit->u)) return getVariableBufferRequiredDescriptor(converter, loc, *var, stmtCtx); - return llvm::None; + return std::nullopt; } template <> inline llvm::Optional maybeGetInternalIODescriptor( Fortran::lower::AbstractConverter &, mlir::Location loc, const Fortran::parser::PrintStmt &, Fortran::lower::StatementContext &) { - return llvm::None; + return std::nullopt; } template @@ -1955,7 +1955,7 @@ const bool isInternal = 
isDataTransferInternal(stmt); llvm::Optional descRef = isInternal ? maybeGetInternalIODescriptor(converter, loc, stmt, stmtCtx) - : llvm::None; + : std::nullopt; const bool isInternalWithDesc = descRef.has_value(); const bool isAsync = isDataTransferAsynchronous(loc, stmt); const bool isNml = isDataTransferNamelist(stmt); @@ -2091,10 +2091,10 @@ builder.createConvert(loc, specFuncTy.getInput(0), cookie), builder.createIntegerConstant( loc, specFuncTy.getInput(1), - Fortran::runtime::io::HashInquiryKeyword( + Fortran::runtime::io::HashInquiryKeyword(std::string{ Fortran::parser::InquireSpec::CharVar::EnumToString( - std::get(var.t)) - .c_str())), + std::get(var.t))} + .c_str())), builder.createConvert(loc, specFuncTy.getInput(2), fir::getBase(str)), builder.createConvert(loc, specFuncTy.getInput(3), fir::getLen(str))}; return builder.create(loc, specFunc, args).getResult(0); @@ -2128,10 +2128,10 @@ builder.createConvert(loc, specFuncTy.getInput(0), cookie), builder.createIntegerConstant( loc, specFuncTy.getInput(1), - Fortran::runtime::io::HashInquiryKeyword( + Fortran::runtime::io::HashInquiryKeyword(std::string{ Fortran::parser::InquireSpec::IntVar::EnumToString( - std::get(var.t)) - .c_str())), + std::get(var.t))} + .c_str())), builder.createConvert(loc, specFuncTy.getInput(2), addr), builder.createConvert(loc, specFuncTy.getInput(3), kind)}; return builder.create(loc, specFunc, args).getResult(0); @@ -2165,9 +2165,9 @@ else args.push_back(builder.createIntegerConstant( loc, specFuncTy.getInput(1), - Fortran::runtime::io::HashInquiryKeyword( - Fortran::parser::InquireSpec::LogVar::EnumToString(logVarKind) - .c_str()))); + Fortran::runtime::io::HashInquiryKeyword(std::string{ + Fortran::parser::InquireSpec::LogVar::EnumToString(logVarKind)} + .c_str()))); args.push_back(builder.createConvert(loc, specFuncTy.getInput(2), addr)); auto call = builder.create(loc, specFunc, args); boolRefToLogical(loc, builder, addr); diff --git a/flang/lib/Lower/IterationSpace.cpp b/flang/lib/Lower/IterationSpace.cpp --- a/flang/lib/Lower/IterationSpace.cpp +++ b/flang/lib/Lower/IterationSpace.cpp @@ -822,7 +822,7 @@ endAssign(); if (lhs) { if (bases.empty()) { - lhsBases.push_back(llvm::None); + lhsBases.push_back(std::nullopt); return; } assert(bases.size() >= 1 && "must detect an array reference on lhs"); @@ -854,7 +854,7 @@ loadBindings.clear(); ccLoopNest.clear(); innerArgs.clear(); - outerLoop = llvm::None; + outerLoop = std::nullopt; clearLoops(); counter = 0; } @@ -870,7 +870,7 @@ assert(optPos.has_value() && "load does not correspond to lhs"); return optPos; } - return llvm::None; + return std::nullopt; } llvm::SmallVector diff --git a/flang/lib/Lower/Mangler.cpp b/flang/lib/Lower/Mangler.cpp --- a/flang/lib/Lower/Mangler.cpp +++ b/flang/lib/Lower/Mangler.cpp @@ -104,7 +104,7 @@ // Mangle external procedure without any scope prefix. if (!keepExternalInScope && Fortran::semantics::IsExternal(ultimateSymbol)) - return fir::NameUniquer::doProcedure(llvm::None, llvm::None, + return fir::NameUniquer::doProcedure(std::nullopt, std::nullopt, symbolName); // Separate module subprograms must be mangled according to the // scope where they were declared (the symbol we have is the @@ -127,7 +127,7 @@ // Otherwise, this is an external procedure, even if it does not // have an explicit EXTERNAL attribute. Mangle it without any // prefix. 
- return fir::NameUniquer::doProcedure(llvm::None, llvm::None, + return fir::NameUniquer::doProcedure(std::nullopt, std::nullopt, symbolName); }, [&](const Fortran::semantics::ObjectEntityDetails &) { diff --git a/flang/lib/Lower/Runtime.cpp b/flang/lib/Lower/Runtime.cpp --- a/flang/lib/Lower/Runtime.cpp +++ b/flang/lib/Lower/Runtime.cpp @@ -114,7 +114,7 @@ mlir::Location loc = converter.getCurrentLocation(); mlir::func::FuncOp callee = fir::runtime::getRuntimeFunc(loc, builder); - builder.create(loc, callee, llvm::None); + builder.create(loc, callee, std::nullopt); genUnreachable(builder, loc); } @@ -173,7 +173,7 @@ mlir::Location loc = converter.getCurrentLocation(); mlir::func::FuncOp callee = fir::runtime::getRuntimeFunc(loc, builder); - builder.create(loc, callee, llvm::None); + builder.create(loc, callee, std::nullopt); } mlir::Value Fortran::lower::genAssociated(fir::FirOpBuilder &builder, @@ -203,7 +203,7 @@ mlir::Location loc) { mlir::func::FuncOp func = fir::runtime::getRuntimeFunc(loc, builder); - return builder.create(loc, func, llvm::None).getResult(0); + return builder.create(loc, func, std::nullopt).getResult(0); } void Fortran::lower::genDateAndTime(fir::FirOpBuilder &builder, diff --git a/flang/lib/Lower/SymbolMap.cpp b/flang/lib/Lower/SymbolMap.cpp --- a/flang/lib/Lower/SymbolMap.cpp +++ b/flang/lib/Lower/SymbolMap.cpp @@ -101,10 +101,10 @@ std::get_if(&iter->second)) return *varDef; else - return llvm::None; + return std::nullopt; } } - return llvm::None; + return std::nullopt; } llvm::raw_ostream & diff --git a/flang/lib/Lower/VectorSubscripts.cpp b/flang/lib/Lower/VectorSubscripts.cpp --- a/flang/lib/Lower/VectorSubscripts.cpp +++ b/flang/lib/Lower/VectorSubscripts.cpp @@ -121,7 +121,7 @@ TODO(loc, "threading length parameters in field index op"); fir::FirOpBuilder &builder = converter.getFirOpBuilder(); componentPath.emplace_back(builder.create( - loc, fldTy, componentName, recTy, /*typeParams*/ llvm::None)); + loc, fldTy, componentName, recTy, /*typeParams*/ std::nullopt)); return fir::unwrapSequenceType(recTy.getType(componentName)); } diff --git a/flang/lib/Optimizer/Builder/Character.cpp b/flang/lib/Optimizer/Builder/Character.cpp --- a/flang/lib/Optimizer/Builder/Character.cpp +++ b/flang/lib/Optimizer/Builder/Character.cpp @@ -372,7 +372,7 @@ if (typeLen == fir::CharacterType::unknownLen()) lenParams.push_back(len); auto ref = builder.allocateLocal(loc, charTy, "", ".chrtmp", - /*shape=*/llvm::None, lenParams); + /*shape=*/std::nullopt, lenParams); return {ref, len}; } @@ -664,9 +664,14 @@ mlir::Value fir::factory::CharacterExprHelper::readLengthFromBox(mlir::Value box) { + auto charTy = recoverCharacterType(box.getType()); + return readLengthFromBox(box, charTy); +} + +mlir::Value fir::factory::CharacterExprHelper::readLengthFromBox( + mlir::Value box, fir::CharacterType charTy) { auto lenTy = builder.getCharacterLengthType(); auto size = builder.create(loc, lenTy, box); - auto charTy = recoverCharacterType(box.getType()); auto bits = builder.getKindMap().getCharacterBitsize(charTy.getFKind()); auto width = bits / 8; if (width > 1) { diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -374,7 +374,7 @@ mlir::NamedAttribute sizeAttr(sizeTag, getI64IntegerAttr(data.size())); llvm::SmallVector attrs{dataAttr, sizeAttr}; return create(loc, llvm::ArrayRef{type}, - llvm::None, attrs); + std::nullopt, attrs); } 
mlir::Value fir::FirOpBuilder::genShape(mlir::Location loc, diff --git a/flang/lib/Optimizer/Builder/LowLevelIntrinsics.cpp b/flang/lib/Optimizer/Builder/LowLevelIntrinsics.cpp --- a/flang/lib/Optimizer/Builder/LowLevelIntrinsics.cpp +++ b/flang/lib/Optimizer/Builder/LowLevelIntrinsics.cpp @@ -26,7 +26,7 @@ llvm::SmallVector args = {ptrTy, ptrTy, builder.getI64Type(), builder.getI1Type()}; auto memcpyTy = - mlir::FunctionType::get(builder.getContext(), args, llvm::None); + mlir::FunctionType::get(builder.getContext(), args, std::nullopt); return builder.addNamedFunction(builder.getUnknownLoc(), "llvm.memcpy.p0.p0.i64", memcpyTy); } @@ -36,7 +36,7 @@ llvm::SmallVector args = {ptrTy, ptrTy, builder.getI64Type(), builder.getI1Type()}; auto memmoveTy = - mlir::FunctionType::get(builder.getContext(), args, llvm::None); + mlir::FunctionType::get(builder.getContext(), args, std::nullopt); return builder.addNamedFunction(builder.getUnknownLoc(), "llvm.memmove.p0.p0.i64", memmoveTy); } @@ -46,7 +46,7 @@ llvm::SmallVector args = {ptrTy, ptrTy, builder.getI64Type(), builder.getI1Type()}; auto memsetTy = - mlir::FunctionType::get(builder.getContext(), args, llvm::None); + mlir::FunctionType::get(builder.getContext(), args, std::nullopt); return builder.addNamedFunction(builder.getUnknownLoc(), "llvm.memset.p0.p0.i64", memsetTy); } @@ -62,7 +62,7 @@ mlir::func::FuncOp fir::factory::getLlvmStackSave(fir::FirOpBuilder &builder) { auto ptrTy = builder.getRefType(builder.getIntegerType(8)); auto funcTy = - mlir::FunctionType::get(builder.getContext(), llvm::None, {ptrTy}); + mlir::FunctionType::get(builder.getContext(), std::nullopt, {ptrTy}); return builder.addNamedFunction(builder.getUnknownLoc(), "llvm.stacksave", funcTy); } @@ -71,7 +71,7 @@ fir::factory::getLlvmStackRestore(fir::FirOpBuilder &builder) { auto ptrTy = builder.getRefType(builder.getIntegerType(8)); auto funcTy = - mlir::FunctionType::get(builder.getContext(), {ptrTy}, llvm::None); + mlir::FunctionType::get(builder.getContext(), {ptrTy}, std::nullopt); return builder.addNamedFunction(builder.getUnknownLoc(), "llvm.stackrestore", funcTy); } @@ -80,7 +80,7 @@ fir::factory::getLlvmInitTrampoline(fir::FirOpBuilder &builder) { auto ptrTy = builder.getRefType(builder.getIntegerType(8)); auto funcTy = mlir::FunctionType::get(builder.getContext(), - {ptrTy, ptrTy, ptrTy}, llvm::None); + {ptrTy, ptrTy, ptrTy}, std::nullopt); return builder.addNamedFunction(builder.getUnknownLoc(), "llvm.init.trampoline", funcTy); } diff --git a/flang/lib/Optimizer/Builder/MutableBox.cpp b/flang/lib/Optimizer/Builder/MutableBox.cpp --- a/flang/lib/Optimizer/Builder/MutableBox.cpp +++ b/flang/lib/Optimizer/Builder/MutableBox.cpp @@ -336,7 +336,7 @@ auto zero = builder.createIntegerConstant(loc, builder.getIndexType(), 0); llvm::SmallVector extents(seqTy.getDimension(), zero); shape = builder.createShape( - loc, fir::ArrayBoxValue{nullAddr, extents, /*lbounds=*/llvm::None}); + loc, fir::ArrayBoxValue{nullAddr, extents, /*lbounds=*/std::nullopt}); } // Provide dummy length parameters if they are dynamic. If a length parameter // is deferred. It is set to zero here and will be set on allocation. 
@@ -481,22 +481,23 @@ mlir::Value tdesc; if (auto polyBox = source.getBoxOf()) tdesc = polyBox->getTdesc(); - writer.updateMutableBox(p.getAddr(), /*lbounds=*/llvm::None, - /*extents=*/llvm::None, /*lengths=*/llvm::None, - tdesc); + writer.updateMutableBox(p.getAddr(), /*lbounds=*/std::nullopt, + /*extents=*/std::nullopt, + /*lengths=*/std::nullopt, tdesc); }, [&](const fir::UnboxedValue &addr) { - writer.updateMutableBox(addr, /*lbounds=*/llvm::None, - /*extents=*/llvm::None, /*lengths=*/llvm::None); + writer.updateMutableBox(addr, /*lbounds=*/std::nullopt, + /*extents=*/std::nullopt, + /*lengths=*/std::nullopt); }, [&](const fir::CharBoxValue &ch) { - writer.updateMutableBox(ch.getAddr(), /*lbounds=*/llvm::None, - /*extents=*/llvm::None, {ch.getLen()}); + writer.updateMutableBox(ch.getAddr(), /*lbounds=*/std::nullopt, + /*extents=*/std::nullopt, {ch.getLen()}); }, [&](const fir::ArrayBoxValue &arr) { writer.updateMutableBox(arr.getAddr(), lbounds.empty() ? arr.getLBounds() : lbounds, - arr.getExtents(), /*lengths=*/llvm::None); + arr.getExtents(), /*lengths=*/std::nullopt); }, [&](const fir::CharArrayBoxValue &arr) { writer.updateMutableBox(arr.getAddr(), @@ -586,11 +587,11 @@ source.match( [&](const fir::PolymorphicValue &p) { writer.updateMutableBox(cast(p.getAddr()), lbounds, extents, - /*lengths=*/llvm::None); + /*lengths=*/std::nullopt); }, [&](const fir::UnboxedValue &addr) { writer.updateMutableBox(cast(addr), lbounds, extents, - /*lengths=*/llvm::None); + /*lengths=*/std::nullopt); }, [&](const fir::CharBoxValue &ch) { writer.updateMutableBox(cast(ch.getAddr()), lbounds, extents, @@ -598,7 +599,7 @@ }, [&](const fir::ArrayBoxValue &arr) { writer.updateMutableBox(cast(arr.getAddr()), lbounds, extents, - /*lengths=*/llvm::None); + /*lengths=*/std::nullopt); }, [&](const fir::CharArrayBoxValue &arr) { writer.updateMutableBox(cast(arr.getAddr()), lbounds, extents, @@ -705,7 +706,7 @@ // information is available at compile time and could be reflected here // somehow. mlir::Value irBox = createNewFirBox(builder, loc, box, newStorage, - llvm::None, extents, lengths); + std::nullopt, extents, lengths); fir::runtime::genDerivedTypeInitialize(builder, loc, irBox); } return newStorage; diff --git a/flang/lib/Optimizer/Builder/Runtime/Stop.cpp b/flang/lib/Optimizer/Builder/Runtime/Stop.cpp --- a/flang/lib/Optimizer/Builder/Runtime/Stop.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Stop.cpp @@ -25,7 +25,7 @@ void fir::runtime::genAbort(fir::FirOpBuilder &builder, mlir::Location loc) { mlir::func::FuncOp abortFunc = fir::runtime::getRuntimeFunc(loc, builder); - builder.create(loc, abortFunc, llvm::None); + builder.create(loc, abortFunc, std::nullopt); } void fir::runtime::genReportFatalUserError(fir::FirOpBuilder &builder, diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -2047,14 +2047,14 @@ if (!rebox.getSubstr().empty()) substringOffset = operands[rebox.substrOffset()]; base = genBoxOffsetGep(rewriter, loc, base, zero, - /*cstInteriorIndices=*/llvm::None, fieldIndices, + /*cstInteriorIndices=*/std::nullopt, fieldIndices, substringOffset); } if (rebox.getSlice().empty()) // The array section is of the form array[%component][substring], keep // the input array extents and strides. 
- return finalizeRebox(rebox, dest, base, /*lbounds*/ llvm::None, + return finalizeRebox(rebox, dest, base, /*lbounds*/ std::nullopt, inputExtents, inputStrides, rewriter); // Strides from the fir.box are in bytes. @@ -2104,7 +2104,7 @@ slicedStrides.emplace_back(stride); } } - return finalizeRebox(rebox, dest, base, /*lbounds*/ llvm::None, + return finalizeRebox(rebox, dest, base, /*lbounds*/ std::nullopt, slicedExtents, slicedStrides, rewriter); } @@ -3012,7 +3012,7 @@ if (destOps) rewriter.replaceOpWithNewOp(caseOp, *destOps, dest); else - rewriter.replaceOpWithNewOp(caseOp, llvm::None, dest); + rewriter.replaceOpWithNewOp(caseOp, std::nullopt, dest); } static void genCaseLadderStep(mlir::Location loc, mlir::Value cmp, diff --git a/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp b/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp --- a/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp +++ b/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp @@ -106,8 +106,8 @@ shapeOpers.push_back(extVal); } auto xbox = rewriter.create( - loc, embox.getType(), embox.getMemref(), shapeOpers, llvm::None, - llvm::None, llvm::None, llvm::None, embox.getTypeparams(), + loc, embox.getType(), embox.getMemref(), shapeOpers, std::nullopt, + std::nullopt, std::nullopt, std::nullopt, embox.getTypeparams(), embox.getTdesc()); LLVM_DEBUG(llvm::dbgs() << "rewriting " << embox << " to " << xbox << '\n'); rewriter.replaceOp(embox, xbox.getOperation()->getResults()); diff --git a/flang/lib/Optimizer/CodeGen/TypeConverter.h b/flang/lib/Optimizer/CodeGen/TypeConverter.h --- a/flang/lib/Optimizer/CodeGen/TypeConverter.h +++ b/flang/lib/Optimizer/CodeGen/TypeConverter.h @@ -62,7 +62,7 @@ addConversion([&](BoxProcType boxproc) { // TODO: Support for this type will be added later when the Fortran 2003 // procedure pointer feature is implemented. 
- return llvm::None; + return std::nullopt; }); addConversion( [&](fir::ClassType classTy) { return convertBoxType(classTy); }); @@ -128,7 +128,7 @@ }); addConversion([&](mlir::NoneType none) { return mlir::LLVM::LLVMStructType::getLiteral( - none.getContext(), llvm::None, /*isPacked=*/false); + none.getContext(), std::nullopt, /*isPacked=*/false); }); // FIXME: https://reviews.llvm.org/D82831 introduced an automatic // materialization of conversion around function calls that is not working @@ -140,7 +140,7 @@ mlir::ValueRange inputs, mlir::Location loc) -> llvm::Optional { if (inputs.size() != 1) - return llvm::None; + return std::nullopt; return inputs[0]; }); // Similar FIXME workaround here (needed for compare.fir/select-type.fir @@ -150,7 +150,7 @@ mlir::ValueRange inputs, mlir::Location loc) -> llvm::Optional { if (inputs.size() != 1) - return llvm::None; + return std::nullopt; return inputs[0]; }); } diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -3353,7 +3353,7 @@ void fir::IfOp::build(mlir::OpBuilder &builder, mlir::OperationState &result, mlir::Value cond, bool withElseRegion) { - build(builder, result, llvm::None, cond, withElseRegion); + build(builder, result, std::nullopt, cond, withElseRegion); } void fir::IfOp::build(mlir::OpBuilder &builder, mlir::OperationState &result, diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp --- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp +++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp @@ -351,7 +351,7 @@ hlfir::getFortranElementType(t).dyn_cast()) if (charType.hasConstantLen()) return charType.getLen(); - return llvm::None; + return std::nullopt; } mlir::LogicalResult hlfir::ConcatOp::verify() { diff --git a/flang/lib/Optimizer/Transforms/AbstractResult.cpp b/flang/lib/Optimizer/Transforms/AbstractResult.cpp --- a/flang/lib/Optimizer/Transforms/AbstractResult.cpp +++ b/flang/lib/Optimizer/Transforms/AbstractResult.cpp @@ -371,7 +371,7 @@ } patterns.insert(context, newArg); target.addDynamicallyLegalOp( - [](mlir::func::ReturnOp ret) { return ret.operands().empty(); }); + [](mlir::func::ReturnOp ret) { return ret.getOperands().empty(); }); assert(func.getFunctionType() == getNewFunctionType(funcTy, shouldBoxResult)); } else { diff --git a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp --- a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp +++ b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp @@ -464,7 +464,10 @@ return fir::complexBitsToTypeCode( kindMap.getRealBitsize(cmplxTy.getFKind())); } - return 0; // TODO more types. + if (auto charTy = ty.dyn_cast()) + return fir::characterBitsToTypeCode( + kindMap.getCharacterBitsize(charTy.getFKind())); + return 0; } mlir::LogicalResult @@ -476,13 +479,14 @@ mlir::Value cmp; // TYPE IS type guard comparison are all done inlined. if (auto a = attr.dyn_cast()) { - if (fir::isa_trivial(a.getType())) { + if (fir::isa_trivial(a.getType()) || + a.getType().isa()) { // For type guard statement with Intrinsic type spec the type code of // the descriptor is compared. 
int code = getTypeCode(a.getType(), kindMap); if (code == 0) return mlir::emitError(loc) - << "type code not done for " << a.getType(); + << "type code unavailable for " << a.getType(); mlir::Value typeCode = rewriter.create( loc, rewriter.getI8IntegerAttr(code)); mlir::Value selectorTypeCode = rewriter.create( @@ -543,7 +547,7 @@ rewriter.setInsertionPointToEnd(thisBlock); if (destOps.has_value()) rewriter.create(loc, cmp, dest, destOps.value(), - newBlock, llvm::None); + newBlock, std::nullopt); else rewriter.create(loc, cmp, dest, newBlock); rewriter.setInsertionPointToEnd(newBlock); diff --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp --- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp +++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp @@ -515,7 +515,7 @@ defOp = val.getDefiningOp(); // Analyze only sequences of convert operations. if (!mlir::isa(defOp)) - return llvm::None; + return std::nullopt; val = defOp->getOperand(0); // The convert operation is expected to convert from one // box type to another box type. diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp --- a/flang/lib/Parser/Fortran-parsers.cpp +++ b/flang/lib/Parser/Fortran-parsers.cpp @@ -522,6 +522,9 @@ // R749 type-bound-procedure-stmt -> // PROCEDURE [[, bind-attr-list] ::] type-bound-proc-decl-list | // PROCEDURE ( interface-name ) , bind-attr-list :: binding-name-list +// The "::" is required by the standard (C768) in the first production if +// any type-bound-proc-decl has a "=>", but it's not strictly necessary to +// avoid a bad parse. TYPE_CONTEXT_PARSER("type bound PROCEDURE statement"_en_US, "PROCEDURE" >> (construct( @@ -531,6 +534,15 @@ "," >> nonemptyList(Parser{}), ok), localRecovery("expected list of binding names"_err_en_US, "::" >> listOfNames, SkipTo<'\n'>{}))) || + construct(construct< + TypeBoundProcedureStmt::WithoutInterface>( + pure>(), + nonemptyList( + "expected type bound procedure declarations"_err_en_US, + construct(name, + maybe(extension( + "type-bound procedure statement should have '::' if it has '=>'"_port_en_US, + "=>" >> name)))))) || construct( construct( optionalListBeforeColons(Parser{}), diff --git a/flang/lib/Parser/message.cpp b/flang/lib/Parser/message.cpp --- a/flang/lib/Parser/message.cpp +++ b/flang/lib/Parser/message.cpp @@ -70,13 +70,18 @@ return conversions_.front().c_str(); } -const char *MessageFormattedText::Convert(std::string &s) { +const char *MessageFormattedText::Convert(std::string &&s) { + conversions_.emplace_front(std::move(s)); + return conversions_.front().c_str(); +} + +const char *MessageFormattedText::Convert(const std::string_view &s) { conversions_.emplace_front(s); return conversions_.front().c_str(); } -const char *MessageFormattedText::Convert(std::string &&s) { - conversions_.emplace_front(std::move(s)); +const char *MessageFormattedText::Convert(std::string_view &&s) { + conversions_.emplace_front(s); return conversions_.front().c_str(); } diff --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h --- a/flang/lib/Parser/prescan.h +++ b/flang/lib/Parser/prescan.h @@ -109,6 +109,7 @@ slashInCurrentStatement_ = false; preventHollerith_ = false; delimiterNesting_ = 0; + continuationLines_ = 0; } Provenance GetProvenance(const char *sourceChar) const { @@ -195,6 +196,7 @@ Encoding encoding_{Encoding::UTF_8}; int delimiterNesting_{0}; int prescannerNesting_{0}; + int continuationLines_{0}; Provenance startProvenance_; const
char *start_{nullptr}; // beginning of current source file content diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -89,7 +89,8 @@ void Prescanner::Statement() { TokenSequence tokens; - LineClassification line{ClassifyLine(nextLine_)}; + const char *statementStart{nextLine_}; + LineClassification line{ClassifyLine(statementStart)}; switch (line.kind) { case LineClassification::Kind::Comment: nextLine_ += line.payloadOffset; // advance to '!' or newline @@ -164,6 +165,11 @@ while (NextToken(tokens)) { } + if (continuationLines_ > 255) { + Say(GetProvenance(statementStart), + "%d continuation lines is more than the Fortran standard allows"_port_en_US, + continuationLines_); + } Provenance newlineProvenance{GetCurrentProvenance()}; if (std::optional preprocessed{ @@ -299,7 +305,7 @@ token.CloseToken(); SkipToNextSignificantCharacter(); if (IsDecimalDigit(*at_)) { - Say(GetProvenance(at_), + Say(GetCurrentProvenance(), "Label digit is not in fixed-form label field"_port_en_US); } } @@ -406,6 +412,7 @@ mightNeedSpace = *at_ == '\n'; } for (; Continuation(mightNeedSpace); mightNeedSpace = false) { + ++continuationLines_; if (MustSkipToEndOfLine()) { SkipToEndOfLine(); } @@ -493,7 +500,7 @@ // Recognize and skip over classic C style /*comments*/ when // outside a character literal. if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) { - Say(GetProvenance(at_), + Say(GetCurrentProvenance(), "nonstandard usage: C-style comment"_port_en_US); } SkipCComments(); diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2621,6 +2621,7 @@ void PutKeywordLetter(char); void Word(const char *); void Word(const std::string &); + void Word(const std::string_view &); void Indent() { indent_ += indentationAmount_; } void Outdent() { CHECK(indent_ >= indentationAmount_); @@ -2777,6 +2778,12 @@ void UnparseVisitor::Word(const std::string &str) { Word(str.c_str()); } +void UnparseVisitor::Word(const std::string_view &str) { + for (std::size_t j{0}; j < str.length(); ++j) { + PutKeywordLetter(str[j]); + } +} + template void Unparse(llvm::raw_ostream &out, const A &root, Encoding encoding, bool capitalizeKeywords, bool backslashEscapes, diff --git a/flang/lib/Semantics/attr.cpp b/flang/lib/Semantics/attr.cpp --- a/flang/lib/Semantics/attr.cpp +++ b/flang/lib/Semantics/attr.cpp @@ -30,7 +30,7 @@ case Attr::INTENT_OUT: return "INTENT(OUT)"; default: - return EnumToString(attr); + return std::string{EnumToString(attr)}; } } diff --git a/flang/lib/Semantics/check-call.h b/flang/lib/Semantics/check-call.h --- a/flang/lib/Semantics/check-call.h +++ b/flang/lib/Semantics/check-call.h @@ -30,8 +30,9 @@ // Argument treatingExternalAsImplicit should be true when the called procedure // does not actually have an explicit interface at the call site, but // its characteristics are known because it is a subroutine or function -// defined at the top level in the same source file. -void CheckArguments(const evaluate::characteristics::Procedure &, +// defined at the top level in the same source file. Returns false if +// messages were created, true if all is well. 
+bool CheckArguments(const evaluate::characteristics::Procedure &, evaluate::ActualArguments &, evaluate::FoldingContext &, const Scope &, bool treatingExternalAsImplicit, const evaluate::SpecificIntrinsic *intrinsic); diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -397,11 +397,17 @@ } else if (dummy.intent == common::Intent::InOut) { reason = "INTENT(IN OUT)"; } + bool dummyIsPointer{ + dummy.attrs.test(characteristics::DummyDataObject::Attr::Pointer)}; if (reason && scope) { - DefinabilityFlags flags; + // Problems with polymorphism are caught in the callee's definition. + DefinabilityFlags flags{DefinabilityFlag::PolymorphicOkInPure}; if (isElemental || dummyIsValue) { // 15.5.2.4(21) flags.set(DefinabilityFlag::VectorSubscriptIsOk); } + if (actualIsPointer && dummyIsPointer) { // 19.6.8 + flags.set(DefinabilityFlag::PointerDefinition); + } if (auto whyNot{WhyNotDefinable(messages.at(), *scope, flags, actual)}) { if (auto *msg{messages.Say( "Actual argument associated with %s %s is not definable"_err_en_US, @@ -415,8 +421,6 @@ bool actualIsContiguous{IsSimplyContiguous(actual, context)}; bool dummyIsAssumedShape{dummy.type.attrs().test( characteristics::TypeAndShape::Attr::AssumedShape)}; - bool dummyIsPointer{ - dummy.attrs.test(characteristics::DummyDataObject::Attr::Pointer)}; bool dummyIsContiguous{ dummy.attrs.test(characteristics::DummyDataObject::Attr::Contiguous)}; if ((actualIsAsynchronous || actualIsVolatile) && @@ -596,6 +600,10 @@ argProcSymbol->name()); return; } + } else if (argProcSymbol->has()) { + evaluate::SayWithDeclaration(messages, *argProcSymbol, + "Procedure binding '%s' passed as an actual argument"_port_en_US, + argProcSymbol->name()); } } if (auto argChars{characteristics::DummyArgument::FromActual( @@ -684,9 +692,15 @@ } if (dummyIsPointer && dummy.intent != common::Intent::In) { const Symbol *last{GetLastSymbol(*expr)}; - if (!(last && IsProcedurePointer(*last)) && - !(dummy.intent == common::Intent::Default && - IsNullProcedurePointer(*expr))) { + if (last && IsProcedurePointer(*last)) { + if (dummy.intent != common::Intent::Default && + IsIntentIn(last->GetUltimate())) { // 19.6.8 + messages.Say( + "Actual argument associated with procedure pointer %s may not be INTENT(IN)"_err_en_US, + dummyName); + } + } else if (!(dummy.intent == common::Intent::Default && + IsNullProcedurePointer(*expr))) { // 15.5.2.9(5) -- dummy procedure POINTER // Interface compatibility has already been checked above messages.Say( @@ -960,7 +974,7 @@ .AnyFatalError(); } -void CheckArguments(const characteristics::Procedure &proc, +bool CheckArguments(const characteristics::Procedure &proc, evaluate::ActualArguments &actuals, evaluate::FoldingContext &context, const Scope &scope, bool treatingExternalAsImplicit, const evaluate::SpecificIntrinsic *intrinsic) { @@ -980,21 +994,25 @@ if (auto *msgs{messages.messages()}) { msgs->Annex(std::move(buffer)); } - return; // don't pile on + return false; // don't pile on } } if (explicitInterface) { auto buffer{ CheckExplicitInterface(proc, actuals, context, scope, intrinsic)}; - if (treatingExternalAsImplicit && !buffer.empty()) { - if (auto *msg{messages.Say( - "If the procedure's interface were explicit, this reference would be in error"_warn_en_US)}) { - buffer.AttachTo(*msg, parser::Severity::Because); + if (!buffer.empty()) { + if (treatingExternalAsImplicit && !buffer.empty()) { + if (auto *msg{messages.Say( + "If 
the procedure's interface were explicit, this reference would be in error"_warn_en_US)}) { + buffer.AttachTo(*msg, parser::Severity::Because); + } } - } - if (auto *msgs{messages.messages()}) { - msgs->Annex(std::move(buffer)); + if (auto *msgs{messages.messages()}) { + msgs->Annex(std::move(buffer)); + } + return false; } } + return true; } } // namespace Fortran::semantics diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -65,6 +65,7 @@ void CheckArraySpec(const Symbol &, const ArraySpec &); void CheckProcEntity(const Symbol &, const ProcEntityDetails &); void CheckSubprogram(const Symbol &, const SubprogramDetails &); + void CheckLocalVsGlobal(const Symbol &); void CheckAssumedTypeEntity(const Symbol &, const ObjectEntityDetails &); void CheckDerivedType(const Symbol &, const DerivedTypeDetails &); bool CheckFinal( @@ -87,6 +88,7 @@ void CheckGenericOps(const Scope &); bool CheckConflicting(const Symbol &, Attr, Attr); void WarnMissingFinal(const Symbol &); + void CheckSymbolType(const Symbol &); // C702 bool InPure() const { return innermostSymbol_ && IsPureProcedure(*innermostSymbol_); } @@ -103,12 +105,12 @@ return subp && subp->isInterface(); } template - void SayWithDeclaration(const Symbol &symbol, A &&...x) { - if (parser::Message * msg{messages_.Say(std::forward(x)...)}) { - if (messages_.at().begin() != symbol.name().begin()) { - evaluate::AttachDeclaration(*msg, symbol); - } + parser::Message *SayWithDeclaration(const Symbol &symbol, A &&...x) { + parser::Message *msg{messages_.Say(std::forward(x)...)}; + if (msg && messages_.at().begin() != symbol.name().begin()) { + evaluate::AttachDeclaration(*msg, symbol); } + return msg; } bool IsResultOkToDiffer(const FunctionResult &); void CheckBindC(const Symbol &); @@ -199,7 +201,7 @@ const DeclTypeSpec &type, bool canHaveAssumedTypeParameters) { if (type.category() == DeclTypeSpec::Character) { Check(type.characterTypeSpec().length(), canHaveAssumedTypeParameters); - } else if (const DerivedTypeSpec * derived{type.AsDerived()}) { + } else if (const DerivedTypeSpec *derived{type.AsDerived()}) { for (auto &parm : derived->parameters()) { Check(parm.second, canHaveAssumedTypeParameters); } @@ -346,7 +348,7 @@ messages_.Say( "An assumed-length CHARACTER(*) function cannot be PURE"_err_en_US); } - if (const Symbol * result{FindFunctionResult(symbol)}) { + if (const Symbol *result{FindFunctionResult(symbol)}) { if (IsPointer(*result)) { messages_.Say( "An assumed-length CHARACTER(*) function cannot return a POINTER"_err_en_US); @@ -449,7 +451,7 @@ void CheckHelper::CheckAssumedTypeEntity( // C709 const Symbol &symbol, const ObjectEntityDetails &details) { - if (const DeclTypeSpec * type{symbol.GetType()}; + if (const DeclTypeSpec *type{symbol.GetType()}; type && type->category() == DeclTypeSpec::TypeStar) { if (!IsDummy(symbol)) { messages_.Say( @@ -493,15 +495,7 @@ void CheckHelper::CheckObjectEntity( const Symbol &symbol, const ObjectEntityDetails &details) { - if (!IsAllocatableOrPointer(symbol)) { // C702 - if (auto dyType{evaluate::DynamicType::From(symbol)}) { - if (dyType->HasDeferredTypeParameter()) { - messages_.Say( - "'%s' has a type %s with a deferred type parameter but is neither an allocatable or a pointer"_err_en_US, - symbol.name(), dyType->AsFortran()); - } - } - } + CheckSymbolType(symbol); CheckArraySpec(symbol, details.shape()); Check(details.shape()); 
Check(details.coshape()); @@ -539,7 +533,7 @@ symbol.name()); } } - if (const DeclTypeSpec * type{details.type()}) { + if (const DeclTypeSpec *type{details.type()}) { if (IsBadCoarrayType(type->AsDerived())) { // C747 & C824 messages_.Say( "Coarray '%s' may not have type TEAM_TYPE, C_PTR, or C_FUNPTR"_err_en_US, @@ -567,11 +561,11 @@ messages_.Say( "non-POINTER dummy argument of pure function must be INTENT(IN) or VALUE"_err_en_US); } else if (IsIntentOut(symbol)) { - if (const DeclTypeSpec * type{details.type()}) { + if (const DeclTypeSpec *type{details.type()}) { if (type && type->IsPolymorphic()) { // C1588 messages_.Say( "An INTENT(OUT) dummy argument of a pure subroutine may not be polymorphic"_err_en_US); - } else if (const DerivedTypeSpec * derived{type->AsDerived()}) { + } else if (const DerivedTypeSpec *derived{type->AsDerived()}) { if (FindUltimateComponent(*derived, [](const Symbol &x) { const DeclTypeSpec *type{x.GetType()}; return type && type->IsPolymorphic(); @@ -661,7 +655,7 @@ "An initialized variable in BLOCK DATA must be in a COMMON block"_err_en_US); } } - if (const DeclTypeSpec * type{details.type()}) { // C708 + if (const DeclTypeSpec *type{details.type()}) { // C708 if (type->IsPolymorphic() && !(type->IsAssumedType() || IsAllocatableOrPointer(symbol) || IsDummy(symbol))) { @@ -800,6 +794,7 @@ void CheckHelper::CheckProcEntity( const Symbol &symbol, const ProcEntityDetails &details) { + CheckSymbolType(symbol); if (details.isDummy()) { if (!symbol.attrs().test(Attr::POINTER) && // C843 (symbol.attrs().test(Attr::INTENT_IN) || @@ -812,7 +807,9 @@ messages_.Say( "An ELEMENTAL subprogram may not have a dummy procedure"_err_en_US); } - const Symbol *interface { details.interface().symbol() }; + const Symbol *interface { + details.interface().symbol() + }; if (!symbol.attrs().test(Attr::INTRINSIC) && (IsElementalProcedure(symbol) || (interface && !interface->attrs().test(Attr::INTRINSIC) && @@ -844,7 +841,7 @@ } if (symbol.attrs().test(Attr::POINTER)) { CheckPointerInitialization(symbol); - if (const Symbol * interface{details.interface().symbol()}) { + if (const Symbol *interface{details.interface().symbol()}) { const Symbol &ultimate{interface->GetUltimate()}; if (ultimate.attrs().test(Attr::INTRINSIC)) { if (const auto intrinsic{ @@ -867,6 +864,7 @@ "Procedure '%s' with SAVE attribute must also have POINTER attribute"_err_en_US, symbol.name()); } + CheckLocalVsGlobal(symbol); } // When a module subprogram has the MODULE prefix the following must match @@ -931,10 +929,10 @@ void CheckHelper::CheckSubprogram( const Symbol &symbol, const SubprogramDetails &details) { - if (const Symbol * iface{FindSeparateModuleSubprogramInterface(&symbol)}) { + if (const Symbol *iface{FindSeparateModuleSubprogramInterface(&symbol)}) { SubprogramMatchHelper{*this}.Check(symbol, *iface); } - if (const Scope * entryScope{details.entryScope()}) { + if (const Scope *entryScope{details.entryScope()}) { // ENTRY 15.6.2.6, esp. 
C1571 std::optional error; const Symbol *subprogram{entryScope->symbol()}; @@ -967,6 +965,12 @@ } } } + if (const MaybeExpr & stmtFunction{details.stmtFunction()}) { + if (auto msg{evaluate::CheckStatementFunction( + symbol, *stmtFunction, context_.foldingContext())}) { + SayWithDeclaration(symbol, std::move(*msg)); + } + } if (IsElementalProcedure(symbol)) { // See comment on the similar check in CheckProcEntity() if (details.isDummy()) { @@ -980,10 +984,56 @@ } } } - if (details.isInterface() && !details.isDummy() && details.isFunction() && - IsAssumedLengthCharacter(details.result())) { // C721 - messages_.Say(details.result().name(), - "A function interface may not declare an assumed-length CHARACTER(*) result"_err_en_US); + if (details.isInterface()) { + if (!details.isDummy() && details.isFunction() && + IsAssumedLengthCharacter(details.result())) { // C721 + messages_.Say(details.result().name(), + "A function interface may not declare an assumed-length CHARACTER(*) result"_err_en_US); + } + } + CheckLocalVsGlobal(symbol); +} + +void CheckHelper::CheckLocalVsGlobal(const Symbol &symbol) { + if (IsProcedure(symbol) && IsExternal(symbol)) { + if (const Symbol *global{FindGlobal(symbol)}; global && global != &symbol) { + std::string interfaceName{symbol.name().ToString()}; + if (const auto *bind{symbol.GetBindName()}) { + interfaceName = *bind; + } + std::string definitionName{global->name().ToString()}; + if (const auto *bind{global->GetBindName()}) { + definitionName = *bind; + } + if (interfaceName == definitionName) { + parser::Message *msg{nullptr}; + if (!IsProcedure(*global)) { + if (symbol.flags().test(Symbol::Flag::Function) || + symbol.flags().test(Symbol::Flag::Subroutine)) { + msg = messages_.Say( + "The global entity '%s' corresponding to the local procedure '%s' is not a callable subprogram"_err_en_US, + global->name(), symbol.name()); + } + } else if (auto chars{Characterize(symbol)}) { + if (auto globalChars{Characterize(*global)}) { + if (chars->HasExplicitInterface()) { + std::string whyNot; + if (!chars->IsCompatibleWith(*globalChars, &whyNot)) { + msg = messages_.Say( + "The global subprogram '%s' is not compatible with its local procedure declaration (%s)"_warn_en_US, + global->name(), whyNot); + } + } else if (!globalChars->CanBeCalledViaImplicitInterface()) { + msg = messages_.Say( + "The global subprogram '%s' may not be referenced via the implicit interface '%s'"_err_en_US, + global->name(), symbol.name()); + } + } + } + evaluate::AttachDeclaration(msg, *global); + evaluate::AttachDeclaration(msg, symbol); + } + } } } @@ -1004,7 +1054,7 @@ (derivedType.attrs().test(Attr::BIND_C) || details.sequence())) { messages_.Say("An ABSTRACT derived type must be extensible"_err_en_US); } - if (const DeclTypeSpec * parent{FindParentTypeSpec(derivedType)}) { + if (const DeclTypeSpec *parent{FindParentTypeSpec(derivedType)}) { const DerivedTypeSpec *parentDerived{parent->AsDerived()}; if (!IsExtensibleType(parentDerived)) { // C705 messages_.Say("The parent type is not extensible"_err_en_US); @@ -1091,7 +1141,7 @@ const Symbol *errSym{&subroutine}; if (const auto *details{subroutine.detailsIf()}) { if (!details->dummyArgs().empty()) { - if (const Symbol * argSym{details->dummyArgs()[0]}) { + if (const Symbol *argSym{details->dummyArgs()[0]}) { errSym = argSym; } } @@ -1230,7 +1280,7 @@ } DistinguishabilityHelper helper{context_}; for (const Symbol &specific : details.specificProcs()) { - if (const Procedure * procedure{Characterize(specific)}) { + if (const Procedure 
*procedure{Characterize(specific)}) { if (procedure->HasExplicitInterface()) { helper.Add(generic, kind, specific, *procedure); } else { @@ -1573,7 +1623,9 @@ return; } const auto &name{proc.name()}; - const Symbol *interface { interface0 ? FindInterface(*interface0) : nullptr }; + const Symbol *interface { + interface0 ? FindInterface(*interface0) : nullptr + }; if (!interface) { messages_.Say(name, "Procedure component '%s' must have NOPASS attribute or explicit interface"_err_en_US, @@ -1683,7 +1735,7 @@ const Scope &dtScope{symbol.owner()}; CHECK(dtScope.kind() == Scope::Kind::DerivedType); if (symbol.attrs().test(Attr::DEFERRED)) { - if (const Symbol * dtSymbol{dtScope.symbol()}) { + if (const Symbol *dtSymbol{dtScope.symbol()}) { if (!dtSymbol->attrs().test(Attr::ABSTRACT)) { // C733 SayWithDeclaration(*dtSymbol, "Procedure bound to non-ABSTRACT derived type '%s' may not be DEFERRED"_err_en_US, @@ -1703,7 +1755,7 @@ "Intrinsic procedure '%s' is not a specific intrinsic permitted for use in the definition of binding '%s'"_err_en_US, binding.symbol().name(), symbol.name()); } - if (const Symbol * overridden{FindOverriddenBinding(symbol)}) { + if (const Symbol *overridden{FindOverriddenBinding(symbol)}) { if (overridden->attrs().test(Attr::NON_OVERRIDABLE)) { SayWithDeclaration(*overridden, "Override of NON_OVERRIDABLE '%s' is not permitted"_err_en_US, @@ -1768,7 +1820,7 @@ void CheckHelper::Check(const Scope &scope) { scope_ = &scope; common::Restorer restorer{innermostSymbol_, innermostSymbol_}; - if (const Symbol * symbol{scope.symbol()}) { + if (const Symbol *symbol{scope.symbol()}) { innermostSymbol_ = symbol; } if (scope.IsParameterizedDerivedTypeInstantiation()) { @@ -1805,6 +1857,31 @@ if (scope.kind() == Scope::Kind::BlockData) { CheckBlockData(scope); } + if (auto name{scope.GetName()}) { + auto iter{scope.find(*name)}; + if (iter != scope.end()) { + const char *kind{nullptr}; + switch (scope.kind()) { + case Scope::Kind::Module: + kind = scope.symbol()->get().isSubmodule() + ? "submodule" + : "module"; + break; + case Scope::Kind::MainProgram: + kind = "main program"; + break; + case Scope::Kind::BlockData: + kind = "BLOCK DATA subprogram"; + break; + default:; + } + if (kind) { + messages_.Say(iter->second->name(), + "Name '%s' declared in a %s should not have the same name as the %s"_port_en_US, + *name, kind, kind); + } + } + } CheckGenericOps(scope); } } @@ -1877,7 +1954,7 @@ // Not a generic; ensure characteristics are defined if a function. 
auto restorer{messages_.SetLocation(generic.name())}; if (IsFunction(generic) && !context_.HasError(generic)) { - if (const Symbol * result{FindFunctionResult(generic)}; + if (const Symbol *result{FindFunctionResult(generic)}; result && !context_.HasError(*result)) { Characterize(generic); } @@ -1893,7 +1970,7 @@ for (std::size_t i{0}; i < specifics.size(); ++i) { const Symbol &specific{*specifics[i]}; auto restorer{messages_.SetLocation(bindingNames[i])}; - if (const Procedure * proc{Characterize(specific)}) { + if (const Procedure *proc{Characterize(specific)}) { if (kind.IsAssignment()) { if (!CheckDefinedAssignment(specific, *proc)) { continue; @@ -1912,7 +1989,7 @@ addSpecifics(symbol); const Symbol &ultimate{symbol.GetUltimate()}; if (ultimate.has()) { - if (const Scope * typeScope{ultimate.scope()}) { + if (const Scope *typeScope{ultimate.scope()}) { for (const auto &pair2 : *typeScope) { addSpecifics(*pair2.second); } @@ -1944,7 +2021,7 @@ "A variable with BIND(C) attribute may only appear in the specification part of a module"_err_en_US); context_.SetError(symbol); } - if (const std::string * name{DefinesBindCName(symbol)}) { + if (const std::string *name{DefinesBindCName(symbol)}) { auto pair{bindC_.emplace(*name, symbol)}; if (!pair.second) { const Symbol &other{*pair.first->second}; @@ -2056,8 +2133,8 @@ void CheckHelper::CheckDioDummyIsDerived(const Symbol &subp, const Symbol &arg, GenericKind::DefinedIo ioKind, const Symbol &generic) { - if (const DeclTypeSpec * type{arg.GetType()}) { - if (const DerivedTypeSpec * derivedType{type->AsDerived()}) { + if (const DeclTypeSpec *type{arg.GetType()}) { + if (const DerivedTypeSpec *derivedType{type->AsDerived()}) { CheckAlreadySeenDefinedIo(*derivedType, ioKind, subp, generic); bool isPolymorphic{type->IsPolymorphic()}; if (isPolymorphic != IsExtensibleType(derivedType)) { @@ -2077,7 +2154,7 @@ void CheckHelper::CheckDioDummyIsDefaultInteger( const Symbol &subp, const Symbol &arg) { - if (const DeclTypeSpec * type{arg.GetType()}; + if (const DeclTypeSpec *type{arg.GetType()}; type && type->IsNumeric(TypeCategory::Integer)) { if (const auto kind{evaluate::ToInt64(type->numericTypeSpec().kind())}; kind && *kind == context_.GetDefaultKind(TypeCategory::Integer)) { @@ -2285,6 +2362,18 @@ } } +void CheckHelper::CheckSymbolType(const Symbol &symbol) { + if (!IsAllocatableOrPointer(symbol)) { // C702 + if (auto dyType{evaluate::DynamicType::From(symbol)}) { + if (dyType->HasDeferredTypeParameter()) { + messages_.Say( + "'%s' has a type %s with a deferred type parameter but is neither an allocatable or a pointer"_err_en_US, + symbol.name(), dyType->AsFortran()); + } + } + } +} + void SubprogramMatchHelper::Check( const Symbol &symbol1, const Symbol &symbol2) { const auto details1{symbol1.get()}; diff --git a/flang/lib/Semantics/check-io.h b/flang/lib/Semantics/check-io.h --- a/flang/lib/Semantics/check-io.h +++ b/flang/lib/Semantics/check-io.h @@ -126,8 +126,15 @@ void CheckForPureSubprogram() const; - void CheckForBadIoComponent( + parser::Message *CheckForBadIoType(const evaluate::DynamicType &, + GenericKind::DefinedIo, parser::CharBlock) const; + void CheckForBadIoType( const SomeExpr &, GenericKind::DefinedIo, parser::CharBlock) const; + parser::Message *CheckForBadIoType( + const Symbol &, GenericKind::DefinedIo, parser::CharBlock) const; + + void CheckNamelist( + const Symbol &, GenericKind::DefinedIo, parser::CharBlock) const; void Init(IoStmtKind s) { stmt_ = s; diff --git a/flang/lib/Semantics/check-io.cpp 
b/flang/lib/Semantics/check-io.cpp --- a/flang/lib/Semantics/check-io.cpp +++ b/flang/lib/Semantics/check-io.cpp @@ -231,6 +231,9 @@ if (!IsVariable(*expr)) { context_.Say(format.source, "Assigned format label must be a scalar variable"_err_en_US); + } else if (context_.ShouldWarn(common::LanguageFeature::Assign)) { + context_.Say(format.source, + "Assigned format labels are deprecated"_port_en_US); } return; } @@ -323,7 +326,7 @@ } CheckForDefinableVariable(*var, "Input"); if (auto expr{AnalyzeExpr(context_, *var)}) { - CheckForBadIoComponent(*expr, + CheckForBadIoType(*expr, flags_.test(Flag::FmtOrNml) ? GenericKind::DefinedIo::ReadFormatted : GenericKind::DefinedIo::ReadUnformatted, var->GetSource()); @@ -616,7 +619,7 @@ context_.Say(parser::FindSourceLocation(*x), "Output item must not be a procedure pointer"_err_en_US); // C1233 } - CheckForBadIoComponent(*expr, + CheckForBadIoType(*expr, flags_.test(Flag::FmtOrNml) ? GenericKind::DefinedIo::WriteFormatted : GenericKind::DefinedIo::WriteUnformatted, @@ -738,29 +741,21 @@ Done(); } -static void CheckForDoVariableInNamelist(const Symbol &namelist, - SemanticsContext &context, parser::CharBlock namelistLocation) { - const auto &details{namelist.GetUltimate().get()}; - for (const Symbol &object : details.objects()) { - context.CheckIndexVarRedefine(namelistLocation, object); - } -} - -static void CheckForDoVariableInNamelistSpec( - const parser::ReadStmt &readStmt, SemanticsContext &context) { - const std::list &controls{readStmt.controls}; +static const parser::Name *FindNamelist( + const std::list &controls) { for (const auto &control : controls) { - if (const auto *namelist{std::get_if(&control.u)}) { - if (const Symbol * symbol{namelist->symbol}) { - CheckForDoVariableInNamelist(*symbol, context, namelist->source); + if (const parser::Name * namelist{std::get_if(&control.u)}) { + if (namelist->symbol && + namelist->symbol->GetUltimate().has()) { + return namelist; } } } + return nullptr; } static void CheckForDoVariable( const parser::ReadStmt &readStmt, SemanticsContext &context) { - CheckForDoVariableInNamelistSpec(readStmt, context); const std::list &items{readStmt.items}; for (const auto &item : items) { if (const parser::Variable * @@ -774,6 +769,12 @@ if (!flags_.test(Flag::InternalUnit)) { CheckForPureSubprogram(); } + if (const parser::Name * namelist{FindNamelist(readStmt.controls)}) { + if (namelist->symbol) { + CheckNamelist(*namelist->symbol, GenericKind::DefinedIo::ReadFormatted, + namelist->source); + } + } CheckForDoVariable(readStmt, context_); if (!flags_.test(Flag::IoControlList)) { Done(); @@ -807,10 +808,16 @@ Done(); } -void IoChecker::Leave(const parser::WriteStmt &) { +void IoChecker::Leave(const parser::WriteStmt &writeStmt) { if (!flags_.test(Flag::InternalUnit)) { CheckForPureSubprogram(); } + if (const parser::Name * namelist{FindNamelist(writeStmt.controls)}) { + if (namelist->symbol) { + CheckNamelist(*namelist->symbol, GenericKind::DefinedIo::WriteFormatted, + namelist->source); + } + } LeaveReadWrite(); CheckForProhibitedSpecifier(IoSpecKind::Blank); // C1213 CheckForProhibitedSpecifier(IoSpecKind::End); // C1213 @@ -1030,20 +1037,139 @@ } } -// Fortran 2018, 12.6.3 paragraph 7 -void IoChecker::CheckForBadIoComponent(const SomeExpr &expr, +// Seeks out an allocatable or pointer ultimate component that is not +// nested in a nonallocatable/nonpointer component with a specific +// defined I/O procedure. 
+static const Symbol *FindUnsafeIoDirectComponent(GenericKind::DefinedIo which, + const DerivedTypeSpec &derived, const Scope &scope) { + if (HasDefinedIo(which, derived, &scope)) { + return nullptr; + } + if (const Scope * dtScope{derived.scope()}) { + for (const auto &pair : *dtScope) { + const Symbol &symbol{*pair.second}; + if (IsAllocatableOrPointer(symbol)) { + return &symbol; + } + if (const auto *details{symbol.detailsIf()}) { + if (const DeclTypeSpec * type{details->type()}) { + if (type->category() == DeclTypeSpec::Category::TypeDerived) { + const DerivedTypeSpec &componentDerived{type->derivedTypeSpec()}; + if (const Symbol * + bad{FindUnsafeIoDirectComponent( + which, componentDerived, scope)}) { + return bad; + } + } + } + } + } + } + return nullptr; +} + +// For a type that does not have a defined I/O subroutine, finds a direct +// component that is a witness to an accessibility violation outside the module +// in which the type was defined. +static const Symbol *FindInaccessibleComponent(GenericKind::DefinedIo which, + const DerivedTypeSpec &derived, const Scope &scope) { + if (const Scope * dtScope{derived.scope()}) { + if (const Scope * module{FindModuleContaining(*dtScope)}) { + for (const auto &pair : *dtScope) { + const Symbol &symbol{*pair.second}; + if (IsAllocatableOrPointer(symbol)) { + continue; // already an error + } + if (const auto *details{symbol.detailsIf()}) { + const DerivedTypeSpec *componentDerived{nullptr}; + if (const DeclTypeSpec * type{details->type()}) { + if (type->category() == DeclTypeSpec::Category::TypeDerived) { + componentDerived = &type->derivedTypeSpec(); + } + } + if (componentDerived && + HasDefinedIo(which, *componentDerived, &scope)) { + continue; // this component and its descendents are fine + } + if (symbol.attrs().test(Attr::PRIVATE) && + !symbol.test(Symbol::Flag::ParentComp)) { + if (!DoesScopeContain(module, scope)) { + return &symbol; + } + } + if (componentDerived) { + if (const Symbol * + bad{FindInaccessibleComponent( + which, *componentDerived, scope)}) { + return bad; + } + } + } + } + } + } + return nullptr; +} + +// Fortran 2018, 12.6.3 paragraphs 5 & 7 +parser::Message *IoChecker::CheckForBadIoType(const evaluate::DynamicType &type, GenericKind::DefinedIo which, parser::CharBlock where) const { - if (auto type{expr.GetType()}) { - if (type->category() == TypeCategory::Derived && - !type->IsUnlimitedPolymorphic()) { + if (type.IsUnlimitedPolymorphic()) { + return &context_.Say( + where, "I/O list item may not be unlimited polymorphic"_err_en_US); + } else if (type.category() == TypeCategory::Derived) { + const auto &derived{type.GetDerivedTypeSpec()}; + const Scope &scope{context_.FindScope(where)}; + if (const Symbol * + bad{FindUnsafeIoDirectComponent(which, derived, scope)}) { + return &context_.SayWithDecl(*bad, where, + "Derived type '%s' in I/O cannot have an allocatable or pointer direct component '%s' unless using defined I/O"_err_en_US, + derived.name(), bad->name()); + } + if (!HasDefinedIo(which, derived, &scope)) { + if (type.IsPolymorphic()) { + return &context_.Say(where, + "Derived type '%s' in I/O may not be polymorphic unless using defined I/O"_err_en_US, + derived.name()); + } if (const Symbol * - bad{FindUnsafeIoDirectComponent( - which, type->GetDerivedTypeSpec(), &context_.FindScope(where))}) { - context_.SayWithDecl(*bad, where, - "Derived type in I/O cannot have an allocatable or pointer direct component unless using defined I/O"_err_en_US); + bad{FindInaccessibleComponent(which, derived, scope)}) { 
+ return &context_.Say(where, + "I/O of the derived type '%s' may not be performed without defined I/O in a scope in which a direct component like '%s' is inaccessible"_err_en_US, + derived.name(), bad->name()); } } } + return nullptr; +} + +void IoChecker::CheckForBadIoType(const SomeExpr &expr, + GenericKind::DefinedIo which, parser::CharBlock where) const { + if (auto type{expr.GetType()}) { + CheckForBadIoType(*type, which, where); + } +} + +parser::Message *IoChecker::CheckForBadIoType(const Symbol &symbol, + GenericKind::DefinedIo which, parser::CharBlock where) const { + if (auto type{evaluate::DynamicType::From(symbol)}) { + if (auto *msg{CheckForBadIoType(*type, which, where)}) { + evaluate::AttachDeclaration(*msg, symbol); + return msg; + } + } + return nullptr; +} + +void IoChecker::CheckNamelist(const Symbol &namelist, + GenericKind::DefinedIo which, parser::CharBlock namelistLocation) const { + const auto &details{namelist.GetUltimate().get()}; + for (const Symbol &object : details.objects()) { + context_.CheckIndexVarRedefine(namelistLocation, object); + if (auto *msg{CheckForBadIoType(object, which, namelistLocation)}) { + evaluate::AttachDeclaration(*msg, namelist); + } + } } } // namespace Fortran::semantics diff --git a/flang/lib/Semantics/definable.h b/flang/lib/Semantics/definable.h --- a/flang/lib/Semantics/definable.h +++ b/flang/lib/Semantics/definable.h @@ -27,7 +27,8 @@ ENUM_CLASS(DefinabilityFlag, VectorSubscriptIsOk, // a vector subscript may appear (i.e., assignment) - PointerDefinition) // a pointer is being defined, not its target + PointerDefinition, // a pointer is being defined, not its target + PolymorphicOkInPure) // don't check for polymorphic type in pure subprogram using DefinabilityFlags = common::EnumSet; diff --git a/flang/lib/Semantics/definable.cpp b/flang/lib/Semantics/definable.cpp --- a/flang/lib/Semantics/definable.cpp +++ b/flang/lib/Semantics/definable.cpp @@ -149,7 +149,8 @@ "'%s' is an entity with either an EVENT_TYPE or LOCK_TYPE"_en_US, original); } - if (FindPureProcedureContaining(scope)) { + if (!flags.test(DefinabilityFlag::PolymorphicOkInPure) && + FindPureProcedureContaining(scope)) { if (auto dyType{evaluate::DynamicType::From(ultimate)}) { if (dyType->IsPolymorphic()) { // C1596 return BlameSymbol(at, diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -43,7 +43,7 @@ using common::NumericOperator; using common::TypeCategory; -static inline std::string ToUpperCase(const std::string &str) { +static inline std::string ToUpperCase(std::string_view str) { return parser::ToUpperCaseLetters(str); } @@ -64,12 +64,12 @@ static std::optional AnalyzeTypeSpec( const std::optional &spec) { if (spec) { - if (const semantics::DeclTypeSpec * typeSpec{spec->declTypeSpec}) { + if (const semantics::DeclTypeSpec *typeSpec{spec->declTypeSpec}) { // Name resolution sets TypeSpec::declTypeSpec only when it's valid // (viz., an intrinsic type with valid known kind or a non-polymorphic // & non-ABSTRACT derived type). 
- if (const semantics::IntrinsicTypeSpec * - intrinsic{typeSpec->AsIntrinsic()}) { + if (const semantics::IntrinsicTypeSpec *intrinsic{ + typeSpec->AsIntrinsic()}) { TypeCategory category{intrinsic->category()}; if (auto optKind{ToInt64(intrinsic->kind())}) { int kind{static_cast(*optKind)}; @@ -84,8 +84,8 @@ return DynamicTypeWithLength{DynamicType{category, kind}}; } } - } else if (const semantics::DerivedTypeSpec * - derived{typeSpec->AsDerived()}) { + } else if (const semantics::DerivedTypeSpec *derived{ + typeSpec->AsDerived()}) { return DynamicTypeWithLength{DynamicType{*derived}}; } } @@ -155,8 +155,10 @@ // Find and return a user-defined operator or report an error. // The provided message is used if there is no such operator. - MaybeExpr TryDefinedOp(const char *, parser::MessageFixedText, - const Symbol **definedOpSymbolPtr = nullptr, bool isUserOp = false); + // If a definedOpSymbolPtr is provided, the caller must check + // for its accessibility. + MaybeExpr TryDefinedOp( + const char *, parser::MessageFixedText, bool isUserOp = false); template MaybeExpr TryDefinedOp(E opr, parser::MessageFixedText msg) { return TryDefinedOp( @@ -174,8 +176,8 @@ std::optional AnalyzeExpr(const parser::Expr &); MaybeExpr AnalyzeExprOrWholeAssumedSizeArray(const parser::Expr &); bool AreConformable() const; - const Symbol *FindBoundOp( - parser::CharBlock, int passIndex, const Symbol *&definedOp); + const Symbol *FindBoundOp(parser::CharBlock, int passIndex, + const Symbol *&generic, bool isSubroutine); void AddAssignmentConversion( const DynamicType &lhsType, const DynamicType &rhsType); bool OkLogicalIntegerAssignment(TypeCategory lhs, TypeCategory rhs); @@ -257,7 +259,7 @@ } else if (const auto *object{ symbol.detailsIf()}) { // C928 & C1002 - if (Triplet * last{std::get_if(&ref.subscript().back().u)}) { + if (Triplet *last{std::get_if(&ref.subscript().back().u)}) { if (!last->upper() && object->IsAssumedSize()) { Say("Assumed-size array '%s' must have explicit final " "subscript upper bound value"_err_en_US, @@ -379,10 +381,10 @@ if (auto *triplet{std::get_if( &arrElement.subscripts.front().u)}) { if (!std::get<2 /*stride*/>(triplet->t).has_value()) { - if (const Symbol * - symbol{parser::GetLastName(arrElement.base).symbol}) { + if (const Symbol *symbol{ + parser::GetLastName(arrElement.base).symbol}) { const Symbol &ultimate{symbol->GetUltimate()}; - if (const semantics::DeclTypeSpec * type{ultimate.GetType()}) { + if (const semantics::DeclTypeSpec *type{ultimate.GetType()}) { if (!ultimate.IsObjectArray() && type->category() == semantics::DeclTypeSpec::Character) { // The ambiguous S(j:k) was parsed as an array section @@ -698,9 +700,8 @@ } MaybeExpr ExpressionAnalyzer::Analyze(const parser::ComplexLiteralConstant &z) { - return AsMaybeExpr( - ConstructComplex(GetContextualMessages(), Analyze(std::get<0>(z.t)), - Analyze(std::get<1>(z.t)), GetDefaultKind(TypeCategory::Real))); + return AnalyzeComplex(Analyze(std::get<0>(z.t)), Analyze(std::get<1>(z.t)), + "complex literal constant"); } // CHARACTER literal processing. 
@@ -805,8 +806,7 @@ ultimate, AsGenericExpr(TypeParamInquiry{std::nullopt, ultimate}))); } else { if (n.symbol->attrs().test(semantics::Attr::VOLATILE)) { - if (const semantics::Scope * - pure{semantics::FindPureProcedureContaining( + if (const semantics::Scope *pure{semantics::FindPureProcedureContaining( context_.FindScope(n.source))}) { SayAt(n, "VOLATILE variable '%s' may not be referenced in pure subprogram '%s'"_err_en_US, @@ -840,7 +840,8 @@ } MaybeExpr ExpressionAnalyzer::Analyze(const parser::NullInit &n) { - if (MaybeExpr value{Analyze(n.v)}) { + auto restorer{AllowNullPointer()}; + if (MaybeExpr value{Analyze(n.v.value())}) { // Subtle: when the NullInit is a DataStmtConstant, it might // be a misparse of a structure constructor without parameters // or components (e.g., T()). Checking the result to ensure @@ -851,6 +852,12 @@ return std::nullopt; } +MaybeExpr ExpressionAnalyzer::Analyze( + const parser::StmtFunctionStmt &stmtFunc) { + inStmtFunctionDefinition_ = true; + return Analyze(std::get>(stmtFunc.t)); +} + MaybeExpr ExpressionAnalyzer::Analyze(const parser::InitialDataTarget &x) { return Analyze(x.value()); } @@ -1068,7 +1075,7 @@ if (ae.subscripts.empty()) { // will be converted to function call later or error reported } else if (baseExpr->Rank() == 0) { - if (const Symbol * symbol{GetLastSymbol(*baseExpr)}) { + if (const Symbol *symbol{GetLastSymbol(*baseExpr)}) { if (!context_.HasError(symbol)) { if (inDataStmtConstant_) { // Better error for NULL(X) with a MOLD= argument @@ -1120,14 +1127,13 @@ if (&component.owner() == &scope) { return Component{std::move(base), component}; } - if (const Symbol * typeSymbol{scope.GetSymbol()}) { - if (const Symbol * - parentComponent{typeSymbol->GetParentComponent(&scope)}) { + if (const Symbol *typeSymbol{scope.GetSymbol()}) { + if (const Symbol *parentComponent{typeSymbol->GetParentComponent(&scope)}) { if (const auto *object{ parentComponent->detailsIf()}) { if (const auto *parentType{object->type()}) { - if (const semantics::Scope * - parentScope{parentType->derivedTypeSpec().scope()}) { + if (const semantics::Scope *parentScope{ + parentType->derivedTypeSpec().scope()}) { return CreateComponent( DataRef{Component{std::move(base), *parentComponent}}, component, *parentScope); @@ -1227,7 +1233,7 @@ if (auto *aRef{std::get_if(&dataRef->u)}) { subscripts = std::move(aRef->subscript()); reversed.push_back(aRef->GetLastSymbol()); - if (Component * component{aRef->base().UnwrapComponent()}) { + if (Component *component{aRef->base().UnwrapComponent()}) { dataRef = &component->base(); } else { dataRef = nullptr; @@ -1343,15 +1349,15 @@ MakeSpecific(std::move(values_))}); } } else if (type_->kind() == T::kind) { + ArrayConstructor result{MakeSpecific(std::move(values_))}; if constexpr (T::category == TypeCategory::Character) { if (auto len{type_->LEN()}) { - return AsMaybeExpr(ArrayConstructor{ - *std::move(len), MakeSpecific(std::move(values_))}); + if (IsConstantExpr(*len)) { + result.set_LEN(std::move(*len)); + } } - } else { - return AsMaybeExpr( - ArrayConstructor{MakeSpecific(std::move(values_))}); } + return AsMaybeExpr(std::move(result)); } } return std::nullopt; @@ -1669,7 +1675,7 @@ auto &parsedType{std::get(structure.t)}; parser::Name structureType{std::get(parsedType.t)}; parser::CharBlock &typeName{structureType.source}; - if (semantics::Symbol * typeSymbol{structureType.symbol}) { + if (semantics::Symbol *typeSymbol{structureType.symbol}) { if (typeSymbol->has()) { semantics::DerivedTypeSpec dtSpec{typeName, 
typeSymbol->GetUltimate()}; if (!CheckIsValidForwardReference(dtSpec)) { @@ -1713,6 +1719,9 @@ bool checkConflicts{true}; // until we hit one auto &messages{GetContextualMessages()}; + // NULL() can be a valid component + auto restorer{AllowNullPointer()}; + for (const auto &component : std::get>(structure.t)) { const parser::Expr &expr{ @@ -1777,10 +1786,9 @@ } } if (symbol) { - if (const auto *currScope{context_.globalScope().FindScope(source)}) { - if (auto msg{CheckAccessibleComponent(*currScope, *symbol)}) { - Say(source, *msg); - } + const semantics::Scope &innermost{context_.FindScope(expr.source)}; + if (auto msg{CheckAccessibleSymbol(innermost, *symbol)}) { + Say(expr.source, std::move(*msg)); } if (checkConflicts) { auto componentIter{ @@ -1808,15 +1816,14 @@ } unavailable.insert(symbol->name()); if (value) { - const auto &innermost{context_.FindScope(expr.source)}; if (symbol->has()) { CHECK(IsPointer(*symbol)); } else if (symbol->has()) { // C1594(4) if (const auto *pureProc{FindPureProcedureContaining(innermost)}) { - if (const Symbol * pointer{FindPointerComponent(*symbol)}) { - if (const Symbol * - object{FindExternallyVisibleObject(*value, *pureProc)}) { + if (const Symbol *pointer{FindPointerComponent(*symbol)}) { + if (const Symbol *object{ + FindExternallyVisibleObject(*value, *pureProc)}) { if (auto *msg{Say(expr.source, "Externally visible object '%s' may not be " "associated with pointer component '%s' in a " @@ -1845,8 +1852,41 @@ semantics::CheckStructConstructorPointerComponent( GetFoldingContext(), *symbol, *value, innermost); // C7104, C7105 result.Add(*symbol, Fold(std::move(*value))); - } else if (MaybeExpr converted{ - ConvertToType(*symbol, std::move(*value))}) { + continue; + } + if (IsNullPointer(*value)) { + if (IsAllocatable(*symbol)) { + if (IsBareNullPointer(&*value)) { + // NULL() with no arguments allowed by 7.5.10 para 6 for + // ALLOCATABLE. + result.Add(*symbol, Expr{NullPointer{}}); + continue; + } + if (IsNullObjectPointer(*value)) { + AttachDeclaration( + Say(expr.source, + "NULL() with arguments is not standard conforming as the value for allocatable component '%s'"_port_en_US, + symbol->name()), + *symbol); + // proceed to check type & shape + } else { + AttachDeclaration( + Say(expr.source, + "A NULL procedure pointer may not be used as the value for component '%s'"_err_en_US, + symbol->name()), + *symbol); + continue; + } + } else { + AttachDeclaration( + Say(expr.source, + "A NULL pointer may not be used as the value for component '%s'"_err_en_US, + symbol->name()), + *symbol); + continue; + } + } + if (MaybeExpr converted{ConvertToType(*symbol, std::move(*value))}) { if (auto componentShape{GetShape(GetFoldingContext(), *symbol)}) { if (auto valueShape{GetShape(GetFoldingContext(), *converted)}) { if (GetRank(*componentShape) == 0 && GetRank(*valueShape) > 0) { @@ -1884,9 +1924,6 @@ symbol->name()), *symbol); } - } else if (IsAllocatable(*symbol) && IsBareNullPointer(&*value)) { - // NULL() with no arguments allowed by 7.5.10 para 6 for ALLOCATABLE. 
- result.Add(*symbol, Expr{NullPointer{}}); } else if (auto symType{DynamicType::From(symbol)}) { if (IsAllocatable(*symbol) && symType->IsUnlimitedPolymorphic() && valueType) { @@ -1954,7 +1991,9 @@ static int GetPassIndex(const Symbol &proc) { CHECK(!proc.attrs().test(semantics::Attr::NOPASS)); std::optional passName{GetPassName(proc)}; - const auto *interface { semantics::FindInterface(proc) }; + const auto *interface { + semantics::FindInterface(proc) + }; if (!passName || !interface) { return 0; // first argument is passed-object } @@ -2019,7 +2058,7 @@ bool isSubroutine) -> std::optional { const parser::StructureComponent &sc{pcr.v.thing}; if (MaybeExpr base{Analyze(sc.base)}) { - if (const Symbol * sym{sc.component.symbol}) { + if (const Symbol *sym{sc.component.symbol}) { if (context_.HasError(sym)) { return std::nullopt; } @@ -2045,7 +2084,8 @@ // re-resolve the name to the specific binding sc.component.symbol = const_cast(sym); } else { - EmitGenericResolutionError(*sc.component.symbol, pair.second); + EmitGenericResolutionError( + *sc.component.symbol, pair.second, isSubroutine); return std::nullopt; } } @@ -2053,8 +2093,8 @@ if (dataRef && !CheckDataRef(*dataRef)) { return std::nullopt; } - if (const Symbol * - resolution{GetBindingResolution(dtExpr->GetType(), *sym)}) { + if (const Symbol *resolution{ + GetBindingResolution(dtExpr->GetType(), *sym)}) { AddPassArg(arguments, std::move(*dtExpr), *sym, false); return CalleeAndArguments{ ProcedureDesignator{*resolution}, std::move(arguments)}; @@ -2153,6 +2193,9 @@ context_.SetError(symbol); return false; } + } else if (inStmtFunctionDefinition_) { + semantics::ResolveSpecificationParts(context_, symbol); + CHECK(symbol.has()); } else { // 10.1.11 para 4 Say("The internal function '%s' may not be referenced in a specification expression"_err_en_US, symbol.name()); @@ -2190,6 +2233,9 @@ return IsBareNullPointer(iter->UnwrapExpr()); }) != actuals.end()}; for (const Symbol &specific : details->specificProcs()) { + if (isSubroutine != !IsFunction(specific)) { + continue; + } if (!ResolveForward(specific)) { continue; } @@ -2231,7 +2277,7 @@ } // Check parent derived type if (const auto *parentScope{symbol.owner().GetDerivedTypeParent()}) { - if (const Symbol * extended{parentScope->FindComponent(symbol.name())}) { + if (const Symbol *extended{parentScope->FindComponent(symbol.name())}) { auto pair{ResolveGeneric( *extended, actuals, adjustActuals, isSubroutine, false)}; if (pair.first) { @@ -2247,7 +2293,7 @@ // See 15.5.5.2 for details. if (!symbol.owner().IsGlobal() && !symbol.owner().IsDerivedType()) { for (const std::string &n : GetAllNames(context_, symbol.name())) { - if (const Symbol * outer{symbol.owner().parent().FindSymbol(n)}) { + if (const Symbol *outer{symbol.owner().parent().FindSymbol(n)}) { auto pair{ResolveGeneric(*outer, actuals, adjustActuals, isSubroutine, mightBeStructureConstructor)}; if (pair.first) { @@ -2294,12 +2340,14 @@ } void ExpressionAnalyzer::EmitGenericResolutionError( - const Symbol &symbol, bool dueToNullActuals) { + const Symbol &symbol, bool dueToNullActuals, bool isSubroutine) { Say(dueToNullActuals ? "One or more NULL() actual arguments to the generic procedure '%s' requires a MOLD= for disambiguation"_err_en_US : semantics::IsGenericDefinedOp(symbol) ? "No specific procedure of generic operator '%s' matches the actual arguments"_err_en_US - : "No specific procedure of generic '%s' matches the actual arguments"_err_en_US, + : isSubroutine + ? 
"No specific subroutine of generic '%s' matches the actual arguments"_err_en_US + : "No specific function of generic '%s' matches the actual arguments"_err_en_US, symbol.name()); } @@ -2362,7 +2410,7 @@ std::move(specificCall->arguments)}; } else { if (isGenericInterface) { - EmitGenericResolutionError(*symbol, dueToNullActual); + EmitGenericResolutionError(*symbol, dueToNullActual, isSubroutine); } return std::nullopt; } @@ -2451,7 +2499,7 @@ } template <> const Symbol *AssumedTypeDummy(const parser::Name &name) { - if (const Symbol * symbol{name.symbol}) { + if (const Symbol *symbol{name.symbol}) { if (const auto *type{symbol->GetType()}) { if (type->category() == semantics::DeclTypeSpec::TypeStar) { return symbol; @@ -2600,6 +2648,18 @@ if (!procRef) { analyzer.CheckForNullPointer( "in a non-pointer intrinsic assignment statement"); + const Expr &lhs{analyzer.GetExpr(0)}; + if (auto dyType{lhs.GetType()}; + dyType && dyType->IsPolymorphic()) { // 10.2.1.2p1(1) + const Symbol *lastWhole0{UnwrapWholeSymbolOrComponentDataRef(lhs)}; + const Symbol *lastWhole{ + lastWhole0 ? &lastWhole0->GetUltimate() : nullptr}; + if (!lastWhole || !IsAllocatable(*lastWhole)) { + Say("Left-hand side of assignment may not be polymorphic unless assignment is to an entire allocatable"_err_en_US); + } else if (evaluate::IsCoarray(*lastWhole)) { + Say("Left-hand side of assignment may not be polymorphic if it is a coarray"_err_en_US); + } + } } assignment.emplace(analyzer.MoveExpr(0), analyzer.MoveExpr(1)); if (procRef) { @@ -2616,7 +2676,11 @@ const parser::PointerAssignmentStmt &x) { if (!x.typedAssignment) { MaybeExpr lhs{Analyze(std::get(x.t))}; - MaybeExpr rhs{Analyze(std::get(x.t))}; + MaybeExpr rhs; + { + auto restorer{AllowNullPointer()}; + rhs = Analyze(std::get(x.t)); + } if (!lhs || !rhs) { x.typedAssignment.Reset( new GenericAssignmentWrapper{}, GenericAssignmentWrapper::Deleter); @@ -2670,21 +2734,22 @@ std::optional ExpressionAnalyzer::CheckCall( parser::CharBlock callSite, const ProcedureDesignator &proc, ActualArguments &arguments) { + bool treatExternalAsImplicit{IsExternalCalledImplicitly(callSite, proc)}; + const Symbol *procSymbol{proc.GetSymbol()}; auto chars{characteristics::Procedure::Characterize( proc, context_.foldingContext())}; + bool ok{true}; if (chars) { - bool treatExternalAsImplicit{IsExternalCalledImplicitly(callSite, proc)}; if (treatExternalAsImplicit && !chars->CanBeCalledViaImplicitInterface()) { Say(callSite, "References to the procedure '%s' require an explicit interface"_err_en_US, - DEREF(proc.GetSymbol()).name()); + DEREF(procSymbol).name()); } // Checks for ASSOCIATED() are done in intrinsic table processing const SpecificIntrinsic *specificIntrinsic{proc.GetSpecificIntrinsic()}; bool procIsAssociated{ specificIntrinsic && specificIntrinsic->name == "associated"}; if (!procIsAssociated) { - const Symbol *procSymbol{proc.GetSymbol()}; bool procIsDummy{procSymbol && IsDummy(*procSymbol)}; if (chars->functionResult && chars->functionResult->IsAssumedLengthCharacter() && @@ -2692,12 +2757,11 @@ Say(callSite, "Assumed-length character function must be defined with a length to be called"_err_en_US); } - semantics::CheckArguments(*chars, arguments, GetFoldingContext(), + ok &= semantics::CheckArguments(*chars, arguments, GetFoldingContext(), context_.FindScope(callSite), treatExternalAsImplicit, specificIntrinsic); if (procSymbol && !IsPureProcedure(*procSymbol)) { - if (const semantics::Scope * - pure{semantics::FindPureProcedureContaining( + if (const semantics::Scope 
*pure{semantics::FindPureProcedureContaining( context_.FindScope(callSite))}) { Say(callSite, "Procedure '%s' referenced in pure subprogram '%s' must be pure too"_err_en_US, @@ -2706,6 +2770,19 @@ } } } + if (ok && !treatExternalAsImplicit && procSymbol && + !(chars && chars->HasExplicitInterface())) { + if (const Symbol *global{FindGlobal(*procSymbol)}; + global && global != procSymbol && IsProcedure(*global)) { + // Check a known global definition behind a local interface + if (auto globalChars{characteristics::Procedure::Characterize( + *global, context_.foldingContext())}) { + semantics::CheckArguments(*globalChars, arguments, GetFoldingContext(), + context_.FindScope(callSite), true, + nullptr /*not specific intrinsic*/); + } + } + } return chars; } @@ -2713,8 +2790,8 @@ MaybeExpr ExpressionAnalyzer::Analyze(const parser::Expr::Parentheses &x) { if (MaybeExpr operand{Analyze(x.v.value())}) { - if (const semantics::Symbol * symbol{GetLastSymbol(*operand)}) { - if (const semantics::Symbol * result{FindFunctionResult(*symbol)}) { + if (const semantics::Symbol *symbol{GetLastSymbol(*operand)}) { + if (const semantics::Symbol *result{FindFunctionResult(*symbol)}) { if (semantics::IsProcedurePointer(*result)) { Say("A function reference that returns a procedure " "pointer may not be parenthesized"_err_en_US); // C1003 @@ -2782,7 +2859,7 @@ // intrinsic function. // Use the actual source for the name of the call for error reporting. std::optional arg; - if (const Symbol * assumedTypeDummy{AssumedTypeDummy(x.v.value())}) { + if (const Symbol *assumedTypeDummy{AssumedTypeDummy(x.v.value())}) { arg = ActualArgument{ActualArgument::AssumedType{*assumedTypeDummy}}; } else if (MaybeExpr argExpr{Analyze(x.v.value())}) { arg = ActualArgument{std::move(*argExpr)}; @@ -2801,7 +2878,7 @@ ArgumentAnalyzer analyzer{*this, name.source}; analyzer.Analyze(std::get<1>(x.t)); return analyzer.TryDefinedOp(name.source.ToString().c_str(), - "No operator %s defined for %s"_err_en_US, nullptr, true); + "No operator %s defined for %s"_err_en_US, true); } // Binary (dyadic) operations @@ -2848,22 +2925,9 @@ } MaybeExpr ExpressionAnalyzer::Analyze( - const parser::Expr::ComplexConstructor &x) { - auto re{Analyze(std::get<0>(x.t).value())}; - auto im{Analyze(std::get<1>(x.t).value())}; - if (re && re->Rank() > 0) { - context().Say(std::get<0>(x.t).value().source, - "Real part of complex constructor must be scalar"_err_en_US); - } - if (im && im->Rank() > 0) { - context().Say(std::get<1>(x.t).value().source, - "Imaginary part of complex constructor must be scalar"_err_en_US); - } - if (re && im) { - ConformabilityCheck(GetContextualMessages(), *re, *im); - } - return AsMaybeExpr(ConstructComplex(GetContextualMessages(), std::move(re), - std::move(im), GetDefaultKind(TypeCategory::Real))); + const parser::Expr::ComplexConstructor &z) { + return AnalyzeComplex(Analyze(std::get<0>(z.t).value()), + Analyze(std::get<1>(z.t).value()), "complex constructor"); } MaybeExpr ExpressionAnalyzer::Analyze(const parser::Expr::Concat &x) { @@ -2998,7 +3062,7 @@ analyzer.Analyze(std::get<1>(x.t)); analyzer.Analyze(std::get<2>(x.t)); return analyzer.TryDefinedOp(name.source.ToString().c_str(), - "No operator %s defined for %s and %s"_err_en_US, nullptr, true); + "No operator %s defined for %s and %s"_err_en_US, true); } // Returns true if a parsed function reference should be converted @@ -3018,11 +3082,12 @@ if (!name->symbol) { return false; } else if (name->symbol->Rank() == 0) { - if (const Symbol * - function{ + if (const Symbol 
*function{ semantics::IsFunctionResultWithSameNameAsFunction(*name->symbol)}) { auto &msg{context.Say(funcRef.v.source, - "Recursive call to '%s' requires a distinct RESULT in its declaration"_err_en_US, + function->flags().test(Symbol::Flag::StmtFunction) + ? "Recursive call to statement function '%s' is not allowed"_err_en_US + : "Recursive call to '%s' requires a distinct RESULT in its declaration"_err_en_US, name->source)}; AttachDeclaration(&msg, *function); name->symbol = const_cast(function); @@ -3055,8 +3120,7 @@ std::get_if>(&u)}) { parser::FunctionReference &funcRef{func->value()}; auto &proc{std::get(funcRef.v.t)}; - if (Symbol * - origSymbol{ + if (Symbol *origSymbol{ common::visit(common::visitors{ [&](parser::Name &name) { return name.symbol; }, [&](parser::ProcComponentRef &pcr) { @@ -3087,9 +3151,6 @@ template MaybeExpr ExpressionAnalyzer::ExprOrVariable( const PARSED &x, parser::CharBlock source) { - if (useSavedTypedExprs_ && x.typedExpr) { - return x.typedExpr->v; - } auto restorer{GetContextualMessages().SetLocation(source)}; if constexpr (std::is_same_v || std::is_same_v) { @@ -3141,10 +3202,21 @@ } MaybeExpr ExpressionAnalyzer::Analyze(const parser::Expr &expr) { - return ExprOrVariable(expr, expr.source); + if (useSavedTypedExprs_ && expr.typedExpr) { + return expr.typedExpr->v; + } + MaybeExpr result{ExprOrVariable(expr, expr.source)}; + if (!isNullPointerOk_ && result && IsNullPointer(*result)) { + Say(expr.source, + "NULL() may not be used as an expression in this context"_err_en_US); + } + return result; } MaybeExpr ExpressionAnalyzer::Analyze(const parser::Variable &variable) { + if (useSavedTypedExprs_ && variable.typedExpr) { + return variable.typedExpr->v; + } return ExprOrVariable(variable, variable.GetSource()); } @@ -3343,7 +3415,7 @@ return Expr{NullPointer{}}; } } - if (const Symbol * symbol{proc.GetSymbol()}) { + if (const Symbol *symbol{proc.GetSymbol()}) { if (!ResolveForward(*symbol)) { return std::nullopt; } @@ -3380,6 +3452,21 @@ } } +MaybeExpr ExpressionAnalyzer::AnalyzeComplex( + MaybeExpr &&re, MaybeExpr &&im, const char *what) { + if (re && re->Rank() > 0) { + Say("Real part of %s is not scalar"_port_en_US, what); + } + if (im && im->Rank() > 0) { + Say("Imaginary part of %s is not scalar"_port_en_US, what); + } + if (re && im) { + ConformabilityCheck(GetContextualMessages(), *re, *im); + } + return AsMaybeExpr(ConstructComplex(GetContextualMessages(), std::move(re), + std::move(im), GetDefaultKind(TypeCategory::Real))); +} + void ArgumentAnalyzer::Analyze(const parser::Variable &x) { source_.ExtendToCover(x.GetSource()); if (MaybeExpr expr{context_.Analyze(x)}) { @@ -3413,8 +3500,6 @@ void ArgumentAnalyzer::Analyze( const parser::ActualArgSpec &arg, bool isSubroutine) { - // TODO: Actual arguments that are procedures and procedure pointers need to - // be detected and represented (they're not expressions). // TODO: C1534: Don't allow a "restricted" specific intrinsic to be passed. 
std::optional actual; common::visit(common::visitors{ @@ -3561,63 +3646,100 @@ return true; } -MaybeExpr ArgumentAnalyzer::TryDefinedOp(const char *opr, - parser::MessageFixedText error, const Symbol **definedOpSymbolPtr, - bool isUserOp) { +MaybeExpr ArgumentAnalyzer::TryDefinedOp( + const char *opr, parser::MessageFixedText error, bool isUserOp) { if (AnyUntypedOrMissingOperand()) { context_.Say(error, ToUpperCase(opr), TypeAsFortran(0), TypeAsFortran(1)); return std::nullopt; } - const Symbol *localDefinedOpSymbolPtr{nullptr}; - if (!definedOpSymbolPtr) { - definedOpSymbolPtr = &localDefinedOpSymbolPtr; - } + MaybeExpr result; + bool anyPossibilities{false}; + std::optional inaccessible; + std::vector hit; + std::string oprNameString{ + isUserOp ? std::string{opr} : "operator("s + opr + ')'}; + parser::CharBlock oprName{oprNameString}; { auto restorer{context_.GetContextualMessages().DiscardMessages()}; - std::string oprNameString{ - isUserOp ? std::string{opr} : "operator("s + opr + ')'}; - parser::CharBlock oprName{oprNameString}; const auto &scope{context_.context().FindScope(source_)}; - if (Symbol * symbol{scope.FindSymbol(oprName)}) { - *definedOpSymbolPtr = symbol; + if (Symbol *symbol{scope.FindSymbol(oprName)}) { + anyPossibilities = true; parser::Name name{symbol->name(), symbol}; - if (auto result{context_.AnalyzeDefinedOp(name, GetActuals())}) { - return result; + result = context_.AnalyzeDefinedOp(name, GetActuals()); + if (result) { + inaccessible = CheckAccessibleSymbol(scope, *symbol); + if (inaccessible) { + result.reset(); + } else { + hit.push_back(symbol); + } } } for (std::size_t passIndex{0}; passIndex < actuals_.size(); ++passIndex) { - if (const Symbol * - symbol{FindBoundOp(oprName, passIndex, *definedOpSymbolPtr)}) { - if (MaybeExpr result{TryBoundOp(*symbol, passIndex)}) { - return result; + const Symbol *generic{nullptr}; + if (const Symbol *binding{ + FindBoundOp(oprName, passIndex, generic, false)}) { + anyPossibilities = true; + if (MaybeExpr thisResult{TryBoundOp(*binding, passIndex)}) { + if (auto thisInaccessible{ + CheckAccessibleSymbol(scope, DEREF(generic))}) { + inaccessible = thisInaccessible; + } else { + result = std::move(thisResult); + hit.push_back(binding); + } } } } } - if (*definedOpSymbolPtr) { - SayNoMatch(ToUpperCase((*definedOpSymbolPtr)->name().ToString())); - } else if (actuals_.size() == 1 || AreConformable()) { - if (CheckForNullPointer()) { - context_.Say(error, ToUpperCase(opr), TypeAsFortran(0), TypeAsFortran(1)); + if (result) { + if (hit.size() > 1) { + if (auto *msg{context_.Say( + "%zd matching accessible generic interfaces for %s were found"_err_en_US, + hit.size(), ToUpperCase(opr))}) { + for (const Symbol *symbol : hit) { + AttachDeclaration(*msg, *symbol); + } + } } - } else { + } else if (inaccessible) { + context_.Say(source_, std::move(*inaccessible)); + } else if (anyPossibilities) { + SayNoMatch(ToUpperCase(oprNameString), false); + } else if (actuals_.size() == 2 && !AreConformable()) { context_.Say( "Operands of %s are not conformable; have rank %d and rank %d"_err_en_US, ToUpperCase(opr), actuals_[0]->Rank(), actuals_[1]->Rank()); + } else if (CheckForNullPointer()) { + context_.Say(error, ToUpperCase(opr), TypeAsFortran(0), TypeAsFortran(1)); } - return std::nullopt; + return result; } MaybeExpr ArgumentAnalyzer::TryDefinedOp( std::vector oprs, parser::MessageFixedText error) { - const Symbol *definedOpSymbolPtr{nullptr}; - for (std::size_t i{1}; i < oprs.size(); ++i) { + if (oprs.size() == 1) { + return 
TryDefinedOp(oprs[0], error); + } + MaybeExpr result; + std::vector hit; + { auto restorer{context_.GetContextualMessages().DiscardMessages()}; - if (auto result{TryDefinedOp(oprs[i], error, &definedOpSymbolPtr)}) { - return result; + for (std::size_t i{0}; i < oprs.size(); ++i) { + if (MaybeExpr thisResult{TryDefinedOp(oprs[i], error)}) { + result = std::move(thisResult); + hit.push_back(oprs[i]); + } } } - return TryDefinedOp(oprs[0], error, &definedOpSymbolPtr); + if (hit.empty()) { // for the error + result = TryDefinedOp(oprs[0], error); + } else if (hit.size() > 1) { + context_.Say( + "Matching accessible definitions were found with %zd variant spellings of the generic operator ('%s', '%s')"_err_en_US, + hit.size(), ToUpperCase(hit[0]), ToUpperCase(hit[1])); + } + return result; } MaybeExpr ArgumentAnalyzer::TryBoundOp(const Symbol &symbol, int passIndex) { @@ -3694,30 +3816,34 @@ } std::optional ArgumentAnalyzer::GetDefinedAssignmentProc() { - auto restorer{context_.GetContextualMessages().DiscardMessages()}; + const Symbol *proc{nullptr}; + int passedObjectIndex{-1}; std::string oprNameString{"assignment(=)"}; parser::CharBlock oprName{oprNameString}; - const Symbol *proc{nullptr}; const auto &scope{context_.context().FindScope(source_)}; - if (const Symbol * symbol{scope.FindSymbol(oprName)}) { - ExpressionAnalyzer::AdjustActuals noAdjustment; - auto pair{context_.ResolveGeneric(*symbol, actuals_, noAdjustment, true)}; - if (pair.first) { - proc = pair.first; - } else { - context_.EmitGenericResolutionError(*symbol, pair.second); - } - } - int passedObjectIndex{-1}; - const Symbol *definedOpSymbol{nullptr}; - for (std::size_t i{0}; i < actuals_.size(); ++i) { - if (const Symbol * specific{FindBoundOp(oprName, i, definedOpSymbol)}) { - if (const Symbol * - resolution{GetBindingResolution(GetType(i), *specific)}) { - proc = resolution; + // If multiple resolutions were possible, they will have been already + // diagnosed. 
+ { + auto restorer{context_.GetContextualMessages().DiscardMessages()}; + if (const Symbol *symbol{scope.FindSymbol(oprName)}) { + ExpressionAnalyzer::AdjustActuals noAdjustment; + auto pair{context_.ResolveGeneric(*symbol, actuals_, noAdjustment, true)}; + if (pair.first) { + proc = pair.first; } else { - proc = specific; - passedObjectIndex = i; + context_.EmitGenericResolutionError(*symbol, pair.second, true); + } + } + for (std::size_t i{0}; i < actuals_.size(); ++i) { + const Symbol *generic{nullptr}; + if (const Symbol *specific{FindBoundOp(oprName, i, generic, true)}) { + if (const Symbol *resolution{ + GetBindingResolution(GetType(i), *specific)}) { + proc = resolution; + } else { + proc = specific; + passedObjectIndex = i; + } } } } @@ -3737,7 +3863,7 @@ for (const auto &actual : actuals_) { if (!actual.has_value()) { os << "- error\n"; - } else if (const Symbol * symbol{actual->GetAssumedTypeDummy()}) { + } else if (const Symbol *symbol{actual->GetAssumedTypeDummy()}) { os << "- assumed type: " << symbol->name().ToString() << '\n'; } else if (const Expr *expr{actual->UnwrapExpr()}) { expr->AsFortran(os << "- expr: ") << '\n'; @@ -3750,7 +3876,7 @@ std::optional ArgumentAnalyzer::AnalyzeExpr( const parser::Expr &expr) { source_.ExtendToCover(expr.source); - if (const Symbol * assumedTypeDummy{AssumedTypeDummy(expr)}) { + if (const Symbol *assumedTypeDummy{AssumedTypeDummy(expr)}) { expr.typedExpr.Reset(new GenericExprWrapper{}, GenericExprWrapper::Deleter); if (isProcedureCall_) { ActualArgument arg{ActualArgument::AssumedType{*assumedTypeDummy}}; @@ -3784,6 +3910,7 @@ return context_.Analyze(expr); } } + auto restorer{context_.AllowNullPointer()}; return context_.Analyze(expr); } @@ -3794,24 +3921,24 @@ } // Look for a type-bound operator in the type of arg number passIndex. 
-const Symbol *ArgumentAnalyzer::FindBoundOp( - parser::CharBlock oprName, int passIndex, const Symbol *&definedOp) { +const Symbol *ArgumentAnalyzer::FindBoundOp(parser::CharBlock oprName, + int passIndex, const Symbol *&generic, bool isSubroutine) { const auto *type{GetDerivedTypeSpec(GetType(passIndex))}; if (!type || !type->scope()) { return nullptr; } - const Symbol *symbol{type->scope()->FindComponent(oprName)}; - if (!symbol) { + generic = type->scope()->FindComponent(oprName); + if (!generic) { return nullptr; } - definedOp = symbol; ExpressionAnalyzer::AdjustActuals adjustment{ [&](const Symbol &proc, ActualArguments &) { return passIndex == GetPassIndex(proc); }}; - auto pair{context_.ResolveGeneric(*symbol, actuals_, adjustment, false)}; + auto pair{ + context_.ResolveGeneric(*generic, actuals_, adjustment, isSubroutine)}; if (!pair.first) { - context_.EmitGenericResolutionError(*symbol, pair.second); + context_.EmitGenericResolutionError(*generic, pair.second, isSubroutine); } return pair.first; } diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -59,7 +59,7 @@ static llvm::raw_ostream &PutAttr(llvm::raw_ostream &, Attr); static llvm::raw_ostream &PutType(llvm::raw_ostream &, const DeclTypeSpec &); -static llvm::raw_ostream &PutLower(llvm::raw_ostream &, const std::string &); +static llvm::raw_ostream &PutLower(llvm::raw_ostream &, std::string_view); static std::error_code WriteFile( const std::string &, const std::string &, bool = true); static bool FileContentsMatch( @@ -422,7 +422,12 @@ void ModFileWriter::PutSubprogram(const Symbol &symbol) { auto &details{symbol.get()}; if (const Symbol * interface{details.moduleInterface()}) { - PutSubprogram(*interface); + const Scope *module{FindModuleContaining(interface->owner())}; + if (module && module != &symbol.owner()) { + // Interface is in ancestor module + } else { + PutSubprogram(*interface); + } } auto attrs{symbol.attrs()}; Attrs bindAttrs{}; @@ -797,7 +802,7 @@ return PutLower(os, type.AsFortran()); } -llvm::raw_ostream &PutLower(llvm::raw_ostream &os, const std::string &str) { +llvm::raw_ostream &PutLower(llvm::raw_ostream &os, std::string_view str) { for (char c : str) { os << parser::ToLowerCaseLetter(c); } diff --git a/flang/lib/Semantics/pointer-assignment.cpp b/flang/lib/Semantics/pointer-assignment.cpp --- a/flang/lib/Semantics/pointer-assignment.cpp +++ b/flang/lib/Semantics/pointer-assignment.cpp @@ -307,6 +307,10 @@ symbol->name()); return false; } + } else if (symbol->has()) { + evaluate::SayWithDeclaration(context_.messages(), *symbol, + "Procedure binding '%s' used as target of a pointer assignment"_port_en_US, + symbol->name()); } } if (auto chars{Procedure::Characterize(d, context_)}) { diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -873,7 +873,7 @@ Symbol &PushSubprogramScope(const parser::Name &, Symbol::Flag, const parser::LanguageBindingSpec * = nullptr); Symbol *GetSpecificFromGeneric(const parser::Name &); - SubprogramDetails &PostSubprogramStmt(const parser::Name &); + SubprogramDetails &PostSubprogramStmt(); void CreateEntry(const parser::EntryStmt &stmt, Symbol &subprogram); void PostEntryStmt(const parser::EntryStmt &stmt); void HandleLanguageBinding(Symbol *, @@ -1194,6 +1194,7 @@ // Creates Block scopes with neither symbol name nor symbol details. 
bool Pre(const parser::SelectRankConstruct::RankCase &); void Post(const parser::SelectRankConstruct::RankCase &); + bool Pre(const parser::TypeGuardStmt::Guard &); void Post(const parser::TypeGuardStmt::Guard &); void Post(const parser::SelectRankCaseStmt::Rank &); bool Pre(const parser::ChangeTeamStmt &); @@ -3245,9 +3246,8 @@ specificProcs_.erase(range.first, range.second); } -// Check that the specific procedures are all functions or all subroutines. -// If there is a derived type with the same name they must be functions. -// Set the corresponding flag on generic. +// Mixed interfaces are allowed by the standard. +// If there is a derived type with the same name, they must all be functions. void InterfaceVisitor::CheckGenericProcedures(Symbol &generic) { ResolveSpecificsInGeneric(generic); auto &details{generic.get()}; @@ -3270,10 +3270,11 @@ } const Symbol &firstSpecific{specifics.front()}; bool isFunction{firstSpecific.test(Symbol::Flag::Function)}; + bool isBoth{false}; for (const Symbol &specific : specifics) { if (isFunction != specific.test(Symbol::Flag::Function)) { // C1514 auto &msg{Say(generic.name(), - "Generic interface '%s' has both a function and a subroutine"_err_en_US)}; + "Generic interface '%s' has both a function and a subroutine"_warn_en_US)}; if (isFunction) { msg.Attach(firstSpecific.name(), "Function declaration"_en_US); msg.Attach(specific.name(), "Subroutine declaration"_en_US); @@ -3281,6 +3282,9 @@ msg.Attach(firstSpecific.name(), "Subroutine declaration"_en_US); msg.Attach(specific.name(), "Function declaration"_en_US); } + isFunction = false; + isBoth = true; + break; } } if (!isFunction && details.derivedType()) { @@ -3289,7 +3293,9 @@ " with same name"_err_en_US, *details.derivedType()->GetUltimate().scope()); } - generic.set(isFunction ? Symbol::Flag::Function : Symbol::Flag::Subroutine); + if (!isBoth) { + generic.set(isFunction ? Symbol::Flag::Function : Symbol::Flag::Subroutine); + } } // SubprogramVisitor implementation @@ -3301,7 +3307,8 @@ // Look up name: provides return type or tells us if it's an array if (auto *symbol{FindSymbol(name)}) { auto *details{symbol->detailsIf()}; - if (!details) { + if (!details || symbol->has() || + symbol->has()) { badStmtFuncFound_ = true; return false; } @@ -3311,7 +3318,7 @@ } if (badStmtFuncFound_) { Say(name, "'%s' has not been declared as an array"_err_en_US); - return true; + return false; } auto &symbol{PushSubprogramScope(name, Symbol::Flag::Function)}; symbol.set(Symbol::Flag::StmtFunction); @@ -3336,10 +3343,9 @@ } resultDetails.set_funcResult(true); Symbol &result{MakeSymbol(name, std::move(resultDetails))}; + result.flags().set(Symbol::Flag::StmtFunction); ApplyImplicitRules(result); details.set_result(result); - const auto &parsedExpr{std::get>(x.t)}; - Walk(parsedExpr); // The analysis of the expression that constitutes the body of the // statement function is deferred to FinishSpecificationPart() so that // all declarations and implicit typing are complete. @@ -3416,8 +3422,7 @@ Walk(std::get(stmt.t)); Walk(std::get>(stmt.t)); // Don't traverse the LanguageBindingSpec now; it's deferred to EndSubprogram. 
- const auto &name{std::get(stmt.t)}; - auto &details{PostSubprogramStmt(name)}; + auto &details{PostSubprogramStmt()}; for (const auto &dummyArg : std::get>(stmt.t)) { if (const auto *dummyName{std::get_if(&dummyArg.u)}) { Symbol &dummy{MakeSymbol(*dummyName, EntityDetails{true})}; @@ -3438,7 +3443,7 @@ void SubprogramVisitor::Post(const parser::FunctionStmt &stmt) { const auto &name{std::get(stmt.t)}; - auto &details{PostSubprogramStmt(name)}; + auto &details{PostSubprogramStmt()}; for (const auto &dummyName : std::get>(stmt.t)) { Symbol &dummy{MakeSymbol(dummyName, EntityDetails{true})}; details.add_dummyArg(dummy); @@ -3503,8 +3508,7 @@ info.resultName = nullptr; } -SubprogramDetails &SubprogramVisitor::PostSubprogramStmt( - const parser::Name &name) { +SubprogramDetails &SubprogramVisitor::PostSubprogramStmt() { Symbol &symbol{*currScope().symbol()}; SetExplicitAttrs(symbol, EndAttrs()); if (symbol.attrs().test(Attr::MODULE)) { @@ -3769,7 +3773,7 @@ if (moduleInterface && &moduleInterface->owner() == &currScope()) { // Subprogram is MODULE FUNCTION or MODULE SUBROUTINE with an interface // previously defined in the same scope. - currScope().erase(moduleInterface->name()); + EraseSymbol(name); } } Symbol &newSymbol{PushSubprogramScope(name, subpFlag, bindingSpec)}; @@ -4932,8 +4936,17 @@ bool DeclarationVisitor::Pre(const parser::ProcPointerInit &x) { if (auto *name{std::get_if(&x.u)}) { return !NameIsKnownOrIntrinsic(*name) && !CheckUseError(*name); + } else { + const auto &null{DEREF(std::get_if(&x.u))}; + Walk(null); + if (auto nullInit{EvaluateExpr(null)}) { + if (!evaluate::IsNullPointer(*nullInit)) { + Say(null.v.value().source, + "Procedure pointer initializer must be a name or intrinsic NULL()"_err_en_US); + } + } + return false; } - return true; } void DeclarationVisitor::Post(const parser::ProcInterface &x) { if (auto *name{std::get_if(&x.u)}) { @@ -5974,7 +5987,7 @@ } else if (extends) { msg = "Type cannot be extended as it has a component named" " '%s'"_err_en_US; - } else if (CheckAccessibleComponent(currScope(), *prev)) { + } else if (CheckAccessibleSymbol(currScope(), *prev)) { // inaccessible component -- redeclaration is ok msg = "Component '%s' is inaccessibly declared in or as a " "parent of this derived type"_warn_en_US; @@ -6398,6 +6411,14 @@ PopScope(); } +bool ConstructVisitor::Pre(const parser::TypeGuardStmt::Guard &x) { + if (std::holds_alternative(x.u)) { + // CLASS IS (t) + SetDeclTypeSpecCategory(DeclTypeSpec::Category::ClassDerived); + } + return true; +} + void ConstructVisitor::Post(const parser::TypeGuardStmt::Guard &x) { if (auto *symbol{MakeAssocEntity()}) { if (std::holds_alternative(x.u)) { @@ -6841,8 +6862,7 @@ derived->Instantiate(currScope()); // in case of forward referenced type if (const Scope * scope{derived->scope()}) { if (Resolve(component, scope->FindComponent(component.source))) { - if (auto msg{ - CheckAccessibleComponent(currScope(), *component.symbol)}) { + if (auto msg{CheckAccessibleSymbol(currScope(), *component.symbol)}) { context().Say(component.source, *msg); } return &component; @@ -6886,9 +6906,9 @@ [&](const parser::NullInit &null) { // => NULL() Walk(null); if (auto nullInit{EvaluateExpr(null)}) { - if (!evaluate::IsNullPointer(*nullInit)) { - Say(name, - "Pointer initializer must be intrinsic NULL()"_err_en_US); // C813 + if (!evaluate::IsNullPointer(*nullInit)) { // C813 + Say(null.v.value().source, + "Pointer initializer must be intrinsic NULL()"_err_en_US); } else if (IsPointer(ultimate)) { if (auto 
*object{ultimate.detailsIf()}) { object->set_init(std::move(*nullInit)); @@ -6947,14 +6967,14 @@ if (IsProcedurePointer(ultimate)) { auto &details{ultimate.get()}; CHECK(!details.init()); - Walk(target); if (const auto *targetName{std::get_if(&target.u)}) { + Walk(target); if (!CheckUseError(*targetName) && targetName->symbol) { // Validation is done in declaration checking. details.set_init(*targetName->symbol); } - } else { - details.set_init(nullptr); // explicit NULL() + } else { // explicit NULL + details.set_init(nullptr); } } else { Say(name, @@ -7394,28 +7414,31 @@ // Analyze the bodies of statement functions now that the symbols in this // specification part have been fully declared and implicitly typed. +// (Statement function references are not allowed in specification +// expressions, so it's safe to defer processing their definitions.) void ResolveNamesVisitor::AnalyzeStmtFunctionStmt( const parser::StmtFunctionStmt &stmtFunc) { Symbol *symbol{std::get(stmtFunc.t).symbol}; - if (!symbol || !symbol->has()) { + auto *details{symbol ? symbol->detailsIf() : nullptr}; + if (!details || !symbol->scope()) { return; } - auto &details{symbol->get()}; - auto expr{AnalyzeExpr( - context(), std::get>(stmtFunc.t))}; - if (!expr) { - context().SetError(*symbol); - return; - } - if (auto type{evaluate::DynamicType::From(*symbol)}) { - auto converted{ConvertToType(*type, std::move(*expr))}; - if (!converted) { - context().SetError(*symbol); - return; + // Resolve the symbols on the RHS of the statement function. + PushScope(*symbol->scope()); + const auto &parsedExpr{std::get>(stmtFunc.t)}; + Walk(parsedExpr); + PopScope(); + if (auto expr{AnalyzeExpr(context(), stmtFunc)}) { + if (auto type{evaluate::DynamicType::From(*symbol)}) { + if (auto converted{ConvertToType(*type, std::move(*expr))}) { + details->set_stmtFunction(std::move(*converted)); + } + } else { + details->set_stmtFunction(std::move(*expr)); } - details.set_stmtFunction(std::move(*converted)); - } else { - details.set_stmtFunction(std::move(*expr)); + } + if (!details->stmtFunction()) { + context().SetError(*symbol); } } @@ -7805,6 +7828,7 @@ resolver_.CheckBindings(tbps); } } + bool Pre(const parser::StmtFunctionStmt &stmtFunc) { return false; } private: void Init(const parser::Name &name, @@ -7829,7 +7853,7 @@ } SetScope(*node.scope()); // The initializers of pointers, the default initializers of pointer - // components, and non-deferred type-bound procedure bindings have not + // components, non-deferred type-bound procedure bindings have not // yet been traversed. 
// We do that now, when any (formerly) forward references that appear // in those initializers will resolve to the right symbols without diff --git a/flang/lib/Semantics/symbol.cpp b/flang/lib/Semantics/symbol.cpp --- a/flang/lib/Semantics/symbol.cpp +++ b/flang/lib/Semantics/symbol.cpp @@ -253,9 +253,7 @@ details); } -const std::string Symbol::GetDetailsName() const { - return DetailsToString(details_); -} +std::string Symbol::GetDetailsName() const { return DetailsToString(details_); } void Symbol::set_details(Details &&details) { CHECK(CanReplaceDetails(details)); @@ -671,20 +669,20 @@ std::string GenericKind::ToString() const { return common::visit( common::visitors { - [](const OtherKind &x) { return EnumToString(x); }, + [](const OtherKind &x) { return std::string{EnumToString(x)}; }, [](const DefinedIo &x) { return AsFortran(x).ToString(); }, #if !__clang__ && __GNUC__ == 7 && __GNUC_MINOR__ == 2 [](const common::NumericOperator &x) { - return common::EnumToString(x); + return std::string{common::EnumToString(x)}; }, [](const common::LogicalOperator &x) { - return common::EnumToString(x); + return std::string{common::EnumToString(x)}; }, [](const common::RelationalOperator &x) { - return common::EnumToString(x); + return std::string{common::EnumToString(x)}; }, #else - [](const auto &x) { return common::EnumToString(x); }, + [](const auto &x) { return std::string{common::EnumToString(x)}; }, #endif }, u); diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -520,6 +520,36 @@ return nullptr; } +const Symbol *FindGlobal(const Symbol &original) { + const Symbol &ultimate{original.GetUltimate()}; + if (ultimate.owner().IsGlobal()) { + return &ultimate; + } + bool isLocal{false}; + if (IsDummy(ultimate)) { + } else if (IsPointer(ultimate)) { + } else if (ultimate.has()) { + isLocal = IsExternal(ultimate); + } else if (const auto *subp{ultimate.detailsIf()}) { + isLocal = subp->isInterface(); + } + if (isLocal) { + const std::string *bind{ultimate.GetBindName()}; + if (!bind || ultimate.name() == *bind) { + const Scope &globalScope{ultimate.owner().context().globalScope()}; + if (auto iter{globalScope.find(ultimate.name())}; + iter != globalScope.end()) { + const Symbol &global{*iter->second}; + const std::string *globalBind{global.GetBindName()}; + if (!globalBind || global.name() == *globalBind) { + return &global; + } + } + } + } + return nullptr; +} + const DeclTypeSpec *FindParentTypeSpec(const DerivedTypeSpec &derived) { return FindParentTypeSpec(derived.typeSymbol()); } @@ -961,9 +991,8 @@ return IsAllocatable(symbol) && IsPolymorphic(symbol); } -std::optional CheckAccessibleComponent( +std::optional CheckAccessibleSymbol( const Scope &scope, const Symbol &symbol) { - CHECK(symbol.owner().IsDerivedType()); // symbol must be a component if (symbol.attrs().test(Attr::PRIVATE)) { if (FindModuleFileContaining(scope)) { // Don't enforce component accessibility checks in module files; @@ -973,7 +1002,7 @@ moduleScope{FindModuleContaining(symbol.owner())}) { if (!moduleScope->Contains(scope)) { return parser::MessageFormattedText{ - "PRIVATE component '%s' is only accessible within module '%s'"_err_en_US, + "PRIVATE name '%s' is only accessible within module '%s'"_err_en_US, symbol.name(), moduleScope->GetName().value()}; } } @@ -1484,31 +1513,4 @@ return false; } -const Symbol *FindUnsafeIoDirectComponent(GenericKind::DefinedIo which, - const DerivedTypeSpec &derived, const Scope *scope) { - 
if (HasDefinedIo(which, derived, scope)) { - return nullptr; - } - if (const Scope * dtScope{derived.scope()}) { - for (const auto &pair : *dtScope) { - const Symbol &symbol{*pair.second}; - if (IsAllocatableOrPointer(symbol)) { - return &symbol; - } - if (const auto *details{symbol.detailsIf()}) { - if (const DeclTypeSpec * type{details->type()}) { - if (type->category() == DeclTypeSpec::Category::TypeDerived) { - if (const Symbol * - bad{FindUnsafeIoDirectComponent( - which, type->derivedTypeSpec(), scope)}) { - return bad; - } - } - } - } - } - } - return nullptr; -} - } // namespace Fortran::semantics diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -156,6 +156,8 @@ ! HELP-FC1-NEXT: -P Disable linemarker output in -E mode ! HELP-FC1-NEXT: -std= Language standard to compile for ! HELP-FC1-NEXT: -S Only run preprocess and compilation steps +! HELP-FC1-NEXT: -target-cpu Target a specific cpu type +! HELP-FC1-NEXT: -target-feature Target specific attributes ! HELP-FC1-NEXT: -test-io Run the InputOuputTest action. Use for development and testing only. ! HELP-FC1-NEXT: -triple Specify target triple (e.g. i686-apple-darwin9) ! HELP-FC1-NEXT: -U Undefine macro diff --git a/flang/test/Driver/target-cpu-features.f90 b/flang/test/Driver/target-cpu-features.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Driver/target-cpu-features.f90 @@ -0,0 +1,56 @@ +! REQUIRES: aarch64-registered-target, x86-registered-target + +! Test that -mcpu/march are used and that the -target-cpu and -target-features +! are also added to the fc1 command. + +! RUN: %flang --target=aarch64-linux-gnu -mcpu=cortex-a57 -c %s -### 2>&1 \ +! RUN: | FileCheck %s -check-prefix=CHECK-A57 + +! RUN: %flang --target=aarch64-linux-gnu -mcpu=cortex-a76 -c %s -### 2>&1 \ +! RUN: | FileCheck %s -check-prefix=CHECK-A76 + +! RUN: %flang --target=aarch64-linux-gnu -march=armv9 -c %s -### 2>&1 \ +! RUN: | FileCheck %s -check-prefix=CHECK-ARMV9 + +! Negative test. ARM cpu with x86 target. +! RUN: %flang --target=x86_64-linux-gnu -mcpu=cortex-a57 -c %s -### 2>&1 \ +! RUN: | FileCheck %s -check-prefix=CHECK-NO-A57 + +! RUN: %flang --target=x86_64-linux-gnu -march=skylake -c %s -### 2>&1 \ +! RUN: | FileCheck %s -check-prefix=CHECK-SKYLAKE + +! RUN: %flang --target=x86_64h-linux-gnu -c %s -### 2>&1 \ +! RUN: | FileCheck %s -check-prefix=CHECK-X86_64H + + +! Test that invalid cpu and features are ignored. + +! RUN: %flang_fc1 -triple aarch64-linux-gnu -target-cpu supercpu \ +! RUN: -o /dev/null -S %s 2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-CPU + +! RUN: %flang_fc1 -triple aarch64-linux-gnu -target-feature +superspeed \ +! RUN: -o /dev/null -S %s 2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-FEATURE + + +! CHECK-A57: "-fc1" "-triple" "aarch64-unknown-linux-gnu" +! CHECK-A57-SAME: "-target-cpu" "cortex-a57" "-target-feature" "+v8a" "-target-feature" "+crc" "-target-feature" "+crypto" "-target-feature" "+fp-armv8" "-target-feature" "+neon" "-target-feature" "+sha2" "-target-feature" "+aes" + +! CHECK-A76: "-fc1" "-triple" "aarch64-unknown-linux-gnu" +! 
CHECK-A76-SAME: "-target-cpu" "cortex-a76" "-target-feature" "+v8.2a" "-target-feature" "+crc" "-target-feature" "+lse" "-target-feature" "+rdm" "-target-feature" "+crypto" "-target-feature" "+dotprod" "-target-feature" "+fp-armv8" "-target-feature" "+neon" "-target-feature" "+fullfp16" "-target-feature" "+ras" "-target-feature" "+rcpc" "-target-feature" "+ssbs" "-target-feature" "+sha2" "-target-feature" "+aes" + +! CHECK-ARMV9: "-fc1" "-triple" "aarch64-unknown-linux-gnu" +! CHECK-ARMV9-SAME: "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v9a" "-target-feature" "+sve" "-target-feature" "+sve2" + +! CHECK-NO-A57: "-fc1" "-triple" "x86_64-unknown-linux-gnu" +! CHECK-NO-A57-NOT: cortex-a57 +! CHECK-NO-A57-SAME: "-target-cpu" "x86-64" +! CHECK-NO-A57-NOT: cortex-a57 + +! CHECK-SKYLAKE: "-fc1" "-triple" "x86_64-unknown-linux-gnu" +! CHECK-SKYLAKE-SAME: "-target-cpu" "skylake" + +! CHECK-X86_64H: "-fc1" "-triple" "x86_64h-unknown-linux-gnu" +! CHECK-X86_64H-SAME: "-target-cpu" "x86-64" "-target-feature" "-rdrnd" "-target-feature" "-aes" "-target-feature" "-pclmul" "-target-feature" "-rtm" "-target-feature" "-fsgsbase" + +! CHECK-INVALID-CPU: 'supercpu' is not a recognized processor for this target (ignoring processor) +! CHECK-INVALID-FEATURE: '+superspeed' is not a recognized feature for this target (ignoring feature) diff --git a/flang/test/Lower/HLFIR/binary-ops.f90 b/flang/test/Lower/HLFIR/binary-ops.f90 --- a/flang/test/Lower/HLFIR/binary-ops.f90 +++ b/flang/test/Lower/HLFIR/binary-ops.f90 @@ -193,3 +193,160 @@ ! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref> ! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref ! CHECK: %[[VAL_8:.*]] = fir.call @_FortranAcpowi(%[[VAL_6]], %[[VAL_7]]) fastmath : (!fir.complex<4>, i32) -> !fir.complex<4> + +subroutine extremum(c, n, l) + integer(8), intent(in) :: l + integer(8) :: n + character(l) :: c + ! evaluate::Extremum is created by semantics while analyzing LEN(). + n = len(c, 8) +end subroutine +! CHECK-LABEL: func.func @_QPextremum( +! CHECK: hlfir.declare {{.*}}c +! CHECK: %[[VAL_11:.*]] = arith.constant 0 : i64 +! CHECK: %[[VAL_12:.*]] = fir.load %{{.*}} : !fir.ref +! CHECK: %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_12]] : i64 +! CHECK: arith.select %[[VAL_13]], %[[VAL_11]], %[[VAL_12]] : i64 + +subroutine cmp_int(l, x, y) + logical :: l + integer :: x, y + l = x .eq. y +end subroutine +! CHECK-LABEL: func.func @_QPcmp_int( +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare {{.*}}x" +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare {{.*}}y" +! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref +! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +! CHECK: %[[VAL_8:.*]] = arith.cmpi eq, %[[VAL_6]], %[[VAL_7]] : i32 + +subroutine cmp_int_2(l, x, y) + logical :: l + integer :: x, y + l = x .ne. y +! CHECK: arith.cmpi ne + l = x .gt. y +! CHECK: arith.cmpi sgt + l = x .ge. y +! CHECK: arith.cmpi sge + l = x .lt. y +! CHECK: arith.cmpi slt + l = x .le. y +! CHECK: arith.cmpi sle +end subroutine + +subroutine cmp_real(l, x, y) + logical :: l + real :: x, y + l = x .eq. y +end subroutine +! CHECK-LABEL: func.func @_QPcmp_real( +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare {{.*}}x" +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare {{.*}}y" +! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref +! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +! CHECK: %[[VAL_8:.*]] = arith.cmpf oeq, %[[VAL_6]], %[[VAL_7]] : f32 + +subroutine cmp_real_2(l, x, y) + logical :: l + real :: x, y + l = x .ne. y +! 
CHECK: arith.cmpf une + l = x .gt. y +! CHECK: arith.cmpf ogt + l = x .ge. y +! CHECK: arith.cmpf oge + l = x .lt. y +! CHECK: arith.cmpf olt + l = x .le. y +! CHECK: arith.cmpf ole +end subroutine + +subroutine cmp_cmplx(l, x, y) + logical :: l + complex :: x, y + l = x .eq. y +end subroutine +! CHECK-LABEL: func.func @_QPcmp_cmplx( +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare {{.*}}x" +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare {{.*}}y" +! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref> +! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref> +! CHECK: %[[VAL_8:.*]] = fir.cmpc "oeq", %[[VAL_6]], %[[VAL_7]] : !fir.complex<4> + +subroutine cmp_char(l, x, y) + logical :: l + character(*) :: x, y + l = x .eq. y +end subroutine +! CHECK-LABEL: func.func @_QPcmp_char( +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %{{.*}} typeparams %[[VAL_4:.*]]#1 {uniq_name = "_QFcmp_charEx"} : (!fir.ref>, index) -> (!fir.boxchar<1>, !fir.ref>) +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %{{.*}} typeparams %[[VAL_6:.*]]#1 {uniq_name = "_QFcmp_charEy"} : (!fir.ref>, index) -> (!fir.boxchar<1>, !fir.ref>) +! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_5]]#1 : (!fir.ref>) -> !fir.ref +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_7]]#1 : (!fir.ref>) -> !fir.ref +! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_4]]#1 : (index) -> i64 +! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_6]]#1 : (index) -> i64 +! CHECK: %[[VAL_12:.*]] = fir.call @_FortranACharacterCompareScalar1(%[[VAL_8]], %[[VAL_9]], %[[VAL_10]], %[[VAL_11]]) fastmath : (!fir.ref, !fir.ref, i64, i64) -> i32 +! CHECK: %[[VAL_13:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_14:.*]] = arith.cmpi eq, %[[VAL_12]], %[[VAL_13]] : i32 + +subroutine logical_and(x, y, z) + logical :: x, y, z + x = y.and.z +end subroutine +! CHECK-LABEL: func.func @_QPlogical_and( +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %{{.*}}y"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %{{.*}}z"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref> +! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref> +! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_6]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_7]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_10:.*]] = arith.andi %[[VAL_8]], %[[VAL_9]] : i1 + +subroutine logical_or(x, y, z) + logical :: x, y, z + x = y.or.z +end subroutine +! CHECK-LABEL: func.func @_QPlogical_or( +! CHECK: %[[VAL_10:.*]] = arith.ori + +subroutine logical_eqv(x, y, z) + logical :: x, y, z + x = y.eqv.z +end subroutine +! CHECK-LABEL: func.func @_QPlogical_eqv( +! CHECK: %[[VAL_10:.*]] = arith.cmpi eq + +subroutine logical_neqv(x, y, z) + logical :: x, y, z + x = y.neqv.z +end subroutine +! CHECK-LABEL: func.func @_QPlogical_neqv( +! CHECK: %[[VAL_10:.*]] = arith.cmpi ne + +subroutine cmplx_ctor(z, x, y) + complex :: z + real :: x, y + z = cmplx(x, y) +end subroutine +! CHECK-LABEL: func.func @_QPcmplx_ctor( +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}}x"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %{{.*}}y"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref +! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref +! CHECK: %[[VAL_8:.*]] = fir.undefined !fir.complex<4> +! CHECK: %[[VAL_9:.*]] = fir.insert_value %[[VAL_8]], %[[VAL_6]], [0 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> +! 
CHECK: %[[VAL_10:.*]] = fir.insert_value %[[VAL_9]], %[[VAL_7]], [1 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> + +subroutine cmplx_ctor_2(z, x) + complex(8) :: z + real(8) :: x + z = cmplx(x, 1._8, kind=8) +end subroutine +! CHECK-LABEL: func.func @_QPcmplx_ctor_2( +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %{{.*}}x"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref +! CHECK: %[[VAL_5:.*]] = arith.constant 1.000000e+00 : f64 +! CHECK: %[[VAL_6:.*]] = fir.undefined !fir.complex<8> +! CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_6]], %[[VAL_4]], [0 : index] : (!fir.complex<8>, f64) -> !fir.complex<8> +! CHECK: %[[VAL_8:.*]] = fir.insert_value %[[VAL_7]], %[[VAL_5]], [1 : index] : (!fir.complex<8>, f64) -> !fir.complex<8> diff --git a/flang/test/Lower/HLFIR/conversion-ops.f90 b/flang/test/Lower/HLFIR/conversion-ops.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/HLFIR/conversion-ops.f90 @@ -0,0 +1,69 @@ +! Test lowering of intrinsic conversions to HLFIR +! RUN: bbc -emit-fir -hlfir -o - %s 2>&1 | FileCheck %s + +subroutine test + integer(4) :: i4 + integer(8) :: i8 + real(4) :: r4 + real(8) :: r8 + complex(4) :: z4 + complex(8) :: z8 + + logical(4) :: l4 + logical(8) :: l8 + + i4 = i8 +! CHECK: fir.convert %{{.*}} : (i64) -> i32 + i4 = r4 +! CHECK: fir.convert %{{.*}} : (f32) -> i32 + i4 = r8 +! CHECK: fir.convert %{{.*}} : (f64) -> i32 + i4 = z4 +! CHECK: %[[VAL_23:.*]] = fir.extract_value %{{.*}}, [0 : index] : (!fir.complex<4>) -> f32 +! CHECK: fir.convert %[[VAL_23]] : (f32) -> i32 + i4 = z8 +! CHECK: %[[VAL_26:.*]] = fir.extract_value %{{.*}}, [0 : index] : (!fir.complex<8>) -> f64 +! CHECK: fir.convert %[[VAL_26]] : (f64) -> i32 + + r4 = i4 +! CHECK: fir.convert %{{.*}} : (i32) -> f32 + r4 = i8 +! CHECK: fir.convert %{{.*}} : (i64) -> f32 + r4 = r8 +! CHECK: fir.convert %{{.*}} : (f64) -> f32 + r4 = z4 +! CHECK: fir.extract_value %{{.*}}, [0 : index] : (!fir.complex<4>) -> f32 + r4 = z8 +! CHECK: %[[VAL_36:.*]] = fir.load %{{.*}} : !fir.ref> +! CHECK: %[[VAL_37:.*]] = fir.extract_value %[[VAL_36]], [0 : index] : (!fir.complex<8>) -> f64 +! CHECK: fir.convert %[[VAL_37]] : (f64) -> f32 + + z4 = i4 +! CHECK: %[[VAL_40:.*]] = fir.convert %{{.*}} : (i32) -> f32 +! CHECK: %[[VAL_41:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: %[[VAL_42:.*]] = fir.undefined !fir.complex<4> +! CHECK: %[[VAL_43:.*]] = fir.insert_value %[[VAL_42]], %[[VAL_40]], [0 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> +! CHECK: fir.insert_value %[[VAL_43]], %[[VAL_41]], [1 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> + z4 = i8 +! CHECK: %[[VAL_46:.*]] = fir.convert %{{.*}} : (i64) -> f32 +! CHECK: %[[VAL_47:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: %[[VAL_48:.*]] = fir.undefined !fir.complex<4> +! CHECK: %[[VAL_49:.*]] = fir.insert_value %[[VAL_48]], %[[VAL_46]], [0 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> +! CHECK: fir.insert_value %[[VAL_49]], %[[VAL_47]], [1 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> + z4 = r4 +! CHECK: %[[VAL_52:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: %[[VAL_53:.*]] = fir.undefined !fir.complex<4> +! CHECK: %[[VAL_54:.*]] = fir.insert_value %[[VAL_53]], %{{.*}}, [0 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> +! CHECK: fir.insert_value %[[VAL_54]], %[[VAL_52]], [1 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> + z4 = r8 +! CHECK: %[[VAL_57:.*]] = fir.convert %{{.*}} : (f64) -> f32 +! CHECK: %[[VAL_58:.*]] = arith.constant 0.000000e+00 : f32 +! 
CHECK: %[[VAL_59:.*]] = fir.undefined !fir.complex<4> +! CHECK: %[[VAL_60:.*]] = fir.insert_value %[[VAL_59]], %[[VAL_57]], [0 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> +! CHECK: fir.insert_value %[[VAL_60]], %[[VAL_58]], [1 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> + z4 = z8 +! CHECK: fir.convert %{{.*}} : (!fir.complex<8>) -> !fir.complex<4> + + l4 = l8 +! CHECK: fir.convert %{{.*}} : (!fir.logical<8>) -> !fir.logical<4> +end subroutine diff --git a/flang/test/Lower/HLFIR/unary-ops.f90 b/flang/test/Lower/HLFIR/unary-ops.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/HLFIR/unary-ops.f90 @@ -0,0 +1,61 @@ +! Test lowering of unary intrinsic operations to HLFIR +! RUN: bbc -emit-fir -hlfir -o - %s 2>&1 | FileCheck %s + +subroutine test_not(l, x) + logical :: l, x + l = .not.x +end subroutine +! CHECK-LABEL: func.func @_QPtest_not( +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}}x"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref> +! CHECK: %[[VAL_5:.*]] = arith.constant true +! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_4]] : (!fir.logical<4>) -> i1 +! CHECK: %[[VAL_7:.*]] = arith.xori %[[VAL_6]], %[[VAL_5]] : i1 + +subroutine test_negate_int(res, x) + integer :: res, x + res = -x +end subroutine +! CHECK-LABEL: func.func @_QPtest_negate_int( +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}}x"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref +! CHECK: %[[VAL_5:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_6:.*]] = arith.subi %[[VAL_5]], %[[VAL_4]] : i32 + +subroutine test_negate_real(res, x) + real :: res, x + res = -x +end subroutine +! CHECK-LABEL: func.func @_QPtest_negate_real( +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}}x"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref +! CHECK: %[[VAL_5:.*]] = arith.negf %[[VAL_4]] fastmath : f32 + +subroutine test_negate_complex(res, x) + complex :: res, x + res = -x +end subroutine +! CHECK-LABEL: func.func @_QPtest_negate_complex( +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}}x"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref> +! CHECK: %[[VAL_5:.*]] = fir.negc %[[VAL_4]] : !fir.complex<4> + +subroutine test_complex_component_real(res, x) + real :: res + complex :: x + res = real(x) +end subroutine +! CHECK-LABEL: func.func @_QPtest_complex_component_real( +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}}x"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref> +! CHECK: %[[VAL_5:.*]] = fir.extract_value %[[VAL_4]], [0 : index] : (!fir.complex<4>) -> f32 + +subroutine test_complex_component_imag(res, x) + real :: res + complex :: x + res = aimag(x) +end subroutine +! CHECK-LABEL: func.func @_QPtest_complex_component_imag( +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}}x"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref> +! CHECK: %[[VAL_5:.*]] = fir.extract_value %[[VAL_4]], [1 : index] : (!fir.complex<4>) -> f32 diff --git a/flang/test/Lower/polymorphic.f90 b/flang/test/Lower/polymorphic.f90 --- a/flang/test/Lower/polymorphic.f90 +++ b/flang/test/Lower/polymorphic.f90 @@ -301,4 +301,27 @@ ! CHECK: %[[UP:.*]] = fir.convert %[[BOX_COMPLEX]] : (!fir.class>) -> !fir.class ! 
CHECK: fir.call @_QMpolymorphic_testPup_input(%[[UP]]) {{.*}} : (!fir.class) -> () + subroutine assign_polymorphic_allocatable() + type(p1), target :: t(10,20) + class(p1), allocatable :: c(:,:) + c = t + end subroutine + +! CHECK-LABEL: func.func @_QMpolymorphic_testPassign_polymorphic_allocatable() { +! CHECK: %[[C:.*]] = fir.alloca !fir.class>>> {bindc_name = "c", uniq_name = "_QMpolymorphic_testFassign_polymorphic_allocatableEc"} +! CHECK: %[[ZERO:.*]] = fir.zero_bits !fir.heap>> +! CHECK: %[[C0:.*]] = arith.constant 0 : index +! CHECK: %[[SHAPE_C:.*]] = fir.shape %[[C0]], %[[C0]] : (index, index) -> !fir.shape<2> +! CHECK: %[[EMBOX:.*]] = fir.embox %[[ZERO]](%[[SHAPE_C]]) : (!fir.heap>>, !fir.shape<2>) -> !fir.class>>> +! CHECK: fir.store %[[EMBOX]] to %[[C]] : !fir.ref>>>> +! CHECK: %[[C10:.*]] = arith.constant 10 : index +! CHECK: %[[C20:.*]] = arith.constant 20 : index +! CHECK: %[[T:.*]] = fir.alloca !fir.array<10x20x!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>> {bindc_name = "t", fir.target, uniq_name = "_QMpolymorphic_testFassign_polymorphic_allocatableEt"} +! CHECK: %[[SHAPE:.*]] = fir.shape %[[C10]], %[[C20]] : (index, index) -> !fir.shape<2> +! CHECK: %[[BOXED_T:.*]] = fir.embox %[[T]](%[[SHAPE]]) : (!fir.ref>>, !fir.shape<2>) -> !fir.box>> +! CHECK: %[[CONV_C:.*]] = fir.convert %[[C]] : (!fir.ref>>>>) -> !fir.ref> +! CHECK: %[[CONV_BOXED_T:.*]] = fir.convert %[[BOXED_T]] : (!fir.box>>) -> !fir.box +! CHECK: %{{.*}} = fir.call @_FortranAAssign(%[[CONV_C]], %[[CONV_BOXED_T]], %{{.*}}, %{{.*}}) fastmath : (!fir.ref>, !fir.box, !fir.ref, i32) -> none +! CHECK: return + end module diff --git a/flang/test/Lower/select-type.f90 b/flang/test/Lower/select-type.f90 --- a/flang/test/Lower/select-type.f90 +++ b/flang/test/Lower/select-type.f90 @@ -253,6 +253,8 @@ print*, 'type is real' type is (logical) print*, 'type is logical' + type is (character(*)) + print*, 'type is character' class default print*,'default' end select @@ -261,14 +263,57 @@ ! CHECK-LABEL: func.func @_QMselect_type_lower_testPselect_type5( ! CHECK-SAME: %[[ARG0:.*]]: !fir.class {fir.bindc_name = "a"}) ! CHECK: fir.select_type %[[ARG0]] : !fir.class -! CHECK-SAME: [#fir.type_is, ^[[I8_BLK:.*]], #fir.type_is, ^[[I32_BLK:.*]], #fir.type_is, ^[[F32_BLK:.*]], #fir.type_is>, ^[[LOG_BLK:.*]], unit, ^[[DEFAULT:.*]]] +! CHECK-SAME: [#fir.type_is, ^[[I8_BLK:.*]], #fir.type_is, ^[[I32_BLK:.*]], #fir.type_is, ^[[F32_BLK:.*]], #fir.type_is>, ^[[LOG_BLK:.*]], #fir.type_is>, ^[[CHAR_BLK:.*]], unit, ^[[DEFAULT:.*]]] ! CHECK: ^[[I8_BLK]] ! CHECK: ^[[I32_BLK]] ! CHECK: ^[[F32_BLK]] ! CHECK: ^[[LOG_BLK]] +! CHECK: ^[[CHAR_BLK]] ! CHECK: ^[[DEFAULT_BLOCK]] ! CFG-LABEL: func.func @_QMselect_type_lower_testPselect_type5( +! CFG-SAME: %[[SELECTOR:.*]]: !fir.class {fir.bindc_name = "a"}) { + +! CFG: %[[INT8_TC:.*]] = arith.constant 7 : i8 +! CFG: %[[TYPE_CODE:.*]] = fir.box_typecode %[[SELECTOR]] : (!fir.class) -> i8 +! CFG: %[[IS_INT8:.*]] = arith.cmpi eq, %[[TYPE_CODE]], %[[INT8_TC]] : i8 +! CFG: cf.cond_br %[[IS_INT8]], ^[[INT8_BLK:.*]], ^[[NOT_INT8:.*]] +! CFG: ^[[NOT_INT8]]: +! CFG: %[[INT32_TC:.*]] = arith.constant 9 : i8 +! CFG: %[[TYPE_CODE:.*]] = fir.box_typecode %[[SELECTOR]] : (!fir.class) -> i8 +! CFG: %[[IS_INT32:.*]] = arith.cmpi eq, %[[TYPE_CODE]], %[[INT32_TC]] : i8 +! CFG: cf.cond_br %[[IS_INT32]], ^[[INT32_BLK:.*]], ^[[NOT_INT32_BLK:.*]] +! CFG: ^[[INT8_BLK]]: +! CFG: cf.br ^[[EXIT_BLK:.*]] +! CFG: ^[[NOT_INT32_BLK]]: +! CFG: %[[FLOAT_TC:.*]] = arith.constant 27 : i8 +! 
CFG: %[[TYPE_CODE:.*]] = fir.box_typecode %[[SELECTOR]] : (!fir.class) -> i8 +! CFG: %[[IS_FLOAT:.*]] = arith.cmpi eq, %[[TYPE_CODE]], %[[FLOAT_TC]] : i8 +! CFG: cf.cond_br %[[IS_FLOAT]], ^[[FLOAT_BLK:.*]], ^[[NOT_FLOAT_BLK:.*]] +! CFG: ^[[INT32_BLK]]: +! CFG: cf.br ^[[EXIT_BLK]] +! CFG: ^[[NOT_FLOAT_BLK]]: +! CFG: %[[LOGICAL_TC:.*]] = arith.constant 14 : i8 +! CFG: %[[TYPE_CODE:.*]] = fir.box_typecode %[[SELECTOR]] : (!fir.class) -> i8 +! CFG: %[[IS_LOGICAL:.*]] = arith.cmpi eq, %[[TYPE_CODE]], %[[LOGICAL_TC]] : i8 +! CFG: cf.cond_br %[[IS_LOGICAL]], ^[[LOGICAL_BLK:.*]], ^[[NOT_LOGICAL_BLK:.*]] +! CFG: ^[[FLOAT_BLK]]: +! CFG: cf.br ^[[EXIT_BLK]] +! CFG: ^[[NOT_LOGICAL_BLK]]: +! CFG: %[[CHAR_TC:.*]] = arith.constant 40 : i8 +! CFG: %[[TYPE_CODE:.*]] = fir.box_typecode %[[SELECTOR]] : (!fir.class) -> i8 +! CFG: %[[IS_CHAR:.*]] = arith.cmpi eq, %[[TYPE_CODE]], %[[CHAR_TC]] : i8 +! CFG: cf.cond_br %[[IS_CHAR]], ^[[CHAR_BLK:.*]], ^[[NOT_CHAR_BLK:.*]] +! CFG: ^[[LOGICAL_BLK]]: +! CFG: cf.br ^[[EXIT_BLK]] +! CFG: ^[[NOT_CHAR_BLK]]: +! CFG: cf.br ^[[DEFAULT_BLK:.*]] +! CFG: ^[[CHAR_BLK]]: +! CFG: cf.br ^[[EXIT_BLK]] +! CFG: ^[[DEFAULT_BLK]]: +! CFG: cf.br ^[[EXIT_BLK]] +! CFG: ^bb12: +! CFG: return subroutine select_type6(a) class(*) :: a diff --git a/flang/test/Parser/excessive-continuations.f90 b/flang/test/Parser/excessive-continuations.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Parser/excessive-continuations.f90 @@ -0,0 +1,262 @@ +! RUN: %flang_fc1 -fdebug-unparse %s 2>&1 | FileCheck %s +! CHECK: portability: 256 continuation lines is more than the Fortran standard allows +! CHECK: LOGICAL, PARAMETER :: c255 = .true._4 +program test + logical, parameter :: c255 = 255 == len("& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &a& + &") +end diff --git a/flang/test/Parser/missing-colons.f90 b/flang/test/Parser/missing-colons.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Parser/missing-colons.f90 @@ -0,0 +1,13 @@ +! 
RUN: %flang_fc1 -fsyntax-only -pedantic %s 2>&1 | FileCheck %s +module m + type t + contains +!CHECK: portability: type-bound procedure statement should have '::' if it has '=>' + procedure p => sub + end type + contains + subroutine sub(x) + class(t), intent(in) :: x + end subroutine +end module + diff --git a/flang/test/Semantics/OpenMP/omp-declare-target03.f90 b/flang/test/Semantics/OpenMP/omp-declare-target03.f90 --- a/flang/test/Semantics/OpenMP/omp-declare-target03.f90 +++ b/flang/test/Semantics/OpenMP/omp-declare-target03.f90 @@ -12,6 +12,7 @@ !ERROR: The module name or main program name cannot be in a DECLARE TARGET directive !$omp declare target (mod1) + !PORTABILITY: Name 'main' declared in a main program should not have the same name as the main program !ERROR: The module name or main program name cannot be in a DECLARE TARGET directive !$omp declare target (main) end diff --git a/flang/test/Semantics/OpenMP/omp-threadprivate03.f90 b/flang/test/Semantics/OpenMP/omp-threadprivate03.f90 --- a/flang/test/Semantics/OpenMP/omp-threadprivate03.f90 +++ b/flang/test/Semantics/OpenMP/omp-threadprivate03.f90 @@ -13,6 +13,7 @@ !ERROR: The module name or main program name cannot be in a THREADPRIVATE directive !$omp threadprivate(mod1) + !PORTABILITY: Name 'main' declared in a main program should not have the same name as the main program !ERROR: The module name or main program name cannot be in a THREADPRIVATE directive !$omp threadprivate(main) diff --git a/flang/test/Semantics/assign11.f90 b/flang/test/Semantics/assign11.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Semantics/assign11.f90 @@ -0,0 +1,12 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! 10.2.1.2p1(1) +program test + class(*), allocatable :: pa + class(*), pointer :: pp + class(*), allocatable :: pac[:] + pa = 1 ! 
ok + !ERROR: Left-hand side of assignment may not be polymorphic unless assignment is to an entire allocatable + pp = 1 + !ERROR: Left-hand side of assignment may not be polymorphic if it is a coarray + pac = 1 +end diff --git a/flang/test/Semantics/associated.f90 b/flang/test/Semantics/associated.f90 --- a/flang/test/Semantics/associated.f90 +++ b/flang/test/Semantics/associated.f90 @@ -73,6 +73,9 @@ type(t1), target :: t1xtarget type(t2) :: t2x type(t2), target :: t2xtarget + integer, target :: targetIntArr(2) + integer, target :: targetIntCoarray[*] + integer, pointer :: intPointerArr(:) !ERROR: missing mandatory 'pointer=' argument lVar = associated() @@ -177,5 +180,9 @@ cannotBeCalledfromImplicitPointer => externalProc !WARNING: Procedure pointer 'cannotbecalledfromimplicitpointer' with explicit interface that cannot be called via an implicit interface cannot be associated with procedure designator with an implicit interface lvar = associated(cannotBeCalledfromImplicitPointer, externalProc) + !ERROR: TARGET= argument 'targetintarr([INTEGER(8)::2_8,1_8])' may not have a vector subscript or coindexing + lvar = associated(intPointerArr, targetIntArr([2,1])) + !ERROR: TARGET= argument 'targetintcoarray[1_8]' may not have a vector subscript or coindexing + lvar = associated(intPointerVar1, targetIntCoarray[1]) end subroutine test end subroutine assoc diff --git a/flang/test/Semantics/bind-c02.f90 b/flang/test/Semantics/bind-c02.f90 --- a/flang/test/Semantics/bind-c02.f90 +++ b/flang/test/Semantics/bind-c02.f90 @@ -18,6 +18,7 @@ !ERROR: Only variable and named common block can be in BIND statement bind(c) :: sub + !PORTABILITY: Name 'm' declared in a module should not have the same name as the module bind(c) :: m ! no error for implicit type variable type my_type diff --git a/flang/test/Semantics/bindings03.f90 b/flang/test/Semantics/bindings03.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Semantics/bindings03.f90 @@ -0,0 +1,26 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -Werror +! 
Confirm a portability warning on use of a procedure binding apart from a call +module m + type t + contains + procedure :: sub + end type + contains + subroutine sub(x) + class(t), intent(in) :: x + end subroutine +end module + +program test + use m + procedure(sub), pointer :: p + type(t) x + !PORTABILITY: Procedure binding 'sub' used as target of a pointer assignment + p => x%sub + !PORTABILITY: Procedure binding 'sub' passed as an actual argument + call sub2(x%sub) + contains + subroutine sub2(s) + procedure(sub) s + end subroutine +end diff --git a/flang/test/Semantics/call25.f90 b/flang/test/Semantics/call25.f90 --- a/flang/test/Semantics/call25.f90 +++ b/flang/test/Semantics/call25.f90 @@ -13,6 +13,10 @@ character(5), intent(in) :: x explicitLength = x end function + character(6) function badExplicitLength(x) + character(5), intent(in) :: x + badExplicitLength = x + end function real function notChar(x) character(*), intent(in) :: x notChar = 0 @@ -34,6 +38,8 @@ external assumedlength character(5) :: assumedlength call subr1(explicitLength) + !CHECK: error: Actual argument function associated with procedure dummy argument 'f=' has incompatible result type + call subr1(badExplicitLength) call subr1(assumedLength) !CHECK: error: Actual argument function associated with procedure dummy argument 'f=' has incompatible result type call subr1(notChar) @@ -42,6 +48,9 @@ !CHECK: error: Actual argument function associated with procedure dummy argument 'f=' has incompatible result type call subr2(notChar) call subr3(explicitLength) + !CHECK: warning: If the procedure's interface were explicit, this reference would be in error + !CHECK: because: Actual argument function associated with procedure dummy argument 'f=' has incompatible result type + call subr3(badExplicitLength) call subr3(assumedLength) !CHECK: warning: If the procedure's interface were explicit, this reference would be in error !CHECK: because: Actual argument function associated with procedure dummy argument 'f=' has incompatible result type diff --git a/flang/test/Semantics/call28.f90 b/flang/test/Semantics/call28.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Semantics/call28.f90 @@ -0,0 +1,23 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 + +module m1 + type :: t + end type + contains + pure subroutine s1(x) + class(t), intent(in out) :: x + call s2(x) + call s3(x) + end subroutine + pure subroutine s2(x) + class(t), intent(in out) :: x + !ERROR: Left-hand side of assignment may not be polymorphic unless assignment is to an entire allocatable + !ERROR: Left-hand side of assignment is not definable + !BECAUSE: 'x' is polymorphic in a pure subprogram + x = t() + end subroutine + pure subroutine s3(x) + !ERROR: An INTENT(OUT) dummy argument of a pure subroutine may not be polymorphic + class(t), intent(out) :: x + end subroutine +end module diff --git a/flang/test/Semantics/modifiable01.f90 b/flang/test/Semantics/definable01.f90 rename from flang/test/Semantics/modifiable01.f90 rename to flang/test/Semantics/definable01.f90 --- a/flang/test/Semantics/modifiable01.f90 +++ b/flang/test/Semantics/definable01.f90 @@ -1,5 +1,5 @@ ! RUN: not %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s -! Test WhyNotModifiable() explanations +! 
Test WhyNotDefinable() explanations module prot real, protected :: prot @@ -67,4 +67,19 @@ !CHECK: because: 'ptr' is externally visible via 'ptr' and not definable in a pure subprogram read(internal,*) ptr end subroutine + subroutine test3(objp, procp) + real, intent(in), pointer :: objp + procedure(sin), pointer, intent(in) :: procp + !CHECK: error: Actual argument associated with INTENT(IN OUT) dummy argument 'op=' is not definable + !CHECK: because: 'objp' is an INTENT(IN) dummy argument + call test3a(objp) + !CHECK: error: Actual argument associated with procedure pointer dummy argument 'pp=' may not be INTENT(IN) + call test3b(procp) + end subroutine + subroutine test3a(op) + real, intent(in out), pointer :: op + end subroutine + subroutine test3b(pp) + procedure(sin), pointer, intent(in out) :: pp + end subroutine end module diff --git a/flang/test/Semantics/expr-errors05.f90 b/flang/test/Semantics/expr-errors05.f90 --- a/flang/test/Semantics/expr-errors05.f90 +++ b/flang/test/Semantics/expr-errors05.f90 @@ -1,7 +1,14 @@ -! RUN: %python %S/test_errors.py %s %flang_fc1 -! The components of a complex constructor (extension) must be scalar -!ERROR: Real part of complex constructor must be scalar +! RUN: %python %S/test_errors.py %s %flang_fc1 -Werror +!PORTABILITY: Real part of complex constructor is not scalar complex, parameter :: z1(*) = ([1.,2.], 3.) -!ERROR: Imaginary part of complex constructor must be scalar +!PORTABILITY: Imaginary part of complex constructor is not scalar complex, parameter :: z2(*) = (4., [5.,6.]) +real, parameter :: aa(*) = [7.,8.] +!PORTABILITY: Real part of complex literal constant is not scalar +complex, parameter :: z3(*) = (aa, 9.) +!PORTABILITY: Imaginary part of complex literal constant is not scalar +complex, parameter :: z4(*) = (10., aa) +!We need a nonzero exit status to make test_errors.py look at messages :-( +!WARNING: division by zero +real, parameter :: xxx = 1./0. end diff --git a/flang/test/Semantics/generic03.f90 b/flang/test/Semantics/generic03.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Semantics/generic03.f90 @@ -0,0 +1,34 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! Exercise function vs subroutine distinction in generics +module m1 + type t1 + integer n + end type + interface g1 + integer function f1(x, j) + import t1 + class(t1), intent(in out) :: x + integer, intent(in) :: j + end + end interface +end module + +program test + use m1 + !WARNING: Generic interface 'g1' has both a function and a subroutine + interface g1 + subroutine s1(x, a) + import t1 + class(t1), intent(in out) :: x + real, intent(in) :: a + end subroutine + end interface + type(t1) :: x + print *, g1(x,1) ! ok + !ERROR: No specific function of generic 'g1' matches the actual arguments + print *, g1(x,1.) + !ERROR: No specific subroutine of generic 'g1' matches the actual arguments + call g1(x,1) + call g1(x, 1.) ! ok + contains +end diff --git a/flang/test/Semantics/global01.f90 b/flang/test/Semantics/global01.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Semantics/global01.f90 @@ -0,0 +1,45 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -Werror +! 
Catch discrepancies between a local interface and a global definition + +subroutine global1(x) + integer, intent(in) :: x +end subroutine + +subroutine global2(x) bind(c,name="xyz") + integer, intent(in) :: x +end subroutine + +subroutine global3(x) + integer, intent(in) :: x +end subroutine + +pure subroutine global4(x) + integer, intent(in) :: x +end subroutine + +subroutine global5(x) + integer, intent(in) :: x +end subroutine + +program test + interface + !WARNING: The global subprogram 'global1' is not compatible with its local procedure declaration (incompatible dummy argument #1: incompatible dummy data object types: REAL(4) vs INTEGER(4)) + subroutine global1(x) + real, intent(in) :: x + end subroutine + subroutine global2(x) + real, intent(in) :: x + end subroutine + subroutine global3(x) bind(c,name="abc") + real, intent(in) :: x + end subroutine + subroutine global4(x) ! not PURE, but that's ok + integer, intent(in) :: x + end subroutine + !WARNING: The global subprogram 'global5' is not compatible with its local procedure declaration (incompatible procedure attributes: Pure) + pure subroutine global5(x) + integer, intent(in) :: x + end subroutine + end interface +end + diff --git a/flang/test/Semantics/io12.f90 b/flang/test/Semantics/io12.f90 --- a/flang/test/Semantics/io12.f90 +++ b/flang/test/Semantics/io12.f90 @@ -52,9 +52,9 @@ type(maybeBad) :: y type(poison) :: z write(u) x ! always ok - !ERROR: Derived type in I/O cannot have an allocatable or pointer direct component unless using defined I/O + !ERROR: Derived type 'maybebad' in I/O cannot have an allocatable or pointer direct component 'allocatablecomponent' unless using defined I/O write(u) y ! bad here - !ERROR: Derived type in I/O cannot have an allocatable or pointer direct component unless using defined I/O + !ERROR: Derived type 'poison' in I/O cannot have an allocatable or pointer direct component 'allocatablecomponent' unless using defined I/O write(u) z ! bad end subroutine end module @@ -69,7 +69,7 @@ type(poison) :: z write(u) x ! always ok write(u) y ! ok here - !ERROR: Derived type in I/O cannot have an allocatable or pointer direct component unless using defined I/O + !ERROR: Derived type 'poison' in I/O cannot have an allocatable or pointer direct component 'allocatablecomponent' unless using defined I/O write(u) z ! bad end subroutine end module diff --git a/flang/test/Semantics/io14.f90 b/flang/test/Semantics/io14.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Semantics/io14.f90 @@ -0,0 +1,37 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! Test polymorphic restrictions +module m + type base + end type + type, extends(base) :: t + integer n + contains + procedure :: fwrite + generic :: write(formatted) => fwrite + end type + contains + subroutine fwrite(x, unit, iotype, vlist, iostat, iomsg) + class(t), intent(in) :: x + integer, intent(in) :: unit + character(*), intent(in) :: iotype + integer, intent(in) :: vlist(:) + integer, intent(out) :: iostat + character(*), intent(in out) :: iomsg + write(unit, *, iostat=iostat, iomsg=iomsg) '(', iotype, ':', vlist, ':', x%n, ')' + end subroutine + subroutine subr(x, y, z) + class(t), intent(in) :: x + class(base), intent(in) :: y + class(*), intent(in) :: z + print *, x ! 
ok + !ERROR: Derived type 'base' in I/O may not be polymorphic unless using defined I/O + print *, y + !ERROR: I/O list item may not be unlimited polymorphic + print *, z + end subroutine +end + +program main + use m + call subr(t(123),t(234),t(345)) +end diff --git a/flang/test/Semantics/io15.f90 b/flang/test/Semantics/io15.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Semantics/io15.f90 @@ -0,0 +1,55 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! Test visibility restrictions +module m + type t1 + integer, private :: ip1 = 123 + contains + procedure :: fwrite1 + generic :: write(formatted) => fwrite1 + end type t1 + type t2 + integer, private :: ip2 = 234 + type(t1) x1 + end type t2 + type t3 + type(t1) x1 + type(t2) x2 + end type t3 + type, extends(t2) :: t4 + end type t4 + contains + subroutine fwrite1(x, unit, iotype, vlist, iostat, iomsg) + class(t1), intent(in) :: x + integer, intent(in) :: unit + character(*), intent(in) :: iotype + integer, intent(in) :: vlist(:) + integer, intent(out) :: iostat + character(*), intent(in out) :: iomsg + write(unit, *, iostat=iostat, iomsg=iomsg) '(', iotype, ':', vlist, ':', x%ip1, ')' + end subroutine + subroutine local ! all OK since type is local + type(t1) :: x1 + type(t2) :: x2 + type(t3) :: x3 + type(t4) :: x4 + print *, x1 + print *, x2 + print *, x3 + print *, x4 + end subroutine +end module + +program main + use m + type(t1) :: x1 + type(t2) :: x2 + type(t3) :: x3 + type(t4) :: x4 + print *, x1 ! ok + !ERROR: I/O of the derived type 't2' may not be performed without defined I/O in a scope in which a direct component like 'ip2' is inaccessible + print *, x2 + !ERROR: I/O of the derived type 't3' may not be performed without defined I/O in a scope in which a direct component like 'ip2' is inaccessible + print *, x3 + !ERROR: I/O of the derived type 't4' may not be performed without defined I/O in a scope in which a direct component like 'ip2' is inaccessible + print *, x4 +end diff --git a/flang/test/Semantics/local-vs-global.f90 b/flang/test/Semantics/local-vs-global.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Semantics/local-vs-global.f90 @@ -0,0 +1,164 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 + +module module_before_1 +end + +module module_before_2 +end + +block data block_data_before_1 +end + +block data block_data_before_2 +end + +subroutine explicit_before_1(a) + real, optional :: a +end + +subroutine explicit_before_2(a) + real, optional :: a +end + +subroutine implicit_before_1(a) + real :: a +end + +subroutine implicit_before_2(a) + real :: a +end + +function explicit_func_before_1(a) + real, optional :: a +end + +function explicit_func_before_2(a) + real, optional :: a +end + +function implicit_func_before_1(a) + real :: a +end + +function implicit_func_before_2(a) + real :: a +end + +program test + external justfine ! 
OK to name a BLOCK DATA if not called + !ERROR: The global entity 'module_before_1' corresponding to the local procedure 'module_before_1' is not a callable subprogram + external module_before_1 + !ERROR: The global entity 'block_data_before_1' corresponding to the local procedure 'block_data_before_1' is not a callable subprogram + external block_data_before_1 + !ERROR: The global subprogram 'explicit_before_1' may not be referenced via the implicit interface 'explicit_before_1' + external explicit_before_1 + external implicit_before_1 + !ERROR: The global subprogram 'explicit_func_before_1' may not be referenced via the implicit interface 'explicit_func_before_1' + external explicit_func_before_1 + external implicit_func_before_1 + !ERROR: The global entity 'module_after_1' corresponding to the local procedure 'module_after_1' is not a callable subprogram + external module_after_1 + !ERROR: The global entity 'block_data_after_1' corresponding to the local procedure 'block_data_after_1' is not a callable subprogram + external block_data_after_1 + !ERROR: The global subprogram 'explicit_after_1' may not be referenced via the implicit interface 'explicit_after_1' + external explicit_after_1 + external implicit_after_1 + !ERROR: The global subprogram 'explicit_func_after_1' may not be referenced via the implicit interface 'explicit_func_after_1' + external explicit_func_after_1 + external implicit_func_after_1 + call module_before_1 + !ERROR: 'module_before_2' is not a callable procedure + call module_before_2 + call block_data_before_1 + !ERROR: 'block_data_before_2' is not a callable procedure + call block_data_before_2 + call explicit_before_1(1.) + !ERROR: References to the procedure 'explicit_before_2' require an explicit interface + call explicit_before_2(1.) + !WARNING: If the procedure's interface were explicit, this reference would be in error + !BECAUSE: Dummy argument 'a=' (#1) is not OPTIONAL and is not associated with an actual argument in this procedure reference + call implicit_before_1 + !WARNING: If the procedure's interface were explicit, this reference would be in error + !BECAUSE: Dummy argument 'a=' (#1) is not OPTIONAL and is not associated with an actual argument in this procedure reference + call implicit_before_2 + print *, explicit_func_before_1(1.) + !ERROR: References to the procedure 'explicit_func_before_2' require an explicit interface + print *, explicit_func_before_2(1.) + !WARNING: If the procedure's interface were explicit, this reference would be in error + !BECAUSE: Dummy argument 'a=' (#1) is not OPTIONAL and is not associated with an actual argument in this procedure reference + print *, implicit_func_before_1() + !WARNING: If the procedure's interface were explicit, this reference would be in error + !BECAUSE: Dummy argument 'a=' (#1) is not OPTIONAL and is not associated with an actual argument in this procedure reference + print *, implicit_func_before_2() + call module_after_1 + call module_after_2 + call block_data_after_1 + call block_data_after_2 + call explicit_after_1(1.) + !ERROR: References to the procedure 'explicit_after_2' require an explicit interface + call explicit_after_2(1.) 
+ !WARNING: If the procedure's interface were explicit, this reference would be in error + !BECAUSE: Dummy argument 'a=' (#1) is not OPTIONAL and is not associated with an actual argument in this procedure reference + call implicit_after_1 + !WARNING: If the procedure's interface were explicit, this reference would be in error + !BECAUSE: Dummy argument 'a=' (#1) is not OPTIONAL and is not associated with an actual argument in this procedure reference + call implicit_after_2 + print *, explicit_func_after_1(1.) + !ERROR: References to the procedure 'explicit_func_after_2' require an explicit interface + print *, explicit_func_after_2(1.) + !WARNING: If the procedure's interface were explicit, this reference would be in error + !BECAUSE: Dummy argument 'a=' (#1) is not OPTIONAL and is not associated with an actual argument in this procedure reference + print *, implicit_func_after_1() + !WARNING: If the procedure's interface were explicit, this reference would be in error + !BECAUSE: Dummy argument 'a=' (#1) is not OPTIONAL and is not associated with an actual argument in this procedure reference + print *, implicit_func_after_2() +end program + +block data justfine +end + +module module_after_1 +end + +!ERROR: 'module_after_2' is already declared in this scoping unit +module module_after_2 +end + +block data block_data_after_1 +end + +!ERROR: BLOCK DATA 'block_data_after_2' has been called +block data block_data_after_2 +end + +subroutine explicit_after_1(a) + real, optional :: a +end + +subroutine explicit_after_2(a) + real, optional :: a +end + +subroutine implicit_after_1(a) + real :: a +end + +subroutine implicit_after_2(a) + real :: a +end + +function explicit_func_after_1(a) + real, optional :: a +end + +function explicit_func_after_2(a) + real, optional :: a +end + +function implicit_func_after_1(a) + real :: a +end + +function implicit_func_after_2(a) + real :: a +end diff --git a/flang/test/Semantics/null01.f90 b/flang/test/Semantics/null01.f90 --- a/flang/test/Semantics/null01.f90 +++ b/flang/test/Semantics/null01.f90 @@ -32,6 +32,7 @@ external implicit type :: dt0 integer, pointer :: ip0 + integer :: n = 666 end type dt0 type :: dt1 integer, pointer :: ip1(:) @@ -42,11 +43,15 @@ type :: dt3 procedure(s1), pointer, nopass :: pps1 end type dt3 + type :: dt4 + real, allocatable :: ra0 + end type dt4 integer :: j type(dt0) :: dt0x type(dt1) :: dt1x type(dt2) :: dt2x type(dt3) :: dt3x + type(dt4) :: dt4x integer, pointer :: ip0, ip1(:), ip2(:,:) integer, allocatable :: ia0, ia1(:), ia2(:,:) real, pointer :: rp0, rp1(:) @@ -55,6 +60,7 @@ integer, parameter :: ip2r = rank(null(mold=ip2)) integer, parameter :: eight = ip0r + ip1r + ip2r + 5 real(kind=eight) :: r8check + logical, pointer :: lp ip0 => null() ! ok ip1 => null() ! ok ip2 => null() ! ok @@ -68,6 +74,8 @@ dt0x = dt0(ip0=null(mold=ip0)) !ERROR: function result type 'REAL(4)' is not compatible with pointer type 'INTEGER(4)' dt0x = dt0(ip0=null(mold=rp0)) + !ERROR: A NULL pointer may not be used as the value for component 'n' + dt0x = dt0(null(), null()) !ERROR: function result type 'REAL(4)' is not compatible with pointer type 'INTEGER(4)' dt1x = dt1(ip1=null(mold=rp1)) dt2x = dt2(pps0=null()) @@ -77,6 +85,14 @@ !ERROR: Procedure pointer 'pps1' associated with result of reference to function 'null' that is an incompatible procedure pointer: distinct numbers of dummy arguments dt3x = dt3(pps1=null(mold=dt2x%pps0)) dt3x = dt3(pps1=null(mold=dt3x%pps1)) + dt4x = dt4(null()) ! 
ok + !PORTABILITY: NULL() with arguments is not standard conforming as the value for allocatable component 'ra0' + dt4x = dt4(null(rp0)) + !PORTABILITY: NULL() with arguments is not standard conforming as the value for allocatable component 'ra0' + !ERROR: Rank-1 array value is not compatible with scalar component 'ra0' + dt4x = dt4(null(rp1)) + !ERROR: A NULL procedure pointer may not be used as the value for component 'ra0' + dt4x = dt4(null(dt2x%pps0)) call canbenull(null(), null()) ! fine call canbenull(null(mold=ip0), null(mold=rp0)) ! fine !ERROR: Null pointer argument requires an explicit interface @@ -87,4 +103,10 @@ print *, sin(null(rp0)) !ERROR: A NULL() pointer is not allowed for 'source=' intrinsic argument print *, transfer(null(rp0),ip0) + !ERROR: NULL() may not be used as an expression in this context + select case(null(ip0)) + end select + !ERROR: NULL() may not be used as an expression in this context + if (null(lp)) then + end if end subroutine test diff --git a/flang/test/Semantics/procinterface01.f90 b/flang/test/Semantics/procinterface01.f90 --- a/flang/test/Semantics/procinterface01.f90 +++ b/flang/test/Semantics/procinterface01.f90 @@ -130,9 +130,9 @@ end function nested5 end module module1 -!DEF: /explicit1 ELEMENTAL (Function) Subprogram REAL(4) +!DEF: /explicit1 (Function) Subprogram REAL(4) !DEF: /explicit1/x INTENT(IN) ObjectEntity REAL(4) -real elemental function explicit1(x) +real function explicit1(x) !REF: /explicit1/x real, intent(in) :: x !DEF: /explicit1/explicit1 ObjectEntity REAL(4) @@ -150,14 +150,13 @@ logical = x+3. end function logical -!DEF: /tan (Function) Subprogram REAL(4) +!DEF: /tan (Function) Subprogram CHARACTER(1_8,1) !DEF: /tan/x INTENT(IN) ObjectEntity REAL(4) -real function tan(x) +character*1 function tan(x) !REF: /tan/x real, intent(in) :: x - !DEF: /tan/tan ObjectEntity REAL(4) - !REF: /tan/x - tan = x+5. + !DEF: /tan/tan ObjectEntity CHARACTER(1_8,1) + tan = "?" end function tan !DEF: /main MainProgram diff --git a/flang/test/Semantics/resolve05.f90 b/flang/test/Semantics/resolve05.f90 --- a/flang/test/Semantics/resolve05.f90 +++ b/flang/test/Semantics/resolve05.f90 @@ -1,12 +1,20 @@ ! RUN: %python %S/test_errors.py %s %flang_fc1 program p - integer :: p ! this is ok + !PORTABILITY: Name 'p' declared in a main program should not have the same name as the main program + integer :: p end module m - integer :: m ! this is ok + !PORTABILITY: Name 'm' declared in a module should not have the same name as the module + integer :: m end submodule(m) sm - integer :: sm ! this is ok + !PORTABILITY: Name 'sm' declared in a submodule should not have the same name as the submodule + integer :: sm +end +block data bd + !PORTABILITY: Name 'bd' declared in a BLOCK DATA subprogram should not have the same name as the BLOCK DATA subprogram + type bd + end type end module m2 type :: t diff --git a/flang/test/Semantics/resolve102.f90 b/flang/test/Semantics/resolve102.f90 --- a/flang/test/Semantics/resolve102.f90 +++ b/flang/test/Semantics/resolve102.f90 @@ -30,6 +30,7 @@ !ERROR: Procedure 'p' is recursively defined. Procedures in the cycle: 'p', 'sub', 'p2' procedure(sub) :: p interface + !ERROR: Procedure 'sub' is recursively defined. Procedures in the cycle: 'p', 'sub', 'p2' subroutine sub(p2) import p procedure(p) :: p2 diff --git a/flang/test/Semantics/resolve110.f90 b/flang/test/Semantics/resolve110.f90 --- a/flang/test/Semantics/resolve110.f90 +++ b/flang/test/Semantics/resolve110.f90 @@ -1,5 +1,7 @@ ! 
RUN: %python %S/test_errors.py %s %flang_fc1 ! Exercise ways to define and extend non-type-bound generics +! TODO: crashes compiler (infinite recursion) when build with MSVC +! XFAIL: system-windows module m1 type :: t1; end type diff --git a/flang/test/Semantics/resolve20.f90 b/flang/test/Semantics/resolve20.f90 --- a/flang/test/Semantics/resolve20.f90 +++ b/flang/test/Semantics/resolve20.f90 @@ -37,7 +37,8 @@ type :: bad3 end type - type :: m ! the name of a module can be used as a local identifier + !PORTABILITY: Name 'm' declared in a module should not have the same name as the module + type :: m end type m !ERROR: EXTERNAL attribute was already specified on 'a' diff --git a/flang/test/Semantics/resolve34.f90 b/flang/test/Semantics/resolve34.f90 --- a/flang/test/Semantics/resolve34.f90 +++ b/flang/test/Semantics/resolve34.f90 @@ -91,9 +91,9 @@ type(t2) :: x integer :: j j = x%i2 - !ERROR: PRIVATE component 'i3' is only accessible within module 'm7' + !ERROR: PRIVATE name 'i3' is only accessible within module 'm7' j = x%i3 - !ERROR: PRIVATE component 't1' is only accessible within module 'm7' + !ERROR: PRIVATE name 't1' is only accessible within module 'm7' j = x%t1%i1 end @@ -117,11 +117,11 @@ subroutine s8 use m8 type(t) :: x - !ERROR: PRIVATE component 'i2' is only accessible within module 'm8' + !ERROR: PRIVATE name 'i2' is only accessible within module 'm8' x = t(2, 5) - !ERROR: PRIVATE component 'i2' is only accessible within module 'm8' + !ERROR: PRIVATE name 'i2' is only accessible within module 'm8' x = t(i1=2, i2=5) - !ERROR: PRIVATE component 'i2' is only accessible within module 'm8' + !ERROR: PRIVATE name 'i2' is only accessible within module 'm8' a = [y%i2] end @@ -143,3 +143,24 @@ x = t(i1=2, i2=5) !OK end end + +module m10 + type t + integer n + contains + procedure :: f + generic, private :: operator(+) => f + end type + contains + type(t) function f(x,y) + class(t), intent(in) :: x, y + f = t(x%n + y%n) + end function +end module +subroutine s10 + use m10 + type(t) x + x = t(1) + !ERROR: PRIVATE name 'operator(+)' is only accessible within module 'm10' + x = x + x +end subroutine diff --git a/flang/test/Semantics/resolve53.f90 b/flang/test/Semantics/resolve53.f90 --- a/flang/test/Semantics/resolve53.f90 +++ b/flang/test/Semantics/resolve53.f90 @@ -97,7 +97,6 @@ end subroutine end interface end - ! Two procedures that differ only by attributes are not distinguishable module m8 @@ -468,7 +467,7 @@ end interface end module -subroutine s1() +subroutine subr1() use m20 interface operator(.not.) !ERROR: Procedure 'f' from module 'm20' is already specified in generic 'OPERATOR(.NOT.)' @@ -478,7 +477,7 @@ !ERROR: Procedure 'f' from module 'm20' is already specified in generic 'OPERATOR(+)' procedure f end interface -end subroutine s1 +end subroutine subr1 ! Extensions for distinguishable allocatable arguments; these should not ! elicit errors from f18 diff --git a/flang/test/Semantics/resolve62.f90 b/flang/test/Semantics/resolve62.f90 --- a/flang/test/Semantics/resolve62.f90 +++ b/flang/test/Semantics/resolve62.f90 @@ -1,6 +1,6 @@ ! RUN: %python %S/test_errors.py %s %flang_fc1 ! Resolve generic based on number of arguments -subroutine s1 +subroutine subr1 interface f real function f1(x) optional :: x @@ -10,12 +10,12 @@ end interface z = f(1.0) z = f(1.0, 2.0) - !ERROR: No specific procedure of generic 'f' matches the actual arguments + !ERROR: No specific function of generic 'f' matches the actual arguments z = f(1.0, 2.0, 3.0) end ! 
Elemental and non-element function both match: non-elemental one should be used -subroutine s2 +subroutine subr2 interface f logical elemental function f1(x) intent(in) :: x @@ -53,10 +53,10 @@ real, protected :: x real :: y interface s - pure subroutine s1(x) + pure subroutine s101(x) real, intent(out) :: x end - subroutine s2(x, y) + subroutine s102(x, y) real :: x, y end end interface diff --git a/flang/test/Semantics/resolve63.f90 b/flang/test/Semantics/resolve63.f90 --- a/flang/test/Semantics/resolve63.f90 +++ b/flang/test/Semantics/resolve63.f90 @@ -58,15 +58,15 @@ l = z'fe' == r !OK l = cVar == z'fe' !OK l = z'fe' == cVar !OK - !ERROR: No intrinsic or user-defined OPERATOR(==) matches operand types CHARACTER(KIND=1) and INTEGER(4) + !ERROR: Operands of .EQ. must have comparable types; have CHARACTER(KIND=1) and INTEGER(4) l = charVar == z'fe' - !ERROR: No intrinsic or user-defined OPERATOR(==) matches operand types INTEGER(4) and CHARACTER(KIND=1) + !ERROR: Operands of .EQ. must have comparable types; have INTEGER(4) and CHARACTER(KIND=1) l = z'fe' == charVar - !ERROR: No intrinsic or user-defined OPERATOR(==) matches operand types LOGICAL(4) and INTEGER(4) - l = l == z'fe' !OK - !ERROR: No intrinsic or user-defined OPERATOR(==) matches operand types INTEGER(4) and LOGICAL(4) - l = z'fe' == l !OK - !ERROR: No intrinsic or user-defined OPERATOR(==) matches operand types TYPE(t) and REAL(4) + !ERROR: Operands of .EQ. must have comparable types; have LOGICAL(4) and INTEGER(4) + l = l == z'fe' + !ERROR: Operands of .EQ. must have comparable types; have INTEGER(4) and LOGICAL(4) + l = z'fe' == l + !ERROR: Operands of .EQ. must have comparable types; have TYPE(t) and REAL(4) l = x == r lVar = z'a' == b'1010' !OK @@ -265,9 +265,9 @@ i = x + y i = x + i i = y + i - !ERROR: No intrinsic or user-defined OPERATOR(+) matches operand types CLASS(t2) and CLASS(t1) + !ERROR: Operands of + must be numeric; have CLASS(t2) and CLASS(t1) i = y + x - !ERROR: No intrinsic or user-defined OPERATOR(+) matches operand types INTEGER(4) and CLASS(t1) + !ERROR: Operands of + must be numeric; have INTEGER(4) and CLASS(t1) i = i + x end end @@ -307,9 +307,9 @@ j = null() - null(mold=x1) j = null(mold=x1) - null() j = null() - null() - !ERROR: No intrinsic or user-defined OPERATOR(/) matches operand types untyped and TYPE(t1) + !ERROR: A NULL() pointer is not allowed as an operand here j = null() / null(mold=x1) - !ERROR: No intrinsic or user-defined OPERATOR(/) matches operand types TYPE(t1) and untyped + !ERROR: A NULL() pointer is not allowed as an operand here j = null(mold=x1) / null() !ERROR: A NULL() pointer is not allowed as an operand here j = null() / null() diff --git a/flang/test/Semantics/resolve64.f90 b/flang/test/Semantics/resolve64.f90 --- a/flang/test/Semantics/resolve64.f90 +++ b/flang/test/Semantics/resolve64.f90 @@ -37,9 +37,9 @@ subroutine s1(x, y, z) logical :: x complex :: y, z - !ERROR: No intrinsic or user-defined OPERATOR(.A.) matches operand types COMPLEX(4) and COMPLEX(4) + !ERROR: Operands of .AND. must be LOGICAL; have COMPLEX(4) and COMPLEX(4) x = y .and. z - !ERROR: No intrinsic or user-defined OPERATOR(.A.) matches operand types COMPLEX(4) and COMPLEX(4) + !ERROR: Operands of .AND. must be LOGICAL; have COMPLEX(4) and COMPLEX(4) x = y .a. 
z end end diff --git a/flang/test/Semantics/resolve68.f90 b/flang/test/Semantics/resolve68.f90 --- a/flang/test/Semantics/resolve68.f90 +++ b/flang/test/Semantics/resolve68.f90 @@ -21,14 +21,14 @@ type(t) :: x integer :: y integer :: z - !ERROR: No specific procedure of generic 'g' matches the actual arguments + !ERROR: No specific function of generic 'g' matches the actual arguments z = x%g(y) end subroutine test2(x, y, z) type(t) :: x real :: y integer :: z - !ERROR: No specific procedure of generic 'g' matches the actual arguments + !ERROR: No specific function of generic 'g' matches the actual arguments z = x%g(x, y) end end diff --git a/flang/test/Semantics/resolve69.f90 b/flang/test/Semantics/resolve69.f90 --- a/flang/test/Semantics/resolve69.f90 +++ b/flang/test/Semantics/resolve69.f90 @@ -26,6 +26,16 @@ character(:) :: colonString2 !OK because of the allocatable attribute character(:), allocatable :: colonString3 +!ERROR: 'foo1' has a type CHARACTER(KIND=1,LEN=:) with a deferred type parameter but is neither an allocatable or a pointer + character(:), external :: foo1 +!ERROR: 'foo2' has a type CHARACTER(KIND=1,LEN=:) with a deferred type parameter but is neither an allocatable or a pointer + procedure(character(:)) :: foo2 + interface + function foo3() +!ERROR: 'foo3' has a type CHARACTER(KIND=1,LEN=:) with a deferred type parameter but is neither an allocatable or a pointer + character(:) foo3 + end function + end interface !ERROR: Must have INTEGER type, but is REAL(4) character(3.5) :: badParamValue @@ -75,6 +85,8 @@ implicit character(:)(f) end function +!Not errors. + Program d5 Type string(maxlen) Integer,Kind :: maxlen @@ -85,7 +97,6 @@ Print *,Trim(line%value) End Program -!Not errors. subroutine outer integer n contains diff --git a/flang/test/Semantics/resolve77.f90 b/flang/test/Semantics/resolve77.f90 --- a/flang/test/Semantics/resolve77.f90 +++ b/flang/test/Semantics/resolve77.f90 @@ -10,7 +10,7 @@ end interface !ERROR: Automatic data object 'a' may not appear in the specification part of a module real :: a(if1(1)) - !ERROR: No specific procedure of generic 'ifn2' matches the actual arguments + !ERROR: No specific function of generic 'ifn2' matches the actual arguments real :: b(ifn2(1)) contains subroutine t1(n) diff --git a/flang/test/Semantics/stmt-func01.f90 b/flang/test/Semantics/stmt-func01.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Semantics/stmt-func01.f90 @@ -0,0 +1,44 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! 
C1577 +program main + type t1(k,l) + integer, kind :: k = kind(1) + integer, len :: l = 666 + integer(k) n + end type t1 + interface + pure integer function ifunc() + end function + end interface + type(t1(k=4,l=ifunc())) x1 + !PORTABILITY: Statement function 'sf1' should not contain an array constructor + sf1(n) = sum([(j,j=1,n)]) + type(t1) sf2 + !PORTABILITY: Statement function 'sf2' should not contain a structure constructor + sf2(n) = t1(n) + !PORTABILITY: Statement function 'sf3' should not contain a type parameter inquiry + sf3(n) = x1%l + !ERROR: Recursive call to statement function 'sf4' is not allowed + sf4(n) = sf4(n) + !ERROR: Statement function 'sf5' may not reference another statement function 'sf6' that is defined later + sf5(n) = sf6(n) + real sf7 + !ERROR: Statement function 'sf6' may not reference another statement function 'sf7' that is defined later + sf6(n) = sf7(n) + !PORTABILITY: Statement function 'sf7' should not reference function 'explicit' that requires an explicit interface + sf7(n) = explicit(n) + real :: a(3) = [1., 2., 3.] + !PORTABILITY: Statement function 'sf8' should not pass an array argument that is not a whole array + sf8(n) = sum(a(1:2)) + sf8a(n) = sum(a) ! ok + contains + real function explicit(x,y) + integer, intent(in) :: x + integer, intent(in), optional :: y + explicit = x + end function + pure function arr() + real :: arr(2) + arr = [1., 2.] + end function +end diff --git a/flang/test/Semantics/symbol11.f90 b/flang/test/Semantics/symbol11.f90 --- a/flang/test/Semantics/symbol11.f90 +++ b/flang/test/Semantics/symbol11.f90 @@ -68,7 +68,7 @@ !REF: /s3/t2 class is (t2) !REF: /s3/i - !DEF: /s3/OtherConstruct1/y TARGET AssocEntity TYPE(t2) + !DEF: /s3/OtherConstruct1/y TARGET AssocEntity CLASS(t2) !REF: /s3/t2/a2 i = y%a2 !REF: /s3/t1 @@ -79,7 +79,8 @@ i = y%a1 class default !DEF: /s3/OtherConstruct3/y TARGET AssocEntity CLASS(t1) - print *, y + !REF:/s3/t1/a1 + print *, y%a1 end select end subroutine diff --git a/flang/tools/f18-parse-demo/f18-parse-demo.cpp b/flang/tools/f18-parse-demo/f18-parse-demo.cpp --- a/flang/tools/f18-parse-demo/f18-parse-demo.cpp +++ b/flang/tools/f18-parse-demo/f18-parse-demo.cpp @@ -111,7 +111,7 @@ ErrMsg = Program.getError().message(); if (!Program || llvm::sys::ExecuteAndWait( - Program.get(), argv, llvm::None, {}, 0, 0, &ErrMsg)) { + Program.get(), argv, std::nullopt, {}, 0, 0, &ErrMsg)) { llvm::errs() << "execvp(" << argv[0] << ") failed: " << ErrMsg << '\n'; exit(EXIT_FAILURE); } diff --git a/flang/unittests/Optimizer/Builder/CharacterTest.cpp b/flang/unittests/Optimizer/Builder/CharacterTest.cpp --- a/flang/unittests/Optimizer/Builder/CharacterTest.cpp +++ b/flang/unittests/Optimizer/Builder/CharacterTest.cpp @@ -28,7 +28,7 @@ // Set the insertion point in the function entry block. mlir::ModuleOp mod = builder.create(loc); mlir::func::FuncOp func = mlir::func::FuncOp::create( - loc, "func1", builder.getFunctionType(llvm::None, llvm::None)); + loc, "func1", builder.getFunctionType(std::nullopt, std::nullopt)); auto *entryBlock = func.addEntryBlock(); mod.push_back(mod); builder.setInsertionPointToStart(entryBlock); diff --git a/flang/unittests/Optimizer/Builder/ComplexTest.cpp b/flang/unittests/Optimizer/Builder/ComplexTest.cpp --- a/flang/unittests/Optimizer/Builder/ComplexTest.cpp +++ b/flang/unittests/Optimizer/Builder/ComplexTest.cpp @@ -24,7 +24,7 @@ // Set the insertion point in the function entry block. 
mlir::ModuleOp mod = builder.create(loc); mlir::func::FuncOp func = mlir::func::FuncOp::create( - loc, "func1", builder.getFunctionType(llvm::None, llvm::None)); + loc, "func1", builder.getFunctionType(std::nullopt, std::nullopt)); auto *entryBlock = func.addEntryBlock(); mod.push_back(mod); builder.setInsertionPointToStart(entryBlock); diff --git a/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp b/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp --- a/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp +++ b/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp @@ -28,7 +28,7 @@ // Set the insertion point in the function entry block. mlir::ModuleOp mod = builder.create(loc); mlir::func::FuncOp func = mlir::func::FuncOp::create( - loc, "func1", builder.getFunctionType(llvm::None, llvm::None)); + loc, "func1", builder.getFunctionType(std::nullopt, std::nullopt)); auto *entryBlock = func.addEntryBlock(); mod.push_back(mod); builder.setInsertionPointToStart(entryBlock); @@ -176,7 +176,7 @@ EXPECT_EQ(nullptr, func2); auto loc = builder.getUnknownLoc(); func2 = builder.createFunction( - loc, "func2", builder.getFunctionType(llvm::None, llvm::None)); + loc, "func2", builder.getFunctionType(std::nullopt, std::nullopt)); auto func2query = builder.getNamedFunction("func2"); EXPECT_EQ(func2, func2query); } diff --git a/flang/unittests/Optimizer/Builder/HLFIRToolsTest.cpp b/flang/unittests/Optimizer/Builder/HLFIRToolsTest.cpp --- a/flang/unittests/Optimizer/Builder/HLFIRToolsTest.cpp +++ b/flang/unittests/Optimizer/Builder/HLFIRToolsTest.cpp @@ -27,7 +27,7 @@ // Set the insertion point in the function entry block. mlir::ModuleOp mod = builder.create(loc); mlir::func::FuncOp func = mlir::func::FuncOp::create( - loc, "func1", builder.getFunctionType(llvm::None, llvm::None)); + loc, "func1", builder.getFunctionType(std::nullopt, std::nullopt)); auto *entryBlock = func.addEntryBlock(); mod.push_back(mod); builder.setInsertionPointToStart(entryBlock); diff --git a/flang/unittests/Optimizer/Builder/Runtime/RuntimeCallTestBase.h b/flang/unittests/Optimizer/Builder/Runtime/RuntimeCallTestBase.h --- a/flang/unittests/Optimizer/Builder/Runtime/RuntimeCallTestBase.h +++ b/flang/unittests/Optimizer/Builder/Runtime/RuntimeCallTestBase.h @@ -27,7 +27,7 @@ mlir::ModuleOp mod = builder.create(loc); mlir::func::FuncOp func = mlir::func::FuncOp::create(loc, "runtime_unit_tests_func", - builder.getFunctionType(llvm::None, llvm::None)); + builder.getFunctionType(std::nullopt, std::nullopt)); auto *entryBlock = func.addEntryBlock(); mod.push_back(mod); builder.setInsertionPointToStart(entryBlock); diff --git a/flang/unittests/Optimizer/FortranVariableTest.cpp b/flang/unittests/Optimizer/FortranVariableTest.cpp --- a/flang/unittests/Optimizer/FortranVariableTest.cpp +++ b/flang/unittests/Optimizer/FortranVariableTest.cpp @@ -22,7 +22,7 @@ mlir::ModuleOp mod = builder->create(loc); mlir::func::FuncOp func = mlir::func::FuncOp::create(loc, "fortran_variable_tests", - builder->getFunctionType(llvm::None, llvm::None)); + builder->getFunctionType(std::nullopt, std::nullopt)); auto *entryBlock = func.addEntryBlock(); mod.push_back(mod); builder->setInsertionPointToStart(entryBlock); @@ -49,7 +49,7 @@ mlir::Value addr = builder->create(loc, eleType); auto name = mlir::StringAttr::get(&context, "x"); auto declare = builder->create(loc, addr.getType(), addr, - /*shape=*/mlir::Value{}, /*typeParams=*/llvm::None, name, + /*shape=*/mlir::Value{}, /*typeParams=*/std::nullopt, name, 
/*fortran_attrs=*/fir::FortranVariableFlagsAttr{}); fir::FortranVariableOpInterface fortranVariable = declare; @@ -100,11 +100,11 @@ extents.size(), fir::SequenceType::getUnknownExtent()); mlir::Type seqTy = fir::SequenceType::get(typeShape, eleType); mlir::Value addr = builder->create( - loc, seqTy, /*pinned=*/false, /*typeParams=*/llvm::None, extents); + loc, seqTy, /*pinned=*/false, /*typeParams=*/std::nullopt, extents); mlir::Value shape = createShape(extents); auto name = mlir::StringAttr::get(&context, "x"); auto declare = builder->create(loc, addr.getType(), addr, - shape, /*typeParams*/ llvm::None, name, + shape, /*typeParams*/ std::nullopt, name, /*fortran_attrs=*/fir::FortranVariableFlagsAttr{}); fir::FortranVariableOpInterface fortranVariable = declare; diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake --- a/libc/cmake/modules/LLVMLibCTestRules.cmake +++ b/libc/cmake/modules/LLVMLibCTestRules.cmake @@ -16,6 +16,11 @@ set(object_files "") set(skipped_list "") foreach(dep IN LISTS ARGN) + if (NOT TARGET ${dep}) + # Skip any tests whose dependencies have not been defined. + list(APPEND skipped_list ${dep}) + continue() + endif() get_target_property(dep_type ${dep} "TARGET_TYPE") if(NOT dep_type) # Target for which TARGET_TYPE property is not set do not diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h --- a/libc/src/__support/UInt.h +++ b/libc/src/__support/UInt.h @@ -191,6 +191,78 @@ } } + // Return the full product. + template + constexpr UInt ful_mul(const UInt &other) const { + UInt result(0); + UInt<128> partial_sum(0); + uint64_t carry = 0; + constexpr size_t OtherWordCount = UInt::WordCount; + for (size_t i = 0; i <= WordCount + OtherWordCount - 2; ++i) { + const size_t lower_idx = i < OtherWordCount ? 0 : i - OtherWordCount + 1; + const size_t upper_idx = i < WordCount ? i : WordCount - 1; + for (size_t j = lower_idx; j <= upper_idx; ++j) { + NumberPair prod = full_mul(val[j], other.val[i - j]); + UInt<128> tmp({prod.lo, prod.hi}); + carry += partial_sum.add(tmp); + } + result.val[i] = partial_sum.val[0]; + partial_sum.val[0] = partial_sum.val[1]; + partial_sum.val[1] = carry; + carry = 0; + } + result.val[WordCount + OtherWordCount - 1] = partial_sum.val[0]; + return result; + } + + // Fast hi part of the full product. The normal product `operator*` returns + // `Bits` least significant bits of the full product, while this function will + // approximate `Bits` most significant bits of the full product with errors + // bounded by: + // 0 <= (a.full_mul(b) >> Bits) - a.quick_mul_hi(b)) <= WordCount - 1. + // + // An example usage of this is to quickly (but less accurately) compute the + // product of (normalized) mantissas of floating point numbers: + // (mant_1, mant_2) -> quick_mul_hi -> normalize leading bit + // is much more efficient than: + // (mant_1, mant_2) -> ful_mul -> normalize leading bit + // -> convert back to same Bits width by shifting/rounding, + // especially for higher precisions. + // + // Performance summary: + // Number of 64-bit x 64-bit -> 128-bit multiplications performed. + // Bits WordCount ful_mul quick_mul_hi Error bound + // 128 2 4 3 1 + // 196 3 9 6 2 + // 256 4 16 10 3 + // 512 8 64 36 7 + constexpr UInt quick_mul_hi(const UInt &other) const { + UInt result(0); + UInt<128> partial_sum(0); + uint64_t carry = 0; + // First round of accumulation for those at WordCount - 1 in the full + // product. 
+ for (size_t i = 0; i < WordCount; ++i) { + NumberPair prod = + full_mul(val[i], other.val[WordCount - 1 - i]); + UInt<128> tmp({prod.lo, prod.hi}); + carry += partial_sum.add(tmp); + } + for (size_t i = WordCount; i < 2 * WordCount - 1; ++i) { + partial_sum.val[0] = partial_sum.val[1]; + partial_sum.val[1] = carry; + carry = 0; + for (size_t j = i - WordCount + 1; j < WordCount; ++j) { + NumberPair prod = full_mul(val[j], other.val[i - j]); + UInt<128> tmp({prod.lo, prod.hi}); + carry += partial_sum.add(tmp); + } + result.val[i - WordCount] = partial_sum.val[0]; + } + result.val[WordCount - 1] = partial_sum.val[1]; + return result; + } + // pow takes a power and sets this to its starting value to that power. Zero // to the zeroth power returns 1. constexpr void pow_n(uint64_t power) { diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt --- a/libc/test/src/__support/CMakeLists.txt +++ b/libc/test/src/__support/CMakeLists.txt @@ -64,11 +64,11 @@ ) add_libc_unittest( - uint128_test + uint_test SUITE libc_support_unittests SRCS - uint128_test.cpp + uint_test.cpp DEPENDS libc.src.__support.uint libc.src.__support.CPP.optional diff --git a/libc/test/src/__support/uint128_test.cpp b/libc/test/src/__support/uint_test.cpp rename from libc/test/src/__support/uint128_test.cpp rename to libc/test/src/__support/uint_test.cpp --- a/libc/test/src/__support/uint128_test.cpp +++ b/libc/test/src/__support/uint_test.cpp @@ -1,4 +1,4 @@ -//===-- Unittests for the 128 bit integer class ---------------------------===// +//===-- Unittests for the UInt integer class ------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -17,15 +17,18 @@ using LL_UInt128 = __llvm_libc::cpp::UInt<128>; using LL_UInt192 = __llvm_libc::cpp::UInt<192>; using LL_UInt256 = __llvm_libc::cpp::UInt<256>; +using LL_UInt320 = __llvm_libc::cpp::UInt<320>; +using LL_UInt512 = __llvm_libc::cpp::UInt<512>; +using LL_UInt1024 = __llvm_libc::cpp::UInt<1024>; -TEST(LlvmLibcUInt128ClassTest, BasicInit) { +TEST(LlvmLibcUIntClassTest, BasicInit) { LL_UInt128 empty; LL_UInt128 half_val(12345); LL_UInt128 full_val({12345, 67890}); ASSERT_TRUE(half_val != full_val); } -TEST(LlvmLibcUInt128ClassTest, AdditionTests) { +TEST(LlvmLibcUIntClassTest, AdditionTests) { LL_UInt128 val1(12345); LL_UInt128 val2(54321); LL_UInt128 result1(66666); @@ -65,7 +68,7 @@ EXPECT_EQ(val9 + val10, val10 + val9); } -TEST(LlvmLibcUInt128ClassTest, SubtractionTests) { +TEST(LlvmLibcUIntClassTest, SubtractionTests) { LL_UInt128 val1(12345); LL_UInt128 val2(54321); LL_UInt128 result1({0xffffffffffff5c08, 0xffffffffffffffff}); @@ -94,7 +97,7 @@ EXPECT_EQ(val6, val5 + result6); } -TEST(LlvmLibcUInt128ClassTest, MultiplicationTests) { +TEST(LlvmLibcUIntClassTest, MultiplicationTests) { LL_UInt128 val1({5, 0}); LL_UInt128 val2({10, 0}); LL_UInt128 result1({50, 0}); @@ -154,7 +157,7 @@ EXPECT_EQ((val13 * val14), (val14 * val13)); } -TEST(LlvmLibcUInt128ClassTest, DivisionTests) { +TEST(LlvmLibcUIntClassTest, DivisionTests) { LL_UInt128 val1({10, 0}); LL_UInt128 val2({5, 0}); LL_UInt128 result1({2, 0}); @@ -201,7 +204,7 @@ EXPECT_FALSE(val13.div(val14).has_value()); } -TEST(LlvmLibcUInt128ClassTest, ModuloTests) { +TEST(LlvmLibcUIntClassTest, ModuloTests) { LL_UInt128 val1({10, 0}); LL_UInt128 val2({5, 0}); LL_UInt128 result1({0, 0}); @@ -248,7 +251,7 @@ EXPECT_EQ((val17 % val18), result9); } -TEST(LlvmLibcUInt128ClassTest, PowerTests) { +TEST(LlvmLibcUIntClassTest, PowerTests) { LL_UInt128 val1({10, 0}); val1.pow_n(30); LL_UInt128 result1({5076944270305263616, 54210108624}); // (10 ^ 30) @@ -299,7 +302,7 @@ } } -TEST(LlvmLibcUInt128ClassTest, ShiftLeftTests) { +TEST(LlvmLibcUIntClassTest, ShiftLeftTests) { LL_UInt128 val1(0x0123456789abcdef); LL_UInt128 result1(0x123456789abcdef0); EXPECT_EQ((val1 << 4), result1); @@ -325,7 +328,7 @@ EXPECT_EQ((val2 << 256), result6); } -TEST(LlvmLibcUInt128ClassTest, ShiftRightTests) { +TEST(LlvmLibcUIntClassTest, ShiftRightTests) { LL_UInt128 val1(0x0123456789abcdef); LL_UInt128 result1(0x00123456789abcde); EXPECT_EQ((val1 >> 4), result1); @@ -351,7 +354,7 @@ EXPECT_EQ((val2 >> 256), result6); } -TEST(LlvmLibcUInt128ClassTest, AndTests) { +TEST(LlvmLibcUIntClassTest, AndTests) { LL_UInt128 base({0xffff00000000ffff, 0xffffffff00000000}); LL_UInt128 val128({0xf0f0f0f00f0f0f0f, 0xff00ff0000ff00ff}); uint64_t val64 = 0xf0f0f0f00f0f0f0f; @@ -364,7 +367,7 @@ EXPECT_EQ((base & val32), result32); } -TEST(LlvmLibcUInt128ClassTest, OrTests) { +TEST(LlvmLibcUIntClassTest, OrTests) { LL_UInt128 base({0xffff00000000ffff, 0xffffffff00000000}); LL_UInt128 val128({0xf0f0f0f00f0f0f0f, 0xff00ff0000ff00ff}); uint64_t val64 = 0xf0f0f0f00f0f0f0f; @@ -377,7 +380,7 @@ EXPECT_EQ((base | val32), result32); } -TEST(LlvmLibcUInt128ClassTest, CompoundAssignments) { +TEST(LlvmLibcUIntClassTest, CompoundAssignments) { LL_UInt128 x({0xffff00000000ffff, 0xffffffff00000000}); LL_UInt128 b({0xf0f0f0f00f0f0f0f, 0xff00ff0000ff00ff}); @@ -419,7 +422,7 @@ EXPECT_EQ(a, mul_result); } -TEST(LlvmLibcUInt128ClassTest, UnaryPredecrement) { +TEST(LlvmLibcUIntClassTest, UnaryPredecrement) { LL_UInt128 a = LL_UInt128({0x1111111111111111, 0x1111111111111111}); ++a; EXPECT_EQ(a, 
LL_UInt128({0x1111111111111112, 0x1111111111111111})); @@ -433,7 +436,7 @@ EXPECT_EQ(a, LL_UInt128({0x0, 0x0})); } -TEST(LlvmLibcUInt128ClassTest, EqualsTests) { +TEST(LlvmLibcUIntClassTest, EqualsTests) { LL_UInt128 a1({0xffffffff00000000, 0xffff00000000ffff}); LL_UInt128 a2({0xffffffff00000000, 0xffff00000000ffff}); LL_UInt128 b({0xff00ff0000ff00ff, 0xf0f0f0f00f0f0f0f}); @@ -449,7 +452,7 @@ ASSERT_TRUE(a_lower != a_upper); } -TEST(LlvmLibcUInt128ClassTest, ComparisonTests) { +TEST(LlvmLibcUIntClassTest, ComparisonTests) { LL_UInt128 a({0xffffffff00000000, 0xffff00000000ffff}); LL_UInt128 b({0xff00ff0000ff00ff, 0xf0f0f0f00f0f0f0f}); EXPECT_GT(a, b); @@ -467,3 +470,43 @@ EXPECT_LE(a, a); EXPECT_GE(a, a); } + +TEST(LlvmLibcUIntClassTest, FullMulTests) { + LL_UInt128 a({0xffffffffffffffffULL, 0xffffffffffffffffULL}); + LL_UInt128 b({0xfedcba9876543210ULL, 0xfefdfcfbfaf9f8f7ULL}); + LL_UInt256 r({0x0123456789abcdf0ULL, 0x0102030405060708ULL, + 0xfedcba987654320fULL, 0xfefdfcfbfaf9f8f7ULL}); + LL_UInt128 r_hi({0xfedcba987654320eULL, 0xfefdfcfbfaf9f8f7ULL}); + + EXPECT_EQ(a.ful_mul(b), r); + EXPECT_EQ(a.quick_mul_hi(b), r_hi); + + LL_UInt192 c( + {0x7766554433221101ULL, 0xffeeddccbbaa9988ULL, 0x1f2f3f4f5f6f7f8fULL}); + LL_UInt320 rr({0x8899aabbccddeeffULL, 0x0011223344556677ULL, + 0x583715f4d3b29171ULL, 0xffeeddccbbaa9988ULL, + 0x1f2f3f4f5f6f7f8fULL}); + + EXPECT_EQ(a.ful_mul(c), rr); + EXPECT_EQ(a.ful_mul(c), c.ful_mul(a)); +} + +#define TEST_QUICK_MUL_HI(Bits, Error) \ + do { \ + LL_UInt##Bits a = ~LL_UInt##Bits(0); \ + LL_UInt##Bits hi = a.quick_mul_hi(a); \ + LL_UInt##Bits trunc = static_cast(a.ful_mul(a) >> Bits); \ + uint64_t overflow = trunc.sub(hi); \ + EXPECT_EQ(overflow, uint64_t(0)); \ + EXPECT_LE(uint64_t(trunc), uint64_t(Error)); \ + } while (0) + +TEST(LlvmLibcUIntClassTest, QuickMulHiTests) { + // TODO(lntue): Investigate / Analyze the error bounds for other rounding + // modes. It the error bounds seems to be able to reach to WordCount instead + // of WordCount - 1 in the CI environment. 
+ TEST_QUICK_MUL_HI(128, 2); + TEST_QUICK_MUL_HI(192, 3); + TEST_QUICK_MUL_HI(256, 4); + TEST_QUICK_MUL_HI(512, 8); +} diff --git a/libc/test/utils/tools/WrapperGen/wrappergen_test.cpp b/libc/test/utils/tools/WrapperGen/wrappergen_test.cpp --- a/libc/test/utils/tools/WrapperGen/wrappergen_test.cpp +++ b/libc/test/utils/tools/WrapperGen/wrappergen_test.cpp @@ -70,7 +70,7 @@ using LlvmLibcWrapperGenTest = WrapperGenTest; TEST_F(LlvmLibcWrapperGenTest, RunWrapperGenAndGetNoErrors) { - llvm::Optional Redirects[] = { + std::optional Redirects[] = { llvm::None, llvm::StringRef(STDOutFile.get().TmpName), llvm::StringRef(STDErrFile.get().TmpName)}; @@ -92,7 +92,7 @@ } TEST_F(LlvmLibcWrapperGenTest, RunWrapperGenOnStrlen) { - llvm::Optional Redirects[] = { + std::optional Redirects[] = { llvm::None, llvm::StringRef(STDOutFile.get().TmpName), llvm::StringRef(STDErrFile.get().TmpName)}; @@ -127,7 +127,7 @@ } TEST_F(LlvmLibcWrapperGenTest, GenAliasForStrlen) { - llvm::Optional Redirects[] = { + std::optional Redirects[] = { llvm::None, llvm::StringRef(STDOutFile.get().TmpName), llvm::StringRef(STDErrFile.get().TmpName)}; @@ -163,7 +163,7 @@ } TEST_F(LlvmLibcWrapperGenTest, DeclStrlenAliasUsingMangledNameFile) { - llvm::Optional Redirects[] = { + std::optional Redirects[] = { llvm::None, llvm::StringRef(STDOutFile.get().TmpName), llvm::StringRef(STDErrFile.get().TmpName)}; @@ -216,7 +216,7 @@ TEST_F(LlvmLibcWrapperGenTest, RunWrapperGenOnStrlenWithMangledNameAndMangledNameFile) { - llvm::Optional Redirects[] = { + std::optional Redirects[] = { llvm::None, llvm::StringRef(STDOutFile.get().TmpName), llvm::StringRef(STDErrFile.get().TmpName)}; @@ -250,7 +250,7 @@ } TEST_F(LlvmLibcWrapperGenTest, RunWrapperGenOnBadFuncName) { - llvm::Optional Redirects[] = { + std::optional Redirects[] = { llvm::None, llvm::StringRef(STDOutFile.get().TmpName), llvm::StringRef(STDErrFile.get().TmpName)}; @@ -282,7 +282,7 @@ } TEST_F(LlvmLibcWrapperGenTest, RunWrapperGenOnStrlenWithBadMangledNameFile) { - llvm::Optional Redirects[] = { + std::optional Redirects[] = { llvm::None, llvm::StringRef(STDOutFile.get().TmpName), llvm::StringRef(STDErrFile.get().TmpName)}; @@ -316,7 +316,7 @@ } TEST_F(LlvmLibcWrapperGenTest, RunWithMangledNameFileMissingLLVMLibcName) { - llvm::Optional Redirects[] = { + std::optional Redirects[] = { llvm::None, llvm::StringRef(STDOutFile.get().TmpName), llvm::StringRef(STDErrFile.get().TmpName)}; diff --git a/libc/utils/UnitTest/LibcTest.cpp b/libc/utils/UnitTest/LibcTest.cpp --- a/libc/utils/UnitTest/LibcTest.cpp +++ b/libc/utils/UnitTest/LibcTest.cpp @@ -288,6 +288,11 @@ __llvm_libc::cpp::UInt<256> RHS, const char *LHSStr, const char *RHSStr, const char *File, unsigned long Line); +template bool test<__llvm_libc::cpp::UInt<320>>( + RunContext *Ctx, TestCondition Cond, __llvm_libc::cpp::UInt<320> LHS, + __llvm_libc::cpp::UInt<320> RHS, const char *LHSStr, const char *RHSStr, + const char *File, unsigned long Line); + template bool test<__llvm_libc::cpp::string_view>( RunContext *Ctx, TestCondition Cond, __llvm_libc::cpp::string_view LHS, __llvm_libc::cpp::string_view RHS, const char *LHSStr, const char *RHSStr, diff --git a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp --- a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp +++ b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// -// test libc++'s implementation of align_val_t, and the relevant new/delete && !hwasan +// test libc++'s implementation of align_val_t, and the relevant new/delete // overloads in all dialects when -faligned-allocation is present. // The dylibs shipped before macosx10.13 do not contain the aligned allocation diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr.pass.cpp --- a/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr.pass.cpp @@ -126,14 +126,14 @@ using string = std::basic_string, alloc>; test_allocator_statistics stats; { - string str((alloc(&stats))); + string str = string(alloc(&stats)); stats = test_allocator_statistics(); (void)str.substr(); assert(stats.moved == 0); assert(stats.copied == 0); } { - string str((alloc(&stats))); + string str = string(alloc(&stats)); stats = test_allocator_statistics(); (void)std::move(str).substr(); assert(stats.moved == 0); diff --git a/libcxx/test/support/filesystem_test_helper.h b/libcxx/test/support/filesystem_test_helper.h --- a/libcxx/test/support/filesystem_test_helper.h +++ b/libcxx/test/support/filesystem_test_helper.h @@ -15,6 +15,7 @@ #include #include +#include #include // for printf #include #include @@ -34,7 +35,6 @@ namespace utils { #ifdef _WIN32 inline int mkdir(const char* path, int mode) { (void)mode; return ::_mkdir(path); } - inline int ftruncate(int fd, off_t length) { return ::_chsize(fd, length); } inline int symlink(const char* oldname, const char* newname, bool is_dir) { DWORD flags = is_dir ? SYMBOLIC_LINK_FLAG_DIRECTORY : 0; if (CreateSymbolicLinkA(newname, oldname, @@ -71,7 +71,6 @@ } #else using ::mkdir; - using ::ftruncate; inline int symlink(const char* oldname, const char* newname, bool is_dir) { (void)is_dir; return ::symlink(oldname, newname); } using ::link; using ::setenv; @@ -99,6 +98,37 @@ } #endif + // N.B. libc might define some of the foo[64] identifiers using macros from + // foo64 -> foo or vice versa. +#if defined(_WIN32) + using off64_t = int64_t; +#elif defined(__MVS__) || defined(__LP64__) + using off64_t = ::off_t; +#else + using ::off64_t; +#endif + + inline FILE* fopen64(const char* pathname, const char* mode) { + // Bionic does not distinguish between fopen and fopen64, but fopen64 + // wasn't added until API 24. +#if defined(_WIN32) || defined(__MVS__) || defined(__LP64__) || defined(__BIONIC__) + return ::fopen(pathname, mode); +#else + return ::fopen64(pathname, mode); +#endif + } + + inline int ftruncate64(int fd, off64_t length) { +#if defined(_WIN32) + // _chsize_s sets errno on failure and also returns the error number. + return ::_chsize_s(fd, length) ? -1 : 0; +#elif defined(__MVS__) || defined(__LP64__) + return ::ftruncate(fd, length); +#else + return ::ftruncate64(fd, length); +#endif + } + inline std::string getcwd() { // Assume that path lengths are not greater than this. // This should be fine for testing purposes. @@ -185,22 +215,11 @@ // off_t). On a 32-bit system this allows us to create a file larger than // 2GB. 
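A minimal sketch of how the utils::fopen64/utils::ftruncate64 wrappers introduced above might be used by a caller. The helper name and path are hypothetical, and the snippet assumes it sits next to the utils namespace from filesystem_test_helper.h:

#include <cstdint>
#include <cstdio>

inline bool make_sparse_4gb_file(const char* path) {
  FILE* file = utils::fopen64(path, "w");
  if (file == nullptr)
    return false;
  // 4 GiB overflows a 32-bit off_t, which is why the 64-bit variants are
  // routed through the wrappers even on 32-bit hosts.
  const utils::off64_t size = static_cast<utils::off64_t>(UINT64_C(4) << 30);
  const bool ok = utils::ftruncate64(fileno(file), size) == 0;
  fclose(file);
  return ok;
}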
std::string create_file(fs::path filename_path, uintmax_t size = 0) { - std::string filename = filename_path.string(); -#if defined(__LP64__) || defined(_WIN32) || defined(__MVS__) - auto large_file_fopen = fopen; - auto large_file_ftruncate = utils::ftruncate; - using large_file_offset_t = off_t; -#else - auto large_file_fopen = fopen64; - auto large_file_ftruncate = ftruncate64; - using large_file_offset_t = off64_t; -#endif - - filename = sanitize_path(std::move(filename)); + std::string filename = sanitize_path(filename_path.string()); if (size > - static_cast::type>( - std::numeric_limits::max())) { + static_cast::type>( + std::numeric_limits::max())) { fprintf(stderr, "create_file(%s, %ju) too large\n", filename.c_str(), size); abort(); @@ -211,15 +230,15 @@ #else # define FOPEN_CLOEXEC_FLAG "e" #endif - FILE* file = large_file_fopen(filename.c_str(), "w" FOPEN_CLOEXEC_FLAG); + FILE* file = utils::fopen64(filename.c_str(), "w" FOPEN_CLOEXEC_FLAG); if (file == nullptr) { fprintf(stderr, "fopen %s failed: %s\n", filename.c_str(), strerror(errno)); abort(); } - if (large_file_ftruncate( - fileno(file), static_cast(size)) == -1) { + if (utils::ftruncate64( + fileno(file), static_cast(size)) == -1) { fprintf(stderr, "ftruncate %s %ju failed: %s\n", filename.c_str(), size, strerror(errno)); fclose(file); diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -941,7 +941,7 @@ } MergeChunk::MergeChunk(uint32_t alignment) - : builder(StringTableBuilder::RAW, alignment) { + : builder(StringTableBuilder::RAW, llvm::Align(alignment)) { setAlignment(alignment); } diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp --- a/lld/COFF/DebugTypes.cpp +++ b/lld/COFF/DebugTypes.cpp @@ -288,7 +288,7 @@ SectionChunk *sec = SectionChunk::findByName(file->getDebugChunks(), ".debug$H"); if (!sec) - return llvm::None; + return std::nullopt; ArrayRef contents = sec->getContents(); if (!canUseDebugH(contents)) return std::nullopt; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -549,7 +549,7 @@ // use. Check the environment next, in case we're being invoked from a VS // command prompt. Failing that, just try to find the newest Visual Studio // version we can and use its default VC toolchain. 
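Much of this patch is the mechanical llvm::Optional -> std::optional migration. A minimal stand-alone sketch of the before/after shape (hypothetical function, not lld's code):

#include <optional>
#include <string>

std::optional<std::string> getToolsDirIfSet(bool haveFlag,
                                            const std::string &value) {
  if (haveFlag)
    return value;      // engaged optional, unchanged by the migration
  return std::nullopt; // previously spelled: return llvm::None;
}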
- Optional VCToolsDir, VCToolsVersion, WinSysRoot; + std::optional VCToolsDir, VCToolsVersion, WinSysRoot; if (auto *A = Args.getLastArg(OPT_vctoolsdir)) VCToolsDir = A->getValue(); if (auto *A = Args.getLastArg(OPT_vctoolsversion)) @@ -579,7 +579,7 @@ Args.getLastArg(OPT_vctoolsdir, OPT_winsysroot); if (Args.hasArg(OPT_lldignoreenv) || !Process::GetEnv("LIB") || Args.getLastArg(OPT_winsdkdir, OPT_winsysroot)) { - Optional WinSdkDir, WinSdkVersion; + std::optional WinSdkDir, WinSdkVersion; if (auto *A = Args.getLastArg(OPT_winsdkdir)) WinSdkDir = A->getValue(); if (auto *A = Args.getLastArg(OPT_winsdkversion)) diff --git a/lld/Common/TargetOptionsCommandFlags.cpp b/lld/Common/TargetOptionsCommandFlags.cpp --- a/lld/Common/TargetOptionsCommandFlags.cpp +++ b/lld/Common/TargetOptionsCommandFlags.cpp @@ -10,16 +10,17 @@ #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/CommandFlags.h" #include "llvm/Target/TargetOptions.h" +#include llvm::TargetOptions lld::initTargetOptionsFromCodeGenFlags() { return llvm::codegen::InitTargetOptionsFromCodeGenFlags(llvm::Triple()); } -llvm::Optional lld::getRelocModelFromCMModel() { +std::optional lld::getRelocModelFromCMModel() { return llvm::codegen::getExplicitRelocModel(); } -llvm::Optional lld::getCodeModelFromCMModel() { +std::optional lld::getCodeModelFromCMModel() { return llvm::codegen::getExplicitCodeModel(); } diff --git a/lld/ELF/AArch64ErrataFix.cpp b/lld/ELF/AArch64ErrataFix.cpp --- a/lld/ELF/AArch64ErrataFix.cpp +++ b/lld/ELF/AArch64ErrataFix.cpp @@ -369,7 +369,7 @@ return patchOff; } -class elf::Patch843419Section : public SyntheticSection { +class elf::Patch843419Section final : public SyntheticSection { public: Patch843419Section(InputSection *p, uint64_t off); diff --git a/lld/ELF/ARMErrataFix.cpp b/lld/ELF/ARMErrataFix.cpp --- a/lld/ELF/ARMErrataFix.cpp +++ b/lld/ELF/ARMErrataFix.cpp @@ -68,7 +68,7 @@ // 00001002 2 - bytes padding // 00001004 __CortexA8657417_00000FFE: B.w func -class elf::Patch657417Section : public SyntheticSection { +class elf::Patch657417Section final : public SyntheticSection { public: Patch657417Section(InputSection *p, uint64_t off, uint32_t instr, bool isARM); diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -98,7 +98,7 @@ // the input objects have been compiled. static void updateARMVFPArgs(const ARMAttributeParser &attributes, const InputFile *f) { - Optional attr = + std::optional attr = attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args); if (!attr) // If an ABI tag isn't present then it is implicitly given the value of 0 @@ -145,7 +145,7 @@ // is compiled with an architecture that supports these features then lld is // permitted to use them. static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) { - Optional attr = + std::optional attr = attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); if (!attr) return; diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -116,7 +116,7 @@ if (auto relocModel = getRelocModelFromCMModel()) c.RelocModel = *relocModel; else if (config->relocatable) - c.RelocModel = None; + c.RelocModel = std::nullopt; else if (config->isPic) c.RelocModel = Reloc::PIC_; else diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -1041,19 +1041,19 @@ const StringRef op = peek(); if (op.startswith("=")) { // Support = followed by an expression without whitespace. 
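The ScriptParser hunk that follows relies on class template argument deduction for SaveAndRestore instead of spelling the saved type explicitly. A minimal sketch of the idiom (hypothetical function):

#include "llvm/Support/SaveAndRestore.h"

void parseNested(bool &inExpr) {
  // The saved type (bool) is deduced from the first constructor argument,
  // so SaveAndRestore<bool> can be written as just SaveAndRestore.
  llvm::SaveAndRestore saved(inExpr, true);
  // ... inExpr is true while parsing the nested expression ...
} // inExpr reverts to its previous value when 'saved' is destroyed.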
- SaveAndRestore saved(inExpr, true); + SaveAndRestore saved(inExpr, true); cmd = readSymbolAssignment(tok); } else if ((op.size() == 2 && op[1] == '=' && strchr("*/+-&|", op[0])) || op == "<<=" || op == ">>=") { cmd = readSymbolAssignment(tok); } else if (tok == "PROVIDE") { - SaveAndRestore saved(inExpr, true); + SaveAndRestore saved(inExpr, true); cmd = readProvideHidden(true, false); } else if (tok == "HIDDEN") { - SaveAndRestore saved(inExpr, true); + SaveAndRestore saved(inExpr, true); cmd = readProvideHidden(false, true); } else if (tok == "PROVIDE_HIDDEN") { - SaveAndRestore saved(inExpr, true); + SaveAndRestore saved(inExpr, true); cmd = readProvideHidden(true, true); } @@ -1287,7 +1287,7 @@ .Case(CASE_ENT(SHF_COMPRESSED)) .Case(CASE_ENT(SHF_EXCLUDE)) .Case(CASE_ENT(SHF_ARM_PURECODE)) - .Default(None); + .Default(std::nullopt); #undef CASE_ENT } diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -95,7 +95,7 @@ llvm::DenseMap, Symbol *>, CieRecord *> cieMap; }; -class GotSection : public SyntheticSection { +class GotSection final : public SyntheticSection { public: GotSection(); size_t getSize() const override { return size; } @@ -135,7 +135,7 @@ size_t getSize() const override { return 0; } }; -class GnuPropertySection : public SyntheticSection { +class GnuPropertySection final : public SyntheticSection { public: GnuPropertySection(); void writeTo(uint8_t *buf) override; @@ -1036,7 +1036,7 @@ // of executable file which is pointed to by the DT_MIPS_RLD_MAP entry. // See "Dynamic section" in Chapter 5 in the following document: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf -class MipsRldMapSection : public SyntheticSection { +class MipsRldMapSection final : public SyntheticSection { public: MipsRldMapSection(); size_t getSize() const override { return config->wordsize; } @@ -1119,7 +1119,7 @@ // A container for one or more linker generated thunks. Instances of these // thunks including ARM interworking and Mips LA25 PI to non-PI thunks. 
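Several lld synthetic-section classes gain a final specifier in the nearby hunks. A simplified sketch, using stand-in types rather than the real lld hierarchy, of what that buys:

#include <cstddef>

struct SectionBase {
  virtual ~SectionBase() = default;
  virtual size_t getSize() const = 0;
};

struct ExampleSection final : SectionBase {
  size_t getSize() const override { return 42; }
};

// struct MoreDerived : ExampleSection {}; // ill-formed: ExampleSection is final

size_t sizeOf(const ExampleSection &sec) {
  // The call can be devirtualized: no further override can exist.
  return sec.getSize();
}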
-class ThunkSection : public SyntheticSection { +class ThunkSection final : public SyntheticSection { public: // ThunkSection in OS, with desired outSecOff of Off ThunkSection(OutputSection *os, uint64_t off); @@ -1176,7 +1176,7 @@ }; template -class PartitionElfHeaderSection : public SyntheticSection { +class PartitionElfHeaderSection final : public SyntheticSection { public: PartitionElfHeaderSection(); size_t getSize() const override; @@ -1184,14 +1184,14 @@ }; template -class PartitionProgramHeadersSection : public SyntheticSection { +class PartitionProgramHeadersSection final : public SyntheticSection { public: PartitionProgramHeadersSection(); size_t getSize() const override; void writeTo(uint8_t *buf) override; }; -class PartitionIndexSection : public SyntheticSection { +class PartitionIndexSection final : public SyntheticSection { public: PartitionIndexSection(); size_t getSize() const override; @@ -1202,7 +1202,7 @@ // See the following link for the Android-specific loader code that operates on // this section: // https://cs.android.com/android/platform/superproject/+/master:bionic/libc/bionic/libc_init_static.cpp;drc=9425b16978f9c5aa8f2c50c873db470819480d1d;l=192 -class MemtagAndroidNote : public SyntheticSection { +class MemtagAndroidNote final : public SyntheticSection { public: MemtagAndroidNote() : SyntheticSection(llvm::ELF::SHF_ALLOC, llvm::ELF::SHT_NOTE, @@ -1211,7 +1211,7 @@ size_t getSize() const override; }; -class PackageMetadataNote : public SyntheticSection { +class PackageMetadataNote final : public SyntheticSection { public: PackageMetadataNote() : SyntheticSection(llvm::ELF::SHF_ALLOC, llvm::ELF::SHT_NOTE, diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -3210,7 +3210,7 @@ MergeTailSection::MergeTailSection(StringRef name, uint32_t type, uint64_t flags, uint32_t alignment) : MergeSyntheticSection(name, type, flags, alignment), - builder(StringTableBuilder::RAW, alignment) {} + builder(StringTableBuilder::RAW, llvm::Align(alignment)) {} size_t MergeTailSection::getSize() const { return builder.getSize(); } @@ -3252,7 +3252,7 @@ void MergeNoTailSection::finalizeContents() { // Initializes string table builders. for (size_t i = 0; i < numShards; ++i) - shards.emplace_back(StringTableBuilder::RAW, addralign); + shards.emplace_back(StringTableBuilder::RAW, llvm::Align(addralign)); // Concurrency level. Must be a power of 2 to avoid expensive modulo // operations in the following tight loop. diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1882,15 +1882,15 @@ ElfSym::tlsModuleBase = cast(s); } } - } - if (!config->relocatable) { - llvm::TimeTraceScope timeScope("Finalize .eh_frame"); // This responsible for splitting up .eh_frame section into // pieces. The relocation scan uses those pieces, so this has to be // earlier. 
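The Writer.cpp change here narrows the llvm::TimeTraceScope to an explicit block, so only the .eh_frame finalization is attributed to that entry. A minimal sketch of the RAII pattern, with placeholder work:

#include "llvm/Support/TimeProfiler.h"

void finalizeSections() {
  {
    llvm::TimeTraceScope timeScope("Finalize .eh_frame");
    // ... only the work inside this block is charged to the entry ...
  }
  // Subsequent work is reported separately in the time-trace output.
}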
- for (Partition &part : partitions) - finalizeSynthetic(part.ehFrame.get()); + { + llvm::TimeTraceScope timeScope("Finalize .eh_frame"); + for (Partition &part : partitions) + finalizeSynthetic(part.ehFrame.get()); + } if (config->hasDynSymTab) { parallelForEach(symtab.getSymbols(), [](Symbol *sym) { diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -200,7 +200,7 @@ ErrorOr> mbOrErr = MemoryBuffer::getFile(path); if (std::error_code ec = mbOrErr.getError()) { error("cannot open " + path + ": " + ec.message()); - return None; + return std::nullopt; } std::unique_ptr &mb = *mbOrErr; @@ -228,7 +228,7 @@ if (reinterpret_cast(arch + i + 1) > buf + mbref.getBufferSize()) { error(path + ": fat_arch struct extends beyond end of file"); - return None; + return std::nullopt; } if (read32be(&arch[i].cputype) != static_cast(target->cpuType) || @@ -246,7 +246,7 @@ } error("unable to find matching architecture in " + path); - return None; + return std::nullopt; } InputFile::InputFile(Kind kind, const InterfaceFile &interface) diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp --- a/lld/MachO/LTO.cpp +++ b/lld/MachO/LTO.cpp @@ -168,7 +168,7 @@ // not use the cached MemoryBuffer directly to ensure dsymutil does not // race with the cache pruner. StringRef objBuf; - std::optional cachePath = llvm::None; + std::optional cachePath = std::nullopt; if (files[i]) { objBuf = files[i]->getBuffer(); cachePath = files[i]->getBufferIdentifier(); diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp --- a/lld/MachO/SectionPriorities.cpp +++ b/lld/MachO/SectionPriorities.cpp @@ -252,11 +252,11 @@ std::optional macho::PriorityBuilder::getSymbolPriority(const Defined *sym) { if (sym->isAbsolute()) - return None; + return std::nullopt; auto it = priorities.find(sym->getName()); if (it == priorities.end()) - return None; + return std::nullopt; const SymbolPriorityEntry &entry = it->second; const InputFile *f = sym->isec->getFile(); if (!f) diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp --- a/lld/MinGW/Driver.cpp +++ b/lld/MinGW/Driver.cpp @@ -116,7 +116,7 @@ sys::path::append(s, path1, path2); if (sys::fs::exists(s)) return std::string(s); - return None; + return std::nullopt; } // This is for -lfoo. We'll look for libfoo.dll.a or libfoo.a from search paths. 
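The driver helpers above now signal a miss with std::nullopt. A self-contained sketch of the usual caller shape; findFile here is a hypothetical stand-in for the lld helpers:

#include <optional>
#include <string>
#include <vector>

std::optional<std::string> findFile(const std::string &dir,
                                    const std::string &name); // stand-in

std::optional<std::string> searchLibrary(const std::vector<std::string> &dirs,
                                         const std::string &name) {
  for (const std::string &dir : dirs)
    if (std::optional<std::string> s = findFile(dir, "lib" + name + ".a"))
      return s;
  return std::nullopt; // nothing matched on the search path
}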
diff --git a/lld/include/lld/Common/TargetOptionsCommandFlags.h b/lld/include/lld/Common/TargetOptionsCommandFlags.h --- a/lld/include/lld/Common/TargetOptionsCommandFlags.h +++ b/lld/include/lld/Common/TargetOptionsCommandFlags.h @@ -13,14 +13,14 @@ #ifndef LLD_COMMON_TARGETOPTIONSCOMMANDFLAGS_H #define LLD_COMMON_TARGETOPTIONSCOMMANDFLAGS_H -#include "llvm/ADT/Optional.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetOptions.h" +#include namespace lld { llvm::TargetOptions initTargetOptionsFromCodeGenFlags(); -llvm::Optional getRelocModelFromCMModel(); -llvm::Optional getCodeModelFromCMModel(); +std::optional getRelocModelFromCMModel(); +std::optional getCodeModelFromCMModel(); std::string getCPUStr(); std::vector getMAttrs(); } diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -164,7 +164,7 @@ path::append(s, path1, path2); if (fs::exists(s)) return std::string(s); - return None; + return std::nullopt; } opt::InputArgList WasmOptTable::parse(ArrayRef argv) { @@ -283,7 +283,7 @@ for (StringRef dir : config->searchPaths) if (Optional s = findFile(dir, path)) return s; - return None; + return std::nullopt; } // This is for -l. We'll look for lib.a from @@ -298,7 +298,7 @@ if (Optional s = findFile(dir, "lib" + name + ".a")) return s; } - return None; + return std::nullopt; } // This is for -l. @@ -659,9 +659,9 @@ if (s->signature) { LLVM_DEBUG(llvm::dbgs() << "demoting lazy func: " << s->getName() << "\n"); - replaceSymbol(s, s->getName(), None, None, - WASM_SYMBOL_BINDING_WEAK, s->getFile(), - s->signature); + replaceSymbol(s, s->getName(), std::nullopt, + std::nullopt, WASM_SYMBOL_BINDING_WEAK, + s->getFile(), s->signature); } } } @@ -670,7 +670,7 @@ static UndefinedGlobal * createUndefinedGlobal(StringRef name, llvm::wasm::WasmGlobalType *type) { auto *sym = cast(symtab->addUndefinedGlobal( - name, None, None, WASM_SYMBOL_UNDEFINED, nullptr, type)); + name, std::nullopt, std::nullopt, WASM_SYMBOL_UNDEFINED, nullptr, type)); config->allowUndefinedSymbols.insert(sym->getName()); sym->isUsedInRegularObj = true; return sym; @@ -844,8 +844,9 @@ }; static Symbol *addUndefined(StringRef name) { - return symtab->addUndefinedFunction(name, None, None, WASM_SYMBOL_UNDEFINED, - nullptr, nullptr, false); + return symtab->addUndefinedFunction(name, std::nullopt, std::nullopt, + WASM_SYMBOL_UNDEFINED, nullptr, nullptr, + false); } // Handles -wrap option. diff --git a/lld/wasm/InputChunks.h b/lld/wasm/InputChunks.h --- a/lld/wasm/InputChunks.h +++ b/lld/wasm/InputChunks.h @@ -85,7 +85,7 @@ OutputSection *outputSec = nullptr; uint32_t comdat = UINT32_MAX; uint32_t inputSectionOffset = 0; - uint32_t alignment; + llvm::Align alignment; uint32_t flags; // Only applies to data segments. 
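The wasm hunks below switch the in-memory alignment field to llvm::Align, while the object format keeps storing the log2 exponent. A small sketch of the round trip; the concrete values are illustrative only:

#include "llvm/Support/Alignment.h"
#include <cstdint>

unsigned roundTrip(uint32_t encodedLog2) {
  llvm::Align align(uint64_t(1) << encodedLog2); // decode: 3 -> Align(8)
  uint64_t off = llvm::alignTo(100, align);      // 100 rounds up to 104 for Align(8)
  (void)off;
  return llvm::Log2(align);                      // encode back: Align(8) -> 3
}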
@@ -109,8 +109,8 @@ protected: InputChunk(ObjFile *f, Kind k, StringRef name, uint32_t alignment = 0, uint32_t flags = 0) - : name(name), file(f), alignment(alignment), flags(flags), sectionKind(k), - live(!config->gcSections), discarded(false) {} + : name(name), file(f), alignment(1ULL << alignment), flags(flags), + sectionKind(k), live(!config->gcSections), discarded(false) {} ArrayRef data() const { return rawData; } uint64_t getTombstone() const; @@ -223,7 +223,8 @@ public: SyntheticMergedChunk(StringRef name, uint32_t alignment, uint32_t flags) : InputChunk(nullptr, InputChunk::MergedChunk, name, alignment, flags), - builder(llvm::StringTableBuilder::RAW, 1ULL << alignment) {} + builder(llvm::StringTableBuilder::RAW, llvm::Align(1ULL << alignment)) { + } static bool classof(const InputChunk *c) { return c->kind() == InputChunk::MergedChunk; diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp --- a/lld/wasm/InputFiles.cpp +++ b/lld/wasm/InputFiles.cpp @@ -61,7 +61,7 @@ auto mbOrErr = MemoryBuffer::getFile(path); if (auto ec = mbOrErr.getError()) { error("cannot open " + path + ": " + ec.message()); - return None; + return std::nullopt; } std::unique_ptr &mb = *mbOrErr; MemoryBufferRef mbref = mb->getMemBufferRef(); @@ -739,8 +739,8 @@ if (objSym.isUndefined() || excludedByComdat) { flags |= WASM_SYMBOL_UNDEFINED; if (objSym.isExecutable()) - return symtab->addUndefinedFunction(name, None, None, flags, &f, nullptr, - true); + return symtab->addUndefinedFunction(name, std::nullopt, std::nullopt, + flags, &f, nullptr, true); return symtab->addUndefinedData(name, flags, &f); } diff --git a/lld/wasm/LTO.cpp b/lld/wasm/LTO.cpp --- a/lld/wasm/LTO.cpp +++ b/lld/wasm/LTO.cpp @@ -55,7 +55,7 @@ c.DebugPassManager = config->ltoDebugPassManager; if (config->relocatable) - c.RelocModel = None; + c.RelocModel = std::nullopt; else if (config->isPic) c.RelocModel = Reloc::PIC_; else @@ -76,8 +76,9 @@ static void undefine(Symbol *s) { if (auto f = dyn_cast(s)) - replaceSymbol(f, f->getName(), None, None, 0, - f->getFile(), f->signature); + replaceSymbol(f, f->getName(), std::nullopt, + std::nullopt, 0, f->getFile(), + f->signature); else if (isa(s)) replaceSymbol(s, s->getName(), 0, s->getFile()); else diff --git a/lld/wasm/OutputSegment.h b/lld/wasm/OutputSegment.h --- a/lld/wasm/OutputSegment.h +++ b/lld/wasm/OutputSegment.h @@ -38,7 +38,7 @@ uint32_t linkingFlags = 0; uint32_t initFlags = 0; uint32_t sectionOffset = 0; - uint32_t alignment = 0; + llvm::Align alignment; uint64_t startVA = 0; std::vector inputSegments; diff --git a/lld/wasm/OutputSegment.cpp b/lld/wasm/OutputSegment.cpp --- a/lld/wasm/OutputSegment.cpp +++ b/lld/wasm/OutputSegment.cpp @@ -22,10 +22,10 @@ void OutputSegment::addInputSegment(InputChunk *inSeg) { alignment = std::max(alignment, inSeg->alignment); inputSegments.push_back(inSeg); - size = llvm::alignTo(size, 1ULL << inSeg->alignment); + size = llvm::alignTo(size, inSeg->alignment); LLVM_DEBUG(dbgs() << "addInputSegment: " << inSeg->name << " oname=" << name - << " size=" << inSeg->getSize() - << " align=" << inSeg->alignment << " at:" << size << "\n"); + << " size=" << inSeg->getSize() << " align=" + << Log2(inSeg->alignment) << " at:" << size << "\n"); inSeg->outputSeg = this; inSeg->outputSegmentOffset = size; size += inSeg->getSize(); @@ -56,8 +56,9 @@ }); if (i == mergedSegments.end()) { LLVM_DEBUG(llvm::dbgs() << "new merge segment: " << name - << " alignment=" << ms->alignment << "\n"); - auto *syn = make(name, ms->alignment, ms->flags); + << " alignment=" 
<< Log2(ms->alignment) << "\n"); + auto *syn = + make(name, Log2(ms->alignment), ms->flags); syn->outputSeg = this; mergedSegments.push_back(syn); i = std::prev(mergedSegments.end()); @@ -74,7 +75,7 @@ inputSegments = newSegments; size = 0; for (InputChunk *seg : inputSegments) { - size = llvm::alignTo(size, 1ULL << seg->alignment); + size = llvm::alignTo(size, seg->alignment); LLVM_DEBUG(llvm::dbgs() << "outputSegmentOffset set: " << seg->name << " -> " << size << "\n"); seg->outputSegmentOffset = size; diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -668,7 +668,7 @@ writeUleb128(sub.os, dataSegments.size(), "num data segments"); for (const OutputSegment *s : dataSegments) { writeStr(sub.os, s->name, "segment name"); - writeUleb128(sub.os, s->alignment, "alignment"); + writeUleb128(sub.os, Log2(s->alignment), "alignment"); writeUleb128(sub.os, s->linkingFlags, "flags"); } sub.writeTo(os); diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -288,11 +288,12 @@ out.dylinkSec->memAlign = 0; for (OutputSegment *seg : segments) { - out.dylinkSec->memAlign = std::max(out.dylinkSec->memAlign, seg->alignment); - memoryPtr = alignTo(memoryPtr, 1ULL << seg->alignment); + out.dylinkSec->memAlign = + std::max(out.dylinkSec->memAlign, Log2(seg->alignment)); + memoryPtr = alignTo(memoryPtr, seg->alignment); seg->startVA = memoryPtr; log(formatv("mem: {0,-15} offset={1,-8} size={2,-8} align={3}", seg->name, - memoryPtr, seg->size, seg->alignment)); + memoryPtr, seg->size, Log2(seg->alignment))); if (!config->relocatable && seg->isTLS()) { if (WasmSym::tlsSize) { @@ -301,7 +302,7 @@ } if (WasmSym::tlsAlign) { auto *tlsAlign = cast(WasmSym::tlsAlign); - setGlobalPtr(tlsAlign, int64_t{1} << seg->alignment); + setGlobalPtr(tlsAlign, seg->alignment.value()); } if (!config->sharedMemory && WasmSym::tlsBase) { auto *tlsBase = cast(WasmSym::tlsBase); diff --git a/lldb/include/lldb/Core/ModuleList.h b/lldb/include/lldb/Core/ModuleList.h --- a/lldb/include/lldb/Core/ModuleList.h +++ b/lldb/include/lldb/Core/ModuleList.h @@ -464,9 +464,22 @@ static bool RemoveSharedModuleIfOrphaned(const Module *module_ptr); + /// Applies 'callback' to each module in this ModuleList. + /// If 'callback' returns false, iteration terminates. + /// The 'module_sp' passed to 'callback' is guaranteed to + /// be non-null. + /// + /// This function is thread-safe. void ForEach(std::function const &callback) const; + /// Returns true if 'callback' returns true for one of the modules + /// in this ModuleList. + /// + /// This function is thread-safe. + bool AnyOf( + std::function const &callback) const; + protected: // Class typedefs. typedef std::vector diff --git a/lldb/include/lldb/Host/HostInfoBase.h b/lldb/include/lldb/Host/HostInfoBase.h --- a/lldb/include/lldb/Host/HostInfoBase.h +++ b/lldb/include/lldb/Host/HostInfoBase.h @@ -108,7 +108,9 @@ static FileSpec GetXcodeDeveloperDirectory() { return {}; } /// Return the directory containing a specific Xcode SDK. - static llvm::StringRef GetXcodeSDKPath(XcodeSDK sdk) { return {}; } + static llvm::Expected GetXcodeSDKPath(XcodeSDK sdk) { + return ""; + } /// Return information about module \p image_name if it is loaded in /// the current process's address space. 
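A sketch of how the new ModuleList::AnyOf declared in the ModuleList.h hunk above might be called; the predicate here (checking for a symbol table) is only an example:

#include "lldb/Core/Module.h"
#include "lldb/Core/ModuleList.h"

bool anyModuleHasSymbols(const lldb_private::ModuleList &modules) {
  // AnyOf locks the list and stops at the first module for which the
  // callback returns true.
  return modules.AnyOf([](lldb_private::Module &module) {
    return module.GetSymtab() != nullptr;
  });
}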
diff --git a/lldb/include/lldb/Host/macosx/HostInfoMacOSX.h b/lldb/include/lldb/Host/macosx/HostInfoMacOSX.h --- a/lldb/include/lldb/Host/macosx/HostInfoMacOSX.h +++ b/lldb/include/lldb/Host/macosx/HostInfoMacOSX.h @@ -30,7 +30,7 @@ static FileSpec GetXcodeDeveloperDirectory(); /// Query xcrun to find an Xcode SDK directory. - static llvm::StringRef GetXcodeSDKPath(XcodeSDK sdk); + static llvm::Expected GetXcodeSDKPath(XcodeSDK sdk); /// Shared cache utilities static SharedCacheImageInfo diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp --- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp +++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp @@ -22,12 +22,11 @@ using namespace lldb_private; CommandObjectDWIMPrint::CommandObjectDWIMPrint(CommandInterpreter &interpreter) - : CommandObjectRaw( - interpreter, "dwim-print", "Print a variable or expression.", - "dwim-print [ | ]", - eCommandProcessMustBePaused | eCommandTryTargetAPILock | - eCommandRequiresFrame | eCommandProcessMustBeLaunched | - eCommandRequiresProcess) {} + : CommandObjectRaw(interpreter, "dwim-print", + "Print a variable or expression.", + "dwim-print [ | ]", + eCommandProcessMustBePaused | eCommandTryTargetAPILock) { +} bool CommandObjectDWIMPrint::DoExecute(StringRef expr, CommandReturnObject &result) { @@ -40,14 +39,10 @@ return false; } - // eCommandRequiresFrame guarantees a frame. - StackFrame *frame = m_exe_ctx.GetFramePtr(); - assert(frame); - auto verbosity = GetDebugger().GetDWIMPrintVerbosity(); - // First, try `expr` as the name of a variable. - { + // First, try `expr` as the name of a frame variable. + if (StackFrame *frame = m_exe_ctx.GetFramePtr()) { auto valobj_sp = frame->FindVariable(ConstString(expr)); if (valobj_sp && valobj_sp->GetError().Success()) { if (verbosity == eDWIMPrintVerbosityFull) @@ -60,12 +55,13 @@ // Second, also lastly, try `expr` as a source expression to evaluate. { - // eCommandRequiresProcess guarantees a target. - Target *target = m_exe_ctx.GetTargetPtr(); - assert(target); + Target *target_ptr = m_exe_ctx.GetTargetPtr(); + // Fallback to the dummy target, which can allow for expression evaluation. + Target &target = target_ptr ? *target_ptr : GetDummyTarget(); + auto *exe_scope = m_exe_ctx.GetBestExecutionContextScope(); ValueObjectSP valobj_sp; - if (target->EvaluateExpression(expr, frame, valobj_sp) == + if (target.EvaluateExpression(expr, exe_scope, valobj_sp) == eExpressionCompleted) { if (verbosity != eDWIMPrintVerbosityNone) result.AppendMessageWithFormatv("note: ran `expression -- {0}`", expr); diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -1645,7 +1645,15 @@ void Module::RegisterXcodeSDK(llvm::StringRef sdk_name, llvm::StringRef sysroot) { XcodeSDK sdk(sdk_name.str()); - llvm::StringRef sdk_path(HostInfo::GetXcodeSDKPath(sdk)); + auto sdk_path_or_err = HostInfo::GetXcodeSDKPath(sdk); + + if (!sdk_path_or_err) { + Debugger::ReportError("Error while searching for Xcode SDK: " + + toString(sdk_path_or_err.takeError())); + return; + } + + auto sdk_path = *sdk_path_or_err; if (sdk_path.empty()) return; // If the SDK changed for a previously registered source path, update it. 
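Callers of the Expected-returning GetXcodeSDKPath now have to consume or propagate the error before using the value, as RegisterXcodeSDK does above. A generic, self-contained sketch of that pattern; lookupPath is a hypothetical stand-in:

#include "llvm/Support/Error.h"
#include <string>

llvm::Expected<std::string> lookupPath(bool ok) {
  if (!ok)
    return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                   "lookup failed");
  return std::string("/some/path");
}

std::string lookupOrEmpty(bool ok) {
  auto path_or_err = lookupPath(ok);
  if (!path_or_err) {
    // The Error must be consumed exactly once; toString() takes ownership.
    std::string message = llvm::toString(path_or_err.takeError());
    (void)message; // e.g. report it, as Module::RegisterXcodeSDK does above
    return std::string();
  }
  return *path_or_err; // safe to dereference only after the check
}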
diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp --- a/lldb/source/Core/ModuleList.cpp +++ b/lldb/source/Core/ModuleList.cpp @@ -1067,9 +1067,23 @@ void ModuleList::ForEach( std::function const &callback) const { std::lock_guard guard(m_modules_mutex); - for (const auto &module : m_modules) { + for (const auto &module_sp : m_modules) { + assert(module_sp != nullptr); // If the callback returns false, then stop iterating and break out - if (!callback(module)) + if (!callback(module_sp)) break; } } + +bool ModuleList::AnyOf( + std::function const &callback) + const { + std::lock_guard guard(m_modules_mutex); + for (const auto &module_sp : m_modules) { + assert(module_sp != nullptr); + if (callback(*module_sp)) + return true; + } + + return false; +} diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -925,10 +925,6 @@ stack.back().SetValueType(Value::ValueType::LoadAddress); } else { stack.back().SetValueType(Value::ValueType::FileAddress); - // Convert the file address to a load address, so subsequent - // DWARF operators can operate on it. - if (target) - stack.back().ConvertToLoadAddress(module_sp.get(), target); } break; diff --git a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm --- a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm +++ b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm @@ -6,15 +6,15 @@ // //===----------------------------------------------------------------------===// +#include "lldb/Host/macosx/HostInfoMacOSX.h" +#include "Utility/UuidCompatibility.h" #include "lldb/Host/FileSystem.h" #include "lldb/Host/Host.h" #include "lldb/Host/HostInfo.h" -#include "lldb/Host/macosx/HostInfoMacOSX.h" #include "lldb/Utility/Args.h" #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/Timer.h" -#include "Utility/UuidCompatibility.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" @@ -337,7 +337,14 @@ } } - FileSpec fspec(HostInfo::GetXcodeSDKPath(XcodeSDK::GetAnyMacOS())); + auto sdk_path_or_err = HostInfo::GetXcodeSDKPath(XcodeSDK::GetAnyMacOS()); + if (!sdk_path_or_err) { + Log *log = GetLog(LLDBLog::Host); + LLDB_LOGF(log, "Error while searching for Xcode SDK: %s", + toString(sdk_path_or_err.takeError()).c_str()); + return; + } + FileSpec fspec(*sdk_path_or_err); if (fspec) { if (FileSystem::Instance().Exists(fspec)) { std::string xcode_contents_dir = @@ -365,12 +372,18 @@ return g_developer_directory; } -static std::string GetXcodeSDK(XcodeSDK sdk) { +llvm::Expected GetXcodeSDK(XcodeSDK sdk) { XcodeSDK::Info info = sdk.Parse(); std::string sdk_name = XcodeSDK::GetCanonicalName(info); + if (sdk_name.empty()) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "Unrecognized SDK type: " + sdk.GetString()); + + Log *log = GetLog(LLDBLog::Host); auto xcrun = [](const std::string &sdk, - llvm::StringRef developer_dir = "") -> std::string { + llvm::StringRef developer_dir = + "") -> llvm::Expected { Args args; if (!developer_dir.empty()) { args.AppendArgument("/usr/bin/env"); @@ -391,13 +404,29 @@ int status = 0; int signo = 0; std::string output_str; - lldb_private::Status error = - Host::RunShellCommand(args, FileSpec(), &status, &signo, &output_str, - std::chrono::seconds(15)); - - // Check that xcrun return something useful. 
- if (status != 0 || output_str.empty()) - return {}; + // The first time after Xcode was updated or freshly installed, + // xcrun can take surprisingly long to build up its database. + auto timeout = std::chrono::seconds(60); + bool run_in_shell = false; + lldb_private::Status error = Host::RunShellCommand( + args, FileSpec(), &status, &signo, &output_str, timeout, run_in_shell); + + // Check that xcrun returned something useful. + if (error.Fail()) { + // Catastrophic error. + LLDB_LOG(log, "xcrun failed to execute: %s", error.AsCString()); + return error.ToError(); + } + if (status != 0) { + // xcrun didn't find a matching SDK. Not an error, we'll try + // different spellings. + LLDB_LOG(log, "xcrun returned exit code %d", status); + return ""; + } + if (output_str.empty()) { + LLDB_LOG(log, "xcrun returned no results"); + return ""; + } // Convert to a StringRef so we can manipulate the string without modifying // the underlying data. @@ -414,7 +443,8 @@ return output.str(); }; - auto find_sdk = [&xcrun](const std::string &sdk_name) -> std::string { + auto find_sdk = + [&xcrun](const std::string &sdk_name) -> llvm::Expected { // Invoke xcrun with the developer dir specified in the environment. std::string developer_dir = GetEnvDeveloperDir(); if (!developer_dir.empty()) { @@ -430,8 +460,10 @@ llvm::StringRef shlib_developer_dir = llvm::sys::path::parent_path(contents_dir); if (!shlib_developer_dir.empty()) { - std::string sdk = xcrun(sdk_name, std::move(shlib_developer_dir)); - if (!sdk.empty()) + auto sdk = xcrun(sdk_name, std::move(shlib_developer_dir)); + if (!sdk) + return sdk.takeError(); + if (!sdk->empty()) return sdk; } } @@ -441,7 +473,10 @@ return xcrun(sdk_name); }; - std::string path = find_sdk(sdk_name); + auto path_or_err = find_sdk(sdk_name); + if (!path_or_err) + return path_or_err.takeError(); + std::string path = *path_or_err; while (path.empty()) { // Try an alternate spelling of the name ("macosx10.9internal"). if (info.type == XcodeSDK::Type::MacOSX && !info.version.empty() && @@ -449,44 +484,68 @@ llvm::StringRef fixed(sdk_name); if (fixed.consume_back(".internal")) sdk_name = fixed.str() + "internal"; - path = find_sdk(sdk_name); + path_or_err = find_sdk(sdk_name); + if (!path_or_err) + return path_or_err.takeError(); + path = *path_or_err; if (!path.empty()) break; } - Log *log = GetLog(LLDBLog::Host); LLDB_LOGF(log, "Couldn't find SDK %s on host", sdk_name.c_str()); // Try without the version. if (!info.version.empty()) { info.version = {}; sdk_name = XcodeSDK::GetCanonicalName(info); - path = find_sdk(sdk_name); + path_or_err = find_sdk(sdk_name); + if (!path_or_err) + return path_or_err.takeError(); + path = *path_or_err; if (!path.empty()) break; } LLDB_LOGF(log, "Couldn't find any matching SDK on host"); - return {}; + return ""; } // Whatever is left in output should be a valid path. 
- if (!FileSystem::Instance().Exists(path)) - return {}; + if (!FileSystem::Instance().Exists(path)) { + LLDB_LOGF(log, "SDK returned by xcrun doesn't exist"); + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "SDK returned by xcrun doesn't exist"); + } return path; } -llvm::StringRef HostInfoMacOSX::GetXcodeSDKPath(XcodeSDK sdk) { - static llvm::StringMap g_sdk_path; +llvm::Expected HostInfoMacOSX::GetXcodeSDKPath(XcodeSDK sdk) { + struct ErrorOrPath { + std::string str; + bool is_error; + }; + static llvm::StringMap g_sdk_path; static std::mutex g_sdk_path_mutex; std::lock_guard guard(g_sdk_path_mutex); LLDB_SCOPED_TIMER(); - auto it = g_sdk_path.find(sdk.GetString()); - if (it != g_sdk_path.end()) - return it->second; - auto it_new = g_sdk_path.insert({sdk.GetString(), GetXcodeSDK(sdk)}); - return it_new.first->second; + auto key = sdk.GetString(); + auto it = g_sdk_path.find(key); + if (it != g_sdk_path.end()) { + if (it->second.is_error) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + it->second.str); + else + return it->second.str; + } + auto path_or_err = GetXcodeSDK(sdk); + if (!path_or_err) { + std::string error = toString(path_or_err.takeError()); + g_sdk_path.insert({key, {error, true}}); + return llvm::createStringError(llvm::inconvertibleErrorCode(), error); + } + auto it_new = g_sdk_path.insert({key, {*path_or_err, false}}); + return it_new.first->second.str; } namespace { diff --git a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptExpressionOpts.cpp b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptExpressionOpts.cpp --- a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptExpressionOpts.cpp +++ b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptExpressionOpts.cpp @@ -92,7 +92,7 @@ return false; } - llvm::Optional reloc_model; + std::optional reloc_model; assert(m_process_ptr && "no available lldb process"); switch (m_process_ptr->GetTarget().GetArchitecture().GetMachine()) { case llvm::Triple::ArchType::x86: @@ -175,7 +175,6 @@ namespace lldb_renderscript { RSIRPasses::RSIRPasses(Process *process) { - IRPasses(); assert(process); EarlyPasses = std::make_shared(); diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.cpp --- a/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.cpp @@ -12,6 +12,7 @@ #include #endif +#include "lldb/Core/Debugger.h" #include "lldb/Core/Module.h" #include "lldb/Core/PluginManager.h" #include "lldb/Host/HostInfo.h" @@ -278,9 +279,19 @@ static llvm::StringRef GetXcodeSDKDir(std::string preferred, std::string secondary) { llvm::StringRef sdk; - sdk = HostInfo::GetXcodeSDKPath(XcodeSDK(std::move(preferred))); + auto get_sdk = [&](std::string sdk) -> llvm::StringRef { + auto sdk_path_or_err = HostInfo::GetXcodeSDKPath(XcodeSDK(std::move(sdk))); + if (!sdk_path_or_err) { + Debugger::ReportError("Error while searching for Xcode SDK: " + + toString(sdk_path_or_err.takeError())); + return {}; + } + return *sdk_path_or_err; + }; + + sdk = get_sdk(preferred); if (sdk.empty()) - sdk = HostInfo::GetXcodeSDKPath(XcodeSDK(std::move(secondary))); + sdk = get_sdk(secondary); return sdk; } diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp --- 
a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp @@ -17,6 +17,7 @@ #include "PlatformRemoteAppleWatch.h" #endif #include "lldb/Breakpoint/BreakpointLocation.h" +#include "lldb/Core/Debugger.h" #include "lldb/Core/Module.h" #include "lldb/Core/ModuleList.h" #include "lldb/Core/ModuleSpec.h" @@ -123,8 +124,14 @@ } // Use the default SDK as a fallback. - FileSpec fspec( - HostInfo::GetXcodeSDKPath(lldb_private::XcodeSDK::GetAnyMacOS())); + auto sdk_path_or_err = HostInfo::GetXcodeSDKPath(XcodeSDK::GetAnyMacOS()); + if (!sdk_path_or_err) { + Debugger::ReportError("Error while searching for Xcode SDK: " + + toString(sdk_path_or_err.takeError())); + return {}; + } + + FileSpec fspec(*sdk_path_or_err); if (fspec) { if (FileSystem::Instance().Exists(fspec)) return ConstString(fspec.GetPath()); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.cpp --- a/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.cpp @@ -12,14 +12,16 @@ static constexpr Log::Category g_categories[] = { {{"comp"}, - {"log insertions of object files into DWARF debug maps"}, + {"log struct/union/class type completions"}, DWARFLog::TypeCompletion}, {{"info"}, {"log the parsing of .debug_info"}, DWARFLog::DebugInfo}, {{"line"}, {"log the parsing of .debug_line"}, DWARFLog::DebugLine}, {{"lookups"}, {"log any lookups that happen by name, regex, or address"}, DWARFLog::Lookups}, - {{"map"}, {"log struct/unions/class type completions"}, DWARFLog::DebugMap}, + {{"map"}, + {"log insertions of object files into DWARF debug maps"}, + DWARFLog::DebugMap}, }; static Log::Channel g_channel(g_categories, DWARFLog::DebugInfo); diff --git a/lldb/source/Plugins/TraceExporter/common/TraceHTR.h b/lldb/source/Plugins/TraceExporter/common/TraceHTR.h --- a/lldb/source/Plugins/TraceExporter/common/TraceHTR.h +++ b/lldb/source/Plugins/TraceExporter/common/TraceHTR.h @@ -1,9 +1,8 @@ //===-- TraceHTR.h --------------------------------------------------------===// // -// Part of the LLVM Project, under the Apache -// License v2.0 with LLVM Exceptions.// See https://llvm.org/LICENSE.txt for -// license information.// SPDX-License-Identifier: Apache-2.0 WITH -// LLVM-exception +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lldb/test/API/commands/dwim-print/TestDWIMPrint.py b/lldb/test/API/commands/dwim-print/TestDWIMPrint.py --- a/lldb/test/API/commands/dwim-print/TestDWIMPrint.py +++ b/lldb/test/API/commands/dwim-print/TestDWIMPrint.py @@ -10,11 +10,6 @@ class TestCase(TestBase): - def setUp(self): - TestBase.setUp(self) - self.build() - lldbutil.run_to_name_breakpoint(self, "main") - def _run_cmd(self, cmd: str) -> str: """Run the given lldb command and return its output.""" result = lldb.SBCommandReturnObject() @@ -51,18 +46,28 @@ def test_variables(self): """Test dwim-print with variables.""" + self.build() + lldbutil.run_to_name_breakpoint(self, "main") vars = ("argc", "argv") for var in vars: self._expect_cmd(var, "frame variable") def test_variable_paths(self): """Test dwim-print with variable path expressions.""" + self.build() + lldbutil.run_to_name_breakpoint(self, "main") exprs = ("&argc", "*argv", "argv[0]") for expr in exprs: self._expect_cmd(expr, "expression --") def test_expressions(self): """Test dwim-print with expressions.""" + self.build() + lldbutil.run_to_name_breakpoint(self, "main") exprs = ("argc + 1", "(void)argc", "(int)abs(argc)") for expr in exprs: self._expect_cmd(expr, "expression --") + + def test_dummy_target_expressions(self): + """Test dwim-print's ability to evaluate expressions without a target.""" + self._expect_cmd("1 + 2", "expression --") diff --git a/lldb/test/API/lang/c/high-mem-global/Makefile b/lldb/test/API/lang/c/high-mem-global/Makefile deleted file mode 100644 --- a/lldb/test/API/lang/c/high-mem-global/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -C_SOURCES := main.c - -include Makefile.rules diff --git a/lldb/test/API/lang/c/high-mem-global/TestHighMemGlobal.py b/lldb/test/API/lang/c/high-mem-global/TestHighMemGlobal.py deleted file mode 100644 --- a/lldb/test/API/lang/c/high-mem-global/TestHighMemGlobal.py +++ /dev/null @@ -1,59 +0,0 @@ -"""Look that lldb can display a global loaded in high memory at an addressable address.""" - - -import lldb -from lldbsuite.test.lldbtest import * -import lldbsuite.test.lldbutil as lldbutil -from lldbsuite.test.decorators import * - -class TestHighMemGlobal(TestBase): - - NO_DEBUG_INFO_TESTCASE = True - - @skipUnlessDarwin # hardcoding of __DATA segment name - def test_command_line(self): - """Test that we can display a global variable loaded in high memory.""" - self.build() - - exe = self.getBuildArtifact("a.out") - err = lldb.SBError() - - target = self.dbg.CreateTarget(exe, '', '', False, err) - self.assertTrue(target.IsValid()) - module = target.GetModuleAtIndex(0) - self.assertTrue(module.IsValid()) - data_segment = module.FindSection("__DATA") - self.assertTrue(data_segment.IsValid()) - err.Clear() - - self.expect("expr -- global.c", substrs=[' = 1']) - self.expect("expr -- global.d", substrs=[' = 2']) - self.expect("expr -- global.e", substrs=[' = 3']) - - err = target.SetSectionLoadAddress(data_segment, 0xffffffff00000000) - self.assertTrue(err.Success()) - self.expect("expr -- global.c", substrs=[' = 1']) - self.expect("expr -- global.d", substrs=[' = 2']) - self.expect("expr -- global.e", substrs=[' = 3']) - - err = target.SetSectionLoadAddress(data_segment, 0x0000088100004000) - self.assertTrue(err.Success()) - self.expect("expr -- global.c", substrs=[' = 1']) - self.expect("expr -- global.d", substrs=[' = 2']) - self.expect("expr -- global.e", substrs=[' = 3']) - 
- # This is an address in IRMemoryMap::FindSpace where it has an - # lldb-side buffer of memory that's used in IR interpreters when - # memory cannot be allocated in the inferior / functions cannot - # be jitted. - err = target.SetSectionLoadAddress(data_segment, 0xdead0fff00000000) - self.assertTrue(err.Success()) - - # The global variable `global` is now overlayed by this - # IRMemoryMap special buffer, and now we cannot see the variable. - # Testing that we get the incorrect values at this address ensures - # that IRMemoryMap::FindSpace and this test stay in sync. - self.runCmd("expr -- int $global_c = global.c") - self.runCmd("expr -- int $global_d = global.d") - self.runCmd("expr -- int $global_e = global.e") - self.expect("expr -- $global_c != 1 || $global_d != 2 || $global_e != 3", substrs=[' = true']) diff --git a/lldb/test/API/lang/c/high-mem-global/main.c b/lldb/test/API/lang/c/high-mem-global/main.c deleted file mode 100644 --- a/lldb/test/API/lang/c/high-mem-global/main.c +++ /dev/null @@ -1,9 +0,0 @@ - -struct mystruct { - int c, d, e; -} global = {1, 2, 3}; - -int main () -{ - return global.c; // break here -} diff --git a/lldb/unittests/CMakeLists.txt b/lldb/unittests/CMakeLists.txt --- a/lldb/unittests/CMakeLists.txt +++ b/lldb/unittests/CMakeLists.txt @@ -10,13 +10,6 @@ add_compile_options("-Wno-suggest-override") endif() -set(LLDB_GTEST_COMMON_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/gtest_common.h) -if (MSVC) - list(APPEND LLVM_COMPILE_FLAGS /FI ${LLDB_GTEST_COMMON_INCLUDE}) -else () - list(APPEND LLVM_COMPILE_FLAGS -include ${LLDB_GTEST_COMMON_INCLUDE}) -endif () - function(add_lldb_unittest test_name) cmake_parse_arguments(ARG "" diff --git a/lldb/unittests/Host/CMakeLists.txt b/lldb/unittests/Host/CMakeLists.txt --- a/lldb/unittests/Host/CMakeLists.txt +++ b/lldb/unittests/Host/CMakeLists.txt @@ -32,6 +32,7 @@ ${FILES} LINK_LIBS lldbHost + lldbCore lldbUtilityHelpers lldbHostHelpers LLVMTestingSupport diff --git a/lldb/unittests/Host/HostInfoTest.cpp b/lldb/unittests/Host/HostInfoTest.cpp --- a/lldb/unittests/Host/HostInfoTest.cpp +++ b/lldb/unittests/Host/HostInfoTest.cpp @@ -56,11 +56,21 @@ #if defined(__APPLE__) TEST_F(HostInfoTest, GetXcodeSDK) { - EXPECT_FALSE(HostInfo::GetXcodeSDKPath(XcodeSDK("MacOSX.sdk")).empty()); + auto get_sdk = [](std::string sdk, bool error = false) -> llvm::StringRef { + auto sdk_path_or_err = HostInfo::GetXcodeSDKPath(XcodeSDK(std::move(sdk))); + if (!error) { + EXPECT_TRUE((bool)sdk_path_or_err); + return *sdk_path_or_err; + } + EXPECT_FALSE((bool)sdk_path_or_err); + llvm::consumeError(sdk_path_or_err.takeError()); + return {}; + }; + EXPECT_FALSE(get_sdk("MacOSX.sdk").empty()); // These are expected to fall back to an available version. - EXPECT_FALSE(HostInfo::GetXcodeSDKPath(XcodeSDK("MacOSX9999.sdk")).empty()); + EXPECT_FALSE(get_sdk("MacOSX9999.sdk").empty()); // This is expected to fail. - EXPECT_TRUE(HostInfo::GetXcodeSDKPath(XcodeSDK("CeciNestPasUnOS.sdk")).empty()); + EXPECT_TRUE(get_sdk("CeciNestPasUnOS.sdk", true).empty()); } #endif diff --git a/llvm/docs/GwpAsan.rst b/llvm/docs/GwpAsan.rst --- a/llvm/docs/GwpAsan.rst +++ b/llvm/docs/GwpAsan.rst @@ -143,9 +143,10 @@ default visibility. This will override the compile time define; - Depending on allocator support (Scudo has support for this mechanism): Through - the environment variable ``GWP_ASAN_OPTIONS``, containing the options string - to be parsed. Options defined this way will override any definition made - through ``__gwp_asan_default_options``. 
+ an environment variable, containing the options string to be parsed. In Scudo, + this is through `SCUDO_OPTIONS=GWP_ASAN_${OPTION_NAME}=${VALUE}` (e.g. + `SCUDO_OPTIONS=GWP_ASAN_SampleRate=100`). Options defined this way will + override any definition made through ``__gwp_asan_default_options``. The options string follows a syntax similar to ASan, where distinct options can be assigned in the same string, separated by colons. @@ -216,9 +217,9 @@ .. code:: console - $ clang++ -fsanitize=scudo -std=c++17 -g buggy_code.cpp - $ for i in `seq 1 200`; do - GWP_ASAN_OPTIONS="SampleRate=100" ./a.out > /dev/null; + $ clang++ -fsanitize=scudo -g buggy_code.cpp + $ for i in `seq 1 500`; do + SCUDO_OPTIONS="GWP_ASAN_SampleRate=100" ./a.out > /dev/null; done | | *** GWP-ASan detected a memory error *** diff --git a/llvm/include/llvm/ADT/AddressRanges.h b/llvm/include/llvm/ADT/AddressRanges.h --- a/llvm/include/llvm/ADT/AddressRanges.h +++ b/llvm/include/llvm/ADT/AddressRanges.h @@ -66,7 +66,7 @@ Optional getRangeThatContains(uint64_t Addr) const { Collection::const_iterator It = find(Addr); if (It == Ranges.end()) - return None; + return std::nullopt; return *It; } @@ -129,7 +129,7 @@ getRangeValueThatContains(uint64_t Addr) const { Collection::const_iterator It = find(Addr); if (It == Ranges.end()) - return None; + return std::nullopt; return std::make_pair(*It, Values[It - Ranges.begin()]); } diff --git a/llvm/include/llvm/ADT/BreadthFirstIterator.h b/llvm/include/llvm/ADT/BreadthFirstIterator.h --- a/llvm/include/llvm/ADT/BreadthFirstIterator.h +++ b/llvm/include/llvm/ADT/BreadthFirstIterator.h @@ -72,8 +72,8 @@ Level = 0; // Also, insert a dummy node as marker. - VisitQueue.push(QueueElement(Node, None)); - VisitQueue.push(None); + VisitQueue.push(QueueElement(Node, std::nullopt)); + VisitQueue.push(std::nullopt); } inline bf_iterator() = default; @@ -91,14 +91,14 @@ // Already visited? if (this->Visited.insert(Next).second) - VisitQueue.push(QueueElement(Next, None)); + VisitQueue.push(QueueElement(Next, std::nullopt)); } VisitQueue.pop(); // Go to the next element skipping markers if needed. if (!VisitQueue.empty()) { Head = VisitQueue.front(); - if (Head != None) + if (Head != std::nullopt) return; Level += 1; VisitQueue.pop(); @@ -106,7 +106,7 @@ // Don't push another marker if this is the last // element. if (!VisitQueue.empty()) - VisitQueue.push(None); + VisitQueue.push(std::nullopt); } } diff --git a/llvm/include/llvm/ADT/DepthFirstIterator.h b/llvm/include/llvm/ADT/DepthFirstIterator.h --- a/llvm/include/llvm/ADT/DepthFirstIterator.h +++ b/llvm/include/llvm/ADT/DepthFirstIterator.h @@ -105,7 +105,7 @@ inline df_iterator(NodeRef Node) { this->Visited.insert(Node); - VisitStack.push_back(StackElement(Node, None)); + VisitStack.push_back(StackElement(Node, std::nullopt)); } inline df_iterator() = default; // End is when stack is empty @@ -113,7 +113,7 @@ inline df_iterator(NodeRef Node, SetType &S) : df_iterator_storage(S) { if (this->Visited.insert(Node).second) - VisitStack.push_back(StackElement(Node, None)); + VisitStack.push_back(StackElement(Node, std::nullopt)); } inline df_iterator(SetType &S) @@ -137,7 +137,7 @@ // Has our next sibling been visited? if (this->Visited.insert(Next).second) { // No, do it now. 
- VisitStack.push_back(StackElement(Next, None)); + VisitStack.push_back(StackElement(Next, std::nullopt)); return; } } diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h --- a/llvm/include/llvm/ADT/Hashing.h +++ b/llvm/include/llvm/ADT/Hashing.h @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -122,6 +123,8 @@ template hash_code hash_value(const std::basic_string &arg); +/// Compute a hash_code for a standard string. +template hash_code hash_value(const std::optional &arg); /// Override the execution seed with a fixed value. /// @@ -662,6 +665,10 @@ return hash_combine_range(arg.begin(), arg.end()); } +template hash_code hash_value(const std::optional &arg) { + return arg ? hash_combine(true, *arg) : hash_value(false); +} + template <> struct DenseMapInfo { static inline hash_code getEmptyKey() { return hash_code(-1); } static inline hash_code getTombstoneKey() { return hash_code(-2); } diff --git a/llvm/include/llvm/ADT/Optional.h b/llvm/include/llvm/ADT/Optional.h --- a/llvm/include/llvm/ADT/Optional.h +++ b/llvm/include/llvm/ADT/Optional.h @@ -297,7 +297,7 @@ auto transform(const Function &F) const & -> Optional { if (*this) return F(value()); - return None; + return std::nullopt; } T &&value() && { return std::move(Storage.value()); } @@ -313,7 +313,7 @@ const Function &F) && -> Optional { if (*this) return F(std::move(*this).value()); - return None; + return std::nullopt; } }; @@ -365,17 +365,17 @@ template constexpr bool operator==(std::nullopt_t, const Optional &X) { - return X == None; + return X == std::nullopt; } template constexpr bool operator!=(const Optional &X, std::nullopt_t) { - return !(X == None); + return !(X == std::nullopt); } template constexpr bool operator!=(std::nullopt_t, const Optional &X) { - return X != None; + return X != std::nullopt; } template @@ -390,32 +390,32 @@ template constexpr bool operator<=(const Optional &X, std::nullopt_t) { - return !(None < X); + return !(std::nullopt < X); } template constexpr bool operator<=(std::nullopt_t, const Optional &X) { - return !(X < None); + return !(X < std::nullopt); } template constexpr bool operator>(const Optional &X, std::nullopt_t) { - return None < X; + return std::nullopt < X; } template constexpr bool operator>(std::nullopt_t, const Optional &X) { - return X < None; + return X < std::nullopt; } template constexpr bool operator>=(const Optional &X, std::nullopt_t) { - return None <= X; + return std::nullopt <= X; } template constexpr bool operator>=(std::nullopt_t, const Optional &X) { - return X <= None; + return X <= std::nullopt; } template @@ -486,7 +486,7 @@ if (O) OS << *O; else - OS << None; + OS << std::nullopt; return OS; } diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -924,7 +924,7 @@ auto deref_or_none(const Iter &I, const Iter &End) -> llvm::Optional< std::remove_const_t>> { if (I == End) - return None; + return std::nullopt; return *I; } diff --git a/llvm/include/llvm/ADT/StringMapEntry.h b/llvm/include/llvm/ADT/StringMapEntry.h --- a/llvm/include/llvm/ADT/StringMapEntry.h +++ b/llvm/include/llvm/ADT/StringMapEntry.h @@ -88,11 +88,12 @@ template <> class StringMapEntryStorage : public StringMapEntryBase { public: - explicit StringMapEntryStorage(size_t keyLength, std::nullopt_t = None) + explicit StringMapEntryStorage(size_t keyLength, + std::nullopt_t = std::nullopt) : StringMapEntryBase(keyLength) {} 
StringMapEntryStorage(StringMapEntryStorage &entry) = delete; - std::nullopt_t getValue() const { return None; } + std::nullopt_t getValue() const { return std::nullopt; } }; /// StringMapEntry - This is used to represent one value that is inserted into diff --git a/llvm/include/llvm/ADT/TinyPtrVector.h b/llvm/include/llvm/ADT/TinyPtrVector.h --- a/llvm/include/llvm/ADT/TinyPtrVector.h +++ b/llvm/include/llvm/ADT/TinyPtrVector.h @@ -136,7 +136,7 @@ // implicit conversion operator to ArrayRef. operator ArrayRef() const { if (Val.isNull()) - return None; + return std::nullopt; if (Val.template is()) return *Val.getAddrOfPtr1(); return *Val.template get(); @@ -145,7 +145,7 @@ // implicit conversion operator to MutableArrayRef. operator MutableArrayRef() { if (Val.isNull()) - return None; + return std::nullopt; if (Val.template is()) return *Val.getAddrOfPtr1(); return *Val.template get(); diff --git a/llvm/include/llvm/ADT/TypeSwitch.h b/llvm/include/llvm/ADT/TypeSwitch.h --- a/llvm/include/llvm/ADT/TypeSwitch.h +++ b/llvm/include/llvm/ADT/TypeSwitch.h @@ -71,8 +71,8 @@ /// Attempt to dyn_cast the given `value` to `CastT`. This overload is /// selected if `value` already has a suitable dyn_cast method. template - static auto castValue( - ValueT value, + static decltype(auto) castValue( + ValueT &&value, std::enable_if_t::value> * = nullptr) { return value.template dyn_cast(); @@ -81,8 +81,8 @@ /// Attempt to dyn_cast the given `value` to `CastT`. This overload is /// selected if llvm::dyn_cast should be used. template - static auto castValue( - ValueT value, + static decltype(auto) castValue( + ValueT &&value, std::enable_if_t::value> * = nullptr) { return dyn_cast(value); diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -893,7 +893,7 @@ assert(V != nullptr && "Value is a nullptr?"); DenseMap::iterator VNIt = ValueToNumber.find(V); if (VNIt == ValueToNumber.end()) - return None; + return std::nullopt; return VNIt->second; } @@ -904,7 +904,7 @@ Optional fromGVN(unsigned Num) { DenseMap::iterator VNIt = NumberToValue.find(Num); if (VNIt == NumberToValue.end()) - return None; + return std::nullopt; assert(VNIt->second != nullptr && "Found value is a nullptr!"); return VNIt->second; } @@ -918,7 +918,7 @@ Optional getCanonicalNum(unsigned N) { DenseMap::iterator NCIt = NumberToCanonNum.find(N); if (NCIt == NumberToCanonNum.end()) - return None; + return std::nullopt; return NCIt->second; } @@ -931,7 +931,7 @@ Optional fromCanonicalNum(unsigned N) { DenseMap::iterator CNIt = CanonNumToNumber.find(N); if (CNIt == CanonNumToNumber.end()) - return None; + return std::nullopt; return CNIt->second; } diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -200,7 +200,7 @@ protected: InlineAdvisor(Module &M, FunctionAnalysisManager &FAM, - Optional IC = llvm::None); + Optional IC = std::nullopt); virtual std::unique_ptr getAdviceImpl(CallBase &CB) = 0; virtual std::unique_ptr getMandatoryAdvice(CallBase &CB, bool Advice); diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -103,12 +103,12 @@ const char *Reason = nullptr; /// The 
cost-benefit pair computed by cost-benefit analysis. - Optional CostBenefit = None; + Optional CostBenefit = std::nullopt; // Trivial constructor, interesting logic in the factory functions below. InlineCost(int Cost, int Threshold, int StaticBonusApplied, const char *Reason = nullptr, - Optional CostBenefit = None) + Optional CostBenefit = std::nullopt) : Cost(Cost), Threshold(Threshold), StaticBonusApplied(StaticBonusApplied), Reason(Reason), CostBenefit(CostBenefit) { @@ -122,12 +122,14 @@ assert(Cost < NeverInlineCost && "Cost crosses sentinel value"); return InlineCost(Cost, Threshold, StaticBonus); } - static InlineCost getAlways(const char *Reason, - Optional CostBenefit = None) { + static InlineCost + getAlways(const char *Reason, + Optional CostBenefit = std::nullopt) { return InlineCost(AlwaysInlineCost, 0, 0, Reason, CostBenefit); } - static InlineCost getNever(const char *Reason, - Optional CostBenefit = None) { + static InlineCost + getNever(const char *Reason, + Optional CostBenefit = std::nullopt) { return InlineCost(NeverInlineCost, 0, 0, Reason, CostBenefit); } diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -455,7 +455,7 @@ // None if pointer-difference checks cannot be used. std::optional> getDiffChecks() const { if (!CanUseDiffCheck) - return None; + return std::nullopt; return {DiffChecks}; } diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h --- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h @@ -200,7 +200,7 @@ /// classified to have temporal reuse. CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, - Optional TRT = None); + Optional TRT = std::nullopt); /// Create a CacheCost for the loop nest rooted by \p Root. /// The optional parameter \p TRT can be used to specify the max. distance @@ -208,7 +208,7 @@ /// classified to have temporal reuse. static std::unique_ptr getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, - Optional TRT = None); + Optional TRT = std::nullopt); /// Return the estimated cost of loop \p L if the given loop is part of the /// loop nest associated with this object. Return -1 otherwise. diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h --- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -475,7 +475,7 @@ const auto Off = ClobberOffsets.find(DepInst); if (Off != ClobberOffsets.end()) return Off->getSecond(); - return None; + return std::nullopt; } private: diff --git a/llvm/include/llvm/Analysis/MemoryLocation.h b/llvm/include/llvm/Analysis/MemoryLocation.h --- a/llvm/include/llvm/Analysis/MemoryLocation.h +++ b/llvm/include/llvm/Analysis/MemoryLocation.h @@ -16,10 +16,11 @@ #define LLVM_ANALYSIS_MEMORYLOCATION_H #include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/Optional.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/TypeSize.h" +#include + namespace llvm { class CallBase; @@ -39,9 +40,9 @@ class Value; // Represents the size of a MemoryLocation. Logically, it's an -// Optional that also carries a bit to represent whether the integer -// it contains, N, is 'precise'. 
Precise, in this context, means that we know -// that the area of storage referenced by the given MemoryLocation must be +// std::optional that also carries a bit to represent whether the +// integer it contains, N, is 'precise'. Precise, in this context, means that we +// know that the area of storage referenced by the given MemoryLocation must be // precisely N bytes. An imprecise value is formed as the union of two or more // precise values, and can conservatively represent all of the values unioned // into it. Importantly, imprecise values are an *upper-bound* on the size of a @@ -62,7 +63,7 @@ // we'll ever actually do so. // // If asked to represent a pathologically large value, this will degrade to -// None. +// std::nullopt. class LocationSize { enum : uint64_t { BeforeOrAfterPointer = ~uint64_t(0), @@ -242,7 +243,7 @@ static MemoryLocation get(const Instruction *Inst) { return *MemoryLocation::getOrNone(Inst); } - static Optional getOrNone(const Instruction *Inst); + static std::optional getOrNone(const Instruction *Inst); /// Return a location representing the source of a memory transfer. static MemoryLocation getForSource(const MemTransferInst *MTI); @@ -254,8 +255,8 @@ static MemoryLocation getForDest(const MemIntrinsic *MI); static MemoryLocation getForDest(const AtomicMemIntrinsic *MI); static MemoryLocation getForDest(const AnyMemIntrinsic *MI); - static Optional getForDest(const CallBase *CI, - const TargetLibraryInfo &TLI); + static std::optional getForDest(const CallBase *CI, + const TargetLibraryInfo &TLI); /// Return a location representing a particular argument of a call. static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, diff --git a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h --- a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h +++ b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h @@ -241,9 +241,9 @@ public: void init(Module *Mod) { M = Mod; - ImpreciseReleaseMDKind = llvm::None; - CopyOnEscapeMDKind = llvm::None; - NoObjCARCExceptionsMDKind = llvm::None; + ImpreciseReleaseMDKind = std::nullopt; + CopyOnEscapeMDKind = std::nullopt; + NoObjCARCExceptionsMDKind = std::nullopt; } unsigned get(ARCMDKindID ID) { diff --git a/llvm/include/llvm/Analysis/ObjCARCUtil.h b/llvm/include/llvm/Analysis/ObjCARCUtil.h --- a/llvm/include/llvm/Analysis/ObjCARCUtil.h +++ b/llvm/include/llvm/Analysis/ObjCARCUtil.h @@ -43,7 +43,7 @@ inline Optional getAttachedARCFunction(const CallBase *CB) { auto B = CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall); if (!B) - return None; + return std::nullopt; return cast(B->Inputs[0]); } diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1334,10 +1334,9 @@ /// as arguments and asserts enforce that internally. 
/*implicit*/ ExitLimit(const SCEV *E); - ExitLimit( - const SCEV *E, const SCEV *ConstantMaxNotTaken, bool MaxOrZero, - ArrayRef *> PredSetList = - None); + ExitLimit(const SCEV *E, const SCEV *ConstantMaxNotTaken, bool MaxOrZero, + ArrayRef *> + PredSetList = std::nullopt); ExitLimit(const SCEV *E, const SCEV *ConstantMaxNotTaken, bool MaxOrZero, const SmallPtrSetImpl &PredSet); diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -236,7 +236,7 @@ public: explicit TargetLibraryInfo(const TargetLibraryInfoImpl &Impl, - std::optional F = None) + std::optional F = std::nullopt) : Impl(&Impl), OverrideAsUnavailable(NumLibFuncs) { if (!F) return; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -552,22 +552,21 @@ /// intrinsics. This function will be called from the InstCombine pass every /// time a target-specific intrinsic is encountered. /// - /// \returns None to not do anything target specific or a value that will be - /// returned from the InstCombiner. It is possible to return null and stop - /// further processing of the intrinsic by returning nullptr. - Optional instCombineIntrinsic(InstCombiner &IC, - IntrinsicInst &II) const; + /// \returns std::nullopt to not do anything target specific or a value that + /// will be returned from the InstCombiner. It is possible to return null and + /// stop further processing of the intrinsic by returning nullptr. + std::optional instCombineIntrinsic(InstCombiner & IC, + IntrinsicInst & II) const; /// Can be used to implement target-specific instruction combining. /// \see instCombineIntrinsic - Optional - simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, - APInt DemandedMask, KnownBits &Known, - bool &KnownBitsComputed) const; + std::optional simplifyDemandedUseBitsIntrinsic( + InstCombiner & IC, IntrinsicInst & II, APInt DemandedMask, + KnownBits & Known, bool &KnownBitsComputed) const; /// Can be used to implement target-specific instruction combining. /// \see instCombineIntrinsic - Optional simplifyDemandedVectorEltsIntrinsic( - InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, - APInt &UndefElts2, APInt &UndefElts3, + std::optional simplifyDemandedVectorEltsIntrinsic( + InstCombiner & IC, IntrinsicInst & II, APInt DemandedElts, + APInt & UndefElts, APInt & UndefElts2, APInt & UndefElts3, std::function SimplifyAndSetOp) const; /// @} @@ -806,6 +805,9 @@ MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const; + /// Should the Select Optimization pass be enabled and ran. + bool enableSelectOptimize() const; + /// Enable matching of interleaved access groups. bool enableInterleavedAccessVectorization() const; @@ -971,11 +973,11 @@ unsigned getMinVectorRegisterBitWidth() const; /// \return The maximum value of vscale if the target specifies an - /// architectural maximum vector length, and None otherwise. - Optional getMaxVScale() const; + /// architectural maximum vector length, and std::nullopt otherwise. + std::optional getMaxVScale() const; /// \return the value of vscale to tune the cost model for. 
- Optional getVScaleForTuning() const; + std::optional getVScaleForTuning() const; /// \return True if the vectorization factor should be chosen to /// make the vector of the smallest element type match the size of a @@ -1028,10 +1030,10 @@ }; /// \return The size of the cache level in bytes, if available. - Optional getCacheSize(CacheLevel Level) const; + std::optional getCacheSize(CacheLevel Level) const; /// \return The associativity of the cache level, if available. - Optional getCacheAssociativity(CacheLevel Level) const; + std::optional getCacheAssociativity(CacheLevel Level) const; /// \return How much before a load we should place the prefetch /// instruction. This is currently measured in number of @@ -1111,10 +1113,11 @@ /// cases, like in broadcast loads. /// NOTE: For subvector extractions Tp represents the source type. InstructionCost - getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef Mask = None, + getShuffleCost(ShuffleKind Kind, VectorType *Tp, + ArrayRef Mask = std::nullopt, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, int Index = 0, VectorType *SubTp = nullptr, - ArrayRef Args = None) const; + ArrayRef Args = std::nullopt) const; /// Represents a hint about the context in which a cast is used. /// @@ -1264,8 +1267,8 @@ /// A helper function to determine the type of reduction algorithm used /// for a given \p Opcode and set of FastMathFlags \p FMF. - static bool requiresOrderedReduction(Optional FMF) { - return FMF != None && !(*FMF).allowReassoc(); + static bool requiresOrderedReduction(std::optional FMF) { + return FMF && !(*FMF).allowReassoc(); } /// Calculate the cost of vector reduction intrinsics. @@ -1293,7 +1296,7 @@ /// allowed. /// InstructionCost getArithmeticReductionCost( - unsigned Opcode, VectorType *Ty, Optional FMF, + unsigned Opcode, VectorType *Ty, std::optional FMF, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; InstructionCost getMinMaxReductionCost( @@ -1315,7 +1318,7 @@ /// ResTy vecreduce.opcode(ext(Ty A)). InstructionCost getExtendedReductionCost( unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; /// \returns The cost of Intrinsic instructions. Analyses the real arguments. @@ -1369,11 +1372,10 @@ Type *ExpectedType) const; /// \returns The type to use in a loop expansion of a memcpy call. - Type * - getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, - unsigned SrcAddrSpace, unsigned DestAddrSpace, - unsigned SrcAlign, unsigned DestAlign, - Optional AtomicElementSize = None) const; + Type *getMemcpyLoopLoweringType( + LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, + unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, + std::optional AtomicElementSize = std::nullopt) const; /// \param[out] OpsOut The operand types to copy RemainingBytes of memory. /// \param RemainingBytes The number of bytes to copy. @@ -1385,7 +1387,7 @@ SmallVectorImpl &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, - Optional AtomicCpySize = None) const; + std::optional AtomicCpySize = std::nullopt) const; /// \returns True if the two functions have compatible attributes for inlining /// purposes. 
@@ -1610,15 +1612,14 @@ DominatorTree *DT, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI) = 0; virtual PredicationStyle emitGetActiveLaneMask() = 0; - virtual Optional instCombineIntrinsic(InstCombiner &IC, - IntrinsicInst &II) = 0; - virtual Optional - simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, - APInt DemandedMask, KnownBits &Known, - bool &KnownBitsComputed) = 0; - virtual Optional simplifyDemandedVectorEltsIntrinsic( - InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, - APInt &UndefElts2, APInt &UndefElts3, + virtual std::optional instCombineIntrinsic( + InstCombiner &IC, IntrinsicInst &II) = 0; + virtual std::optional simplifyDemandedUseBitsIntrinsic( + InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, + KnownBits & Known, bool &KnownBitsComputed) = 0; + virtual std::optional simplifyDemandedVectorEltsIntrinsic( + InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, + APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function SimplifyAndSetOp) = 0; virtual bool isLegalAddImmediate(int64_t Imm) = 0; @@ -1685,6 +1686,7 @@ virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; virtual MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0; + virtual bool enableSelectOptimize() = 0; virtual bool enableInterleavedAccessVectorization() = 0; virtual bool enableMaskedInterleavedAccessVectorization() = 0; virtual bool isFPVectorizationPotentiallyUnsafe() = 0; @@ -1715,8 +1717,8 @@ virtual const char *getRegisterClassName(unsigned ClassID) const = 0; virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0; virtual unsigned getMinVectorRegisterBitWidth() const = 0; - virtual Optional getMaxVScale() const = 0; - virtual Optional getVScaleForTuning() const = 0; + virtual std::optional getMaxVScale() const = 0; + virtual std::optional getVScaleForTuning() const = 0; virtual bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0; virtual ElementCount getMinimumVF(unsigned ElemWidth, @@ -1727,8 +1729,9 @@ virtual bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; virtual unsigned getCacheLineSize() const = 0; - virtual Optional getCacheSize(CacheLevel Level) const = 0; - virtual Optional getCacheAssociativity(CacheLevel Level) const = 0; + virtual std::optional getCacheSize(CacheLevel Level) const = 0; + virtual std::optional getCacheAssociativity(CacheLevel Level) + const = 0; /// \return How much before a load we should place the prefetch /// instruction. 
This is currently measured in number of @@ -1818,14 +1821,14 @@ bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0; virtual InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind) = 0; virtual InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind) = 0; virtual InstructionCost getExtendedReductionCost( unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0; virtual InstructionCost getMulAccReductionCost( bool IsUnsigned, Type *ResTy, VectorType *Ty, @@ -1846,17 +1849,16 @@ virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0; virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) = 0; - virtual Type * - getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, - unsigned SrcAddrSpace, unsigned DestAddrSpace, - unsigned SrcAlign, unsigned DestAlign, - Optional AtomicElementSize) const = 0; + virtual Type *getMemcpyLoopLoweringType( + LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, + unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, + std::optional AtomicElementSize) const = 0; virtual void getMemcpyLoopResidualLoweringType( SmallVectorImpl &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, - Optional AtomicCpySize) const = 0; + std::optional AtomicCpySize) const = 0; virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const = 0; virtual bool areTypesABICompatible(const Function *Caller, @@ -2008,18 +2010,18 @@ PredicationStyle emitGetActiveLaneMask() override { return Impl.emitGetActiveLaneMask(); } - Optional instCombineIntrinsic(InstCombiner &IC, - IntrinsicInst &II) override { + std::optional + instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override { return Impl.instCombineIntrinsic(IC, II); } - Optional + std::optional simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) override { return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known, KnownBitsComputed); } - Optional simplifyDemandedVectorEltsIntrinsic( + std::optional simplifyDemandedVectorEltsIntrinsic( InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function @@ -2175,6 +2177,9 @@ bool enableInterleavedAccessVectorization() override { return Impl.enableInterleavedAccessVectorization(); } + bool enableSelectOptimize() override { + return Impl.enableSelectOptimize(); + } bool enableMaskedInterleavedAccessVectorization() override { return Impl.enableMaskedInterleavedAccessVectorization(); } @@ -2239,10 +2244,10 @@ unsigned getMinVectorRegisterBitWidth() const override { return Impl.getMinVectorRegisterBitWidth(); } - Optional getMaxVScale() const override { + std::optional getMaxVScale() const override { return Impl.getMaxVScale(); } - Optional getVScaleForTuning() const override { + std::optional getVScaleForTuning() const override { return Impl.getVScaleForTuning(); } bool shouldMaximizeVectorBandwidth( @@ -2266,10 +2271,11 @@ I, AllowPromotionWithoutCommonHeader); } unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); } - Optional getCacheSize(CacheLevel Level) const 
override { + std::optional getCacheSize(CacheLevel Level) const override { return Impl.getCacheSize(Level); } - Optional getCacheAssociativity(CacheLevel Level) const override { + std::optional + getCacheAssociativity(CacheLevel Level) const override { return Impl.getCacheAssociativity(Level); } @@ -2407,7 +2413,7 @@ } InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind) override { return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); } @@ -2418,7 +2424,7 @@ } InstructionCost getExtendedReductionCost( unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override { return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF, CostKind); @@ -2461,7 +2467,7 @@ Type *getMemcpyLoopLoweringType( LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, - Optional AtomicElementSize) const override { + std::optional AtomicElementSize) const override { return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace, DestAddrSpace, SrcAlign, DestAlign, AtomicElementSize); @@ -2470,7 +2476,7 @@ SmallVectorImpl &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, - Optional AtomicCpySize) const override { + std::optional AtomicCpySize) const override { Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign, DestAlign, AtomicCpySize); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -22,6 +22,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include #include namespace llvm { @@ -174,24 +175,24 @@ return PredicationStyle::None; } - Optional instCombineIntrinsic(InstCombiner &IC, - IntrinsicInst &II) const { - return None; + std::optional instCombineIntrinsic(InstCombiner &IC, + IntrinsicInst &II) const { + return std::nullopt; } - Optional + std::optional simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const { - return None; + return std::nullopt; } - Optional simplifyDemandedVectorEltsIntrinsic( + std::optional simplifyDemandedVectorEltsIntrinsic( InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function SimplifyAndSetOp) const { - return None; + return std::nullopt; } void getUnrollingPreferences(Loop *, ScalarEvolution &, @@ -358,6 +359,8 @@ return {}; } + bool enableSelectOptimize() const { return true; } + bool enableInterleavedAccessVectorization() const { return false; } bool enableMaskedInterleavedAccessVectorization() const { return false; } @@ -430,8 +433,8 @@ unsigned getMinVectorRegisterBitWidth() const { return 128; } - Optional getMaxVScale() const { return None; } - Optional getVScaleForTuning() const { return None; } + std::optional getMaxVScale() const { return std::nullopt; } + std::optional getVScaleForTuning() const { return std::nullopt; } bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const { @@ -452,25 +455,24 @@ } unsigned 
getCacheLineSize() const { return 0; } - - llvm::Optional + std::optional getCacheSize(TargetTransformInfo::CacheLevel Level) const { switch (Level) { case TargetTransformInfo::CacheLevel::L1D: [[fallthrough]]; case TargetTransformInfo::CacheLevel::L2D: - return llvm::None; + return std::nullopt; } llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); } - llvm::Optional + std::optional getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const { switch (Level) { case TargetTransformInfo::CacheLevel::L1D: [[fallthrough]]; case TargetTransformInfo::CacheLevel::L2D: - return llvm::None; + return std::nullopt; } llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); @@ -516,11 +518,10 @@ return 1; } - InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, - ArrayRef Mask, - TTI::TargetCostKind CostKind, int Index, - VectorType *SubTp, - ArrayRef Args = None) const { + InstructionCost + getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef Mask, + TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, + ArrayRef Args = std::nullopt) const { return 1; } @@ -693,7 +694,7 @@ } InstructionCost getArithmeticReductionCost(unsigned, VectorType *, - Optional FMF, + std::optional FMF, TTI::TargetCostKind) const { return 1; } @@ -705,7 +706,7 @@ InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind) const { return 1; } @@ -738,10 +739,11 @@ return nullptr; } - Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, - unsigned SrcAddrSpace, unsigned DestAddrSpace, - unsigned SrcAlign, unsigned DestAlign, - Optional AtomicElementSize) const { + Type * + getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, + unsigned SrcAddrSpace, unsigned DestAddrSpace, + unsigned SrcAlign, unsigned DestAlign, + std::optional AtomicElementSize) const { return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8) : Type::getInt8Ty(Context); } @@ -750,7 +752,7 @@ SmallVectorImpl &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, - Optional AtomicCpySize) const { + std::optional AtomicCpySize) const { unsigned OpSizeInBytes = AtomicCpySize ? 
*AtomicCpySize : 1; Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8); for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes) diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h --- a/llvm/include/llvm/Analysis/ValueLattice.h +++ b/llvm/include/llvm/Analysis/ValueLattice.h @@ -281,7 +281,7 @@ } else if (isConstantRange() && getConstantRange().isSingleElement()) { return *getConstantRange().getSingleElement(); } - return None; + return std::nullopt; } bool markOverdefined() { diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -21,6 +21,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/ModuleSummaryIndex.h" #include +#include namespace llvm { class Module; @@ -180,7 +181,7 @@ BlockAddressPFS(nullptr) {} bool Run( bool UpgradeDebugInfo, DataLayoutCallbackTy DataLayoutCallback = - [](StringRef) { return None; }); + [](StringRef) { return std::nullopt; }); bool parseStandaloneConstantValue(Constant *&C, const SlotMapping *Slots); @@ -301,7 +302,7 @@ bool parseOptionalCommaAddrSpace(unsigned &AddrSpace, LocTy &Loc, bool &AteExtraComma); bool parseAllocSizeArguments(unsigned &BaseSizeArg, - Optional &HowManyArg); + std::optional &HowManyArg); bool parseVScaleRangeArguments(unsigned &MinValue, unsigned &MaxValue); bool parseIndexList(SmallVectorImpl &Indices, bool &AteExtraComma); diff --git a/llvm/include/llvm/AsmParser/Parser.h b/llvm/include/llvm/AsmParser/Parser.h --- a/llvm/include/llvm/AsmParser/Parser.h +++ b/llvm/include/llvm/AsmParser/Parser.h @@ -87,7 +87,9 @@ ParsedModuleAndIndex parseAssemblyFileWithIndex( StringRef Filename, SMDiagnostic &Err, LLVMContext &Context, SlotMapping *Slots = nullptr, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }); + DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { + return std::nullopt; + }); /// Only for use in llvm-as for testing; this does not produce a valid module. ParsedModuleAndIndex parseAssemblyFileWithIndexNoUpgradeDebugInfo( @@ -126,7 +128,9 @@ std::unique_ptr parseAssembly( MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context, SlotMapping *Slots = nullptr, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }); + DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { + return std::nullopt; + }); /// Parse LLVM Assembly including the summary index from a MemoryBuffer. /// @@ -166,7 +170,9 @@ bool parseAssemblyInto( MemoryBufferRef F, Module *M, ModuleSummaryIndex *Index, SMDiagnostic &Err, SlotMapping *Slots = nullptr, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }); + DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { + return std::nullopt; + }); /// Parse a type and a constant value in the given string. 
/// diff --git a/llvm/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h b/llvm/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h --- a/llvm/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h +++ b/llvm/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h @@ -50,7 +50,7 @@ bool verifyInteger(msgpack::DocNode &Node); bool verifyArray(msgpack::DocNode &Node, function_ref verifyNode, - Optional Size = None); + Optional Size = std::nullopt); bool verifyEntry(msgpack::MapDocNode &MapNode, StringRef Key, bool Required, function_ref verifyNode); bool diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def --- a/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -916,7 +916,7 @@ HANDLE_DW_LANG(0x0024, RenderScript, 0, 5, DWARF) HANDLE_DW_LANG(0x0025, BLISS, 0, 5, DWARF) // Vendor extensions: -HANDLE_DW_LANG(0x8001, Mips_Assembler, None, 0, MIPS) +HANDLE_DW_LANG(0x8001, Mips_Assembler, std::nullopt, 0, MIPS) HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript, 0, 0, GOOGLE) HANDLE_DW_LANG(0xb000, BORLAND_Delphi, 0, 0, BORLAND) diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -1035,6 +1035,10 @@ SHT_ARM_ATTRIBUTES = 0x70000003U, SHT_ARM_DEBUGOVERLAY = 0x70000004U, SHT_ARM_OVERLAYSECTION = 0x70000005U, + // Special aarch64-specific sections for MTE support, as described in: + // https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#7section-types + SHT_AARCH64_MEMTAG_GLOBALS_STATIC = 0x70000007U, + SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC = 0x70000008U, SHT_HEX_ORDERED = 0x70000000, // Link editor is to sort the entries in // this section based on their sizes SHT_X86_64_UNWIND = 0x70000001, // Unwind information diff --git a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h --- a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h +++ b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h @@ -85,18 +85,19 @@ std::map BlockIDStats; public: - BitcodeAnalyzer(StringRef Buffer, Optional BlockInfoBuffer = None); + BitcodeAnalyzer(StringRef Buffer, + Optional BlockInfoBuffer = std::nullopt); /// Analyze the bitcode file. - Error analyze(Optional O = None, - Optional CheckHash = None); + Error analyze(Optional O = std::nullopt, + Optional CheckHash = std::nullopt); /// Print stats about the bitcode file. - void printStats(BCDumpOptions O, Optional Filename = None); + void printStats(BCDumpOptions O, Optional Filename = std::nullopt); private: /// Read a block, updating statistics, etc. Error parseBlock(unsigned BlockID, unsigned IndentLevel, - Optional O = None, - Optional CheckHash = None); + Optional O = std::nullopt, + Optional CheckHash = std::nullopt); Error decodeMetadataStringsBlob(StringRef Indent, ArrayRef Record, StringRef Blob, raw_ostream &OS); diff --git a/llvm/include/llvm/Bitcode/BitcodeReader.h b/llvm/include/llvm/Bitcode/BitcodeReader.h --- a/llvm/include/llvm/Bitcode/BitcodeReader.h +++ b/llvm/include/llvm/Bitcode/BitcodeReader.h @@ -107,7 +107,7 @@ /// Read the entire bitcode module and return it. Expected> parseModule( LLVMContext &Context, DataLayoutCallbackTy DataLayoutCallback = - [](StringRef) { return None; }); + [](StringRef) { return std::nullopt; }); /// Returns information about the module to be used for LTO: whether to /// compile with ThinLTO, and whether it has a summary. 
@@ -175,7 +175,7 @@ Expected> parseBitcodeFile( MemoryBufferRef Buffer, LLVMContext &Context, DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { - return None; + return std::nullopt; }); /// Returns LTO information for the specified bitcode file. diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h --- a/llvm/include/llvm/Bitstream/BitstreamWriter.h +++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h @@ -502,7 +502,8 @@ /// the first entry. template void EmitRecordWithAbbrev(unsigned Abbrev, const Container &Vals) { - EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), StringRef(), None); + EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), StringRef(), + std::nullopt); } /// EmitRecordWithBlob - Emit the specified record to the stream, using an @@ -513,13 +514,13 @@ template void EmitRecordWithBlob(unsigned Abbrev, const Container &Vals, StringRef Blob) { - EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), Blob, None); + EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), Blob, std::nullopt); } template void EmitRecordWithBlob(unsigned Abbrev, const Container &Vals, const char *BlobData, unsigned BlobLen) { return EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), - StringRef(BlobData, BlobLen), None); + StringRef(BlobData, BlobLen), std::nullopt); } /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records @@ -527,13 +528,14 @@ template void EmitRecordWithArray(unsigned Abbrev, const Container &Vals, StringRef Array) { - EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), Array, None); + EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), Array, std::nullopt); } template void EmitRecordWithArray(unsigned Abbrev, const Container &Vals, const char *ArrayData, unsigned ArrayLen) { return EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), - StringRef(ArrayData, ArrayLen), None); + StringRef(ArrayData, ArrayLen), + std::nullopt); } //===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -32,8 +32,8 @@ // The cluster information for a machine basic block. struct BBClusterInfo { - // MachineBasicBlock ID. - unsigned MBBNumber; + // Unique ID for this basic block. + unsigned BBID; // Cluster ID this basic block belongs to. unsigned ClusterID; // Position of basic block within the cluster. 
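The hunks above mechanically swap llvm::None for std::nullopt (and llvm::Optional for std::optional) without changing behavior. As a minimal sketch of the pattern these call sites now follow — a hypothetical helper for illustration, not part of this patch:

#include <optional>
#include <vector>

// Illustrative only: a function that used to write "return None;" now
// returns std::nullopt; an engaged result is still built implicitly.
std::optional<unsigned> findIndex(const std::vector<unsigned> &Vals,
                                  unsigned X) {
  for (unsigned I = 0, E = Vals.size(); I != E; ++I)
    if (Vals[I] == X)
      return I;          // implicit construction of an engaged optional
  return std::nullopt;   // previously: return None;
}

Callers are unaffected by the migration: if (auto Idx = findIndex(V, 42)) still tests for an engaged value before dereferencing *Idx.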
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -52,6 +52,7 @@ #include #include #include +#include #include namespace llvm { @@ -620,21 +621,20 @@ return BaseT::emitGetActiveLaneMask(); } - Optional instCombineIntrinsic(InstCombiner &IC, + std::optional instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) { return BaseT::instCombineIntrinsic(IC, II); } - Optional simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, - IntrinsicInst &II, - APInt DemandedMask, - KnownBits &Known, - bool &KnownBitsComputed) { + std::optional + simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, + APInt DemandedMask, KnownBits &Known, + bool &KnownBitsComputed) { return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known, KnownBitsComputed); } - Optional simplifyDemandedVectorEltsIntrinsic( + std::optional simplifyDemandedVectorEltsIntrinsic( InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function @@ -644,15 +644,15 @@ SimplifyAndSetOp); } - virtual Optional + virtual std::optional getCacheSize(TargetTransformInfo::CacheLevel Level) const { - return Optional( - getST()->getCacheSize(static_cast(Level))); + return std::optional( + getST()->getCacheSize(static_cast(Level))); } - virtual Optional + virtual std::optional getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const { - Optional TargetResult = + std::optional TargetResult = getST()->getCacheAssociativity(static_cast(Level)); if (TargetResult) @@ -698,8 +698,8 @@ return TypeSize::getFixed(32); } - Optional getMaxVScale() const { return None; } - Optional getVScaleForTuning() const { return None; } + std::optional getMaxVScale() const { return std::nullopt; } + std::optional getVScaleForTuning() const { return std::nullopt; } /// Estimate the overhead of scalarizing an instruction. 
Insert and Extract /// are set if the demanded result elements need to be inserted and/or @@ -940,7 +940,7 @@ ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, - ArrayRef Args = None) { + ArrayRef Args = std::nullopt) { switch (improveShuffleKindFromMask(Kind, Mask)) { case TTI::SK_Broadcast: @@ -1522,7 +1522,7 @@ unsigned Index = cast(Args[1])->getZExtValue(); return thisT()->getShuffleCost( TTI::SK_ExtractSubvector, cast(Args[0]->getType()), - None, CostKind, Index, cast(RetTy)); + std::nullopt, CostKind, Index, cast(RetTy)); } case Intrinsic::vector_insert: { // FIXME: Handle case where a scalable vector is inserted into a scalable @@ -1531,19 +1531,19 @@ return BaseT::getIntrinsicInstrCost(ICA, CostKind); unsigned Index = cast(Args[2])->getZExtValue(); return thisT()->getShuffleCost( - TTI::SK_InsertSubvector, cast(Args[0]->getType()), None, - CostKind, Index, cast(Args[1]->getType())); + TTI::SK_InsertSubvector, cast(Args[0]->getType()), + std::nullopt, CostKind, Index, cast(Args[1]->getType())); } case Intrinsic::experimental_vector_reverse: { - return thisT()->getShuffleCost(TTI::SK_Reverse, - cast(Args[0]->getType()), None, - CostKind, 0, cast(RetTy)); + return thisT()->getShuffleCost( + TTI::SK_Reverse, cast(Args[0]->getType()), std::nullopt, + CostKind, 0, cast(RetTy)); } case Intrinsic::experimental_vector_splice: { unsigned Index = cast(Args[2])->getZExtValue(); - return thisT()->getShuffleCost(TTI::SK_Splice, - cast(Args[0]->getType()), None, - CostKind, Index, cast(RetTy)); + return thisT()->getShuffleCost( + TTI::SK_Splice, cast(Args[0]->getType()), std::nullopt, + CostKind, Index, cast(RetTy)); } case Intrinsic::vector_reduce_add: case Intrinsic::vector_reduce_mul: @@ -1819,19 +1819,19 @@ } case Intrinsic::vector_reduce_add: return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy, - None, CostKind); + std::nullopt, CostKind); case Intrinsic::vector_reduce_mul: return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy, - None, CostKind); + std::nullopt, CostKind); case Intrinsic::vector_reduce_and: return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy, - None, CostKind); + std::nullopt, CostKind); case Intrinsic::vector_reduce_or: - return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy, None, - CostKind); + return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy, + std::nullopt, CostKind); case Intrinsic::vector_reduce_xor: return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy, - None, CostKind); + std::nullopt, CostKind); case Intrinsic::vector_reduce_fadd: return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy, FMF, CostKind); @@ -2224,8 +2224,9 @@ while (NumVecElts > MVTLen) { NumVecElts /= 2; VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts); - ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, - CostKind, NumVecElts, SubTy); + ShuffleCost += + thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt, + CostKind, NumVecElts, SubTy); ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind); Ty = SubTy; ++LongVectorCount; @@ -2241,7 +2242,7 @@ // By default reductions need one shuffle per reduction level. 
ShuffleCost += NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, - None, CostKind, 0, Ty); + std::nullopt, CostKind, 0, Ty); ArithCost += NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind); return ShuffleCost + ArithCost + @@ -2282,7 +2283,7 @@ } InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind) { if (TTI::requiresOrderedReduction(FMF)) return getOrderedReductionCost(Opcode, Ty, CostKind); @@ -2322,8 +2323,9 @@ auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts); CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts); - ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, - None, CostKind, NumVecElts, SubTy); + ShuffleCost += + thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt, + CostKind, NumVecElts, SubTy); MinMaxCost += thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, CmpInst::BAD_ICMP_PREDICATE, CostKind) + @@ -2341,7 +2343,7 @@ // architecture-dependent length. ShuffleCost += NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, - None, CostKind, 0, Ty); + std::nullopt, CostKind, 0, Ty); MinMaxCost += NumReduxLevels * (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, @@ -2356,7 +2358,7 @@ InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind) { // Without any native support, this is equivalent to the cost of // vecreduce.opcode(ext(Ty A)). @@ -2378,7 +2380,7 @@ // vecreduce.add(mul(A, B)). VectorType *ExtTy = VectorType::get(ResTy, Ty); InstructionCost RedCost = thisT()->getArithmeticReductionCost( - Instruction::Add, ExtTy, None, CostKind); + Instruction::Add, ExtTy, std::nullopt, CostKind); InstructionCost ExtCost = thisT()->getCastInstrCost( IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty, TTI::CastContextHint::None, CostKind); diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h --- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h +++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h @@ -1130,6 +1130,9 @@ if (!TM.requiresStructuredCFG()) addPass(TailDuplicatePass()); + // Cleanup of redundant (identical) address/immediate loads. + addPass(MachineLateInstrsCleanupPass()); + // Copy propagation. 
addPass(MachineCopyPropagationPass()); } diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -18,6 +18,7 @@ #include "llvm/ADT/FloatingPointMode.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetOptions.h" +#include #include #include @@ -37,16 +38,16 @@ std::vector getMAttrs(); Reloc::Model getRelocModel(); -Optional getExplicitRelocModel(); +std::optional getExplicitRelocModel(); ThreadModel::Model getThreadModel(); CodeModel::Model getCodeModel(); -Optional getExplicitCodeModel(); +std::optional getExplicitCodeModel(); llvm::ExceptionHandling getExceptionModel(); -Optional getExplicitFileType(); +std::optional getExplicitFileType(); CodeGenFileType getFileType(); @@ -98,10 +99,10 @@ bool getRelaxELFRelocations(); bool getDataSections(); -Optional getExplicitDataSections(); +std::optional getExplicitDataSections(); bool getFunctionSections(); -Optional getExplicitFunctionSections(); +std::optional getExplicitFunctionSections(); bool getIgnoreXCOFFVisibility(); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h --- a/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h @@ -91,9 +91,9 @@ // Pull in base class constructors. using MachineIRBuilder::MachineIRBuilder; // Unhide buildInstr - MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef DstOps, - ArrayRef SrcOps, - Optional Flag = None) override; + MachineInstrBuilder + buildInstr(unsigned Opc, ArrayRef DstOps, ArrayRef SrcOps, + Optional Flag = std::nullopt) override; // Bring in the other overload from the base class. using MachineIRBuilder::buildConstant; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -395,21 +395,20 @@ /// \p Handler to move them to the assigned locations. /// /// \return True if everything has succeeded, false otherwise. - bool - determineAndHandleAssignments(ValueHandler &Handler, ValueAssigner &Assigner, - SmallVectorImpl &Args, - MachineIRBuilder &MIRBuilder, - CallingConv::ID CallConv, bool IsVarArg, - ArrayRef ThisReturnRegs = None) const; + bool determineAndHandleAssignments( + ValueHandler &Handler, ValueAssigner &Assigner, + SmallVectorImpl &Args, MachineIRBuilder &MIRBuilder, + CallingConv::ID CallConv, bool IsVarArg, + ArrayRef ThisReturnRegs = std::nullopt) const; /// Use \p Handler to insert code to handle the argument/return values /// represented by \p Args. It's expected determineAssignments previously /// processed these arguments to populate \p CCState and \p ArgLocs. - bool handleAssignments(ValueHandler &Handler, SmallVectorImpl &Args, - CCState &CCState, - SmallVectorImpl &ArgLocs, - MachineIRBuilder &MIRBuilder, - ArrayRef ThisReturnRegs = None) const; + bool + handleAssignments(ValueHandler &Handler, SmallVectorImpl &Args, + CCState &CCState, SmallVectorImpl &ArgLocs, + MachineIRBuilder &MIRBuilder, + ArrayRef ThisReturnRegs = std::nullopt) const; /// Check whether parameters to a call that are passed in callee saved /// registers are the same as from the calling function. 
This needs to be diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -658,11 +658,10 @@ /// Build and insert \p Res = G_FPEXT \p Op MachineInstrBuilder buildFPExt(const DstOp &Res, const SrcOp &Op, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FPEXT, {Res}, {Op}, Flags); } - /// Build and insert a G_PTRTOINT instruction. MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src) { return buildInstr(TargetOpcode::G_PTRTOINT, {Dst}, {Src}); @@ -1094,7 +1093,7 @@ /// /// \return The newly created instruction. MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, - Optional Flags = None); + Optional Flags = std::nullopt); /// Build and insert \p Res = G_TRUNC \p Op /// @@ -1138,7 +1137,7 @@ /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, - Optional Flags = None); + Optional Flags = std::nullopt); /// Build and insert a \p Res = G_SELECT \p Tst, \p Op0, \p Op1 /// @@ -1152,7 +1151,7 @@ /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, - Optional Flags = None); + Optional Flags = std::nullopt); /// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val, /// \p Elt, \p Idx @@ -1482,7 +1481,7 @@ MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_ADD, {Dst}, {Src0, Src1}, Flags); } @@ -1499,7 +1498,7 @@ MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_SUB, {Dst}, {Src0, Src1}, Flags); } @@ -1515,74 +1514,74 @@ /// \return a MachineInstrBuilder for the newly created instruction. 
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_MUL, {Dst}, {Src0, Src1}, Flags); } MachineInstrBuilder buildUMulH(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_UMULH, {Dst}, {Src0, Src1}, Flags); } MachineInstrBuilder buildSMulH(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_SMULH, {Dst}, {Src0, Src1}, Flags); } /// Build and insert \p Res = G_UREM \p Op0, \p Op1 MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_UREM, {Dst}, {Src0, Src1}, Flags); } MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FMUL, {Dst}, {Src0, Src1}, Flags); } MachineInstrBuilder buildFMinNum(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FMINNUM, {Dst}, {Src0, Src1}, Flags); } MachineInstrBuilder buildFMaxNum(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FMAXNUM, {Dst}, {Src0, Src1}, Flags); } - MachineInstrBuilder buildFMinNumIEEE(const DstOp &Dst, const SrcOp &Src0, - const SrcOp &Src1, - Optional Flags = None) { + MachineInstrBuilder + buildFMinNumIEEE(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FMINNUM_IEEE, {Dst}, {Src0, Src1}, Flags); } - MachineInstrBuilder buildFMaxNumIEEE(const DstOp &Dst, const SrcOp &Src0, - const SrcOp &Src1, - Optional Flags = None) { + MachineInstrBuilder + buildFMaxNumIEEE(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FMAXNUM_IEEE, {Dst}, {Src0, Src1}, Flags); } MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_SHL, {Dst}, {Src0, Src1}, Flags); } MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_LSHR, {Dst}, {Src0, Src1}, Flags); } MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_ASHR, {Dst}, {Src0, Src1}, Flags); } @@ -1614,7 +1613,7 @@ /// \return a MachineInstrBuilder for the newly created instruction. 
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_OR, {Dst}, {Src0, Src1}, Flags); } @@ -1673,97 +1672,99 @@ /// Build and insert \p Res = G_FADD \p Op0, \p Op1 MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FADD, {Dst}, {Src0, Src1}, Flags); } /// Build and insert \p Res = G_STRICT_FADD \p Op0, \p Op1 MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, - const SrcOp &Src1, - Optional Flags = None) { + const SrcOp &Src1, + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_STRICT_FADD, {Dst}, {Src0, Src1}, Flags); } /// Build and insert \p Res = G_FSUB \p Op0, \p Op1 MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FSUB, {Dst}, {Src0, Src1}, Flags); } /// Build and insert \p Res = G_FDIV \p Op0, \p Op1 MachineInstrBuilder buildFDiv(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FDIV, {Dst}, {Src0, Src1}, Flags); } /// Build and insert \p Res = G_FMA \p Op0, \p Op1, \p Op2 MachineInstrBuilder buildFMA(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, const SrcOp &Src2, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FMA, {Dst}, {Src0, Src1, Src2}, Flags); } /// Build and insert \p Res = G_FMAD \p Op0, \p Op1, \p Op2 MachineInstrBuilder buildFMAD(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, const SrcOp &Src2, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FMAD, {Dst}, {Src0, Src1, Src2}, Flags); } /// Build and insert \p Res = G_FNEG \p Op0 MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0}, Flags); } /// Build and insert \p Res = G_FABS \p Op0 MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FABS, {Dst}, {Src0}, Flags); } /// Build and insert \p Dst = G_FCANONICALIZE \p Src0 - MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, - Optional Flags = None) { + MachineInstrBuilder + buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FCANONICALIZE, {Dst}, {Src0}, Flags); } /// Build and insert \p Dst = G_INTRINSIC_TRUNC \p Src0 - MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, - Optional Flags = None) { + MachineInstrBuilder + buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_INTRINSIC_TRUNC, {Dst}, {Src0}, Flags); } /// Build and insert \p Res = GFFLOOR \p Op0, \p Op1 MachineInstrBuilder buildFFloor(const DstOp &Dst, const SrcOp &Src0, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FFLOOR, {Dst}, {Src0}, Flags); } /// Build and insert \p Dst = G_FLOG \p Src MachineInstrBuilder buildFLog(const DstOp &Dst, const SrcOp &Src, - Optional Flags = None) { + Optional Flags 
= std::nullopt) { return buildInstr(TargetOpcode::G_FLOG, {Dst}, {Src}, Flags); } /// Build and insert \p Dst = G_FLOG2 \p Src MachineInstrBuilder buildFLog2(const DstOp &Dst, const SrcOp &Src, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FLOG2, {Dst}, {Src}, Flags); } /// Build and insert \p Dst = G_FEXP2 \p Src MachineInstrBuilder buildFExp2(const DstOp &Dst, const SrcOp &Src, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FEXP2, {Dst}, {Src}, Flags); } /// Build and insert \p Dst = G_FPOW \p Src0, \p Src1 MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, - Optional Flags = None) { + Optional Flags = std::nullopt) { return buildInstr(TargetOpcode::G_FPOW, {Dst}, {Src0, Src1}, Flags); } @@ -1975,9 +1976,9 @@ return buildInstr(TargetOpcode::G_BITREVERSE, {Dst}, {Src}); } - virtual MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef DstOps, - ArrayRef SrcOps, - Optional Flags = None); + virtual MachineInstrBuilder + buildInstr(unsigned Opc, ArrayRef DstOps, ArrayRef SrcOps, + Optional Flags = std::nullopt); }; } // End namespace llvm. diff --git a/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/llvm/include/llvm/CodeGen/LiveRangeEdit.h --- a/llvm/include/llvm/CodeGen/LiveRangeEdit.h +++ b/llvm/include/llvm/CodeGen/LiveRangeEdit.h @@ -241,7 +241,7 @@ /// allocator. These registers should not be split into new intervals /// as currently those new intervals are not guaranteed to spill. void eliminateDeadDefs(SmallVectorImpl &Dead, - ArrayRef RegsBeingSpilled = None); + ArrayRef RegsBeingSpilled = std::nullopt); /// calculateRegClassAndHint - Recompute register class and hint for each new /// register. diff --git a/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h b/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h --- a/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h +++ b/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h @@ -53,8 +53,10 @@ /// /// A new, empty module is created if the LLVM IR isn't present. /// \returns nullptr if a parsing error occurred. - std::unique_ptr parseIRModule( - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }); + std::unique_ptr + parseIRModule(DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { + return std::nullopt; + }); /// Parses MachineFunctions in the MIR file and add them to the given /// MachineModuleInfo \p MMI. diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h --- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h +++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h @@ -245,7 +245,7 @@ ObjectType Type = DefaultType; int64_t Offset = 0; uint64_t Size = 0; - MaybeAlign Alignment = None; + MaybeAlign Alignment = std::nullopt; TargetStackID::Value StackID; StringValue CalleeSavedRegister; bool CalleeSavedRestored = true; @@ -285,7 +285,7 @@ YamlIO.mapOptional("offset", Object.Offset, (int64_t)0); if (Object.Type != MachineStackObject::VariableSized) YamlIO.mapRequired("size", Object.Size); - YamlIO.mapOptional("alignment", Object.Alignment, None); + YamlIO.mapOptional("alignment", Object.Alignment, std::nullopt); YamlIO.mapOptional("stack-id", Object.StackID, TargetStackID::Default); YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, StringValue()); // Don't print it out when it's empty. 
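The MachineIRBuilder and LiveRangeEdit hunks above are representative of the whole patch: defaulted optional parameters such as the `Flags` argument on buildFAdd/buildFSub/etc. keep their type and merely change their default spelling from `None` to `std::nullopt`, the literal that `llvm::None` now aliases. A minimal, self-contained sketch of what such a defaulted optional parameter buys the callee; the helper below is hypothetical and not part of the patch:

#include <cstdint>
#include <optional>

// Hypothetical stand-in for the defaulted Flags parameters above: callers
// may omit the argument entirely, and the callee can still tell "no flags
// requested" apart from "flags explicitly zero".
constexpr uint32_t withFlags(uint32_t Base,
                             std::optional<uint32_t> Flags = std::nullopt) {
  // value_or(0) collapses the "not provided" case to "no extra flags".
  return Base | Flags.value_or(0);
}

static_assert(withFlags(0x1) == 0x1);      // relies on the std::nullopt default
static_assert(withFlags(0x1, 0x4) == 0x5); // passes explicit flags

The `ArrayRef` defaults in the same hunks (e.g. `RegsBeingSpilled` in LiveRangeEdit.h) accept the same spelling because, by this point in the migration, `llvm::NoneType` is an alias of `std::nullopt_t`, so the empty-ArrayRef constructor still applies; that detail is inferred from the patch rather than stated in it.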
@@ -311,7 +311,7 @@ ObjectType Type = DefaultType; int64_t Offset = 0; uint64_t Size = 0; - MaybeAlign Alignment = None; + MaybeAlign Alignment = std::nullopt; TargetStackID::Value StackID; bool IsImmutable = false; bool IsAliased = false; @@ -361,7 +361,7 @@ FixedMachineStackObject::DefaultType); // Don't print the default type. YamlIO.mapOptional("offset", Object.Offset, (int64_t)0); YamlIO.mapOptional("size", Object.Size, (uint64_t)0); - YamlIO.mapOptional("alignment", Object.Alignment, None); + YamlIO.mapOptional("alignment", Object.Alignment, std::nullopt); YamlIO.mapOptional("stack-id", Object.StackID, TargetStackID::Default); if (Object.Type != FixedMachineStackObject::SpillSlot) { YamlIO.mapOptional("isImmutable", Object.IsImmutable, false); @@ -521,7 +521,7 @@ struct MachineConstantPoolValue { UnsignedValue ID; StringValue Value; - MaybeAlign Alignment = None; + MaybeAlign Alignment = std::nullopt; bool IsTargetSpecific = false; bool operator==(const MachineConstantPoolValue &Other) const { @@ -535,7 +535,7 @@ static void mapping(IO &YamlIO, MachineConstantPoolValue &Constant) { YamlIO.mapRequired("id", Constant.ID); YamlIO.mapOptional("value", Constant.Value, StringValue()); - YamlIO.mapOptional("alignment", Constant.Alignment, None); + YamlIO.mapOptional("alignment", Constant.Alignment, std::nullopt); YamlIO.mapOptional("isTargetSpecific", Constant.IsTargetSpecific, false); } }; @@ -687,7 +687,7 @@ struct MachineFunction { StringRef Name; - MaybeAlign Alignment = None; + MaybeAlign Alignment = std::nullopt; bool ExposesReturnsTwice = false; // GISel MachineFunctionProperties. bool Legalized = false; @@ -726,7 +726,7 @@ template <> struct MappingTraits { static void mapping(IO &YamlIO, MachineFunction &MF) { YamlIO.mapRequired("name", MF.Name); - YamlIO.mapOptional("alignment", MF.Alignment, None); + YamlIO.mapOptional("alignment", MF.Alignment, std::nullopt); YamlIO.mapOptional("exposesReturnsTwice", MF.ExposesReturnsTwice, false); YamlIO.mapOptional("legalized", MF.Legalized, false); YamlIO.mapOptional("regBankSelected", MF.RegBankSelected, false); diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -169,6 +169,10 @@ /// Indicate that this basic block is the entry block of a cleanup funclet. bool IsCleanupFuncletEntry = false; + /// Fixed unique ID assigned to this basic block upon creation. Used with + /// basic block sections and basic block labels. + Optional BBID; + /// With basic block sections, this stores the Section ID of the basic block. MBBSectionID SectionID{0}; @@ -620,6 +624,12 @@ void setIsEndSection(bool V = true) { IsEndSection = V; } + Optional getBBID() const { return BBID; } + + /// Returns the BBID of the block when BBAddrMapVersion >= 2, otherwise + /// returns Number. + unsigned getBBIDOrNumber() const; + /// Returns the section ID of this basic block. MBBSectionID getSectionID() const { return SectionID; } @@ -629,6 +639,12 @@ ((unsigned)SectionID.Type) + SectionID.Number; } + /// Sets the fixed BBID of this basic block. + void setBBID(unsigned V) { + assert(!BBID.has_value() && "Cannot change BBID."); + BBID = V; + } + /// Sets the section ID for this basic block. 
void setSectionID(MBBSectionID V) { SectionID = V; } diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -374,6 +374,9 @@ bool HasEHScopes = false; bool HasEHFunclets = false; + /// BBID to assign to the next basic block of this function. + unsigned NextBBID = 0; + /// Section Type for basic blocks, only relevant with basic block sections. BasicBlockSection BBSectionsType = BasicBlockSection::None; diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def --- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def +++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def @@ -151,6 +151,7 @@ DUMMY_MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass, ()) DUMMY_MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass, ()) DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("machine-latecleanup", MachineLateInstrsCleanupPass, ()) DUMMY_MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass, ()) DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass, ()) DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass, ()) diff --git a/llvm/include/llvm/CodeGen/MachineTraceMetrics.h b/llvm/include/llvm/CodeGen/MachineTraceMetrics.h --- a/llvm/include/llvm/CodeGen/MachineTraceMetrics.h +++ b/llvm/include/llvm/CodeGen/MachineTraceMetrics.h @@ -284,9 +284,9 @@ /// classes are included. For the caller to account for extra machine /// instructions, it must first resolve each instruction's scheduling class. unsigned getResourceLength( - ArrayRef Extrablocks = None, - ArrayRef ExtraInstrs = None, - ArrayRef RemoveInstrs = None) const; + ArrayRef Extrablocks = std::nullopt, + ArrayRef ExtraInstrs = std::nullopt, + ArrayRef RemoveInstrs = std::nullopt) const; /// Return the length of the (data dependency) critical path through the /// trace. diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -334,6 +334,10 @@ MachineFunctionPass *createMachineCopyPropagationPass(bool UseCopyInstr); + /// MachineLateInstrsCleanup - This pass removes redundant identical + /// instructions after register allocation and rematerialization. + extern char &MachineLateInstrsCleanupID; + /// PeepholeOptimizer - This pass performs peephole optimizations - /// like extension and comparison eliminations. 
extern char &PeepholeOptimizerID; diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -725,19 +725,19 @@ SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags = 0) { return getJumpTable(JTI, VT, true, TargetFlags); } - SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align = None, - int Offs = 0, bool isT = false, - unsigned TargetFlags = 0); + SDValue getConstantPool(const Constant *C, EVT VT, + MaybeAlign Align = std::nullopt, int Offs = 0, + bool isT = false, unsigned TargetFlags = 0); SDValue getTargetConstantPool(const Constant *C, EVT VT, - MaybeAlign Align = None, int Offset = 0, + MaybeAlign Align = std::nullopt, int Offset = 0, unsigned TargetFlags = 0) { return getConstantPool(C, VT, Align, Offset, true, TargetFlags); } SDValue getConstantPool(MachineConstantPoolValue *C, EVT VT, - MaybeAlign Align = None, int Offs = 0, + MaybeAlign Align = std::nullopt, int Offs = 0, bool isT = false, unsigned TargetFlags = 0); SDValue getTargetConstantPool(MachineConstantPoolValue *C, EVT VT, - MaybeAlign Align = None, int Offset = 0, + MaybeAlign Align = std::nullopt, int Offset = 0, unsigned TargetFlags = 0) { return getConstantPool(C, VT, Align, Offset, true, TargetFlags); } @@ -1228,7 +1228,8 @@ inline SDValue getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef Ops, - EVT MemVT, MachinePointerInfo PtrInfo, MaybeAlign Alignment = None, + EVT MemVT, MachinePointerInfo PtrInfo, + MaybeAlign Alignment = std::nullopt, MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore, uint64_t Size = 0, const AAMDNodes &AAInfo = AAMDNodes()) { diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1011,7 +1011,7 @@ /// registers as machine operands. virtual Optional isCopyInstrImpl(const MachineInstr &MI) const { - return None; + return std::nullopt; } /// Return true if the given terminator MI is not expected to spill. This @@ -1045,7 +1045,7 @@ /// register and the offset which has been added. virtual Optional isAddImmediate(const MachineInstr &MI, Register Reg) const { - return None; + return std::nullopt; } /// Returns true if MI is an instruction that defines Reg to have a constant @@ -1383,7 +1383,7 @@ virtual Optional getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const { - return None; + return std::nullopt; } /// Returns true if MI's Def is NullValueReg, and the MI @@ -1846,7 +1846,7 @@ /// defined by this method. virtual ArrayRef> getSerializableTargetIndices() const { - return None; + return std::nullopt; } /// Decompose the machine operand's target flags into two values - the direct @@ -1863,7 +1863,7 @@ /// defined by this method. virtual ArrayRef> getSerializableDirectMachineOperandTargetFlags() const { - return None; + return std::nullopt; } /// Return an array that contains the bitmask target flag values and their @@ -1873,7 +1873,7 @@ /// defined by this method. virtual ArrayRef> getSerializableBitmaskMachineOperandTargetFlags() const { - return None; + return std::nullopt; } /// Return an array that contains the MMO target flag values and their @@ -1883,7 +1883,7 @@ /// defined by this method. 
virtual ArrayRef> getSerializableMachineMemOperandTargetFlags() const { - return None; + return std::nullopt; } /// Determines whether \p Inst is a tail call instruction. Override this diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -300,7 +300,7 @@ bool IsSwiftAsync : 1; bool IsSwiftError : 1; bool IsCFGuardTarget : 1; - MaybeAlign Alignment = None; + MaybeAlign Alignment = std::nullopt; Type *IndirectType = nullptr; ArgListEntry() @@ -1052,9 +1052,9 @@ // value representing memory location PointerUnion ptrVal; - // Fallback address space for use if ptrVal is nullptr. None means unknown - // address space. - Optional fallbackAddressSpace; + // Fallback address space for use if ptrVal is nullptr. std::nullopt means + // unknown address space. + std::optional fallbackAddressSpace; int offset = 0; // offset off of ptrVal uint64_t size = 0; // the size of the memory location @@ -1584,7 +1584,7 @@ /// instance with i128 inline assembly operands on SystemZ. virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, - Optional RegisterVT = None) const { + std::optional RegisterVT = std::nullopt) const { if (VT.isSimple()) { assert((unsigned)VT.getSimpleVT().SimpleTy < std::size(NumRegistersForVT)); @@ -4103,10 +4103,9 @@ /// Target-specific splitting of values into parts that fit a register /// storing a legal type - virtual bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, - SDValue Val, SDValue *Parts, - unsigned NumParts, MVT PartVT, - Optional CC) const { + virtual bool splitValueIntoRegisterParts( + SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, std::optional CC) const { return false; } @@ -4132,7 +4131,7 @@ joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, - Optional CC) const { + std::optional CC) const { return SDValue(); } diff --git a/llvm/include/llvm/DWP/DWP.h b/llvm/include/llvm/DWP/DWP.h --- a/llvm/include/llvm/DWP/DWP.h +++ b/llvm/include/llvm/DWP/DWP.h @@ -44,7 +44,7 @@ // dwo_id field. This resides in the header only if Version >= 5. // In earlier versions, it is read from DW_AT_GNU_dwo_id. - Optional Signature = None; + Optional Signature = std::nullopt; // Derived from the length of Length field. dwarf::DwarfFormat Format = dwarf::DwarfFormat::DWARF32; diff --git a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h --- a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h +++ b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h @@ -249,7 +249,7 @@ Optional bytesRemaining(uint32_t CurrentOffset) const { if (!MaxLength) - return None; + return std::nullopt; assert(CurrentOffset >= BeginOffset); uint32_t BytesUsed = CurrentOffset - BeginOffset; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h @@ -77,8 +77,8 @@ // Constructors are private to force people to use the create static // functions. 
UnwindLocation(Location K) - : Kind(K), RegNum(InvalidRegisterNumber), Offset(0), AddrSpace(None), - Dereference(false) {} + : Kind(K), RegNum(InvalidRegisterNumber), Offset(0), + AddrSpace(std::nullopt), Dereference(false) {} UnwindLocation(Location K, uint32_t Reg, int32_t Off, Optional AS, bool Deref) @@ -117,10 +117,10 @@ /// false. static UnwindLocation createIsRegisterPlusOffset(uint32_t Reg, int32_t Off, - Optional AddrSpace = None); + Optional AddrSpace = std::nullopt); static UnwindLocation createAtRegisterPlusOffset(uint32_t Reg, int32_t Off, - Optional AddrSpace = None); + Optional AddrSpace = std::nullopt); /// Create a location whose value is the result of evaluating a DWARF /// expression. This allows complex expressions to be evaluated in order to /// unwind a register or CFA value. @@ -190,7 +190,7 @@ Optional getRegisterLocation(uint32_t RegNum) const { auto Pos = Locations.find(RegNum); if (Pos == Locations.end()) - return llvm::None; + return std::nullopt; return Pos->second; } diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h @@ -47,7 +47,7 @@ /// Returns index of the parent die. Optional getParentIdx() const { if (ParentIdx == UINT32_MAX) - return None; + return std::nullopt; return ParentIdx; } @@ -55,7 +55,7 @@ /// Returns index of the sibling die. Optional getSiblingIdx() const { if (SiblingIdx == 0) - return None; + return std::nullopt; return SiblingIdx; } diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h @@ -116,7 +116,7 @@ } Error parseMacinfo(DWARFDataExtractor MacroData) { - return parseImpl(None, None, MacroData, /*IsMacro=*/false); + return parseImpl(std::nullopt, std::nullopt, MacroData, /*IsMacro=*/false); } /// Return whether the section has any entries. diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h @@ -140,7 +140,7 @@ }; DWARFExpression(DataExtractor Data, uint8_t AddressSize, - Optional Format = None) + Optional Format = std::nullopt) : Data(Data), AddressSize(AddressSize), Format(Format) { assert(AddressSize == 8 || AddressSize == 4 || AddressSize == 2); } diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -176,11 +176,11 @@ /// was valid and was a string. inline Optional toString(const Optional &V) { if (!V) - return None; + return std::nullopt; Expected E = V->getAsCString(); if (!E) { consumeError(E.takeError()); - return None; + return std::nullopt; } return *E; } @@ -225,7 +225,7 @@ inline Optional toUnsigned(const Optional &V) { if (V) return V->getAsUnsignedConstant(); - return None; + return std::nullopt; } /// Take an optional DWARFFormValue and extract a unsigned constant. @@ -247,7 +247,7 @@ inline Optional toReference(const Optional &V) { if (V) return V->getAsReference(); - return None; + return std::nullopt; } /// Take an optional DWARFFormValue and extract a reference. 
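The dwarf::toString/toUnsigned/toReference helpers in the DWARFFormValue.h hunks above all share one shape: forward to the typed accessor when the optional form value is present, otherwise propagate an empty optional, now spelled `std::nullopt`. A self-contained sketch of that shape, with a hypothetical FormValue standing in for DWARFFormValue:

#include <cstdint>
#include <optional>

// Hypothetical stand-in for DWARFFormValue: only the accessor needed for
// this sketch is modelled.
struct FormValue {
  std::optional<uint64_t> Unsigned;
  std::optional<uint64_t> getAsUnsignedConstant() const { return Unsigned; }
};

// Mirrors the toUnsigned helper above: an absent form value and an absent
// constant both surface as std::nullopt to the caller.
inline std::optional<uint64_t> toUnsigned(const std::optional<FormValue> &V) {
  if (V)
    return V->getAsUnsignedConstant();
  return std::nullopt;
}

The defaulted overloads elsewhere in that header layer a fallback on top of this, along the lines of `toUnsigned(V).value_or(Default)`.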
@@ -269,7 +269,7 @@ inline Optional toSigned(const Optional &V) { if (V) return V->getAsSignedConstant(); - return None; + return std::nullopt; } /// Take an optional DWARFFormValue and extract a signed integer. @@ -290,14 +290,14 @@ inline Optional toAddress(const Optional &V) { if (V) return V->getAsAddress(); - return None; + return std::nullopt; } inline Optional toSectionedAddress(const Optional &V) { if (V) return V->getAsSectionedAddress(); - return None; + return std::nullopt; } /// Take an optional DWARFFormValue and extract a address. @@ -318,7 +318,7 @@ inline Optional toSectionOffset(const Optional &V) { if (V) return V->getAsSectionOffset(); - return None; + return std::nullopt; } /// Take an optional DWARFFormValue and extract a section offset. @@ -340,7 +340,7 @@ inline Optional> toBlock(const Optional &V) { if (V) return V->getAsBlock(); - return None; + return std::nullopt; } } // end namespace dwarf diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h @@ -114,7 +114,7 @@ DIDumpOptions DumpOpts = {}) const; Optional getOffsetEntry(DataExtractor Data, uint32_t Index) const { if (Index >= HeaderData.OffsetEntryCount) - return None; + return std::nullopt; return getOffsetEntry(Data, getHeaderOffset() + getHeaderSize(Format), Format, Index); } diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -530,7 +530,7 @@ }); if (It != DieArray.end() && It->getOffset() == Offset) return It - DieArray.begin(); - return None; + return std::nullopt; } uint32_t getLineTableOffset() const { diff --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h --- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h +++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h @@ -175,8 +175,8 @@ void clear() { Range = {0, 0}; Name = 0; - OptLineTable = None; - Inline = None; + OptLineTable = std::nullopt; + Inline = std::nullopt; } }; diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h --- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h @@ -141,7 +141,7 @@ Optional getFile(uint32_t Index) const { if (Index < Files.size()) return Files[Index]; - return llvm::None; + return std::nullopt; } /// Dump the entire Gsym data contained in this object. @@ -242,7 +242,7 @@ ArrayRef AIO = getAddrOffsets(); if (Index < AIO.size()) return AIO[Index] + Hdr->BaseAddress; - return llvm::None; + return std::nullopt; } /// Lookup an address offset in the AddrOffsets table. /// @@ -262,7 +262,7 @@ // Watch for addresses that fall between the gsym::Header::BaseAddress and // the first address offset. if (Iter == Begin && AddrOffset < *Begin) - return llvm::None; + return std::nullopt; if (Iter == End || AddrOffset < *Iter) --Iter; return std::distance(Begin, Iter); diff --git a/llvm/include/llvm/DebugInfo/GSYM/LineTable.h b/llvm/include/llvm/DebugInfo/GSYM/LineTable.h --- a/llvm/include/llvm/DebugInfo/GSYM/LineTable.h +++ b/llvm/include/llvm/DebugInfo/GSYM/LineTable.h @@ -172,7 +172,7 @@ /// table isn't empty, or llvm::None if the line table is emtpy. 
Optional first() const { if (Lines.empty()) - return llvm::None; + return std::nullopt; return Lines.front(); } /// Return the last line entry if the line table isn't empty. @@ -181,7 +181,7 @@ /// table isn't empty, or llvm::None if the line table is emtpy. Optional last() const { if (Lines.empty()) - return llvm::None; + return std::nullopt; return Lines.back(); } void push(const LineEntry &LE) { diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h --- a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h @@ -248,7 +248,7 @@ /// from a real key to an internal key. template bool set_as(const Key &K, ValueT V, TraitsT &Traits) { - return set_as_internal(K, std::move(V), Traits, None); + return set_as_internal(K, std::move(V), Traits, std::nullopt); } template diff --git a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h --- a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h @@ -33,7 +33,7 @@ class MarkupFilter { public: MarkupFilter(raw_ostream &OS, LLVMSymbolizer &Symbolizer, - Optional ColorsEnabled = llvm::None); + Optional ColorsEnabled = std::nullopt); /// Filters a line containing symbolizer markup and writes the human-readable /// results to the output stream. diff --git a/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h b/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h --- a/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -540,8 +541,8 @@ std::shared_ptr MemMgr; std::shared_ptr Resolver; TargetOptions Options; - Optional RelocModel; - Optional CMModel; + std::optional RelocModel; + std::optional CMModel; std::string MArch; std::string MCPU; SmallVector MAttrs; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/COFFPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/COFFPlatform.h --- a/llvm/include/llvm/ExecutionEngine/Orc/COFFPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/COFFPlatform.h @@ -44,7 +44,7 @@ JITDylib &PlatformJD, const char *OrcRuntimePath, LoadDynamicLibrary LoadDynLibrary, bool StaticVCRuntime = false, const char *VCRuntimePath = nullptr, - Optional RuntimeAliases = None); + Optional RuntimeAliases = std::nullopt); ExecutionSession &getExecutionSession() const { return ES; } ObjectLinkingLayer &getObjectLinkingLayer() const { return ObjLinkingLayer; } diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h --- a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h @@ -95,7 +95,7 @@ static Expected> Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer, JITDylib &PlatformJD, const char *OrcRuntimePath, - Optional RuntimeAliases = None); + Optional RuntimeAliases = std::nullopt); ExecutionSession &getExecutionSession() const { return ES; } ObjectLinkingLayer &getObjectLinkingLayer() const { return ObjLinkingLayer; } diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h @@ -58,7 +58,7 @@ /// loaded to find the 
registration functions. Expected> createJITLoaderGDBRegistrar( ExecutionSession &ES, - Optional RegistrationFunctionDylib = None); + Optional RegistrationFunctionDylib = std::nullopt); } // end namespace orc } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h @@ -34,7 +34,7 @@ /// will be loaded to find the registration functions. static Expected> Create(ExecutionSession &ES, - Optional RegistrationFunctionsDylib = None); + Optional RegistrationFunctionsDylib = std::nullopt); /// Create a EPCEHFrameRegistrar with the given ExecutorProcessControl /// object and registration/deregistration function addresses. diff --git a/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h b/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h --- a/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h @@ -21,6 +21,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include +#include #include #include @@ -83,22 +84,22 @@ const std::string &getCPU() const { return CPU; } /// Set the relocation model. - JITTargetMachineBuilder &setRelocationModel(Optional RM) { + JITTargetMachineBuilder &setRelocationModel(std::optional RM) { this->RM = std::move(RM); return *this; } /// Get the relocation model. - const Optional &getRelocationModel() const { return RM; } + const std::optional &getRelocationModel() const { return RM; } /// Set the code model. - JITTargetMachineBuilder &setCodeModel(Optional CM) { + JITTargetMachineBuilder &setCodeModel(std::optional CM) { this->CM = std::move(CM); return *this; } /// Get the code model. - const Optional &getCodeModel() const { return CM; } + const std::optional &getCodeModel() const { return CM; } /// Set the LLVM CodeGen optimization level. 
JITTargetMachineBuilder &setCodeGenOptLevel(CodeGenOpt::Level OptLevel) { @@ -150,8 +151,8 @@ std::string CPU; SubtargetFeatures Features; TargetOptions Options; - Optional RM; - Optional CM; + std::optional RM; + std::optional CM; CodeGenOpt::Level OptLevel = CodeGenOpt::Default; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h --- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h @@ -80,7 +80,7 @@ static Expected> Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer, JITDylib &PlatformJD, const char *OrcRuntimePath, - Optional RuntimeAliases = None); + Optional RuntimeAliases = std::nullopt); ExecutionSession &getExecutionSession() const { return ES; } ObjectLinkingLayer &getObjectLinkingLayer() const { return ObjLinkingLayer; } diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h --- a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h @@ -50,7 +50,7 @@ if (Position != Maps.end()) return Position->getSecond(); else - return None; + return std::nullopt; } std::mutex ConcurrentAccess; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h --- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h @@ -30,7 +30,7 @@ /// many main functions will expect a name argument at least, and will fail /// if none is provided. int runAsMain(int (*Main)(int, char *[]), ArrayRef Args, - Optional ProgramName = None); + Optional ProgramName = std::nullopt); int runAsVoidFunction(int (*Func)(void)); int runAsIntFunction(int (*Func)(int), int Arg); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -924,7 +924,7 @@ __OMP_RTL_ATTRS(__kmpc_alloc_shared, AttributeSet( EnumAttr(NoUnwind), EnumAttr(NoSync), - AllocSizeAttr(0, None)), ReturnPtrAttrs, ParamAttrs()) + AllocSizeAttr(0, std::nullopt)), ReturnPtrAttrs, ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_free_shared, DeviceAllocAttrs, AttributeSet(), ParamAttrs(AttributeSet(EnumAttr(NoCapture), EnumAttr(AllocatedPointer)))) diff --git a/llvm/include/llvm/FuzzMutate/OpDescriptor.h b/llvm/include/llvm/FuzzMutate/OpDescriptor.h --- a/llvm/include/llvm/FuzzMutate/OpDescriptor.h +++ b/llvm/include/llvm/FuzzMutate/OpDescriptor.h @@ -105,7 +105,7 @@ auto Pred = [](ArrayRef, const Value *V) { return !V->getType()->isVoidTy(); }; - auto Make = None; + auto Make = std::nullopt; return {Pred, Make}; } @@ -113,7 +113,7 @@ auto Pred = [](ArrayRef, const Value *V) { return V->getType()->isIntegerTy(); }; - auto Make = None; + auto Make = std::nullopt; return {Pred, Make}; } @@ -121,7 +121,7 @@ auto Pred = [](ArrayRef, const Value *V) { return V->getType()->isFloatingPointTy(); }; - auto Make = None; + auto Make = std::nullopt; return {Pred, Make}; } @@ -175,7 +175,7 @@ }; // TODO: For now we only find aggregates in BaseTypes. It might be better to // manufacture them out of the base types in some cases. 
- auto Find = None; + auto Find = std::nullopt; return {Pred, Find}; } @@ -186,7 +186,7 @@ // TODO: For now we only find vectors in BaseTypes. It might be better to // manufacture vectors out of the base types, but it's tricky to be sure // that's actually a reasonable type. - auto Make = None; + auto Make = std::nullopt; return {Pred, Make}; } diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h --- a/llvm/include/llvm/IR/Attributes.h +++ b/llvm/include/llvm/IR/Attributes.h @@ -18,7 +18,6 @@ #include "llvm-c/Types.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitmaskEnum.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/llvm-config.h" @@ -28,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -135,9 +135,9 @@ uint64_t Bytes); static Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context, uint64_t Bytes); - static Attribute getWithAllocSizeArgs(LLVMContext &Context, - unsigned ElemSizeArg, - const Optional &NumElemsArg); + static Attribute getWithAllocSizeArgs( + LLVMContext &Context, unsigned ElemSizeArg, + const std::optional &NumElemsArg); static Attribute getWithVScaleRangeArgs(LLVMContext &Context, unsigned MinValue, unsigned MaxValue); static Attribute getWithByValType(LLVMContext &Context, Type *Ty); @@ -230,14 +230,14 @@ uint64_t getDereferenceableOrNullBytes() const; /// Returns the argument numbers for the allocsize attribute. - std::pair> getAllocSizeArgs() const; + std::pair> getAllocSizeArgs() const; /// Returns the minimum value for the vscale_range attribute. unsigned getVScaleRangeMin() const; /// Returns the maximum value for the vscale_range attribute or None when /// unknown. - Optional getVScaleRangeMax() const; + std::optional getVScaleRangeMax() const; // Returns the unwind table kind. UWTableKind getUWTableKind() const; @@ -375,9 +375,10 @@ Type *getPreallocatedType() const; Type *getInAllocaType() const; Type *getElementType() const; - Optional>> getAllocSizeArgs() const; + std::optional>> getAllocSizeArgs() + const; unsigned getVScaleRangeMin() const; - Optional getVScaleRangeMax() const; + std::optional getVScaleRangeMax() const; UWTableKind getUWTableKind() const; AllocFnKind getAllocKind() const; MemoryEffects getMemoryEffects() const; @@ -730,7 +731,7 @@ /// Returns a new list because attribute lists are immutable. [[nodiscard]] AttributeList addAllocSizeParamAttr(LLVMContext &C, unsigned ArgNo, unsigned ElemSizeArg, - const Optional &NumElemsArg); + const std::optional &NumElemsArg); //===--------------------------------------------------------------------===// // AttributeList Accessors @@ -1106,7 +1107,7 @@ /// Return raw (possibly packed/encoded) value of integer attribute or None if /// not set. - Optional getRawIntAttr(Attribute::AttrKind Kind) const; + std::optional getRawIntAttr(Attribute::AttrKind Kind) const; /// Retrieve the alignment attribute, if it exists. MaybeAlign getAlignment() const { @@ -1151,7 +1152,8 @@ Type *getInAllocaType() const { return getTypeAttr(Attribute::InAlloca); } /// Retrieve the allocsize args, or None if the attribute does not exist. - Optional>> getAllocSizeArgs() const; + std::optional>> getAllocSizeArgs() + const; /// Add integer attribute with raw value (packed/encoded if necessary). AttrBuilder &addRawIntAttr(Attribute::AttrKind Kind, uint64_t Value); @@ -1190,11 +1192,11 @@ /// This turns one (or two) ints into the form used internally in Attribute. 
AttrBuilder &addAllocSizeAttr(unsigned ElemSizeArg, - const Optional &NumElemsArg); + const std::optional &NumElemsArg); /// This turns two ints into the form used internally in Attribute. AttrBuilder &addVScaleRangeAttr(unsigned MinValue, - Optional MaxValue); + std::optional MaxValue); /// Add a type attribute with the given type. AttrBuilder &addTypeAttr(Attribute::AttrKind Kind, Type *Ty); diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h --- a/llvm/include/llvm/IR/BasicBlock.h +++ b/llvm/include/llvm/IR/BasicBlock.h @@ -453,6 +453,32 @@ return splitBasicBlockBefore(I->getIterator(), BBName); } + /// Transfer all instructions from \p FromBB to this basic block at \p ToIt. + void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB) { + splice(ToIt, FromBB, FromBB->begin(), FromBB->end()); + } + + /// Transfer one instruction from \p FromBB at \p FromIt to this basic block + /// at \p ToIt. + void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB, + BasicBlock::iterator FromIt) { + auto FromItNext = std::next(FromIt); + // Single-element splice is a noop if destination == source. + if (ToIt == FromIt || ToIt == FromItNext) + return; + splice(ToIt, FromBB, FromIt, FromItNext); + } + + /// Transfer a range of instructions that belong to \p FromBB from \p + /// FromBeginIt to \p FromEndIt, to this basic block at \p ToIt. + void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB, + BasicBlock::iterator FromBeginIt, + BasicBlock::iterator FromEndIt); + + /// Erases a range of instructions from \p FromIt to (not including) \p ToIt. + /// \Returns \p ToIt. + BasicBlock::iterator erase(BasicBlock::iterator FromIt, BasicBlock::iterator ToIt); + /// Returns true if there are any uses of this basic block other than /// direct branches, switches, etc. to it. bool hasAddressTaken() const { diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h --- a/llvm/include/llvm/IR/Constants.h +++ b/llvm/include/llvm/IR/Constants.h @@ -1215,30 +1215,30 @@ /// /// \param InRangeIndex the inrange index if present or None. /// \param OnlyIfReducedTy see \a getWithOperands() docs. - static Constant *getGetElementPtr(Type *Ty, Constant *C, - ArrayRef IdxList, - bool InBounds = false, - Optional InRangeIndex = None, - Type *OnlyIfReducedTy = nullptr) { + static Constant * + getGetElementPtr(Type *Ty, Constant *C, ArrayRef IdxList, + bool InBounds = false, + Optional InRangeIndex = std::nullopt, + Type *OnlyIfReducedTy = nullptr) { return getGetElementPtr( Ty, C, makeArrayRef((Value *const *)IdxList.data(), IdxList.size()), InBounds, InRangeIndex, OnlyIfReducedTy); } - static Constant *getGetElementPtr(Type *Ty, Constant *C, Constant *Idx, - bool InBounds = false, - Optional InRangeIndex = None, - Type *OnlyIfReducedTy = nullptr) { + static Constant * + getGetElementPtr(Type *Ty, Constant *C, Constant *Idx, bool InBounds = false, + Optional InRangeIndex = std::nullopt, + Type *OnlyIfReducedTy = nullptr) { // This form of the function only exists to avoid ambiguous overload // warnings about whether to convert Idx to ArrayRef or // ArrayRef. 
return getGetElementPtr(Ty, C, cast(Idx), InBounds, InRangeIndex, OnlyIfReducedTy); } - static Constant *getGetElementPtr(Type *Ty, Constant *C, - ArrayRef IdxList, - bool InBounds = false, - Optional InRangeIndex = None, - Type *OnlyIfReducedTy = nullptr); + static Constant * + getGetElementPtr(Type *Ty, Constant *C, ArrayRef IdxList, + bool InBounds = false, + Optional InRangeIndex = std::nullopt, + Type *OnlyIfReducedTy = nullptr); /// Create an "inbounds" getelementptr. See the documentation for the /// "inbounds" flag in LangRef.html for details. diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h --- a/llvm/include/llvm/IR/DIBuilder.h +++ b/llvm/include/llvm/IR/DIBuilder.h @@ -173,10 +173,10 @@ /// \param Checksum Optional checksum kind (e.g. CSK_MD5, CSK_SHA1, etc.) /// and value. /// \param Source Optional source text. - DIFile * - createFile(StringRef Filename, StringRef Directory, - Optional> Checksum = None, - Optional Source = None); + DIFile *createFile( + StringRef Filename, StringRef Directory, + Optional> Checksum = std::nullopt, + Optional Source = std::nullopt); /// Create debugging information entry for a macro. /// \param Parent Macro parent (could be nullptr). @@ -256,7 +256,7 @@ DIDerivedType * createPointerType(DIType *PointeeTy, uint64_t SizeInBits, uint32_t AlignInBits = 0, - Optional DWARFAddressSpace = None, + Optional DWARFAddressSpace = std::nullopt, StringRef Name = "", DINodeArray Annotations = nullptr); /// Create debugging information entry for a pointer to member. @@ -271,11 +271,10 @@ /// Create debugging information entry for a c++ /// style reference or rvalue reference type. - DIDerivedType *createReferenceType(unsigned Tag, DIType *RTy, - uint64_t SizeInBits = 0, - uint32_t AlignInBits = 0, - Optional DWARFAddressSpace = - None); + DIDerivedType * + createReferenceType(unsigned Tag, DIType *RTy, uint64_t SizeInBits = 0, + uint32_t AlignInBits = 0, + Optional DWARFAddressSpace = std::nullopt); /// Create debugging information entry for a typedef. /// \param Ty Original type. @@ -732,7 +731,7 @@ /// Create a new descriptor for the specified /// variable which has a complex address expression for its address. /// \param Addr An array of complex address operations. - DIExpression *createExpression(ArrayRef Addr = None); + DIExpression *createExpression(ArrayRef Addr = std::nullopt); /// Create an expression for a variable that does not have an address, but /// does have a constant value. diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -134,7 +134,7 @@ protected: DINode(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Tag, - ArrayRef Ops1, ArrayRef Ops2 = None) + ArrayRef Ops1, ArrayRef Ops2 = std::nullopt) : MDNode(C, ID, Storage, Ops1, Ops2) { assert(Tag < 1u << 16); SubclassData16 = Tag; @@ -305,7 +305,7 @@ friend class MDNode; DIAssignID(LLVMContext &C, StorageType Storage) - : MDNode(C, DIAssignIDKind, Storage, None) {} + : MDNode(C, DIAssignIDKind, Storage, std::nullopt) {} ~DIAssignID() { dropAllReferences(); } @@ -617,7 +617,7 @@ Context, getCanonicalMDString(Context, Filename), getCanonicalMDString(Context, Directory), MDChecksum, Source ? 
Optional(getCanonicalMDString(Context, *Source)) - : None, + : std::nullopt, Storage, ShouldCreate); } static DIFile *getImpl(LLVMContext &Context, MDString *Filename, @@ -634,13 +634,13 @@ public: DEFINE_MDNODE_GET(DIFile, (StringRef Filename, StringRef Directory, - Optional> CS = None, - Optional Source = None), + Optional> CS = std::nullopt, + Optional Source = std::nullopt), (Filename, Directory, CS, Source)) DEFINE_MDNODE_GET(DIFile, (MDString * Filename, MDString *Directory, - Optional> CS = None, - Optional Source = None), + Optional> CS = std::nullopt, + Optional Source = std::nullopt), (Filename, Directory, CS, Source)) TempDIFile clone() const { return cloneImpl(); } @@ -654,7 +654,7 @@ return StringRefChecksum; } Optional getSource() const { - return Source ? Optional((*Source)->getString()) : None; + return Source ? Optional((*Source)->getString()) : std::nullopt; } MDString *getRawFilename() const { return getOperandAs(0); } @@ -685,7 +685,7 @@ Optional DIScope::getSource() const { if (auto *F = getFile()) return F->getSource(); - return None; + return std::nullopt; } /// Base class for types. @@ -2275,7 +2275,7 @@ return this; if (Optional Encoded = encodeDiscriminator(D, DF, CI)) return cloneWithDiscriminator(*Encoded); - return None; + return std::nullopt; } Optional @@ -2290,7 +2290,7 @@ unsigned CI = getCopyIdentifier(); if (Optional D = encodeDiscriminator(BD, DF, CI)) return cloneWithDiscriminator(*D); - return None; + return std::nullopt; } class DINamespace : public DIScope { @@ -2552,7 +2552,7 @@ Optional getSignedness() const { if (auto *BT = dyn_cast(getType())) return BT->getSignedness(); - return None; + return std::nullopt; } StringRef getFilename() const { @@ -2570,7 +2570,7 @@ Optional getSource() const { if (auto *F = getFile()) return F->getSource(); - return None; + return std::nullopt; } Metadata *getRawScope() const { return getOperand(0); } @@ -2600,7 +2600,7 @@ std::vector Elements; DIExpression(LLVMContext &C, StorageType Storage, ArrayRef Elements) - : MDNode(C, DIExpressionKind, Storage, None), + : MDNode(C, DIExpressionKind, Storage, std::nullopt), Elements(Elements.begin(), Elements.end()) {} ~DIExpression() = default; @@ -3446,7 +3446,8 @@ protected: DIMacroNode(LLVMContext &C, unsigned ID, StorageType Storage, unsigned MIType, - ArrayRef Ops1, ArrayRef Ops2 = None) + ArrayRef Ops1, + ArrayRef Ops2 = std::nullopt) : MDNode(C, ID, Storage, Ops1, Ops2) { assert(MIType < 1u << 16); SubclassData16 = MIType; @@ -3609,7 +3610,7 @@ DIArgList(LLVMContext &C, StorageType Storage, ArrayRef Args) - : MDNode(C, DIArgListKind, Storage, None), + : MDNode(C, DIArgListKind, Storage, std::nullopt), Args(Args.begin(), Args.end()) { track(); } @@ -3675,7 +3676,8 @@ DebugVariable(const DILocalVariable *Var, const DIExpression *DIExpr, const DILocation *InlinedAt) - : Variable(Var), Fragment(DIExpr ? DIExpr->getFragmentInfo() : None), + : Variable(Var), + Fragment(DIExpr ? DIExpr->getFragmentInfo() : std::nullopt), InlinedAt(InlinedAt) {} const DILocalVariable *getVariable() const { return Variable; } @@ -3706,7 +3708,7 @@ /// Empty key: no key should be generated that has no DILocalVariable. static inline DebugVariable getEmptyKey() { - return DebugVariable(nullptr, None, nullptr); + return DebugVariable(nullptr, std::nullopt, nullptr); } /// Difference in tombstone is that the Optional is meaningful. 
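In the DIFile hunk just above, the replacement inside a conditional expression (`Source ? Optional(...) : std::nullopt`) deserves a note: the bare `std::nullopt` suffices because the other arm of `?:` already fixes the optional's value type, whereas a braced `{}` cannot appear as a conditional operand at all. A small sketch of that pattern, with hypothetical names:

#include <optional>
#include <string>

// Mirrors the `Source ? Optional(...) : std::nullopt` shape from the DIFile
// hunk; the function and parameter names here are hypothetical.
std::optional<std::string> sourceText(const std::string *Source) {
  // std::nullopt converts to std::optional<std::string> because the other
  // arm of the conditional already has that type.
  return Source ? std::optional<std::string>(*Source) : std::nullopt;
}

The DebugVariable empty key in the hunk above (`DebugVariable(nullptr, std::nullopt, nullptr)`) leans on the same conversion to fill its optional fragment argument.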
diff --git a/llvm/include/llvm/IR/Dominators.h b/llvm/include/llvm/IR/Dominators.h --- a/llvm/include/llvm/IR/Dominators.h +++ b/llvm/include/llvm/IR/Dominators.h @@ -196,8 +196,12 @@ /// Return true if value Def dominates all possible uses inside instruction /// User. Same comments as for the Use-based API apply. bool dominates(const Value *Def, const Instruction *User) const; - // Does not accept Value to avoid ambiguity with dominance checks between - // two basic blocks. + + /// Returns true if Def would dominate a use in any instruction in BB. + /// If Def is an instruction in BB, then Def does not dominate BB. + /// + /// Does not accept Value to avoid ambiguity with dominance checks between + /// two basic blocks. bool dominates(const Instruction *Def, const BasicBlock *BB) const; /// Return true if an edge dominates a use. diff --git a/llvm/include/llvm/IR/FPEnv.h b/llvm/include/llvm/IR/FPEnv.h --- a/llvm/include/llvm/IR/FPEnv.h +++ b/llvm/include/llvm/IR/FPEnv.h @@ -16,8 +16,8 @@ #define LLVM_IR_FPENV_H #include "llvm/ADT/FloatingPointMode.h" -#include "llvm/ADT/Optional.h" #include "llvm/IR/FMF.h" +#include namespace llvm { class StringRef; @@ -46,19 +46,19 @@ /// Returns a valid RoundingMode enumerator when given a string /// that is valid as input in constrained intrinsic rounding mode /// metadata. -Optional convertStrToRoundingMode(StringRef); +std::optional convertStrToRoundingMode(StringRef); /// For any RoundingMode enumerator, returns a string valid as input in /// constrained intrinsic rounding mode metadata. -Optional convertRoundingModeToStr(RoundingMode); +std::optional convertRoundingModeToStr(RoundingMode); /// Returns a valid ExceptionBehavior enumerator when given a string /// valid as input in constrained intrinsic exception behavior metadata. -Optional convertStrToExceptionBehavior(StringRef); +std::optional convertStrToExceptionBehavior(StringRef); /// For any ExceptionBehavior enumerator, returns a string valid as /// input in constrained intrinsic exception behavior metadata. -Optional convertExceptionBehaviorToStr(fp::ExceptionBehavior); +std::optional convertExceptionBehaviorToStr(fp::ExceptionBehavior); /// Returns true if the exception handling behavior and rounding mode /// match what is used in the default floating point environment. diff --git a/llvm/include/llvm/IR/GCStrategy.h b/llvm/include/llvm/IR/GCStrategy.h --- a/llvm/include/llvm/IR/GCStrategy.h +++ b/llvm/include/llvm/IR/GCStrategy.h @@ -95,7 +95,7 @@ /// managed locations. Note a GCStrategy can always return 'None' (i.e. an /// empty optional indicating it can't reliably distinguish. 
virtual Optional isGCManagedPointer(const Type *Ty) const { - return None; + return std::nullopt; } ///@} diff --git a/llvm/include/llvm/IR/GlobalVariable.h b/llvm/include/llvm/IR/GlobalVariable.h --- a/llvm/include/llvm/IR/GlobalVariable.h +++ b/llvm/include/llvm/IR/GlobalVariable.h @@ -59,7 +59,7 @@ Constant *Initializer, const Twine &Name = "", GlobalVariable *InsertBefore = nullptr, ThreadLocalMode = NotThreadLocal, - Optional AddressSpace = None, + Optional AddressSpace = std::nullopt, bool isExternallyInitialized = false); GlobalVariable(const GlobalVariable &) = delete; GlobalVariable &operator=(const GlobalVariable &) = delete; diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -46,6 +46,7 @@ #include #include #include +#include #include namespace llvm { @@ -307,7 +308,8 @@ /// Set the exception handling to be used with constrained floating point void setDefaultConstrainedExcept(fp::ExceptionBehavior NewExcept) { #ifndef NDEBUG - Optional ExceptStr = convertExceptionBehaviorToStr(NewExcept); + std::optional ExceptStr = + convertExceptionBehaviorToStr(NewExcept); assert(ExceptStr && "Garbage strict exception behavior!"); #endif DefaultConstrainedExcept = NewExcept; @@ -316,7 +318,8 @@ /// Set the rounding mode handling to be used with constrained floating point void setDefaultConstrainedRounding(RoundingMode NewRounding) { #ifndef NDEBUG - Optional RoundingStr = convertRoundingModeToStr(NewRounding); + std::optional RoundingStr = + convertRoundingModeToStr(NewRounding); assert(RoundingStr && "Garbage strict rounding mode!"); #endif DefaultConstrainedRounding = NewRounding; @@ -796,8 +799,9 @@ /// /// The optional argument \p OpBundles specifies operand bundles that are /// added to the call instruction. - CallInst *CreateAssumption(Value *Cond, - ArrayRef OpBundles = llvm::None); + CallInst * + CreateAssumption(Value *Cond, + ArrayRef OpBundles = std::nullopt); /// Create a llvm.experimental.noalias.scope.decl intrinsic call. 
Instruction *CreateNoAliasScopeDeclaration(Value *Scope); @@ -811,7 +815,7 @@ CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualCallee, ArrayRef CallArgs, - Optional> DeoptArgs, + std::optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name = ""); @@ -820,8 +824,8 @@ CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualCallee, uint32_t Flags, ArrayRef CallArgs, - Optional> TransitionArgs, - Optional> DeoptArgs, + std::optional> TransitionArgs, + std::optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name = ""); @@ -831,7 +835,7 @@ CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualCallee, ArrayRef CallArgs, - Optional> DeoptArgs, + std::optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name = ""); @@ -841,7 +845,7 @@ CreateGCStatepointInvoke(uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, ArrayRef InvokeArgs, - Optional> DeoptArgs, + std::optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name = ""); /// Create an invoke to the experimental.gc.statepoint intrinsic to @@ -849,8 +853,8 @@ InvokeInst *CreateGCStatepointInvoke( uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags, - ArrayRef InvokeArgs, Optional> TransitionArgs, - Optional> DeoptArgs, ArrayRef GCArgs, + ArrayRef InvokeArgs, std::optional> TransitionArgs, + std::optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name = ""); // Convenience function for the common case when CallArgs are filled in using @@ -860,7 +864,7 @@ CreateGCStatepointInvoke(uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, ArrayRef InvokeArgs, - Optional> DeoptArgs, + std::optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name = ""); /// Create a call to the experimental.gc.result intrinsic to extract @@ -1071,7 +1075,7 @@ } InvokeInst *CreateInvoke(FunctionType *Ty, Value *Callee, BasicBlock *NormalDest, BasicBlock *UnwindDest, - ArrayRef Args = None, + ArrayRef Args = std::nullopt, const Twine &Name = "") { InvokeInst *II = InvokeInst::Create(Ty, Callee, NormalDest, UnwindDest, Args); @@ -1090,7 +1094,7 @@ InvokeInst *CreateInvoke(FunctionCallee Callee, BasicBlock *NormalDest, BasicBlock *UnwindDest, - ArrayRef Args = None, + ArrayRef Args = std::nullopt, const Twine &Name = "") { return CreateInvoke(Callee.getFunctionType(), Callee.getCallee(), NormalDest, UnwindDest, Args, Name); @@ -1100,7 +1104,7 @@ CallBrInst *CreateCallBr(FunctionType *Ty, Value *Callee, BasicBlock *DefaultDest, ArrayRef IndirectDests, - ArrayRef Args = None, + ArrayRef Args = std::nullopt, const Twine &Name = "") { return Insert(CallBrInst::Create(Ty, Callee, DefaultDest, IndirectDests, Args), Name); @@ -1118,7 +1122,7 @@ CallBrInst *CreateCallBr(FunctionCallee Callee, BasicBlock *DefaultDest, ArrayRef IndirectDests, - ArrayRef Args = None, + ArrayRef Args = std::nullopt, const Twine &Name = "") { return CreateCallBr(Callee.getFunctionType(), Callee.getCallee(), DefaultDest, IndirectDests, Args, Name); @@ -1154,7 +1158,7 @@ } CleanupPadInst *CreateCleanupPad(Value *ParentPad, - ArrayRef Args = None, + ArrayRef Args = std::nullopt, const Twine &Name = "") { return Insert(CleanupPadInst::Create(ParentPad, Args), Name); } @@ -1191,26 +1195,28 @@ return I; } - Value *getConstrainedFPRounding(Optional Rounding) { + Value *getConstrainedFPRounding(std::optional 
Rounding) { RoundingMode UseRounding = DefaultConstrainedRounding; if (Rounding) UseRounding = Rounding.value(); - Optional RoundingStr = convertRoundingModeToStr(UseRounding); + std::optional RoundingStr = + convertRoundingModeToStr(UseRounding); assert(RoundingStr && "Garbage strict rounding mode!"); auto *RoundingMDS = MDString::get(Context, RoundingStr.value()); return MetadataAsValue::get(Context, RoundingMDS); } - Value *getConstrainedFPExcept(Optional Except) { + Value *getConstrainedFPExcept(std::optional Except) { fp::ExceptionBehavior UseExcept = DefaultConstrainedExcept; if (Except) UseExcept = Except.value(); - Optional ExceptStr = convertExceptionBehaviorToStr(UseExcept); + std::optional ExceptStr = + convertExceptionBehaviorToStr(UseExcept); assert(ExceptStr && "Garbage strict exception behavior!"); auto *ExceptMDS = MDString::get(Context, ExceptStr.value()); @@ -1602,8 +1608,8 @@ CallInst *CreateConstrainedFPBinOp( Intrinsic::ID ID, Value *L, Value *R, Instruction *FMFSource = nullptr, const Twine &Name = "", MDNode *FPMathTag = nullptr, - Optional Rounding = None, - Optional Except = None); + std::optional Rounding = std::nullopt, + std::optional Except = std::nullopt); Value *CreateNeg(Value *V, const Twine &Name = "", bool HasNUW = false, bool HasNSW = false) { @@ -2084,8 +2090,8 @@ Intrinsic::ID ID, Value *V, Type *DestTy, Instruction *FMFSource = nullptr, const Twine &Name = "", MDNode *FPMathTag = nullptr, - Optional Rounding = None, - Optional Except = None); + std::optional Rounding = std::nullopt, + std::optional Except = std::nullopt); // Provided to resolve 'CreateIntCast(Ptr, Ptr, "...")', giving a // compile time error, instead of converting the string to bool for the @@ -2245,7 +2251,8 @@ public: CallInst *CreateConstrainedFPCmp( Intrinsic::ID ID, CmpInst::Predicate P, Value *L, Value *R, - const Twine &Name = "", Optional Except = None); + const Twine &Name = "", + std::optional Except = std::nullopt); //===--------------------------------------------------------------------===// // Instruction creation methods: Other Instructions @@ -2267,8 +2274,8 @@ public: CallInst *CreateCall(FunctionType *FTy, Value *Callee, - ArrayRef Args = None, const Twine &Name = "", - MDNode *FPMathTag = nullptr) { + ArrayRef Args = std::nullopt, + const Twine &Name = "", MDNode *FPMathTag = nullptr) { CallInst *CI = CallInst::Create(FTy, Callee, Args, DefaultOperandBundles); if (IsFPConstrained) setConstrainedFPCallAttr(CI); @@ -2288,7 +2295,8 @@ return Insert(CI, Name); } - CallInst *CreateCall(FunctionCallee Callee, ArrayRef Args = None, + CallInst *CreateCall(FunctionCallee Callee, + ArrayRef Args = std::nullopt, const Twine &Name = "", MDNode *FPMathTag = nullptr) { return CreateCall(Callee.getFunctionType(), Callee.getCallee(), Args, Name, FPMathTag); @@ -2303,8 +2311,8 @@ CallInst *CreateConstrainedFPCall( Function *Callee, ArrayRef Args, const Twine &Name = "", - Optional Rounding = None, - Optional Except = None); + std::optional Rounding = std::nullopt, + std::optional Except = std::nullopt); Value *CreateSelect(Value *C, Value *True, Value *False, const Twine &Name = "", Instruction *MDFrom = nullptr); @@ -2530,47 +2538,49 @@ public: IRBuilder(LLVMContext &C, FolderTy Folder, InserterTy Inserter = InserterTy(), MDNode *FPMathTag = nullptr, - ArrayRef OpBundles = None) + ArrayRef OpBundles = std::nullopt) : IRBuilderBase(C, this->Folder, this->Inserter, FPMathTag, OpBundles), Folder(Folder), Inserter(Inserter) {} explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = 
nullptr, - ArrayRef OpBundles = None) + ArrayRef OpBundles = std::nullopt) : IRBuilderBase(C, this->Folder, this->Inserter, FPMathTag, OpBundles) {} explicit IRBuilder(BasicBlock *TheBB, FolderTy Folder, MDNode *FPMathTag = nullptr, - ArrayRef OpBundles = None) + ArrayRef OpBundles = std::nullopt) : IRBuilderBase(TheBB->getContext(), this->Folder, this->Inserter, - FPMathTag, OpBundles), Folder(Folder) { + FPMathTag, OpBundles), + Folder(Folder) { SetInsertPoint(TheBB); } explicit IRBuilder(BasicBlock *TheBB, MDNode *FPMathTag = nullptr, - ArrayRef OpBundles = None) + ArrayRef OpBundles = std::nullopt) : IRBuilderBase(TheBB->getContext(), this->Folder, this->Inserter, FPMathTag, OpBundles) { SetInsertPoint(TheBB); } explicit IRBuilder(Instruction *IP, MDNode *FPMathTag = nullptr, - ArrayRef OpBundles = None) - : IRBuilderBase(IP->getContext(), this->Folder, this->Inserter, - FPMathTag, OpBundles) { + ArrayRef OpBundles = std::nullopt) + : IRBuilderBase(IP->getContext(), this->Folder, this->Inserter, FPMathTag, + OpBundles) { SetInsertPoint(IP); } IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, FolderTy Folder, MDNode *FPMathTag = nullptr, - ArrayRef OpBundles = None) + ArrayRef OpBundles = std::nullopt) : IRBuilderBase(TheBB->getContext(), this->Folder, this->Inserter, - FPMathTag, OpBundles), Folder(Folder) { + FPMathTag, OpBundles), + Folder(Folder) { SetInsertPoint(TheBB, IP); } IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, MDNode *FPMathTag = nullptr, - ArrayRef OpBundles = None) + ArrayRef OpBundles = std::nullopt) : IRBuilderBase(TheBB->getContext(), this->Folder, this->Inserter, FPMathTag, OpBundles) { SetInsertPoint(TheBB, IP); diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -1736,7 +1736,7 @@ return Align; if (const Function *F = getCalledFunction()) return F->getAttributes().getRetAlignment(); - return None; + return std::nullopt; } /// Extract the alignment for a call or parameter (0=unknown). @@ -2024,7 +2024,7 @@ return U; } - return None; + return std::nullopt; } /// Return an operand bundle by tag ID, if present. @@ -2040,7 +2040,7 @@ return U; } - return None; + return std::nullopt; } /// Return the list of operand bundles attached to this instruction as diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -325,7 +325,7 @@ /// this API if the Instruction being modified is a call. void dropUnknownNonDebugMetadata(ArrayRef KnownIDs); void dropUnknownNonDebugMetadata() { - return dropUnknownNonDebugMetadata(None); + return dropUnknownNonDebugMetadata(std::nullopt); } void dropUnknownNonDebugMetadata(unsigned ID1) { return dropUnknownNonDebugMetadata(makeArrayRef(ID1)); diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -1482,7 +1482,7 @@ inline CallInst(FunctionType *Ty, Value *Func, ArrayRef Args, const Twine &NameStr, Instruction *InsertBefore) - : CallInst(Ty, Func, Args, None, NameStr, InsertBefore) {} + : CallInst(Ty, Func, Args, std::nullopt, NameStr, InsertBefore) {} /// Construct a CallInst given a range of arguments. 
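A minimal call-site sketch for the IRBuilder.h hunks above, where the constrained-FP helpers and defaulted arguments now take std::optional and std::nullopt: the two wrapper function names below are made up for illustration, but the builder APIs are the ones declared in the patch.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;

// Rely on the std::nullopt defaults: rounding and exception behavior come
// from the builder's DefaultConstrainedRounding / DefaultConstrainedExcept.
static Value *emitConstrainedFAdd(IRBuilderBase &B, Value *L, Value *R) {
  return B.CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fadd,
                                    L, R);
}

// Override them explicitly; plain enum values convert into the std::optional
// parameters at the call boundary.
static Value *emitFAddTowardZero(IRBuilderBase &B, Value *L, Value *R) {
  return B.CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fadd,
                                    L, R, /*FMFSource=*/nullptr, /*Name=*/"",
                                    /*FPMathTag=*/nullptr,
                                    RoundingMode::TowardZero, fp::ebStrict);
}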
/// Construct a CallInst from a range of arguments @@ -1523,11 +1523,11 @@ const Twine &NameStr, Instruction *InsertBefore = nullptr) { return new (ComputeNumOperands(Args.size())) - CallInst(Ty, Func, Args, None, NameStr, InsertBefore); + CallInst(Ty, Func, Args, std::nullopt, NameStr, InsertBefore); } static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef Args, - ArrayRef Bundles = None, + ArrayRef Bundles = std::nullopt, const Twine &NameStr = "", Instruction *InsertBefore = nullptr) { const int NumOperands = @@ -1546,7 +1546,7 @@ static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef Args, const Twine &NameStr, BasicBlock *InsertAtEnd) { return new (ComputeNumOperands(Args.size())) - CallInst(Ty, Func, Args, None, NameStr, InsertAtEnd); + CallInst(Ty, Func, Args, std::nullopt, NameStr, InsertAtEnd); } static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef Args, @@ -1567,7 +1567,7 @@ } static CallInst *Create(FunctionCallee Func, ArrayRef Args, - ArrayRef Bundles = None, + ArrayRef Bundles = std::nullopt, const Twine &NameStr = "", Instruction *InsertBefore = nullptr) { return Create(Func.getFunctionType(), Func.getCallee(), Args, Bundles, @@ -1625,18 +1625,16 @@ Value *ArraySize = nullptr, Function *MallocF = nullptr, const Twine &Name = ""); - static Instruction *CreateMalloc(Instruction *InsertBefore, Type *IntPtrTy, - Type *AllocTy, Value *AllocSize, - Value *ArraySize = nullptr, - ArrayRef Bundles = None, - Function *MallocF = nullptr, - const Twine &Name = ""); - static Instruction *CreateMalloc(BasicBlock *InsertAtEnd, Type *IntPtrTy, - Type *AllocTy, Value *AllocSize, - Value *ArraySize = nullptr, - ArrayRef Bundles = None, - Function *MallocF = nullptr, - const Twine &Name = ""); + static Instruction * + CreateMalloc(Instruction *InsertBefore, Type *IntPtrTy, Type *AllocTy, + Value *AllocSize, Value *ArraySize = nullptr, + ArrayRef Bundles = std::nullopt, + Function *MallocF = nullptr, const Twine &Name = ""); + static Instruction * + CreateMalloc(BasicBlock *InsertAtEnd, Type *IntPtrTy, Type *AllocTy, + Value *AllocSize, Value *ArraySize = nullptr, + ArrayRef Bundles = std::nullopt, + Function *MallocF = nullptr, const Twine &Name = ""); /// Generate the IR for a call to the builtin free function. static Instruction *CreateFree(Value *Source, Instruction *InsertBefore); static Instruction *CreateFree(Value *Source, BasicBlock *InsertAtEnd); @@ -3619,7 +3617,7 @@ /// their prof branch_weights metadata. 
class SwitchInstProfUpdateWrapper { SwitchInst &SI; - std::optional> Weights = None; + std::optional> Weights = std::nullopt; bool Changed = false; protected: @@ -3858,13 +3856,13 @@ Instruction *InsertBefore = nullptr) { int NumOperands = ComputeNumOperands(Args.size()); return new (NumOperands) - InvokeInst(Ty, Func, IfNormal, IfException, Args, None, NumOperands, - NameStr, InsertBefore); + InvokeInst(Ty, Func, IfNormal, IfException, Args, std::nullopt, + NumOperands, NameStr, InsertBefore); } static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, - ArrayRef Bundles = None, + ArrayRef Bundles = std::nullopt, const Twine &NameStr = "", Instruction *InsertBefore = nullptr) { int NumOperands = @@ -3881,8 +3879,8 @@ const Twine &NameStr, BasicBlock *InsertAtEnd) { int NumOperands = ComputeNumOperands(Args.size()); return new (NumOperands) - InvokeInst(Ty, Func, IfNormal, IfException, Args, None, NumOperands, - NameStr, InsertAtEnd); + InvokeInst(Ty, Func, IfNormal, IfException, Args, std::nullopt, + NumOperands, NameStr, InsertAtEnd); } static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, @@ -3903,12 +3901,12 @@ const Twine &NameStr, Instruction *InsertBefore = nullptr) { return Create(Func.getFunctionType(), Func.getCallee(), IfNormal, - IfException, Args, None, NameStr, InsertBefore); + IfException, Args, std::nullopt, NameStr, InsertBefore); } static InvokeInst *Create(FunctionCallee Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, - ArrayRef Bundles = None, + ArrayRef Bundles = std::nullopt, const Twine &NameStr = "", Instruction *InsertBefore = nullptr) { return Create(Func.getFunctionType(), Func.getCallee(), IfNormal, @@ -4064,17 +4062,15 @@ Instruction *InsertBefore = nullptr) { int NumOperands = ComputeNumOperands(Args.size(), IndirectDests.size()); return new (NumOperands) - CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, None, + CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, std::nullopt, NumOperands, NameStr, InsertBefore); } - static CallBrInst *Create(FunctionType *Ty, Value *Func, - BasicBlock *DefaultDest, - ArrayRef IndirectDests, - ArrayRef Args, - ArrayRef Bundles = None, - const Twine &NameStr = "", - Instruction *InsertBefore = nullptr) { + static CallBrInst * + Create(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, + ArrayRef IndirectDests, ArrayRef Args, + ArrayRef Bundles = std::nullopt, + const Twine &NameStr = "", Instruction *InsertBefore = nullptr) { int NumOperands = ComputeNumOperands(Args.size(), IndirectDests.size(), CountBundleInputs(Bundles)); unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo); @@ -4091,7 +4087,7 @@ BasicBlock *InsertAtEnd) { int NumOperands = ComputeNumOperands(Args.size(), IndirectDests.size()); return new (NumOperands) - CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, None, + CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, std::nullopt, NumOperands, NameStr, InsertAtEnd); } @@ -4121,7 +4117,7 @@ static CallBrInst *Create(FunctionCallee Func, BasicBlock *DefaultDest, ArrayRef IndirectDests, ArrayRef Args, - ArrayRef Bundles = None, + ArrayRef Bundles = std::nullopt, const Twine &NameStr = "", Instruction *InsertBefore = nullptr) { return Create(Func.getFunctionType(), Func.getCallee(), DefaultDest, @@ -4501,7 +4497,8 @@ NameStr, InsertAtEnd) {} public: - static CleanupPadInst *Create(Value *ParentPad, ArrayRef Args = None, + static CleanupPadInst *Create(Value *ParentPad, + 
ArrayRef Args = std::nullopt, const Twine &NameStr = "", Instruction *InsertBefore = nullptr) { unsigned Values = 1 + Args.size(); @@ -5415,7 +5412,7 @@ /// None if it is not an atomic operation. inline std::optional getAtomicSyncScopeID(const Instruction *I) { if (!I->isAtomic()) - return None; + return std::nullopt; if (auto *AI = dyn_cast(I)) return AI->getSyncScopeID(); if (auto *AI = dyn_cast(I)) diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -35,6 +35,7 @@ #include "llvm/Support/Casting.h" #include #include +#include namespace llvm { @@ -594,8 +595,8 @@ public: bool isUnaryOp() const; bool isTernaryOp() const; - Optional getRoundingMode() const; - Optional getExceptionBehavior() const; + std::optional getRoundingMode() const; + std::optional getExceptionBehavior() const; bool isDefaultFPEnvironment() const; // Methods for support type inquiry through isa, cast, and dyn_cast: diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h --- a/llvm/include/llvm/IR/Intrinsics.h +++ b/llvm/include/llvm/IR/Intrinsics.h @@ -75,7 +75,7 @@ /// Return the function type for an intrinsic. FunctionType *getType(LLVMContext &Context, ID id, - ArrayRef Tys = None); + ArrayRef Tys = std::nullopt); /// Returns true if the intrinsic can be overloaded. bool isOverloaded(ID id); @@ -95,7 +95,8 @@ /// using iAny, fAny, vAny, or iPTRAny). For a declaration of an overloaded /// intrinsic, Tys must provide exactly one type for each overloaded type in /// the intrinsic. - Function *getDeclaration(Module *M, ID id, ArrayRef Tys = None); + Function *getDeclaration(Module *M, ID id, + ArrayRef Tys = std::nullopt); /// Looks up Name in NameTable via binary search. NameTable must be sorted /// and all entries must start with "llvm.". If NameTable contains an exact diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -710,6 +710,9 @@ def int_roundeven : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_canonicalize : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + // Arithmetic fence intrinsic. + def int_arithmetic_fence : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], + [IntrNoMem]>; def int_lround : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>; def int_llround : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>; @@ -757,7 +760,7 @@ def int_is_fpclass : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [llvm_anyfloat_ty, llvm_i32_ty], - [IntrNoMem, IntrWillReturn, ImmArg>]>; + [IntrNoMem, IntrSpeculatable, ImmArg>]>; //===--------------- Constrained Floating Point Intrinsics ----------------===// // @@ -1389,9 +1392,6 @@ def int_pseudoprobe : DefaultAttrsIntrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrInaccessibleMemOnly, IntrWillReturn]>; -// Arithmetic fence intrinsic. 
-def int_arithmetic_fence : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; - // Intrinsics to support half precision floating point format let IntrProperties = [IntrNoMem, IntrWillReturn] in { def int_convert_to_fp16 : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>; diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -1056,7 +1056,7 @@ protected: MDNode(LLVMContext &Context, unsigned ID, StorageType Storage, - ArrayRef Ops1, ArrayRef Ops2 = None); + ArrayRef Ops1, ArrayRef Ops2 = std::nullopt); ~MDNode() = default; void *operator new(size_t Size, size_t NumOps, StorageType Storage); diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -863,7 +864,7 @@ /// @{ /// Returns the code model (tiny, small, kernel, medium or large model) - Optional getCodeModel() const; + std::optional getCodeModel() const; /// Set the code model (tiny, small, kernel, medium or large) void setCodeModel(CodeModel::Model CL); diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -1705,7 +1705,7 @@ getTypeIdCompatibleVtableSummary(StringRef TypeId) const { auto I = TypeIdCompatibleVtableMap.find(TypeId); if (I == TypeIdCompatibleVtableMap.end()) - return None; + return std::nullopt; return I->second; } diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h --- a/llvm/include/llvm/IR/Operator.h +++ b/llvm/include/llvm/IR/Operator.h @@ -396,7 +396,8 @@ /// Returns the offset of the index with an inrange attachment, or None if /// none. Optional getInRangeIndex() const { - if (SubclassOptionalData >> 1 == 0) return None; + if (SubclassOptionalData >> 1 == 0) + return std::nullopt; return (SubclassOptionalData >> 1) - 1; } diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -443,7 +443,7 @@ HELPER_MAP_VPID_TO_VPSD(vp_load, VP_LOAD) VP_PROPERTY_FUNCTIONAL_OPC(Load) VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_load) -VP_PROPERTY_MEMOP(0, None) +VP_PROPERTY_MEMOP(0, std::nullopt) END_REGISTER_VP(vp_load, VP_LOAD) // llvm.experimental.vp.strided.load(ptr,stride,mask,vlen) @@ -451,7 +451,7 @@ // chain = EXPERIMENTAL_VP_STRIDED_LOAD chain,base,offset,stride,mask,evl BEGIN_REGISTER_VP_SDNODE(EXPERIMENTAL_VP_STRIDED_LOAD, -1, experimental_vp_strided_load, 4, 5) HELPER_MAP_VPID_TO_VPSD(experimental_vp_strided_load, EXPERIMENTAL_VP_STRIDED_LOAD) -VP_PROPERTY_MEMOP(0, None) +VP_PROPERTY_MEMOP(0, std::nullopt) END_REGISTER_VP(experimental_vp_strided_load, EXPERIMENTAL_VP_STRIDED_LOAD) // llvm.vp.gather(ptr,mask,vlen) @@ -460,7 +460,7 @@ BEGIN_REGISTER_VP_SDNODE(VP_GATHER, -1, vp_gather, 4, 5) HELPER_MAP_VPID_TO_VPSD(vp_gather, VP_GATHER) VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_gather) -VP_PROPERTY_MEMOP(0, None) +VP_PROPERTY_MEMOP(0, std::nullopt) END_REGISTER_VP(vp_gather, VP_GATHER) ///// } Memory Operations @@ -568,12 +568,12 @@ // false. 
// // llvm.vp.select(cond,on_true,on_false,vlen) -BEGIN_REGISTER_VP(vp_select, None, 3, VP_SELECT, -1) +BEGIN_REGISTER_VP(vp_select, std::nullopt, 3, VP_SELECT, -1) VP_PROPERTY_FUNCTIONAL_OPC(Select) END_REGISTER_VP(vp_select, VP_SELECT) // llvm.vp.merge(cond,on_true,on_false,pivot) -BEGIN_REGISTER_VP(vp_merge, None, 3, VP_MERGE, -1) +BEGIN_REGISTER_VP(vp_merge, std::nullopt, 3, VP_MERGE, -1) END_REGISTER_VP(vp_merge, VP_MERGE) BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5, EXPERIMENTAL_VP_SPLICE, -1) diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h --- a/llvm/include/llvm/IR/Value.h +++ b/llvm/include/llvm/IR/Value.h @@ -556,9 +556,6 @@ /// Return true if there is metadata referencing this value. bool isUsedByMetadata() const { return IsUsedByMD; } - // Return true if this value is only transitively referenced by metadata. - bool isTransitiveUsedByMetadataOnly() const; - protected: /// Get the current metadata attachments for the given kind, if any. /// diff --git a/llvm/include/llvm/IR/ValueMap.h b/llvm/include/llvm/IR/ValueMap.h --- a/llvm/include/llvm/IR/ValueMap.h +++ b/llvm/include/llvm/IR/ValueMap.h @@ -122,10 +122,10 @@ /// Get the mapped metadata, if it's in the map. Optional getMappedMD(const Metadata *MD) const { if (!MDMap) - return None; + return std::nullopt; auto Where = MDMap->find(MD); if (Where == MDMap->end()) - return None; + return std::nullopt; return Where->second.get(); } diff --git a/llvm/include/llvm/IRReader/IRReader.h b/llvm/include/llvm/IRReader/IRReader.h --- a/llvm/include/llvm/IRReader/IRReader.h +++ b/llvm/include/llvm/IRReader/IRReader.h @@ -56,7 +56,9 @@ /// \param DataLayoutCallback Override datalayout in the llvm assembly. std::unique_ptr parseIR( MemoryBufferRef Buffer, SMDiagnostic &Err, LLVMContext &Context, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }); + DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { + return std::nullopt; + }); /// If the given file holds a bitcode image, return a Module for it. /// Otherwise, attempt to parse it as LLVM Assembly and return a Module @@ -64,7 +66,9 @@ /// \param DataLayoutCallback Override datalayout in the llvm assembly. std::unique_ptr parseIRFile( StringRef Filename, SMDiagnostic &Err, LLVMContext &Context, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }); + DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { + return std::nullopt; + }); } #endif diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -277,6 +277,7 @@ void initializeMachineDominatorTreePass(PassRegistry&); void initializeMachineFunctionPrinterPassPass(PassRegistry&); void initializeMachineFunctionSplitterPass(PassRegistry &); +void initializeMachineLateInstrsCleanupPass(PassRegistry&); void initializeMachineLICMPass(PassRegistry&); void initializeMachineLoopInfoPass(PassRegistry&); void initializeMachineModuleInfoWrapperPassPass(PassRegistry &); diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h --- a/llvm/include/llvm/LTO/Config.h +++ b/llvm/include/llvm/LTO/Config.h @@ -25,6 +25,7 @@ #include "llvm/Target/TargetOptions.h" #include +#include namespace llvm { @@ -51,8 +52,8 @@ std::vector PassPlugins; /// For adding passes that run right before codegen. 
std::function PreCodeGenPassesHook; - Optional RelocModel = Reloc::PIC_; - Optional CodeModel = None; + std::optional RelocModel = Reloc::PIC_; + std::optional CodeModel = std::nullopt; CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default; CodeGenFileType CGFileType = CGFT_ObjectFile; unsigned OptLevel = 2; diff --git a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h --- a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h +++ b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h @@ -89,7 +89,7 @@ void setAsmUndefinedRefs(struct LTOModule *); void setTargetOptions(const TargetOptions &Options); void setDebugInfo(lto_debug_model); - void setCodePICModel(Optional Model) { + void setCodePICModel(std::optional Model) { Config.RelocModel = Model; } diff --git a/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h b/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h --- a/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h +++ b/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h @@ -37,7 +37,7 @@ std::string MCpu; std::string MAttr; TargetOptions Options; - Optional RelocModel; + std::optional RelocModel; CodeGenOpt::Level CGOptLevel = CodeGenOpt::Aggressive; std::unique_ptr create() const; @@ -211,7 +211,7 @@ void setFreestanding(bool Enabled) { Freestanding = Enabled; } /// CodeModel - void setCodePICModel(Optional Model) { + void setCodePICModel(std::optional Model) { TMBuilder.RelocModel = Model; } diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -442,6 +442,8 @@ /// protected visibility. Defaults to MCSA_Protected MCSymbolAttr ProtectedVisibilityAttr = MCSA_Protected; + MCSymbolAttr MemtagAttr = MCSA_Memtag; + //===--- Dwarf Emission Directives -----------------------------------===// /// True if target supports emission of debugging information. Defaults to @@ -772,6 +774,8 @@ return ProtectedVisibilityAttr; } + MCSymbolAttr getMemtagAttr() const { return MemtagAttr; } + bool doesSupportDebugInformation() const { return SupportsDebugInformation; } ExceptionHandling getExceptionHandlingType() const { return ExceptionsType; } diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -672,11 +672,13 @@ bool hasXCOFFSection(StringRef Section, XCOFF::CsectProperties CsectProp) const; - MCSectionXCOFF *getXCOFFSection( - StringRef Section, SectionKind K, - Optional CsectProp = None, - bool MultiSymbolsAllowed = false, const char *BeginSymName = nullptr, - Optional DwarfSubtypeFlags = None); + MCSectionXCOFF * + getXCOFFSection(StringRef Section, SectionKind K, + Optional CsectProp = std::nullopt, + bool MultiSymbolsAllowed = false, + const char *BeginSymName = nullptr, + Optional DwarfSubtypeFlags = + std::nullopt); // Create and save a copy of STI and return a reference to the copy. 
MCSubtargetInfo &getSubtargetCopy(const MCSubtargetInfo &STI); diff --git a/llvm/include/llvm/MC/MCDirectives.h b/llvm/include/llvm/MC/MCDirectives.h --- a/llvm/include/llvm/MC/MCDirectives.h +++ b/llvm/include/llvm/MC/MCDirectives.h @@ -45,7 +45,8 @@ MCSA_Weak, ///< .weak MCSA_WeakDefinition, ///< .weak_definition (MachO) MCSA_WeakReference, ///< .weak_reference (MachO) - MCSA_WeakDefAutoPrivate ///< .weak_def_can_be_hidden (MachO) + MCSA_WeakDefAutoPrivate, ///< .weak_def_can_be_hidden (MachO) + MCSA_Memtag, ///< .memtag (ELF) }; enum MCAssemblerFlag { diff --git a/llvm/include/llvm/MC/MCELFObjectWriter.h b/llvm/include/llvm/MC/MCELFObjectWriter.h --- a/llvm/include/llvm/MC/MCELFObjectWriter.h +++ b/llvm/include/llvm/MC/MCELFObjectWriter.h @@ -139,6 +139,14 @@ unsigned setRSsym(unsigned Value, unsigned Type) const { return (Type & R_SSYM_MASK) | ((Value & 0xff) << R_SSYM_SHIFT); } + + // On AArch64, return a new section to be added to the ELF object that + // contains relocations used to describe every symbol that should have memory + // tags applied. Returns nullptr if no such section is necessary (i.e. there's + // no tagged globals). + virtual MCSectionELF *getMemtagRelocsSection(MCContext &Ctx) const { + return nullptr; + } }; /// Construct a new ELF writer instance. diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h --- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h @@ -210,25 +210,28 @@ const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) = 0; /// Emit a note at the location \p L, with the message \p Msg. - virtual void Note(SMLoc L, const Twine &Msg, SMRange Range = None) = 0; + virtual void Note(SMLoc L, const Twine &Msg, + SMRange Range = std::nullopt) = 0; /// Emit a warning at the location \p L, with the message \p Msg. /// /// \return The return value is true, if warnings are fatal. - virtual bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) = 0; + virtual bool Warning(SMLoc L, const Twine &Msg, + SMRange Range = std::nullopt) = 0; /// Return an error at the location \p L, with the message \p Msg. This /// may be modified before being emitted. /// /// \return The return value is always true, as an idiomatic convenience to /// clients. - bool Error(SMLoc L, const Twine &Msg, SMRange Range = None); + bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt); /// Emit an error at the location \p L, with the message \p Msg. /// /// \return The return value is always true, as an idiomatic convenience to /// clients. - virtual bool printError(SMLoc L, const Twine &Msg, SMRange Range = None) = 0; + virtual bool printError(SMLoc L, const Twine &Msg, + SMRange Range = std::nullopt) = 0; bool hasPendingError() { return !PendingErrors.empty(); } @@ -253,7 +256,7 @@ const AsmToken &getTok() const; /// Report an error at the current lexer location. 
- bool TokError(const Twine &Msg, SMRange Range = None); + bool TokError(const Twine &Msg, SMRange Range = std::nullopt); bool parseTokenLoc(SMLoc &Loc); bool parseToken(AsmToken::TokenKind T, const Twine &Msg = "unexpected token"); diff --git a/llvm/include/llvm/MC/MCSectionXCOFF.h b/llvm/include/llvm/MC/MCSectionXCOFF.h --- a/llvm/include/llvm/MC/MCSectionXCOFF.h +++ b/llvm/include/llvm/MC/MCSectionXCOFF.h @@ -46,7 +46,7 @@ bool MultiSymbolsAllowed) : MCSection(SV_XCOFF, Name, K, Begin), CsectProp(XCOFF::CsectProperties(SMC, ST)), QualName(QualName), - SymbolTableName(SymbolTableName), DwarfSubtypeFlags(None), + SymbolTableName(SymbolTableName), DwarfSubtypeFlags(std::nullopt), MultiSymbolsAllowed(MultiSymbolsAllowed) { assert( (ST == XCOFF::XTY_SD || ST == XCOFF::XTY_CM || ST == XCOFF::XTY_ER) && diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -905,11 +905,10 @@ /// Associate a filename with a specified logical file number. This /// implements the DWARF2 '.file 4 "foo.c"' assembler directive. - unsigned emitDwarfFileDirective(unsigned FileNo, StringRef Directory, - StringRef Filename, - Optional Checksum = None, - Optional Source = None, - unsigned CUID = 0) { + unsigned emitDwarfFileDirective( + unsigned FileNo, StringRef Directory, StringRef Filename, + Optional Checksum = std::nullopt, + Optional Source = std::nullopt, unsigned CUID = 0) { return cantFail( tryEmitDwarfFileDirective(FileNo, Directory, Filename, Checksum, Source, CUID)); @@ -922,8 +921,8 @@ /// '.file 4 "dir/foo.c" md5 "..." source "..."' assembler directive. virtual Expected tryEmitDwarfFileDirective( unsigned FileNo, StringRef Directory, StringRef Filename, - Optional Checksum = None, Optional Source = None, - unsigned CUID = 0); + Optional Checksum = std::nullopt, + Optional Source = std::nullopt, unsigned CUID = 0); /// Specify the "root" file of the compilation, using the ".file 0" extension. virtual void emitDwarfFile0Directive(StringRef Directory, StringRef Filename, @@ -1089,7 +1088,7 @@ virtual Optional> emitRelocDirective(const MCExpr &Offset, StringRef Name, const MCExpr *Expr, SMLoc Loc, const MCSubtargetInfo &STI) { - return None; + return std::nullopt; } virtual void emitAddrsig() {} diff --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h --- a/llvm/include/llvm/MC/MCSubtargetInfo.h +++ b/llvm/include/llvm/MC/MCSubtargetInfo.h @@ -23,6 +23,7 @@ #include "llvm/MC/SubtargetFeature.h" #include #include +#include #include namespace llvm { @@ -236,13 +237,13 @@ /// Level is zero-based, so a value of zero means the first level of /// cache. /// - virtual Optional getCacheSize(unsigned Level) const; + virtual std::optional getCacheSize(unsigned Level) const; /// Return the cache associatvity for the given level of cache. /// Level is zero-based, so a value of zero means the first level of /// cache. /// - virtual Optional getCacheAssociativity(unsigned Level) const; + virtual std::optional getCacheAssociativity(unsigned Level) const; /// Return the target cache line size in bytes at a given level. 
/// diff --git a/llvm/include/llvm/MC/MCSymbolELF.h b/llvm/include/llvm/MC/MCSymbolELF.h --- a/llvm/include/llvm/MC/MCSymbolELF.h +++ b/llvm/include/llvm/MC/MCSymbolELF.h @@ -43,6 +43,9 @@ void setIsSignature() const; bool isSignature() const; + void setMemtag(bool Tagged); + bool isMemtag() const; + static bool classof(const MCSymbol *S) { return S->isELF(); } private: diff --git a/llvm/include/llvm/MC/StringTableBuilder.h b/llvm/include/llvm/MC/StringTableBuilder.h --- a/llvm/include/llvm/MC/StringTableBuilder.h +++ b/llvm/include/llvm/MC/StringTableBuilder.h @@ -12,6 +12,7 @@ #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Alignment.h" #include #include @@ -38,14 +39,14 @@ DenseMap StringIndexMap; size_t Size = 0; Kind K; - unsigned Alignment; + Align Alignment; bool Finalized = false; void finalizeStringTable(bool Optimize); void initSize(); public: - StringTableBuilder(Kind K, unsigned Alignment = 1); + StringTableBuilder(Kind K, Align Alignment = Align(1)); ~StringTableBuilder(); /// Add a string to the builder. Returns the position of S in the diff --git a/llvm/include/llvm/MC/TargetRegistry.h b/llvm/include/llvm/MC/TargetRegistry.h --- a/llvm/include/llvm/MC/TargetRegistry.h +++ b/llvm/include/llvm/MC/TargetRegistry.h @@ -31,6 +31,7 @@ #include #include #include +#include #include namespace llvm { @@ -166,8 +167,8 @@ StringRef Features); using TargetMachineCtorTy = TargetMachine *(*)(const Target &T, const Triple &TT, StringRef CPU, StringRef Features, - const TargetOptions &Options, Optional RM, - Optional CM, CodeGenOpt::Level OL, bool JIT); + const TargetOptions &Options, std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT); // If it weren't for layering issues (this header is in llvm/Support, but // depends on MC?) this should take the Streamer by value rather than rvalue // reference. @@ -478,13 +479,11 @@ /// feature set; it should always be provided. Generally this should be /// either the target triple from the module, or the target triple of the /// host if that does not exist. 
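A rough sketch for the new memtag plumbing above (MCSA_Memtag in MCDirectives.h, MCAsmInfo::getMemtagAttr, MCSymbolELF::setMemtag/isMemtag, and the getMemtagRelocsSection hook): how an emitter might flag a tagged symbol. It assumes the existing MCStreamer::emitSymbolAttribute entry point; the helper name is hypothetical.

#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
using namespace llvm;

// Emits ".memtag <sym>" on ELF targets, marking the symbol's memory as
// subject to memory tagging.
static void markSymbolMemtag(MCStreamer &Out, const MCAsmInfo &MAI,
                             MCSymbol *Sym) {
  Out.emitSymbolAttribute(Sym, MAI.getMemtagAttr());
}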
- TargetMachine *createTargetMachine(StringRef TT, StringRef CPU, - StringRef Features, - const TargetOptions &Options, - Optional RM, - Optional CM = None, - CodeGenOpt::Level OL = CodeGenOpt::Default, - bool JIT = false) const { + TargetMachine *createTargetMachine( + StringRef TT, StringRef CPU, StringRef Features, + const TargetOptions &Options, std::optional RM, + std::optional CM = std::nullopt, + CodeGenOpt::Level OL = CodeGenOpt::Default, bool JIT = false) const { if (!TargetMachineCtorFn) return nullptr; return TargetMachineCtorFn(*this, Triple(TT), CPU, Features, Options, RM, @@ -1359,10 +1358,12 @@ } private: - static TargetMachine * - Allocator(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, Optional RM, - Optional CM, CodeGenOpt::Level OL, bool JIT) { + static TargetMachine *Allocator(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + std::optional RM, + std::optional CM, + CodeGenOpt::Level OL, bool JIT) { return new TargetMachineImpl(T, TT, CPU, FS, Options, RM, CM, OL, JIT); } }; diff --git a/llvm/include/llvm/ObjCopy/CommonConfig.h b/llvm/include/llvm/ObjCopy/CommonConfig.h --- a/llvm/include/llvm/ObjCopy/CommonConfig.h +++ b/llvm/include/llvm/ObjCopy/CommonConfig.h @@ -118,7 +118,7 @@ Optional getName() const { if (!R && !G) return Name; - return None; + return std::nullopt; } bool operator==(StringRef S) const { return R ? R->match(S) : G ? G->match(S) : Name == S; diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h --- a/llvm/include/llvm/Object/ELF.h +++ b/llvm/include/llvm/Object/ELF.h @@ -120,7 +120,7 @@ } const T *First; - Optional Size = None; + Optional Size = std::nullopt; const uint8_t *BufEnd = nullptr; }; diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -108,7 +108,7 @@ // `TextSectionIndex` is specified, only returns the BB address maps // corresponding to the section with that index. Expected> - readBBAddrMap(Optional TextSectionIndex = None) const; + readBBAddrMap(Optional TextSectionIndex = std::nullopt) const; }; class ELFSectionRef : public SectionRef { diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -799,6 +799,7 @@ uint64_t Addr; // Function address // Struct representing the BBAddrMap information for one basic block. struct BBEntry { + uint32_t ID; // Unique ID of this basic block. uint32_t Offset; // Offset of basic block relative to function start. uint32_t Size; // Size of the basic block. @@ -809,13 +810,13 @@ bool IsEHPad; // If this is an exception handling block. bool CanFallThrough; // If this block can fall through to its next. 
- BBEntry(uint32_t Offset, uint32_t Size, uint32_t Metadata) - : Offset(Offset), Size(Size), HasReturn(Metadata & 1), + BBEntry(uint32_t ID, uint32_t Offset, uint32_t Size, uint32_t Metadata) + : ID(ID), Offset(Offset), Size(Size), HasReturn(Metadata & 1), HasTailCall(Metadata & (1 << 1)), IsEHPad(Metadata & (1 << 2)), CanFallThrough(Metadata & (1 << 3)){}; bool operator==(const BBEntry &Other) const { - return Offset == Other.Offset && Size == Other.Size && + return ID == Other.ID && Offset == Other.Offset && Size == Other.Size && HasReturn == Other.HasReturn && HasTailCall == Other.HasTailCall && IsEHPad == Other.IsEHPad && CanFallThrough == Other.CanFallThrough; } diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h --- a/llvm/include/llvm/Object/ObjectFile.h +++ b/llvm/include/llvm/Object/ObjectFile.h @@ -337,7 +337,7 @@ virtual StringRef getFileFormatName() const = 0; virtual Triple::ArchType getArch() const = 0; virtual SubtargetFeatures getFeatures() const = 0; - virtual Optional tryGetCPUName() const { return None; }; + virtual Optional tryGetCPUName() const { return std::nullopt; }; virtual void setARMSubArch(Triple &TheTriple) const { } virtual Expected getStartAddress() const { return errorCodeToError(object_error::parse_failed); diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -157,6 +157,7 @@ struct BBAddrMapEntry { struct BBEntry { + uint32_t ID; llvm::yaml::Hex64 AddressOffset; llvm::yaml::Hex64 Size; llvm::yaml::Hex64 Metadata; diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -116,7 +116,7 @@ explicit PassBuilder(TargetMachine *TM = nullptr, PipelineTuningOptions PTO = PipelineTuningOptions(), - Optional PGOOpt = None, + Optional PGOOpt = std::nullopt, PassInstrumentationCallbacks *PIC = nullptr); /// Cross register the analysis managers through their proxies. diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -323,7 +323,7 @@ public: CounterMappingContext(ArrayRef Expressions, - ArrayRef CounterValues = None) + ArrayRef CounterValues = std::nullopt) : Expressions(Expressions), CounterValues(CounterValues) {} void setCounts(ArrayRef Counts) { CounterValues = Counts; } @@ -605,7 +605,8 @@ /// Ignores non-instrumented object files unless all are not instrumented. static Expected> load(ArrayRef ObjectFilenames, StringRef ProfileFilename, - ArrayRef Arches = None, StringRef CompilationDir = ""); + ArrayRef Arches = std::nullopt, + StringRef CompilationDir = ""); /// The number of functions that couldn't have their profiles mapped. 
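For the BBAddrMap changes above: each basic-block entry now carries a leading ID field, and the ID participates in equality. A small sketch with made-up values:

#include "llvm/Object/ELFTypes.h"

static llvm::object::BBAddrMap::BBEntry makeEntry() {
  // ID comes first; the metadata bits are unchanged (bit 0 = HasReturn,
  // bit 1 = HasTailCall, bit 2 = IsEHPad, bit 3 = CanFallThrough).
  return llvm::object::BBAddrMap::BBEntry(/*ID=*/3, /*Offset=*/0x40,
                                          /*Size=*/0x18, /*Metadata=*/0x1);
}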
/// diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -868,7 +868,7 @@ ArrayRef getValueSitesForKind(uint32_t ValueKind) const { if (!ValueData) - return None; + return std::nullopt; switch (ValueKind) { case IPVK_IndirectCallTarget: return ValueData->IndirectCallSites; diff --git a/llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h b/llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h --- a/llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h +++ b/llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h @@ -106,8 +106,8 @@ /// Emit the metadata for the remarks. void emitMetaBlock(uint64_t ContainerVersion, Optional RemarkVersion, - Optional StrTab = None, - Optional Filename = None); + Optional StrTab = std::nullopt, + Optional Filename = std::nullopt); /// Emit a remark block. The string table is required. void emitRemarkBlock(const Remark &Remark, StringTable &StrTab); @@ -148,7 +148,7 @@ /// metadata serializer will change. std::unique_ptr metaSerializer(raw_ostream &OS, - Optional ExternalFilename = None) override; + Optional ExternalFilename = std::nullopt) override; static bool classof(const RemarkSerializer *S) { return S->SerializerFormat == Format::Bitstream; @@ -172,10 +172,10 @@ /// Create a new meta serializer based on \p ContainerType. BitstreamMetaSerializer(raw_ostream &OS, BitstreamRemarkContainerType ContainerType, - Optional StrTab = None, - Optional ExternalFilename = None) - : MetaSerializer(OS), TmpHelper(None), Helper(nullptr), StrTab(StrTab), - ExternalFilename(ExternalFilename) { + Optional StrTab = std::nullopt, + Optional ExternalFilename = std::nullopt) + : MetaSerializer(OS), TmpHelper(std::nullopt), Helper(nullptr), + StrTab(StrTab), ExternalFilename(ExternalFilename) { TmpHelper.emplace(ContainerType); Helper = &*TmpHelper; } @@ -183,10 +183,10 @@ /// Create a new meta serializer based on a previously built \p Helper. BitstreamMetaSerializer(raw_ostream &OS, BitstreamRemarkSerializerHelper &Helper, - Optional StrTab = None, - Optional ExternalFilename = None) - : MetaSerializer(OS), TmpHelper(None), Helper(&Helper), StrTab(StrTab), - ExternalFilename(ExternalFilename) {} + Optional StrTab = std::nullopt, + Optional ExternalFilename = std::nullopt) + : MetaSerializer(OS), TmpHelper(std::nullopt), Helper(&Helper), + StrTab(StrTab), ExternalFilename(ExternalFilename) {} void emit() override; }; diff --git a/llvm/include/llvm/Remarks/HotnessThresholdParser.h b/llvm/include/llvm/Remarks/HotnessThresholdParser.h --- a/llvm/include/llvm/Remarks/HotnessThresholdParser.h +++ b/llvm/include/llvm/Remarks/HotnessThresholdParser.h @@ -30,7 +30,7 @@ // be filled later during PSI. inline Expected> parseHotnessThresholdOption(StringRef Arg) { if (Arg == "auto") - return None; + return std::nullopt; int64_t Val; if (Arg.getAsInteger(10, Val)) diff --git a/llvm/include/llvm/Remarks/RemarkLinker.h b/llvm/include/llvm/Remarks/RemarkLinker.h --- a/llvm/include/llvm/Remarks/RemarkLinker.h +++ b/llvm/include/llvm/Remarks/RemarkLinker.h @@ -65,12 +65,12 @@ /// \p Buffer. /// \p Buffer can be either a standalone remark container or just /// metadata. This takes care of uniquing and merging the remarks. - Error link(StringRef Buffer, Optional RemarkFormat = None); + Error link(StringRef Buffer, Optional RemarkFormat = std::nullopt); /// Link the remarks found in \p Obj by looking for the right section and /// calling the method above. 
Error link(const object::ObjectFile &Obj, - Optional RemarkFormat = None); + Optional RemarkFormat = std::nullopt); /// Serialize the linked remarks to the stream \p OS, using the format \p /// RemarkFormat. diff --git a/llvm/include/llvm/Remarks/RemarkParser.h b/llvm/include/llvm/Remarks/RemarkParser.h --- a/llvm/include/llvm/Remarks/RemarkParser.h +++ b/llvm/include/llvm/Remarks/RemarkParser.h @@ -82,10 +82,10 @@ createRemarkParser(Format ParserFormat, StringRef Buf, ParsedStringTable StrTab); -Expected> -createRemarkParserFromMeta(Format ParserFormat, StringRef Buf, - Optional StrTab = None, - Optional ExternalFilePrependPath = None); +Expected> createRemarkParserFromMeta( + Format ParserFormat, StringRef Buf, + Optional StrTab = std::nullopt, + Optional ExternalFilePrependPath = std::nullopt); } // end namespace remarks } // end namespace llvm diff --git a/llvm/include/llvm/Remarks/RemarkSerializer.h b/llvm/include/llvm/Remarks/RemarkSerializer.h --- a/llvm/include/llvm/Remarks/RemarkSerializer.h +++ b/llvm/include/llvm/Remarks/RemarkSerializer.h @@ -60,7 +60,7 @@ /// Return the corresponding metadata serializer. virtual std::unique_ptr metaSerializer(raw_ostream &OS, - Optional ExternalFilename = None) = 0; + Optional ExternalFilename = std::nullopt) = 0; }; /// This is the base class for a remark metadata serializer. diff --git a/llvm/include/llvm/Remarks/RemarkStreamer.h b/llvm/include/llvm/Remarks/RemarkStreamer.h --- a/llvm/include/llvm/Remarks/RemarkStreamer.h +++ b/llvm/include/llvm/Remarks/RemarkStreamer.h @@ -51,11 +51,11 @@ public: RemarkStreamer(std::unique_ptr RemarkSerializer, - Optional Filename = None); + Optional Filename = std::nullopt); /// Return the filename that the remark diagnostics are emitted to. Optional getFilename() const { - return Filename ? Optional(*Filename) : None; + return Filename ? Optional(*Filename) : std::nullopt; } /// Return stream that the remark diagnostics are emitted to. 
raw_ostream &getStream() { return RemarkSerializer->OS; } diff --git a/llvm/include/llvm/Remarks/YAMLRemarkSerializer.h b/llvm/include/llvm/Remarks/YAMLRemarkSerializer.h --- a/llvm/include/llvm/Remarks/YAMLRemarkSerializer.h +++ b/llvm/include/llvm/Remarks/YAMLRemarkSerializer.h @@ -35,12 +35,12 @@ yaml::Output YAMLOutput; YAMLRemarkSerializer(raw_ostream &OS, SerializerMode Mode, - Optional StrTab = None); + Optional StrTab = std::nullopt); void emit(const Remark &Remark) override; std::unique_ptr metaSerializer(raw_ostream &OS, - Optional ExternalFilename = None) override; + Optional ExternalFilename = std::nullopt) override; static bool classof(const RemarkSerializer *S) { return S->SerializerFormat == Format::YAML; @@ -49,7 +49,7 @@ protected: YAMLRemarkSerializer(Format SerializerFormat, raw_ostream &OS, SerializerMode Mode, - Optional StrTab = None); + Optional StrTab = std::nullopt); }; struct YAMLMetaSerializer : public MetaSerializer { @@ -83,7 +83,7 @@ std::unique_ptr metaSerializer(raw_ostream &OS, - Optional ExternalFilename = None) override; + Optional ExternalFilename = std::nullopt) override; static bool classof(const RemarkSerializer *S) { return S->SerializerFormat == Format::YAMLStrTab; diff --git a/llvm/include/llvm/Support/AArch64TargetParser.h b/llvm/include/llvm/Support/AArch64TargetParser.h --- a/llvm/include/llvm/Support/AArch64TargetParser.h +++ b/llvm/include/llvm/Support/AArch64TargetParser.h @@ -16,8 +16,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/VersionTuple.h" -#include #include +#include namespace llvm { @@ -27,6 +27,7 @@ // Arch extension modifiers for CPUs. These are labelled with their Arm ARM // feature name (though the canonical reference for those is AArch64.td) +// clang-format off enum ArchExtKind : uint64_t { AEK_INVALID = 0, AEK_NONE = 1, @@ -83,6 +84,7 @@ AEK_D128 = 1ULL << 51, // FEAT_D128 AEK_LSE128 = 1ULL << 52, // FEAT_LSE128 }; +// clang-format on // Represents an extension that can be enabled with -march=+. // Typically these correspond to Arm Architecture extensions, unlike @@ -112,12 +114,6 @@ StringRef ArchFeature; // Command line feature flag, e.g. +v8a uint64_t DefaultExts; // bitfield of default extensions ArchExtKind - // These are not intended to be copied or created outside of this file. - ArchInfo(const ArchInfo &) = delete; - ArchInfo(const ArchInfo &&) = delete; - ArchInfo &operator=(const ArchInfo &rhs) = delete; - ArchInfo &&operator=(const ArchInfo &&rhs) = delete; - // Comparison is done by address. Copies should not exist. bool operator==(const ArchInfo &Other) const { return this == &Other; } bool operator!=(const ArchInfo &Other) const { return this != &Other; } diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h --- a/llvm/include/llvm/Support/Allocator.h +++ b/llvm/include/llvm/Support/Allocator.h @@ -248,7 +248,7 @@ return InCustomSizedSlabIdx - static_cast(P - S); InCustomSizedSlabIdx -= static_cast(Size); } - return None; + return std::nullopt; } /// A wrapper around identifyObject that additionally asserts that diff --git a/llvm/include/llvm/Support/CheckedArithmetic.h b/llvm/include/llvm/Support/CheckedArithmetic.h --- a/llvm/include/llvm/Support/CheckedArithmetic.h +++ b/llvm/include/llvm/Support/CheckedArithmetic.h @@ -33,7 +33,7 @@ bool Overflow; llvm::APInt Out = (ALHS.*Op)(ARHS, Overflow); if (Overflow) - return llvm::None; + return std::nullopt; return Signed ? 
Out.getSExtValue() : Out.getZExtValue(); } } @@ -75,7 +75,7 @@ checkedMulAdd(T A, T B, T C) { if (auto Product = checkedMul(A, B)) return checkedAdd(*Product, C); - return llvm::None; + return std::nullopt; } /// Add two unsigned integers \p LHS and \p RHS. @@ -104,7 +104,7 @@ checkedMulAddUnsigned(T A, T B, T C) { if (auto Product = checkedMulUnsigned(A, B)) return checkedAddUnsigned(*Product, C); - return llvm::None; + return std::nullopt; } } // End llvm namespace diff --git a/llvm/include/llvm/Support/ELFAttributeParser.h b/llvm/include/llvm/Support/ELFAttributeParser.h --- a/llvm/include/llvm/Support/ELFAttributeParser.h +++ b/llvm/include/llvm/Support/ELFAttributeParser.h @@ -15,6 +15,7 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" +#include #include namespace llvm { @@ -59,16 +60,16 @@ Error parse(ArrayRef section, support::endianness endian); - Optional getAttributeValue(unsigned tag) const { + std::optional getAttributeValue(unsigned tag) const { auto I = attributes.find(tag); if (I == attributes.end()) - return None; + return std::nullopt; return I->second; } - Optional getAttributeString(unsigned tag) const { + std::optional getAttributeString(unsigned tag) const { auto I = attributesStr.find(tag); if (I == attributesStr.end()) - return None; + return std::nullopt; return I->second; } }; diff --git a/llvm/include/llvm/Support/ELFAttributes.h b/llvm/include/llvm/Support/ELFAttributes.h --- a/llvm/include/llvm/Support/ELFAttributes.h +++ b/llvm/include/llvm/Support/ELFAttributes.h @@ -11,6 +11,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include namespace llvm { @@ -27,7 +28,7 @@ StringRef attrTypeAsString(unsigned attr, TagNameMap tagNameMap, bool hasTagPrefix = true); -Optional attrTypeFromString(StringRef tag, TagNameMap tagNameMap); +std::optional attrTypeFromString(StringRef tag, TagNameMap tagNameMap); // Magic numbers for ELF attributes. enum AttrMagic { Format_Version = 0x41 }; diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h --- a/llvm/include/llvm/Support/Error.h +++ b/llvm/include/llvm/Support/Error.h @@ -1054,7 +1054,7 @@ if (E) return std::move(*E); consumeError(E.takeError()); - return None; + return std::nullopt; } /// Helper for converting an Error to a bool. diff --git a/llvm/include/llvm/Support/FileUtilities.h b/llvm/include/llvm/Support/FileUtilities.h --- a/llvm/include/llvm/Support/FileUtilities.h +++ b/llvm/include/llvm/Support/FileUtilities.h @@ -122,7 +122,7 @@ /// Copy LastAccess and ModificationTime if \p CopyDates is true. /// Overwrite stored permissions if \p OverwritePermissions is specified. 
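For the CheckedArithmetic.h hunks above, only the empty-optional sentinel changes (llvm::None becomes std::nullopt); the calling pattern is unaffected. A tiny sketch, with the fallback value chosen arbitrarily:

#include "llvm/Support/CheckedArithmetic.h"
#include <cstdint>

// checkedMulAdd yields an empty optional on signed overflow, otherwise A*B+C.
static int64_t mulAddOrZero(int32_t A, int32_t B, int32_t C) {
  if (auto R = llvm::checkedMulAdd<int32_t>(A, B, C))
    return *R;
  return 0; // overflowed
}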
Error apply(StringRef OutputFilename, bool CopyDates = false, - Optional OverwritePermissions = None); + Optional OverwritePermissions = std::nullopt); private: FilePermissionsApplier(StringRef InputFilename, sys::fs::file_status Status) diff --git a/llvm/include/llvm/Support/Format.h b/llvm/include/llvm/Support/Format.h --- a/llvm/include/llvm/Support/Format.h +++ b/llvm/include/llvm/Support/Format.h @@ -236,7 +236,8 @@ }; inline FormattedBytes -format_bytes(ArrayRef Bytes, Optional FirstByteOffset = None, +format_bytes(ArrayRef Bytes, + Optional FirstByteOffset = std::nullopt, uint32_t NumPerLine = 16, uint8_t ByteGroupSize = 4, uint32_t IndentLevel = 0, bool Upper = false) { return FormattedBytes(Bytes, IndentLevel, FirstByteOffset, NumPerLine, @@ -245,7 +246,7 @@ inline FormattedBytes format_bytes_with_ascii(ArrayRef Bytes, - Optional FirstByteOffset = None, + Optional FirstByteOffset = std::nullopt, uint32_t NumPerLine = 16, uint8_t ByteGroupSize = 4, uint32_t IndentLevel = 0, bool Upper = false) { return FormattedBytes(Bytes, IndentLevel, FirstByteOffset, NumPerLine, diff --git a/llvm/include/llvm/Support/FormatProviders.h b/llvm/include/llvm/Support/FormatProviders.h --- a/llvm/include/llvm/Support/FormatProviders.h +++ b/llvm/include/llvm/Support/FormatProviders.h @@ -63,10 +63,10 @@ size_t Prec; Optional Result; if (Str.empty()) - Result = None; + Result = std::nullopt; else if (Str.getAsInteger(10, Prec)) { assert(false && "Invalid precision specifier"); - Result = None; + Result = std::nullopt; } else { assert(Prec < 100 && "Precision out of range"); Result = std::min(99u, Prec); diff --git a/llvm/include/llvm/Support/InitLLVM.h b/llvm/include/llvm/Support/InitLLVM.h --- a/llvm/include/llvm/Support/InitLLVM.h +++ b/llvm/include/llvm/Support/InitLLVM.h @@ -9,7 +9,6 @@ #ifndef LLVM_SUPPORT_INITLLVM_H #define LLVM_SUPPORT_INITLLVM_H -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/PrettyStackTrace.h" diff --git a/llvm/include/llvm/Support/InstructionCost.h b/llvm/include/llvm/Support/InstructionCost.h --- a/llvm/include/llvm/Support/InstructionCost.h +++ b/llvm/include/llvm/Support/InstructionCost.h @@ -88,7 +88,7 @@ std::optional getValue() const { if (isValid()) return Value; - return None; + return std::nullopt; } /// For all of the arithmetic operators provided here any invalid state is diff --git a/llvm/include/llvm/Support/JSON.h b/llvm/include/llvm/Support/JSON.h --- a/llvm/include/llvm/Support/JSON.h +++ b/llvm/include/llvm/Support/JSON.h @@ -404,12 +404,12 @@ llvm::Optional getAsNull() const { if (LLVM_LIKELY(Type == T_Null)) return nullptr; - return llvm::None; + return std::nullopt; } llvm::Optional getAsBoolean() const { if (LLVM_LIKELY(Type == T_Boolean)) return as(); - return llvm::None; + return std::nullopt; } llvm::Optional getAsNumber() const { if (LLVM_LIKELY(Type == T_Double)) @@ -418,7 +418,7 @@ return as(); if (LLVM_LIKELY(Type == T_UINT64)) return as(); - return llvm::None; + return std::nullopt; } // Succeeds if the Value is a Number, and exactly representable as int64_t. 
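In the JSON.h hunks, the getAs* accessors keep their optional return types and simply report a type mismatch with an empty optional (now spelled std::nullopt). A short usage sketch:

#include "llvm/Support/JSON.h"

// Falls back to Default when V is not a number.
static double numberOr(const llvm::json::Value &V, double Default) {
  if (auto N = V.getAsNumber())
    return *N;
  return Default;
}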
llvm::Optional getAsInteger() const { @@ -431,7 +431,7 @@ D <= double(std::numeric_limits::max()))) return D; } - return llvm::None; + return std::nullopt; } llvm::Optional getAsUINT64() const { if (Type == T_UINT64) @@ -441,14 +441,14 @@ if (N >= 0) return as(); } - return llvm::None; + return std::nullopt; } llvm::Optional getAsString() const { if (Type == T_String) return llvm::StringRef(as()); if (LLVM_LIKELY(Type == T_StringRef)) return as(); - return llvm::None; + return std::nullopt; } const json::Object *getAsObject() const { return LLVM_LIKELY(Type == T_Object) ? &as() : nullptr; @@ -764,7 +764,7 @@ template bool fromJSON(const Value &E, llvm::Optional &Out, Path P) { if (E.getAsNull()) { - Out = llvm::None; + Out = std::nullopt; return true; } T Result; @@ -845,7 +845,7 @@ assert(*this && "Must check this is an object before calling map()"); if (const Value *E = O->get(Prop)) return fromJSON(*E, Out, P.field(Prop)); - Out = llvm::None; + Out = std::nullopt; return true; } diff --git a/llvm/include/llvm/Support/LineIterator.h b/llvm/include/llvm/Support/LineIterator.h --- a/llvm/include/llvm/Support/LineIterator.h +++ b/llvm/include/llvm/Support/LineIterator.h @@ -9,11 +9,11 @@ #ifndef LLVM_SUPPORT_LINEITERATOR_H #define LLVM_SUPPORT_LINEITERATOR_H -#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/MemoryBufferRef.h" #include +#include namespace llvm { @@ -31,7 +31,7 @@ /// /// Note that this iterator requires the buffer to be nul terminated. class line_iterator { - Optional Buffer; + std::optional Buffer; char CommentMarker = '\0'; bool SkipBlanks = true; diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -753,6 +753,18 @@ return Z; } +/// Add multiple unsigned integers of type T. Clamp the result to the +/// maximum representable value of T on overflow. +template +std::enable_if_t, T> SaturatingAdd(T X, T Y, T Z, + Ts... Args) { + bool Overflowed = false; + T XY = SaturatingAdd(X, Y, &Overflowed); + if (Overflowed) + return SaturatingAdd(std::numeric_limits::max(), T(1), Args...); + return SaturatingAdd(XY, Z, Args...); +} + /// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the /// maximum representable value of T on overflow. ResultOverflowed indicates if /// the result is larger than the maximum representable value of type T. diff --git a/llvm/include/llvm/Support/MemoryBuffer.h b/llvm/include/llvm/Support/MemoryBuffer.h --- a/llvm/include/llvm/Support/MemoryBuffer.h +++ b/llvm/include/llvm/Support/MemoryBuffer.h @@ -97,7 +97,7 @@ static ErrorOr> getFile(const Twine &Filename, bool IsText = false, bool RequiresNullTerminator = true, bool IsVolatile = false, - Optional Alignment = None); + Optional Alignment = std::nullopt); /// Read all of the specified file into a MemoryBuffer as a stream /// (i.e. until EOF reached). This is useful for special files that @@ -111,7 +111,7 @@ static ErrorOr> getOpenFileSlice(sys::fs::file_t FD, const Twine &Filename, uint64_t MapSize, int64_t Offset, bool IsVolatile = false, - Optional Alignment = None); + Optional Alignment = std::nullopt); /// Given an already-open file descriptor, read the file and return a /// MemoryBuffer. 
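The new variadic SaturatingAdd in MathExtras.h above folds any number of unsigned addends of the same type T, clamping to the type's maximum once an intermediate sum overflows. A sketch; note that every addend must already have type T for template deduction to succeed in the recursive call:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

static void saturatingAddExamples() {
  // 200 + 100 already saturates a uint8_t, so the result stays at 255.
  uint8_t Clamped = llvm::SaturatingAdd(uint8_t(200), uint8_t(100),
                                        uint8_t(50)); // 255
  // No overflow: behaves like an ordinary sum.
  uint64_t Sum = llvm::SaturatingAdd(uint64_t(1), uint64_t(2), uint64_t(3),
                                     uint64_t(4)); // 10
  (void)Clamped;
  (void)Sum;
}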
@@ -125,7 +125,7 @@ static ErrorOr> getOpenFile(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize, bool RequiresNullTerminator = true, bool IsVolatile = false, - Optional Alignment = None); + Optional Alignment = std::nullopt); /// Open the specified memory range as a MemoryBuffer. Note that InputData /// must be null terminated if RequiresNullTerminator is true. @@ -149,12 +149,13 @@ static ErrorOr> getFileOrSTDIN(const Twine &Filename, bool IsText = false, bool RequiresNullTerminator = true, - Optional Alignment = None); + Optional Alignment = std::nullopt); /// Map a subrange of the specified file as a MemoryBuffer. static ErrorOr> getFileSlice(const Twine &Filename, uint64_t MapSize, uint64_t Offset, - bool IsVolatile = false, Optional Alignment = None); + bool IsVolatile = false, + Optional Alignment = std::nullopt); //===--------------------------------------------------------------------===// // Provided for performance analysis. @@ -200,12 +201,13 @@ static ErrorOr> getFile(const Twine &Filename, bool IsVolatile = false, - Optional Alignment = None); + Optional Alignment = std::nullopt); /// Map a subrange of the specified file as a WritableMemoryBuffer. static ErrorOr> getFileSlice(const Twine &Filename, uint64_t MapSize, uint64_t Offset, - bool IsVolatile = false, Optional Alignment = None); + bool IsVolatile = false, + Optional Alignment = std::nullopt); /// Allocate a new MemoryBuffer of the specified size that is not initialized. /// Note that the caller should initialize the memory allocated by this @@ -215,7 +217,7 @@ /// least the specified alignment. static std::unique_ptr getNewUninitMemBuffer(size_t Size, const Twine &BufferName = "", - Optional Alignment = None); + Optional Alignment = std::nullopt); /// Allocate a new zero-initialized MemoryBuffer of the specified size. Note /// that the caller need not initialize the memory allocated by this method. diff --git a/llvm/include/llvm/Support/NativeFormatting.h b/llvm/include/llvm/Support/NativeFormatting.h --- a/llvm/include/llvm/Support/NativeFormatting.h +++ b/llvm/include/llvm/Support/NativeFormatting.h @@ -38,9 +38,9 @@ IntegerStyle Style); void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, - Optional Width = None); + Optional Width = std::nullopt); void write_double(raw_ostream &S, double D, FloatStyle Style, - Optional Precision = None); + Optional Precision = std::nullopt); } #endif diff --git a/llvm/include/llvm/Support/Program.h b/llvm/include/llvm/Support/Program.h --- a/llvm/include/llvm/Support/Program.h +++ b/llvm/include/llvm/Support/Program.h @@ -14,12 +14,12 @@ #define LLVM_SUPPORT_PROGRAM_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include +#include #include namespace llvm { @@ -107,11 +107,12 @@ ArrayRef Args, ///< An array of strings that are passed to the ///< program. The first element should be the name of the program. ///< The array should **not** be terminated by an empty StringRef. - Optional> Env = None, ///< An optional vector of + std::optional> Env = + std::nullopt, ///< An optional vector of ///< strings to use for the program's environment. If not provided, the ///< current program's environment will be used. If specified, the ///< vector should **not** be terminated by an empty StringRef. - ArrayRef> Redirects = {}, ///< + ArrayRef> Redirects = {}, ///< ///< An array of optional paths. 
Should have a size of zero or three. ///< If the array is empty, no redirections are performed. ///< Otherwise, the inferior process's stdin(0), stdout(1), and stderr(2) @@ -133,7 +134,7 @@ ///< string is non-empty upon return an error occurred while invoking the ///< program. bool *ExecutionFailed = nullptr, - Optional *ProcStat = nullptr, ///< If non-zero, + std::optional *ProcStat = nullptr, ///< If non-zero, /// provides a pointer to a structure in which process execution /// statistics will be stored. BitVector *AffinityMask = nullptr ///< CPUs or processors the new @@ -146,8 +147,8 @@ /// \see Wait until the process finished execution or win32 CloseHandle() API /// on ProcessInfo.ProcessHandle to avoid memory leaks. ProcessInfo ExecuteNoWait(StringRef Program, ArrayRef Args, - Optional> Env, - ArrayRef> Redirects = {}, + std::optional> Env, + ArrayRef> Redirects = {}, unsigned MemoryLimit = 0, std::string *ErrMsg = nullptr, bool *ExecutionFailed = nullptr, @@ -216,7 +217,8 @@ ///< string instance in which error messages will be returned. If the ///< string is non-empty upon return an error occurred while invoking the ///< program. - Optional *ProcStat = nullptr ///< If non-zero, provides + std::optional *ProcStat = + nullptr ///< If non-zero, provides /// a pointer to a structure in which process execution statistics will be /// stored. ); diff --git a/llvm/include/llvm/Support/SaveAndRestore.h b/llvm/include/llvm/Support/SaveAndRestore.h --- a/llvm/include/llvm/Support/SaveAndRestore.h +++ b/llvm/include/llvm/Support/SaveAndRestore.h @@ -34,6 +34,11 @@ T OldValue; }; +// User-defined CTAD guides. +template SaveAndRestore(T &) -> SaveAndRestore; +template SaveAndRestore(T &, const T &) -> SaveAndRestore; +template SaveAndRestore(T &, T &&) -> SaveAndRestore; + } // namespace llvm #endif diff --git a/llvm/include/llvm/Support/Threading.h b/llvm/include/llvm/Support/Threading.h --- a/llvm/include/llvm/Support/Threading.h +++ b/llvm/include/llvm/Support/Threading.h @@ -19,6 +19,7 @@ #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX #include "llvm/Support/Compiler.h" #include // So we can check the C++ standard lib macros. +#include #if defined(_MSC_VER) // MSVC's call_once implementation worked since VS 2015, which is the minimum @@ -140,7 +141,7 @@ /// Finds the CPU socket where a thread should go. Returns 'None' if the /// thread shall remain on the actual CPU socket. - Optional compute_cpu_socket(unsigned ThreadPoolNum) const; + std::optional compute_cpu_socket(unsigned ThreadPoolNum) const; }; /// Build a strategy from a number of threads as a string provided in \p Num. @@ -148,7 +149,7 @@ /// strategy, we attempt to equally allocate the threads on all CPU sockets. /// "0" or an empty string will return the \p Default strategy. /// "all" for using all hardware threads. - Optional + std::optional get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default = {}); /// Returns a thread strategy for tasks requiring significant memory or other @@ -170,7 +171,7 @@ /// If \p Num is invalid, returns a default strategy where one thread per /// hardware core is used. 
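With the SaveAndRestore deduction guides added above, a scoped save/restore no longer needs the element type spelled out. A small sketch of the intended use:

#include "llvm/Support/SaveAndRestore.h"

static unsigned Depth = 0;

void enterNestedScope() {
  // Deduces SaveAndRestore<unsigned>: remembers the old Depth, installs the
  // incremented value, and restores the original when Guard goes out of scope.
  llvm::SaveAndRestore Guard(Depth, Depth + 1);
  // ... work at the deeper nesting level ...
} // Depth is back to its previous value here.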
inline ThreadPoolStrategy heavyweight_hardware_concurrency(StringRef Num) { - Optional S = + std::optional S = get_threadpool_strategy(Num, heavyweight_hardware_concurrency()); if (S) return *S; diff --git a/llvm/include/llvm/Support/VersionTuple.h b/llvm/include/llvm/Support/VersionTuple.h --- a/llvm/include/llvm/Support/VersionTuple.h +++ b/llvm/include/llvm/Support/VersionTuple.h @@ -75,21 +75,21 @@ /// Retrieve the minor version number, if provided. Optional getMinor() const { if (!HasMinor) - return None; + return std::nullopt; return Minor; } /// Retrieve the subminor version number, if provided. Optional getSubminor() const { if (!HasSubminor) - return None; + return std::nullopt; return Subminor; } /// Retrieve the build version number, if provided. Optional getBuild() const { if (!HasBuild) - return None; + return std::nullopt; return Build; } diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h --- a/llvm/include/llvm/Support/VirtualFileSystem.h +++ b/llvm/include/llvm/Support/VirtualFileSystem.h @@ -534,9 +534,10 @@ /// different contents. bool addFile(const Twine &Path, time_t ModificationTime, std::unique_ptr Buffer, - Optional User = None, Optional Group = None, - Optional Type = None, - Optional Perms = None); + Optional User = std::nullopt, + Optional Group = std::nullopt, + Optional Type = std::nullopt, + Optional Perms = std::nullopt); /// Add a hard link to a file. /// @@ -562,9 +563,10 @@ /// to refer to a file (or refer to anything, as it happens). Also, an /// in-memory directory for \p Target isn't automatically created. bool addSymbolicLink(const Twine &NewLink, const Twine &Target, - time_t ModificationTime, Optional User = None, - Optional Group = None, - Optional Perms = None); + time_t ModificationTime, + Optional User = std::nullopt, + Optional Group = std::nullopt, + Optional Perms = std::nullopt); /// Add a buffer to the VFS with a path. The VFS does not own the buffer. /// If present, User, Group, Type and Perms apply to the newly-created file @@ -574,10 +576,10 @@ /// different contents. 
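The User/Group/Type/Perms parameters of InMemoryFileSystem::addFile above are the ones most callers never set, which is why they default to std::nullopt. A typical in-memory VFS setup supplies only the path, timestamp and buffer (path and contents are made up):

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"

void populate() {
  llvm::vfs::InMemoryFileSystem FS;
  // User, Group, Type and Perms all fall back to their std::nullopt defaults.
  FS.addFile("/virtual/hello.txt", /*ModificationTime=*/0,
             llvm::MemoryBuffer::getMemBuffer("hello, world\n"));
  // Re-adding the same path with identical contents succeeds; adding it with
  // different contents is rejected, as the doc comment above notes.
}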
bool addFileNoOwn(const Twine &Path, time_t ModificationTime, const llvm::MemoryBufferRef &Buffer, - Optional User = None, - Optional Group = None, - Optional Type = None, - Optional Perms = None); + Optional User = std::nullopt, + Optional Group = std::nullopt, + Optional Type = std::nullopt, + Optional Perms = std::nullopt); std::string toString() const; @@ -871,7 +873,7 @@ return StringRef(*ExternalRedirect); if (auto *FE = dyn_cast(E)) return FE->getExternalContentsPath(); - return None; + return std::nullopt; } }; diff --git a/llvm/include/llvm/Target/CodeGenCWrappers.h b/llvm/include/llvm/Target/CodeGenCWrappers.h --- a/llvm/include/llvm/Target/CodeGenCWrappers.h +++ b/llvm/include/llvm/Target/CodeGenCWrappers.h @@ -19,17 +19,18 @@ #include "llvm/ADT/Optional.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" +#include namespace llvm { -inline Optional unwrap(LLVMCodeModel Model, bool &JIT) { +inline std::optional unwrap(LLVMCodeModel Model, bool &JIT) { JIT = false; switch (Model) { case LLVMCodeModelJITDefault: JIT = true; [[fallthrough]]; case LLVMCodeModelDefault: - return None; + return std::nullopt; case LLVMCodeModelTiny: return CodeModel::Tiny; case LLVMCodeModelSmall: diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h --- a/llvm/include/llvm/Target/TargetMachine.h +++ b/llvm/include/llvm/Target/TargetMachine.h @@ -23,6 +23,7 @@ #include "llvm/Support/PGOOptions.h" #include "llvm/Target/CGPassBuilderOption.h" #include "llvm/Target/TargetOptions.h" +#include #include #include @@ -110,7 +111,7 @@ unsigned O0WantsFastISel : 1; // PGO related tunables. - Optional PGOOption = None; + Optional PGOOption = std::nullopt; public: const TargetOptions DefaultOptions; @@ -497,8 +498,9 @@ /// CM does not have a value. The tiny and kernel models will produce /// an error, so targets that support them or require more complex codemodel /// selection logic should implement and call their own getEffectiveCodeModel. -inline CodeModel::Model getEffectiveCodeModel(Optional CM, - CodeModel::Model Default) { +inline CodeModel::Model +getEffectiveCodeModel(std::optional CM, + CodeModel::Model Default) { if (CM) { // By default, targets do not support the tiny and kernel models. if (*CM == CodeModel::Tiny) diff --git a/llvm/include/llvm/Testing/Support/Error.h b/llvm/include/llvm/Testing/Support/Error.h --- a/llvm/include/llvm/Testing/Support/Error.h +++ b/llvm/include/llvm/Testing/Support/Error.h @@ -196,7 +196,7 @@ template testing::Matcher Failed() { - return MakeMatcher(new detail::ErrorMatchesMono(None)); + return MakeMatcher(new detail::ErrorMatchesMono(std::nullopt)); } template diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1348,7 +1348,7 @@ DenseSet *Allowed = nullptr; /// Maximum number of iterations to run until fixpoint. - std::optional MaxFixpointIterations = None; + std::optional MaxFixpointIterations = std::nullopt; /// A callback function that returns an ORE object from a Function pointer. 
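getEffectiveCodeModel in the TargetMachine.h hunk above is the hook a backend uses to turn a possibly-absent code model into a concrete one. A sketch of the usual call (the surrounding helper is hypothetical):

#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
#include <optional>

// Hypothetical helper a backend might call from its TargetMachine constructor.
llvm::CodeModel::Model
pickCodeModel(std::optional<llvm::CodeModel::Model> RequestedCM) {
  // Falls back to Small when nothing was requested; the generic helper reports
  // an error for Tiny and Kernel unless a target implements its own logic.
  return llvm::getEffectiveCodeModel(RequestedCM, llvm::CodeModel::Small);
}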
///{ @@ -4392,7 +4392,7 @@ AA::getWithType(*ConstantInt::get(Ty->getContext(), *C), *Ty)); } if (RangeV.isEmptySet()) - return llvm::None; + return std::nullopt; return nullptr; } @@ -4647,7 +4647,7 @@ if (getAssumedSet().size() == 0) { if (undefIsContained()) return UndefValue::get(getAssociatedValue().getType()); - return llvm::None; + return std::nullopt; } return nullptr; diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h --- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -378,12 +378,12 @@ LoopInfo *getLoopInfo() const { return LI; } // Call target specific combiners - Optional targetInstCombineIntrinsic(IntrinsicInst &II); - Optional + std::optional targetInstCombineIntrinsic(IntrinsicInst &II); + std::optional targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed); - Optional targetSimplifyDemandedVectorEltsIntrinsic( + std::optional targetSimplifyDemandedVectorEltsIntrinsic( IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h --- a/llvm/include/llvm/Transforms/Scalar/GVN.h +++ b/llvm/include/llvm/Transforms/Scalar/GVN.h @@ -72,11 +72,11 @@ /// Intended use is to create a default object, modify parameters with /// additional setters and then pass it to GVN. struct GVNOptions { - std::optional AllowPRE = None; - std::optional AllowLoadPRE = None; - std::optional AllowLoadInLoopPRE = None; - std::optional AllowLoadPRESplitBackedge = None; - std::optional AllowMemDep = None; + std::optional AllowPRE = std::nullopt; + std::optional AllowLoadPRE = std::nullopt; + std::optional AllowLoadInLoopPRE = std::nullopt; + std::optional AllowLoadPRESplitBackedge = std::nullopt; + std::optional AllowMemDep = std::nullopt; GVNOptions() = default; diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h --- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -403,7 +403,7 @@ // Check the PassInstrumentation's BeforePass callbacks before running the // pass, skip its execution completely if asked to (callback returns false). if (!PI.runBeforePass(*Pass, L)) - return None; + return std::nullopt; PreservedAnalyses PA = Pass->run(IR, AM, AR, U); diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h --- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -574,12 +574,11 @@ /// for the caller to accomplish, since each specific use of this function /// may have additional information which simplifies this fixup. For example, /// see restoreSSA() in the UnifyLoopExits pass. 
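GVNOptions above is meant to be built fluently: default-construct it, flip only the knobs you care about, and hand it to the pass. A short sketch (the chosen settings are arbitrary):

#include "llvm/Transforms/Scalar/GVN.h"

// PRE forced on, load-PRE forced off; every untouched knob keeps its
// std::nullopt default and defers to the command-line/target defaults.
llvm::GVNPass makeTunedGVN() {
  llvm::GVNOptions Opts;
  Opts.setPRE(true).setLoadPRE(false);
  return llvm::GVNPass(Opts);
}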
-BasicBlock *CreateControlFlowHub(DomTreeUpdater *DTU, - SmallVectorImpl &GuardBlocks, +BasicBlock *CreateControlFlowHub( + DomTreeUpdater *DTU, SmallVectorImpl &GuardBlocks, const SetVector &Predecessors, - const SetVector &Successors, - const StringRef Prefix, - Optional MaxControlFlowBooleans = None); + const SetVector &Successors, const StringRef Prefix, + Optional MaxControlFlowBooleans = std::nullopt); } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h --- a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h +++ b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h @@ -14,7 +14,8 @@ #ifndef LLVM_TRANSFORMS_UTILS_LOWERMEMINTRINSICS_H #define LLVM_TRANSFORMS_UTILS_LOWERMEMINTRINSICS_H -#include "llvm/ADT/Optional.h" +#include +#include namespace llvm { @@ -31,21 +32,19 @@ /// Emit a loop implementing the semantics of llvm.memcpy where the size is not /// a compile-time constant. Loop will be insterted at \p InsertBefore. -void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, - Value *DstAddr, Value *CopyLen, Align SrcAlign, - Align DestAlign, bool SrcIsVolatile, - bool DstIsVolatile, bool CanOverlap, - const TargetTransformInfo &TTI, - Optional AtomicSize = None); +void createMemCpyLoopUnknownSize( + Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, + Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile, + bool CanOverlap, const TargetTransformInfo &TTI, + std::optional AtomicSize = std::nullopt); /// Emit a loop implementing the semantics of an llvm.memcpy whose size is a /// compile time constant. Loop is inserted at \p InsertBefore. -void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, - Value *DstAddr, ConstantInt *CopyLen, - Align SrcAlign, Align DestAlign, - bool SrcIsVolatile, bool DstIsVolatile, - bool CanOverlap, const TargetTransformInfo &TTI, - Optional AtomicCpySize = None); +void createMemCpyLoopKnownSize( + Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, + ConstantInt *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile, + bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, + std::optional AtomicCpySize = std::nullopt); /// Expand \p MemCpy as a loop. \p MemCpy is not deleted. void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h --- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -89,9 +89,9 @@ /// parameter. These are used by an implementation to opt-into stricter /// checking. bool isFortifiedCallFoldable(CallInst *CI, unsigned ObjSizeOp, - Optional SizeOp = None, - Optional StrOp = None, - Optional FlagsOp = None); + Optional SizeOp = std::nullopt, + Optional StrOp = std::nullopt, + Optional FlagsOp = std::nullopt); }; /// LibCallSimplifier - This class implements a collection of optimizations diff --git a/llvm/include/llvm/WindowsDriver/MSVCPaths.h b/llvm/include/llvm/WindowsDriver/MSVCPaths.h --- a/llvm/include/llvm/WindowsDriver/MSVCPaths.h +++ b/llvm/include/llvm/WindowsDriver/MSVCPaths.h @@ -13,6 +13,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include #include namespace llvm { @@ -65,25 +66,24 @@ /// Get Windows SDK installation directory. 
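The createMemCpyLoopUnknownSize signature above is everything a lowering pass needs to expand a memcpy whose length is only known at run time. A hedged sketch of such a caller (the pass context, MCI and TTI are assumed, and the trailing atomic-size argument is simply left at its std::nullopt default):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"

// Replace a dynamic-length memcpy with an explicit copy loop, then drop the
// original intrinsic. MCI is a MemCpyInst some pass is visiting; TTI is the
// TargetTransformInfo of the enclosing function.
void lowerDynamicMemCpy(llvm::MemCpyInst &MCI,
                        const llvm::TargetTransformInfo &TTI) {
  llvm::createMemCpyLoopUnknownSize(
      /*InsertBefore=*/&MCI, MCI.getRawSource(), MCI.getRawDest(),
      MCI.getLength(), MCI.getSourceAlign().valueOrOne(),
      MCI.getDestAlign().valueOrOne(), MCI.isVolatile(), MCI.isVolatile(),
      /*CanOverlap=*/false, TTI); // llvm.memcpy operands must not overlap.
  MCI.eraseFromParent();
}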
bool getWindowsSDKDir(vfs::FileSystem &VFS, - llvm::Optional WinSdkDir, - llvm::Optional WinSdkVersion, - llvm::Optional WinSysRoot, + std::optional WinSdkDir, + std::optional WinSdkVersion, + std::optional WinSysRoot, std::string &Path, int &Major, std::string &WindowsSDKIncludeVersion, std::string &WindowsSDKLibVersion); bool getUniversalCRTSdkDir(vfs::FileSystem &VFS, - llvm::Optional WinSdkDir, - llvm::Optional WinSdkVersion, - llvm::Optional WinSysRoot, - std::string &Path, - std::string &UCRTVersion); + std::optional WinSdkDir, + std::optional WinSdkVersion, + std::optional WinSysRoot, + std::string &Path, std::string &UCRTVersion); // Check command line arguments to try and find a toolchain. bool findVCToolChainViaCommandLine( - vfs::FileSystem &VFS, llvm::Optional VCToolsDir, - llvm::Optional VCToolsVersion, - llvm::Optional WinSysRoot, std::string &Path, + vfs::FileSystem &VFS, std::optional VCToolsDir, + std::optional VCToolsVersion, + std::optional WinSysRoot, std::string &Path, ToolsetLayout &VSLayout); // Check various environment variables to try and find a toolchain. diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp --- a/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/llvm/lib/Analysis/AliasAnalysis.cpp @@ -603,7 +603,7 @@ ModRefInfo AAResults::getModRefInfo(const Instruction *I, const Optional &OptLoc, AAQueryInfo &AAQIP) { - if (OptLoc == None) { + if (OptLoc == std::nullopt) { if (const auto *Call = dyn_cast(I)) return getMemoryEffects(Call, AAQIP).getModRef(); } diff --git a/llvm/lib/Analysis/AliasAnalysisSummary.cpp b/llvm/lib/Analysis/AliasAnalysisSummary.cpp --- a/llvm/lib/Analysis/AliasAnalysisSummary.cpp +++ b/llvm/lib/Analysis/AliasAnalysisSummary.cpp @@ -79,17 +79,17 @@ auto *V = (Index == 0) ? &Call : Call.getArgOperand(Index - 1); if (V->getType()->isPointerTy()) return InstantiatedValue{V, IValue.DerefLevel}; - return None; + return std::nullopt; } Optional instantiateExternalRelation(ExternalRelation ERelation, CallBase &Call) { auto From = instantiateInterfaceValue(ERelation.From, Call); if (!From) - return None; + return std::nullopt; auto To = instantiateInterfaceValue(ERelation.To, Call); if (!To) - return None; + return std::nullopt; return InstantiatedRelation{*From, *To, ERelation.Offset}; } @@ -97,7 +97,7 @@ CallBase &Call) { auto Value = instantiateInterfaceValue(EAttr.IValue, Call); if (!Value) - return None; + return std::nullopt; return InstantiatedAttr{*Value, EAttr.Attr}; } } diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -574,7 +574,7 @@ // Track whether we've seen at least one in bounds gep, and if so, whether // all geps parsed were in bounds. - if (Decomposed.InBounds == None) + if (Decomposed.InBounds == std::nullopt) Decomposed.InBounds = GEPOp->isInBounds(); else if (!GEPOp->isInBounds()) Decomposed.InBounds = false; @@ -1399,7 +1399,7 @@ // In the recursive alias queries below, we may compare values from two // different loop iterations. 
- SaveAndRestore SavedMayBeCrossIteration(AAQI.MayBeCrossIteration, true); + SaveAndRestore SavedMayBeCrossIteration(AAQI.MayBeCrossIteration, true); AliasResult Alias = AAQI.AAR.alias(MemoryLocation(V1Srcs[0], PNSize), MemoryLocation(V2, V2Size), AAQI); diff --git a/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/llvm/lib/Analysis/BlockFrequencyInfo.cpp --- a/llvm/lib/Analysis/BlockFrequencyInfo.cpp +++ b/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -208,7 +208,7 @@ BlockFrequencyInfo::getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic) const { if (!BFI) - return None; + return std::nullopt; return BFI->getBlockProfileCount(*getFunction(), BB, AllowSynthetic); } @@ -216,7 +216,7 @@ Optional BlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const { if (!BFI) - return None; + return std::nullopt; return BFI->getProfileCountFromFreq(*getFunction(), Freq); } diff --git a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp --- a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -599,7 +599,7 @@ bool AllowSynthetic) const { auto EntryCount = F.getEntryCount(AllowSynthetic); if (!EntryCount) - return None; + return std::nullopt; // Use 128 bit APInt to do the arithmetic to avoid overflow. APInt BlockCount(128, EntryCount->getCount()); APInt BlockFreq(128, Freq); diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp --- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -651,7 +651,7 @@ BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock *BB) const { auto WeightIt = EstimatedBlockWeight.find(BB); if (WeightIt == EstimatedBlockWeight.end()) - return None; + return std::nullopt; return WeightIt->second; } @@ -659,7 +659,7 @@ BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData &L) const { auto WeightIt = EstimatedLoopWeight.find(L); if (WeightIt == EstimatedLoopWeight.end()) - return None; + return std::nullopt; return WeightIt->second; } @@ -682,7 +682,7 @@ auto Weight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB}); if (!Weight) - return None; + return std::nullopt; if (!MaxWeight || *MaxWeight < *Weight) MaxWeight = Weight; @@ -805,7 +805,7 @@ if (CI->hasFnAttr(Attribute::Cold)) return static_cast(BlockExecWeight::COLD); - return None; + return std::nullopt; } // Does RPO traversal over all blocks in \p F and assigns weights to diff --git a/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp --- a/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp +++ b/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp @@ -365,7 +365,7 @@ if (Index) return InterfaceValue{*Index, IValue.DerefLevel}; - return None; + return std::nullopt; } static void populateAttrMap(DenseMap &AttrMap, @@ -515,7 +515,7 @@ auto Itr = AttrMap.find(V); if (Itr != AttrMap.end()) return Itr->second; - return None; + return std::nullopt; } bool CFLAndersAAResult::FunctionInfo::mayAlias( @@ -631,7 +631,7 @@ auto NodeBelow = InstantiatedValue{V.Val, V.DerefLevel + 1}; if (Graph.getNode(NodeBelow)) return NodeBelow; - return None; + return std::nullopt; } static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph, diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1906,8 +1906,8 @@ /// \param St Exception flags raised during constant 
evaluation. static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI, APFloat::opStatus St) { - Optional ORM = CI->getRoundingMode(); - Optional EB = CI->getExceptionBehavior(); + std::optional ORM = CI->getRoundingMode(); + std::optional EB = CI->getExceptionBehavior(); // If the operation does not change exception status flags, it is safe // to fold. @@ -1932,7 +1932,7 @@ /// Returns the rounding mode that should be used for constant evaluation. static RoundingMode getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) { - Optional ORM = CI->getRoundingMode(); + std::optional ORM = CI->getRoundingMode(); if (!ORM || *ORM == RoundingMode::Dynamic) // Even if the rounding mode is unknown, try evaluating the operation. // If it does not raise inexact exception, rounding was not applied, @@ -2133,7 +2133,7 @@ // Rounding operations (floor, trunc, ceil, round and nearbyint) do not // raise FP exceptions, unless the argument is signaling NaN. - Optional RM; + std::optional RM; switch (IntrinsicID) { default: break; @@ -2164,12 +2164,12 @@ APFloat::opStatus St = U.roundToIntegral(*RM); if (IntrinsicID == Intrinsic::experimental_constrained_rint && St == APFloat::opInexact) { - Optional EB = CI->getExceptionBehavior(); + std::optional EB = CI->getExceptionBehavior(); if (EB && *EB == fp::ebStrict) return nullptr; } } else if (U.isSignaling()) { - Optional EB = CI->getExceptionBehavior(); + std::optional EB = CI->getExceptionBehavior(); if (EB && *EB != fp::ebIgnore) return nullptr; U = APFloat::getQNaN(U.getSemantics()); diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -402,7 +402,7 @@ }); } setInlineRemark(CB, inlineCostStr(IC)); - return None; + return std::nullopt; } int TotalSecondaryCost = 0; @@ -419,7 +419,7 @@ << "' in other contexts"; }); setInlineRemark(CB, "deferred"); - return None; + return std::nullopt; } LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) << ", Call: " << CB diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -168,7 +168,7 @@ if (!Attr.getValueAsString().getAsInteger(10, AttrValue)) return AttrValue; } - return None; + return std::nullopt; } Optional getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) { @@ -493,7 +493,7 @@ std::optional getSimplifiedValue(Instruction *I) { if (SimplifiedValues.find(I) != SimplifiedValues.end()) return SimplifiedValues[I]; - return None; + return std::nullopt; } // Keep a bunch of stats about the cost savings found so we can print them @@ -584,7 +584,7 @@ bool DecidedByCostBenefit = false; // The cost-benefit pair computed by cost-benefit analysis. - Optional CostBenefit = None; + Optional CostBenefit = std::nullopt; bool SingleBB = true; @@ -817,14 +817,14 @@ // suficient profiling information to determine. std::optional costBenefitAnalysis() { if (!CostBenefitAnalysisEnabled) - return None; + return std::nullopt; // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0 // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by // falling back to the cost-based metric. // TODO: Improve this hacky condition. 
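Both getStringFnAttrAsInt overloads above lean on the slightly counter-intuitive StringRef::getAsInteger convention: it returns true on failure, so the negated call means "parsed successfully". The same pattern in isolation (the attribute name is purely illustrative, not one the inliner defines):

#include "llvm/IR/InstrTypes.h"
#include <optional>

std::optional<int> readIntAttr(llvm::CallBase &CB, llvm::StringRef Kind) {
  llvm::Attribute A = CB.getFnAttr(Kind); // e.g. Kind = "my-cost-override"
  int Value;
  // getAsInteger returns true on parse failure, hence the negation.
  if (A.isValid() && !A.getValueAsString().getAsInteger(10, Value))
    return Value;
  return std::nullopt;
}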
if (Threshold == 0) - return None; + return std::nullopt; assert(GetBFI); BlockFrequencyInfo *CalleeBFI = &(GetBFI(F)); @@ -1056,7 +1056,7 @@ Optional getCostDetails(const Instruction *I) { if (InstructionCostDetailMap.find(I) != InstructionCostDetailMap.end()) return InstructionCostDetailMap[I]; - return None; + return std::nullopt; } virtual ~InlineCostCallAnalyzer() = default; @@ -1793,7 +1793,7 @@ // Otherwise we need BFI to be available and to have a locally hot callsite // threshold. if (!CallerBFI || !Params.LocallyHotCallSiteThreshold) - return None; + return std::nullopt; // Determine if the callsite is hot relative to caller's entry. We could // potentially cache the computation of scaled entry frequency, but the added @@ -1806,7 +1806,7 @@ return Params.LocallyHotCallSiteThreshold; // Otherwise treat it normally. - return None; + return std::nullopt; } void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) { @@ -2850,7 +2850,7 @@ /*IgnoreThreshold*/ true); auto R = CA.analyze(); if (!R.isSuccess()) - return None; + return std::nullopt; return CA.getCost(); } @@ -2863,7 +2863,7 @@ ORE, *Call.getCalledFunction(), Call); auto R = CFA.analyze(); if (!R.isSuccess()) - return None; + return std::nullopt; return CFA.features(); } @@ -2935,7 +2935,7 @@ if (Call.isNoInline()) return InlineResult::failure("noinline call site attribute"); - return None; + return std::nullopt; } InlineCost llvm::getInlineCost( diff --git a/llvm/lib/Analysis/InlineOrder.cpp b/llvm/lib/Analysis/InlineOrder.cpp --- a/llvm/lib/Analysis/InlineOrder.cpp +++ b/llvm/lib/Analysis/InlineOrder.cpp @@ -22,7 +22,7 @@ #define DEBUG_TYPE "inline-order" -enum class InlinePriorityMode : int { Size, Cost, CostBenefit }; +enum class InlinePriorityMode : int { Size, Cost, CostBenefit, ML }; static cl::opt UseInlinePriority( "inline-priority-mode", cl::init(InlinePriorityMode::Size), cl::Hidden, @@ -32,7 +32,9 @@ clEnumValN(InlinePriorityMode::Cost, "cost", "Use inline cost priority."), clEnumValN(InlinePriorityMode::CostBenefit, "cost-benefit", - "Use cost-benefit ratio."))); + "Use cost-benefit ratio."), + clEnumValN(InlinePriorityMode::ML, "ml", + "Use ML."))); static cl::opt ModuleInlinerTopPriorityThreshold( "moudle-inliner-top-priority-threshold", cl::Hidden, cl::init(0), @@ -84,7 +86,7 @@ } private: - unsigned Size; + unsigned Size = UINT_MAX; }; class CostPriority { @@ -104,7 +106,7 @@ } private: - int Cost; + int Cost = INT_MAX; }; class CostBenefitPriority { @@ -170,11 +172,31 @@ } private: - int Cost; - int StaticBonusApplied; + int Cost = INT_MAX; + int StaticBonusApplied = 0; Optional CostBenefit; }; +class MLPriority { +public: + MLPriority() = default; + MLPriority(const CallBase *CB, FunctionAnalysisManager &FAM, + const InlineParams &Params) { + auto IC = getInlineCostWrapper(const_cast(*CB), FAM, Params); + if (IC.isVariable()) + Cost = IC.getCost(); + else + Cost = IC.isNever() ? 
INT_MAX : INT_MIN; + } + + static bool isMoreDesirable(const MLPriority &P1, const MLPriority &P2) { + return P1.Cost < P2.Cost; + } + +private: + int Cost = INT_MAX; +}; + template class PriorityInlineOrder : public InlineOrder> { using T = std::pair; @@ -274,6 +296,10 @@ LLVM_DEBUG( dbgs() << " Current used priority: cost-benefit priority ---- \n"); return std::make_unique>(FAM, Params); + case InlinePriorityMode::ML: + LLVM_DEBUG( + dbgs() << " Current used priority: ML priority ---- \n"); + return std::make_unique>(FAM, Params); } return nullptr; } diff --git a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp --- a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp +++ b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp @@ -267,7 +267,7 @@ InlineSizeEstimatorAnalysis::Result InlineSizeEstimatorAnalysis::run(const Function &F, FunctionAnalysisManager &FAM) { - return None; + return std::nullopt; } bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; } #endif diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -41,6 +41,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/KnownBits.h" #include +#include using namespace llvm; using namespace llvm::PatternMatch; @@ -6219,7 +6220,7 @@ if (!Attr.isValid()) return nullptr; unsigned VScaleMin = Attr.getVScaleRangeMin(); - Optional VScaleMax = Attr.getVScaleRangeMax(); + std::optional VScaleMax = Attr.getVScaleRangeMax(); if (VScaleMax && VScaleMin == VScaleMax) return ConstantInt::get(F->getReturnType(), VScaleMin); return nullptr; diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -215,14 +215,14 @@ BasicBlock *BB) const { const BlockCacheEntry *Entry = getBlockEntry(BB); if (!Entry) - return None; + return std::nullopt; if (Entry->OverDefined.count(V)) return ValueLatticeElement::getOverdefined(); auto LatticeIt = Entry->LatticeElements.find_as(V); if (LatticeIt == Entry->LatticeElements.end()) - return None; + return std::nullopt; return LatticeIt->second; } @@ -551,7 +551,7 @@ return ValueLatticeElement::getOverdefined(); // Yet to be resolved. 
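PriorityInlineOrder is parameterized over a small priority type, and the three existing classes plus the new MLPriority show the full contract: a default constructor, a (CallBase, FunctionAnalysisManager, InlineParams) constructor, and a static isMoreDesirable. A hypothetical priority following that shape (not part of this patch, and it would still need its own InlinePriorityMode case to be selectable):

#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
#include <climits>

// Prefers callees with fewer basic blocks; smaller is more desirable,
// mirroring SizePriority above.
class BlockCountPriority {
public:
  BlockCountPriority() = default;
  BlockCountPriority(const llvm::CallBase *CB,
                     llvm::FunctionAnalysisManager &FAM,
                     const llvm::InlineParams &Params) {
    if (const llvm::Function *Callee = CB->getCalledFunction())
      Blocks = static_cast<unsigned>(Callee->size());
  }

  static bool isMoreDesirable(const BlockCountPriority &P1,
                              const BlockCountPriority &P2) {
    return P1.Blocks < P2.Blocks;
  }

private:
  unsigned Blocks = UINT_MAX;
};
// Wired up the same way as the ML case above:
//   std::make_unique<PriorityInlineOrder<BlockCountPriority>>(FAM, Params);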
- return None; + return std::nullopt; } static ValueLatticeElement getFromRangeMetadata(Instruction *BBI) { @@ -694,7 +694,7 @@ Optional EdgeResult = getEdgeValue(Val, Pred, BB); if (!EdgeResult) // Explore that input, then return here - return None; + return std::nullopt; Result.mergeIn(*EdgeResult); @@ -730,7 +730,7 @@ getEdgeValue(PhiVal, PhiBB, BB, PN); if (!EdgeResult) // Explore that input, then return here - return None; + return std::nullopt; Result.mergeIn(*EdgeResult); @@ -809,13 +809,13 @@ Optional OptTrueVal = getBlockValue(SI->getTrueValue(), BB, SI); if (!OptTrueVal) - return None; + return std::nullopt; ValueLatticeElement &TrueVal = *OptTrueVal; Optional OptFalseVal = getBlockValue(SI->getFalseValue(), BB, SI); if (!OptFalseVal) - return None; + return std::nullopt; ValueLatticeElement &FalseVal = *OptFalseVal; if (TrueVal.isConstantRange() || FalseVal.isConstantRange()) { @@ -889,7 +889,7 @@ BasicBlock *BB) { Optional OptVal = getBlockValue(V, BB, CxtI); if (!OptVal) - return None; + return std::nullopt; return getConstantRangeOrFull(*OptVal, V->getType(), DL); } @@ -922,7 +922,7 @@ Optional LHSRes = getRangeFor(CI->getOperand(0), CI, BB); if (!LHSRes) // More work to do before applying this transfer rule. - return None; + return std::nullopt; const ConstantRange &LHSRange = LHSRes.value(); const unsigned ResultBitWidth = CI->getType()->getIntegerBitWidth(); @@ -946,7 +946,7 @@ Optional RHSRes = getRangeFor(I->getOperand(1), I, BB); if (!LHSRes || !RHSRes) // More work to do before applying this transfer rule. - return None; + return std::nullopt; const ConstantRange &LHSRange = LHSRes.value(); const ConstantRange &RHSRange = RHSRes.value(); @@ -998,7 +998,7 @@ for (Value *Op : II->args()) { Optional Range = getRangeFor(Op, II, BB); if (!Range) - return None; + return std::nullopt; OpRanges.push_back(*Range); } @@ -1210,7 +1210,7 @@ Worklist.push_back(L); if (RV == Visited.end()) Worklist.push_back(R); - return None; + return std::nullopt; } return intersect(LV->second, RV->second); @@ -1372,7 +1372,7 @@ if (SwitchInst *SI = dyn_cast(BBFrom->getTerminator())) { Value *Condition = SI->getCondition(); if (!isa(Val->getType())) - return None; + return std::nullopt; bool ValUsesConditionAndMayBeFoldable = false; if (Condition != Val) { // Check if Val has Condition as an operand. 
@@ -1380,7 +1380,7 @@ ValUsesConditionAndMayBeFoldable = isOperationFoldable(Usr) && usesOperand(Usr, Condition); if (!ValUsesConditionAndMayBeFoldable) - return None; + return std::nullopt; } assert((Condition == Val || ValUsesConditionAndMayBeFoldable) && "Condition != Val nor Val doesn't use Condition"); @@ -1398,7 +1398,7 @@ ValueLatticeElement EdgeLatticeVal = constantFoldUser(Usr, Condition, CaseValue, DL); if (EdgeLatticeVal.isOverdefined()) - return None; + return std::nullopt; EdgeVal = EdgeLatticeVal.getConstantRange(); } if (DefaultCase) { @@ -1415,7 +1415,7 @@ } return ValueLatticeElement::getRange(std::move(EdgesVals)); } - return None; + return std::nullopt; } /// Compute the value of Val on the edge BBFrom -> BBTo or the value at @@ -1436,7 +1436,7 @@ Optional OptInBlock = getBlockValue(Val, BBFrom, BBFrom->getTerminator()); if (!OptInBlock) - return None; + return std::nullopt; ValueLatticeElement &InBlock = *OptInBlock; // We can use the context instruction (generically the ultimate instruction diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp --- a/llvm/lib/Analysis/Lint.cpp +++ b/llvm/lib/Analysis/Lint.cpp @@ -187,8 +187,8 @@ void Lint::visitCallBase(CallBase &I) { Value *Callee = I.getCalledOperand(); - visitMemoryReference(I, MemoryLocation::getAfter(Callee), None, nullptr, - MemRef::Callee); + visitMemoryReference(I, MemoryLocation::getAfter(Callee), std::nullopt, + nullptr, MemRef::Callee); if (Function *F = dyn_cast(findValue(Callee, /*OffsetOk=*/false))) { @@ -347,26 +347,26 @@ "Undefined behavior: va_start called in a non-varargs function", &I); - visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None, - nullptr, MemRef::Read | MemRef::Write); + visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), + std::nullopt, nullptr, MemRef::Read | MemRef::Write); break; case Intrinsic::vacopy: - visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None, - nullptr, MemRef::Write); - visitMemoryReference(I, MemoryLocation::getForArgument(&I, 1, TLI), None, - nullptr, MemRef::Read); + visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), + std::nullopt, nullptr, MemRef::Write); + visitMemoryReference(I, MemoryLocation::getForArgument(&I, 1, TLI), + std::nullopt, nullptr, MemRef::Read); break; case Intrinsic::vaend: - visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None, - nullptr, MemRef::Read | MemRef::Write); + visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), + std::nullopt, nullptr, MemRef::Read | MemRef::Write); break; case Intrinsic::stackrestore: // Stackrestore doesn't read or write memory, but it sets the // stack pointer, which the compiler may read from or write to // at any time, so check it for both readability and writeability. 
- visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None, - nullptr, MemRef::Read | MemRef::Write); + visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), + std::nullopt, nullptr, MemRef::Read | MemRef::Write); break; case Intrinsic::get_active_lane_mask: if (auto *TripCount = dyn_cast(I.getArgOperand(1))) @@ -588,13 +588,13 @@ } void Lint::visitVAArgInst(VAArgInst &I) { - visitMemoryReference(I, MemoryLocation::get(&I), None, nullptr, + visitMemoryReference(I, MemoryLocation::get(&I), std::nullopt, nullptr, MemRef::Read | MemRef::Write); } void Lint::visitIndirectBrInst(IndirectBrInst &I) { - visitMemoryReference(I, MemoryLocation::getAfter(I.getAddress()), None, - nullptr, MemRef::Branchee); + visitMemoryReference(I, MemoryLocation::getAfter(I.getAddress()), + std::nullopt, nullptr, MemRef::Branchee); Check(I.getNumDestinations() != 0, "Undefined behavior: indirectbr with no destinations", &I); diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1376,7 +1376,7 @@ if (isa(AccessTy)) { LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy << "\n"); - return None; + return std::nullopt; } const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr); @@ -1388,14 +1388,14 @@ if (!AR) { LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr << " SCEV: " << *PtrScev << "\n"); - return None; + return std::nullopt; } // The access function must stride over the innermost loop. if (Lp != AR->getLoop()) { LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " << *Ptr << " SCEV: " << *AR << "\n"); - return None; + return std::nullopt; } // The address calculation must not wrap. Otherwise, a dependence could be @@ -1423,7 +1423,7 @@ LLVM_DEBUG( dbgs() << "LAA: Bad stride - Pointer may wrap in the address space " << *Ptr << " SCEV: " << *AR << "\n"); - return None; + return std::nullopt; } } @@ -1435,7 +1435,7 @@ if (!C) { LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr << " SCEV: " << *AR << "\n"); - return None; + return std::nullopt; } auto &DL = Lp->getHeader()->getModule()->getDataLayout(); @@ -1445,7 +1445,7 @@ // Huge step value - give up. if (APStepVal.getBitWidth() > 64) - return None; + return std::nullopt; int64_t StepVal = APStepVal.getSExtValue(); @@ -1453,7 +1453,7 @@ int64_t Stride = StepVal / Size; int64_t Rem = StepVal % Size; if (Rem) - return None; + return std::nullopt; // If the SCEV could wrap but we have an inbounds gep with a unit stride we // know we can't "wrap around the address space". In case of address space @@ -1470,7 +1470,7 @@ << "LAA: Added an overflow assumption\n"); PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); } else - return None; + return std::nullopt; } return Stride; @@ -1492,14 +1492,14 @@ // Make sure that the element types are the same if required. if (CheckType && ElemTyA != ElemTyB) - return None; + return std::nullopt; unsigned ASA = PtrA->getType()->getPointerAddressSpace(); unsigned ASB = PtrB->getType()->getPointerAddressSpace(); // Check that the address spaces match. if (ASA != ASB) - return None; + return std::nullopt; unsigned IdxWidth = DL.getIndexSizeInBits(ASA); APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0); @@ -1514,7 +1514,7 @@ ASB = cast(PtrB1->getType())->getAddressSpace(); // Check that the address spaces match and that the pointers are valid. 
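To make the stride arithmetic in getPtrStride above concrete: for a loop walking A[i] over an array of i32, the pointer recurrence steps by StepVal = 4 bytes per iteration and the element store size is Size = 4, so Stride = StepVal / Size = 1 and Rem = 0. For A[2*i] the step is 8 bytes, giving Stride = 2. A recurrence stepping by, say, 6 bytes over i32 elements leaves Rem = 2, meaning the access does not advance by a whole number of elements per iteration, and the function returns std::nullopt.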
if (ASA != ASB) - return None; + return std::nullopt; IdxWidth = DL.getIndexSizeInBits(ASA); OffsetA = OffsetA.sextOrTrunc(IdxWidth); @@ -1529,7 +1529,7 @@ const auto *Diff = dyn_cast(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA)); if (!Diff) - return None; + return std::nullopt; Val = Diff->getAPInt().getSExtValue(); } int Size = DL.getTypeStoreSize(ElemTyA); @@ -1539,7 +1539,7 @@ // the bitcasts removal in the provided pointers. if (!StrictCheck || Dist * Size == Val) return Dist; - return None; + return std::nullopt; } bool llvm::sortPtrAccesses(ArrayRef VL, Type *ElemTy, diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp --- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp +++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -196,7 +196,7 @@ << "No spacial reuse, difference between subscript:\n\t" << *LastSubscript << "\n\t" << OtherLastSubscript << "\nis not constant.\n"); - return None; + return std::nullopt; } bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS); @@ -248,7 +248,7 @@ if (SCEVConst == nullptr) { LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n"); - return None; + return std::nullopt; } const ConstantInt &CI = *SCEVConst->getValue(); diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -203,12 +203,12 @@ ScalarEvolution &SE) { InductionDescriptor IndDesc; if (!InductionDescriptor::isInductionPHI(&IndVar, &L, &SE, IndDesc)) - return None; + return std::nullopt; Value *InitialIVValue = IndDesc.getStartValue(); Instruction *StepInst = IndDesc.getInductionBinOp(); if (!InitialIVValue || !StepInst) - return None; + return std::nullopt; const SCEV *Step = IndDesc.getStep(); Value *StepInstOp1 = StepInst->getOperand(1); @@ -221,7 +221,7 @@ Value *FinalIVValue = findFinalIVValue(L, IndVar, *StepInst); if (!FinalIVValue) - return None; + return std::nullopt; return LoopBounds(L, *InitialIVValue, *StepInst, StepValue, *FinalIVValue, SE); @@ -288,7 +288,7 @@ if (PHINode *IndVar = getInductionVariable(SE)) return LoopBounds::getBounds(*this, *IndVar, SE); - return None; + return std::nullopt; } PHINode *Loop::getInductionVariable(ScalarEvolution &SE) const { @@ -1053,7 +1053,7 @@ StringRef Name) { MDNode *MD = findOptionMDForLoop(TheLoop, Name); if (!MD) - return None; + return std::nullopt; switch (MD->getNumOperands()) { case 1: return nullptr; @@ -1068,7 +1068,7 @@ StringRef Name) { MDNode *MD = findOptionMDForLoop(TheLoop, Name); if (!MD) - return None; + return std::nullopt; switch (MD->getNumOperands()) { case 1: // When the value is absent it is interpreted as 'attribute set'. diff --git a/llvm/lib/Analysis/LoopNestAnalysis.cpp b/llvm/lib/Analysis/LoopNestAnalysis.cpp --- a/llvm/lib/Analysis/LoopNestAnalysis.cpp +++ b/llvm/lib/Analysis/LoopNestAnalysis.cpp @@ -128,7 +128,7 @@ // Bail out if we cannot retrieve the outer loop bounds. 
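Loop::getBounds above packages the induction-variable analysis into one optional result, so a consumer only has to check for emptiness. A sketch (L and SE come from the usual loop-pass boilerplate):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/raw_ostream.h"

// For a canonical loop  for (i = 0; i != n; ++i)  this reports an initial IV
// value of 0, a step of 1 and a final IV value of n.
void describeLoop(llvm::Loop &L, llvm::ScalarEvolution &SE) {
  auto Bounds = L.getBounds(SE); // empty if the IV or its bounds are unknown
  if (!Bounds)
    return;
  llvm::errs() << "init:  " << Bounds->getInitialIVValue() << "\n";
  if (llvm::Value *Step = Bounds->getStepValue())
    llvm::errs() << "step:  " << *Step << "\n";
  llvm::errs() << "final: " << Bounds->getFinalIVValue() << "\n";
}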
auto OuterLoopLB = OuterLoop.getBounds(SE); - if (OuterLoopLB == None) { + if (OuterLoopLB == std::nullopt) { LLVM_DEBUG(dbgs() << "Cannot compute loop bounds of OuterLoop: " << OuterLoop << "\n";); return OuterLoopLowerBoundUnknown; diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -176,12 +177,12 @@ // Don't perform a slow TLI lookup, if this function doesn't return a pointer // and thus can't be an allocation function. if (!Callee->getReturnType()->isPointerTy()) - return None; + return std::nullopt; // Make sure that the function is available. LibFunc TLIFn; if (!TLI || !TLI->getLibFunc(*Callee, TLIFn) || !TLI->has(TLIFn)) - return None; + return std::nullopt; const auto *Iter = find_if( AllocationFnData, [TLIFn](const std::pair &P) { @@ -189,11 +190,11 @@ }); if (Iter == std::end(AllocationFnData)) - return None; + return std::nullopt; const AllocFnsTy *FnData = &Iter->second; if ((FnData->AllocTy & AllocTy) != FnData->AllocTy) - return None; + return std::nullopt; // Check function prototype. int FstParam = FnData->FstParam; @@ -209,7 +210,7 @@ FTy->getParamType(SndParam)->isIntegerTy(32) || FTy->getParamType(SndParam)->isIntegerTy(64))) return *FnData; - return None; + return std::nullopt; } static Optional getAllocationData(const Value *V, AllocType AllocTy, @@ -218,7 +219,7 @@ if (const Function *Callee = getCalledFunction(V, IsNoBuiltinCall)) if (!IsNoBuiltinCall) return getAllocationDataForFunction(Callee, AllocTy, TLI); - return None; + return std::nullopt; } static Optional @@ -229,7 +230,7 @@ if (!IsNoBuiltinCall) return getAllocationDataForFunction( Callee, AllocTy, &GetTLI(const_cast(*Callee))); - return None; + return std::nullopt; } static Optional getAllocationSize(const Value *V, @@ -238,7 +239,7 @@ const Function *Callee = getCalledFunction(V, IsNoBuiltinCall); if (!Callee) - return None; + return std::nullopt; // Prefer to use existing information over allocsize. This will give us an // accurate AllocTy. @@ -249,9 +250,9 @@ Attribute Attr = Callee->getFnAttribute(Attribute::AllocSize); if (Attr == Attribute()) - return None; + return std::nullopt; - std::pair> Args = Attr.getAllocSizeArgs(); + std::pair> Args = Attr.getAllocSizeArgs(); AllocFnsTy Result; // Because allocsize only tells us how many bytes are allocated, we're not @@ -401,7 +402,7 @@ // allocsize. The code structure could stand to be cleaned up a bit. Optional FnData = getAllocationSize(CB, TLI); if (!FnData) - return None; + return std::nullopt; // Get the index type for this address space, results and intermediate // computations are performed at that width. @@ -412,14 +413,14 @@ if (FnData->AllocTy == StrDupLike) { APInt Size(IntTyBits, GetStringLength(Mapper(CB->getArgOperand(0)))); if (!Size) - return None; + return std::nullopt; // Strndup limits strlen. if (FnData->FstParam > 0) { const ConstantInt *Arg = dyn_cast(Mapper(CB->getArgOperand(FnData->FstParam))); if (!Arg) - return None; + return std::nullopt; APInt MaxSize = Arg->getValue().zext(IntTyBits); if (Size.ugt(MaxSize)) @@ -431,11 +432,11 @@ const ConstantInt *Arg = dyn_cast(Mapper(CB->getArgOperand(FnData->FstParam))); if (!Arg) - return None; + return std::nullopt; APInt Size = Arg->getValue(); if (!CheckedZextOrTrunc(Size, IntTyBits)) - return None; + return std::nullopt; // Size is determined by just 1 parameter. 
if (FnData->SndParam < 0) @@ -443,16 +444,16 @@ Arg = dyn_cast(Mapper(CB->getArgOperand(FnData->SndParam))); if (!Arg) - return None; + return std::nullopt; APInt NumElems = Arg->getValue(); if (!CheckedZextOrTrunc(NumElems, IntTyBits)) - return None; + return std::nullopt; bool Overflow; Size = Size.umul_ov(NumElems, Overflow); if (Overflow) - return None; + return std::nullopt; return Size; } @@ -528,7 +529,7 @@ return P.first == TLIFn; }); if (Iter == std::end(FreeFnData)) - return None; + return std::nullopt; return Iter->second; } @@ -537,7 +538,7 @@ bool IsNoBuiltin; const Function *Callee = getCalledFunction(I, IsNoBuiltin); if (Callee == nullptr || IsNoBuiltin) - return None; + return std::nullopt; LibFunc TLIFn; if (TLI && TLI->getLibFunc(*Callee, TLIFn) && TLI->has(TLIFn)) { @@ -556,7 +557,7 @@ if (Attr.isValid()) return Attr.getValueAsString(); } - return None; + return std::nullopt; } /// isLibFreeFunction - Returns true if the function is a builtin free() diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp --- a/llvm/lib/Analysis/MemoryLocation.cpp +++ b/llvm/lib/Analysis/MemoryLocation.cpp @@ -74,7 +74,8 @@ RMWI->getAAMetadata()); } -Optional MemoryLocation::getOrNone(const Instruction *Inst) { +std::optional +MemoryLocation::getOrNone(const Instruction *Inst) { switch (Inst->getOpcode()) { case Instruction::Load: return get(cast(Inst)); @@ -87,7 +88,7 @@ case Instruction::AtomicRMW: return get(cast(Inst)); default: - return None; + return std::nullopt; } } @@ -117,39 +118,39 @@ return getForArgument(MI, 0, nullptr); } -Optional +std::optional MemoryLocation::getForDest(const CallBase *CB, const TargetLibraryInfo &TLI) { if (!CB->onlyAccessesArgMemory()) - return None; + return std::nullopt; if (CB->hasOperandBundles()) // TODO: remove implementation restriction - return None; + return std::nullopt; Value *UsedV = nullptr; std::optional UsedIdx; for (unsigned i = 0; i < CB->arg_size(); i++) { if (!CB->getArgOperand(i)->getType()->isPointerTy()) continue; - if (CB->onlyReadsMemory(i)) - continue; + if (CB->onlyReadsMemory(i)) + continue; if (!UsedV) { // First potentially writing parameter UsedV = CB->getArgOperand(i); UsedIdx = i; continue; } - UsedIdx = None; + UsedIdx = std::nullopt; if (UsedV != CB->getArgOperand(i)) // Can't describe writing to two distinct locations. // TODO: This results in an inprecision when two values derived from the // same object are passed as arguments to the same function. - return None; + return std::nullopt; } if (!UsedV) // We don't currently have a way to represent a "does not write" result // and thus have to be conservative and return unknown. - return None; + return std::nullopt; if (UsedIdx) return getForArgument(CB, *UsedIdx, &TLI); diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp --- a/llvm/lib/Analysis/MemorySSA.cpp +++ b/llvm/lib/Analysis/MemorySSA.cpp @@ -694,7 +694,7 @@ addSearches(cast(Res.Result), PausedSearches, PathIndex); } - return None; + return std::nullopt; } template @@ -721,7 +721,7 @@ T &curNode() const { return W->Paths[*N]; } Walker *W = nullptr; - Optional N = None; + Optional N = std::nullopt; }; using def_path_iterator = generic_def_path_iterator; @@ -771,7 +771,7 @@ assert(Paths.empty() && VisitedPhis.empty() && "Reset the optimization state."); - Paths.emplace_back(Loc, Start, Phi, None); + Paths.emplace_back(Loc, Start, Phi, std::nullopt); // Stores how many "valid" optimization nodes we had prior to calling // addSearches/getBlockingAccess. 
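MemoryLocation::getOrNone above is the total counterpart of MemoryLocation::get: it covers exactly the five simple memory-access opcodes in that switch and answers std::nullopt for everything else. A typical guarded use:

#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

// Print the pointer operand of any plain load/store/va_arg/atomic access;
// calls, fences and the like simply produce no output.
void dumpAccessedPointer(const llvm::Instruction &I) {
  if (std::optional<llvm::MemoryLocation> Loc =
          llvm::MemoryLocation::getOrNone(&I))
    llvm::errs() << "accesses: " << *Loc->Ptr << "\n";
}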
Necessary for caching if we had a blocker. auto PriorPathsSize = Paths.size(); @@ -947,7 +947,7 @@ if (auto *MU = dyn_cast(Start)) Current = MU->getDefiningAccess(); - DefPath FirstDesc(Q.StartingLoc, Current, Current, None); + DefPath FirstDesc(Q.StartingLoc, Current, Current, std::nullopt); // Fast path for the overly-common case (no crazy phi optimization // necessary) UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc); @@ -1756,7 +1756,7 @@ Def = isa(Template); Use = isa(Template); #if !defined(NDEBUG) - ModRefInfo ModRef = AAP->getModRefInfo(I, None); + ModRefInfo ModRef = AAP->getModRefInfo(I, std::nullopt); bool DefCheck, UseCheck; DefCheck = isModSet(ModRef) || isOrdered(I); UseCheck = isRefSet(ModRef); @@ -1771,7 +1771,7 @@ #endif } else { // Find out what affect this instruction has on memory. - ModRefInfo ModRef = AAP->getModRefInfo(I, None); + ModRefInfo ModRef = AAP->getModRefInfo(I, std::nullopt); // The isOrdered check is used to ensure that volatiles end up as defs // (atomics end up as ModRef right now anyway). Until we separate the // ordering chain from the memory chain, this enables people to see at least diff --git a/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp b/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp --- a/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp +++ b/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp @@ -62,7 +62,7 @@ Optional OptimizationRemarkEmitter::computeHotness(const Value *V) { if (!BFI) - return None; + return std::nullopt; return BFI->getBlockProfileCount(cast(V)); } diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp --- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -87,11 +87,11 @@ uint64_t TotalCount; if (Call.extractProfTotalWeight(TotalCount)) return TotalCount; - return None; + return std::nullopt; } if (BFI) return BFI->getBlockProfileCount(Call.getParent(), AllowSynthetic); - return None; + return std::nullopt; } /// Returns true if the function's entry is hot. 
If it returns false, it @@ -267,7 +267,7 @@ Optional ProfileSummaryInfo::computeThreshold(int PercentileCutoff) const { if (!hasProfileSummary()) - return None; + return std::nullopt; auto iter = ThresholdCache.find(PercentileCutoff); if (iter != ThresholdCache.end()) { return iter->second; diff --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp --- a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp +++ b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp @@ -124,7 +124,7 @@ LLVM_DEBUG(dbgs() << "Replay Inliner: Not Inlined " << Callee << " @ " << CallSiteLoc << "\n"); // A negative inline is conveyed by "None" Optional - return std::make_unique(this, CB, None, ORE, + return std::make_unique(this, CB, std::nullopt, ORE, EmitRemarks); } } @@ -138,7 +138,7 @@ else if (ReplaySettings.ReplayFallback == ReplayInlinerSettings::Fallback::NeverInline) // A negative inline is conveyed by "None" Optional - return std::make_unique(this, CB, None, ORE, + return std::make_unique(this, CB, std::nullopt, ORE, EmitRemarks); else { assert(ReplaySettings.ReplayFallback == diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -732,7 +732,7 @@ return 0; if (Depth > MaxSCEVCompareDepth) - return None; + return std::nullopt; // Aside from the getSCEVType() ordering, the particular ordering // isn't very important except that it's beneficial to be consistent, @@ -2359,7 +2359,7 @@ const OverflowingBinaryOperator *OBO) { // It cannot be done any better. if (OBO->hasNoUnsignedWrap() && OBO->hasNoSignedWrap()) - return None; + return std::nullopt; SCEV::NoWrapFlags Flags = SCEV::NoWrapFlags::FlagAnyWrap; @@ -2373,7 +2373,7 @@ if (OBO->getOpcode() != Instruction::Add && OBO->getOpcode() != Instruction::Sub && OBO->getOpcode() != Instruction::Mul) - return None; + return std::nullopt; const SCEV *LHS = getSCEV(OBO->getOperand(0)); const SCEV *RHS = getSCEV(OBO->getOperand(1)); @@ -2396,7 +2396,7 @@ if (Deduced) return Flags; - return None; + return std::nullopt; } // We're trying to construct a SCEV of type `Type' with `Ops' as operands and @@ -3966,7 +3966,7 @@ if (!Changed) return S; if (NewOps.empty()) - return None; + return std::nullopt; return isa(S) ? SE.getSequentialMinMaxExpr(Kind, NewOps) @@ -3976,7 +3976,7 @@ RetVal visit(const SCEV *S) { // Has the whole operand been seen already? if (!SeenOps.insert(S).second) - return None; + return std::nullopt; return Base::visit(S); } @@ -4401,7 +4401,7 @@ ArrayRef ScalarEvolution::getSCEVValues(const SCEV *S) { ExprValueMapType::iterator SI = ExprValueMap.find_as(S); if (SI == ExprValueMap.end()) - return None; + return std::nullopt; #ifndef NDEBUG if (VerifySCEVMap) { // Check there is no dangling Value in the set returned. @@ -4915,7 +4915,7 @@ if (BackedgeCond == IC) return IsPositiveBECond ? 
SE.getOne(Type::getInt1Ty(SE.getContext())) : SE.getZero(Type::getInt1Ty(SE.getContext())); - return None; + return std::nullopt; } class SCEVShiftRewriter : public SCEVRewriteVisitor { @@ -5130,7 +5130,7 @@ static std::optional MatchBinaryOp(Value *V, DominatorTree &DT) { auto *Op = dyn_cast(V); if (!Op) - return None; + return std::nullopt; // Implementation detail: all the cleverness here should happen without // creating new SCEV expressions -- our caller knowns tricks to avoid creating @@ -5209,7 +5209,7 @@ if (II->getIntrinsicID() == Intrinsic::loop_decrement_reg) return BinaryOp(Instruction::Sub, II->getOperand(0), II->getOperand(1)); - return None; + return std::nullopt; } /// Helper function to createAddRecFromPHIWithCasts. We have a phi @@ -5353,7 +5353,7 @@ } } if (!BEValueV || !StartValueV) - return None; + return std::nullopt; const SCEV *BEValue = getSCEV(BEValueV); @@ -5362,7 +5362,7 @@ // an appropriate runtime guard, then we found a simple induction variable! const auto *Add = dyn_cast(BEValue); if (!Add) - return None; + return std::nullopt; // If there is a single occurrence of the symbolic value, possibly // casted, replace it with a recurrence. @@ -5378,7 +5378,7 @@ } if (FoundIndex == Add->getNumOperands()) - return None; + return std::nullopt; // Create an add with everything but the specified operand. SmallVector Ops; @@ -5390,7 +5390,7 @@ // The runtime checks will not be valid if the step amount is // varying inside the loop. if (!isLoopInvariant(Accum, L)) - return None; + return std::nullopt; // *** Part2: Create the predicates @@ -5495,7 +5495,7 @@ const SCEV *StartExtended = getExtendedExpr(StartVal, Signed); if (PredIsKnownFalse(StartVal, StartExtended)) { LLVM_DEBUG(dbgs() << "P2 is compile-time false\n";); - return None; + return std::nullopt; } // The Step is always Signed (because the overflow checks are either @@ -5503,7 +5503,7 @@ const SCEV *AccumExtended = getExtendedExpr(Accum, /*CreateSignExtend=*/true); if (PredIsKnownFalse(Accum, AccumExtended)) { LLVM_DEBUG(dbgs() << "P3 is compile-time false\n";); - return None; + return std::nullopt; } auto AppendPredicate = [&](const SCEV *Expr, @@ -5537,7 +5537,7 @@ auto *PN = cast(SymbolicPHI->getValue()); const Loop *L = isIntegerLoopHeaderPHI(PN, LI); if (!L) - return None; + return std::nullopt; // Check to see if we already analyzed this PHI. auto I = PredicatedSCEVRewrites.find({SymbolicPHI, L}); @@ -5546,7 +5546,7 @@ I->second; // Analysis was done before and failed to create an AddRec: if (Rewrite.first == SymbolicPHI) - return None; + return std::nullopt; // Analysis was done before and succeeded to create an AddRec under // a predicate: assert(isa(Rewrite.first) && "Expected an AddRec"); @@ -5561,7 +5561,7 @@ if (!Rewrite) { SmallVector Predicates; PredicatedSCEVRewrites[{SymbolicPHI, L}] = {SymbolicPHI, Predicates}; - return None; + return std::nullopt; } return Rewrite; @@ -6127,7 +6127,7 @@ // FIXME: while we can't legally model the case where both of the hands // are fully variable, we only require that the *difference* is constant. 
if (!isa(TrueExpr) && !isa(FalseExpr)) - return None; + return std::nullopt; const SCEV *X, *C; if (isa(TrueExpr)) { @@ -6147,7 +6147,7 @@ Value *TrueVal, Value *FalseVal) { if (!isa(TrueVal) && !isa(FalseVal)) - return None; + return std::nullopt; const auto *SECond = SE->getSCEV(Cond); const auto *SETrue = SE->getSCEV(TrueVal); @@ -6300,7 +6300,7 @@ if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) return getConstantRangeFromMetadata(*MD); - return None; + return std::nullopt; } void ScalarEvolution::setNoWrapFlags(SCEVAddRecExpr *AddRec, @@ -8600,8 +8600,7 @@ } ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E) - : ExitLimit(E, E, false, None) { -} + : ExitLimit(E, E, false, std::nullopt) {} ScalarEvolution::ExitLimit::ExitLimit( const SCEV *E, const SCEV *ConstantMaxNotTaken, bool MaxOrZero, @@ -8818,7 +8817,7 @@ "Variance in assumed invariant key components!"); auto Itr = TripCountMap.find({ExitCond, ControlsExit}); if (Itr == TripCountMap.end()) - return None; + return std::nullopt; return Itr->second; } @@ -8924,7 +8923,7 @@ else if (match(ExitCond, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) IsAnd = false; else - return None; + return std::nullopt; // EitherMayExit is true in these two cases: // br (and Op0 Op1), loop, exit @@ -10020,7 +10019,7 @@ // We currently can only solve this if the coefficients are constants. if (!LC || !MC || !NC) { LLVM_DEBUG(dbgs() << __func__ << ": coefficients are not constant\n"); - return None; + return std::nullopt; } APInt L = LC->getAPInt(); @@ -10070,7 +10069,7 @@ return XW.slt(YW) ? *X : *Y; } if (!X && !Y) - return None; + return std::nullopt; return X ? *X : *Y; } @@ -10087,7 +10086,7 @@ /// the addrec to the equation). static Optional TruncIfPossible(Optional X, unsigned BitWidth) { if (!X) - return None; + return std::nullopt; unsigned W = X->getBitWidth(); if (BitWidth > 1 && BitWidth < W && X->isIntN(BitWidth)) return X->trunc(BitWidth); @@ -10114,18 +10113,18 @@ unsigned BitWidth; auto T = GetQuadraticEquation(AddRec); if (!T) - return None; + return std::nullopt; std::tie(A, B, C, M, BitWidth) = *T; LLVM_DEBUG(dbgs() << __func__ << ": solving for unsigned overflow\n"); Optional X = APIntOps::SolveQuadraticEquationWrap(A, B, C, BitWidth+1); if (!X) - return None; + return std::nullopt; ConstantInt *CX = ConstantInt::get(SE.getContext(), *X); ConstantInt *V = EvaluateConstantChrecAtConstant(AddRec, CX, SE); if (!V->isZero()) - return None; + return std::nullopt; return TruncIfPossible(X, BitWidth); } @@ -10156,7 +10155,7 @@ unsigned BitWidth; auto T = GetQuadraticEquation(AddRec); if (!T) - return None; + return std::nullopt; // Be careful about the return value: there can be two reasons for not // returning an actual number. First, if no solutions to the equations @@ -10201,7 +10200,7 @@ // be a solution, but the function failed to find it. We cannot treat it // as "no solution". if (!SO || !UO) - return { None, false }; + return {std::nullopt, false}; // Check the smaller value first to see if it leaves the range. // At this point, both SO and UO must have values. @@ -10213,7 +10212,7 @@ return { Max, true }; // Solutions were found, but were eliminated, hence the "true". - return { None, true }; + return {std::nullopt, true}; }; std::tie(A, B, C, M, BitWidth) = *T; @@ -10225,7 +10224,7 @@ // If any of the solutions was unknown, no meaninigful conclusions can // be made. if (!SL.second || !SU.second) - return None; + return std::nullopt; // Claim: The correct solution is not some value between Min and Max. 
// @@ -10776,7 +10775,7 @@ return true; else if (isKnownPredicate(ICmpInst::getInversePredicate(Pred), LHS, RHS)) return false; - return None; + return std::nullopt; } bool ScalarEvolution::isKnownPredicateAt(ICmpInst::Predicate Pred, @@ -10801,7 +10800,7 @@ ICmpInst::getInversePredicate(Pred), LHS, RHS)) return false; - return None; + return std::nullopt; } bool ScalarEvolution::isKnownOnEveryIteration(ICmpInst::Predicate Pred, @@ -10848,7 +10847,7 @@ // Only handle LE/LT/GE/GT predicates. if (!ICmpInst::isRelational(Pred)) - return None; + return std::nullopt; bool IsGreater = ICmpInst::isGE(Pred) || ICmpInst::isGT(Pred); assert((IsGreater || ICmpInst::isLE(Pred) || ICmpInst::isLT(Pred)) && @@ -10857,13 +10856,13 @@ // Check that AR does not wrap. if (ICmpInst::isUnsigned(Pred)) { if (!LHS->hasNoUnsignedWrap()) - return None; + return std::nullopt; return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing; } else { assert(ICmpInst::isSigned(Pred) && "Relational predicate is either signed or unsigned!"); if (!LHS->hasNoSignedWrap()) - return None; + return std::nullopt; const SCEV *Step = LHS->getStepRecurrence(*this); @@ -10873,7 +10872,7 @@ if (isKnownNonPositive(Step)) return !IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing; - return None; + return std::nullopt; } } @@ -10885,7 +10884,7 @@ // If there is a loop-invariant, force it into the RHS, otherwise bail out. if (!isLoopInvariant(RHS, L)) { if (!isLoopInvariant(LHS, L)) - return None; + return std::nullopt; std::swap(LHS, RHS); Pred = ICmpInst::getSwappedPredicate(Pred); @@ -10893,11 +10892,11 @@ const SCEVAddRecExpr *ArLHS = dyn_cast(LHS); if (!ArLHS || ArLHS->getLoop() != L) - return None; + return std::nullopt; auto MonotonicType = getMonotonicPredicateType(ArLHS, Pred); if (!MonotonicType) - return None; + return std::nullopt; // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to // true as the loop iterates, and the backedge is control dependent on // "ArLHS `Pred` RHS" == true then we can reason as follows: @@ -10923,7 +10922,7 @@ RHS); if (!CtxI) - return None; + return std::nullopt; // Try to prove via context. // TODO: Support other cases. switch (Pred) { @@ -10960,7 +10959,7 @@ } } - return None; + return std::nullopt; } Optional @@ -10978,7 +10977,7 @@ // If there is a loop-invariant, force it into the RHS, otherwise bail out. if (!isLoopInvariant(RHS, L)) { if (!isLoopInvariant(LHS, L)) - return None; + return std::nullopt; std::swap(LHS, RHS); Pred = ICmpInst::getSwappedPredicate(Pred); @@ -10986,30 +10985,30 @@ auto *AR = dyn_cast(LHS); if (!AR || AR->getLoop() != L) - return None; + return std::nullopt; // The predicate must be relational (i.e. <, <=, >=, >). if (!ICmpInst::isRelational(Pred)) - return None; + return std::nullopt; // TODO: Support steps other than +/- 1. const SCEV *Step = AR->getStepRecurrence(*this); auto *One = getOne(Step->getType()); auto *MinusOne = getNegativeSCEV(One); if (Step != One && Step != MinusOne) - return None; + return std::nullopt; // Type mismatch here means that MaxIter is potentially larger than max // unsigned value in start type, which mean we cannot prove no wrap for the // indvar. if (AR->getType() != MaxIter->getType()) - return None; + return std::nullopt; // Value of IV on suggested last iteration. const SCEV *Last = AR->evaluateAtIteration(MaxIter, *this); // Does it still meet the requirement? 
if (!isLoopBackedgeGuardedByCond(L, Pred, Last, RHS)) - return None; + return std::nullopt; // Because step is +/- 1 and MaxIter has same type as Start (i.e. it does // not exceed max unsigned value of this type), this effectively proves // that there is no wrap during the iteration. To prove that there is no @@ -11021,7 +11020,7 @@ NoOverflowPred = CmpInst::getSwappedPredicate(NoOverflowPred); const SCEV *Start = AR->getStart(); if (!isKnownPredicateAt(NoOverflowPred, Start, Last, CtxI)) - return None; + return std::nullopt; // Everything is fine. return ScalarEvolution::LoopInvariantPredicate(Pred, Start, RHS); @@ -11170,7 +11169,7 @@ // Allowing arbitrary number of activations of isKnownPredicateViaSplitting on // the stack can result in exponential time complexity. - SaveAndRestore Restore(ProvingSplitPredicate, true); + SaveAndRestore Restore(ProvingSplitPredicate, true); // If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L // @@ -11239,7 +11238,7 @@ if (WalkingBEDominatingConds) return false; - SaveAndRestore ClearOnExit(WalkingBEDominatingConds, true); + SaveAndRestore ClearOnExit(WalkingBEDominatingConds, true); // See if we can exploit a trip count to prove the predicate. const auto &BETakenInfo = getBackedgeTakenInfo(L); @@ -11758,15 +11757,15 @@ const auto *MAR = cast(More); if (LAR->getLoop() != MAR->getLoop()) - return None; + return std::nullopt; // We look at affine expressions only; not for correctness but to keep // getStepRecurrence cheap. if (!LAR->isAffine() || !MAR->isAffine()) - return None; + return std::nullopt; if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this)) - return None; + return std::nullopt; Less = LAR->getStart(); More = MAR->getStart(); @@ -11800,7 +11799,7 @@ if (C1 && C2 && RLess == RMore) return C2->getAPInt() - C1->getAPInt(); - return None; + return std::nullopt; } bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart( diff --git a/llvm/lib/Analysis/StratifiedSets.h b/llvm/lib/Analysis/StratifiedSets.h --- a/llvm/lib/Analysis/StratifiedSets.h +++ b/llvm/lib/Analysis/StratifiedSets.h @@ -94,7 +94,7 @@ Optional find(const T &Elem) const { auto Iter = Values.find(Elem); if (Iter == Values.end()) - return None; + return std::nullopt; return Iter->second; } @@ -547,21 +547,21 @@ Optional get(const T &Val) const { auto Result = Values.find(Val); if (Result == Values.end()) - return None; + return std::nullopt; return &Result->second; } Optional get(const T &Val) { auto Result = Values.find(Val); if (Result == Values.end()) - return None; + return std::nullopt; return &Result->second; } Optional indexOf(const T &Val) { auto MaybeVal = get(Val); if (!MaybeVal) - return None; + return std::nullopt; auto *Info = *MaybeVal; auto &Link = linksAt(Info->Index); return Link.Number; diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" +#include #include using namespace llvm; @@ -308,20 +309,20 @@ return TTIImpl->emitGetActiveLaneMask(); } -Optional +std::optional TargetTransformInfo::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { return TTIImpl->instCombineIntrinsic(IC, II); } -Optional TargetTransformInfo::simplifyDemandedUseBitsIntrinsic( +std::optional TargetTransformInfo::simplifyDemandedUseBitsIntrinsic( InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, 
KnownBits &Known, bool &KnownBitsComputed) const { return TTIImpl->simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known, KnownBitsComputed); } -Optional TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic( +std::optional TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic( InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function @@ -546,6 +547,10 @@ return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp); } +bool TargetTransformInfo::enableSelectOptimize() const { + return TTIImpl->enableSelectOptimize(); +} + bool TargetTransformInfo::enableInterleavedAccessVectorization() const { return TTIImpl->enableInterleavedAccessVectorization(); } @@ -650,11 +655,11 @@ return TTIImpl->getMinVectorRegisterBitWidth(); } -Optional TargetTransformInfo::getMaxVScale() const { +std::optional TargetTransformInfo::getMaxVScale() const { return TTIImpl->getMaxVScale(); } -Optional TargetTransformInfo::getVScaleForTuning() const { +std::optional TargetTransformInfo::getVScaleForTuning() const { return TTIImpl->getVScaleForTuning(); } @@ -689,12 +694,12 @@ : TTIImpl->getCacheLineSize(); } -llvm::Optional +std::optional TargetTransformInfo::getCacheSize(CacheLevel Level) const { return TTIImpl->getCacheSize(Level); } -llvm::Optional +std::optional TargetTransformInfo::getCacheAssociativity(CacheLevel Level) const { return TTIImpl->getCacheAssociativity(Level); } @@ -1000,7 +1005,7 @@ } InstructionCost TargetTransformInfo::getArithmeticReductionCost( - unsigned Opcode, VectorType *Ty, Optional FMF, + unsigned Opcode, VectorType *Ty, std::optional FMF, TTI::TargetCostKind CostKind) const { InstructionCost Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); @@ -1019,7 +1024,7 @@ InstructionCost TargetTransformInfo::getExtendedReductionCost( unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, - Optional FMF, TTI::TargetCostKind CostKind) const { + std::optional FMF, TTI::TargetCostKind CostKind) const { return TTIImpl->getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF, CostKind); } @@ -1052,7 +1057,7 @@ Type *TargetTransformInfo::getMemcpyLoopLoweringType( LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, - Optional AtomicElementSize) const { + std::optional AtomicElementSize) const { return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace, DestAddrSpace, SrcAlign, DestAlign, AtomicElementSize); @@ -1062,7 +1067,7 @@ SmallVectorImpl &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, - Optional AtomicCpySize) const { + std::optional AtomicCpySize) const { TTIImpl->getMemcpyLoopResidualLoweringType( OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign, DestAlign, AtomicCpySize); diff --git a/llvm/lib/Analysis/TensorSpec.cpp b/llvm/lib/Analysis/TensorSpec.cpp --- a/llvm/lib/Analysis/TensorSpec.cpp +++ b/llvm/lib/Analysis/TensorSpec.cpp @@ -47,7 +47,7 @@ llvm::raw_string_ostream OS(S); OS << Value; Ctx.emitError("Unable to parse JSON Value as spec (" + Message + "): " + S); - return None; + return std::nullopt; }; // FIXME: accept a Path as a parameter, and use it for error reporting. 
json::Path::Root Root("tensor_spec"); @@ -74,7 +74,7 @@ return TensorSpec::createSpec(TensorName, TensorShape, TensorPort); SUPPORTED_TENSOR_TYPES(PARSE_TYPE) #undef PARSE_TYPE - return None; + return std::nullopt; } } // namespace llvm diff --git a/llvm/lib/Analysis/TrainingLogger.cpp b/llvm/lib/Analysis/TrainingLogger.cpp --- a/llvm/lib/Analysis/TrainingLogger.cpp +++ b/llvm/lib/Analysis/TrainingLogger.cpp @@ -52,10 +52,29 @@ namespace llvm { class LoggerDataImpl { +protected: const std::vector LoggedFeatureSpecs; const TensorSpec RewardSpec; const bool IncludeReward; + LoggerDataImpl(const std::vector &LoggedSpecs, + const TensorSpec &RewardSpec, bool IncludeReward) + : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec), + IncludeReward(IncludeReward) {} + virtual void logRewardImpl(const char *Value, size_t Size) = 0; + +public: + // flush the logged info to a stream and clear the log contents. + virtual void flush(std::string *Str) = 0; + virtual char *addNewTensor(size_t FeatureID) = 0; + virtual size_t getNrRecords() const = 0; + virtual ~LoggerDataImpl() = default; + + template void logReward(T Value) { + logRewardImpl(reinterpret_cast(&Value), sizeof(T)); + } +}; +class TFSequenceExampleLoggerDataImpl : public LoggerDataImpl { std::vector FeatureLists; tensorflow::FeatureList Reward; @@ -94,13 +113,14 @@ } public: - LoggerDataImpl(const std::vector &LoggedSpecs, - const TensorSpec &RewardSpec, bool IncludeReward) - : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec), - IncludeReward(IncludeReward), FeatureLists(LoggedFeatureSpecs.size()) {} + TFSequenceExampleLoggerDataImpl(const std::vector &LoggedSpecs, + const TensorSpec &RewardSpec, + bool IncludeReward) + : LoggerDataImpl(LoggedSpecs, RewardSpec, IncludeReward), + FeatureLists(LoggedFeatureSpecs.size()) {} // flush the logged info to a stream and clear the log contents. - void flush(std::string *Str) { + void flush(std::string *Str) override { size_t NrRecords = getNrRecords(); (void)NrRecords; tensorflow::SequenceExample SE; @@ -109,7 +129,7 @@ serialize(SE, Str); } - char *addNewTensor(size_t FeatureID) { + char *addNewTensor(size_t FeatureID) override { const auto &Spec = LoggedFeatureSpecs[FeatureID]; if (Spec.isElementType()) { auto *RF = FeatureLists[FeatureID] @@ -129,18 +149,22 @@ llvm_unreachable("Unsupported tensor type."); } - template void logReward(T Value) { + void logRewardImpl(const char *Value, size_t Size) override { assert(IncludeReward); if (RewardSpec.isElementType()) - Reward.add_feature()->mutable_float_list()->add_value(Value); - else if (RewardSpec.isElementType() || - RewardSpec.isElementType()) - Reward.add_feature()->mutable_int64_list()->add_value(Value); + Reward.add_feature()->mutable_float_list()->add_value( + *reinterpret_cast(Value)); + else if (RewardSpec.isElementType()) + Reward.add_feature()->mutable_int64_list()->add_value( + *reinterpret_cast(Value)); + else if (RewardSpec.isElementType()) + Reward.add_feature()->mutable_int64_list()->add_value( + *reinterpret_cast(Value)); else llvm_unreachable("Unsupported tensor type."); } - size_t getNrRecords() const { + size_t getNrRecords() const override { return FeatureLists.empty() ? 
0 : FeatureLists[0].feature().size(); } }; @@ -150,8 +174,8 @@ const TensorSpec &RewardSpec, bool IncludeReward) : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec), IncludeReward(IncludeReward), - LoggerData(std::make_unique(FeatureSpecs, RewardSpec, - IncludeReward)) {} + LoggerData(std::make_unique( + FeatureSpecs, RewardSpec, IncludeReward)) {} Logger::~Logger() {} diff --git a/llvm/lib/Analysis/VFABIDemangling.cpp b/llvm/lib/Analysis/VFABIDemangling.cpp --- a/llvm/lib/Analysis/VFABIDemangling.cpp +++ b/llvm/lib/Analysis/VFABIDemangling.cpp @@ -324,24 +324,24 @@ // Parse the fixed size part of the manled name if (!MangledName.consume_front("_ZGV")) - return None; + return std::nullopt; // Extract ISA. An unknow ISA is also supported, so we accept all // values. VFISAKind ISA; if (tryParseISA(MangledName, ISA) != ParseRet::OK) - return None; + return std::nullopt; // Extract . bool IsMasked; if (tryParseMask(MangledName, IsMasked) != ParseRet::OK) - return None; + return std::nullopt; // Parse the variable size, starting from . unsigned VF; bool IsScalable; if (tryParseVLEN(MangledName, VF, IsScalable) != ParseRet::OK) - return None; + return std::nullopt; // Parse the . ParseRet ParamFound; @@ -354,7 +354,7 @@ // Bail off if there is a parsing error in the parsing of the parameter. if (ParamFound == ParseRet::Error) - return None; + return std::nullopt; if (ParamFound == ParseRet::OK) { Align Alignment; @@ -362,7 +362,7 @@ const ParseRet AlignFound = tryParseAlign(MangledName, Alignment); // Bail off if there is a syntax error in the align token. if (AlignFound == ParseRet::Error) - return None; + return std::nullopt; // Add the parameter. Parameters.push_back({ParameterPos, PKind, StepOrPos, Alignment}); @@ -372,12 +372,12 @@ // A valid MangledName must have at least one valid entry in the // . if (Parameters.empty()) - return None; + return std::nullopt; // Check for the and the optional , which // are separated from the prefix with "_" if (!MangledName.consume_front("_")) - return None; + return std::nullopt; // The rest of the string must be in the format: // [()] @@ -385,25 +385,25 @@ MangledName.take_while([](char In) { return In != '('; }); if (ScalarName.empty()) - return None; + return std::nullopt; // Reduce MangledName to [()]. MangledName = MangledName.ltrim(ScalarName); // Find the optional custom name redirection. if (MangledName.consume_front("(")) { if (!MangledName.consume_back(")")) - return None; + return std::nullopt; // Update the vector variant with the one specified by the user. VectorName = MangledName; // If the vector name is missing, bail out. if (VectorName.empty()) - return None; + return std::nullopt; } // LLVM internal mapping via the TargetLibraryInfo (TLI) must be // redirected to an existing name. if (ISA == VFISAKind::LLVM && VectorName == OriginalName) - return None; + return std::nullopt; // When is "M", we need to add a parameter that is used as // global predicate for the function. @@ -438,7 +438,7 @@ // The declaration of the function must be present in the module // to be able to retrieve its signature. if (!F) - return None; + return std::nullopt; const ElementCount EC = getECFromSignature(F->getFunctionType()); VF = EC.getKnownMinValue(); } @@ -447,9 +447,9 @@ // 2. We don't accept the demangling if the vector function is not // present in the module. 
if (VF == 0) - return None; + return std::nullopt; if (!M.getFunction(VectorName)) - return None; + return std::nullopt; const VFShape Shape({ElementCount::get(VF, IsScalable), Parameters}); return VFInfo({Shape, std::string(ScalarName), std::string(VectorName), ISA}); diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1747,7 +1747,7 @@ break; auto Attr = II->getFunction()->getFnAttribute(Attribute::VScaleRange); - Optional VScaleMax = Attr.getVScaleRangeMax(); + std::optional VScaleMax = Attr.getVScaleRangeMax(); if (!VScaleMax) break; @@ -2750,7 +2750,7 @@ getInvertibleOperands(const Operator *Op1, const Operator *Op2) { if (Op1->getOpcode() != Op2->getOpcode()) - return None; + return std::nullopt; auto getOperands = [&](unsigned OpNum) -> auto { return std::make_pair(Op1->getOperand(OpNum), Op2->getOperand(OpNum)); @@ -2844,7 +2844,7 @@ return std::make_pair(Start1, Start2); } } - return None; + return std::nullopt; } /// Return true if V2 == V1 + X, where X is known non-zero. @@ -6664,21 +6664,21 @@ const DataLayout &DL, unsigned Depth) { switch (Pred) { default: - return None; + return std::nullopt; case CmpInst::ICMP_SLT: case CmpInst::ICMP_SLE: if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth) && isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth)) return true; - return None; + return std::nullopt; case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE: if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth) && isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth)) return true; - return None; + return std::nullopt; } } @@ -6707,7 +6707,7 @@ if (CmpInst::isImpliedFalseByMatchingCmp(LPred, RPred)) return false; - return None; + return std::nullopt; } /// Return true if "icmp LPred X, LC" implies "icmp RPred X, RC" is true. @@ -6724,7 +6724,7 @@ return false; if (Difference.isEmptySet()) return true; - return None; + return std::nullopt; } /// Return true if LHS implies RHS (expanded to its components as "R0 RPred R1") @@ -6757,7 +6757,7 @@ if (LPred == RPred) return isImpliedCondOperands(LPred, L0, L1, R0, R1, DL, Depth); - return None; + return std::nullopt; } /// Return true if LHS implies RHS is true. Return false if LHS implies RHS is @@ -6788,9 +6788,9 @@ if (Optional Implication = isImpliedCondition( ARHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1)) return Implication; - return None; + return std::nullopt; } - return None; + return std::nullopt; } Optional @@ -6799,12 +6799,12 @@ const DataLayout &DL, bool LHSIsTrue, unsigned Depth) { // Bail out when we hit the limit. if (Depth == MaxAnalysisRecursionDepth) - return None; + return std::nullopt; // A mismatch occurs when we compare a scalar cmp to a vector cmp, for // example. 
if (RHSOp0->getType()->isVectorTy() != LHS->getType()->isVectorTy()) - return None; + return std::nullopt; assert(LHS->getType()->isIntOrIntVectorTy(1) && "Expected integer type only!"); @@ -6825,7 +6825,7 @@ return isImpliedCondAndOr(LHSI, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth); } - return None; + return std::nullopt; } Optional llvm::isImpliedCondition(const Value *LHS, const Value *RHS, @@ -6841,7 +6841,7 @@ LHSIsTrue, Depth); if (Depth == MaxAnalysisRecursionDepth) - return None; + return std::nullopt; // LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2 // LHS ==> !(RHS1 && RHS2) if LHS ==> !RHS1 or LHS ==> !RHS2 @@ -6867,7 +6867,7 @@ return false; } - return None; + return std::nullopt; } // Returns a pair (Condition, ConditionIsTrue), where Condition is a branch @@ -6908,7 +6908,7 @@ auto PredCond = getDomPredecessorCondition(ContextI); if (PredCond.first) return isImpliedCondition(PredCond.first, Cond, DL, PredCond.second); - return None; + return std::nullopt; } Optional llvm::isImpliedByDomCondition(CmpInst::Predicate Pred, @@ -6919,7 +6919,7 @@ if (PredCond.first) return isImpliedCondition(PredCond.first, Pred, LHS, RHS, DL, PredCond.second); - return None; + return std::nullopt; } static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, @@ -7335,7 +7335,7 @@ for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { ConstantInt *OpC = dyn_cast(GEP->getOperand(i)); if (!OpC) - return None; + return std::nullopt; if (OpC->isZero()) continue; // No offset. @@ -7349,7 +7349,7 @@ // vector. Multiply the index by the ElementSize. TypeSize Size = DL.getTypeAllocSize(GTI.getIndexedType()); if (Size.isScalable()) - return None; + return std::nullopt; Offset += Size.getFixedSize() * OpC->getSExtValue(); } @@ -7377,7 +7377,7 @@ // handle no other case. if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0) || GEP1->getSourceElementType() != GEP2->getSourceElementType()) - return None; + return std::nullopt; // Skip any common indices and track the GEP types. unsigned Idx = 1; @@ -7388,7 +7388,7 @@ auto IOffset1 = getOffsetFromIndex(GEP1, Idx, DL); auto IOffset2 = getOffsetFromIndex(GEP2, Idx, DL); if (!IOffset1 || !IOffset2) - return None; + return std::nullopt; return *IOffset2 - *IOffset1 + Offset2.getSExtValue() - Offset1.getSExtValue(); } diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -50,6 +50,7 @@ #include #include #include +#include #include using namespace llvm; @@ -782,7 +783,7 @@ // Otherwise, create MDNode forward reference. auto &FwdRef = ForwardRefMDNodes[MID]; - FwdRef = std::make_pair(MDTuple::getTemporary(Context, None), IDLoc); + FwdRef = std::make_pair(MDTuple::getTemporary(Context, std::nullopt), IDLoc); Result = FwdRef.first.get(); NumberedMetadata[MID].reset(Result); @@ -1427,7 +1428,7 @@ } case Attribute::AllocSize: { unsigned ElemSizeArg; - Optional NumElemsArg; + std::optional NumElemsArg; if (parseAllocSizeArguments(ElemSizeArg, NumElemsArg)) return true; B.addAllocSizeAttr(ElemSizeArg, NumElemsArg); @@ -1438,7 +1439,7 @@ if (parseVScaleRangeArguments(MinValue, MaxValue)) return true; B.addVScaleRangeAttr(MinValue, - MaxValue > 0 ? MaxValue : Optional()); + MaxValue > 0 ? 
MaxValue : std::optional()); return false; } case Attribute::Dereferenceable: { @@ -2131,7 +2132,7 @@ /// ::= /* empty */ /// ::= 'align' 4 bool LLParser::parseOptionalAlignment(MaybeAlign &Alignment, bool AllowParens) { - Alignment = None; + Alignment = std::nullopt; if (!EatIfPresent(lltok::kw_align)) return false; LocTy AlignLoc = Lex.getLoc(); @@ -2243,7 +2244,7 @@ case lltok::kw_inaccessiblemem: return MemoryEffects::InaccessibleMem; default: - return None; + return std::nullopt; } } @@ -2258,7 +2259,7 @@ case lltok::kw_readwrite: return ModRefInfo::ModRef; default: - return None; + return std::nullopt; } } @@ -2273,7 +2274,7 @@ Lex.Lex(); if (!EatIfPresent(lltok::lparen)) { tokError("expected '('"); - return None; + return std::nullopt; } bool SeenLoc = false; @@ -2283,7 +2284,7 @@ Lex.Lex(); if (!EatIfPresent(lltok::colon)) { tokError("expected ':' after location"); - return None; + return std::nullopt; } } @@ -2294,7 +2295,7 @@ "or access kind (none, read, write, readwrite)"); else tokError("expected access kind (none, read, write, readwrite)"); - return None; + return std::nullopt; } Lex.Lex(); @@ -2304,7 +2305,7 @@ } else { if (SeenLoc) { tokError("default access kind must be specified first"); - return None; + return std::nullopt; } ME = MemoryEffects(*MR); } @@ -2314,7 +2315,7 @@ } while (EatIfPresent(lltok::comma)); tokError("unterminated memory attribute"); - return None; + return std::nullopt; } /// parseOptionalCommaAlign @@ -2371,7 +2372,7 @@ } bool LLParser::parseAllocSizeArguments(unsigned &BaseSizeArg, - Optional &HowManyArg) { + std::optional &HowManyArg) { Lex.Lex(); auto StartParen = Lex.getLoc(); @@ -2391,7 +2392,7 @@ "'allocsize' indices can't refer to the same parameter"); HowManyArg = HowMany; } else - HowManyArg = None; + HowManyArg = std::nullopt; auto EndParen = Lex.getLoc(); if (!EatIfPresent(lltok::rparen)) @@ -6071,7 +6072,7 @@ // within this function. if (PFS.resolveForwardRefBlockAddresses()) return true; - SaveAndRestore ScopeExit(BlockAddressPFS, &PFS); + SaveAndRestore ScopeExit(BlockAddressPFS, &PFS); // We need at least one basic block. if (Lex.getKind() == lltok::rbrace || Lex.getKind() == lltok::kw_uselistorder) diff --git a/llvm/lib/AsmParser/Parser.cpp b/llvm/lib/AsmParser/Parser.cpp --- a/llvm/lib/AsmParser/Parser.cpp +++ b/llvm/lib/AsmParser/Parser.cpp @@ -93,7 +93,7 @@ SlotMapping *Slots) { return ::parseAssemblyWithIndex(F, Err, Context, Slots, /*UpgradeDebugInfo*/ true, - [](StringRef) { return None; }); + [](StringRef) { return std::nullopt; }); } static ParsedModuleAndIndex @@ -150,7 +150,7 @@ // index, but we need to initialize it. LLVMContext unusedContext; return LLParser(F.getBuffer(), SM, Err, nullptr, &Index, unusedContext) - .Run(true, [](StringRef) { return None; }); + .Run(true, [](StringRef) { return std::nullopt; }); } std::unique_ptr diff --git a/llvm/lib/BinaryFormat/Dwarf.cpp b/llvm/lib/BinaryFormat/Dwarf.cpp --- a/llvm/lib/BinaryFormat/Dwarf.cpp +++ b/llvm/lib/BinaryFormat/Dwarf.cpp @@ -368,7 +368,7 @@ Optional llvm::dwarf::LanguageLowerBound(dwarf::SourceLanguage Lang) { switch (Lang) { default: - return None; + return std::nullopt; #define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR) \ case DW_LANG_##NAME: \ return LOWER_BOUND; @@ -697,7 +697,7 @@ case DW_FORM_addr: if (Params) return Params.AddrSize; - return None; + return std::nullopt; case DW_FORM_block: // ULEB128 length L followed by L bytes. case DW_FORM_block1: // 1 byte length L followed by L bytes. 
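
Two recurring shapes of this migration appear in the Parser.cpp and Dwarf.cpp hunks above: returning std::nullopt directly from a function whose result is optional, and returning it from a lambda wrapped in a std::function whose declared result is optional. Both work because std::nullopt_t converts implicitly to any std::optional<T>. A minimal, self-contained sketch of the two shapes; formByteSize and DataLayoutCallback are illustrative names, not the real helpers:

    #include <cstdint>
    #include <functional>
    #include <optional>
    #include <string>

    // Shape 1: a query that may have no answer returns std::nullopt directly.
    // "formByteSize" is a hypothetical stand-in, not the real DWARF helper.
    std::optional<uint8_t> formByteSize(unsigned Form) {
      if (Form == 0x0b) // pretend this is a fixed one-byte form
        return 1;
      return std::nullopt; // size is not fixed; caller must handle "no value"
    }

    // Shape 2: a callback whose declared result is optional. A lambda that
    // returns std::nullopt deduces std::nullopt_t, which converts implicitly
    // to std::optional<std::string>, so it still satisfies this signature.
    using DataLayoutCallback =
        std::function<std::optional<std::string>(std::string)>;

    int main() {
      DataLayoutCallback NoOverride = [](std::string) { return std::nullopt; };
      bool Overridden = NoOverride("some-module").has_value(); // false here
      return (formByteSize(0x0b).value_or(0) == 1 && !Overridden) ? 0 : 1;
    }
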
@@ -715,12 +715,12 @@ case DW_FORM_rnglistx: // ULEB128. case DW_FORM_GNU_addr_index: // ULEB128. case DW_FORM_GNU_str_index: // ULEB128. - return None; + return std::nullopt; case DW_FORM_ref_addr: if (Params) return Params.getRefAddrByteSize(); - return None; + return std::nullopt; case DW_FORM_flag: case DW_FORM_data1: @@ -753,7 +753,7 @@ case DW_FORM_strp_sup: if (Params) return Params.getDwarfOffsetByteSize(); - return None; + return std::nullopt; case DW_FORM_data8: case DW_FORM_ref8: @@ -775,7 +775,7 @@ default: break; } - return None; + return std::nullopt; } bool llvm::dwarf::isValidFormForVersion(Form F, unsigned Version, diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp @@ -28,7 +28,7 @@ if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) { if (BlockID == bitc::BLOCKINFO_BLOCK_ID) return "BLOCKINFO_BLOCK"; - return None; + return std::nullopt; } // Check to see if we have a blockinfo record for this block, with a name. @@ -39,11 +39,11 @@ } if (CurStreamType != LLVMIRBitstream) - return None; + return std::nullopt; switch (BlockID) { default: - return None; + return std::nullopt; case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: return "OPERAND_BUNDLE_TAGS_BLOCK"; case bitc::MODULE_BLOCK_ID: @@ -92,7 +92,7 @@ if (BlockID == bitc::BLOCKINFO_BLOCK_ID) { switch (CodeID) { default: - return None; + return std::nullopt; case bitc::BLOCKINFO_CODE_SETBID: return "SETBID"; case bitc::BLOCKINFO_CODE_BLOCKNAME: @@ -101,7 +101,7 @@ return "SETRECORDNAME"; } } - return None; + return std::nullopt; } // Check to see if we have a blockinfo record for this record, with a name. @@ -113,18 +113,18 @@ } if (CurStreamType != LLVMIRBitstream) - return None; + return std::nullopt; #define STRINGIFY_CODE(PREFIX, CODE) \ case bitc::PREFIX##_##CODE: \ return #CODE; switch (BlockID) { default: - return None; + return std::nullopt; case bitc::MODULE_BLOCK_ID: switch (CodeID) { default: - return None; + return std::nullopt; STRINGIFY_CODE(MODULE_CODE, VERSION) STRINGIFY_CODE(MODULE_CODE, TRIPLE) STRINGIFY_CODE(MODULE_CODE, DATALAYOUT) @@ -144,14 +144,14 @@ case bitc::IDENTIFICATION_BLOCK_ID: switch (CodeID) { default: - return None; + return std::nullopt; STRINGIFY_CODE(IDENTIFICATION_CODE, STRING) STRINGIFY_CODE(IDENTIFICATION_CODE, EPOCH) } case bitc::PARAMATTR_BLOCK_ID: switch (CodeID) { default: - return None; + return std::nullopt; // FIXME: Should these be different? 
case bitc::PARAMATTR_CODE_ENTRY_OLD: return "ENTRY"; @@ -161,14 +161,14 @@ case bitc::PARAMATTR_GROUP_BLOCK_ID: switch (CodeID) { default: - return None; + return std::nullopt; case bitc::PARAMATTR_GRP_CODE_ENTRY: return "ENTRY"; } case bitc::TYPE_BLOCK_ID_NEW: switch (CodeID) { default: - return None; + return std::nullopt; STRINGIFY_CODE(TYPE_CODE, NUMENTRY) STRINGIFY_CODE(TYPE_CODE, VOID) STRINGIFY_CODE(TYPE_CODE, FLOAT) @@ -196,7 +196,7 @@ case bitc::CONSTANTS_BLOCK_ID: switch (CodeID) { default: - return None; + return std::nullopt; STRINGIFY_CODE(CST_CODE, SETTYPE) STRINGIFY_CODE(CST_CODE, NULL) STRINGIFY_CODE(CST_CODE, UNDEF) @@ -227,7 +227,7 @@ case bitc::FUNCTION_BLOCK_ID: switch (CodeID) { default: - return None; + return std::nullopt; STRINGIFY_CODE(FUNC_CODE, DECLAREBLOCKS) STRINGIFY_CODE(FUNC_CODE, INST_BINOP) STRINGIFY_CODE(FUNC_CODE, INST_CAST) @@ -272,7 +272,7 @@ case bitc::VALUE_SYMTAB_BLOCK_ID: switch (CodeID) { default: - return None; + return std::nullopt; STRINGIFY_CODE(VST_CODE, ENTRY) STRINGIFY_CODE(VST_CODE, BBENTRY) STRINGIFY_CODE(VST_CODE, FNENTRY) @@ -281,7 +281,7 @@ case bitc::MODULE_STRTAB_BLOCK_ID: switch (CodeID) { default: - return None; + return std::nullopt; STRINGIFY_CODE(MST_CODE, ENTRY) STRINGIFY_CODE(MST_CODE, HASH) } @@ -289,7 +289,7 @@ case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID: switch (CodeID) { default: - return None; + return std::nullopt; STRINGIFY_CODE(FS, PERMODULE) STRINGIFY_CODE(FS, PERMODULE_PROFILE) STRINGIFY_CODE(FS, PERMODULE_RELBF) @@ -324,13 +324,13 @@ case bitc::METADATA_ATTACHMENT_ID: switch (CodeID) { default: - return None; + return std::nullopt; STRINGIFY_CODE(METADATA, ATTACHMENT) } case bitc::METADATA_BLOCK_ID: switch (CodeID) { default: - return None; + return std::nullopt; STRINGIFY_CODE(METADATA, STRING_OLD) STRINGIFY_CODE(METADATA, VALUE) STRINGIFY_CODE(METADATA, NODE) @@ -374,13 +374,13 @@ case bitc::METADATA_KIND_BLOCK_ID: switch (CodeID) { default: - return None; + return std::nullopt; STRINGIFY_CODE(METADATA, KIND) } case bitc::USELIST_BLOCK_ID: switch (CodeID) { default: - return None; + return std::nullopt; case bitc::USELIST_CODE_DEFAULT: return "USELIST_CODE_DEFAULT"; case bitc::USELIST_CODE_BB: @@ -390,21 +390,21 @@ case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: switch (CodeID) { default: - return None; + return std::nullopt; case bitc::OPERAND_BUNDLE_TAG: return "OPERAND_BUNDLE_TAG"; } case bitc::STRTAB_BLOCK_ID: switch (CodeID) { default: - return None; + return std::nullopt; case bitc::STRTAB_BLOB: return "BLOB"; } case bitc::SYMTAB_BLOCK_ID: switch (CodeID) { default: - return None; + return std::nullopt; case bitc::SYMTAB_BLOB: return "BLOB"; } diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -554,7 +554,7 @@ Optional getInRangeIndex() const { assert(Opcode == Instruction::GetElementPtr); if (Extra == (unsigned)-1) - return None; + return std::nullopt; return Extra; } @@ -822,7 +822,9 @@ Error parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); Error parseModule( uint64_t ResumeBit, bool ShouldLazyLoadMetadata = false, - DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; }); + DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { + return std::nullopt; + }); Error parseComdatRecord(ArrayRef Record); Error parseGlobalVarRecord(ArrayRef Record); @@ -7915,7 +7917,7 @@ BitcodeModule::getLazyModule(LLVMContext &Context, bool 
ShouldLazyLoadMetadata, bool IsImporting) { return getModuleImpl(Context, false, ShouldLazyLoadMetadata, IsImporting, - [](StringRef) { return None; }); + [](StringRef) { return std::nullopt; }); } // Parse the specified bitcode buffer and merge the index into CombinedIndex. diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -222,7 +222,7 @@ // Create and return a placeholder, which will later be RAUW'd. ++NumMDNodeTemporary; - Metadata *MD = MDNode::getTemporary(Context, None).release(); + Metadata *MD = MDNode::getTemporary(Context, std::nullopt).release(); MetadataPtrs[Idx].reset(MD); return MD; } @@ -304,7 +304,7 @@ auto &Ref = OldTypeRefs.Unknown[UUID]; if (!Ref) - Ref = MDNode::getTemporary(Context, None); + Ref = MDNode::getTemporary(Context, std::nullopt); return Ref.get(); } @@ -321,7 +321,7 @@ // resolveTypeRefArrays() will be resolve this forward reference. OldTypeRefs.Arrays.emplace_back( std::piecewise_construct, std::forward_as_tuple(Tuple), - std::forward_as_tuple(MDTuple::getTemporary(Context, None))); + std::forward_as_tuple(MDTuple::getTemporary(Context, std::nullopt))); return OldTypeRefs.Arrays.back().second.get(); } @@ -1212,7 +1212,8 @@ // If this isn't a LocalAsMetadata record, we're dropping it. This used // to be legal, but there's no upgrade path. auto dropRecord = [&] { - MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo); + MetadataList.assignValue(MDNode::get(Context, std::nullopt), + NextMetadataNo); NextMetadataNo++; }; if (Record.size() != 2) { @@ -1624,7 +1625,7 @@ DIFile, (Context, getMDString(Record[1]), getMDString(Record[2]), Checksum, Record.size() > 5 ? Optional(getMDString(Record[5])) - : None)), + : std::nullopt)), NextMetadataNo); NextMetadataNo++; break; diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -528,7 +528,7 @@ Optional getValueId(GlobalValue::GUID ValGUID) { auto VMI = GUIDToValueIdMap.find(ValGUID); if (VMI == GUIDToValueIdMap.end()) - return None; + return std::nullopt; return VMI->second; } @@ -4431,7 +4431,7 @@ auto GetValueId = [&](const ValueInfo &VI) -> Optional { if (!VI) - return None; + return std::nullopt; return getValueId(VI.getGUID()); }; diff --git a/llvm/lib/Bitstream/Reader/BitstreamReader.cpp b/llvm/lib/Bitstream/Reader/BitstreamReader.cpp --- a/llvm/lib/Bitstream/Reader/BitstreamReader.cpp +++ b/llvm/lib/Bitstream/Reader/BitstreamReader.cpp @@ -438,7 +438,7 @@ switch (Entry.Kind) { case llvm::BitstreamEntry::SubBlock: // Handled for us already. case llvm::BitstreamEntry::Error: - return None; + return std::nullopt; case llvm::BitstreamEntry::EndBlock: return std::move(NewBlockInfo); case llvm::BitstreamEntry::Record: @@ -448,7 +448,8 @@ // Read abbrev records, associate them with CurBID. if (Entry.ID == bitc::DEFINE_ABBREV) { - if (!CurBlockInfo) return None; + if (!CurBlockInfo) + return std::nullopt; if (Error Err = ReadAbbrevRecord()) return std::move(Err); @@ -469,24 +470,25 @@ break; // Default behavior, ignore unknown content. 
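
The block-info reader above keeps its recovery strategy while switching the sentinel: unknown records are still skipped, but structurally invalid input (for example a name record arriving before any SETBID) aborts the whole read by returning std::nullopt. A rough, self-contained sketch of that control flow, simplified and not the actual BitstreamReader API:

    #include <optional>
    #include <string>
    #include <utility>
    #include <vector>

    struct BlockInfo {
      unsigned ID = 0;
      std::string Name;
    };

    // Records are (code, payload) pairs. Code 1 plays the role of SETBID
    // (start describing a block), code 2 the role of BLOCKNAME (name the
    // current block); everything else is ignored, as in the reader above.
    std::optional<std::vector<BlockInfo>>
    readBlockInfo(const std::vector<std::pair<unsigned, std::string>> &Records) {
      std::vector<BlockInfo> Infos;
      for (const auto &[Code, Payload] : Records) {
        switch (Code) {
        case 1:
          Infos.push_back({static_cast<unsigned>(Infos.size()), ""});
          break;
        case 2:
          if (Infos.empty())
            return std::nullopt; // malformed: a name before any SETBID
          Infos.back().Name = Payload;
          break;
        default:
          break; // default behavior: ignore unknown content
        }
      }
      return Infos;
    }

    int main() {
      std::vector<std::pair<unsigned, std::string>> Records = {
          {1, ""}, {2, "MODULE_BLOCK"}, {7, "ignored"}};
      return readBlockInfo(Records) ? 0 : 1;
    }
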
case bitc::BLOCKINFO_CODE_SETBID: if (Record.size() < 1) - return None; + return std::nullopt; CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]); break; case bitc::BLOCKINFO_CODE_BLOCKNAME: { if (!CurBlockInfo) - return None; + return std::nullopt; if (!ReadBlockInfoNames) break; // Ignore name. CurBlockInfo->Name = std::string(Record.begin(), Record.end()); break; } case bitc::BLOCKINFO_CODE_SETRECORDNAME: { - if (!CurBlockInfo) return None; - if (!ReadBlockInfoNames) - break; // Ignore name. - CurBlockInfo->RecordNames.emplace_back( - (unsigned)Record[0], std::string(Record.begin() + 1, Record.end())); - break; + if (!CurBlockInfo) + return std::nullopt; + if (!ReadBlockInfoNames) + break; // Ignore name. + CurBlockInfo->RecordNames.emplace_back( + (unsigned)Record[0], std::string(Record.begin() + 1, Record.end())); + break; } } } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -712,6 +712,16 @@ // GV's or GVSym's attributes will be used for the EmittedSym. emitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration()); + if (GV->isTagged()) { + Triple T = TM.getTargetTriple(); + + if (T.getArch() != Triple::aarch64 || !T.isAndroid()) + OutContext.reportError(SMLoc(), + "Tagged symbols (-fsanitize=memtag-globals) are " + "only supported on aarch64 + Android."); + OutStreamer->emitSymbolAttribute(EmittedSym, MAI->getMemtagAttr()); + } + if (!GV->hasInitializer()) // External globals require no extra code. return; @@ -1337,7 +1347,8 @@ OutStreamer->pushSection(); OutStreamer->switchSection(BBAddrMapSection); OutStreamer->AddComment("version"); - OutStreamer->emitInt8(OutStreamer->getContext().getBBAddrMapVersion()); + uint8_t BBAddrMapVersion = OutStreamer->getContext().getBBAddrMapVersion(); + OutStreamer->emitInt8(BBAddrMapVersion); OutStreamer->AddComment("feature"); OutStreamer->emitInt8(0); OutStreamer->AddComment("function address"); @@ -1349,12 +1360,18 @@ for (const MachineBasicBlock &MBB : MF) { const MCSymbol *MBBSymbol = MBB.isEntryBlock() ? FunctionSymbol : MBB.getSymbol(); + if (BBAddrMapVersion > 1) { + OutStreamer->AddComment("BB id"); + // Emit the BB ID for this basic block. + OutStreamer->emitULEB128IntValue(*MBB.getBBID()); + } // Emit the basic block offset relative to the end of the previous block. // This is zero unless the block is padded due to alignment. emitLabelDifferenceAsULEB128(MBBSymbol, PrevMBBEndSymbol); // Emit the basic block size. When BBs have alignments, their size cannot // always be computed from their offsets. emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol); + // Emit the Metadata. OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB)); PrevMBBEndSymbol = MBB.getEndSymbol(); } @@ -2270,9 +2287,9 @@ // Emit address-significance attributes for all globals. 
OutStreamer->emitAddrsig(); for (const GlobalValue &GV : M.global_values()) { - if (!GV.use_empty() && !GV.isTransitiveUsedByMetadataOnly() && - !GV.isThreadLocal() && !GV.hasDLLImportStorageClass() && - !GV.getName().startswith("llvm.") && !GV.hasAtLeastLocalUnnamedAddr()) + if (!GV.use_empty() && !GV.isThreadLocal() && + !GV.hasDLLImportStorageClass() && !GV.getName().startswith("llvm.") && + !GV.hasAtLeastLocalUnnamedAddr()) OutStreamer->emitAddrsigSym(getSymbol(&GV)); } } diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -2034,7 +2034,7 @@ ReturnAndArgTypeIndices.back() = TypeIndex::None(); } TypeIndex ReturnTypeIndex = TypeIndex::Void(); - ArrayRef ArgTypeIndices = None; + ArrayRef ArgTypeIndices = std::nullopt; if (!ReturnAndArgTypeIndices.empty()) { auto ReturnAndArgTypesRef = makeArrayRef(ReturnAndArgTypeIndices); ReturnTypeIndex = ReturnAndArgTypesRef.front(); diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp --- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -118,13 +118,13 @@ for (auto RangesI = Ranges.begin(), RangesE = Ranges.end(); RangesI != RangesE; ++RangesI) { if (EndMI && Ordering.isBefore(EndMI, RangesI->first)) - return None; + return std::nullopt; if (EndMI && !Ordering.isBefore(RangesI->second, EndMI)) return RangesI; if (Ordering.isBefore(StartMI, RangesI->second)) return RangesI; } - return None; + return std::nullopt; } void DbgValueHistoryMap::trimLocationRanges( diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp --- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -36,9 +36,9 @@ DbgVariableLocation Location; // Variables calculated from multiple locations can't be represented here. if (Instruction.getNumDebugOperands() != 1) - return None; + return std::nullopt; if (!Instruction.getDebugOperand(0).isReg()) - return None; + return std::nullopt; Location.Register = Instruction.getDebugOperand(0).getReg(); Location.FragmentInfo.reset(); // We only handle expressions generated by DIExpression::appendOffset, @@ -53,7 +53,7 @@ Op->getOp() == dwarf::DW_OP_LLVM_arg) ++Op; else - return None; + return std::nullopt; } while (Op != DIExpr->expr_op_end()) { switch (Op->getOp()) { @@ -84,7 +84,7 @@ Offset = 0; break; default: - return None; + return std::nullopt; } ++Op; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h --- a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h +++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h @@ -165,7 +165,7 @@ ListBuilder(DebugLocStream &Locs, DwarfCompileUnit &CU, AsmPrinter &Asm, DbgVariable &V, const MachineInstr &MI) : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)), - TagOffset(None) {} + TagOffset(std::nullopt) {} void setTagOffset(uint8_t TO) { TagOffset = TO; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -122,8 +122,8 @@ // extend .file to support this. unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 
0 : getUniqueID(); if (!File) - return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", None, None, - CUID); + return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", std::nullopt, + std::nullopt, CUID); if (LastFile != File) { LastFile = File; @@ -671,13 +671,13 @@ // Add the call site information to the DIE. const DILocation *IA = Scope->getInlinedAt(); - addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, + addUInt(*ScopeDIE, dwarf::DW_AT_call_file, std::nullopt, getOrCreateSourceID(IA->getFile())); - addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine()); + addUInt(*ScopeDIE, dwarf::DW_AT_call_line, std::nullopt, IA->getLine()); if (IA->getColumn()) - addUInt(*ScopeDIE, dwarf::DW_AT_call_column, None, IA->getColumn()); + addUInt(*ScopeDIE, dwarf::DW_AT_call_column, std::nullopt, IA->getColumn()); if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4) - addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None, + addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, std::nullopt, IA->getDiscriminator()); // Add name to the name table, we do this here because we're guaranteed @@ -1594,7 +1594,8 @@ "_" + Twine(Btr.BitSize)).toStringRef(Str)); addUInt(Die, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Btr.Encoding); // Round up to smallest number of bytes that contains this number of bits. - addUInt(Die, dwarf::DW_AT_byte_size, None, divideCeil(Btr.BitSize, 8)); + addUInt(Die, dwarf::DW_AT_byte_size, std::nullopt, + divideCeil(Btr.BitSize, 8)); Btr.Die = &Die; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -3532,10 +3532,10 @@ Optional DwarfDebug::getMD5AsBytes(const DIFile *File) const { assert(File); if (getDwarfVersion() < 5) - return None; + return std::nullopt; Optional> Checksum = File->getChecksum(); if (!Checksum || Checksum->Kind != DIFile::CSK_MD5) - return None; + return std::nullopt; // Convert the string checksum to an MD5Result for the streamer. // The verifier validates the checksum so we assume it's okay. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -55,7 +55,7 @@ /// Consume one operation. Optional take() { if (Start == End) - return None; + return std::nullopt; return *(Start++); } @@ -65,18 +65,18 @@ /// Return the current operation. Optional peek() const { if (Start == End) - return None; + return std::nullopt; return *(Start); } /// Return the next operation. 
Optional peekNext() const { if (Start == End) - return None; + return std::nullopt; auto Next = Start.getNext(); if (Next == End) - return None; + return std::nullopt; return *Next; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -604,7 +604,7 @@ emitLegacySExt(PrevConvertOp->getArg(0)); else if (Encoding == dwarf::DW_ATE_unsigned) emitLegacyZExt(PrevConvertOp->getArg(0)); - PrevConvertOp = None; + PrevConvertOp = std::nullopt; } else { PrevConvertOp = Op; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -411,8 +411,8 @@ return; unsigned FileID = getOrCreateSourceID(File); - addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, None, Line); + addUInt(Die, dwarf::DW_AT_decl_file, std::nullopt, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, std::nullopt, Line); } void DwarfUnit::addSourceLine(DIE &Die, const DILocalVariable *V) { @@ -705,12 +705,12 @@ BTy->getEncoding()); uint64_t Size = BTy->getSizeInBits() >> 3; - addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); + addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size); if (BTy->isBigEndian()) - addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_big); + addUInt(Buffer, dwarf::DW_AT_endianity, std::nullopt, dwarf::DW_END_big); else if (BTy->isLittleEndian()) - addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_little); + addUInt(Buffer, dwarf::DW_AT_endianity, std::nullopt, dwarf::DW_END_little); } void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIStringType *STy) { @@ -734,7 +734,7 @@ addBlock(Buffer, dwarf::DW_AT_string_length, DwarfExpr.finalize()); } else { uint64_t Size = STy->getSizeInBits() >> 3; - addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); + addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size); } if (DIExpression *Expr = STy->getStringLocationExp()) { @@ -785,7 +785,7 @@ && Tag != dwarf::DW_TAG_ptr_to_member_type && Tag != dwarf::DW_TAG_reference_type && Tag != dwarf::DW_TAG_rvalue_reference_type) - addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); + addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size); if (Tag == dwarf::DW_TAG_ptr_to_member_type) addDIEEntry(Buffer, dwarf::DW_AT_containing_type, @@ -932,9 +932,11 @@ if (const ConstantInt *CI = dyn_cast_or_null(DDTy->getDiscriminantValue())) { if (DD->isUnsignedDIType(Discriminator->getBaseType())) - addUInt(Variant, dwarf::DW_AT_discr_value, None, CI->getZExtValue()); + addUInt(Variant, dwarf::DW_AT_discr_value, std::nullopt, + CI->getZExtValue()); else - addSInt(Variant, dwarf::DW_AT_discr_value, None, CI->getSExtValue()); + addSInt(Variant, dwarf::DW_AT_discr_value, std::nullopt, + CI->getSExtValue()); } constructMemberDIE(Variant, DDTy); } else { @@ -954,7 +956,7 @@ if (!SetterName.empty()) addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName); if (unsigned PropertyAttributes = Property->getAttributes()) - addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None, + addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, std::nullopt, PropertyAttributes); } else if (auto *Composite = dyn_cast(Element)) { if (Composite->getTag() == dwarf::DW_TAG_variant_part) { @@ -1020,10 +1022,10 @@ // TODO: Do we care about size for enum forward declarations? 
if (Size && (!CTy->isForwardDecl() || Tag == dwarf::DW_TAG_enumeration_type)) - addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); + addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, Size); else if (!CTy->isForwardDecl()) // Add zero size if it is not a forward declaration. - addUInt(Buffer, dwarf::DW_AT_byte_size, None, 0); + addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, 0); // If we're a forward decl, say so. if (CTy->isForwardDecl()) @@ -1142,10 +1144,10 @@ if (!M->getAPINotesFile().empty()) addString(MDie, dwarf::DW_AT_LLVM_apinotes, M->getAPINotesFile()); if (M->getFile()) - addUInt(MDie, dwarf::DW_AT_decl_file, None, + addUInt(MDie, dwarf::DW_AT_decl_file, std::nullopt, getOrCreateSourceID(M->getFile())); if (M->getLineNo()) - addUInt(MDie, dwarf::DW_AT_decl_line, None, M->getLineNo()); + addUInt(MDie, dwarf::DW_AT_decl_line, std::nullopt, M->getLineNo()); if (M->getIsDecl()) addFlag(MDie, dwarf::DW_AT_declaration); @@ -1208,10 +1210,10 @@ unsigned DeclID = getOrCreateSourceID(SPDecl->getFile()); unsigned DefID = getOrCreateSourceID(SP->getFile()); if (DeclID != DefID) - addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID); + addUInt(SPDie, dwarf::DW_AT_decl_file, std::nullopt, DefID); if (SP->getLine() != SPDecl->getLine()) - addUInt(SPDie, dwarf::DW_AT_decl_line, None, SP->getLine()); + addUInt(SPDie, dwarf::DW_AT_decl_line, std::nullopt, SP->getLine()); } } @@ -1379,7 +1381,7 @@ } else if (auto *BI = Bound.dyn_cast()) { if (Attr == dwarf::DW_AT_count) { if (BI->getSExtValue() != -1) - addUInt(DW_Subrange, Attr, None, BI->getSExtValue()); + addUInt(DW_Subrange, Attr, std::nullopt, BI->getSExtValue()); } else if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 || BI->getSExtValue() != DefaultLowerBound) addSInt(DW_Subrange, Attr, dwarf::DW_FORM_sdata, BI->getSExtValue()); @@ -1440,7 +1442,7 @@ IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, getUnitDie()); StringRef Name = "__ARRAY_SIZE_TYPE__"; addString(*IndexTyDie, dwarf::DW_AT_name, Name); - addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t)); + addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, std::nullopt, sizeof(int64_t)); addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, dwarf::getArrayIndexTypeEncoding( (dwarf::SourceLanguage)getLanguage())); @@ -1481,7 +1483,7 @@ if (CTy->isVector()) { addFlag(Buffer, dwarf::DW_AT_GNU_vector); if (hasVectorBeenPadded(CTy)) - addUInt(Buffer, dwarf::DW_AT_byte_size, None, + addUInt(Buffer, dwarf::DW_AT_byte_size, std::nullopt, CTy->getSizeInBits() / CHAR_BIT); } @@ -1632,8 +1634,8 @@ if (IsBitfield) { // Handle bitfield, assume bytes are 8 bits. if (DD->useDWARF2Bitfields()) - addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8); - addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size); + addUInt(MemberDie, dwarf::DW_AT_byte_size, std::nullopt, FieldSize / 8); + addUInt(MemberDie, dwarf::DW_AT_bit_size, std::nullopt, Size); uint64_t Offset = DT->getOffsetInBits(); // We can't use DT->getAlignInBits() here: AlignInBits for member type @@ -1655,10 +1657,10 @@ if (Asm->getDataLayout().isLittleEndian()) Offset = FieldSize - (Offset + Size); - addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset); + addUInt(MemberDie, dwarf::DW_AT_bit_offset, std::nullopt, Offset); OffsetInBytes = FieldOffset >> 3; } else { - addUInt(MemberDie, dwarf::DW_AT_data_bit_offset, None, Offset); + addUInt(MemberDie, dwarf::DW_AT_data_bit_offset, std::nullopt, Offset); } } else { // This is not a bitfield. 
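
Most of the DwarfUnit changes above pass std::nullopt where the optional form argument of addUInt/addSInt previously received None; leaving the form unset lets the emitter choose a suitable encoding for the value. A minimal stand-alone sketch of that calling convention; addUIntSketch and Form are illustrative names, not the real DwarfUnit API:

    #include <cstdint>
    #include <iostream>
    #include <optional>

    enum class Form { Data1, Data2, UData };

    // Illustrative stand-in (not the real DwarfUnit::addUInt): when the
    // caller passes std::nullopt for the form, the emitter picks a best fit.
    void addUIntSketch(std::optional<Form> F, uint64_t Value) {
      if (!F)
        F = Value <= 0xff     ? Form::Data1
            : Value <= 0xffff ? Form::Data2
                              : Form::UData;
      std::cout << "value " << Value << " emitted with form "
                << static_cast<int>(*F) << '\n';
    }

    int main() {
      addUIntSketch(std::nullopt, 8);       // e.g. a byte size: callee chooses
      addUIntSketch(Form::UData, 1u << 20); // caller forces a specific form
      return 0;
    }
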
@@ -1682,7 +1684,7 @@ addUInt(MemberDie, dwarf::DW_AT_data_member_location, dwarf::DW_FORM_udata, OffsetInBytes); else - addUInt(MemberDie, dwarf::DW_AT_data_member_location, None, + addUInt(MemberDie, dwarf::DW_AT_data_member_location, std::nullopt, OffsetInBytes); } } diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -71,13 +71,14 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/MC/MCContext.h" #include "llvm/Target/TargetMachine.h" #include @@ -131,14 +132,15 @@ // This function updates and optimizes the branching instructions of every basic // block in a given function to account for changes in the layout. -static void updateBranches( - MachineFunction &MF, - const SmallVector &PreLayoutFallThroughs) { +static void +updateBranches(MachineFunction &MF, + DenseMap + &PreLayoutFallThroughs) { const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); SmallVector Cond; for (auto &MBB : MF) { auto NextMBBI = std::next(MBB.getIterator()); - auto *FTMBB = PreLayoutFallThroughs[MBB.getNumber()]; + auto *FTMBB = PreLayoutFallThroughs[&MBB]; // If this block had a fallthrough before we need an explicit unconditional // branch to that block if either // 1- the block ends a section, which means its next block may be @@ -183,13 +185,13 @@ return true; } - V.resize(MF.getNumBlockIDs()); - for (auto bbClusterInfo : P.second) { - // Bail out if the cluster information contains invalid MBB numbers. - if (bbClusterInfo.MBBNumber >= MF.getNumBlockIDs()) - return false; - V[bbClusterInfo.MBBNumber] = bbClusterInfo; - } + unsigned MaxBBID = 0; + for (const BBClusterInfo &BBCI : P.second) + if (BBCI.BBID > MaxBBID) + MaxBBID = BBCI.BBID; + V.resize(MaxBBID + 1); + for (const BBClusterInfo &BBCI : P.second) + V[BBCI.BBID] = BBCI; return true; } @@ -202,9 +204,10 @@ // and "Cold" succeeding all other clusters. // FuncBBClusterInfo represent the cluster information for basic blocks. If this // is empty, it means unique sections for all basic blocks in the function. -static void -assignSections(MachineFunction &MF, - const std::vector> &FuncBBClusterInfo) { +static void assignSections( + MachineFunction &MF, + const std::vector> &FuncBBClusterInfo, + const DenseMap &PreLayoutPosition) { assert(MF.hasBBSections() && "BB Sections is not set for function."); // This variable stores the section ID of the cluster containing eh_pads (if // all eh_pads are one cluster). If more than one cluster contain eh_pads, we @@ -219,11 +222,12 @@ if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All || FuncBBClusterInfo.empty()) { // If unique sections are desired for all basic blocks of the function, we - // set every basic block's section ID equal to its number (basic block - // id). This further ensures that basic blocks are ordered canonically. 
- MBB.setSectionID({static_cast(MBB.getNumber())}); - } else if (FuncBBClusterInfo[MBB.getNumber()]) - MBB.setSectionID(FuncBBClusterInfo[MBB.getNumber()]->ClusterID); + // set every basic block's section ID equal to its original position in + // the layout. This ensures that basic blocks are ordered canonically. + MBB.setSectionID(PreLayoutPosition.lookup(&MBB)); + } else if (MBB.getBBIDOrNumber() < FuncBBClusterInfo.size() && + FuncBBClusterInfo[MBB.getBBIDOrNumber()].has_value()) + MBB.setSectionID(FuncBBClusterInfo[MBB.getBBIDOrNumber()]->ClusterID); else { // BB goes into the special cold section if it is not specified in the // cluster info map. @@ -250,12 +254,15 @@ void llvm::sortBasicBlocksAndUpdateBranches( MachineFunction &MF, MachineBasicBlockComparator MBBCmp) { - SmallVector PreLayoutFallThroughs( - MF.getNumBlockIDs()); + [[maybe_unused]] const MachineBasicBlock *EntryBlock = &MF.front(); + DenseMap + PreLayoutFallThroughs; for (auto &MBB : MF) - PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough(); + PreLayoutFallThroughs[&MBB] = MBB.getFallThrough(); MF.sort(MBBCmp); + assert(&MF.front() == EntryBlock && + "Entry block should not be displaced by basic block sections"); // Set IsBeginSection and IsEndSection according to the assigned section IDs. MF.assignBeginEndSections(); @@ -318,12 +325,14 @@ if (BBSectionsType == BasicBlockSection::List && hasInstrProfHashMismatch(MF)) return true; - - // Renumber blocks before sorting them for basic block sections. This is - // useful during sorting, basic blocks in the same section will retain the - // default order. This renumbering should also be done for basic block - // labels to match the profiles with the correct blocks. - MF.RenumberBlocks(); + // With LLVM_BB_ADDR_MAP versions less than 2, renumber blocks before sorting + // them. This is useful during sorting, as basic blocks in the same section will + // retain the default order. This renumbering should also be done for basic + // block labels to match the profiles with the correct blocks. Note: This is + // only needed for BB address map versions lower than 2. + uint8_t BBAddrMapVersion = MF.getContext().getBBAddrMapVersion(); + if (BBAddrMapVersion < 2) + MF.RenumberBlocks(); if (BBSectionsType == BasicBlockSection::Labels) { MF.setBBSectionsType(BBSectionsType); @@ -338,7 +347,14 @@ FuncBBClusterInfo)) return true; MF.setBBSectionsType(BBSectionsType); - assignSections(MF, FuncBBClusterInfo); + + // Compute the original positions to use for internal ordering of the blocks + // in the cold section. + DenseMap PreLayoutPosition; + unsigned Position = 0; + for (const auto &MBB : MF) + PreLayoutPosition[&MBB] = Position++; + assignSections(MF, FuncBBClusterInfo, PreLayoutPosition); // We make sure that the cluster including the entry basic block precedes all // other clusters. @@ -372,9 +388,9 @@ // If the two basic block are in the same section, the order is decided by // their position within the section.
if (XSectionID.Type == MBBSectionID::SectionType::Default) - return FuncBBClusterInfo[X.getNumber()]->PositionInCluster < - FuncBBClusterInfo[Y.getNumber()]->PositionInCluster; - return X.getNumber() < Y.getNumber(); + return FuncBBClusterInfo[X.getBBIDOrNumber()]->PositionInCluster < + FuncBBClusterInfo[Y.getBBIDOrNumber()]->PositionInCluster; + return PreLayoutPosition[&X] < PreLayoutPosition[&Y]; }; sortBasicBlocksAndUpdateBranches(MF, Comparator); diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -93,23 +93,23 @@ if (FI == ProgramBBClusterInfo.end()) return invalidProfileError( "Cluster list does not follow a function name specifier."); - SmallVector BBIndexes; - S.split(BBIndexes, ' '); + SmallVector BBIDs; + S.split(BBIDs, ' '); // Reset current cluster position. CurrentPosition = 0; - for (auto BBIndexStr : BBIndexes) { - unsigned long long BBIndex; - if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex)) + for (auto BBIDStr : BBIDs) { + unsigned long long BBID; + if (getAsUnsignedInteger(BBIDStr, 10, BBID)) return invalidProfileError(Twine("Unsigned integer expected: '") + - BBIndexStr + "'."); - if (!FuncBBIDs.insert(BBIndex).second) + BBIDStr + "'."); + if (!FuncBBIDs.insert(BBID).second) return invalidProfileError(Twine("Duplicate basic block id found '") + - BBIndexStr + "'."); - if (!BBIndex && CurrentPosition) + BBIDStr + "'."); + if (BBID == 0 && CurrentPosition) return invalidProfileError("Entry BB (0) does not begin a cluster."); - FI->second.emplace_back(BBClusterInfo{ - ((unsigned)BBIndex), CurrentCluster, CurrentPosition++}); + FI->second.emplace_back( + BBClusterInfo{((unsigned)BBID), CurrentCluster, CurrentPosition++}); } CurrentCluster++; } else { // This is a function name specifier. diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -88,7 +88,9 @@ bool relaxBranchInstructions(); void scanFunction(); - MachineBasicBlock *createNewBlockAfter(MachineBasicBlock &BB); + MachineBasicBlock *createNewBlockAfter(MachineBasicBlock &OrigMBB); + MachineBasicBlock *createNewBlockAfter(MachineBasicBlock &OrigMBB, + const BasicBlock *BB); MachineBasicBlock *splitBlockBeforeInstr(MachineInstr &MI, MachineBasicBlock *DestBB); @@ -202,12 +204,20 @@ } } -/// Insert a new empty basic block and insert it after \BB -MachineBasicBlock *BranchRelaxation::createNewBlockAfter(MachineBasicBlock &BB) { +/// Insert a new empty MachineBasicBlock and insert it after \p OrigMBB +MachineBasicBlock * +BranchRelaxation::createNewBlockAfter(MachineBasicBlock &OrigBB) { + return createNewBlockAfter(OrigBB, OrigBB.getBasicBlock()); +} + +/// Insert a new empty MachineBasicBlock with \p BB as its BasicBlock +/// and insert it after \p OrigMBB +MachineBasicBlock * +BranchRelaxation::createNewBlockAfter(MachineBasicBlock &OrigMBB, + const BasicBlock *BB) { // Create a new MBB for the code after the OrigBB. - MachineBasicBlock *NewBB = - MF->CreateMachineBasicBlock(BB.getBasicBlock()); - MF->insert(++BB.getIterator(), NewBB); + MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(BB); + MF->insert(++OrigMBB.getIterator(), NewBB); // Insert an entry into BlockInfo to align it properly with the block numbers. 
BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); @@ -482,7 +492,8 @@ // Create the optional restore block and, initially, place it at the end of // function. That block will be placed later if it's used; otherwise, it will // be erased. - MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back()); + MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back(), + DestBB->getBasicBlock()); TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL, DestOffset - SrcOffset, RS.get()); diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -119,6 +119,7 @@ MachineFunctionSplitter.cpp MachineInstrBundle.cpp MachineInstr.cpp + MachineLateInstrsCleanup.cpp MachineLICM.cpp MachineLoopInfo.cpp MachineLoopUtils.cpp diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp --- a/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/llvm/lib/CodeGen/CallingConvLower.cpp @@ -240,8 +240,8 @@ // Oftentimes calling conventions will not user register parameters for // variadic functions, so we need to assume we're not variadic so that we get // all the registers that might be used in a non-variadic call. - SaveAndRestore SavedVarArg(IsVarArg, false); - SaveAndRestore SavedMustTail(AnalyzingMustTailForwardedRegs, true); + SaveAndRestore SavedVarArg(IsVarArg, false); + SaveAndRestore SavedMustTail(AnalyzingMustTailForwardedRegs, true); for (MVT RegVT : RegParmTypes) { SmallVector RemainingRegs; diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -78,6 +78,7 @@ initializeMachineCycleInfoWrapperPassPass(Registry); initializeMachineDominatorTreePass(Registry); initializeMachineFunctionPrinterPassPass(Registry); + initializeMachineLateInstrsCleanupPass(Registry); initializeMachineLICMPass(Registry); initializeMachineLoopInfoPass(Registry); initializeMachineModuleInfoWrapperPassPass(Registry); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1438,16 +1438,16 @@ getIVIncrement(const PHINode *PN, const LoopInfo *LI) { const Loop *L = LI->getLoopFor(PN->getParent()); if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch()) - return None; + return std::nullopt; auto *IVInc = dyn_cast(PN->getIncomingValueForBlock(L->getLoopLatch())); if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L) - return None; + return std::nullopt; Instruction *LHS = nullptr; Constant *Step = nullptr; if (matchIncrement(IVInc, LHS, Step) && LHS == PN) return std::make_pair(IVInc, Step); - return None; + return std::nullopt; } static bool isIVIncrement(const Value *V, const LoopInfo *LI) { @@ -4020,10 +4020,10 @@ [this](const Value *V) -> std::optional> { auto *PN = dyn_cast(V); if (!PN) - return None; + return std::nullopt; auto IVInc = getIVIncrement(PN, &LI); if (!IVInc) - return None; + return std::nullopt; // TODO: The result of the intrinsics above is two-compliment. However when // IV inc is expressed as add or sub, iv.next is potentially a poison value. // If it has nuw or nsw flags, we need to make sure that these flags are @@ -4032,10 +4032,10 @@ // potentially complex analysis needed to prove this, we reject such cases. 
if (auto *OIVInc = dyn_cast(IVInc->first)) if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap()) - return None; + return std::nullopt; if (auto *ConstantStep = dyn_cast(IVInc->second)) return std::make_pair(IVInc->first, ConstantStep->getValue()); - return None; + return std::nullopt; }; // Try to account for the following special case: diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" +#include using namespace llvm; @@ -40,14 +41,15 @@ return *NAME##View; \ } +// Temporary macro for incremental transition to std::optional. #define CGOPT_EXP(TY, NAME) \ CGOPT(TY, NAME) \ - Optional codegen::getExplicit##NAME() { \ + std::optional codegen::getExplicit##NAME() { \ if (NAME##View->getNumOccurrences()) { \ TY res = *NAME##View; \ return res; \ } \ - return None; \ + return std::nullopt; \ } CGOPT(std::string, MArch) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -114,18 +114,18 @@ // Need at least two byte positions to decide on endianness. unsigned Width = MemOffset2Idx.size(); if (Width < 2) - return None; + return std::nullopt; bool BigEndian = true, LittleEndian = true; for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) { auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset); if (MemOffsetAndIdx == MemOffset2Idx.end()) - return None; + return std::nullopt; const int64_t Idx = MemOffsetAndIdx->second - LowestIdx; assert(Idx >= 0 && "Expected non-negative byte offset?"); LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset); BigEndian &= Idx == bigEndianByteAt(Width, MemOffset); if (!BigEndian && !LittleEndian) - return None; + return std::nullopt; } assert((BigEndian != LittleEndian) && @@ -1290,7 +1290,7 @@ const MachineRegisterInfo &MRI) { const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI); if (!MaybeCst) - return None; + return std::nullopt; APFloat V = MaybeCst->getValueAPF(); switch (Opcode) { @@ -3246,7 +3246,7 @@ // In the combine, we want to elimate the entire tree. if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS)) - return None; + return std::nullopt; // If it's a G_OR, save it and continue to walk. If it's not, then it's // something that may be a load + arithmetic. @@ -3263,7 +3263,7 @@ // We're going to try and merge each register into a wider power-of-2 type, // so we ought to have an even number of registers. if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0) - return None; + return std::nullopt; return RegsToVisit; } @@ -3289,15 +3289,15 @@ } if (Shift % MemSizeInBits != 0) - return None; + return std::nullopt; // TODO: Handle other types of loads. auto *Load = getOpcodeDef(MaybeLoad, MRI); if (!Load) - return None; + return std::nullopt; if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits) - return None; + return std::nullopt; return std::make_pair(Load, Shift / MemSizeInBits); } @@ -3342,7 +3342,7 @@ // shifted) value. 
auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI); if (!LoadAndPos) - return None; + return std::nullopt; GZExtLoad *Load; int64_t DstPos; std::tie(Load, DstPos) = *LoadAndPos; @@ -3353,14 +3353,14 @@ if (!MBB) MBB = LoadMBB; if (LoadMBB != MBB) - return None; + return std::nullopt; // Make sure that the MachineMemOperands of every seen load are compatible. auto &LoadMMO = Load->getMMO(); if (!MMO) MMO = &LoadMMO; if (MMO->getAddrSpace() != LoadMMO.getAddrSpace()) - return None; + return std::nullopt; // Find out what the base pointer and index for the load is. Register LoadPtr; @@ -3373,7 +3373,7 @@ // Don't combine things like a[i], a[i] -> a bigger load. if (!SeenIdx.insert(Idx).second) - return None; + return std::nullopt; // Every load must share the same base pointer; don't combine things like: // @@ -3381,7 +3381,7 @@ if (!BasePtr.isValid()) BasePtr = LoadPtr; if (BasePtr != LoadPtr) - return None; + return std::nullopt; if (Idx < LowestIdx) { LowestIdx = Idx; @@ -3393,7 +3393,7 @@ // // a[i] << 16, a[i + k] << 16 -> a bigger load. if (!MemOffset2Idx.try_emplace(DstPos, Idx).second) - return None; + return std::nullopt; Loads.insert(Load); // Keep track of the position of the earliest/latest loads in the pattern. @@ -3428,9 +3428,9 @@ if (Loads.count(&MI)) continue; if (MI.isLoadFoldBarrier()) - return None; + return std::nullopt; if (Iter++ == MaxIter) - return None; + return std::nullopt; } return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad); @@ -3560,7 +3560,7 @@ MachineRegisterInfo &MRI) { Register TruncVal; if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal)))) - return None; + return std::nullopt; // The shift amount must be a constant multiple of the narrow type. // It is translated to the offset address in the wide source value "y". @@ -3578,21 +3578,21 @@ SrcVal = TruncVal; return 0; // If it's the lowest index store. 
} - return None; + return std::nullopt; } unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits(); if (ShiftAmt % NarrowBits!= 0) - return None; + return std::nullopt; const unsigned Offset = ShiftAmt / NarrowBits; if (SrcVal.isValid() && FoundSrcVal != SrcVal) - return None; + return std::nullopt; if (!SrcVal.isValid()) SrcVal = FoundSrcVal; else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal)) - return None; + return std::nullopt; return Offset; } diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -84,6 +84,7 @@ #include #include #include +#include #include #include #include @@ -2304,7 +2305,7 @@ // Convert the metadata argument to a constant integer Metadata *MD = cast(CI.getArgOperand(1))->getMetadata(); - Optional RoundMode = + std::optional RoundMode = convertStrToRoundingMode(cast(MD)->getString()); // Add the Rounding mode as an integer diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -205,7 +205,7 @@ if (Value == 0) { Res = Op0; - return None; + return std::nullopt; } Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0)); diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -293,7 +293,7 @@ assert((!ValAndVReg || ValAndVReg->VReg == VReg) && "Value found while looking through instrs"); if (!ValAndVReg) - return None; + return std::nullopt; return ValAndVReg->Value; } @@ -302,7 +302,7 @@ Optional Val = getIConstantVRegVal(VReg, MRI); if (Val && Val->getBitWidth() <= 64) return Val->getSExtValue(); - return None; + return std::nullopt; } namespace { @@ -322,7 +322,7 @@ switch (MI->getOpcode()) { case TargetOpcode::G_ANYEXT: if (!LookThroughAnyExt) - return None; + return std::nullopt; [[fallthrough]]; case TargetOpcode::G_TRUNC: case TargetOpcode::G_SEXT: @@ -335,21 +335,21 @@ case TargetOpcode::COPY: VReg = MI->getOperand(1).getReg(); if (Register::isPhysicalRegister(VReg)) - return None; + return std::nullopt; break; case TargetOpcode::G_INTTOPTR: VReg = MI->getOperand(1).getReg(); break; default: - return None; + return std::nullopt; } } if (!MI || !IsConstantOpcode(MI)) - return None; + return std::nullopt; Optional MaybeVal = getAPCstValue(MI); if (!MaybeVal) - return None; + return std::nullopt; APInt &Val = *MaybeVal; while (!SeenOpcodes.empty()) { std::pair OpcodeAndSize = SeenOpcodes.pop_back_val(); @@ -393,7 +393,7 @@ const MachineOperand &CstVal = MI->getOperand(1); if (CstVal.isCImm()) return CstVal.getCImm()->getValue(); - return None; + return std::nullopt; } Optional getCImmOrFPImmAsAPInt(const MachineInstr *MI) { @@ -402,7 +402,7 @@ return CstVal.getCImm()->getValue(); if (CstVal.isFPImm()) return CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); - return None; + return std::nullopt; } } // end anonymous namespace @@ -426,7 +426,7 @@ auto Reg = getConstantVRegValWithLookThrough( VReg, MRI, isFConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs); if (!Reg) - return None; + return std::nullopt; return FPValueAndVReg{getConstantFPVRegVal(Reg->VReg, MRI)->getValueAPF(), Reg->VReg}; } @@ -445,7 +445,7 @@ auto *DefMI = MRI.getVRegDef(Reg); auto DstTy = 
MRI.getType(DefMI->getOperand(0).getReg()); if (!DstTy.isValid()) - return None; + return std::nullopt; unsigned Opc = DefMI->getOpcode(); while (Opc == TargetOpcode::COPY || isPreISelGenericOptimizationHint(Opc)) { Register SrcReg = DefMI->getOperand(1).getReg(); @@ -497,11 +497,11 @@ const MachineRegisterInfo &MRI) { auto MaybeOp2Cst = getAnyConstantVRegValWithLookThrough(Op2, MRI, false); if (!MaybeOp2Cst) - return None; + return std::nullopt; auto MaybeOp1Cst = getAnyConstantVRegValWithLookThrough(Op1, MRI, false); if (!MaybeOp1Cst) - return None; + return std::nullopt; const APInt &C1 = MaybeOp1Cst->Value; const APInt &C2 = MaybeOp2Cst->Value; @@ -553,7 +553,7 @@ return APIntOps::umax(C1, C2); } - return None; + return std::nullopt; } Optional llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, @@ -561,11 +561,11 @@ const MachineRegisterInfo &MRI) { const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI); if (!Op2Cst) - return None; + return std::nullopt; const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI); if (!Op1Cst) - return None; + return std::nullopt; APFloat C1 = Op1Cst->getValueAPF(); const APFloat &C2 = Op2Cst->getValueAPF(); @@ -607,7 +607,7 @@ break; } - return None; + return std::nullopt; } SmallVector @@ -773,7 +773,7 @@ } } } - return None; + return std::nullopt; } Optional llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, @@ -786,7 +786,7 @@ APFloat::rmNearestTiesToEven); return DstVal; } - return None; + return std::nullopt; } Optional> @@ -796,20 +796,20 @@ auto tryFoldScalar = [&](Register R) -> std::optional { auto MaybeCst = getIConstantVRegVal(R, MRI); if (!MaybeCst) - return None; + return std::nullopt; return MaybeCst->countLeadingZeros(); }; if (Ty.isVector()) { // Try to constant fold each element. auto *BV = getOpcodeDef(Src, MRI); if (!BV) - return None; + return std::nullopt; for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) { if (auto MaybeFold = tryFoldScalar(BV->getSourceReg(SrcIdx))) { FoldedCTLZs.emplace_back(*MaybeFold); continue; } - return None; + return std::nullopt; } return FoldedCTLZs; } @@ -817,7 +817,7 @@ FoldedCTLZs.emplace_back(*MaybeCst); return FoldedCTLZs; } - return None; + return std::nullopt; } bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI, @@ -1016,7 +1016,7 @@ int SplatValue = *FirstDefinedIdx; if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()), [&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; })) - return None; + return std::nullopt; return SplatValue; } @@ -1033,10 +1033,10 @@ bool AllowUndef) { MachineInstr *MI = getDefIgnoringCopies(VReg, MRI); if (!MI) - return None; + return std::nullopt; if (!isBuildVectorOp(MI->getOpcode())) - return None; + return std::nullopt; Optional SplatValAndReg; for (MachineOperand &Op : MI->uses()) { @@ -1048,7 +1048,7 @@ if (!ElementValAndReg) { if (AllowUndef && isa(MRI.getVRegDef(Element))) continue; - return None; + return std::nullopt; } // Record splat value @@ -1057,7 +1057,7 @@ // Different constant then the one already recorded, not a constant splat. 
if (SplatValAndReg->Value != ElementValAndReg->Value) - return None; + return std::nullopt; } return SplatValAndReg; @@ -1089,7 +1089,7 @@ return ValAndVReg->Value; } - return None; + return std::nullopt; } Optional llvm::getIConstantSplatVal(const MachineInstr &MI, @@ -1103,7 +1103,7 @@ if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false)) return getIConstantVRegSExtVal(SplatValAndReg->VReg, MRI); - return None; + return std::nullopt; } Optional @@ -1117,7 +1117,7 @@ bool AllowUndef) { if (auto SplatValAndReg = getAnyConstantSplat(VReg, MRI, AllowUndef)) return getFConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI); - return None; + return std::nullopt; } bool llvm::isBuildVectorAllZeros(const MachineInstr &MI, @@ -1136,13 +1136,13 @@ const MachineRegisterInfo &MRI) { unsigned Opc = MI.getOpcode(); if (!isBuildVectorOp(Opc)) - return None; + return std::nullopt; if (auto Splat = getIConstantSplatSExtVal(MI, MRI)) return RegOrConstant(*Splat); auto Reg = MI.getOperand(1).getReg(); if (any_of(make_range(MI.operands_begin() + 2, MI.operands_end()), [&Reg](const MachineOperand &Op) { return Op.getReg() != Reg; })) - return None; + return std::nullopt; return RegOrConstant(Reg); } @@ -1210,7 +1210,7 @@ return C->Value; auto MaybeCst = getIConstantSplatSExtVal(MI, MRI); if (!MaybeCst) - return None; + return std::nullopt; const unsigned ScalarSize = MRI.getType(Def).getScalarSizeInBits(); return APInt(ScalarSize, *MaybeCst, true); } diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp --- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -261,12 +261,12 @@ if (canReorder(*I, MI)) continue; - if (Dep == None) { + if (Dep == std::nullopt) { // Found one possible dependency, keep track of it. Dep = I; } else { // We found two dependencies, so bail out. - return {false, None}; + return {false, std::nullopt}; } } @@ -805,7 +805,7 @@ // Insert an *unconditional* branch to not-null successor - we expect // block placement to remove fallthroughs later. TII->insertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr, - /*Cond=*/None, DL); + /*Cond=*/std::nullopt, DL); NumImplicitNullChecks++; } diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -1613,7 +1613,7 @@ RMEnt->removeOperand(i - 1); } } - Edit.eliminateDeadDefs(SpillsToRm, None); + Edit.eliminateDeadDefs(SpillsToRm, std::nullopt); } } diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h @@ -1007,7 +1007,7 @@ // in a DebugVariable is as "None". Optional OptFragmentInfo = FragmentInfo; if (DebugVariable::isDefaultFragment(FragmentInfo)) - OptFragmentInfo = None; + OptFragmentInfo = std::nullopt; DebugVariable Overlapped(Var.getVariable(), OptFragmentInfo, Var.getInlinedAt()); diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -1045,7 +1045,7 @@ // If there is no location, and we have reached the limit of how many stack // slots to track, then don't track this one. 
if (SpillLocs.size() >= StackWorkingSetLimit) - return None; + return std::nullopt; // Spill location is untracked: create record for this one, and all // subregister slots too. @@ -1297,7 +1297,7 @@ InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) { Optional SpillLoc = extractSpillBaseRegAndOffset(MI); if (!SpillLoc) - return None; + return std::nullopt; // Where in the stack slot is this value defined -- i.e., what size of value // is this? An important question, because it could be loaded into a register @@ -1311,7 +1311,7 @@ if (IdxIt == MTracker->StackSlotIdxes.end()) // That index is not tracked. This is suprising, and unlikely to ever // occur, but the safe action is to indicate the variable is optimised out. - return None; + return std::nullopt; unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillLoc, IdxIt->second); return MTracker->getSpillMLoc(SpillID); @@ -1529,7 +1529,7 @@ // If we didn't find anything: there's no way to express our value. if (!NewReg) { - NewID = None; + NewID = std::nullopt; } else { // Re-state the value as being defined within the subregister // that we found. @@ -1539,7 +1539,7 @@ } } else { // If we can't handle subregisters, unset the new value. - NewID = None; + NewID = std::nullopt; } } @@ -1628,7 +1628,8 @@ // a DBG_PHI. This can happen if DBG_PHIs are malformed, or refer to a // dead stack slot, for example. // Record a DebugPHIRecord with an empty value + location. - DebugPHINumToValue.push_back({InstrNum, MI.getParent(), None, None}); + DebugPHINumToValue.push_back( + {InstrNum, MI.getParent(), std::nullopt, std::nullopt}); return true; }; @@ -1840,17 +1841,17 @@ MachineFunction *MF) { // TODO: Handle multiple stores folded into one. if (!MI.hasOneMemOperand()) - return None; + return std::nullopt; // Reject any memory operand that's aliased -- we can't guarantee its value. auto MMOI = MI.memoperands_begin(); const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue(); if (PVal->isAliased(MFI)) - return None; + return std::nullopt; if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII)) - return None; // This is not a spill instruction, since no valid size was - // returned from either function. + return std::nullopt; // This is not a spill instruction, since no valid size + // was returned from either function. return extractSpillBaseRegAndOffset(MI); } @@ -1869,7 +1870,7 @@ InstrRefBasedLDV::isRestoreInstruction(const MachineInstr &MI, MachineFunction *MF, unsigned &Reg) { if (!MI.hasOneMemOperand()) - return None; + return std::nullopt; // FIXME: Handle folded restore instructions with more than one memory // operand. @@ -1877,7 +1878,7 @@ Reg = MI.getOperand(0).getReg(); return extractSpillBaseRegAndOffset(MI); } - return None; + return std::nullopt; } bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) { @@ -2781,7 +2782,7 @@ CandidateLocs = NewCandidates; } if (CandidateLocs.empty()) - return None; + return std::nullopt; // We now have a set of LocIdxes that contain the right output value in // each of the predecessors. Pick the lowest; if there's a register loc, @@ -3984,7 +3985,7 @@ // No DBG_PHI means there can be no location. if (LowerIt == UpperIt) - return None; + return std::nullopt; // If any DBG_PHIs referred to a location we didn't understand, don't try to // compute a value. 
There might be scenarios where we could recover a value @@ -3993,7 +3994,7 @@ auto DBGPHIRange = make_range(LowerIt, UpperIt); for (const DebugPHIRecord &DBG_PHI : DBGPHIRange) if (!DBG_PHI.ValueRead) - return None; + return std::nullopt; // If there's only one DBG_PHI, then that is our value number. if (std::distance(LowerIt, UpperIt) == 1) @@ -4077,7 +4078,7 @@ for (auto &PHIIt : PHI->IncomingValues) { // Any undef input means DBG_PHIs didn't dominate the use point. if (Updater.UndefMap.find(&PHIIt.first->BB) != Updater.UndefMap.end()) - return None; + return std::nullopt; ValueIDNum ValueToCheck; const ValueTable &BlockLiveOuts = MLiveOuts[PHIIt.first->BB.getNumber()]; @@ -4096,7 +4097,7 @@ } if (BlockLiveOuts[Loc.asU64()] != ValueToCheck) - return None; + return std::nullopt; } // Record this value as validated. diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp --- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -1115,7 +1115,7 @@ if (It != EntryValuesBackupVars.end()) return It->second; - return llvm::None; + return std::nullopt; } void VarLocBasedLDV::collectIDsForRegs(VarLocsInRange &Collected, @@ -1621,7 +1621,7 @@ VarLocBasedLDV::isRestoreInstruction(const MachineInstr &MI, MachineFunction *MF, Register &Reg) { if (!MI.hasOneMemOperand()) - return None; + return std::nullopt; // FIXME: Handle folded restore instructions with more than one memory // operand. @@ -1629,7 +1629,7 @@ Reg = MI.getOperand(0).getReg(); return extractSpillBaseRegAndOffset(MI); } - return None; + return std::nullopt; } /// A spilled register may indicate that we have to end the current range of diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -985,7 +985,7 @@ Start = Start.getNextSlot(); if (I.value() != DbgValue || I.stop() != Start) { // Clear `Kills`, as we have a new def available. - Kills = None; + Kills = std::nullopt; return; } // This is a one-slot placeholder. Just skip it. @@ -996,7 +996,7 @@ if (I.valid() && I.start() < Stop) { Stop = I.start(); // Clear `Kills`, as we have a new def available. 
- Kills = None; + Kills = std::nullopt; } if (Start < Stop) { diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -128,6 +128,7 @@ kw_pcsections, kw_cfi_type, kw_bbsections, + kw_bb_id, kw_unknown_size, kw_unknown_address, kw_ir_block_address_taken, diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -159,7 +159,7 @@ ErrorCallback( C.location(), "end of machine instruction reached before the closing '\"'"); - return None; + return std::nullopt; } } C.advance(); @@ -273,17 +273,19 @@ .Case("pcsections", MIToken::kw_pcsections) .Case("cfi-type", MIToken::kw_cfi_type) .Case("bbsections", MIToken::kw_bbsections) + .Case("bb_id", MIToken::kw_bb_id) .Case("unknown-size", MIToken::kw_unknown_size) .Case("unknown-address", MIToken::kw_unknown_address) .Case("distinct", MIToken::kw_distinct) .Case("ir-block-address-taken", MIToken::kw_ir_block_address_taken) - .Case("machine-block-address-taken", MIToken::kw_machine_block_address_taken) + .Case("machine-block-address-taken", + MIToken::kw_machine_block_address_taken) .Default(MIToken::Identifier); } static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { if (!isalpha(C.peek()) && C.peek() != '_') - return None; + return std::nullopt; auto Range = C; while (isIdentifierChar(C.peek())) C.advance(); @@ -297,7 +299,7 @@ ErrorCallbackType ErrorCallback) { bool IsReference = C.remaining().startswith("%bb."); if (!IsReference && !C.remaining().startswith("bb.")) - return None; + return std::nullopt; auto Range = C; unsigned PrefixLength = IsReference ? 4 : 3; C.advance(PrefixLength); // Skip '%bb.' or 'bb.' 
@@ -331,7 +333,7 @@ static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, MIToken::TokenKind Kind) { if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) - return None; + return std::nullopt; auto Range = C; C.advance(Rule.size()); auto NumberRange = C; @@ -344,7 +346,7 @@ static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, MIToken::TokenKind Kind) { if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) - return None; + return std::nullopt; auto Range = C; C.advance(Rule.size()); auto NumberRange = C; @@ -384,7 +386,7 @@ ErrorCallbackType ErrorCallback) { const StringRef Rule = "%subreg."; if (!C.remaining().startswith(Rule)) - return None; + return std::nullopt; return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(), ErrorCallback); } @@ -393,7 +395,7 @@ ErrorCallbackType ErrorCallback) { const StringRef Rule = "%ir-block."; if (!C.remaining().startswith(Rule)) - return None; + return std::nullopt; if (isdigit(C.peek(Rule.size()))) return maybeLexIndex(C, Token, Rule, MIToken::IRBlock); return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback); @@ -403,7 +405,7 @@ ErrorCallbackType ErrorCallback) { const StringRef Rule = "%ir."; if (!C.remaining().startswith(Rule)) - return None; + return std::nullopt; if (isdigit(C.peek(Rule.size()))) return maybeLexIndex(C, Token, Rule, MIToken::IRValue); return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); @@ -412,7 +414,7 @@ static Cursor maybeLexStringConstant(Cursor C, MIToken &Token, ErrorCallbackType ErrorCallback) { if (C.peek() != '"') - return None; + return std::nullopt; return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0, ErrorCallback); } @@ -446,7 +448,7 @@ static Cursor maybeLexRegister(Cursor C, MIToken &Token, ErrorCallbackType ErrorCallback) { if (C.peek() != '%' && C.peek() != '$') - return None; + return std::nullopt; if (C.peek() == '%') { if (isdigit(C.peek(1))) @@ -455,7 +457,7 @@ if (isRegisterChar(C.peek(1))) return lexNamedVirtualRegister(C, Token); - return None; + return std::nullopt; } assert(C.peek() == '$'); @@ -471,7 +473,7 @@ static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token, ErrorCallbackType ErrorCallback) { if (C.peek() != '@') - return None; + return std::nullopt; if (!isdigit(C.peek(1))) return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1, ErrorCallback); @@ -488,7 +490,7 @@ static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token, ErrorCallbackType ErrorCallback) { if (C.peek() != '&') - return None; + return std::nullopt; return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1, ErrorCallback); } @@ -497,7 +499,7 @@ ErrorCallbackType ErrorCallback) { const StringRef Rule = " &SID); + bool parseBBID(Optional &PrID); bool parseOperandsOffset(MachineOperand &Op); bool parseIRValue(const Value *&V); bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags); @@ -595,7 +596,7 @@ // Create a diagnostic for a YAML string literal. Error = SMDiagnostic(SM, SMLoc(), Buffer.getBufferIdentifier(), 1, Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), - Source, None, None); + Source, std::nullopt, std::nullopt); return true; } @@ -662,6 +663,18 @@ return false; } +// Parse Machine Basic Block ID. 
+bool MIParser::parseBBID(Optional &BBID) { + assert(Token.is(MIToken::kw_bb_id)); + lex(); + unsigned Value = 0; + if (getUnsigned(Value)) + return error("Unknown BB ID"); + BBID = Value; + lex(); + return false; +} + bool MIParser::parseBasicBlockDefinition( DenseMap &MBBSlots) { assert(Token.is(MIToken::MachineBasicBlockLabel)); @@ -678,6 +691,7 @@ bool IsEHFuncletEntry = false; Optional SectionID; uint64_t Alignment = 0; + Optional BBID; BasicBlock *BB = nullptr; if (consumeIfPresent(MIToken::lparen)) { do { @@ -718,6 +732,10 @@ if (parseSectionID(SectionID)) return true; break; + case MIToken::kw_bb_id: + if (parseBBID(BBID)) + return true; + break; default: break; } @@ -755,6 +773,13 @@ MBB->setSectionID(SectionID.value()); MF.setBBSectionsType(BasicBlockSection::List); } + if (BBID.has_value()) { + // BBSectionsType is set to `List` if any basic block has a `SectionID`. + // Here, we set it to `Labels` if it hasn't been set above. + if (!MF.hasBBSections()) + MF.setBBSectionsType(BasicBlockSection::Labels); + MBB->setBBID(BBID.value()); + } return false; } @@ -1356,7 +1381,7 @@ // Forward reference. auto &FwdRef = PFS.MachineForwardRefMDNodes[ID]; FwdRef = std::make_pair( - MDTuple::getTemporary(MF.getFunction().getContext(), None), Loc); + MDTuple::getTemporary(MF.getFunction().getContext(), std::nullopt), Loc); PFS.MachineMetadataNodes[ID].reset(FwdRef.first.get()); MD = FwdRef.first.get(); diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -999,7 +999,7 @@ (HasQuote ? 1 : 0)); // TODO: Translate any source ranges as well. - return SM.GetMessage(Loc, Error.getKind(), Error.getMessage(), None, + return SM.GetMessage(Loc, Error.getKind(), Error.getMessage(), std::nullopt, Error.getFixIts()); } diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -558,6 +558,11 @@ } hasAttributes = true; } + if (getBBID().has_value()) { + os << (hasAttributes ? ", " : " ("); + os << "bb_id " << *getBBID(); + hasAttributes = true; + } } if (hasAttributes) @@ -1646,6 +1651,11 @@ return false; } +unsigned MachineBasicBlock::getBBIDOrNumber() const { + uint8_t BBAddrMapVersion = getParent()->getContext().getBBAddrMapVersion(); + return BBAddrMapVersion < 2 ?
getNumber() : *getBBID(); +} + const MBBSectionID MBBSectionID::ColdSectionID(MBBSectionID::SectionType::Cold); const MBBSectionID MBBSectionID::ExceptionSectionID(MBBSectionID::SectionType::Exception); diff --git a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp --- a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -234,7 +234,7 @@ Optional MachineBlockFrequencyInfo::getBlockProfileCount( const MachineBasicBlock *MBB) const { if (!MBFI) - return None; + return std::nullopt; const Function &F = MBFI->getFunction()->getFunction(); return MBFI->getBlockProfileCount(F, MBB); @@ -243,7 +243,7 @@ Optional MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const { if (!MBFI) - return None; + return std::nullopt; const Function &F = MBFI->getFunction()->getFunction(); return MBFI->getProfileCountFromFreq(F, Freq); diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -98,7 +98,7 @@ return Optional( DestSourcePair{MI.getOperand(0), MI.getOperand(1)}); - return None; + return std::nullopt; } class CopyTracker { diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -437,8 +437,13 @@ /// `new MachineBasicBlock'. MachineBasicBlock * MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) { - return new (BasicBlockRecycler.Allocate(Allocator)) - MachineBasicBlock(*this, bb); + MachineBasicBlock *MBB = + new (BasicBlockRecycler.Allocate(Allocator)) + MachineBasicBlock(*this, bb); + if (Target.getBBSectionsType() == BasicBlockSection::Labels || + Target.getBBSectionsType() == BasicBlockSection::List) + MBB->setBBID(NextBBID++); + return MBB; } /// Delete the given MachineBasicBlock. diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -2338,7 +2338,7 @@ if (MFI.isSpillSlotObjectIndex(FI)) return (*memoperands_begin())->getSize(); } - return None; + return std::nullopt; } Optional @@ -2346,7 +2346,7 @@ MMOList Accesses; if (TII->hasStoreToStackSlot(*this, Accesses)) return getSpillSlotSize(Accesses, getMF()->getFrameInfo()); - return None; + return std::nullopt; } Optional @@ -2357,7 +2357,7 @@ if (MFI.isSpillSlotObjectIndex(FI)) return (*memoperands_begin())->getSize(); } - return None; + return std::nullopt; } Optional @@ -2365,7 +2365,7 @@ MMOList Accesses; if (TII->hasLoadFromStackSlot(*this, Accesses)) return getSpillSlotSize(Accesses, getMF()->getFrameInfo()); - return None; + return std::nullopt; } unsigned MachineInstr::getDebugInstrNum() { diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp @@ -0,0 +1,240 @@ +//==--- MachineLateInstrsCleanup.cpp - Late Instructions Cleanup Pass -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This simple pass removes any identical and redundant immediate or address +// loads to the same register. The immediate loads removed can originally be +// the result of rematerialization, while the addresses are redundant frame +// addressing anchor points created during Frame Indices elimination. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "machine-latecleanup" + +STATISTIC(NumRemoved, "Number of redundant instructions removed."); + +namespace { + +class MachineLateInstrsCleanup : public MachineFunctionPass { + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + // Data structures to map regs to their definitions per MBB. + using Reg2DefMap = std::map; + std::vector RegDefs; + + // Walk through the instructions in MBB and remove any redundant + // instructions. + bool processBlock(MachineBasicBlock *MBB); + +public: + static char ID; // Pass identification, replacement for typeid + + MachineLateInstrsCleanup() : MachineFunctionPass(ID) { + initializeMachineLateInstrsCleanupPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } +}; + +} // end anonymous namespace + +char MachineLateInstrsCleanup::ID = 0; + +char &llvm::MachineLateInstrsCleanupID = MachineLateInstrsCleanup::ID; + +INITIALIZE_PASS(MachineLateInstrsCleanup, DEBUG_TYPE, + "Machine Late Instructions Cleanup Pass", false, false) + +bool MachineLateInstrsCleanup::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + bool Changed = false; + + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + + RegDefs.clear(); + RegDefs.resize(MF.getNumBlockIDs()); + + // Visit all MBBs in an order that maximises the reuse from predecessors. + ReversePostOrderTraversal RPOT(&MF); + for (MachineBasicBlock *MBB : RPOT) + Changed |= processBlock(MBB); + + return Changed; +} + +// Clear any previous kill flag on Reg found before I in MBB. Walk backwards +// in MBB and if needed continue in predecessors until a use/def of Reg is +// encountered. This seems to be faster in practice than tracking kill flags +// in a map. 
+static void clearKillsForDef(Register Reg, MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + BitVector &VisitedPreds, + const TargetRegisterInfo *TRI) { + VisitedPreds.set(MBB->getNumber()); + while (I != MBB->begin()) { + I--; + bool Found = false; + for (auto &MO : I->operands()) + if (MO.isReg() && TRI->regsOverlap(MO.getReg(), Reg)) { + if (MO.isDef()) + return; + if (MO.readsReg()) { + MO.setIsKill(false); + Found = true; // Keep going for an implicit kill of the super-reg. + } + } + if (Found) + return; + } + + // If an earlier def is not in MBB, continue in predecessors. + if (!MBB->isLiveIn(Reg)) + MBB->addLiveIn(Reg); + assert(!MBB->pred_empty() && "Predecessor def not found!"); + for (MachineBasicBlock *Pred : MBB->predecessors()) + if (!VisitedPreds.test(Pred->getNumber())) + clearKillsForDef(Reg, Pred, Pred->end(), VisitedPreds, TRI); +} + +static void removeRedundantDef(MachineInstr *MI, + const TargetRegisterInfo *TRI) { + Register Reg = MI->getOperand(0).getReg(); + BitVector VisitedPreds(MI->getMF()->getNumBlockIDs()); + clearKillsForDef(Reg, MI->getParent(), MI->getIterator(), VisitedPreds, TRI); + MI->eraseFromParent(); + ++NumRemoved; +} + +// Return true if MI is a potential candidate for reuse/removal and, if so, +// set DefedReg to the register it defines. A candidate is a simple +// instruction that does not touch memory, has only one register definition, +// and uses no register other than FrameReg. Typically this is an immediate +// load or a load-address instruction. +static bool isCandidate(const MachineInstr *MI, Register &DefedReg, + Register FrameReg) { + DefedReg = MCRegister::NoRegister; + bool SawStore = true; + if (!MI->isSafeToMove(nullptr, SawStore) || MI->isImplicitDef() || + MI->isInlineAsm()) + return false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg()) { + if (MO.isDef()) { + if (i == 0 && !MO.isImplicit() && !MO.isDead()) + DefedReg = MO.getReg(); + else + return false; + } else if (MO.getReg() && MO.getReg() != FrameReg) + return false; + } else if (!(MO.isImm() || MO.isCImm() || MO.isFPImm() || MO.isCPI() || + MO.isGlobal() || MO.isSymbol())) + return false; + } + return DefedReg.isValid(); +} + +bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) { + bool Changed = false; + + Reg2DefMap &MBBDefs = RegDefs[MBB->getNumber()]; + + // Find reusable definitions in the predecessor(s). + if (!MBB->pred_empty()) { + MachineBasicBlock *FirstPred = *MBB->pred_begin(); + for (auto [Reg, DefMI] : RegDefs[FirstPred->getNumber()]) + if (llvm::all_of( + drop_begin(MBB->predecessors()), + [&, &Reg = Reg, &DefMI = DefMI](const MachineBasicBlock *Pred) { + auto PredDefI = RegDefs[Pred->getNumber()].find(Reg); + return PredDefI != RegDefs[Pred->getNumber()].end() && + DefMI->isIdenticalTo(*PredDefI->second); + })) { + MBBDefs[Reg] = DefMI; + LLVM_DEBUG(dbgs() << "Reusable instruction from pred(s): in " + << printMBBReference(*MBB) << ": " << *DefMI;); + } + } + + // Process MBB. + MachineFunction *MF = MBB->getParent(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + Register FrameReg = TRI->getFrameRegister(*MF); + for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) { + // If FrameReg is modified, no previous load-address instructions are valid.
+ if (MI.modifiesRegister(FrameReg, TRI)) { + MBBDefs.clear(); + continue; + } + + Register DefedReg; + bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg); + + // Check for an earlier identical and reusable instruction. + if (IsCandidate) { + auto DefI = MBBDefs.find(DefedReg); + if (DefI != MBBDefs.end() && MI.isIdenticalTo(*DefI->second)) { + LLVM_DEBUG(dbgs() << "Removing redundant instruction in " + << printMBBReference(*MBB) << ": " << MI;); + removeRedundantDef(&MI, TRI); + Changed = true; + continue; + } + } + + // Clear any entries in map that MI clobbers. + for (auto DefI = MBBDefs.begin(); DefI != MBBDefs.end();) { + Register Reg = DefI->first; + if (MI.modifiesRegister(Reg, TRI)) + DefI = MBBDefs.erase(DefI); + else + ++DefI; + } + + // Record this MI for potential later reuse. + if (IsCandidate) { + LLVM_DEBUG(dbgs() << "Found interesting instruction in " + << printMBBReference(*MBB) << ": " << MI;); + MBBDefs[DefedReg] = &MI; + } + } + + return Changed; +} diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -749,7 +749,7 @@ const TargetIntrinsicInfo *IntrinsicInfo) const { tryToGetTargetInfo(*this, TRI, IntrinsicInfo); ModuleSlotTracker DummyMST(nullptr); - print(OS, DummyMST, TypeToPrint, None, /*PrintDef=*/false, + print(OS, DummyMST, TypeToPrint, std::nullopt, /*PrintDef=*/false, /*IsStandalone=*/true, /*ShouldPrintRegisterTies=*/true, /*TiedOperandIdx=*/0, TRI, IntrinsicInfo); diff --git a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp --- a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -33,7 +33,7 @@ Optional MachineOptimizationRemarkEmitter::computeHotness(const MachineBasicBlock &MBB) { if (!MBFI) - return None; + return std::nullopt; return MBFI->getBlockProfileCount(&MBB); } diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -727,7 +727,8 @@ Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()), Unit /* File */, 0 /* Line 0 is reserved for compiler-generated code. */, - DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */ + DB.createSubroutineType( + DB.getOrCreateTypeArray(std::nullopt)), /* void type */ 0, /* Line 0 is reserved for compiler-generated code. */ DINode::DIFlags::FlagArtificial /* Compiler-generated code. */, /* Outlined code is optimized code by definition. 
*/ diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -536,7 +536,7 @@ if (MinCost >= CostPerUseLimit) { LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost << ", no cheaper registers to be found.\n"); - return None; + return std::nullopt; } // It is normal for register classes to have a long tail of registers with diff --git a/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp b/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp --- a/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp +++ b/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp @@ -89,7 +89,7 @@ for (auto &MI : MBB) { if (MI.isDebugValue()) { - DebugVariable Var(MI.getDebugVariable(), None, + DebugVariable Var(MI.getDebugVariable(), std::nullopt, MI.getDebugLoc()->getInlinedAt()); auto VMI = VariableMap.find(Var); // Just stop tracking this variable, until we cover DBG_VALUE_LIST. diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -243,6 +243,10 @@ return false; TTI = &getAnalysis().getTTI(F); + + if (!TTI->enableSelectOptimize()) + return false; + DT = &getAnalysis().getDomTree(); LI = &getAnalysis().getLoopInfo(); BPI.reset(new BranchProbabilityInfo(F, *LI)); @@ -978,7 +982,7 @@ TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency); if (auto OC = ICost.getValue()) return Optional(*OC); - return None; + return std::nullopt; } ScaledNumber diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7935,22 +7935,22 @@ // Typical i64 by i8 pattern requires recursion up to 8 calls depth if (Depth == 10) - return None; + return std::nullopt; // Only allow multiple uses if the instruction is a vector load (in which // case we will use the load for every ExtractVectorElement) if (Depth && !Op.hasOneUse() && (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector())) - return None; + return std::nullopt; // Fail to combine if we have encountered anything but a LOAD after handling // an ExtractVectorElement. 
if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value()) - return None; + return std::nullopt; unsigned BitWidth = Op.getValueSizeInBits(); if (BitWidth % 8 != 0) - return None; + return std::nullopt; unsigned ByteWidth = BitWidth / 8; assert(Index < ByteWidth && "invalid index requested"); (void) ByteWidth; @@ -7960,27 +7960,27 @@ auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex); if (!LHS) - return None; + return std::nullopt; auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex); if (!RHS) - return None; + return std::nullopt; if (LHS->isConstantZero()) return RHS; if (RHS->isConstantZero()) return LHS; - return None; + return std::nullopt; } case ISD::SHL: { auto ShiftOp = dyn_cast(Op->getOperand(1)); if (!ShiftOp) - return None; + return std::nullopt; uint64_t BitShift = ShiftOp->getZExtValue(); if (BitShift % 8 != 0) - return None; + return std::nullopt; uint64_t ByteShift = BitShift / 8; // If we are shifting by an amount greater than the index we are trying to @@ -7997,13 +7997,13 @@ SDValue NarrowOp = Op->getOperand(0); unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits(); if (NarrowBitWidth % 8 != 0) - return None; + return std::nullopt; uint64_t NarrowByteWidth = NarrowBitWidth / 8; if (Index >= NarrowByteWidth) return Op.getOpcode() == ISD::ZERO_EXTEND ? Optional(ByteProvider::getConstantZero()) - : None; + : std::nullopt; return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex, StartingIndex); } @@ -8013,14 +8013,14 @@ case ISD::EXTRACT_VECTOR_ELT: { auto OffsetOp = dyn_cast(Op->getOperand(1)); if (!OffsetOp) - return None; + return std::nullopt; VectorIndex = OffsetOp->getZExtValue(); SDValue NarrowOp = Op->getOperand(0); unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits(); if (NarrowBitWidth % 8 != 0) - return None; + return std::nullopt; uint64_t NarrowByteWidth = NarrowBitWidth / 8; // Check to see if the position of the element in the vector corresponds @@ -8030,9 +8030,9 @@ // vector of i16s, each element provides two bytes (V[1] provides byte 2 and // 3). if (VectorIndex.value() * NarrowByteWidth > StartingIndex) - return None; + return std::nullopt; if ((VectorIndex.value() + 1) * NarrowByteWidth <= StartingIndex) - return None; + return std::nullopt; return calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex, StartingIndex); @@ -8040,11 +8040,11 @@ case ISD::LOAD: { auto L = cast(Op.getNode()); if (!L->isSimple() || L->isIndexed()) - return None; + return std::nullopt; unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits(); if (NarrowBitWidth % 8 != 0) - return None; + return std::nullopt; uint64_t NarrowByteWidth = NarrowBitWidth / 8; // If the width of the load does not reach byte we are trying to provide for @@ -8053,14 +8053,14 @@ if (Index >= NarrowByteWidth) return L->getExtensionType() == ISD::ZEXTLOAD ? Optional(ByteProvider::getConstantZero()) - : None; + : std::nullopt; unsigned BPVectorIndex = VectorIndex.value_or(0U); return ByteProvider::getMemory(L, Index, BPVectorIndex); } } - return None; + return std::nullopt; } static unsigned littleEndianByteAt(unsigned BW, unsigned i) { @@ -8079,7 +8079,7 @@ // The endian can be decided only when it is 2 bytes at least. 
unsigned Width = ByteOffsets.size(); if (Width < 2) - return None; + return std::nullopt; bool BigEndian = true, LittleEndian = true; for (unsigned i = 0; i < Width; i++) { @@ -8087,7 +8087,7 @@ LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i); BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i); if (!BigEndian && !LittleEndian) - return None; + return std::nullopt; } assert((BigEndian != LittleEndian) && "It should be either big endian or" @@ -8150,9 +8150,13 @@ while (auto *Store = dyn_cast(Chain)) { // All stores must be the same size to ensure that we are writing all of the // bytes in the wide value. + // This store should have exactly one use as a chain operand for another + // store in the merging set. If there are other chain uses, then the + // transform may not be safe because order of loads/stores outside of this + // set may not be preserved. // TODO: We could allow multiple sizes by tracking each stored byte. if (Store->getMemoryVT() != MemVT || !Store->isSimple() || - Store->isIndexed()) + Store->isIndexed() || !Store->hasOneUse()) return SDValue(); Stores.push_back(Store); Chain = Store->getChain(); @@ -8370,8 +8374,9 @@ SmallVector ByteOffsets(ByteWidth); unsigned ZeroExtendedBytes = 0; for (int i = ByteWidth - 1; i >= 0; --i) { - auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ None, - /*StartingIndex*/ i); + auto P = + calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt, + /*StartingIndex*/ i); if (!P) return SDValue(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -6647,7 +6647,7 @@ // Using element-wise loads and stores for widening operations is not // supported for scalable vectors if (Scalable) - return None; + return std::nullopt; return RetVT; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -496,7 +496,7 @@ Optional ISD::getVPMaskIdx(unsigned Opcode) { switch (Opcode) { default: - return None; + return std::nullopt; #define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, ...) \ case ISD::VPSD: \ return MASKPOS; @@ -508,7 +508,7 @@ Optional ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) { switch (Opcode) { default: - return None; + return std::nullopt; #define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \ case ISD::VPSD: \ return EVLPOS; @@ -1609,7 +1609,7 @@ "APInt size does not match type size!"); unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(EltVT), None); + AddNodeIDNode(ID, Opc, getVTList(EltVT), std::nullopt); ID.AddPointer(Elt); ID.AddBoolean(isO); void *IP = nullptr; @@ -1664,7 +1664,7 @@ // we don't have issues with SNANs. unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(EltVT), None); + AddNodeIDNode(ID, Opc, getVTList(EltVT), std::nullopt); ID.AddPointer(&V); void *IP = nullptr; SDNode *N = nullptr; @@ -1721,7 +1721,7 @@ Opc = isTargetGA ? 
ISD::TargetGlobalAddress : ISD::GlobalAddress; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddPointer(GV); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); @@ -1739,7 +1739,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddInteger(FI); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) @@ -1757,7 +1757,7 @@ "Cannot set target flags on target-independent jump tables"); unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddInteger(JTI); ID.AddInteger(TargetFlags); void *IP = nullptr; @@ -1781,7 +1781,7 @@ : getDataLayout().getPrefTypeAlign(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddInteger(Alignment->value()); ID.AddInteger(Offset); ID.AddPointer(C); @@ -1808,7 +1808,7 @@ Alignment = getDataLayout().getPrefTypeAlign(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddInteger(Alignment->value()); ID.AddInteger(Offset); C->addSelectionDAGCSEId(ID); @@ -1827,7 +1827,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, unsigned TargetFlags) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None); + AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), std::nullopt); ID.AddInteger(Index); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); @@ -1843,7 +1843,7 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None); + AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), std::nullopt); ID.AddPointer(MBB); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) @@ -2120,7 +2120,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::Register, getVTList(VT), None); + AddNodeIDNode(ID, ISD::Register, getVTList(VT), std::nullopt); ID.AddInteger(RegNo); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) @@ -2135,7 +2135,7 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None); + AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), std::nullopt); ID.AddPointer(RegMask); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) @@ -2177,7 +2177,7 @@ unsigned Opc = isTarget ? 
ISD::TargetBlockAddress : ISD::BlockAddress; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddPointer(BA); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); @@ -2193,7 +2193,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None); + AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), std::nullopt); ID.AddPointer(V); void *IP = nullptr; @@ -2208,7 +2208,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None); + AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), std::nullopt); ID.AddPointer(MD); void *IP = nullptr; @@ -5065,7 +5065,7 @@ /// Gets or creates the specified node. SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, getVTList(VT), None); + AddNodeIDNode(ID, Opcode, getVTList(VT), std::nullopt); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); @@ -5633,7 +5633,7 @@ return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1); } } - return llvm::None; + return std::nullopt; } // Handle constant folding with UNDEF. @@ -5650,7 +5650,7 @@ if (Opcode == ISD::AND || Opcode == ISD::MUL) return APInt::getZero(C1.getBitWidth()); - return llvm::None; + return std::nullopt; } SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, @@ -9341,7 +9341,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList) { - return getNode(Opcode, DL, VTList, None); + return getNode(Opcode, DL, VTList, std::nullopt); } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, @@ -9608,7 +9608,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT) { SDVTList VTs = getVTList(VT); - return SelectNodeTo(N, MachineOpc, VTs, None); + return SelectNodeTo(N, MachineOpc, VTs, std::nullopt); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -9649,7 +9649,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); - return SelectNodeTo(N, MachineOpc, VTs, None); + return SelectNodeTo(N, MachineOpc, VTs, std::nullopt); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -9816,7 +9816,7 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT) { SDVTList VTs = getVTList(VT); - return getMachineNode(Opcode, dl, VTs, None); + return getMachineNode(Opcode, dl, VTs, std::nullopt); } MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, @@ -11501,7 +11501,7 @@ return commonAlignment(MFI.getObjectAlign(FrameIdx), FrameOffset); } - return None; + return std::nullopt; } /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type @@ -11928,26 +11928,26 @@ BuildVectorSDNode::isConstantSequence() const { unsigned NumOps = getNumOperands(); if (NumOps < 2) - return None; + return std::nullopt; if (!isa(getOperand(0)) || !isa(getOperand(1))) - return None; + return std::nullopt; unsigned EltSize = getValueType(0).getScalarSizeInBits(); APInt Start = getConstantOperandAPInt(0).trunc(EltSize); APInt Stride = getConstantOperandAPInt(1).trunc(EltSize) - Start; if (Stride.isZero()) - return None; + return std::nullopt; for (unsigned i = 2; i < NumOps; ++i) { if (!isa(getOperand(i))) - return None; + 
return std::nullopt; APInt Val = getConstantOperandAPInt(i).trunc(EltSize); if (Val != (Start + (Stride * i))) - return None; + return std::nullopt; } return std::make_pair(Start, Stride); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -702,14 +703,14 @@ /// Records if this value needs to be treated in an ABI dependant manner, /// different to normal type legalization. - Optional CallConv; + std::optional CallConv; RegsForValue() = default; RegsForValue(const SmallVector ®s, MVT regvt, EVT valuevt, - Optional CC = None); + std::optional CC = std::nullopt); RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty, - Optional CC); + std::optional CC); bool isABIMangled() const { return CallConv.has_value(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -16,7 +16,6 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -150,18 +149,18 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, - Optional CC); + std::optional CC); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type /// larger than ValueVT then AssertOp can be used to specify whether the extra /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT /// (ISD::AssertSext). -static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, - const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, const Value *V, - Optional CC = None, - Optional AssertOp = None) { +static SDValue +getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, + unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, + std::optional CC = std::nullopt, + std::optional AssertOp = std::nullopt) { // Let the target assemble the parts if it wants to const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts, @@ -322,7 +321,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, - Optional CallConv) { + std::optional CallConv) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const bool IsABIRegCopy = CallConv.has_value(); @@ -474,16 +473,16 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, - Optional CallConv); + std::optional CallConv); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. 
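Every signature change in this block follows the same mechanical shape: llvm::Optional becomes std::optional and the llvm::None default becomes std::nullopt. A minimal, generic sketch of that calling style, with an invented helper rather than any function from this file:

#include <cassert>
#include <optional>

// Hypothetical helper showing the migrated default-argument style; callers
// that previously passed llvm::None now pass std::nullopt (or nothing).
static unsigned partCount(unsigned Bits,
                          std::optional<unsigned> PartBits = std::nullopt) {
  const unsigned Width = PartBits.value_or(8); // value_or is unchanged by the move
  return (Bits + Width - 1) / Width;
}

int main() {
  assert(partCount(32) == 4);               // default: 8-bit parts
  assert(partCount(32, 16) == 2);           // explicit part width
  assert(partCount(32, std::nullopt) == 4); // spelled-out "no value"
  return 0;
}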
-static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, - SDValue *Parts, unsigned NumParts, MVT PartVT, - const Value *V, - Optional CallConv = None, - ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { +static void +getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, const Value *V, + std::optional CallConv = std::nullopt, + ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { // Let the target split the parts if it wants to const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT, @@ -656,7 +655,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, - Optional CallConv) { + std::optional CallConv) { EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -803,13 +802,13 @@ } RegsForValue::RegsForValue(const SmallVector ®s, MVT regvt, - EVT valuevt, Optional CC) + EVT valuevt, std::optional CC) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), RegCount(1, regs.size()), CallConv(CC) {} RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty, - Optional CC) { + std::optional CC) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); CallConv = CC; @@ -1443,7 +1442,7 @@ // If this is a PHI node, it may be split up into several MI PHI nodes // (in FunctionLoweringInfo::set). RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, - V->getType(), None); + V->getType(), std::nullopt); if (RFV.occupiesMultipleRegs()) { // FIXME: We could potentially support variadic dbg_values here. if (IsVariadic) @@ -1511,7 +1510,7 @@ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), InReg, Ty, - None); // This is not an ABI copy. + std::nullopt); // This is not an ABI copy. SDValue Chain = DAG.getEntryNode(); Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); @@ -1707,7 +1706,7 @@ unsigned InReg = FuncInfo.InitializeRegForValue(Inst); RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, - Inst->getType(), None); + Inst->getType(), std::nullopt); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -2782,7 +2781,8 @@ CallOptions.setDiscardResult(true); SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, - None, CallOptions, getCurSDLoc()).second; + std::nullopt, CallOptions, getCurSDLoc()) + .second; // On PS4/PS5, the "return address" must still be within the calling // function, even if it's at the very end, so emit an explicit TRAP here. // Passing 'true' for doesNotReturn above won't generate the trap for us. @@ -4049,7 +4049,7 @@ // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. 
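The hunk that follows clears the optional alignment when it does not exceed the stack alignment. A reduced sketch of that reset, using a plain std::optional in place of MaybeAlign (an assumption made purely for illustration):

#include <cassert>
#include <cstdint>
#include <optional>

// Keep an explicit alignment only when it is stricter than the stack's own.
static std::optional<uint64_t> prune(std::optional<uint64_t> Requested,
                                     uint64_t StackAlign) {
  if (Requested && *Requested <= StackAlign)
    Requested = std::nullopt; // redundant: the stack already guarantees it
  return Requested;
}

int main() {
  assert(!prune(8, 16).has_value());   // 8 <= 16: dropped
  assert(prune(32, 16).value() == 32); // 32 > 16: kept, noted on the node
  return 0;
}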
Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign(); if (*Alignment <= StackAlign) - Alignment = None; + Alignment = std::nullopt; const uint64_t StackAlignMask = StackAlign.value() - 1U; // Round the size of the allocation up to the stack alignment size @@ -4344,7 +4344,7 @@ Src0 = I.getArgOperand(0); Ptr = I.getArgOperand(1); Mask = I.getArgOperand(2); - Alignment = None; + Alignment = std::nullopt; }; Value *PtrOperand, *MaskOperand, *Src0Operand; @@ -4506,7 +4506,7 @@ MaybeAlign &Alignment) { // @llvm.masked.expandload.*(Ptr, Mask, Src0) Ptr = I.getArgOperand(0); - Alignment = None; + Alignment = std::nullopt; Mask = I.getArgOperand(1); Src0 = I.getArgOperand(2); }; @@ -5720,7 +5720,7 @@ if (VMI != FuncInfo.ValueMap.end()) { const auto &TLI = DAG.getTargetLoweringInfo(); RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, - V->getType(), None); + V->getType(), std::nullopt); if (RFV.occupiesMultipleRegs()) { splitMultiRegDbgValue(RFV.getRegsAndSizes()); return true; @@ -6398,7 +6398,7 @@ // Get the last argument, the metadata and convert it to an integer in the // call Metadata *MD = cast(I.getArgOperand(1))->getMetadata(); - Optional RoundMode = + std::optional RoundMode = convertStrToRoundingMode(cast(MD)->getString()); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); @@ -6886,7 +6886,7 @@ SDValue Result = DAG.getMemIntrinsicNode( ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), - /* align */ None, Flags); + /* align */ std::nullopt, Flags); // Chain the prefetch in parallell with any pending loads, to stay out of // the way of later optimizations. @@ -10172,7 +10172,7 @@ } else { // Collect the legal value parts into potentially illegal values // that correspond to the original function's return values. - Optional AssertOp; + std::optional AssertOp; if (CLI.RetSExt) AssertOp = ISD::AssertSext; else if (CLI.RetZExt) @@ -10250,7 +10250,7 @@ // notional registers required by the type. RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(), - None); // This is not an ABI copy. + std::nullopt); // This is not an ABI copy. SDValue Chain = DAG.getEntryNode(); if (ExtendType == ISD::ANY_EXTEND) { @@ -10663,7 +10663,7 @@ ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - Optional AssertOp; + std::optional AssertOp; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, nullptr, F.getCallingConv(), AssertOp); @@ -10725,7 +10725,7 @@ // we do generate a copy for it that can be used on return from the // function. 
if (ArgHasUses || isSwiftErrorArg) { - Optional AssertOp; + std::optional AssertOp; if (Arg.hasAttribute(Attribute::SExt)) AssertOp = ISD::AssertSext; else if (Arg.hasAttribute(Attribute::ZExt)) diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -165,7 +165,7 @@ int LookUpDepth) { // Can not look any further - give up now if (LookUpDepth <= 0) - return None; + return std::nullopt; // Spill location is known for gc relocates if (const auto *Relocate = dyn_cast(Val)) { @@ -173,18 +173,18 @@ assert((isa(Statepoint) || isa(Statepoint)) && "GetStatepoint must return one of two types"); if (isa(Statepoint)) - return None; + return std::nullopt; const auto &RelocationMap = Builder.FuncInfo.StatepointRelocationMaps [cast(Statepoint)]; auto It = RelocationMap.find(Relocate); if (It == RelocationMap.end()) - return None; + return std::nullopt; auto &Record = It->second; if (Record.type != RecordType::Spill) - return None; + return std::nullopt; return Record.payload.FI; } @@ -203,10 +203,10 @@ Optional SpillSlot = findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1); if (!SpillSlot) - return None; + return std::nullopt; if (MergedResult && *MergedResult != *SpillSlot) - return None; + return std::nullopt; MergedResult = SpillSlot; } @@ -241,7 +241,7 @@ // which we visit values is unspecified. // Don't know any information about this instruction - return None; + return std::nullopt; } /// Return true if-and-only-if the given SDValue can be lowered as either a @@ -920,7 +920,7 @@ auto *RetTy = Relocate->getType(); Register Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), Reg, RetTy, None); + DAG.getDataLayout(), Reg, RetTy, std::nullopt); SDValue Chain = DAG.getRoot(); RFV.getCopyToRegs(Relocated, DAG, getCurSDLoc(), Chain, nullptr); PendingExports.push_back(Chain); @@ -1265,7 +1265,7 @@ Register InReg = Record.payload.Reg; RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), InReg, Relocate.getType(), - None); // This is not an ABI copy. + std::nullopt); // This is not an ABI copy. // We generate copy to/from regs even for local uses, hence we must // chain with current root to ensure proper ordering of copies w.r.t. // statepoint. diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp --- a/llvm/lib/CodeGen/SplitKit.cpp +++ b/llvm/lib/CodeGen/SplitKit.cpp @@ -1450,7 +1450,7 @@ if (Dead.empty()) return; - Edit->eliminateDeadDefs(Dead, None); + Edit->eliminateDeadDefs(Dead, std::nullopt); } void SplitEditor::forceRecomputeVNI(const VNInfo &ParentVNI) { diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -46,6 +46,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include #include using namespace llvm; @@ -166,7 +167,7 @@ const auto *I = cast(U); // If this instruction accesses memory make sure it doesn't access beyond // the bounds of the allocated object. 
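The check that follows treats a memory location with an unknown size as something this particular test cannot prove anything about, while a known size is compared against the allocation. A self-contained sketch of that bounds test, with invented names in place of MemoryLocation and TypeSize:

#include <cassert>
#include <cstdint>
#include <optional>

// An access is provably out of bounds only when its size is known and larger
// than the allocation; an unknown size keeps this check silent.
static bool mayAccessOutOfBounds(uint64_t AllocSize,
                                 std::optional<uint64_t> AccessSize) {
  if (!AccessSize)
    return false; // size unknown: nothing to conclude here
  return AllocSize < *AccessSize;
}

int main() {
  assert(!mayAccessOutOfBounds(16, 8));            // fits in the slot
  assert(mayAccessOutOfBounds(16, 32));            // larger than the slot
  assert(!mayAccessOutOfBounds(16, std::nullopt)); // unknown size
  return 0;
}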
- Optional MemLoc = MemoryLocation::getOrNone(I); + std::optional MemLoc = MemoryLocation::getOrNone(I); if (MemLoc && MemLoc->Size.hasValue() && !TypeSize::isKnownGE(AllocSize, TypeSize::getFixed(MemLoc->Size.getValue()))) @@ -414,11 +415,11 @@ /// /// Returns true if the platform/triple supports the stackprotectorcreate pseudo /// node. -static bool CreatePrologue(Function *F, Module *M, Instruction *CheckLoc, +static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, const TargetLoweringBase *TLI, AllocaInst *&AI) { bool SupportsSelectionDAGSP = false; IRBuilder<> B(&F->getEntryBlock().front()); - PointerType *PtrTy = Type::getInt8PtrTy(CheckLoc->getContext()); + PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot"); Value *GuardSlot = getStackGuard(TLI, M, B, &SupportsSelectionDAGSP); @@ -443,27 +444,14 @@ AllocaInst *AI = nullptr; // Place on stack that stores the stack guard. for (BasicBlock &BB : llvm::make_early_inc_range(*F)) { - Instruction *CheckLoc = dyn_cast(BB.getTerminator()); - if (!CheckLoc) { - for (auto &Inst : BB) { - auto *CB = dyn_cast(&Inst); - if (!CB) - continue; - if (!CB->doesNotReturn()) - continue; - // Do stack check before non-return calls (e.g: __cxa_throw) - CheckLoc = CB; - break; - } - } - - if (!CheckLoc) + ReturnInst *RI = dyn_cast(BB.getTerminator()); + if (!RI) continue; // Generate prologue instrumentation if not already generated. if (!HasPrologue) { HasPrologue = true; - SupportsSelectionDAGSP &= CreatePrologue(F, M, CheckLoc, TLI, AI); + SupportsSelectionDAGSP &= CreatePrologue(F, M, RI, TLI, AI); } // SelectionDAG based code generation. Nothing else needs to be done here. @@ -489,7 +477,8 @@ // verifier guarantees that a tail call is either directly before the // return or with a single correct bitcast of the return value in between so // we don't need to worry about many situations here. - Instruction *Prev = CheckLoc->getPrevNonDebugInstruction(); + Instruction *CheckLoc = RI; + Instruction *Prev = RI->getPrevNonDebugInstruction(); if (Prev && isa(Prev) && cast(Prev)->isTailCall()) CheckLoc = Prev; else if (Prev) { diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -1201,7 +1201,7 @@ assert(!TRI->isSuperOrSubRegisterEq(Reg, DestReg) && "TargetInstrInfo::describeLoadedValue can't describe super- or " "sub-regs for copy instructions"); - return None; + return std::nullopt; } else if (auto RegImm = isAddImmediate(MI, Reg)) { Register SrcReg = RegImm->Reg; Offset = RegImm->Imm; @@ -1219,16 +1219,16 @@ // If the address points to "special" memory (e.g. a spill slot), it's // sufficient to check that it isn't aliased by any high-level IR value. if (!PSV || PSV->mayAlias(&MFI)) - return None; + return std::nullopt; const MachineOperand *BaseOp; if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI)) - return None; + return std::nullopt; // FIXME: Scalable offsets are not yet handled in the offset code below. if (OffsetIsScalable) - return None; + return std::nullopt; // TODO: Can currently only handle mem instructions with a single define. // An example from the x86 target: @@ -1237,7 +1237,7 @@ // ... // if (MI.getNumExplicitDefs() != 1) - return None; + return std::nullopt; // TODO: In what way do we need to take Reg into consideration here? 
@@ -1249,7 +1249,7 @@ return ParamLoadedValue(*BaseOp, Expr); } - return None; + return std::nullopt; } /// Both DefMI and UseMI must be valid. By default, call directly to the diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -341,7 +341,7 @@ if (!FSProfileFile.empty()) return FSProfileFile.getValue(); const Optional &PGOOpt = TM->getPGOOption(); - if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse) + if (PGOOpt == std::nullopt || PGOOpt->Action != PGOOptions::SampleUse) return std::string(); return PGOOpt->ProfileFile; } @@ -352,7 +352,7 @@ if (!FSRemappingFile.empty()) return FSRemappingFile.getValue(); const Optional &PGOOpt = TM->getPGOOption(); - if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse) + if (PGOOpt == std::nullopt || PGOOpt->Action != PGOOptions::SampleUse) return std::string(); return PGOOpt->ProfileRemappingFile; } @@ -1059,13 +1059,13 @@ // pass manager into two. GlobalISel with the fallback path disabled // and -run-pass seem to be unaffected. The majority of GlobalISel // testing uses -run-pass so this probably isn't too bad. - SaveAndRestore SavedDebugifyIsSafe(DebugifyIsSafe); + SaveAndRestore SavedDebugifyIsSafe(DebugifyIsSafe); if (Selector != SelectorType::GlobalISel || !isGlobalISelAbortEnabled()) DebugifyIsSafe = false; // Add instruction selector passes. if (Selector == SelectorType::GlobalISel) { - SaveAndRestore SavedAddingMachinePasses(AddingMachinePasses, true); + SaveAndRestore SavedAddingMachinePasses(AddingMachinePasses, true); if (addIRTranslator()) return true; @@ -1522,6 +1522,9 @@ /// Add passes that optimize machine instructions after register allocation. void TargetPassConfig::addMachineLateOptimization() { + // Cleanup of redundant immediate/address loads. + addPass(&MachineLateInstrsCleanupID); + // Branch folding must be run after regalloc and prolog/epilog insertion. addPass(&BranchFolderPassID); diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp --- a/llvm/lib/CodeGen/WasmEHPrepare.cpp +++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp @@ -182,8 +182,7 @@ Changed = true; auto *BB = ThrowI->getParent(); SmallVector Succs(successors(BB)); - auto &InstList = BB->getInstList(); - InstList.erase(std::next(BasicBlock::iterator(ThrowI)), InstList.end()); + BB->erase(std::next(BasicBlock::iterator(ThrowI)), BB->end()); IRB.SetInsertPoint(BB); IRB.CreateUnreachable(); eraseDeadBBsAndChildren(Succs); diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp --- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp +++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp @@ -96,7 +96,7 @@ // Finally create the AsmPrinter we'll use to emit the DIEs. 
TM.reset(TheTarget->createTargetMachine(TripleName, "", "", TargetOptions(), - None)); + std::nullopt)); if (!TM) return error("no target machine for target " + TripleName, Context), false; diff --git a/llvm/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp b/llvm/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp --- a/llvm/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp +++ b/llvm/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp @@ -31,14 +31,14 @@ Optional AppendingTypeTableBuilder::getFirst() { if (empty()) - return None; + return std::nullopt; return TypeIndex(TypeIndex::FirstNonSimpleIndex); } Optional AppendingTypeTableBuilder::getNext(TypeIndex Prev) { if (++Prev == nextTypeIndex()) - return None; + return std::nullopt; return Prev; } diff --git a/llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp b/llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp --- a/llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp +++ b/llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp @@ -34,14 +34,14 @@ Optional GlobalTypeTableBuilder::getFirst() { if (empty()) - return None; + return std::nullopt; return TypeIndex(TypeIndex::FirstNonSimpleIndex); } Optional GlobalTypeTableBuilder::getNext(TypeIndex Prev) { if (++Prev == nextTypeIndex()) - return None; + return std::nullopt; return Prev; } diff --git a/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp --- a/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp +++ b/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp @@ -100,11 +100,11 @@ Optional LazyRandomTypeCollection::tryGetType(TypeIndex Index) { if (Index.isSimple()) - return None; + return std::nullopt; if (auto EC = ensureTypeExists(Index)) { consumeError(std::move(EC)); - return None; + return std::nullopt; } assert(contains(Index)); @@ -206,7 +206,7 @@ TypeIndex TI = TypeIndex::fromArrayIndex(0); if (auto EC = ensureTypeExists(TI)) { consumeError(std::move(EC)); - return None; + return std::nullopt; } return TI; } @@ -217,7 +217,7 @@ // record exists, and if anything goes wrong, we must be at the end. 
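getNext above relies on ensureTypeExists to detect the end of the type stream: ensuring the record after the last one fails, the error is swallowed, and std::nullopt signals that iteration is over. A toy version of that contract (the container and names are placeholders):

#include <cassert>
#include <cstddef>
#include <optional>
#include <vector>

// Return the index after Prev, or std::nullopt once Prev was the last one;
// failing to "ensure" an out-of-range element is what marks the end.
static std::optional<std::size_t> getNext(const std::vector<int> &Types,
                                          std::size_t Prev) {
  if (Prev + 1 >= Types.size())
    return std::nullopt; // nothing left to materialize
  return Prev + 1;
}

int main() {
  std::vector<int> Types = {10, 20, 30};
  assert(getNext(Types, 0).value() == 1);
  assert(!getNext(Types, 2).has_value());
  return 0;
}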
if (auto EC = ensureTypeExists(Prev + 1)) { consumeError(std::move(EC)); - return None; + return std::nullopt; } return Prev + 1; diff --git a/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp b/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp --- a/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp +++ b/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp @@ -35,14 +35,14 @@ Optional MergingTypeTableBuilder::getFirst() { if (empty()) - return None; + return std::nullopt; return TypeIndex(TypeIndex::FirstNonSimpleIndex); } Optional MergingTypeTableBuilder::getNext(TypeIndex Prev) { if (++Prev == nextTypeIndex()) - return None; + return std::nullopt; return Prev; } diff --git a/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp --- a/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp @@ -23,7 +23,7 @@ Optional TypeTableCollection::getFirst() { if (empty()) - return None; + return std::nullopt; return TypeIndex::fromArrayIndex(0); } @@ -31,7 +31,7 @@ assert(contains(Prev)); ++Prev; if (Prev.toArrayIndex() == size()) - return None; + return std::nullopt; return Prev; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp --- a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp @@ -144,7 +144,7 @@ if (AttributeSpecs[i].Attr == Attr) return i; } - return None; + return std::nullopt; } uint64_t DWARFAbbreviationDeclaration::getAttributeOffsetFromIndex( @@ -180,7 +180,7 @@ DWARFDataExtractor DebugInfoData = U.getDebugInfoExtractor(); if (FormValue.extractValue(DebugInfoData, &Offset, U.getFormParams(), &U)) return FormValue; - return None; + return std::nullopt; } Optional @@ -191,7 +191,7 @@ // any data so we can return quickly if it doesn't. 
Optional MatchAttrIndex = findAttributeIndex(Attr); if (!MatchAttrIndex) - return None; + return std::nullopt; uint64_t Offset = getAttributeOffsetFromIndex(*MatchAttrIndex, DIEOffset, U); @@ -227,5 +227,5 @@ const DWARFUnit &U) const { if (FixedAttributeSize) return FixedAttributeSize->getByteSize(U); - return None; + return std::nullopt; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp --- a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -146,7 +146,7 @@ Optional AppleAcceleratorTable::HeaderData::extractOffset( Optional Value) const { if (!Value) - return None; + return std::nullopt; switch (Value->getForm()) { case dwarf::DW_FORM_ref1: @@ -280,7 +280,7 @@ if (std::get<0>(Tuple).first == Atom) return std::get<1>(Tuple); } - return None; + return std::nullopt; } Optional AppleAcceleratorTable::Entry::getDIESectionOffset() const { @@ -294,10 +294,10 @@ Optional AppleAcceleratorTable::Entry::getTag() const { Optional Tag = lookup(dwarf::DW_ATOM_die_tag); if (!Tag) - return None; + return std::nullopt; if (Optional Value = Tag->getAsUnsignedConstant()) return dwarf::Tag(*Value); - return None; + return std::nullopt; } AppleAcceleratorTable::ValueIterator::ValueIterator( @@ -541,13 +541,13 @@ if (std::get<0>(Tuple).Index == Index) return std::get<1>(Tuple); } - return None; + return std::nullopt; } Optional DWARFDebugNames::Entry::getDIEUnitOffset() const { if (Optional Off = lookup(dwarf::DW_IDX_die_offset)) return Off->getAsReferenceUVal(); - return None; + return std::nullopt; } Optional DWARFDebugNames::Entry::getCUIndex() const { @@ -557,13 +557,13 @@ // implicitly refer to the single CU. if (NameIdx->getCUCount() == 1) return 0; - return None; + return std::nullopt; } Optional DWARFDebugNames::Entry::getCUOffset() const { Optional Index = getCUIndex(); if (!Index || *Index >= NameIdx->getCUCount()) - return None; + return std::nullopt; return NameIdx->getCUOffset(*Index); } @@ -767,7 +767,7 @@ W.startLine() << "Hash table not present\n"; for (const NameTableEntry &NTE : *this) - dumpName(W, NTE, None); + dumpName(W, NTE, std::nullopt); } Error DWARFDebugNames::extract() { @@ -802,7 +802,7 @@ if (NTE.getString() == Key) return NTE.getEntryOffset(); } - return None; + return std::nullopt; } // The Name Index has a Hash Table, so use that to speed up the search. 
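The lookup that follows is a classic open-hash probe: hash the key, pick a bucket, then walk consecutive name entries until the stored hash no longer maps to that bucket. A self-contained sketch of the same shape; the table layout and values below are invented for illustration:

#include <cassert>
#include <cstdint>
#include <optional>
#include <string>
#include <vector>

struct NameEntry {
  uint32_t Hash;        // precomputed hash of Name
  std::string Name;
  uint64_t EntryOffset; // what a real index would hand back on a match
};

// Entries are grouped so that names with the same Hash % BucketCount sit next
// to each other; Buckets[b] is the 1-based index of the first of them, or 0
// when the bucket is empty.
static std::optional<uint64_t> lookup(const std::vector<uint32_t> &Buckets,
                                      const std::vector<NameEntry> &Names,
                                      uint32_t Hash, const std::string &Key) {
  const uint32_t BucketCount = Buckets.size();
  const uint32_t Bucket = Hash % BucketCount;
  uint32_t Index = Buckets[Bucket];
  if (Index == 0)
    return std::nullopt; // empty bucket
  for (; Index <= Names.size(); ++Index) {
    const NameEntry &NTE = Names[Index - 1];
    if (NTE.Hash % BucketCount != Bucket)
      return std::nullopt; // walked off the end of the bucket
    if (NTE.Name == Key)
      return NTE.EntryOffset;
  }
  return std::nullopt;
}

int main() {
  // Two buckets; "main" hashes to bucket 0 and "foo" to bucket 1 (made-up hashes).
  std::vector<NameEntry> Names = {{2, "main", 0x20}, {1, "foo", 0x40}};
  std::vector<uint32_t> Buckets = {1, 2};
  assert(lookup(Buckets, Names, 2, "main").value() == 0x20);
  assert(!lookup(Buckets, Names, 3, "bar").has_value());
  return 0;
}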
@@ -812,18 +812,18 @@ uint32_t Bucket = *Hash % Hdr.BucketCount; uint32_t Index = CurrentIndex->getBucketArrayEntry(Bucket); if (Index == 0) - return None; // Empty bucket + return std::nullopt; // Empty bucket for (; Index <= Hdr.NameCount; ++Index) { uint32_t Hash = CurrentIndex->getHashArrayEntry(Index); if (Hash % Hdr.BucketCount != Bucket) - return None; // End of bucket + return std::nullopt; // End of bucket NameTableEntry NTE = CurrentIndex->getNameTableEntry(Index); if (NTE.getString() == Key) return NTE.getEntryOffset(); } - return None; + return std::nullopt; } bool DWARFDebugNames::ValueIterator::getEntryAtCurrentOffset() { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -330,8 +330,8 @@ if (DumpOffset) { if (DumpOffset >= Offset && DumpOffset < EndOffset) { Offset = *DumpOffset; - Loc.dumpLocationList(&Offset, OS, /*BaseAddr=*/None, MRI, Obj, nullptr, - DumpOpts, /*Indent=*/0); + Loc.dumpLocationList(&Offset, OS, /*BaseAddr=*/std::nullopt, MRI, Obj, + nullptr, DumpOpts, /*Indent=*/0); OS << "\n"; return; } @@ -450,8 +450,8 @@ if (*Off) { uint64_t Offset = **Off; Loc.dumpLocationList(&Offset, OS, - /*BaseAddr=*/None, getRegisterInfo(), *DObj, nullptr, - LLDumpOpts, /*Indent=*/0); + /*BaseAddr=*/std::nullopt, getRegisterInfo(), *DObj, + nullptr, LLDumpOpts, /*Indent=*/0); OS << "\n"; } else { Loc.dumpRange(0, Data.getData().size(), OS, getRegisterInfo(), *DObj, @@ -616,7 +616,7 @@ const auto &CUs = compile_units(); auto I = CUs.begin(); if (I == CUs.end()) - return None; + return std::nullopt; return (*I)->getAddrOffsetSectionItem(Index); }; @@ -1157,7 +1157,7 @@ return Offset; // Fallthrough. Do not accept ex. 
(DW_OP_breg W29, DW_OP_stack_value) } - return None; + return std::nullopt; } void DWARFContext::addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram, diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp --- a/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp @@ -72,7 +72,7 @@ DWARFDataExtractor::getEncodedPointer(uint64_t *Offset, uint8_t Encoding, uint64_t PCRelOffset) const { if (Encoding == dwarf::DW_EH_PE_omit) - return None; + return std::nullopt; uint64_t Result = 0; uint64_t OldOffset = *Offset; @@ -86,7 +86,7 @@ Result = getUnsigned(Offset, getAddressSize()); break; default: - return None; + return std::nullopt; } break; case dwarf::DW_EH_PE_uleb128: @@ -114,7 +114,7 @@ Result = getRelocatedValue(8, Offset); break; default: - return None; + return std::nullopt; } // Then add relative offset, if required switch (Encoding & 0x70) { @@ -130,7 +130,7 @@ case dwarf::DW_EH_PE_aligned: default: *Offset = OldOffset; - return None; + return std::nullopt; } return Result; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp @@ -122,7 +122,7 @@ break; AbbrDeclSets.insert(I, std::make_pair(CUAbbrOffset, std::move(AbbrDecls))); } - Data = None; + Data = std::nullopt; } void DWARFDebugAbbrev::dump(raw_ostream &OS) const { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp @@ -179,7 +179,7 @@ Optional DWARFDebugAddrTable::getFullLength() const { if (Length == 0) - return None; + return std::nullopt; return Length + dwarf::getUnitLengthFieldByteSize(Format); } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -50,15 +50,15 @@ UnwindLocation UnwindLocation::createSame() { return {Same}; } UnwindLocation UnwindLocation::createIsConstant(int32_t Value) { - return {Constant, InvalidRegisterNumber, Value, None, false}; + return {Constant, InvalidRegisterNumber, Value, std::nullopt, false}; } UnwindLocation UnwindLocation::createIsCFAPlusOffset(int32_t Offset) { - return {CFAPlusOffset, InvalidRegisterNumber, Offset, None, false}; + return {CFAPlusOffset, InvalidRegisterNumber, Offset, std::nullopt, false}; } UnwindLocation UnwindLocation::createAtCFAPlusOffset(int32_t Offset) { - return {CFAPlusOffset, InvalidRegisterNumber, Offset, None, true}; + return {CFAPlusOffset, InvalidRegisterNumber, Offset, std::nullopt, true}; } UnwindLocation @@ -1052,7 +1052,7 @@ if (Length == 0) { auto Cie = std::make_unique( IsDWARF64, StartOffset, 0, 0, SmallString<8>(), 0, 0, 0, 0, 0, - SmallString<8>(), 0, 0, None, None, Arch); + SmallString<8>(), 0, 0, std::nullopt, std::nullopt, Arch); CIEs[StartOffset] = Cie.get(); Entries.push_back(std::move(Cie)); break; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -81,7 +81,7 @@ Optional DWARFDebugLine::Prologue::getLastValidFileIndex() const { if (FileNames.empty()) - return None; + return std::nullopt; uint16_t DwarfVersion = getVersion(); 
assert(DwarfVersion != 0 && "line table prologue has no dwarf version information"); @@ -722,7 +722,7 @@ T Value = Data.getULEB128(Cursor); if (Cursor) return Value; - return None; + return std::nullopt; } Error DWARFDebugLine::LineTable::parse( @@ -1333,11 +1333,11 @@ Optional DWARFDebugLine::LineTable::getSourceByIndex(uint64_t FileIndex, FileLineInfoKind Kind) const { if (Kind == FileLineInfoKind::None || !Prologue.hasFileAtIndex(FileIndex)) - return None; + return std::nullopt; const FileNameEntry &Entry = Prologue.getFileNameEntry(FileIndex); if (auto E = dwarf::toString(Entry.Source)) return StringRef(*E); - return None; + return std::nullopt; } static bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -52,12 +52,12 @@ DWARFLocationInterpreter::Interpret(const DWARFLocationEntry &E) { switch (E.Kind) { case dwarf::DW_LLE_end_of_list: - return None; + return std::nullopt; case dwarf::DW_LLE_base_addressx: { Base = LookupAddr(E.Value0); if (!Base) return createResolverError(E.Value0, E.Kind); - return None; + return std::nullopt; } case dwarf::DW_LLE_startx_endx: { Optional LowPC = LookupAddr(E.Value0); @@ -92,10 +92,10 @@ return DWARFLocationExpression{Range, E.Loc}; } case dwarf::DW_LLE_default_location: - return DWARFLocationExpression{None, E.Loc}; + return DWARFLocationExpression{std::nullopt, E.Loc}; case dwarf::DW_LLE_base_address: Base = SectionedAddress{E.Value0, E.SectionIndex}; - return None; + return std::nullopt; case dwarf::DW_LLE_start_end: return DWARFLocationExpression{ DWARFAddressRange{E.Value0, E.Value1, E.SectionIndex}, E.Loc}; @@ -130,7 +130,7 @@ BaseAddr, [U](uint32_t Index) -> Optional { if (U) return U->getAddrOffsetSectionItem(Index); - return None; + return std::nullopt; }); OS << format("0x%8.8" PRIx64 ": ", *Offset); Error E = visitLocationList(Offset, [&](const DWARFLocationEntry &E) { @@ -187,7 +187,7 @@ void DWARFDebugLoc::dump(raw_ostream &OS, const MCRegisterInfo *MRI, const DWARFObject &Obj, DIDumpOptions DumpOpts, Optional DumpOffset) const { - auto BaseAddr = None; + auto BaseAddr = std::nullopt; unsigned Indent = 12; if (DumpOffset) { dumpLocationList(&*DumpOffset, OS, BaseAddr, MRI, Obj, nullptr, DumpOpts, @@ -401,8 +401,8 @@ OS << Separator; Separator = "\n"; - CanContinue = dumpLocationList(&Offset, OS, /*BaseAddr=*/None, MRI, Obj, - nullptr, DumpOpts, /*Indent=*/12); + CanContinue = dumpLocationList(&Offset, OS, /*BaseAddr=*/std::nullopt, MRI, + Obj, nullptr, DumpOpts, /*Indent=*/12); OS << '\n'; } } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -251,17 +251,17 @@ Optional DWARFDie::find(dwarf::Attribute Attr) const { if (!isValid()) - return None; + return std::nullopt; auto AbbrevDecl = getAbbreviationDeclarationPtr(); if (AbbrevDecl) return AbbrevDecl->getAttributeValue(getOffset(), Attr, *U); - return None; + return std::nullopt; } Optional DWARFDie::find(ArrayRef Attrs) const { if (!isValid()) - return None; + return std::nullopt; auto AbbrevDecl = getAbbreviationDeclarationPtr(); if (AbbrevDecl) { for (auto Attr : Attrs) { @@ -269,7 +269,7 @@ return Value; } } - return None; + return std::nullopt; } Optional @@ -302,7 +302,7 @@ Worklist.push_back(D); } - return None; + return std::nullopt; } DWARFDie 
@@ -348,7 +348,7 @@ Optional DWARFDie::getHighPC(uint64_t LowPC) const { uint64_t Tombstone = dwarf::computeTombstoneAddress(U->getAddressByteSize()); if (LowPC == Tombstone) - return None; + return std::nullopt; if (auto FormValue = find(DW_AT_high_pc)) { if (auto Address = FormValue->getAsAddress()) { // High PC is an address. @@ -359,7 +359,7 @@ return LowPC + *Offset; } } - return None; + return std::nullopt; } bool DWARFDie::getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC, @@ -429,7 +429,7 @@ if (Optional> Expr = Location->getAsBlock()) { return DWARFLocationExpressionsVector{ - DWARFLocationExpression{None, to_vector<4>(*Expr)}}; + DWARFLocationExpression{std::nullopt, to_vector<4>(*Expr)}}; } return createStringError( @@ -520,10 +520,10 @@ case DW_TAG_array_type: { DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type); if (!BaseType) - return None; + return std::nullopt; Optional BaseSize = BaseType.getTypeSize(PointerSize); if (!BaseSize) - return None; + return std::nullopt; uint64_t Size = *BaseSize; for (DWARFDie Child : *this) { if (Child.getTag() != DW_TAG_subrange_type) @@ -549,7 +549,7 @@ return BaseType.getTypeSize(PointerSize); break; } - return None; + return std::nullopt; } /// Helper to dump a DIE with all of its parents, but no siblings. diff --git a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp --- a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -473,7 +473,7 @@ OS << format("0x%016" PRIx64, UValue); break; case DW_FORM_data16: - OS << format_bytes(ArrayRef(Value.data, 16), None, 16, 16); + OS << format_bytes(ArrayRef(Value.data, 16), std::nullopt, 16, 16); break; case DW_FORM_string: OS << '"'; @@ -669,22 +669,22 @@ Optional DWARFFormValue::getAsAddress() const { if (auto SA = getAsSectionedAddress()) return SA->Address; - return None; + return std::nullopt; } Optional DWARFFormValue::getAsSectionedAddress() const { if (!isFormClass(FC_Address)) - return None; + return std::nullopt; bool AddrOffset = Form == dwarf::DW_FORM_LLVM_addrx_offset; if (Form == DW_FORM_GNU_addr_index || Form == DW_FORM_addrx || AddrOffset) { uint32_t Index = AddrOffset ? (Value.uval >> 32) : Value.uval; if (!U) - return None; + return std::nullopt; Optional SA = U->getAddrOffsetSectionItem(Index); if (!SA) - return None; + return std::nullopt; if (AddrOffset) SA->Address += (Value.uval & 0xffffffff); return SA; @@ -695,12 +695,12 @@ Optional DWARFFormValue::getAsReference() const { if (auto R = getAsRelativeReference()) return R->Unit ? 
R->Unit->getOffset() + R->Offset : R->Offset; - return None; + return std::nullopt; } Optional DWARFFormValue::getAsRelativeReference() const { if (!isFormClass(FC_Reference)) - return None; + return std::nullopt; switch (Form) { case DW_FORM_ref1: case DW_FORM_ref2: @@ -708,27 +708,27 @@ case DW_FORM_ref8: case DW_FORM_ref_udata: if (!U) - return None; + return std::nullopt; return UnitOffset{const_cast(U), Value.uval}; case DW_FORM_ref_addr: case DW_FORM_ref_sig8: case DW_FORM_GNU_ref_alt: return UnitOffset{nullptr, Value.uval}; default: - return None; + return std::nullopt; } } Optional DWARFFormValue::getAsSectionOffset() const { if (!isFormClass(FC_SectionOffset)) - return None; + return std::nullopt; return Value.uval; } Optional DWARFFormValue::getAsUnsignedConstant() const { if ((!isFormClass(FC_Constant) && !isFormClass(FC_Flag)) || Form == DW_FORM_sdata) - return None; + return std::nullopt; return Value.uval; } @@ -736,7 +736,7 @@ if ((!isFormClass(FC_Constant) && !isFormClass(FC_Flag)) || (Form == DW_FORM_udata && uint64_t(std::numeric_limits::max()) < Value.uval)) - return None; + return std::nullopt; switch (Form) { case DW_FORM_data4: return int32_t(Value.uval); @@ -754,26 +754,26 @@ Optional> DWARFFormValue::getAsBlock() const { if (!isFormClass(FC_Block) && !isFormClass(FC_Exprloc) && Form != DW_FORM_data16) - return None; + return std::nullopt; return makeArrayRef(Value.data, Value.uval); } Optional DWARFFormValue::getAsCStringOffset() const { if (!isFormClass(FC_String) && Form == DW_FORM_string) - return None; + return std::nullopt; return Value.uval; } Optional DWARFFormValue::getAsReferenceUVal() const { if (!isFormClass(FC_Reference)) - return None; + return std::nullopt; return Value.uval; } Optional DWARFFormValue::getAsFile(DILineInfoSpecifier::FileLineInfoKind Kind) const { if (U == nullptr || !isFormClass(FC_Constant)) - return None; + return std::nullopt; DWARFUnit *DLU = const_cast(U)->getLinkedUnit(); if (auto *LT = DLU->getContext().getLineTableForUnit(DLU)) { std::string FileName; @@ -781,5 +781,5 @@ FileName)) return FileName; } - return None; + return std::nullopt; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp b/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp --- a/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp @@ -35,7 +35,7 @@ if ((DefaultLB = LanguageLowerBound(static_cast(*LC)))) if (LB && *LB == *DefaultLB) - LB = None; + LB = std::nullopt; if (!LB && !Count && !UB) OS << "[]"; else if (!LB && (Count || UB) && DefaultLB) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp --- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -215,12 +215,12 @@ if (IsDWO && hasSingleElement(R)) return (*R.begin())->getAddrOffsetSectionItem(Index); - return None; + return std::nullopt; } uint64_t Offset = *AddrOffsetSectionBase + Index * getAddressByteSize(); if (AddrOffsetSection->Data.size() < Offset + getAddressByteSize()) - return None; + return std::nullopt; DWARFDataExtractor DA(Context.getDWARFObj(), *AddrOffsetSection, IsLittleEndian, getAddressByteSize()); uint64_t Section; @@ -377,7 +377,7 @@ BaseAddr.reset(); RangeSectionBase = 0; LocSectionBase = 0; - AddrOffsetSectionBase = None; + AddrOffsetSectionBase = std::nullopt; SU = nullptr; clearDIEs(false); AddrDieMap.clear(); @@ -502,7 +502,7 @@ if (Optional DWOId = toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id))) Header.setDWOId(*DWOId); if (!IsDWO) { - 
assert(AddrOffsetSectionBase == None); + assert(AddrOffsetSectionBase == std::nullopt); assert(RangeSectionBase == 0); assert(LocSectionBase == 0); AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_addr_base)); @@ -1138,7 +1138,7 @@ assert(!IsDWO); auto OptOffset = toSectionOffset(getUnitDIE().find(DW_AT_str_offsets_base)); if (!OptOffset) - return None; + return std::nullopt; auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Header.getFormat(), *OptOffset); if (!DescOrError) @@ -1157,7 +1157,7 @@ Offset = C->Offset; if (getVersion() >= 5) { if (DA.getData().data() == nullptr) - return None; + return std::nullopt; Offset += Header.getFormat() == dwarf::DwarfFormat::DWARF32 ? 8 : 16; // Look for a valid contribution at the given offset. auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Header.getFormat(), Offset); @@ -1176,7 +1176,7 @@ Desc = StrOffsetsContributionDescriptor(0, StringOffsetSection.Data.size(), 4, Header.getFormat()); else - return None; + return std::nullopt; auto DescOrError = Desc.validateContributionSize(DA); if (!DescOrError) return DescOrError.takeError(); @@ -1191,12 +1191,12 @@ if (Optional Off = llvm::DWARFListTableHeader::getOffsetEntry( RangesData, RangeSectionBase, getFormat(), Index)) return *Off + RangeSectionBase; - return None; + return std::nullopt; } Optional DWARFUnit::getLoclistOffset(uint32_t Index) { if (Optional Off = llvm::DWARFListTableHeader::getOffsetEntry( LocTable->getData(), LocSectionBase, getFormat(), Index)) return *Off + LocSectionBase; - return None; + return std::nullopt; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp --- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -62,7 +62,7 @@ } Ranges.insert(Pos, R); - return None; + return std::nullopt; } DWARFVerifier::DieRangeInfo::die_range_info_iterator diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp --- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp +++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -140,7 +140,7 @@ StringRef ShortName(Die.getName(DINameKind::ShortName)); if (ShortName.empty()) - return llvm::None; + return std::nullopt; // For C++ and ObjC, prepend names of all parent declaration contexts if (!(Language == dwarf::DW_LANG_C_plus_plus || @@ -346,7 +346,7 @@ // If not line table rows were added, clear the line table so we don't encode // on in the GSYM file. 
if (FI.OptLineTable->empty()) - FI.OptLineTable = llvm::None; + FI.OptLineTable = std::nullopt; } void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) { diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp --- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp @@ -213,14 +213,14 @@ case 4: return addressForIndex(Index); case 8: return addressForIndex(Index); } - return llvm::None; + return std::nullopt; } Optional GsymReader::getAddressInfoOffset(size_t Index) const { const auto NumAddrInfoOffsets = AddrInfoOffsets.size(); if (Index < NumAddrInfoOffsets) return AddrInfoOffsets[Index]; - return llvm::None; + return std::nullopt; } Expected diff --git a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp --- a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp @@ -57,7 +57,7 @@ InlineArray Result; if (getInlineStackHelper(*this, Addr, Result)) return Result; - return llvm::None; + return std::nullopt; } /// Skip an InlineInfo object in the specified data at the specified offset. diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp --- a/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp @@ -409,7 +409,8 @@ std::string BufferCodes; raw_string_ostream StreamCodes(BufferCodes); StreamCodes << format_bytes( - ArrayRef(Begin, Begin + BytesConsumed), None, 16, 16); + ArrayRef(Begin, Begin + BytesConsumed), std::nullopt, 16, + 16); dbgs() << "[" << hexValue((uint64_t)Begin) << "] " << "Size: " << format_decimal(BytesConsumed, 2) << " (" << formatv("{0}", diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp --- a/llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp @@ -57,7 +57,7 @@ if (Line.Header->Inlinee == Id) return Line; } - return None; + return std::nullopt; } std::string NativeInlineSiteSymbol::getName() const { @@ -140,10 +140,10 @@ FileOffset = *NextFileOffset; if (NextLineOffset) { CurLineOffset = NextLineOffset; - NextLineOffset = None; + NextLineOffset = std::nullopt; } CodeOffsetBase = CodeOffsetEnd; - CodeOffsetEnd = NextFileOffset = None; + CodeOffsetEnd = NextFileOffset = std::nullopt; } return false; }; diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp --- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -34,7 +34,7 @@ Optional load(StringRef FileName, const Optional &EmbeddedSource) { if (Lines <= 0) - return None; + return std::nullopt; if (EmbeddedSource) return EmbeddedSource; @@ -42,7 +42,7 @@ ErrorOr> BufOrErr = MemoryBuffer::getFile(FileName); if (!BufOrErr) - return None; + return std::nullopt; MemBuf = std::move(*BufOrErr); return MemBuf->getBuffer(); } @@ -50,7 +50,7 @@ Optional pruneSource(const Optional &Source) { if (!Source) - return None; + return std::nullopt; size_t FirstLinePos = StringRef::npos, Pos = 0; for (int64_t L = 1; L <= LastLine; ++L, ++Pos) { if (L == FirstLine) @@ -60,7 +60,7 @@ break; } if (FirstLinePos == StringRef::npos) - return None; + return std::nullopt; return Source->substr(FirstLinePos, (Pos == StringRef::npos) ? 
StringRef::npos : Pos - FirstLinePos); diff --git a/llvm/lib/DebugInfo/Symbolize/Markup.cpp b/llvm/lib/DebugInfo/Symbolize/Markup.cpp --- a/llvm/lib/DebugInfo/Symbolize/Markup.cpp +++ b/llvm/lib/DebugInfo/Symbolize/Markup.cpp @@ -54,7 +54,7 @@ // The buffer is empty, so parse the next bit of the line. if (Line.empty()) - return None; + return std::nullopt; if (!InProgressMultiline.empty()) { if (Optional MultilineEnd = parseMultiLineEnd(Line)) { @@ -70,7 +70,7 @@ // The whole line is part of the multi-line element. llvm::append_range(InProgressMultiline, Line); Line = Line.drop_front(Line.size()); - return None; + return std::nullopt; } // Find the first valid markup element, if any. @@ -116,10 +116,10 @@ // Find next element using begin and end markers. size_t BeginPos = Line.find("{{{"); if (BeginPos == StringRef::npos) - return None; + return std::nullopt; size_t EndPos = Line.find("}}}", BeginPos + 3); if (EndPos == StringRef::npos) - return None; + return std::nullopt; EndPos += 3; MarkupNode Element; Element.Text = Line.slice(BeginPos, EndPos); @@ -173,22 +173,22 @@ // A multi-line begin marker must be the last one on the line. size_t BeginPos = Line.rfind("{{{"); if (BeginPos == StringRef::npos) - return None; + return std::nullopt; size_t BeginTagPos = BeginPos + 3; // If there are any end markers afterwards, the begin marker cannot belong to // a multi-line element. size_t EndPos = Line.find("}}}", BeginTagPos); if (EndPos != StringRef::npos) - return None; + return std::nullopt; // Check whether the tag is registered multi-line. size_t EndTagPos = Line.find(':', BeginTagPos); if (EndTagPos == StringRef::npos) - return None; + return std::nullopt; StringRef Tag = Line.slice(BeginTagPos, EndTagPos); if (!MultilineTags.contains(Tag)) - return None; + return std::nullopt; return Line.substr(BeginPos); } @@ -197,7 +197,7 @@ Optional MarkupParser::parseMultiLineEnd(StringRef Line) { size_t EndPos = Line.find("}}}"); if (EndPos == StringRef::npos) - return None; + return std::nullopt; return Line.take_front(EndPos + 3); } diff --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp --- a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp @@ -421,7 +421,7 @@ .Case("\033[35m", raw_ostream::Colors::MAGENTA) .Case("\033[36m", raw_ostream::Colors::CYAN) .Case("\033[37m", raw_ostream::Colors::WHITE) - .Default(llvm::None); + .Default(std::nullopt); if (SGRColor) { Color = *SGRColor; if (ColorsEnabled) @@ -502,17 +502,17 @@ Optional MarkupFilter::parseModule(const MarkupNode &Element) const { if (!checkNumFieldsAtLeast(Element, 3)) - return None; + return std::nullopt; ASSIGN_OR_RETURN_NONE(uint64_t, ID, parseModuleID(Element.Fields[0])); StringRef Name = Element.Fields[1]; StringRef Type = Element.Fields[2]; if (Type != "elf") { WithColor::error() << "unknown module type\n"; reportLocation(Type.begin()); - return None; + return std::nullopt; } if (!checkNumFields(Element, 4)) - return None; + return std::nullopt; ASSIGN_OR_RETURN_NONE(SmallVector, BuildID, parseBuildID(Element.Fields[3])); return Module{ID, Name.str(), std::move(BuildID)}; @@ -521,24 +521,24 @@ Optional MarkupFilter::parseMMap(const MarkupNode &Element) const { if (!checkNumFieldsAtLeast(Element, 3)) - return None; + return std::nullopt; ASSIGN_OR_RETURN_NONE(uint64_t, Addr, parseAddr(Element.Fields[0])); ASSIGN_OR_RETURN_NONE(uint64_t, Size, parseSize(Element.Fields[1])); StringRef Type = Element.Fields[2]; if (Type != "load") { 
WithColor::error() << "unknown mmap type\n"; reportLocation(Type.begin()); - return None; + return std::nullopt; } if (!checkNumFields(Element, 6)) - return None; + return std::nullopt; ASSIGN_OR_RETURN_NONE(uint64_t, ID, parseModuleID(Element.Fields[3])); ASSIGN_OR_RETURN_NONE(std::string, Mode, parseMode(Element.Fields[4])); auto It = Modules.find(ID); if (It == Modules.end()) { WithColor::error() << "unknown module ID\n"; reportLocation(Element.Fields[3].begin()); - return None; + return std::nullopt; } ASSIGN_OR_RETURN_NONE(uint64_t, ModuleRelativeAddr, parseAddr(Element.Fields[5])); @@ -550,18 +550,18 @@ Optional MarkupFilter::parseAddr(StringRef Str) const { if (Str.empty()) { reportTypeError(Str, "address"); - return None; + return std::nullopt; } if (all_of(Str, [](char C) { return C == '0'; })) return 0; if (!Str.startswith("0x")) { reportTypeError(Str, "address"); - return None; + return std::nullopt; } uint64_t Addr; if (Str.drop_front(2).getAsInteger(16, Addr)) { reportTypeError(Str, "address"); - return None; + return std::nullopt; } return Addr; } @@ -571,7 +571,7 @@ uint64_t ID; if (Str.getAsInteger(0, ID)) { reportTypeError(Str, "module ID"); - return None; + return std::nullopt; } return ID; } @@ -581,7 +581,7 @@ uint64_t ID; if (Str.getAsInteger(0, ID)) { reportTypeError(Str, "size"); - return None; + return std::nullopt; } return ID; } @@ -591,7 +591,7 @@ uint64_t ID; if (Str.getAsInteger(10, ID)) { reportTypeError(Str, "frame number"); - return None; + return std::nullopt; } return ID; } @@ -601,7 +601,7 @@ std::string Bytes; if (Str.empty() || Str.size() % 2 || !tryGetFromHex(Str, Bytes)) { reportTypeError(Str, "build ID"); - return None; + return std::nullopt; } ArrayRef BuildID(reinterpret_cast(Bytes.data()), Bytes.size()); @@ -612,7 +612,7 @@ Optional MarkupFilter::parseMode(StringRef Str) const { if (Str.empty()) { reportTypeError(Str, "mode"); - return None; + return std::nullopt; } // Pop off each of r/R, w/W, and x/X from the front, in that order. @@ -627,7 +627,7 @@ // If anything remains, then the string wasn't a mode. if (!Remainder.empty()) { reportTypeError(Str, "mode"); - return None; + return std::nullopt; } // Normalize the mode. @@ -639,7 +639,7 @@ StringSwitch>(Str) .Case("ra", MarkupFilter::PCType::ReturnAddress) .Case("pc", MarkupFilter::PCType::PreciseCode) - .Default(None); + .Default(std::nullopt); if (!Type) reportTypeError(Str, "PC type"); return Type; diff --git a/llvm/lib/Debuginfod/BuildIDFetcher.cpp b/llvm/lib/Debuginfod/BuildIDFetcher.cpp --- a/llvm/lib/Debuginfod/BuildIDFetcher.cpp +++ b/llvm/lib/Debuginfod/BuildIDFetcher.cpp @@ -27,5 +27,5 @@ if (PathOrErr) return *PathOrErr; consumeError(PathOrErr.takeError()); - return None; + return std::nullopt; } diff --git a/llvm/lib/Debuginfod/Debuginfod.cpp b/llvm/lib/Debuginfod/Debuginfod.cpp --- a/llvm/lib/Debuginfod/Debuginfod.cpp +++ b/llvm/lib/Debuginfod/Debuginfod.cpp @@ -434,7 +434,7 @@ std::string Path = Loc->getValue(); return Path; } - return None; + return std::nullopt; } Expected> @@ -446,7 +446,7 @@ std::string Path = Loc->getValue(); return Path; } - return None; + return std::nullopt; } Expected DebuginfodCollection::findBinaryPath(BuildIDRef ID) { diff --git a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp --- a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp @@ -395,7 +395,7 @@ // Execute the ctor/dtor function! 
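The mmap mode handling a few hunks above consumes the optional r/w/x flags in order and rejects the field if anything is left over. A self-contained sketch of that validation, returning std::nullopt on failure (the function name and normalization are assumptions for illustration):

#include <cassert>
#include <optional>
#include <string>

// Accepts any subsequence of "rwx" (case-insensitive), in that order, and
// returns the normalized lower-case mode; anything else is rejected.
static std::optional<std::string> parseMode(const std::string &Str) {
  if (Str.empty())
    return std::nullopt;
  std::string Normalized;
  std::size_t Pos = 0;
  for (char Flag : {'r', 'w', 'x'}) {
    if (Pos < Str.size() &&
        (Str[Pos] == Flag || Str[Pos] == Flag - 'a' + 'A')) {
      Normalized.push_back(Flag);
      ++Pos;
    }
  }
  if (Pos != Str.size())
    return std::nullopt; // leftover characters: not a mode string
  return Normalized;
}

int main() {
  assert(parseMode("RX").value() == "rx");
  assert(!parseMode("rwz").has_value());
  return 0;
}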
if (Function *F = dyn_cast(FP)) - runFunction(F, None); + runFunction(F, std::nullopt); // FIXME: It is marginally lame that we just do nothing here if we see an // entry we don't recognize. It might not be unreasonable for the verifier diff --git a/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp --- a/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -21,6 +21,7 @@ #include "llvm/Target/CodeGenCWrappers.h" #include "llvm/Target/TargetOptions.h" #include +#include using namespace llvm; @@ -199,7 +200,7 @@ .setOptLevel((CodeGenOpt::Level)options.OptLevel) .setTargetOptions(targetOptions); bool JIT; - if (Optional CM = unwrap(options.CodeModel, JIT)) + if (std::optional CM = unwrap(options.CodeModel, JIT)) builder.setCodeModel(*CM); if (options.MCJMM) builder.setMCJITMemoryManager( diff --git a/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp --- a/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp +++ b/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp @@ -69,7 +69,7 @@ void Interpreter::runAtExitHandlers () { while (!AtExitHandlers.empty()) { - callFunction(AtExitHandlers.back(), None); + callFunction(AtExitHandlers.back(), std::nullopt); AtExitHandlers.pop_back(); run(); } diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp --- a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp @@ -613,7 +613,7 @@ setGraphSymbol(Symbol.getSectionNumber(), PendingComdatExport->SymbolIndex, *GSym); DefinedSymbols[SymbolName] = GSym; - PendingComdatExport = None; + PendingComdatExport = std::nullopt; return GSym; } diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp --- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp @@ -195,7 +195,7 @@ SplitBlockCache LocalBlockSymbolsCache; if (!Cache) Cache = &LocalBlockSymbolsCache; - if (*Cache == None) { + if (*Cache == std::nullopt) { *Cache = SplitBlockCache::value_type(); for (auto *Sym : B.getSection().symbols()) if (&Sym->getBlock() == &B) diff --git a/llvm/lib/ExecutionEngine/Orc/COFFVCRuntimeSupport.cpp b/llvm/lib/ExecutionEngine/Orc/COFFVCRuntimeSupport.cpp --- a/llvm/lib/ExecutionEngine/Orc/COFFVCRuntimeSupport.cpp +++ b/llvm/lib/ExecutionEngine/Orc/COFFVCRuntimeSupport.cpp @@ -157,8 +157,8 @@ std::string VCToolChainPath; ToolsetLayout VSLayout; IntrusiveRefCntPtr VFS = vfs::getRealFileSystem(); - if (!findVCToolChainViaCommandLine(*VFS, None, None, None, VCToolChainPath, - VSLayout) && + if (!findVCToolChainViaCommandLine(*VFS, std::nullopt, std::nullopt, + std::nullopt, VCToolChainPath, VSLayout) && !findVCToolChainViaEnvironment(*VFS, VCToolChainPath, VSLayout) && !findVCToolChainViaSetupConfig(*VFS, VCToolChainPath, VSLayout) && !findVCToolChainViaRegistry(VCToolChainPath, VSLayout)) @@ -167,8 +167,8 @@ std::string UniversalCRTSdkPath; std::string UCRTVersion; - if (!getUniversalCRTSdkDir(*VFS, None, None, None, UniversalCRTSdkPath, - UCRTVersion)) + if (!getUniversalCRTSdkDir(*VFS, std::nullopt, std::nullopt, std::nullopt, + UniversalCRTSdkPath, UCRTVersion)) return make_error("Couldn't find universal sdk.", inconvertibleErrorCode()); diff --git a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp 
b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp --- a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp @@ -109,7 +109,7 @@ Optional CompileOnDemandLayer::compileWholeModule(GlobalValueSet Requested) { - return None; + return std::nullopt; } CompileOnDemandLayer::CompileOnDemandLayer( @@ -287,7 +287,7 @@ // Take a 'None' partition to mean the whole module (as opposed to an empty // partition, which means "materialize nothing"). Emit the whole module // unmodified to the base layer. - if (GVsToExtract == None) { + if (GVsToExtract == std::nullopt) { Defs.clear(); BaseLayer.emit(std::move(R), std::move(TSM)); return; diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp --- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp @@ -423,6 +423,10 @@ UnmapViewOfFile(R.second.LocalAddr); +#else + + (void)R; + #endif } } diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp --- a/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp @@ -179,7 +179,7 @@ if (Def->Selection != COFF::IMAGE_COMDAT_SELECT_NODUPLICATES) { IsWeak = true; } - ComdatDefs[COFFSym.getSectionNumber()] = None; + ComdatDefs[COFFSym.getSectionNumber()] = std::nullopt; } else { // Skip symbols not defined in this object file. if (*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined) diff --git a/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp b/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp --- a/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp +++ b/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp @@ -97,7 +97,7 @@ auto IBBs = findBBwithCalls(F); if (IBBs.empty()) - return None; + return std::nullopt; auto &BFI = FAM.getResult(F); @@ -288,7 +288,7 @@ CallerBlocks = findBBwithCalls(F); if (CallerBlocks.empty()) - return None; + return std::nullopt; if (isStraightLine(F)) SequencedBlocks = rearrangeBB(F, CallerBlocks); diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp --- a/llvm/lib/FileCheck/FileCheck.cpp +++ b/llvm/lib/FileCheck/FileCheck.cpp @@ -702,7 +702,7 @@ .Case("min", min) .Case("mul", operator*) .Case("sub", operator-) - .Default(None); + .Default(std::nullopt); if (!OptFunc) return ErrorDiagnostic::get( @@ -770,7 +770,7 @@ FileCheckPatternContext *Context, const SourceMgr &SM) { std::unique_ptr ExpressionASTPointer = nullptr; StringRef DefExpr = StringRef(); - DefinedNumericVariable = None; + DefinedNumericVariable = std::nullopt; ExpressionFormat ExplicitFormat = ExpressionFormat(); unsigned Precision = 0; @@ -2703,8 +2703,9 @@ StringRef CmdlineDefExpr = CmdlineDef.substr(1); Optional DefinedNumericVariable; Expected> ExpressionResult = - Pattern::parseNumericSubstitutionBlock( - CmdlineDefExpr, DefinedNumericVariable, false, None, this, SM); + Pattern::parseNumericSubstitutionBlock(CmdlineDefExpr, + DefinedNumericVariable, false, + std::nullopt, this, SM); if (!ExpressionResult) { Errs = joinErrors(std::move(Errs), ExpressionResult.takeError()); continue; diff --git a/llvm/lib/FileCheck/FileCheckImpl.h b/llvm/lib/FileCheck/FileCheckImpl.h --- a/llvm/lib/FileCheck/FileCheckImpl.h +++ b/llvm/lib/FileCheck/FileCheckImpl.h @@ -282,7 +282,7 @@ /// defined at line \p DefLineNumber or defined before input is parsed if /// \p DefLineNumber is None. 
explicit NumericVariable(StringRef Name, ExpressionFormat ImplicitFormat, - Optional DefLineNumber = None) + Optional DefLineNumber = std::nullopt) : Name(Name), ImplicitFormat(ImplicitFormat), DefLineNumber(DefLineNumber) {} @@ -306,7 +306,7 @@ /// buffer string from which it was parsed to \p NewStrValue. See comments on /// getStringValue for a discussion of when the latter can be None. void setValue(ExpressionValue NewValue, - Optional NewStrValue = None) { + Optional NewStrValue = std::nullopt) { Value = NewValue; StrValue = NewStrValue; } @@ -314,8 +314,8 @@ /// Clears value of this numeric variable, regardless of whether it is /// currently defined or not. void clearValue() { - Value = None; - StrValue = None; + Value = std::nullopt; + StrValue = std::nullopt; } /// \returns the line number where this variable is defined, if any, or None @@ -555,7 +555,7 @@ SMRange getRange() const { return Range; } static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg, - SMRange Range = None) { + SMRange Range = std::nullopt) { return make_error( SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg), Range); } @@ -682,7 +682,7 @@ public: Pattern(Check::FileCheckType Ty, FileCheckPatternContext *Context, - Optional Line = None) + Optional Line = std::nullopt) : Context(Context), CheckTy(Ty), LineNumber(Line) {} /// \returns the location in source code. diff --git a/llvm/lib/Frontend/OpenMP/OMPContext.cpp b/llvm/lib/Frontend/OpenMP/OMPContext.cpp --- a/llvm/lib/Frontend/OpenMP/OMPContext.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPContext.cpp @@ -169,13 +169,13 @@ if (MK == MK_ANY) { if (WasFound) return true; - return None; + return std::nullopt; } // In "all" or "none" mode we accept a matching or non-matching property // respectively and move on. We are not done yet! if ((WasFound && MK == MK_ALL) || (!WasFound && MK == MK_NONE)) - return None; + return std::nullopt; // We missed a property, provide some debug output and indicate failure. LLVM_DEBUG({ diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -261,8 +261,7 @@ // Move instructions to new block. BasicBlock *Old = IP.getBlock(); - New->getInstList().splice(New->begin(), Old->getInstList(), IP.getPoint(), - Old->end()); + New->splice(New->begin(), Old, IP.getPoint(), Old->end()); if (CreateBranch) BranchInst::Create(New, Old); @@ -3195,8 +3194,8 @@ llvm::TargetOptions Options; return std::unique_ptr(TheTarget->createTargetMachine( - Triple, CPU, Features, Options, /*RelocModel=*/None, /*CodeModel=*/None, - OptLevel)); + Triple, CPU, Features, Options, /*RelocModel=*/std::nullopt, + /*CodeModel=*/std::nullopt, OptLevel)); } /// Heuristically determine the best-performant unroll factor for \p CLI. 
This @@ -3241,12 +3240,12 @@ gatherUnrollingPreferences(L, SE, TTI, /*BlockFrequencyInfo=*/nullptr, /*ProfileSummaryInfo=*/nullptr, ORE, OptLevel, - /*UserThreshold=*/None, - /*UserCount=*/None, + /*UserThreshold=*/std::nullopt, + /*UserCount=*/std::nullopt, /*UserAllowPartial=*/true, /*UserAllowRuntime=*/true, - /*UserUpperBound=*/None, - /*UserFullUnrollMaxCount=*/None); + /*UserUpperBound=*/std::nullopt, + /*UserFullUnrollMaxCount=*/std::nullopt); UP.Force = true; diff --git a/llvm/lib/FuzzMutate/IRMutator.cpp b/llvm/lib/FuzzMutate/IRMutator.cpp --- a/llvm/lib/FuzzMutate/IRMutator.cpp +++ b/llvm/lib/FuzzMutate/IRMutator.cpp @@ -106,7 +106,7 @@ }; auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred)); if (RS.isEmpty()) - return None; + return std::nullopt; return *RS; } diff --git a/llvm/lib/FuzzMutate/Operations.cpp b/llvm/lib/FuzzMutate/Operations.cpp --- a/llvm/lib/FuzzMutate/Operations.cpp +++ b/llvm/lib/FuzzMutate/Operations.cpp @@ -163,7 +163,7 @@ SourcePred isInt1Ty{[](ArrayRef, const Value *V) { return V->getType()->isIntegerTy(1); }, - None}; + std::nullopt}; return {Weight, {isInt1Ty}, buildSplitBlock}; } @@ -182,7 +182,7 @@ // TODO: Try to avoid meaningless accesses. SourcePred sizedType( [](ArrayRef, const Value *V) { return V->getType()->isSized(); }, - None); + std::nullopt); return {Weight, {sizedPtrType(), sizedType, anyIntType()}, buildGEP}; } diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1675,7 +1675,8 @@ void printInt(StringRef Name, IntTy Int, bool ShouldSkipZero = true); void printAPInt(StringRef Name, const APInt &Int, bool IsUnsigned, bool ShouldSkipZero); - void printBool(StringRef Name, bool Value, Optional Default = None); + void printBool(StringRef Name, bool Value, + Optional Default = std::nullopt); void printDIFlags(StringRef Name, DINode::DIFlags Flags); void printDISPFlags(StringRef Name, DISubprogram::DISPFlags Flags); template @@ -2494,7 +2495,7 @@ if (const MDNode *N = dyn_cast(MD)) { std::unique_ptr MachineStorage; - SaveAndRestore SARMachine(WriterCtx.Machine); + SaveAndRestore SARMachine(WriterCtx.Machine); if (!WriterCtx.Machine) { MachineStorage = std::make_unique(WriterCtx.Context); WriterCtx.Machine = MachineStorage.get(); diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h --- a/llvm/lib/IR/AttributeImpl.h +++ b/llvm/lib/IR/AttributeImpl.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -229,7 +230,7 @@ static AttributeSetNode *getSorted(LLVMContext &C, ArrayRef SortedAttrs); - Optional findEnumAttribute(Attribute::AttrKind Kind) const; + std::optional findEnumAttribute(Attribute::AttrKind Kind) const; public: // AttributesSetNode is uniqued, these should not be available. 
@@ -258,9 +259,10 @@ MaybeAlign getStackAlignment() const; uint64_t getDereferenceableBytes() const; uint64_t getDereferenceableOrNullBytes() const; - Optional>> getAllocSizeArgs() const; + std::optional>> getAllocSizeArgs() + const; unsigned getVScaleRangeMin() const; - Optional getVScaleRangeMax() const; + std::optional getVScaleRangeMax() const; UWTableKind getUWTableKind() const; AllocFnKind getAllocKind() const; MemoryEffects getMemoryEffects() const; diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -17,7 +17,6 @@ #include "LLVMContextImpl.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -36,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -55,7 +55,7 @@ static const unsigned AllocSizeNumElemsNotPresent = -1; static uint64_t packAllocSizeArgs(unsigned ElemSizeArg, - const Optional &NumElemsArg) { + const std::optional &NumElemsArg) { assert((!NumElemsArg || *NumElemsArg != AllocSizeNumElemsNotPresent) && "Attempting to pack a reserved value"); @@ -63,29 +63,29 @@ NumElemsArg.value_or(AllocSizeNumElemsNotPresent); } -static std::pair> +static std::pair> unpackAllocSizeArgs(uint64_t Num) { unsigned NumElems = Num & std::numeric_limits::max(); unsigned ElemSizeArg = Num >> 32; - Optional NumElemsArg; + std::optional NumElemsArg; if (NumElems != AllocSizeNumElemsNotPresent) NumElemsArg = NumElems; return std::make_pair(ElemSizeArg, NumElemsArg); } static uint64_t packVScaleRangeArgs(unsigned MinValue, - Optional MaxValue) { + std::optional MaxValue) { return uint64_t(MinValue) << 32 | MaxValue.value_or(0); } -static std::pair> +static std::pair> unpackVScaleRangeArgs(uint64_t Value) { unsigned MaxValue = Value & std::numeric_limits::max(); unsigned MinValue = Value >> 32; return std::make_pair(MinValue, - MaxValue > 0 ? MaxValue : Optional()); + MaxValue > 0 ? 
MaxValue : std::optional()); } Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind, @@ -218,7 +218,7 @@ Attribute Attribute::getWithAllocSizeArgs(LLVMContext &Context, unsigned ElemSizeArg, - const Optional &NumElemsArg) { + const std::optional &NumElemsArg) { assert(!(ElemSizeArg == 0 && NumElemsArg && *NumElemsArg == 0) && "Invalid allocsize arguments -- given allocsize(0, 0)"); return get(Context, AllocSize, packAllocSizeArgs(ElemSizeArg, NumElemsArg)); @@ -359,7 +359,8 @@ return pImpl->getValueAsInt(); } -std::pair> Attribute::getAllocSizeArgs() const { +std::pair> +Attribute::getAllocSizeArgs() const { assert(hasAttribute(Attribute::AllocSize) && "Trying to get allocsize args from non-allocsize attribute"); return unpackAllocSizeArgs(pImpl->getValueAsInt()); @@ -371,7 +372,7 @@ return unpackVScaleRangeArgs(pImpl->getValueAsInt()).first; } -Optional Attribute::getVScaleRangeMax() const { +std::optional Attribute::getVScaleRangeMax() const { assert(hasAttribute(Attribute::VScaleRange) && "Trying to get vscale args from non-vscale attribute"); return unpackVScaleRangeArgs(pImpl->getValueAsInt()).second; @@ -452,7 +453,7 @@ if (hasAttribute(Attribute::AllocSize)) { unsigned ElemSize; - Optional NumElems; + std::optional NumElems; std::tie(ElemSize, NumElems) = getAllocSizeArgs(); return (NumElems @@ -463,7 +464,7 @@ if (hasAttribute(Attribute::VScaleRange)) { unsigned MinValue = getVScaleRangeMin(); - Optional MaxValue = getVScaleRangeMax(); + std::optional MaxValue = getVScaleRangeMax(); return ("vscale_range(" + Twine(MinValue) + "," + Twine(MaxValue.value_or(0)) + ")") .str(); @@ -773,11 +774,11 @@ } MaybeAlign AttributeSet::getAlignment() const { - return SetNode ? SetNode->getAlignment() : None; + return SetNode ? SetNode->getAlignment() : std::nullopt; } MaybeAlign AttributeSet::getStackAlignment() const { - return SetNode ? SetNode->getStackAlignment() : None; + return SetNode ? SetNode->getStackAlignment() : std::nullopt; } uint64_t AttributeSet::getDereferenceableBytes() const { @@ -812,19 +813,19 @@ return SetNode ? SetNode->getAttributeType(Attribute::ElementType) : nullptr; } -Optional>> +std::optional>> AttributeSet::getAllocSizeArgs() const { if (SetNode) return SetNode->getAllocSizeArgs(); - return None; + return std::nullopt; } unsigned AttributeSet::getVScaleRangeMin() const { return SetNode ? SetNode->getVScaleRangeMin() : 1; } -Optional AttributeSet::getVScaleRangeMax() const { - return SetNode ? SetNode->getVScaleRangeMax() : None; +std::optional AttributeSet::getVScaleRangeMax() const { + return SetNode ? SetNode->getVScaleRangeMax() : std::nullopt; } UWTableKind AttributeSet::getUWTableKind() const { @@ -929,11 +930,11 @@ return StringAttrs.count(Kind); } -Optional +std::optional AttributeSetNode::findEnumAttribute(Attribute::AttrKind Kind) const { // Do a quick presence check. if (!hasAttribute(Kind)) - return None; + return std::nullopt; // Attributes in a set are sorted by enum value, followed by string // attributes. Binary search the one we want. 
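Note: the vscale_range helpers above pack the attribute's (min, max) pair into a single uint64_t, with the minimum in the high 32 bits and the optional maximum in the low 32 bits; a stored 0 means "no maximum". A small worked example of the round trip through the new std::optional API (illustrative only; it simply exercises the packVScaleRangeArgs/unpackVScaleRangeArgs helpers shown above):

// Packing min = 2 with no maximum stores 0 in the low half.
uint64_t Raw = packVScaleRangeArgs(2, std::nullopt); // 0x0000000200000000

// Unpacking maps the stored 0 back to an empty optional.
auto [MinValue, MaxValue] = unpackVScaleRangeArgs(Raw);
// MinValue == 2, MaxValue == std::nullopt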
@@ -959,13 +960,13 @@ MaybeAlign AttributeSetNode::getAlignment() const { if (auto A = findEnumAttribute(Attribute::Alignment)) return A->getAlignment(); - return None; + return std::nullopt; } MaybeAlign AttributeSetNode::getStackAlignment() const { if (auto A = findEnumAttribute(Attribute::StackAlignment)) return A->getStackAlignment(); - return None; + return std::nullopt; } Type *AttributeSetNode::getAttributeType(Attribute::AttrKind Kind) const { @@ -986,11 +987,11 @@ return 0; } -Optional>> +std::optional>> AttributeSetNode::getAllocSizeArgs() const { if (auto A = findEnumAttribute(Attribute::AllocSize)) return A->getAllocSizeArgs(); - return None; + return std::nullopt; } unsigned AttributeSetNode::getVScaleRangeMin() const { @@ -999,10 +1000,10 @@ return 1; } -Optional AttributeSetNode::getVScaleRangeMax() const { +std::optional AttributeSetNode::getVScaleRangeMax() const { if (auto A = findEnumAttribute(Attribute::VScaleRange)) return A->getVScaleRangeMax(); - return None; + return std::nullopt; } UWTableKind AttributeSetNode::getUWTableKind() const { @@ -1445,10 +1446,9 @@ return addParamAttributes(C, Index, B); } -AttributeList -AttributeList::addAllocSizeParamAttr(LLVMContext &C, unsigned Index, - unsigned ElemSizeArg, - const Optional &NumElemsArg) { +AttributeList AttributeList::addAllocSizeParamAttr( + LLVMContext &C, unsigned Index, unsigned ElemSizeArg, + const std::optional &NumElemsArg) { AttrBuilder B(C); B.addAllocSizeAttr(ElemSizeArg, NumElemsArg); return addParamAttributes(C, Index, B); @@ -1723,12 +1723,13 @@ return *this; } -Optional AttrBuilder::getRawIntAttr(Attribute::AttrKind Kind) const { +std::optional +AttrBuilder::getRawIntAttr(Attribute::AttrKind Kind) const { assert(Attribute::isIntAttrKind(Kind) && "Not an int attribute"); Attribute A = getAttribute(Kind); if (A.isValid()) return A.getValueAsInt(); - return None; + return std::nullopt; } AttrBuilder &AttrBuilder::addRawIntAttr(Attribute::AttrKind Kind, @@ -1736,12 +1737,12 @@ return addAttribute(Attribute::get(Ctx, Kind, Value)); } -Optional>> +std::optional>> AttrBuilder::getAllocSizeArgs() const { Attribute A = getAttribute(Attribute::AllocSize); if (A.isValid()) return A.getAllocSizeArgs(); - return None; + return std::nullopt; } AttrBuilder &AttrBuilder::addAlignmentAttr(MaybeAlign Align) { @@ -1774,8 +1775,9 @@ return addRawIntAttr(Attribute::DereferenceableOrNull, Bytes); } -AttrBuilder &AttrBuilder::addAllocSizeAttr(unsigned ElemSize, - const Optional &NumElems) { +AttrBuilder & +AttrBuilder::addAllocSizeAttr(unsigned ElemSize, + const std::optional &NumElems) { return addAllocSizeAttrFromRawRepr(packAllocSizeArgs(ElemSize, NumElems)); } @@ -1786,7 +1788,7 @@ } AttrBuilder &AttrBuilder::addVScaleRangeAttr(unsigned MinValue, - Optional MaxValue) { + std::optional MaxValue) { return addVScaleRangeAttrFromRawRepr(packVScaleRangeArgs(MinValue, MaxValue)); } diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1133,7 +1133,7 @@ // Remangle our intrinsic since we upgrade the mangling auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F); - if (Result != None) { + if (Result != std::nullopt) { NewFn = *Result; return true; } diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -415,7 +415,7 @@ DebugLoc Loc = I->getDebugLoc(); // Move all of the specified instructions from the original basic block into // the new basic block. 
- New->getInstList().splice(New->end(), this->getInstList(), I, end()); + New->splice(New->end(), this, I, end()); // Add a branch instruction to the newly formed basic block. BranchInst *BI = BranchInst::Create(New, this); @@ -444,7 +444,7 @@ DebugLoc Loc = I->getDebugLoc(); // Move all of the specified instructions from the original basic block into // the new basic block. - New->getInstList().splice(New->end(), this->getInstList(), begin(), I); + New->splice(New->end(), this, begin(), I); // Loop through all of the predecessors of the 'this' block (which will be the // predecessors of the New block), replace the specified successor 'this' @@ -468,6 +468,23 @@ return New; } +void BasicBlock::splice(BasicBlock::iterator ToIt, BasicBlock *FromBB, + BasicBlock::iterator FromBeginIt, + BasicBlock::iterator FromEndIt) { +#ifdef EXPENSIVE_CHECKS + // Check that FromBeginIt is before FromEndIt. + auto FromBBEnd = FromBB->end(); + for (auto It = FromBeginIt; It != FromEndIt; ++It) + assert(It != FromBBEnd && "FromBeginIt not before FromEndIt!"); +#endif // EXPENSIVE_CHECKS + getInstList().splice(ToIt, FromBB->getInstList(), FromBeginIt, FromEndIt); +} + +BasicBlock::iterator BasicBlock::erase(BasicBlock::iterator FromIt, + BasicBlock::iterator ToIt) { + return getInstList().erase(FromIt, ToIt); +} + void BasicBlock::replacePhiUsesWith(BasicBlock *Old, BasicBlock *New) { // N.B. This might not be a complete BasicBlock, so don't assume // that it ends with a non-phi instruction. @@ -512,7 +529,7 @@ return Optional(CI->getValue().getZExtValue()); } } - return None; + return std::nullopt; } BasicBlock::iterator llvm::skipDebugIntrinsics(BasicBlock::iterator It) { diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -1073,7 +1073,7 @@ } else if (isa(GV)) { // Without a datalayout we have to assume the worst case: that the // function pointer isn't aligned at all. - GVAlign = llvm::None; + GVAlign = std::nullopt; } else if (isa(GV)) { GVAlign = cast(GV)->getAlign(); } @@ -2024,7 +2024,7 @@ // outer GEP, any inbounds attribute on that index is invalidated.
Optional IRIndex = GEP->getInRangeIndex(); if (IRIndex && *IRIndex == GEP->getNumIndices() - 1) - IRIndex = None; + IRIndex = std::nullopt; return ConstantExpr::getGetElementPtr( GEP->getSourceElementType(), cast(GEP->getPointerOperand()), diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp --- a/llvm/lib/IR/ConstantRange.cpp +++ b/llvm/lib/IR/ConstantRange.cpp @@ -705,7 +705,7 @@ ConstantRange Result = intersectWith(CR); if (Result == inverse().unionWith(CR.inverse()).inverse()) return Result; - return None; + return std::nullopt; } Optional @@ -714,7 +714,7 @@ ConstantRange Result = unionWith(CR); if (Result == inverse().intersectWith(CR.inverse()).inverse()) return Result; - return None; + return std::nullopt; } ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp, diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -2487,7 +2487,7 @@ if (InRangeIndex && *InRangeIndex < 63) SubClassOptionalData |= (*InRangeIndex + 1) << 1; const ConstantExprKeyType Key(Instruction::GetElementPtr, ArgVec, 0, - SubClassOptionalData, None, Ty); + SubClassOptionalData, std::nullopt, Ty); LLVMContextImpl *pImpl = C->getContext().pImpl; return pImpl->ExprConstants.getOrCreate(ReqTy, Key); diff --git a/llvm/lib/IR/ConstantsContext.h b/llvm/lib/IR/ConstantsContext.h --- a/llvm/lib/IR/ConstantsContext.h +++ b/llvm/lib/IR/ConstantsContext.h @@ -441,7 +441,7 @@ static ArrayRef getShuffleMaskIfValid(const ConstantExpr *CE) { if (CE->getOpcode() == Instruction::ShuffleVector) return CE->getShuffleMask(); - return None; + return std::nullopt; } static Type *getSourceElementTypeIfValid(const ConstantExpr *CE) { @@ -454,7 +454,7 @@ ConstantExprKeyType(unsigned Opcode, ArrayRef Ops, unsigned short SubclassData = 0, unsigned short SubclassOptionalData = 0, - ArrayRef ShuffleMask = None, + ArrayRef ShuffleMask = std::nullopt, Type *ExplicitTy = nullptr) : Opcode(Opcode), SubclassOptionalData(SubclassOptionalData), SubclassData(SubclassData), Ops(Ops), ShuffleMask(ShuffleMask), diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp --- a/llvm/lib/IR/DIBuilder.cpp +++ b/llvm/lib/IR/DIBuilder.cpp @@ -312,7 +312,7 @@ DIDerivedType *DIBuilder::createQualifiedType(unsigned Tag, DIType *FromTy) { return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, FromTy, 0, - 0, 0, None, DINode::FlagZero); + 0, 0, std::nullopt, DINode::FlagZero); } DIDerivedType * @@ -334,7 +334,7 @@ DINode::DIFlags Flags) { return DIDerivedType::get(VMContext, dwarf::DW_TAG_ptr_to_member_type, "", nullptr, 0, nullptr, PointeeTy, SizeInBits, - AlignInBits, 0, None, Flags, Base); + AlignInBits, 0, std::nullopt, Flags, Base); } DIDerivedType * @@ -354,14 +354,15 @@ DINodeArray Annotations) { return DIDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name, File, LineNo, getNonCompileUnitScope(Context), Ty, 0, - AlignInBits, 0, None, Flags, nullptr, Annotations); + AlignInBits, 0, std::nullopt, Flags, nullptr, + Annotations); } DIDerivedType *DIBuilder::createFriend(DIType *Ty, DIType *FriendTy) { assert(Ty && "Invalid type!"); assert(FriendTy && "Invalid friend type!"); return DIDerivedType::get(VMContext, dwarf::DW_TAG_friend, "", nullptr, 0, Ty, - FriendTy, 0, 0, 0, None, DINode::FlagZero); + FriendTy, 0, 0, 0, std::nullopt, DINode::FlagZero); } DIDerivedType *DIBuilder::createInheritance(DIType *Ty, DIType *BaseTy, @@ -372,8 +373,8 @@ Metadata *ExtraData = ConstantAsMetadata::get( 
ConstantInt::get(IntegerType::get(VMContext, 32), VBPtrOffset)); return DIDerivedType::get(VMContext, dwarf::DW_TAG_inheritance, "", nullptr, - 0, Ty, BaseTy, 0, 0, BaseOffset, None, Flags, - ExtraData); + 0, Ty, BaseTy, 0, 0, BaseOffset, std::nullopt, + Flags, ExtraData); } DIDerivedType *DIBuilder::createMemberType( @@ -382,8 +383,8 @@ DINode::DIFlags Flags, DIType *Ty, DINodeArray Annotations) { return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File, LineNumber, getNonCompileUnitScope(Scope), Ty, - SizeInBits, AlignInBits, OffsetInBits, None, Flags, - nullptr, Annotations); + SizeInBits, AlignInBits, OffsetInBits, std::nullopt, + Flags, nullptr, Annotations); } static ConstantAsMetadata *getConstantOrNull(Constant *C) { @@ -398,8 +399,8 @@ Constant *Discriminant, DINode::DIFlags Flags, DIType *Ty) { return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File, LineNumber, getNonCompileUnitScope(Scope), Ty, - SizeInBits, AlignInBits, OffsetInBits, None, Flags, - getConstantOrNull(Discriminant)); + SizeInBits, AlignInBits, OffsetInBits, std::nullopt, + Flags, getConstantOrNull(Discriminant)); } DIDerivedType *DIBuilder::createBitFieldMemberType( @@ -410,7 +411,7 @@ return DIDerivedType::get( VMContext, dwarf::DW_TAG_member, Name, File, LineNumber, getNonCompileUnitScope(Scope), Ty, SizeInBits, /*AlignInBits=*/0, - OffsetInBits, None, Flags, + OffsetInBits, std::nullopt, Flags, ConstantAsMetadata::get(ConstantInt::get(IntegerType::get(VMContext, 64), StorageOffsetInBits)), Annotations); @@ -424,7 +425,7 @@ Flags |= DINode::FlagStaticMember; return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File, LineNumber, getNonCompileUnitScope(Scope), Ty, 0, - AlignInBits, 0, None, Flags, + AlignInBits, 0, std::nullopt, Flags, getConstantOrNull(Val)); } @@ -435,8 +436,8 @@ DIType *Ty, MDNode *PropertyNode) { return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File, LineNumber, getNonCompileUnitScope(File), Ty, - SizeInBits, AlignInBits, OffsetInBits, None, Flags, - PropertyNode); + SizeInBits, AlignInBits, OffsetInBits, std::nullopt, + Flags, PropertyNode); } DIObjCProperty * @@ -570,7 +571,7 @@ auto *R = DIDerivedType::get(VMContext, dwarf::DW_TAG_set_type, Name, File, LineNo, getNonCompileUnitScope(Scope), Ty, SizeInBits, - AlignInBits, 0, None, DINode::FlagZero); + AlignInBits, 0, std::nullopt, DINode::FlagZero); trackIfUnresolved(R); return R; } @@ -855,7 +856,7 @@ /*IsDistinct=*/IsDefinition, VMContext, getNonCompileUnitScope(Context), Name, LinkageName, File, LineNo, Ty, ScopeLine, nullptr, 0, 0, Flags, SPFlags, IsDefinition ? CUNode : nullptr, TParams, Decl, - MDTuple::getTemporary(VMContext, None).release(), ThrownTypes, + MDTuple::getTemporary(VMContext, std::nullopt).release(), ThrownTypes, Annotations, TargetFuncName); if (IsDefinition) diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -937,7 +937,7 @@ unsigned ElemSizeInBits = getTypeSizeInBits(ElemTy).getFixedSize(); // GEPs over non-multiple of 8 size vector elements are invalid. 
if (ElemSizeInBits % 8 != 0) - return None; + return std::nullopt; return getElementIndex(TypeSize::Fixed(ElemSizeInBits / 8), Offset); } @@ -946,7 +946,7 @@ const StructLayout *SL = getStructLayout(STy); uint64_t IntOffset = Offset.getZExtValue(); if (IntOffset >= SL->getSizeInBytes()) - return None; + return std::nullopt; unsigned Index = SL->getElementContainingOffset(IntOffset); Offset -= SL->getElementOffset(Index); @@ -955,7 +955,7 @@ } // Non-aggregate type. - return None; + return std::nullopt; } SmallVector DataLayout::getGEPIndicesForOffset(Type *&ElemTy, diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -1755,10 +1755,10 @@ uint64_t OffsetInBytes = GEPOffset.getLimitedValue(); // Check for overflow. if (OffsetInBytes == UINT64_MAX) - return None; + return std::nullopt; if (const auto *Alloca = dyn_cast(Base)) return AssignmentInfo(DL, Alloca, OffsetInBytes * 8, SizeInBits); - return None; + return std::nullopt; } Optional at::getAssignmentInfo(const DataLayout &DL, @@ -1768,7 +1768,7 @@ auto *ConstLengthInBytes = dyn_cast(I->getLength()); if (!ConstLengthInBytes) // We can't use a non-const size, bail. - return None; + return std::nullopt; uint64_t SizeInBits = 8 * ConstLengthInBytes->getZExtValue(); return getAssignmentInfoImpl(DL, StoreDest, SizeInBits); } @@ -1793,14 +1793,16 @@ assert(ID && "Store instruction must have DIAssignID metadata"); (void)ID; - DIExpression *Expr = DIExpression::get(StoreLikeInst.getContext(), None); + DIExpression *Expr = + DIExpression::get(StoreLikeInst.getContext(), std::nullopt); if (!Info.StoreToWholeAlloca) { auto R = DIExpression::createFragmentExpression(Expr, Info.OffsetInBits, Info.SizeInBits); assert(R.has_value() && "failed to create fragment expression"); Expr = R.value(); } - DIExpression *AddrExpr = DIExpression::get(StoreLikeInst.getContext(), None); + DIExpression *AddrExpr = + DIExpression::get(StoreLikeInst.getContext(), std::nullopt); return DIB.insertDbgAssign(&StoreLikeInst, Val, VarRec.Var, Expr, Dest, AddrExpr, VarRec.DL); } @@ -1827,7 +1829,7 @@ for (auto BBI = Start; BBI != End; ++BBI) { for (Instruction &I : *BBI) { - Optional Info = None; + Optional Info = std::nullopt; Value *ValueComponent = nullptr; Value *DestComponent = nullptr; if (auto *AI = dyn_cast(&I)) { diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -203,7 +203,7 @@ decodeDiscriminator(Ret, TBD, TDF, TCI); if (TBD == BD && TDF == DF && TCI == CI) return Ret; - return None; + return std::nullopt; } void DILocation::decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF, @@ -610,7 +610,7 @@ case dwarf::DW_ATE_unsigned_char: return Signedness::Unsigned; default: - return None; + return std::nullopt; } } @@ -824,7 +824,7 @@ .Case("CSK_MD5", DIFile::CSK_MD5) .Case("CSK_SHA1", DIFile::CSK_SHA1) .Case("CSK_SHA256", DIFile::CSK_SHA256) - .Default(None); + .Default(std::nullopt); } DIFile *DIFile::getImpl(LLVMContext &Context, MDString *Filename, @@ -896,7 +896,7 @@ .Case("FullDebug", FullDebug) .Case("LineTablesOnly", LineTablesOnly) .Case("DebugDirectivesOnly", DebugDirectivesOnly) - .Default(None); + .Default(std::nullopt); } Optional @@ -905,7 +905,7 @@ .Case("Default", DebugNameTableKind::Default) .Case("GNU", DebugNameTableKind::GNU) .Case("None", DebugNameTableKind::None) - .Default(None); + .Default(std::nullopt); } const char 
*DICompileUnit::emissionKindString(DebugEmissionKind EK) { @@ -1224,7 +1224,7 @@ } // Fail gracefully. - return None; + return std::nullopt; } DILabel::DILabel(LLVMContext &C, StorageType Storage, unsigned Line, @@ -1415,7 +1415,7 @@ DIExpression::FragmentInfo Info = {I->getArg(1), I->getArg(0)}; return Info; } - return None; + return std::nullopt; } void DIExpression::appendOffset(SmallVectorImpl &Ops, @@ -1597,7 +1597,7 @@ NewOps.append(Ops.begin(), Ops.end()); // Ensure that the new opcodes are only appended once. - Ops = None; + Ops = std::nullopt; } Op.appendToVector(NewOps); } @@ -1679,7 +1679,7 @@ case dwarf::DW_OP_stack_value: // Bail if this expression computes a value that cannot be split. if (!CanSplitValue) - return None; + return std::nullopt; break; case dwarf::DW_OP_LLVM_fragment: { // Make the new offset point into the existing fragment. @@ -1765,7 +1765,7 @@ getNumElements() != 6) || (getElement(0) != dwarf::DW_OP_consts && getElement(0) != dwarf::DW_OP_constu)) - return None; + return std::nullopt; if (getNumElements() == 2 && getElement(0) == dwarf::DW_OP_consts) return SignedOrUnsignedConstant::SignedConstant; @@ -1773,7 +1773,7 @@ if ((getNumElements() == 3 && getElement(2) != dwarf::DW_OP_stack_value) || (getNumElements() == 6 && (getElement(2) != dwarf::DW_OP_stack_value || getElement(3) != dwarf::DW_OP_LLVM_fragment))) - return None; + return std::nullopt; return getElement(0) == dwarf::DW_OP_constu ? SignedOrUnsignedConstant::UnsignedConstant : SignedOrUnsignedConstant::SignedConstant; diff --git a/llvm/lib/IR/FPEnv.cpp b/llvm/lib/IR/FPEnv.cpp --- a/llvm/lib/IR/FPEnv.cpp +++ b/llvm/lib/IR/FPEnv.cpp @@ -17,24 +17,25 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include namespace llvm { -Optional convertStrToRoundingMode(StringRef RoundingArg) { +std::optional convertStrToRoundingMode(StringRef RoundingArg) { // For dynamic rounding mode, we use round to nearest but we will set the // 'exact' SDNodeFlag so that the value will not be rounded. 
- return StringSwitch>(RoundingArg) + return StringSwitch>(RoundingArg) .Case("round.dynamic", RoundingMode::Dynamic) .Case("round.tonearest", RoundingMode::NearestTiesToEven) .Case("round.tonearestaway", RoundingMode::NearestTiesToAway) .Case("round.downward", RoundingMode::TowardNegative) .Case("round.upward", RoundingMode::TowardPositive) .Case("round.towardzero", RoundingMode::TowardZero) - .Default(None); + .Default(std::nullopt); } -Optional convertRoundingModeToStr(RoundingMode UseRounding) { - Optional RoundingStr; +std::optional convertRoundingModeToStr(RoundingMode UseRounding) { + std::optional RoundingStr; switch (UseRounding) { case RoundingMode::Dynamic: RoundingStr = "round.dynamic"; @@ -60,18 +61,18 @@ return RoundingStr; } -Optional +std::optional convertStrToExceptionBehavior(StringRef ExceptionArg) { - return StringSwitch>(ExceptionArg) + return StringSwitch>(ExceptionArg) .Case("fpexcept.ignore", fp::ebIgnore) .Case("fpexcept.maytrap", fp::ebMayTrap) .Case("fpexcept.strict", fp::ebStrict) - .Default(None); + .Default(std::nullopt); } -Optional +std::optional convertExceptionBehaviorToStr(fp::ExceptionBehavior UseExcept) { - Optional ExceptStr; + std::optional ExceptStr; switch (UseExcept) { case fp::ebStrict: ExceptStr = "fpexcept.strict"; diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -1846,14 +1846,14 @@ Optional Intrinsic::remangleIntrinsicFunction(Function *F) { SmallVector ArgTys; if (!getIntrinsicSignature(F, ArgTys)) - return None; + return std::nullopt; Intrinsic::ID ID = F->getIntrinsicID(); StringRef Name = F->getName(); std::string WantedName = Intrinsic::getName(ID, ArgTys, F->getParent(), F->getFunctionType()); if (Name == WantedName) - return None; + return std::nullopt; Function *NewDecl = [&] { if (auto *ExistingGV = F->getParent()->getNamedValue(WantedName)) { @@ -2061,7 +2061,7 @@ // A value of -1 is used for SamplePGO when there were no samples. // Treat this the same as unknown. 
if (Count == (uint64_t)-1) - return None; + return std::nullopt; return ProfileCount(Count, PCT_Real); } else if (AllowSynthetic && MDS->getString().equals("synthetic_function_entry_count")) { @@ -2070,7 +2070,7 @@ return ProfileCount(Count, PCT_Synthetic); } } - return None; + return std::nullopt; } DenseSet Function::getImportGUIDs() const { @@ -2099,7 +2099,7 @@ "Metadata not match"); return cast(MD->getOperand(1))->getString(); } - return None; + return std::nullopt; } bool Function::nullPointerIsDefined() const { diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -373,11 +373,11 @@ Optional GlobalValue::getAbsoluteSymbolRange() const { auto *GO = dyn_cast(this); if (!GO) - return None; + return std::nullopt; MDNode *MD = GO->getMetadata(LLVMContext::MD_absolute_symbol); if (!MD) - return None; + return std::nullopt; return getConstantRangeFromMetadata(*MD); } @@ -584,9 +584,7 @@ } const Function *GlobalIFunc::getResolverFunction() const { - DenseSet Aliases; - return dyn_cast( - findBaseObject(getResolver(), Aliases, [](const GlobalValue &) {})); + return dyn_cast(getResolver()->stripPointerCastsAndAliases()); } void GlobalIFunc::applyAlongResolverPath( diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -32,6 +32,7 @@ #include "llvm/Support/Casting.h" #include #include +#include #include using namespace llvm; @@ -768,8 +769,8 @@ template static std::vector -getStatepointBundles(Optional> TransitionArgs, - Optional> DeoptArgs, +getStatepointBundles(std::optional> TransitionArgs, + std::optional> DeoptArgs, ArrayRef GCArgs) { std::vector Rval; if (DeoptArgs) { @@ -794,8 +795,9 @@ static CallInst *CreateGCStatepointCallCommon( IRBuilderBase *Builder, uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualCallee, uint32_t Flags, ArrayRef CallArgs, - Optional> TransitionArgs, Optional> DeoptArgs, - ArrayRef GCArgs, const Twine &Name) { + std::optional> TransitionArgs, + std::optional> DeoptArgs, ArrayRef GCArgs, + const Twine &Name) { Module *M = Builder->GetInsertBlock()->getParent()->getParent(); // Fill in the one generic type'd argument (the function is also vararg) Function *FnStatepoint = @@ -816,18 +818,19 @@ CallInst *IRBuilderBase::CreateGCStatepointCall( uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualCallee, - ArrayRef CallArgs, Optional> DeoptArgs, + ArrayRef CallArgs, std::optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name) { return CreateGCStatepointCallCommon( this, ID, NumPatchBytes, ActualCallee, uint32_t(StatepointFlags::None), - CallArgs, None /* No Transition Args */, DeoptArgs, GCArgs, Name); + CallArgs, std::nullopt /* No Transition Args */, DeoptArgs, GCArgs, Name); } CallInst *IRBuilderBase::CreateGCStatepointCall( uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualCallee, uint32_t Flags, ArrayRef CallArgs, - Optional> TransitionArgs, Optional> DeoptArgs, - ArrayRef GCArgs, const Twine &Name) { + std::optional> TransitionArgs, + std::optional> DeoptArgs, ArrayRef GCArgs, + const Twine &Name) { return CreateGCStatepointCallCommon( this, ID, NumPatchBytes, ActualCallee, Flags, CallArgs, TransitionArgs, DeoptArgs, GCArgs, Name); @@ -835,11 +838,11 @@ CallInst *IRBuilderBase::CreateGCStatepointCall( uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualCallee, - ArrayRef CallArgs, Optional> DeoptArgs, + ArrayRef CallArgs, std::optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name) { 
return CreateGCStatepointCallCommon( this, ID, NumPatchBytes, ActualCallee, uint32_t(StatepointFlags::None), - CallArgs, None, DeoptArgs, GCArgs, Name); + CallArgs, std::nullopt, DeoptArgs, GCArgs, Name); } template @@ -847,8 +850,9 @@ IRBuilderBase *Builder, uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags, ArrayRef InvokeArgs, - Optional> TransitionArgs, Optional> DeoptArgs, - ArrayRef GCArgs, const Twine &Name) { + std::optional> TransitionArgs, + std::optional> DeoptArgs, ArrayRef GCArgs, + const Twine &Name) { Module *M = Builder->GetInsertBlock()->getParent()->getParent(); // Fill in the one generic type'd argument (the function is also vararg) Function *FnStatepoint = @@ -871,19 +875,19 @@ InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, - ArrayRef InvokeArgs, Optional> DeoptArgs, + ArrayRef InvokeArgs, std::optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name) { return CreateGCStatepointInvokeCommon( this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest, - uint32_t(StatepointFlags::None), InvokeArgs, None /* No Transition Args*/, - DeoptArgs, GCArgs, Name); + uint32_t(StatepointFlags::None), InvokeArgs, + std::nullopt /* No Transition Args*/, DeoptArgs, GCArgs, Name); } InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags, - ArrayRef InvokeArgs, Optional> TransitionArgs, - Optional> DeoptArgs, ArrayRef GCArgs, + ArrayRef InvokeArgs, std::optional> TransitionArgs, + std::optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name) { return CreateGCStatepointInvokeCommon( this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest, Flags, @@ -893,12 +897,12 @@ InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, ArrayRef InvokeArgs, - Optional> DeoptArgs, ArrayRef GCArgs, + std::optional> DeoptArgs, ArrayRef GCArgs, const Twine &Name) { return CreateGCStatepointInvokeCommon( this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest, - uint32_t(StatepointFlags::None), InvokeArgs, None, DeoptArgs, GCArgs, - Name); + uint32_t(StatepointFlags::None), InvokeArgs, std::nullopt, DeoptArgs, + GCArgs, Name); } CallInst *IRBuilderBase::CreateGCResult(Instruction *Statepoint, @@ -999,8 +1003,8 @@ CallInst *IRBuilderBase::CreateConstrainedFPBinOp( Intrinsic::ID ID, Value *L, Value *R, Instruction *FMFSource, const Twine &Name, MDNode *FPMathTag, - Optional Rounding, - Optional Except) { + std::optional Rounding, + std::optional Except) { Value *RoundingV = getConstrainedFPRounding(Rounding); Value *ExceptV = getConstrainedFPExcept(Except); @@ -1033,8 +1037,8 @@ CallInst *IRBuilderBase::CreateConstrainedFPCast( Intrinsic::ID ID, Value *V, Type *DestTy, Instruction *FMFSource, const Twine &Name, MDNode *FPMathTag, - Optional Rounding, - Optional Except) { + std::optional Rounding, + std::optional Except) { Value *ExceptV = getConstrainedFPExcept(Except); FastMathFlags UseFMF = FMF; @@ -1084,7 +1088,7 @@ CallInst *IRBuilderBase::CreateConstrainedFPCmp( Intrinsic::ID ID, CmpInst::Predicate P, Value *L, Value *R, - const Twine &Name, Optional Except) { + const Twine &Name, std::optional Except) { Value *PredicateV = getConstrainedFPPredicate(P); 
Value *ExceptV = getConstrainedFPExcept(Except); @@ -1096,8 +1100,8 @@ CallInst *IRBuilderBase::CreateConstrainedFPCall( Function *Callee, ArrayRef Args, const Twine &Name, - Optional Rounding, - Optional Except) { + std::optional Rounding, + std::optional Except) { llvm::SmallVector UseArgs; append_range(UseArgs, Args); diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -115,7 +115,7 @@ void Instruction::moveBefore(BasicBlock &BB, SymbolTableList::iterator I) { assert(I == BB.end() || I->getParent() == &BB); - BB.getInstList().splice(I, getParent()->getInstList(), getIterator()); + BB.splice(I, getParent(), getIterator()); } bool Instruction::comesBefore(const Instruction *Other) const { diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -61,7 +61,7 @@ if (isArrayAllocation()) { auto *C = dyn_cast(getArraySize()); if (!C) - return None; + return std::nullopt; assert(!Size.isScalable() && "Array elements cannot have a scalable size"); Size *= C->getZExtValue(); } @@ -852,7 +852,7 @@ Function *MallocF, const Twine &Name) { return createMalloc(InsertBefore, nullptr, IntPtrTy, AllocTy, AllocSize, - ArraySize, None, MallocF, Name); + ArraySize, std::nullopt, MallocF, Name); } Instruction *CallInst::CreateMalloc(Instruction *InsertBefore, Type *IntPtrTy, Type *AllocTy, @@ -877,7 +877,7 @@ Value *AllocSize, Value *ArraySize, Function *MallocF, const Twine &Name) { return createMalloc(nullptr, InsertAtEnd, IntPtrTy, AllocTy, AllocSize, - ArraySize, None, MallocF, Name); + ArraySize, std::nullopt, MallocF, Name); } Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd, Type *IntPtrTy, Type *AllocTy, @@ -924,7 +924,7 @@ /// CreateFree - Generate the IR for a call to the builtin free function. Instruction *CallInst::CreateFree(Value *Source, Instruction *InsertBefore) { - return createFree(Source, None, InsertBefore, nullptr); + return createFree(Source, std::nullopt, InsertBefore, nullptr); } Instruction *CallInst::CreateFree(Value *Source, ArrayRef Bundles, @@ -936,7 +936,8 @@ /// Note: This function does not add the call to the basic block, that is the /// responsibility of the caller. 
Instruction *CallInst::CreateFree(Value *Source, BasicBlock *InsertAtEnd) { - Instruction *FreeCall = createFree(Source, None, nullptr, InsertAtEnd); + Instruction *FreeCall = + createFree(Source, std::nullopt, nullptr, InsertAtEnd); assert(FreeCall && "CreateFree did not create a CallInst"); return FreeCall; } @@ -4653,7 +4654,7 @@ SwitchInstProfUpdateWrapper::CaseWeightOpt SwitchInstProfUpdateWrapper::getSuccessorWeight(unsigned idx) { if (!Weights) - return None; + return std::nullopt; return (*Weights)[idx]; } @@ -4683,7 +4684,7 @@ ->getValue() .getZExtValue(); - return None; + return std::nullopt; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -275,18 +275,18 @@ return ConstantInt::get(Type::getInt64Ty(Context), 1); } -Optional ConstrainedFPIntrinsic::getRoundingMode() const { +std::optional ConstrainedFPIntrinsic::getRoundingMode() const { unsigned NumOperands = arg_size(); Metadata *MD = nullptr; auto *MAV = dyn_cast(getArgOperand(NumOperands - 2)); if (MAV) MD = MAV->getMetadata(); if (!MD || !isa(MD)) - return None; + return std::nullopt; return convertStrToRoundingMode(cast(MD)->getString()); } -Optional +std::optional ConstrainedFPIntrinsic::getExceptionBehavior() const { unsigned NumOperands = arg_size(); Metadata *MD = nullptr; @@ -294,18 +294,18 @@ if (MAV) MD = MAV->getMetadata(); if (!MD || !isa(MD)) - return None; + return std::nullopt; return convertStrToExceptionBehavior(cast(MD)->getString()); } bool ConstrainedFPIntrinsic::isDefaultFPEnvironment() const { - Optional Except = getExceptionBehavior(); + std::optional Except = getExceptionBehavior(); if (Except) { if (Except.value() != fp::ebIgnore) return false; } - Optional Rounding = getRoundingMode(); + std::optional Rounding = getRoundingMode(); if (Rounding) { if (Rounding.value() != RoundingMode::NearestTiesToEven) return false; @@ -415,7 +415,7 @@ Optional VPIntrinsic::getMaskParamPos(Intrinsic::ID IntrinsicID) { switch (IntrinsicID) { default: - return None; + return std::nullopt; #define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \ case Intrinsic::VPID: \ @@ -428,7 +428,7 @@ VPIntrinsic::getVectorLengthParamPos(Intrinsic::ID IntrinsicID) { switch (IntrinsicID) { default: - return None; + return std::nullopt; #define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \ case Intrinsic::VPID: \ @@ -461,7 +461,7 @@ #define END_REGISTER_VP_INTRINSIC(VPID) break; #include "llvm/IR/VPIntrinsics.def" } - return None; + return std::nullopt; } /// \return The data (payload) operand of this store or scatter. 
@@ -481,7 +481,7 @@ #define END_REGISTER_VP_INTRINSIC(VPID) break; #include "llvm/IR/VPIntrinsics.def" } - return None; + return std::nullopt; } bool VPIntrinsic::isVPIntrinsic(Intrinsic::ID ID) { @@ -506,7 +506,7 @@ #define END_REGISTER_VP_INTRINSIC(VPID) break; #include "llvm/IR/VPIntrinsics.def" } - return None; + return std::nullopt; } Intrinsic::ID VPIntrinsic::getForOpcode(unsigned IROPC) { @@ -717,7 +717,7 @@ default: break; } - return None; + return std::nullopt; } Optional VPReductionIntrinsic::getStartParamPos(Intrinsic::ID ID) { @@ -729,7 +729,7 @@ default: break; } - return None; + return std::nullopt; } Instruction::BinaryOps BinaryOpIntrinsic::getBinaryOp() const { diff --git a/llvm/lib/IR/LLVMRemarkStreamer.cpp b/llvm/lib/IR/LLVMRemarkStreamer.cpp --- a/llvm/lib/IR/LLVMRemarkStreamer.cpp +++ b/llvm/lib/IR/LLVMRemarkStreamer.cpp @@ -48,7 +48,7 @@ static Optional toRemarkLocation(const DiagnosticLocation &DL) { if (!DL.isValid()) - return None; + return std::nullopt; StringRef File = DL.getRelativePath(); unsigned Line = DL.getLine(); unsigned Col = DL.getColumn(); diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp --- a/llvm/lib/IR/MDBuilder.cpp +++ b/llvm/lib/IR/MDBuilder.cpp @@ -53,7 +53,7 @@ } MDNode *MDBuilder::createUnpredictable() { - return MDNode::get(Context, None); + return MDNode::get(Context, std::nullopt); } MDNode *MDBuilder::createFunctionEntryCount( diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -82,7 +82,7 @@ Metadata *MD) { if (!MD) // !{} - return MDNode::get(Context, None); + return MDNode::get(Context, std::nullopt); // Return early if this isn't a single-operand MDNode. auto *N = dyn_cast(MD); @@ -91,7 +91,7 @@ if (!N->getOperand(0)) // !{} - return MDNode::get(Context, None); + return MDNode::get(Context, std::nullopt); if (auto *C = dyn_cast(N->getOperand(0))) // Look through the MDNode. 
diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -616,11 +616,11 @@ addModuleFlag(ModFlagBehavior::Max, "PIE Level", PL); } -Optional Module::getCodeModel() const { +std::optional Module::getCodeModel() const { auto *Val = cast_or_null(getModuleFlag("Code Model")); if (!Val) - return None; + return std::nullopt; return static_cast( cast(Val->getValue())->getZExtValue()); @@ -778,7 +778,7 @@ return {}; auto getVersionComponent = [&](unsigned Index) -> std::optional { if (Index >= Arr->getNumElements()) - return None; + return std::nullopt; return (unsigned)Arr->getElementAsInteger(Index); }; auto Major = getVersionComponent(0); diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp --- a/llvm/lib/IR/Operator.cpp +++ b/llvm/lib/IR/Operator.cpp @@ -35,7 +35,7 @@ case Instruction::GetElementPtr: { auto *GEP = cast(this); // Note: inrange exists on constexpr only - return GEP->isInBounds() || GEP->getInRangeIndex() != None; + return GEP->isInBounds() || GEP->getInRangeIndex() != std::nullopt; } default: if (const auto *FP = dyn_cast(this)) diff --git a/llvm/lib/IR/PrintPasses.cpp b/llvm/lib/IR/PrintPasses.cpp --- a/llvm/lib/IR/PrintPasses.cpp +++ b/llvm/lib/IR/PrintPasses.cpp @@ -198,8 +198,9 @@ StringRef Args[] = {DiffBinary, "-w", "-d", OLF, NLF, ULF, FileName[0], FileName[1]}; - Optional Redirects[] = {None, StringRef(FileName[2]), None}; - int Result = sys::ExecuteAndWait(*DiffExe, Args, None, Redirects); + std::optional Redirects[] = {std::nullopt, StringRef(FileName[2]), + std::nullopt}; + int Result = sys::ExecuteAndWait(*DiffExe, Args, std::nullopt, Redirects); if (Result < 0) return "Error executing system diff."; std::string Diff; diff --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp --- a/llvm/lib/IR/PseudoProbe.cpp +++ b/llvm/lib/IR/PseudoProbe.cpp @@ -42,7 +42,7 @@ return Probe; } } - return None; + return std::nullopt; } Optional extractProbe(const Instruction &Inst) { @@ -59,7 +59,7 @@ if (isa(&Inst) && !isa(&Inst)) return extractProbeFromDiscriminator(Inst); - return None; + return std::nullopt; } void setProbeDistributionFactor(Instruction &Inst, float Factor) { diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -385,7 +385,7 @@ } FunctionType *FunctionType::get(Type *Result, bool isVarArg) { - return get(Result, None, isVarArg); + return get(Result, std::nullopt, isVarArg); } bool FunctionType::isValidReturnType(Type *RetTy) { @@ -518,7 +518,7 @@ } StructType *StructType::get(LLVMContext &Context, bool isPacked) { - return get(Context, None, isPacked); + return get(Context, std::nullopt, isPacked); } StructType *StructType::create(LLVMContext &Context, ArrayRef Elements, diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -1022,22 +1022,6 @@ return Alloca->isSwiftError(); } -bool Value::isTransitiveUsedByMetadataOnly() const { - SmallVector WorkList(user_begin(), user_end()); - SmallPtrSet Visited(user_begin(), user_end()); - while (!WorkList.empty()) { - const User *U = WorkList.pop_back_val(); - // If it is transitively used by a global value or a non-constant value, - // it's obviously not only used by metadata. 
- if (!isa(U) || isa(U)) - return false; - for (const User *UU : U->users()) - if (Visited.insert(UU).second) - WorkList.push_back(UU); - } - return true; -} - //===----------------------------------------------------------------------===// // ValueHandleBase Class //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2130,7 +2130,7 @@ if (VScaleMin == 0) CheckFailed("'vscale_range' minimum must be greater than 0", V); - Optional VScaleMax = Attrs.getFnAttrs().getVScaleRangeMax(); + std::optional VScaleMax = Attrs.getFnAttrs().getVScaleRangeMax(); if (VScaleMax && VScaleMin > VScaleMax) CheckFailed("'vscale_range' minimum cannot be greater than maximum", V); } @@ -5025,7 +5025,7 @@ " (the operand should be a string)"), MD); - Optional RoundMode = + std::optional RoundMode = convertStrToRoundingMode(cast(MD)->getString()); Check(RoundMode && *RoundMode != RoundingMode::Dynamic, "unsupported rounding mode argument", Call); diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -44,6 +44,7 @@ #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" #include "llvm/Transforms/Utils/SplitModule.h" +#include using namespace llvm; using namespace lto; @@ -207,14 +208,14 @@ for (const std::string &A : Conf.MAttrs) Features.AddFeature(A); - Optional RelocModel; + std::optional RelocModel; if (Conf.RelocModel) RelocModel = *Conf.RelocModel; else if (M.getModuleFlag("PIC Level")) RelocModel = M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_; - Optional CodeModel; + std::optional CodeModel; if (Conf.CodeModel) CodeModel = *Conf.CodeModel; else diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -134,7 +134,7 @@ Context.setDiscardValueNames(LTODiscardValueNames); Context.enableDebugTypeODRUniquing(); - Config.CodeModel = None; + Config.CodeModel = std::nullopt; Config.StatsFile = LTOStatsFile; Config.PreCodeGenPassesHook = [](legacy::PassManager &PM) { PM.add(createObjCARCContractPass()); @@ -446,7 +446,7 @@ assert(MArch && "MArch is not set!"); return std::unique_ptr(MArch->createTargetMachine( TripleStr, Config.CPU, FeatureStr, Config.Options, Config.RelocModel, - None, Config.CGOptLevel)); + std::nullopt, Config.CGOptLevel)); } // If a linkonce global is present in the MustPreserveSymbols, we need to make diff --git a/llvm/lib/LTO/LTOModule.cpp b/llvm/lib/LTO/LTOModule.cpp --- a/llvm/lib/LTO/LTOModule.cpp +++ b/llvm/lib/LTO/LTOModule.cpp @@ -229,8 +229,8 @@ CPU = "cyclone"; } - TargetMachine *target = - march->createTargetMachine(TripleStr, CPU, FeatureStr, options, None); + TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr, + options, std::nullopt); std::unique_ptr Ret(new LTOModule(std::move(M), Buffer, target)); Ret->parseSymbols(); diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -618,7 +618,7 @@ std::unique_ptr TM( TheTarget->createTargetMachine(TheTriple.str(), MCpu, FeatureStr, Options, - RelocModel, None, CGOptLevel)); + RelocModel, std::nullopt, CGOptLevel)); assert(TM && "Cannot create target machine"); return TM; 
diff --git a/llvm/lib/LineEditor/LineEditor.cpp b/llvm/lib/LineEditor/LineEditor.cpp --- a/llvm/lib/LineEditor/LineEditor.cpp +++ b/llvm/lib/LineEditor/LineEditor.cpp @@ -255,7 +255,7 @@ // Either of these may mean end-of-file. if (!Line || LineLen == 0) - return None; + return std::nullopt; // Strip any newlines off the end of the string. while (LineLen > 0 && diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -192,6 +192,8 @@ MCSectionELF *createRelocationSection(MCContext &Ctx, const MCSectionELF &Sec); + void createMemtagRelocs(MCAssembler &Asm); + void writeSectionHeader(const MCAsmLayout &Layout, const SectionIndexMapTy &SectionIndexMap, const SectionOffsetsTy &SectionOffsets); @@ -609,6 +611,23 @@ return true; } +void ELFWriter::createMemtagRelocs(MCAssembler &Asm) { + MCSectionELF *MemtagRelocs = nullptr; + for (const MCSymbol &Sym : Asm.symbols()) { + const auto &SymE = cast(Sym); + if (!SymE.isMemtag()) + continue; + if (MemtagRelocs == nullptr) { + MemtagRelocs = OWriter.TargetObjectWriter->getMemtagRelocsSection(Asm.getContext()); + if (MemtagRelocs == nullptr) + report_fatal_error("Tagged globals are not available on this architecture."); + Asm.registerSection(*MemtagRelocs); + } + ELFRelocationEntry Rec(0, &SymE, ELF::R_AARCH64_NONE, 0, nullptr, 0); + OWriter.Relocations[MemtagRelocs].push_back(Rec); + } +} + void ELFWriter::computeSymbolTable( MCAssembler &Asm, const MCAsmLayout &Layout, const SectionIndexMapTy &SectionIndexMap, const RevGroupMapTy &RevGroupMap, @@ -1037,7 +1056,7 @@ // Null section first. uint64_t FirstSectionSize = (NumSections + 1) >= ELF::SHN_LORESERVE ? NumSections + 1 : 0; - WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, 0, 0, None, 0); + WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, 0, 0, std::nullopt, 0); for (const MCSectionELF *Section : SectionTable) { uint32_t GroupSymbolIndex; @@ -1068,6 +1087,8 @@ Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0); StringTableIndex = addToSectionTable(StrtabSection); + createMemtagRelocs(Asm); + RevGroupMapTy RevGroupMap; SectionIndexMapTy SectionIndexMap; @@ -1317,6 +1338,15 @@ if (Sym->isUndefined()) return true; + // For memory-tagged symbols, ensure that the relocation uses the symbol. For + // tagged symbols, we emit an empty relocation (R_AARCH64_NONE) in a special + // section (SHT_AARCH64_MEMTAG_GLOBALS_STATIC) to indicate to the linker that + // this global needs to be tagged. In addition, the linker needs to know + // whether to emit a special addend when relocating `end` symbols, and this + // can only be determined by the attributes of the symbol itself. 
+ if (Sym->isMemtag()) + return true; + unsigned Binding = Sym->getBinding(); switch(Binding) { default: diff --git a/llvm/lib/MC/MCAsmBackend.cpp b/llvm/lib/MC/MCAsmBackend.cpp --- a/llvm/lib/MC/MCAsmBackend.cpp +++ b/llvm/lib/MC/MCAsmBackend.cpp @@ -75,7 +75,7 @@ } Optional MCAsmBackend::getFixupKind(StringRef Name) const { - return None; + return std::nullopt; } const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -266,12 +266,10 @@ void emitFileDirective(StringRef Filename) override; void emitFileDirective(StringRef Filename, StringRef CompilerVerion, StringRef TimeStamp, StringRef Description) override; - Expected tryEmitDwarfFileDirective(unsigned FileNo, - StringRef Directory, - StringRef Filename, - Optional Checksum = None, - Optional Source = None, - unsigned CUID = 0) override; + Expected tryEmitDwarfFileDirective( + unsigned FileNo, StringRef Directory, StringRef Filename, + Optional Checksum = std::nullopt, + Optional Source = std::nullopt, unsigned CUID = 0) override; void emitDwarfFile0Directive(StringRef Directory, StringRef Filename, Optional Checksum, Optional Source, @@ -769,6 +767,9 @@ case MCSA_Exported: // Non-AIX assemblers currently do not support exported visibility. return false; + case MCSA_Memtag: + OS << "\t.memtag\t"; + break; } Symbol->print(OS, MAI); @@ -1496,7 +1497,7 @@ emitAlignmentDirective(Alignment.value(), MAI->getTextAlignFillValue(), 1, MaxBytesToEmit); else - emitAlignmentDirective(Alignment.value(), None, 1, MaxBytesToEmit); + emitAlignmentDirective(Alignment.value(), std::nullopt, 1, MaxBytesToEmit); } void MCAsmStreamer::emitValueToOffset(const MCExpr *Offset, @@ -2380,7 +2381,7 @@ Expr->print(OS, MAI); } EmitEOL(); - return None; + return std::nullopt; } void MCAsmStreamer::emitAddrsig() { diff --git a/llvm/lib/MC/MCCodeView.cpp b/llvm/lib/MC/MCCodeView.cpp --- a/llvm/lib/MC/MCCodeView.cpp +++ b/llvm/lib/MC/MCCodeView.cpp @@ -318,9 +318,9 @@ ArrayRef CodeViewContext::getLinesForExtent(size_t L, size_t R) { if (R <= L) - return None; + return std::nullopt; if (L >= MCCVLines.size()) - return None; + return std::nullopt; return makeArrayRef(&MCCVLines[L], R - L); } diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -639,7 +639,8 @@ unsigned EntrySize) { auto I = ELFEntrySizeMap.find( MCContext::ELFEntrySizeKey{SectionName, Flags, EntrySize}); - return (I != ELFEntrySizeMap.end()) ? Optional(I->second) : None; + return (I != ELFEntrySizeMap.end()) ? 
Optional(I->second) + : std::nullopt; } MCSectionGOFF *MCContext::getGOFFSection(StringRef Section, SectionKind Kind, @@ -960,7 +961,7 @@ FileName = FileName.drop_front(); assert(!FileName.empty()); setMCLineTableRootFile( - /*CUID=*/0, getCompilationDir(), FileName, Cksum, None); + /*CUID=*/0, getCompilationDir(), FileName, Cksum, std::nullopt); } /// getDwarfFile - takes a file name and number to place in the dwarf file and diff --git a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp --- a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp +++ b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp @@ -17,7 +17,7 @@ MCDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const { - return None; + return std::nullopt; } uint64_t MCDisassembler::suggestBytesToSkip(ArrayRef Bytes, diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp --- a/llvm/lib/MC/MCDwarf.cpp +++ b/llvm/lib/MC/MCDwarf.cpp @@ -284,9 +284,9 @@ MCSection *Section) const { if (!HasSplitLineTable) return; - Optional NoLineStr(None); + Optional NoLineStr(std::nullopt); MCOS.switchSection(Section); - MCOS.emitLabel(Header.Emit(&MCOS, Params, None, NoLineStr).second); + MCOS.emitLabel(Header.Emit(&MCOS, Params, std::nullopt, NoLineStr).second); } std::pair @@ -594,7 +594,7 @@ // If any files have embedded source, they all must. if (MCDwarfFiles.empty()) { trackMD5Usage(Checksum.has_value()); - HasSource = (Source != None); + HasSource = (Source != std::nullopt); } if (DwarfVersion >= 5 && isRootFile(RootFile, Directory, FileName, Checksum)) return 0; @@ -622,7 +622,7 @@ inconvertibleErrorCode()); // If any files have embedded source, they all must. - if (HasSource != (Source != None)) + if (HasSource != (Source != std::nullopt)) return make_error("inconsistent use of embedded source", inconvertibleErrorCode()); diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp --- a/llvm/lib/MC/MCELFStreamer.cpp +++ b/llvm/lib/MC/MCELFStreamer.cpp @@ -287,6 +287,10 @@ Symbol->setVisibility(ELF::STV_PROTECTED); break; + case MCSA_Memtag: + Symbol->setMemtag(true); + break; + case MCSA_Hidden: Symbol->setVisibility(ELF::STV_HIDDEN); break; diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp --- a/llvm/lib/MC/MCInstrAnalysis.cpp +++ b/llvm/lib/MC/MCInstrAnalysis.cpp @@ -33,11 +33,11 @@ Optional MCInstrAnalysis::evaluateMemoryOperandAddress( const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr, uint64_t Size) const { - return None; + return std::nullopt; } Optional MCInstrAnalysis::getMemoryOperandRelocationOffset(const MCInst &Inst, uint64_t Size) const { - return None; + return std::nullopt; } diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp --- a/llvm/lib/MC/MCMachOStreamer.cpp +++ b/llvm/lib/MC/MCMachOStreamer.cpp @@ -358,6 +358,7 @@ case MCSA_Local: case MCSA_LGlobal: case MCSA_Exported: + case MCSA_Memtag: return false; case MCSA_Global: diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -972,47 +972,53 @@ // sections, and the individual DWARF sections are distinguished by their // section subtype. 
DwarfAbbrevSection = Ctx->getXCOFFSection( - ".dwabrev", SectionKind::getMetadata(), /* CsectProperties */ None, + ".dwabrev", SectionKind::getMetadata(), + /* CsectProperties */ std::nullopt, /* MultiSymbolsAllowed */ true, ".dwabrev", XCOFF::SSUBTYP_DWABREV); DwarfInfoSection = Ctx->getXCOFFSection( - ".dwinfo", SectionKind::getMetadata(), /* CsectProperties */ None, + ".dwinfo", SectionKind::getMetadata(), /* CsectProperties */ std::nullopt, /* MultiSymbolsAllowed */ true, ".dwinfo", XCOFF::SSUBTYP_DWINFO); DwarfLineSection = Ctx->getXCOFFSection( - ".dwline", SectionKind::getMetadata(), /* CsectProperties */ None, + ".dwline", SectionKind::getMetadata(), /* CsectProperties */ std::nullopt, /* MultiSymbolsAllowed */ true, ".dwline", XCOFF::SSUBTYP_DWLINE); DwarfFrameSection = Ctx->getXCOFFSection( - ".dwframe", SectionKind::getMetadata(), /* CsectProperties */ None, + ".dwframe", SectionKind::getMetadata(), + /* CsectProperties */ std::nullopt, /* MultiSymbolsAllowed */ true, ".dwframe", XCOFF::SSUBTYP_DWFRAME); DwarfPubNamesSection = Ctx->getXCOFFSection( - ".dwpbnms", SectionKind::getMetadata(), /* CsectProperties */ None, + ".dwpbnms", SectionKind::getMetadata(), + /* CsectProperties */ std::nullopt, /* MultiSymbolsAllowed */ true, ".dwpbnms", XCOFF::SSUBTYP_DWPBNMS); DwarfPubTypesSection = Ctx->getXCOFFSection( - ".dwpbtyp", SectionKind::getMetadata(), /* CsectProperties */ None, + ".dwpbtyp", SectionKind::getMetadata(), + /* CsectProperties */ std::nullopt, /* MultiSymbolsAllowed */ true, ".dwpbtyp", XCOFF::SSUBTYP_DWPBTYP); DwarfStrSection = Ctx->getXCOFFSection( - ".dwstr", SectionKind::getMetadata(), /* CsectProperties */ None, + ".dwstr", SectionKind::getMetadata(), /* CsectProperties */ std::nullopt, /* MultiSymbolsAllowed */ true, ".dwstr", XCOFF::SSUBTYP_DWSTR); DwarfLocSection = Ctx->getXCOFFSection( - ".dwloc", SectionKind::getMetadata(), /* CsectProperties */ None, + ".dwloc", SectionKind::getMetadata(), /* CsectProperties */ std::nullopt, /* MultiSymbolsAllowed */ true, ".dwloc", XCOFF::SSUBTYP_DWLOC); DwarfARangesSection = Ctx->getXCOFFSection( - ".dwarnge", SectionKind::getMetadata(), /* CsectProperties */ None, + ".dwarnge", SectionKind::getMetadata(), + /* CsectProperties */ std::nullopt, /* MultiSymbolsAllowed */ true, ".dwarnge", XCOFF::SSUBTYP_DWARNGE); DwarfRangesSection = Ctx->getXCOFFSection( - ".dwrnges", SectionKind::getMetadata(), /* CsectProperties */ None, + ".dwrnges", SectionKind::getMetadata(), + /* CsectProperties */ std::nullopt, /* MultiSymbolsAllowed */ true, ".dwrnges", XCOFF::SSUBTYP_DWRNGES); DwarfMacinfoSection = Ctx->getXCOFFSection( - ".dwmac", SectionKind::getMetadata(), /* CsectProperties */ None, + ".dwmac", SectionKind::getMetadata(), /* CsectProperties */ std::nullopt, /* MultiSymbolsAllowed */ true, ".dwmac", XCOFF::SSUBTYP_DWMAC); } diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -154,7 +154,7 @@ assert(Hi && Lo); if (!Hi->getFragment() || Hi->getFragment() != Lo->getFragment() || Hi->isVariable() || Lo->isVariable()) - return None; + return std::nullopt; return Hi->getOffset() - Lo->getOffset(); } @@ -746,7 +746,7 @@ std::string("symbol in offset has no data " "fragment")); DF = cast(Fragment); - return None; + return std::nullopt; } if (OffsetVal.getSymB()) @@ -785,7 +785,7 @@ "fragment")); DF = cast(Fragment); } - return None; + return std::nullopt; } Optional> @@ -814,7 +814,7 @@ return std::make_pair(false, 
std::string(".reloc offset is negative")); DF->getFixups().push_back( MCFixup::create(OffsetVal.getConstant(), Expr, Kind, Loc)); - return None; + return std::nullopt; } if (OffsetVal.getSymB()) return std::make_pair(false, @@ -827,19 +827,19 @@ Optional> Error; Error = getOffsetAndDataFragment(Symbol, SymbolOffset, DF); - if (Error != None) + if (Error != std::nullopt) return Error; DF->getFixups().push_back( MCFixup::create(SymbolOffset + OffsetVal.getConstant(), Expr, Kind, Loc)); - return None; + return std::nullopt; } PendingFixups.emplace_back( &SRE.getSymbol(), DF, MCFixup::create(OffsetVal.getConstant(), Expr, Kind, Loc)); - return None; + return std::nullopt; } void MCObjectStreamer::emitFill(const MCExpr &NumBytes, uint64_t FillValue, diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp --- a/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -684,12 +684,12 @@ size_t AsmLexer::peekTokens(MutableArrayRef Buf, bool ShouldSkipSpace) { - SaveAndRestore SavedTokenStart(TokStart); - SaveAndRestore SavedCurPtr(CurPtr); - SaveAndRestore SavedAtStartOfLine(IsAtStartOfLine); - SaveAndRestore SavedAtStartOfStatement(IsAtStartOfStatement); - SaveAndRestore SavedSkipSpace(SkipSpace, ShouldSkipSpace); - SaveAndRestore SavedIsPeeking(IsPeeking, true); + SaveAndRestore SavedTokenStart(TokStart); + SaveAndRestore SavedCurPtr(CurPtr); + SaveAndRestore SavedAtStartOfLine(IsAtStartOfLine); + SaveAndRestore SavedAtStartOfStatement(IsAtStartOfStatement); + SaveAndRestore SavedSkipSpace(SkipSpace, ShouldSkipSpace); + SaveAndRestore SavedIsPeeking(IsPeeking, true); std::string SavedErr = getErr(); SMLoc SavedErrLoc = getErrLoc(); diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -237,9 +237,11 @@ AssemblerDialect = i; } - void Note(SMLoc L, const Twine &Msg, SMRange Range = None) override; - bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) override; - bool printError(SMLoc L, const Twine &Msg, SMRange Range = None) override; + void Note(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) override; + bool Warning(SMLoc L, const Twine &Msg, + SMRange Range = std::nullopt) override; + bool printError(SMLoc L, const Twine &Msg, + SMRange Range = std::nullopt) override; const AsmToken &Lex() override; @@ -322,7 +324,7 @@ void printMacroInstantiations(); void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg, - SMRange Range = None) const { + SMRange Range = std::nullopt) const { ArrayRef Ranges(Range); SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges); } @@ -539,6 +541,7 @@ DK_LTO_DISCARD, DK_LTO_SET_CONDITIONAL, DK_CFI_MTE_TAGGED_FRAME, + DK_MEMTAG, DK_END }; @@ -948,10 +951,9 @@ // Use the first #line directive for this, if any. It's preprocessed, so // there is no checksum, and of course no source directive. 
if (!FirstCppHashFilename.empty()) - getContext().setMCLineTableRootFile(/*CUID=*/0, - getContext().getCompilationDir(), - FirstCppHashFilename, - /*Cksum=*/None, /*Source=*/None); + getContext().setMCLineTableRootFile( + /*CUID=*/0, getContext().getCompilationDir(), FirstCppHashFilename, + /*Cksum=*/std::nullopt, /*Source=*/std::nullopt); const MCDwarfFile &RootFile = getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile(); getContext().setGenDwarfFileNumber(getStreamer().emitDwarfFileDirective( @@ -2298,6 +2300,8 @@ return parseDirectivePseudoProbe(); case DK_LTO_DISCARD: return parseDirectiveLTODiscard(); + case DK_MEMTAG: + return parseDirectiveSymbolAttribute(MCSA_Memtag); } return Error(IDLoc, "unknown directive"); @@ -4984,8 +4988,9 @@ MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - // Assembler local symbols don't make any sense here. Complain loudly. - if (Sym->isTemporary()) + // Assembler local symbols don't make any sense here, except for directives + // that the symbol should be tagged. + if (Sym->isTemporary() && Attr != MCSA_Memtag) return Error(Loc, "non-local symbol required"); if (!getStreamer().emitSymbolAttribute(Sym, Attr)) @@ -5598,6 +5603,7 @@ DirectiveKindMap[".pseudoprobe"] = DK_PSEUDO_PROBE; DirectiveKindMap[".lto_discard"] = DK_LTO_DISCARD; DirectiveKindMap[".lto_set_conditional"] = DK_LTO_SET_CONDITIONAL; + DirectiveKindMap[".memtag"] = DK_MEMTAG; } MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) { @@ -5693,7 +5699,8 @@ raw_svector_ostream OS(Buf); while (Count--) { // Note that the AtPseudoVariable is disabled for instantiations of .rep(t). - if (expandMacro(OS, M->Body, None, None, false, getTok().getLoc())) + if (expandMacro(OS, M->Body, std::nullopt, std::nullopt, false, + getTok().getLoc())) return true; } instantiateMacroLikeBody(M, DirectiveLoc, OS); diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp --- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -178,7 +178,7 @@ StringRef Name; if (getParser().parseIdentifier(Name)) - return TokError("expected identifier in directive"); + return TokError("expected identifier"); if (getParser().discardLTOSymbol(Name)) { if (getLexer().is(AsmToken::EndOfStatement)) @@ -194,7 +194,7 @@ break; if (getLexer().isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); + return TokError("expected comma"); Lex(); } } @@ -221,11 +221,11 @@ bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) { StringRef Name; if (getParser().parseIdentifier(Name)) - return TokError("expected identifier in directive"); + return TokError("expected identifier"); MCSymbolELF *Sym = cast(getContext().getOrCreateSymbol(Name)); if (getLexer().isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); + return TokError("expected comma"); Lex(); const MCExpr *Expr; @@ -233,7 +233,7 @@ return true; if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); + return TokError("unexpected token"); Lex(); getStreamer().emitELFSize(Sym, Expr); @@ -417,7 +417,7 @@ TypeName = getTok().getString(); Lex(); } else if (getParser().parseIdentifier(TypeName)) - return TokError("expected identifier in directive"); + return TokError("expected identifier"); return false; } @@ -485,7 +485,7 @@ Lex(); StringRef UniqueStr; if (getParser().parseIdentifier(UniqueStr)) - return TokError("expected identifier in directive"); + return TokError("expected identifier"); if (UniqueStr != "unique") 
return TokError("expected 'unique'"); if (L.isNot(AsmToken::Comma)) @@ -526,7 +526,7 @@ StringRef SectionName; if (ParseSectionName(SectionName)) - return TokError("expected identifier in directive"); + return TokError("expected identifier"); StringRef TypeName; int64_t Size = 0; @@ -567,7 +567,7 @@ if (getLexer().isNot(AsmToken::String)) { if (getLexer().isNot(AsmToken::Hash)) - return TokError("expected string in directive"); + return TokError("expected string"); extraFlags = parseSunStyleSectionFlags(); } else { StringRef FlagsStr = getTok().getStringContents(); @@ -596,7 +596,7 @@ if (Group) return TokError("Group section must specify the type"); if (L.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); + return TokError("expected end of directive"); } if (Mergeable) @@ -614,7 +614,7 @@ EndStmt: if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); + return TokError("expected end of directive"); Lex(); unsigned Type = ELF::SHT_PROGBITS; @@ -746,7 +746,7 @@ bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) { StringRef Name; if (getParser().parseIdentifier(Name)) - return TokError("expected identifier in directive"); + return TokError("expected identifier"); // Handle the identifier as the key symbol. MCSymbol *Sym = getContext().getOrCreateSymbol(Name); @@ -779,14 +779,14 @@ StringRef Type; if (getParser().parseIdentifier(Type)) - return TokError("expected symbol type in directive"); + return TokError("expected symbol type"); MCSymbolAttr Attr = MCAttrForString(Type); if (Attr == MCSA_Invalid) - return Error(TypeLoc, "unsupported attribute in '.type' directive"); + return Error(TypeLoc, "unsupported attribute"); if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.type' directive"); + return TokError("expected end of directive"); Lex(); getStreamer().emitSymbolAttribute(Sym, Attr); @@ -798,14 +798,14 @@ /// ::= .ident string bool ELFAsmParser::ParseDirectiveIdent(StringRef, SMLoc) { if (getLexer().isNot(AsmToken::String)) - return TokError("unexpected token in '.ident' directive"); + return TokError("expected string"); StringRef Data = getTok().getIdentifier(); Lex(); if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.ident' directive"); + return TokError("expected end of directive"); Lex(); getStreamer().emitIdent(Data); @@ -817,7 +817,7 @@ bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) { StringRef OriginalName, Name, Action; if (getParser().parseIdentifier(OriginalName)) - return TokError("expected identifier in directive"); + return TokError("expected identifier"); if (getLexer().isNot(AsmToken::Comma)) return TokError("expected a comma"); @@ -832,7 +832,7 @@ getLexer().setAllowAtInIdentifier(AllowAtInIdentifier); if (getParser().parseIdentifier(Name)) - return TokError("expected identifier in directive"); + return TokError("expected identifier"); if (!Name.contains('@')) return TokError("expected a '@' in the name"); @@ -853,7 +853,7 @@ /// ::= .version string bool ELFAsmParser::ParseDirectiveVersion(StringRef, SMLoc) { if (getLexer().isNot(AsmToken::String)) - return TokError("unexpected token in '.version' directive"); + return TokError("expected string"); StringRef Data = getTok().getIdentifier(); @@ -880,7 +880,7 @@ StringRef AliasName; if (getParser().parseIdentifier(AliasName)) - return TokError("expected identifier in directive"); + return TokError("expected identifier"); if 
(getLexer().isNot(AsmToken::Comma)) return TokError("expected a comma"); @@ -889,7 +889,7 @@ StringRef Name; if (getParser().parseIdentifier(Name)) - return TokError("expected identifier in directive"); + return TokError("expected identifier"); MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName); @@ -907,7 +907,7 @@ } if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); + return TokError("expected end of directive"); Lex(); diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -510,9 +510,11 @@ AssemblerDialect = i; } - void Note(SMLoc L, const Twine &Msg, SMRange Range = None) override; - bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) override; - bool printError(SMLoc L, const Twine &Msg, SMRange Range = None) override; + void Note(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) override; + bool Warning(SMLoc L, const Twine &Msg, + SMRange Range = std::nullopt) override; + bool printError(SMLoc L, const Twine &Msg, + SMRange Range = std::nullopt) override; enum ExpandKind { ExpandMacros, DoNotExpandMacros }; const AsmToken &Lex(ExpandKind ExpandNextToken); @@ -619,7 +621,7 @@ bool expandStatement(SMLoc Loc); void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg, - SMRange Range = None) const { + SMRange Range = std::nullopt) const { ArrayRef Ranges(Range); SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges); } @@ -1323,10 +1325,9 @@ // Use the first #line directive for this, if any. It's preprocessed, so // there is no checksum, and of course no source directive. if (!FirstCppHashFilename.empty()) - getContext().setMCLineTableRootFile(/*CUID=*/0, - getContext().getCompilationDir(), - FirstCppHashFilename, - /*Cksum=*/None, /*Source=*/None); + getContext().setMCLineTableRootFile( + /*CUID=*/0, getContext().getCompilationDir(), FirstCppHashFilename, + /*Cksum=*/std::nullopt, /*Source=*/std::nullopt); const MCDwarfFile &RootFile = getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile(); getContext().setGenDwarfFileNumber(getStreamer().emitDwarfFileDirective( @@ -6991,7 +6992,8 @@ SmallString<256> Buf; raw_svector_ostream OS(Buf); while (Count--) { - if (expandMacro(OS, M->Body, None, None, M->Locals, getTok().getLoc())) + if (expandMacro(OS, M->Body, std::nullopt, std::nullopt, M->Locals, + getTok().getLoc())) return true; } instantiateMacroLikeBody(M, DirectiveLoc, OS); @@ -7024,7 +7026,8 @@ if (Condition) { // Instantiate the macro, then resume at this directive to recheck the // condition. - if (expandMacro(OS, M->Body, None, None, M->Locals, getTok().getLoc())) + if (expandMacro(OS, M->Body, std::nullopt, std::nullopt, M->Locals, + getTok().getLoc())) return true; instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/DirectiveLoc, OS); } diff --git a/llvm/lib/MC/MCRegisterInfo.cpp b/llvm/lib/MC/MCRegisterInfo.cpp --- a/llvm/lib/MC/MCRegisterInfo.cpp +++ b/llvm/lib/MC/MCRegisterInfo.cpp @@ -84,12 +84,12 @@ unsigned Size = isEH ? 
EHDwarf2LRegsSize : Dwarf2LRegsSize; if (!M) - return None; + return std::nullopt; DwarfLLVMRegPair Key = { RegNum, 0 }; const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key); if (I != M + Size && I->FromReg == RegNum) return I->ToReg; - return None; + return std::nullopt; } int MCRegisterInfo::getDwarfRegNumFromDwarfEHRegNum(unsigned RegNum) const { diff --git a/llvm/lib/MC/MCSubtargetInfo.cpp b/llvm/lib/MC/MCSubtargetInfo.cpp --- a/llvm/lib/MC/MCSubtargetInfo.cpp +++ b/llvm/lib/MC/MCSubtargetInfo.cpp @@ -17,6 +17,7 @@ #include #include #include +#include using namespace llvm; @@ -335,17 +336,17 @@ ForwardingPaths); } -Optional MCSubtargetInfo::getCacheSize(unsigned Level) const { - return None; +std::optional MCSubtargetInfo::getCacheSize(unsigned Level) const { + return std::nullopt; } -Optional +std::optional MCSubtargetInfo::getCacheAssociativity(unsigned Level) const { - return None; + return std::nullopt; } Optional MCSubtargetInfo::getCacheLineSize(unsigned Level) const { - return None; + return std::nullopt; } unsigned MCSubtargetInfo::getPrefetchDistance() const { diff --git a/llvm/lib/MC/MCSymbolELF.cpp b/llvm/lib/MC/MCSymbolELF.cpp --- a/llvm/lib/MC/MCSymbolELF.cpp +++ b/llvm/lib/MC/MCSymbolELF.cpp @@ -33,7 +33,10 @@ ELF_WeakrefUsedInReloc_Shift = 11, // One bit. - ELF_BindingSet_Shift = 12 + ELF_BindingSet_Shift = 12, + + // One bit. + ELF_IsMemoryTagged_Shift = 13, }; } @@ -193,4 +196,16 @@ bool MCSymbolELF::isBindingSet() const { return getFlags() & (0x1 << ELF_BindingSet_Shift); } + +bool MCSymbolELF::isMemtag() const { + return getFlags() & (0x1 << ELF_IsMemoryTagged_Shift); +} + +void MCSymbolELF::setMemtag(bool Tagged) { + uint32_t OtherFlags = getFlags() & ~(1 << ELF_IsMemoryTagged_Shift); + if (Tagged) + setFlags(OtherFlags | (1 << ELF_IsMemoryTagged_Shift)); + else + setFlags(OtherFlags); +} } diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp --- a/llvm/lib/MC/MCWin64EH.cpp +++ b/llvm/lib/MC/MCWin64EH.cpp @@ -287,7 +287,7 @@ // unusual constructs, like an inline asm with an alignment directive. int64_t value; if (!Diff->evaluateAsAbsolute(value, OS->getAssembler())) - return None; + return std::nullopt; return value; } @@ -999,6 +999,23 @@ // These are never canonical; they don't show up with the usual Arm64 // calling convention. return false; + case Win64EH::UOP_AllocLarge: + // Allocations this large can't be represented in packed unwind (and + // usually don't fit the canonical form anyway because we need to use + // __chkstk to allocate the stack space). + return false; + case Win64EH::UOP_AddFP: + // "add x29, sp, #N" doesn't show up in the canonical pattern (except for + // N=0, which is UOP_SetFP). + return false; + case Win64EH::UOP_TrapFrame: + case Win64EH::UOP_Context: + case Win64EH::UOP_ClearUnwoundToCall: + case Win64EH::UOP_PushMachFrame: + // These are special opcodes that aren't normally generated. 
+ return false; + default: + report_fatal_error("Unknown Arm64 unwind opcode"); } } if (RegI > 10 || RegF > 8) diff --git a/llvm/lib/MC/StringTableBuilder.cpp b/llvm/lib/MC/StringTableBuilder.cpp --- a/llvm/lib/MC/StringTableBuilder.cpp +++ b/llvm/lib/MC/StringTableBuilder.cpp @@ -52,7 +52,7 @@ } } -StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment) +StringTableBuilder::StringTableBuilder(Kind K, Align Alignment) : K(K), Alignment(Alignment) { initSize(); } @@ -151,7 +151,7 @@ StringRef S = P->first.val(); if (Previous.endswith(S)) { size_t Pos = Size - S.size() - (K != RAW); - if (!(Pos & (Alignment - 1))) { + if (isAligned(Alignment, Pos)) { P->second = Pos; continue; } diff --git a/llvm/lib/ObjCopy/MachO/MachOObject.h b/llvm/lib/ObjCopy/MachO/MachOObject.h --- a/llvm/lib/ObjCopy/MachO/MachOObject.h +++ b/llvm/lib/ObjCopy/MachO/MachOObject.h @@ -125,7 +125,7 @@ } Optional section() const { - return n_sect == MachO::NO_SECT ? None : Optional(n_sect); + return n_sect == MachO::NO_SECT ? std::nullopt : Optional(n_sect); } }; diff --git a/llvm/lib/ObjCopy/MachO/MachOObject.cpp b/llvm/lib/ObjCopy/MachO/MachOObject.cpp --- a/llvm/lib/ObjCopy/MachO/MachOObject.cpp +++ b/llvm/lib/ObjCopy/MachO/MachOObject.cpp @@ -209,7 +209,7 @@ case MachO::LC_SEGMENT_64: return extractSegmentName(MLC.segment_command_64_data.segname); default: - return None; + return std::nullopt; } } @@ -221,6 +221,6 @@ case MachO::LC_SEGMENT_64: return MLC.segment_command_64_data.vmaddr; default: - return None; + return std::nullopt; } } diff --git a/llvm/lib/ObjCopy/MachO/MachOReader.cpp b/llvm/lib/ObjCopy/MachO/MachOReader.cpp --- a/llvm/lib/ObjCopy/MachO/MachOReader.cpp +++ b/llvm/lib/ObjCopy/MachO/MachOReader.cpp @@ -333,7 +333,7 @@ for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) { uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i); if ((Index & AbsOrLocalMask) != 0) - O.IndirectSymTable.Symbols.emplace_back(Index, None); + O.IndirectSymTable.Symbols.emplace_back(Index, std::nullopt); else O.IndirectSymTable.Symbols.emplace_back( Index, O.SymTable.getSymbolByIndex(Index)); diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp --- a/llvm/lib/Object/Archive.cpp +++ b/llvm/lib/Object/Archive.cpp @@ -1158,7 +1158,7 @@ return MemberOrErr.takeError(); } } - return None; + return std::nullopt; } // Returns true if archive file contains no member file. 
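The StringTableBuilder hunk above swaps the hand-written power-of-two mask test `!(Pos & (Alignment - 1))` for an `isAligned(Alignment, Pos)` call once the constructor takes an `Align` instead of a raw `unsigned`. A small standalone check (helper names are mine, not LLVM's) that the two formulations agree whenever the alignment is a power of two, which `Align` guarantees by construction:

// Verify that the modulo-based check and the old mask trick agree for
// power-of-two alignments (illustrative only).
#include <cassert>
#include <cstdint>

static bool isAlignedMod(uint64_t Alignment, uint64_t Pos) {
  return Pos % Alignment == 0;          // what an Align-based helper computes
}

static bool isAlignedMask(uint64_t Alignment, uint64_t Pos) {
  return (Pos & (Alignment - 1)) == 0;  // the old hand-written test
}

int main() {
  for (uint64_t A = 1; A <= 64; A <<= 1) // powers of two only
    for (uint64_t Pos = 0; Pos < 256; ++Pos)
      assert(isAlignedMod(A, Pos) == isAlignedMask(A, Pos));
  return 0;
}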
diff --git a/llvm/lib/Object/BuildID.cpp b/llvm/lib/Object/BuildID.cpp --- a/llvm/lib/Object/BuildID.cpp +++ b/llvm/lib/Object/BuildID.cpp @@ -54,7 +54,7 @@ return getBuildID(O->getELFFile()); if (auto *O = dyn_cast>(Obj)) return getBuildID(O->getELFFile()); - return None; + return std::nullopt; } Optional BuildIDFetcher::fetch(BuildIDRef BuildID) const { @@ -86,7 +86,7 @@ return std::string(Path); } } - return None; + return std::nullopt; } } // namespace object diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -669,7 +669,7 @@ Version = Data.getU8(Cur); if (!Cur) break; - if (Version > 1) + if (Version > 2) return createError("unsupported SHT_LLVM_BB_ADDR_MAP version: " + Twine(static_cast(Version))); Data.getU8(Cur); // Feature byte @@ -678,8 +678,9 @@ uint32_t NumBlocks = ReadULEB128AsUInt32(); std::vector BBEntries; uint32_t PrevBBEndOffset = 0; - for (uint32_t BlockID = 0; !ULEBSizeErr && Cur && (BlockID < NumBlocks); - ++BlockID) { + for (uint32_t BlockIndex = 0; + !ULEBSizeErr && Cur && (BlockIndex < NumBlocks); ++BlockIndex) { + uint32_t ID = Version >= 2 ? ReadULEB128AsUInt32() : BlockIndex; uint32_t Offset = ReadULEB128AsUInt32(); uint32_t Size = ReadULEB128AsUInt32(); uint32_t Metadata = ReadULEB128AsUInt32(); @@ -688,7 +689,7 @@ Offset += PrevBBEndOffset; PrevBBEndOffset = Offset + Size; } - BBEntries.push_back({Offset, Size, Metadata}); + BBEntries.push_back({ID, Offset, Size, Metadata}); } FunctionEntries.push_back({Address, std::move(BBEntries)}); } diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -166,7 +166,7 @@ // both ARMv7-M and R have to support thumb hardware div bool isV7 = false; - Optional Attr = + std::optional Attr = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); if (Attr) isV7 = Attr.value() == ARMBuildAttrs::v7; @@ -303,7 +303,8 @@ return Features; // Keep "c" feature if there is one in PlatformFlags. } - Optional Attr = Attributes.getAttributeString(RISCVAttrs::ARCH); + std::optional Attr = + Attributes.getAttributeString(RISCVAttrs::ARCH); if (Attr) { // The Arch pattern is [rv32|rv64][i|e]version(_[m|a|f|d|c]version)* // Version string pattern is (major)p(minor). Major and minor are optional. 
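The ELF.cpp hunk above bumps the accepted SHT_LLVM_BB_ADDR_MAP version to 2 and, for version >= 2, reads an explicit ULEB128 block ID ahead of each offset/size/metadata triple, falling back to the block's index for older encodings. A standalone sketch of that decode step with a hand-rolled ULEB128 reader; the struct and function names are illustrative, not the llvm::object API, and the real reader additionally rebases each offset against the previous block's end for version >= 1:

// Illustrative decoder for one BB entry list, following the version-2 layout
// in the hunk: [ID]? Offset Size Metadata, all ULEB128-encoded.
#include <cstdint>
#include <iostream>
#include <vector>

static uint64_t readULEB128(const uint8_t *&P, const uint8_t *End) {
  uint64_t Value = 0;
  unsigned Shift = 0;
  while (P != End) {
    uint8_t Byte = *P++;
    Value |= uint64_t(Byte & 0x7f) << Shift;
    if (!(Byte & 0x80))
      break;
    Shift += 7;
  }
  return Value;
}

struct BBEntry { uint32_t ID, Offset, Size, Metadata; };

static std::vector<BBEntry> decodeBlocks(const uint8_t *P, const uint8_t *End,
                                         uint8_t Version, uint32_t NumBlocks) {
  std::vector<BBEntry> Entries;
  for (uint32_t Index = 0; Index < NumBlocks && P != End; ++Index) {
    // Version >= 2 carries an explicit ID; older versions use the index.
    uint32_t ID = Version >= 2 ? (uint32_t)readULEB128(P, End) : Index;
    uint32_t Offset = (uint32_t)readULEB128(P, End);
    uint32_t Size = (uint32_t)readULEB128(P, End);
    uint32_t Metadata = (uint32_t)readULEB128(P, End);
    Entries.push_back({ID, Offset, Size, Metadata});
  }
  return Entries;
}

int main() {
  // Two version-2 entries: {ID=7, Off=0, Size=4, Md=1} and {ID=3, Off=4, Size=8, Md=0}.
  const uint8_t Blob[] = {7, 0, 4, 1, 3, 4, 8, 0};
  for (const BBEntry &E : decodeBlocks(Blob, Blob + sizeof(Blob), /*Version=*/2, 2))
    std::cout << "ID=" << E.ID << " Offset=" << E.Offset << " Size=" << E.Size
              << " Metadata=" << E.Metadata << "\n";
}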
@@ -382,7 +383,7 @@ case ELF::EM_PPC64: return StringRef("future"); default: - return None; + return std::nullopt; } } @@ -542,7 +543,7 @@ else Triple = "arm"; - Optional Attr = + std::optional Attr = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); if (Attr) { switch (Attr.value()) { @@ -574,7 +575,7 @@ Triple += "v6k"; break; case ARMBuildAttrs::v7: { - Optional ArchProfileAttr = + std::optional ArchProfileAttr = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile); if (ArchProfileAttr && ArchProfileAttr.value() == ARMBuildAttrs::MicroControllerProfile) @@ -685,7 +686,7 @@ if (PltEntryIter != GotToPlt.end()) { symbol_iterator Sym = Relocation.getSymbol(); if (Sym == symbol_end()) - Result.emplace_back(None, PltEntryIter->second); + Result.emplace_back(std::nullopt, PltEntryIter->second); else Result.emplace_back(Sym->getRawDataRefImpl(), PltEntryIter->second); } diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp --- a/llvm/lib/Object/MachOObjectFile.cpp +++ b/llvm/lib/Object/MachOObjectFile.cpp @@ -4889,12 +4889,12 @@ ArrayRef MachOObjectFile::getDyldInfoRebaseOpcodes() const { if (!DyldInfoLoadCmd) - return None; + return std::nullopt; auto DyldInfoOrErr = getStructOrErr(*this, DyldInfoLoadCmd); if (!DyldInfoOrErr) - return None; + return std::nullopt; MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get(); const uint8_t *Ptr = reinterpret_cast(getPtr(*this, DyldInfo.rebase_off)); @@ -4903,12 +4903,12 @@ ArrayRef MachOObjectFile::getDyldInfoBindOpcodes() const { if (!DyldInfoLoadCmd) - return None; + return std::nullopt; auto DyldInfoOrErr = getStructOrErr(*this, DyldInfoLoadCmd); if (!DyldInfoOrErr) - return None; + return std::nullopt; MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get(); const uint8_t *Ptr = reinterpret_cast(getPtr(*this, DyldInfo.bind_off)); @@ -4917,12 +4917,12 @@ ArrayRef MachOObjectFile::getDyldInfoWeakBindOpcodes() const { if (!DyldInfoLoadCmd) - return None; + return std::nullopt; auto DyldInfoOrErr = getStructOrErr(*this, DyldInfoLoadCmd); if (!DyldInfoOrErr) - return None; + return std::nullopt; MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get(); const uint8_t *Ptr = reinterpret_cast(getPtr(*this, DyldInfo.weak_bind_off)); @@ -4931,12 +4931,12 @@ ArrayRef MachOObjectFile::getDyldInfoLazyBindOpcodes() const { if (!DyldInfoLoadCmd) - return None; + return std::nullopt; auto DyldInfoOrErr = getStructOrErr(*this, DyldInfoLoadCmd); if (!DyldInfoOrErr) - return None; + return std::nullopt; MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get(); const uint8_t *Ptr = reinterpret_cast(getPtr(*this, DyldInfo.lazy_bind_off)); @@ -4945,12 +4945,12 @@ ArrayRef MachOObjectFile::getDyldInfoExportsTrie() const { if (!DyldInfoLoadCmd) - return None; + return std::nullopt; auto DyldInfoOrErr = getStructOrErr(*this, DyldInfoLoadCmd); if (!DyldInfoOrErr) - return None; + return std::nullopt; MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get(); const uint8_t *Ptr = reinterpret_cast(getPtr(*this, DyldInfo.export_off)); @@ -4961,7 +4961,7 @@ MachOObjectFile::getChainedFixupsLoadCommand() const { // Load the dyld chained fixups load command. if (!DyldChainedFixupsLoadCmd) - return llvm::None; + return std::nullopt; auto DyldChainedFixupsOrErr = getStructOrErr( *this, DyldChainedFixupsLoadCmd); if (!DyldChainedFixupsOrErr) @@ -4972,7 +4972,7 @@ // If the load command is present but the data offset has been zeroed out, // as is the case for dylib stubs, return None (no error). 
if (!DyldChainedFixups.dataoff) - return llvm::None; + return std::nullopt; return DyldChainedFixups; } @@ -4982,7 +4982,7 @@ if (!CFOrErr) return CFOrErr.takeError(); if (!CFOrErr->has_value()) - return llvm::None; + return std::nullopt; const MachO::linkedit_data_command &DyldChainedFixups = **CFOrErr; @@ -5236,12 +5236,12 @@ ArrayRef MachOObjectFile::getDyldExportsTrie() const { if (!DyldExportsTrieLoadCmd) - return None; + return std::nullopt; auto DyldExportsTrieOrError = getStructOrErr( *this, DyldExportsTrieLoadCmd); if (!DyldExportsTrieOrError) - return None; + return std::nullopt; MachO::linkedit_data_command DyldExportsTrie = DyldExportsTrieOrError.get(); const uint8_t *Ptr = reinterpret_cast(getPtr(*this, DyldExportsTrie.dataoff)); @@ -5265,7 +5265,7 @@ ArrayRef MachOObjectFile::getUuid() const { if (!UuidLoadCmd) - return None; + return std::nullopt; // Returning a pointer is fine as uuid doesn't need endian swapping. const char *Ptr = UuidLoadCmd + offsetof(MachO::uuid_command, uuid); return makeArrayRef(reinterpret_cast(Ptr), 16); diff --git a/llvm/lib/Object/Minidump.cpp b/llvm/lib/Object/Minidump.cpp --- a/llvm/lib/Object/Minidump.cpp +++ b/llvm/lib/Object/Minidump.cpp @@ -19,7 +19,7 @@ auto It = StreamMap.find(Type); if (It != StreamMap.end()) return getRawStream(Streams[It->second]); - return None; + return std::nullopt; } Expected MinidumpFile::getString(size_t Offset) const { diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -775,7 +775,7 @@ if (!S->Offset) S->Offset = alignToOffset(CBA, sizeof(typename ELFT::uint), - /*Offset=*/None); + /*Offset=*/std::nullopt); else S->Offset = alignToOffset(CBA, /*Align=*/1, S->Offset); @@ -1015,8 +1015,8 @@ assignSectionAddress(SHeader, YAMLSec); - SHeader.sh_offset = - alignToOffset(CBA, SHeader.sh_addralign, RawSec ? RawSec->Offset : None); + SHeader.sh_offset = alignToOffset(CBA, SHeader.sh_addralign, + RawSec ? RawSec->Offset : std::nullopt); if (RawSec && (RawSec->Content || RawSec->Size)) { assert(Symbols.empty()); @@ -1043,7 +1043,7 @@ dyn_cast_or_null(YAMLSec); SHeader.sh_offset = alignToOffset(CBA, SHeader.sh_addralign, - YAMLSec ? YAMLSec->Offset : None); + YAMLSec ? YAMLSec->Offset : std::nullopt); if (RawSec && (RawSec->Content || RawSec->Size)) { SHeader.sh_size = writeContent(CBA, RawSec->Content, RawSec->Size); @@ -1097,7 +1097,7 @@ SHeader.sh_type = YAMLSec ? YAMLSec->Type : ELF::SHT_PROGBITS; SHeader.sh_addralign = YAMLSec ? (uint64_t)YAMLSec->AddressAlign : 1; SHeader.sh_offset = alignToOffset(CBA, SHeader.sh_addralign, - YAMLSec ? YAMLSec->Offset : None); + YAMLSec ? YAMLSec->Offset : std::nullopt); ELFYAML::RawContentSection *RawSec = dyn_cast_or_null(YAMLSec); @@ -1395,7 +1395,7 @@ for (const ELFYAML::BBAddrMapEntry &E : *Section.Entries) { // Write version and feature values. if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP) { - if (E.Version > 1) + if (E.Version > 2) WithColor::warning() << "unsupported SHT_LLVM_BB_ADDR_MAP version: " << static_cast(E.Version) << "; encoding using the most recent version"; @@ -1413,10 +1413,13 @@ // Write all BBEntries. 
if (!E.BBEntries) continue; - for (const ELFYAML::BBAddrMapEntry::BBEntry &BBE : *E.BBEntries) + for (const ELFYAML::BBAddrMapEntry::BBEntry &BBE : *E.BBEntries) { + if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP && E.Version > 1) + SHeader.sh_size += CBA.writeULEB128(BBE.ID); SHeader.sh_size += CBA.writeULEB128(BBE.AddressOffset) + CBA.writeULEB128(BBE.Size) + CBA.writeULEB128(BBE.Metadata); + } } } diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -1202,7 +1202,7 @@ Optional denormalize(IO &) { if (!Other) - return None; + return std::nullopt; uint8_t Ret = 0; for (StOtherPiece &Val : *Other) Ret |= toValue(Val); @@ -1794,6 +1794,7 @@ void MappingTraits::mapping( IO &IO, ELFYAML::BBAddrMapEntry::BBEntry &E) { assert(IO.getContext() && "The IO context is not initialized"); + IO.mapOptional("ID", E.ID); IO.mapRequired("AddressOffset", E.AddressOffset); IO.mapRequired("Size", E.Size); IO.mapRequired("Metadata", E.Metadata); diff --git a/llvm/lib/Option/ArgList.cpp b/llvm/lib/Option/ArgList.cpp --- a/llvm/lib/Option/ArgList.cpp +++ b/llvm/lib/Option/ArgList.cpp @@ -128,7 +128,7 @@ /// This is a nicer interface when you don't have a list of Ids to exclude. void ArgList::AddAllArgs(ArgStringList &Output, ArrayRef Ids) const { - ArrayRef Exclude = None; + ArrayRef Exclude = std::nullopt; AddAllArgsExcept(Output, Ids, Exclude); } diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -474,19 +474,19 @@ static std::optional parseRepeatPassName(StringRef Name) { if (!Name.consume_front("repeat<") || !Name.consume_back(">")) - return None; + return std::nullopt; int Count; if (Name.getAsInteger(0, Count) || Count <= 0) - return None; + return std::nullopt; return Count; } static std::optional parseDevirtPassName(StringRef Name) { if (!Name.consume_front("devirt<") || !Name.consume_back(">")) - return None; + return std::nullopt; int Count; if (Name.getAsInteger(0, Count) || Count < 0) - return None; + return std::nullopt; return Count; } @@ -1060,7 +1060,7 @@ do { // If we try to pop the outer pipeline we have unbalanced parentheses. if (PipelineStack.size() == 1) - return None; + return std::nullopt; PipelineStack.pop_back(); } while (Text.consume_front(")")); @@ -1072,12 +1072,12 @@ // Otherwise, the end of an inner pipeline always has to be followed by // a comma, and then we can continue. if (!Text.consume_front(",")) - return None; + return std::nullopt; } if (PipelineStack.size() > 1) // Unbalanced paretheses. 
- return None; + return std::nullopt; assert(PipelineStack.back() == &ResultPipeline && "Wrong pipeline at the bottom of the stack!"); diff --git a/llvm/lib/Passes/PassBuilderBindings.cpp b/llvm/lib/Passes/PassBuilderBindings.cpp --- a/llvm/lib/Passes/PassBuilderBindings.cpp +++ b/llvm/lib/Passes/PassBuilderBindings.cpp @@ -53,7 +53,7 @@ Module *Mod = unwrap(M); PassInstrumentationCallbacks PIC; - PassBuilder PB(Machine, PassOpts->PTO, None, &PIC); + PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC); LoopAnalysisManager LAM; FunctionAnalysisManager FAM; diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -1877,7 +1877,7 @@ return "Unable to find dot executable."; StringRef Args[] = {DotBinary, "-Tpdf", "-o", PDFFile, DotFile}; - int Result = sys::ExecuteAndWait(*DotExe, Args, None); + int Result = sys::ExecuteAndWait(*DotExe, Args, std::nullopt); if (Result < 0) return "Error executing system dot."; diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -558,7 +558,7 @@ // Complete any remaining active regions. if (!ActiveRegions.empty()) - completeRegionsUntil(None, 0); + completeRegionsUntil(std::nullopt, 0); } /// Sort a nested sequence of regions from a single file. @@ -684,7 +684,7 @@ IsNotExpandedFile[CR.ExpandedFileID] = false; int I = IsNotExpandedFile.find_first(); if (I == -1) - return None; + return std::nullopt; return I; } @@ -695,7 +695,7 @@ Optional I = findMainViewFileID(Function); if (I && SourceFile == Function.Filenames[*I]) return I; - return None; + return std::nullopt; } static bool isExpansion(const CountedRegion &R, unsigned FileID) { diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp --- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp @@ -817,8 +817,8 @@ // In Version4, function records are not affixed to coverage headers. Read // the records from their dedicated section. if (Version >= CovMapVersion::Version4) - return Reader->readFunctionRecords(FuncRecBuf, FuncRecBufEnd, None, nullptr, - nullptr); + return Reader->readFunctionRecords(FuncRecBuf, FuncRecBufEnd, std::nullopt, + nullptr, nullptr); return Error::success(); } diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -1824,7 +1824,7 @@ SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) { if (auto Key = Remappings->lookup(Fname)) return NameMap.lookup(Key); - return None; + return std::nullopt; } /// Prepare a memory buffer for the contents of \p Filename. 
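The PassBuilder hunks above show the now-std::optional pipeline-name parsers, e.g. parseRepeatPassName stripping a "repeat<" prefix and ">" suffix and then requiring a positive integer. A standalone re-creation of that parsing shape using std::string_view and std::from_chars; the helper names are mine, and LLVM itself uses StringRef::consume_front/consume_back and getAsInteger instead:

// Illustrative "repeat<N>" parser: strip a fixed prefix and suffix, then parse
// a positive integer, returning std::nullopt on any failure.
#include <charconv>
#include <iostream>
#include <optional>
#include <string>
#include <string_view>

static std::optional<int> parseRepeatCount(std::string_view Name) {
  constexpr std::string_view Prefix = "repeat<", Suffix = ">";
  if (Name.substr(0, Prefix.size()) != Prefix)
    return std::nullopt;
  Name.remove_prefix(Prefix.size());
  if (Name.size() < Suffix.size() ||
      Name.substr(Name.size() - Suffix.size()) != Suffix)
    return std::nullopt;
  Name.remove_suffix(Suffix.size());

  int Count = 0;
  auto [Ptr, Ec] = std::from_chars(Name.data(), Name.data() + Name.size(), Count);
  if (Ec != std::errc() || Ptr != Name.data() + Name.size() || Count <= 0)
    return std::nullopt;
  return Count;
}

int main() {
  for (std::string_view S : {"repeat<4>", "repeat<0>", "devirt<2>", "repeat<x>"}) {
    auto C = parseRepeatCount(S);
    std::cout << S << " -> " << (C ? std::to_string(*C) : "nullopt") << "\n";
  }
}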
diff --git a/llvm/lib/Remarks/BitstreamRemarkParser.h b/llvm/lib/Remarks/BitstreamRemarkParser.h --- a/llvm/lib/Remarks/BitstreamRemarkParser.h +++ b/llvm/lib/Remarks/BitstreamRemarkParser.h @@ -77,8 +77,8 @@ }; Expected> createBitstreamParserFromMeta( - StringRef Buf, Optional StrTab = None, - Optional ExternalFilePrependPath = None); + StringRef Buf, Optional StrTab = std::nullopt, + Optional ExternalFilePrependPath = std::nullopt); } // end namespace remarks } // end namespace llvm diff --git a/llvm/lib/Remarks/BitstreamRemarkParser.cpp b/llvm/lib/Remarks/BitstreamRemarkParser.cpp --- a/llvm/lib/Remarks/BitstreamRemarkParser.cpp +++ b/llvm/lib/Remarks/BitstreamRemarkParser.cpp @@ -501,7 +501,7 @@ std::unique_ptr Result = std::make_unique(); Remark &R = *Result; - if (StrTab == None) + if (StrTab == std::nullopt) return createStringError( std::make_error_code(std::errc::invalid_argument), "Error while parsing BLOCK_REMARK: missing string table."); diff --git a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp --- a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp +++ b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp @@ -245,19 +245,19 @@ switch (ContainerType) { case BitstreamRemarkContainerType::SeparateRemarksMeta: - assert(StrTab != None && *StrTab != nullptr); + assert(StrTab != std::nullopt && *StrTab != nullptr); emitMetaStrTab(**StrTab); - assert(Filename != None); + assert(Filename != std::nullopt); emitMetaExternalFile(*Filename); break; case BitstreamRemarkContainerType::SeparateRemarksFile: - assert(RemarkVersion != None); + assert(RemarkVersion != std::nullopt); emitMetaRemarkVersion(*RemarkVersion); break; case BitstreamRemarkContainerType::Standalone: - assert(RemarkVersion != None); + assert(RemarkVersion != std::nullopt); emitMetaRemarkVersion(*RemarkVersion); - assert(StrTab != None && *StrTab != nullptr); + assert(StrTab != std::nullopt && *StrTab != nullptr); emitMetaStrTab(**StrTab); break; } @@ -297,7 +297,7 @@ R.clear(); unsigned Key = StrTab.add(Arg.Key).first; unsigned Val = StrTab.add(Arg.Val).first; - bool HasDebugLoc = Arg.Loc != None; + bool HasDebugLoc = Arg.Loc != std::nullopt; R.push_back(HasDebugLoc ? RECORD_REMARK_ARG_WITH_DEBUGLOC : RECORD_REMARK_ARG_WITHOUT_DEBUGLOC); R.push_back(Key); @@ -353,7 +353,7 @@ Helper.ContainerType == BitstreamRemarkContainerType::Standalone; BitstreamMetaSerializer MetaSerializer( OS, Helper, - IsStandalone ? &*StrTab : Optional(None)); + IsStandalone ? &*StrTab : Optional(std::nullopt)); MetaSerializer.emit(); DidSetUp = true; } diff --git a/llvm/lib/Remarks/RemarkLinker.cpp b/llvm/lib/Remarks/RemarkLinker.cpp --- a/llvm/lib/Remarks/RemarkLinker.cpp +++ b/llvm/lib/Remarks/RemarkLinker.cpp @@ -78,7 +78,7 @@ Expected> MaybeParser = createRemarkParserFromMeta( - *RemarkFormat, Buffer, /*StrTab=*/None, + *RemarkFormat, Buffer, /*StrTab=*/std::nullopt, PrependPath ? Optional(StringRef(*PrependPath)) : Optional()); if (!MaybeParser) diff --git a/llvm/lib/Remarks/RemarkParser.cpp b/llvm/lib/Remarks/RemarkParser.cpp --- a/llvm/lib/Remarks/RemarkParser.cpp +++ b/llvm/lib/Remarks/RemarkParser.cpp @@ -113,7 +113,7 @@ std::optional Err; CParser(Format ParserFormat, StringRef Buf, - std::optional StrTab = None) + std::optional StrTab = std::nullopt) : TheParser(cantFail( StrTab ? 
createRemarkParser(ParserFormat, Buf, std::move(*StrTab)) : createRemarkParser(ParserFormat, Buf))) {} diff --git a/llvm/lib/Remarks/RemarkStreamer.cpp b/llvm/lib/Remarks/RemarkStreamer.cpp --- a/llvm/lib/Remarks/RemarkStreamer.cpp +++ b/llvm/lib/Remarks/RemarkStreamer.cpp @@ -27,7 +27,8 @@ std::unique_ptr RemarkSerializer, Optional FilenameIn) : RemarkSerializer(std::move(RemarkSerializer)), - Filename(FilenameIn ? Optional(FilenameIn->str()) : None) {} + Filename(FilenameIn ? Optional(FilenameIn->str()) + : std::nullopt) {} Error RemarkStreamer::setFilter(StringRef Filter) { Regex R = Regex(Filter); diff --git a/llvm/lib/Remarks/YAMLRemarkParser.h b/llvm/lib/Remarks/YAMLRemarkParser.h --- a/llvm/lib/Remarks/YAMLRemarkParser.h +++ b/llvm/lib/Remarks/YAMLRemarkParser.h @@ -107,10 +107,9 @@ Expected parseStr(yaml::KeyValueNode &Node) override; }; -Expected> -createYAMLParserFromMeta(StringRef Buf, - Optional StrTab = None, - Optional ExternalFilePrependPath = None); +Expected> createYAMLParserFromMeta( + StringRef Buf, Optional StrTab = std::nullopt, + Optional ExternalFilePrependPath = std::nullopt); } // end namespace remarks } // end namespace llvm diff --git a/llvm/lib/Remarks/YAMLRemarkParser.cpp b/llvm/lib/Remarks/YAMLRemarkParser.cpp --- a/llvm/lib/Remarks/YAMLRemarkParser.cpp +++ b/llvm/lib/Remarks/YAMLRemarkParser.cpp @@ -168,7 +168,7 @@ } YAMLRemarkParser::YAMLRemarkParser(StringRef Buf) - : YAMLRemarkParser(Buf, None) {} + : YAMLRemarkParser(Buf, std::nullopt) {} YAMLRemarkParser::YAMLRemarkParser(StringRef Buf, Optional StrTab) diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp --- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp +++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp @@ -187,7 +187,7 @@ // metadata first and set DidEmitMeta to avoid emitting it again. 
if (Mode == SerializerMode::Standalone && !DidEmitMeta) { std::unique_ptr MetaSerializer = - metaSerializer(OS, /*ExternalFilename=*/None); + metaSerializer(OS, /*ExternalFilename=*/std::nullopt); MetaSerializer->emit(); DidEmitMeta = true; } @@ -243,7 +243,7 @@ void YAMLMetaSerializer::emit() { emitMagic(OS); emitVersion(OS); - emitStrTab(OS, None); + emitStrTab(OS, std::nullopt); if (ExternalFilename) emitExternalFile(OS, *ExternalFilename); } diff --git a/llvm/lib/Support/AArch64TargetParser.cpp b/llvm/lib/Support/AArch64TargetParser.cpp --- a/llvm/lib/Support/AArch64TargetParser.cpp +++ b/llvm/lib/Support/AArch64TargetParser.cpp @@ -73,10 +73,9 @@ StringRef AArch64::resolveCPUAlias(StringRef CPU) { return StringSwitch(CPU) -#define AARCH64_CPU_ALIAS(ALIAS,NAME) \ - .Case(ALIAS, NAME) +#define AARCH64_CPU_ALIAS(ALIAS, NAME) .Case(ALIAS, NAME) #include "../../include/llvm/Support/AArch64TargetParser.def" - .Default(CPU); + .Default(CPU); } StringRef AArch64::getArchExtFeature(StringRef ArchExt) { diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -3900,8 +3900,7 @@ zeroSignificand(); sign = Negative; exponent = semantics->minExponent; - significandParts()[partCountForBits(semantics->precision) - 1] |= - (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth)); + APInt::tcSetBit(significandParts(), semantics->precision - 1); } IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) { diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp --- a/llvm/lib/Support/APInt.cpp +++ b/llvm/lib/Support/APInt.cpp @@ -2952,7 +2952,7 @@ // between them, so they would both be contained between X and X+1. if (!SignChange) { LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n"); - return None; + return std::nullopt; } X += 1; @@ -2964,7 +2964,7 @@ llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) { assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth"); if (A == B) - return llvm::None; + return std::nullopt; return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1); } diff --git a/llvm/lib/Support/CrashRecoveryContext.cpp b/llvm/lib/Support/CrashRecoveryContext.cpp --- a/llvm/lib/Support/CrashRecoveryContext.cpp +++ b/llvm/lib/Support/CrashRecoveryContext.cpp @@ -518,7 +518,7 @@ bool UseBackgroundPriority = hasThreadBackgroundPriority(); RunSafelyOnThreadInfo Info = { Fn, this, UseBackgroundPriority, false }; llvm::thread Thread(RequestedStackSize == 0 - ? llvm::None + ? std::nullopt : llvm::Optional(RequestedStackSize), RunSafelyOnThread_Dispatch, &Info); Thread.join(); diff --git a/llvm/lib/Support/DJB.cpp b/llvm/lib/Support/DJB.cpp --- a/llvm/lib/Support/DJB.cpp +++ b/llvm/lib/Support/DJB.cpp @@ -65,7 +65,7 @@ } if (AllASCII) return H; - return None; + return std::nullopt; } uint32_t llvm::caseFoldingDjbHash(StringRef Buffer, uint32_t H) { diff --git a/llvm/lib/Support/ELFAttributes.cpp b/llvm/lib/Support/ELFAttributes.cpp --- a/llvm/lib/Support/ELFAttributes.cpp +++ b/llvm/lib/Support/ELFAttributes.cpp @@ -21,14 +21,14 @@ return hasTagPrefix ? tagName : tagName.drop_front(4); } -Optional ELFAttrs::attrTypeFromString(StringRef tag, - TagNameMap tagNameMap) { +std::optional ELFAttrs::attrTypeFromString(StringRef tag, + TagNameMap tagNameMap) { bool hasTagPrefix = tag.startswith("Tag_"); auto tagNameIt = find_if(tagNameMap, [tag, hasTagPrefix](const TagNameItem item) { return item.tagName.drop_front(hasTagPrefix ? 
0 : 4) == tag; }); if (tagNameIt == tagNameMap.end()) - return None; + return std::nullopt; return tagNameIt->attr; } diff --git a/llvm/lib/Support/FormatVariadic.cpp b/llvm/lib/Support/FormatVariadic.cpp --- a/llvm/lib/Support/FormatVariadic.cpp +++ b/llvm/lib/Support/FormatVariadic.cpp @@ -20,7 +20,7 @@ case '+': return AlignStyle::Right; default: - return None; + return std::nullopt; } LLVM_BUILTIN_UNREACHABLE; } diff --git a/llvm/lib/Support/GraphWriter.cpp b/llvm/lib/Support/GraphWriter.cpp --- a/llvm/lib/Support/GraphWriter.cpp +++ b/llvm/lib/Support/GraphWriter.cpp @@ -136,14 +136,14 @@ StringRef Filename, bool wait, std::string &ErrMsg) { if (wait) { - if (sys::ExecuteAndWait(ExecPath, args, None, {}, 0, 0, &ErrMsg)) { + if (sys::ExecuteAndWait(ExecPath, args, std::nullopt, {}, 0, 0, &ErrMsg)) { errs() << "Error: " << ErrMsg << "\n"; return true; } sys::fs::remove(Filename); errs() << " done. \n"; } else { - sys::ExecuteNoWait(ExecPath, args, None, {}, 0, &ErrMsg); + sys::ExecuteNoWait(ExecPath, args, std::nullopt, {}, 0, &ErrMsg); errs() << "Remember to erase graph file: " << Filename << "\n"; } return false; diff --git a/llvm/lib/Support/JSON.cpp b/llvm/lib/Support/JSON.cpp --- a/llvm/lib/Support/JSON.cpp +++ b/llvm/lib/Support/JSON.cpp @@ -40,27 +40,27 @@ llvm::Optional Object::getNull(StringRef K) const { if (auto *V = get(K)) return V->getAsNull(); - return llvm::None; + return std::nullopt; } llvm::Optional Object::getBoolean(StringRef K) const { if (auto *V = get(K)) return V->getAsBoolean(); - return llvm::None; + return std::nullopt; } llvm::Optional Object::getNumber(StringRef K) const { if (auto *V = get(K)) return V->getAsNumber(); - return llvm::None; + return std::nullopt; } llvm::Optional Object::getInteger(StringRef K) const { if (auto *V = get(K)) return V->getAsInteger(); - return llvm::None; + return std::nullopt; } llvm::Optional Object::getString(StringRef K) const { if (auto *V = get(K)) return V->getAsString(); - return llvm::None; + return std::nullopt; } const json::Object *Object::getObject(StringRef K) const { if (auto *V = get(K)) diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -335,13 +335,13 @@ return Optional(LHS.getConstant() == RHS.getConstant()); if (LHS.One.intersects(RHS.Zero) || RHS.One.intersects(LHS.Zero)) return Optional(false); - return None; + return std::nullopt; } Optional KnownBits::ne(const KnownBits &LHS, const KnownBits &RHS) { if (Optional KnownEQ = eq(LHS, RHS)) return Optional(!*KnownEQ); - return None; + return std::nullopt; } Optional KnownBits::ugt(const KnownBits &LHS, const KnownBits &RHS) { @@ -351,13 +351,13 @@ // LHS >u RHS -> true if umin(LHS) > umax(RHS) if (LHS.getMinValue().ugt(RHS.getMaxValue())) return Optional(true); - return None; + return std::nullopt; } Optional KnownBits::uge(const KnownBits &LHS, const KnownBits &RHS) { if (Optional IsUGT = ugt(RHS, LHS)) return Optional(!*IsUGT); - return None; + return std::nullopt; } Optional KnownBits::ult(const KnownBits &LHS, const KnownBits &RHS) { @@ -375,13 +375,13 @@ // LHS >s RHS -> true if smin(LHS) > smax(RHS) if (LHS.getSignedMinValue().sgt(RHS.getSignedMaxValue())) return Optional(true); - return None; + return std::nullopt; } Optional KnownBits::sge(const KnownBits &LHS, const KnownBits &RHS) { if (Optional KnownSGT = sgt(RHS, LHS)) return Optional(!*KnownSGT); - return None; + return std::nullopt; } Optional KnownBits::slt(const KnownBits &LHS, const 
KnownBits &RHS) { diff --git a/llvm/lib/Support/LineIterator.cpp b/llvm/lib/Support/LineIterator.cpp --- a/llvm/lib/Support/LineIterator.cpp +++ b/llvm/lib/Support/LineIterator.cpp @@ -37,7 +37,8 @@ line_iterator::line_iterator(const MemoryBufferRef &Buffer, bool SkipBlanks, char CommentMarker) - : Buffer(Buffer.getBufferSize() ? Optional(Buffer) : None), + : Buffer(Buffer.getBufferSize() ? std::optional(Buffer) + : std::nullopt), CommentMarker(CommentMarker), SkipBlanks(SkipBlanks), CurrentLine(Buffer.getBufferSize() ? Buffer.getBufferStart() : nullptr, 0) { @@ -82,7 +83,7 @@ if (*Pos == '\0') { // We've hit the end of the buffer, reset ourselves to the end state. - Buffer = None; + Buffer = std::nullopt; CurrentLine = StringRef(); return; } diff --git a/llvm/lib/Support/LockFileManager.cpp b/llvm/lib/Support/LockFileManager.cpp --- a/llvm/lib/Support/LockFileManager.cpp +++ b/llvm/lib/Support/LockFileManager.cpp @@ -60,7 +60,7 @@ MemoryBuffer::getFile(LockFileName); if (!MBOrErr) { sys::fs::remove(LockFileName); - return None; + return std::nullopt; } MemoryBuffer &MB = *MBOrErr.get(); @@ -77,7 +77,7 @@ // Delete the lock file. It's invalid anyway. sys::fs::remove(LockFileName); - return None; + return std::nullopt; } static std::error_code getHostID(SmallVectorImpl &HostID) { diff --git a/llvm/lib/Support/OptimizedStructLayout.cpp b/llvm/lib/Support/OptimizedStructLayout.cpp --- a/llvm/lib/Support/OptimizedStructLayout.cpp +++ b/llvm/lib/Support/OptimizedStructLayout.cpp @@ -436,7 +436,7 @@ // Phase 2: repeatedly add the best flexible-offset field until // they're all gone. while (!FlexibleFieldsByAlignment.empty()) { - bool Success = tryAddBestField(None); + bool Success = tryAddBestField(std::nullopt); assert(Success && "didn't find a field with no fixed limit?"); (void) Success; } diff --git a/llvm/lib/Support/Program.cpp b/llvm/lib/Support/Program.cpp --- a/llvm/lib/Support/Program.cpp +++ b/llvm/lib/Support/Program.cpp @@ -23,17 +23,18 @@ //===----------------------------------------------------------------------===// static bool Execute(ProcessInfo &PI, StringRef Program, - ArrayRef Args, Optional> Env, - ArrayRef> Redirects, + ArrayRef Args, + std::optional> Env, + ArrayRef> Redirects, unsigned MemoryLimit, std::string *ErrMsg, BitVector *AffinityMask); int sys::ExecuteAndWait(StringRef Program, ArrayRef Args, - Optional> Env, - ArrayRef> Redirects, + std::optional> Env, + ArrayRef> Redirects, unsigned SecondsToWait, unsigned MemoryLimit, std::string *ErrMsg, bool *ExecutionFailed, - Optional *ProcStat, + std::optional *ProcStat, BitVector *AffinityMask) { assert(Redirects.empty() || Redirects.size() == 3); ProcessInfo PI; @@ -54,8 +55,8 @@ } ProcessInfo sys::ExecuteNoWait(StringRef Program, ArrayRef Args, - Optional> Env, - ArrayRef> Redirects, + std::optional> Env, + ArrayRef> Redirects, unsigned MemoryLimit, std::string *ErrMsg, bool *ExecutionFailed, BitVector *AffinityMask) { assert(Redirects.empty() || Redirects.size() == 3); diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -168,7 +168,7 @@ } return ExtensionInfoIterator->Version; } - return None; + return std::nullopt; } void RISCVISAInfo::addExtension(StringRef ExtName, unsigned MajorVersion, @@ -209,7 +209,7 @@ auto ExtIterator = llvm::find_if(SupportedExperimentalExtensions, FindByName(Ext)); if (ExtIterator == std::end(SupportedExperimentalExtensions)) - return None; + return std::nullopt; return 
ExtIterator->Version; } diff --git a/llvm/lib/Support/ScopedPrinter.cpp b/llvm/lib/Support/ScopedPrinter.cpp --- a/llvm/lib/Support/ScopedPrinter.cpp +++ b/llvm/lib/Support/ScopedPrinter.cpp @@ -31,7 +31,8 @@ startLine() << Label << ":"; if (!Str.empty()) OS << " " << Str; - OS << " (" << format_bytes(Data, None, Data.size(), 1, 0, true) << ")\n"; + OS << " (" << format_bytes(Data, std::nullopt, Data.size(), 1, 0, true) + << ")\n"; } } diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp --- a/llvm/lib/Support/Signals.cpp +++ b/llvm/lib/Support/Signals.cpp @@ -192,8 +192,8 @@ } } - Optional Redirects[] = {InputFile.str(), OutputFile.str(), - StringRef("")}; + std::optional Redirects[] = {InputFile.str(), OutputFile.str(), + StringRef("")}; StringRef Args[] = {"llvm-symbolizer", "--functions=linkage", "--inlining", #ifdef _WIN32 // Pass --relative-address on Windows so that we don't @@ -203,7 +203,7 @@ #endif "--demangle"}; int RunResult = - sys::ExecuteAndWait(LLVMSymbolizerPath, Args, None, Redirects); + sys::ExecuteAndWait(LLVMSymbolizerPath, Args, std::nullopt, Redirects); if (RunResult != 0) return false; diff --git a/llvm/lib/Support/SymbolRemappingReader.cpp b/llvm/lib/Support/SymbolRemappingReader.cpp --- a/llvm/lib/Support/SymbolRemappingReader.cpp +++ b/llvm/lib/Support/SymbolRemappingReader.cpp @@ -52,7 +52,7 @@ .Case("name", FK::Name) .Case("type", FK::Type) .Case("encoding", FK::Encoding) - .Default(None); + .Default(std::nullopt); if (!FragmentKind) return ReportError("Invalid kind, expected 'name', 'type', or 'encoding'," " found '" + Parts[0] + "'"); diff --git a/llvm/lib/Support/Threading.cpp b/llvm/lib/Support/Threading.cpp --- a/llvm/lib/Support/Threading.cpp +++ b/llvm/lib/Support/Threading.cpp @@ -90,7 +90,7 @@ #endif -Optional +std::optional llvm::get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default) { if (Num == "all") return llvm::hardware_concurrency(); @@ -98,7 +98,7 @@ return Default; unsigned V; if (Num.getAsInteger(10, V)) - return None; // malformed 'Num' value + return std::nullopt; // malformed 'Num' value if (V == 0) return Default; diff --git a/llvm/lib/Support/UnicodeNameToCodepoint.cpp b/llvm/lib/Support/UnicodeNameToCodepoint.cpp --- a/llvm/lib/Support/UnicodeNameToCodepoint.cpp +++ b/llvm/lib/Support/UnicodeNameToCodepoint.cpp @@ -294,7 +294,7 @@ bool DoesStartWith = startsWith(Name, "HANGUL SYLLABLE ", Strict, Consummed, NameStart, NeedleStart); if (!DoesStartWith) - return None; + return std::nullopt; Name = Name.substr(Consummed); int L = -1, V = -1, T = -1; Name = Name.substr(findSyllable(Name, Strict, NameStart, L, 0)); @@ -314,7 +314,7 @@ std::uint32_t(T); } // Otherwise, it's an illegal syllable name. 
- return None; + return std::nullopt; } struct GeneratedNamesData { @@ -367,13 +367,13 @@ } return V; } - return None; + return std::nullopt; } static llvm::Optional nameToCodepoint(StringRef Name, bool Strict, BufferType &Buffer) { if (Name.empty()) - return None; + return std::nullopt; llvm::Optional Res = nameToHangulCodePoint(Name, Strict, Buffer); if (!Res) @@ -397,7 +397,7 @@ } return Value; } - return None; + return std::nullopt; } llvm::Optional nameToCodepointStrict(StringRef Name) { @@ -412,7 +412,7 @@ BufferType Buffer; auto Opt = nameToCodepoint(Name, false, Buffer); if (!Opt) - return None; + return std::nullopt; return LooseMatchingResult{*Opt, Buffer}; } diff --git a/llvm/lib/Support/Unix/Process.inc b/llvm/lib/Support/Unix/Process.inc --- a/llvm/lib/Support/Unix/Process.inc +++ b/llvm/lib/Support/Unix/Process.inc @@ -177,7 +177,7 @@ std::string NameStr = Name.str(); const char *Val = ::getenv(NameStr.c_str()); if (!Val) - return None; + return std::nullopt; return std::string(Val); } diff --git a/llvm/lib/Support/Unix/Program.inc b/llvm/lib/Support/Unix/Program.inc --- a/llvm/lib/Support/Unix/Program.inc +++ b/llvm/lib/Support/Unix/Program.inc @@ -95,7 +95,7 @@ return errc::no_such_file_or_directory; } -static bool RedirectIO(Optional Path, int FD, std::string *ErrMsg) { +static bool RedirectIO(std::optional Path, int FD, std::string *ErrMsg) { if (!Path) // Noop return false; std::string File; @@ -172,8 +172,8 @@ } static bool Execute(ProcessInfo &PI, StringRef Program, - ArrayRef Args, Optional> Env, - ArrayRef> Redirects, + ArrayRef Args, std::optional> Env, + ArrayRef> Redirects, unsigned MemoryLimit, std::string *ErrMsg, BitVector *AffinityMask) { if (!llvm::sys::fs::exists(Program)) { @@ -386,7 +386,7 @@ ProcessInfo llvm::sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, bool WaitUntilTerminates, std::string *ErrMsg, - Optional *ProcStat) { + std::optional *ProcStat) { struct sigaction Act, Old; assert(PI.Pid && "invalid pid to wait on, process not started?"); diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc --- a/llvm/lib/Support/Unix/Signals.inc +++ b/llvm/lib/Support/Unix/Signals.inc @@ -412,7 +412,7 @@ } static void InfoSignalHandler(int Sig) { - SaveAndRestore SaveErrnoDuringASignalHandler(errno); + SaveAndRestore SaveErrnoDuringASignalHandler(errno); if (SignalHandlerFunctionType CurrentInfoFunction = InfoSignalFunction) CurrentInfoFunction(); } diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -1013,8 +1013,8 @@ // before. Resolved ToPath must be a File. 
if (!TargetNode || NewLinkNode || !isa(*TargetNode)) return false; - return addFile(NewLink, 0, nullptr, None, None, None, None, - [&](detail::NewInMemoryNodeInfo NNI) { + return addFile(NewLink, 0, nullptr, std::nullopt, std::nullopt, std::nullopt, + std::nullopt, [&](detail::NewInMemoryNodeInfo NNI) { return std::make_unique( NNI.Path.str(), *cast(*TargetNode)); @@ -1610,7 +1610,7 @@ SmallString<12> Storage; StringRef Value; if (!parseScalarString(N, Value, Storage)) - return None; + return std::nullopt; if (Value.equals_insensitive("fallthrough")) { return RedirectingFileSystem::RedirectKind::Fallthrough; @@ -1619,7 +1619,7 @@ } else if (Value.equals_insensitive("redirect-only")) { return RedirectingFileSystem::RedirectKind::RedirectOnly; } - return None; + return std::nullopt; } struct KeyStatus { diff --git a/llvm/lib/Support/Windows/Program.inc b/llvm/lib/Support/Windows/Program.inc --- a/llvm/lib/Support/Windows/Program.inc +++ b/llvm/lib/Support/Windows/Program.inc @@ -127,7 +127,7 @@ return R != 0; } -static HANDLE RedirectIO(Optional Path, int fd, +static HANDLE RedirectIO(std::optional Path, int fd, std::string *ErrMsg) { HANDLE h; if (!Path) { @@ -172,8 +172,8 @@ } // namespace llvm static bool Execute(ProcessInfo &PI, StringRef Program, - ArrayRef Args, Optional> Env, - ArrayRef> Redirects, + ArrayRef Args, std::optional> Env, + ArrayRef> Redirects, unsigned MemoryLimit, std::string *ErrMsg, BitVector *AffinityMask) { if (!sys::fs::can_execute(Program)) { @@ -410,7 +410,7 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, bool WaitUntilChildTerminates, std::string *ErrMsg, - Optional *ProcStat) { + std::optional *ProcStat) { assert(PI.Pid && "invalid pid to wait on, process not started?"); assert((PI.Process && PI.Process != INVALID_HANDLE_VALUE) && "invalid process handle to wait on, process not started?"); diff --git a/llvm/lib/Support/Windows/Threading.inc b/llvm/lib/Support/Windows/Threading.inc --- a/llvm/lib/Support/Windows/Threading.inc +++ b/llvm/lib/Support/Windows/Threading.inc @@ -248,7 +248,7 @@ // Finds the proper CPU socket where a thread number should go. Returns 'None' // if the thread shall remain on the actual CPU socket. 
-Optional +std::optional llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const { ArrayRef Groups = getProcessorGroups(); // Only one CPU socket in the system or process affinity was set, no need to @@ -273,7 +273,7 @@ // Assign the current thread to a more appropriate CPU socket or CPU group void llvm::ThreadPoolStrategy::apply_thread_strategy( unsigned ThreadPoolNum) const { - Optional Socket = compute_cpu_socket(ThreadPoolNum); + std::optional Socket = compute_cpu_socket(ThreadPoolNum); if (!Socket) return; ArrayRef Groups = getProcessorGroups(); diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp --- a/llvm/lib/Support/YAMLParser.cpp +++ b/llvm/lib/Support/YAMLParser.cpp @@ -259,8 +259,9 @@ Token getNext(); void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, - ArrayRef Ranges = None) { - SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ None, ShowColors); + ArrayRef Ranges = std::nullopt) { + SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ std::nullopt, + ShowColors); } void setError(const Twine &Message, StringRef::iterator Position) { @@ -771,7 +772,7 @@ case 'N': return false; default: - return None; + return std::nullopt; } case 2: switch (S.front()) { @@ -782,7 +783,7 @@ case 'o': if (S[1] == 'n') //[Oo]n return true; - return None; + return std::nullopt; case 'N': if (S[1] == 'O') // NO return false; @@ -790,9 +791,9 @@ case 'n': if (S[1] == 'o') //[Nn]o return false; - return None; + return std::nullopt; default: - return None; + return std::nullopt; } case 3: switch (S.front()) { @@ -803,7 +804,7 @@ case 'o': if (S.drop_front() == "ff") //[Oo]ff return false; - return None; + return std::nullopt; case 'Y': if (S.drop_front() == "ES") // YES return true; @@ -811,9 +812,9 @@ case 'y': if (S.drop_front() == "es") //[Yy]es return true; - return None; + return std::nullopt; default: - return None; + return std::nullopt; } case 4: switch (S.front()) { @@ -824,9 +825,9 @@ case 't': if (S.drop_front() == "rue") //[Tt]rue return true; - return None; + return std::nullopt; default: - return None; + return std::nullopt; } case 5: switch (S.front()) { @@ -837,12 +838,12 @@ case 'f': if (S.drop_front() == "alse") //[Ff]alse return false; - return None; + return std::nullopt; default: - return None; + return std::nullopt; } default: - return None; + return std::nullopt; } } diff --git a/llvm/lib/TableGen/Parser.cpp b/llvm/lib/TableGen/Parser.cpp --- a/llvm/lib/TableGen/Parser.cpp +++ b/llvm/lib/TableGen/Parser.cpp @@ -28,7 +28,7 @@ auto *MainFileBuffer = SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID()); Records.saveInputFilename(MainFileBuffer->getBufferIdentifier().str()); - TGParser Parser(SrcMgr, /*Macros=*/None, Records, + TGParser Parser(SrcMgr, /*Macros=*/std::nullopt, Records, /*NoWarnOnUnusedTemplateArgs=*/false, /*TrackReferenceLocs=*/true); bool ParseResult = Parser.ParseFile(); diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -2648,9 +2648,9 @@ Record::getValueAsOptionalString(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); if (!R || !R->getValue()) - return llvm::None; + return std::nullopt; if (isa(R->getValue())) - return llvm::None; + return std::nullopt; if (StringInit *SI = dyn_cast(R->getValue())) return SI->getValue(); diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ 
-2823,7 +2823,7 @@ SMLoc ValLoc = Lex.getLoc(); Init *Val = ParseValue(CurRec, Type); if (!Val || - SetValue(CurRec, ValLoc, DeclName, None, Val, + SetValue(CurRec, ValLoc, DeclName, std::nullopt, Val, /*AllowSelfAssignment=*/false, /*OverrideDefLoc=*/false)) { // Return the name, even if an error is thrown. This is so that we can // continue to make some progress, even without the value having been diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -209,6 +209,10 @@ "predictable-select-expensive", "PredictableSelectIsExpensive", "true", "Prefer likely predicted branches over selects">; +def FeatureEnableSelectOptimize : SubtargetFeature< + "enable-select-opt", "EnableSelectOptimize", "true", + "Enable the select optimize pass for select loop heuristics">; + def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move", "HasCustomCheapAsMoveHandling", "true", "Use custom handling of cheap instructions">; @@ -743,6 +747,7 @@ FeatureFuseAdrpAdd, FeatureFuseLiterals, FeaturePostRAScheduler, + FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; def TuneA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65", @@ -750,36 +755,42 @@ FeatureFuseAES, FeatureFuseAddress, FeatureFuseAdrpAdd, - FeatureFuseLiterals]>; + FeatureFuseLiterals, + FeatureEnableSelectOptimize]>; def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", "Cortex-A72 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureFuseLiterals]>; + FeatureFuseLiterals, + FeatureEnableSelectOptimize]>; def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", "Cortex-A73 ARM processors", [ FeatureFuseAES, - FeatureFuseAdrpAdd]>; + FeatureFuseAdrpAdd, + FeatureEnableSelectOptimize]>; def TuneA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", "Cortex-A75 ARM processors", [ FeatureFuseAES, - FeatureFuseAdrpAdd]>; + FeatureFuseAdrpAdd, + FeatureEnableSelectOptimize]>; def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", "Cortex-A76 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureLSLFast]>; + FeatureLSLFast, + FeatureEnableSelectOptimize]>; def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", "Cortex-A77 ARM processors", [ FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureLSLFast]>; + FeatureLSLFast, + FeatureEnableSelectOptimize]>; def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78", "Cortex-A78 ARM processors", [ @@ -787,7 +798,8 @@ FeatureFuseAES, FeatureFuseAdrpAdd, FeatureLSLFast, - FeaturePostRAScheduler]>; + FeaturePostRAScheduler, + FeatureEnableSelectOptimize]>; def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily", "CortexA78C", @@ -796,7 +808,8 @@ FeatureFuseAES, FeatureFuseAdrpAdd, FeatureLSLFast, - FeaturePostRAScheduler]>; + FeaturePostRAScheduler, + FeatureEnableSelectOptimize]>; def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710", "Cortex-A710 ARM processors", [ @@ -804,7 +817,8 @@ FeatureFuseAES, FeatureFuseAdrpAdd, FeatureLSLFast, - FeaturePostRAScheduler]>; + FeaturePostRAScheduler, + FeatureEnableSelectOptimize]>; def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715", "Cortex-A715 ARM processors", [ @@ -812,7 +826,8 @@ FeaturePostRAScheduler, FeatureCmpBccFusion, FeatureLSLFast, - FeatureFuseAdrpAdd]>; + FeatureFuseAdrpAdd, + FeatureEnableSelectOptimize]>; def TuneR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily", 
"CortexR82", @@ -825,7 +840,8 @@ FeatureFuseAES, FeatureFuseAdrpAdd, FeatureLSLFast, - FeaturePostRAScheduler]>; + FeaturePostRAScheduler, + FeatureEnableSelectOptimize]>; def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2", "Cortex-X2 ARM processors", [ @@ -833,14 +849,16 @@ FeatureFuseAES, FeatureFuseAdrpAdd, FeatureLSLFast, - FeaturePostRAScheduler]>; + FeaturePostRAScheduler, + FeatureEnableSelectOptimize]>; def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3", "Cortex-X3 ARM processors", [ FeatureLSLFast, FeatureFuseAdrpAdd, FeatureFuseAES, - FeaturePostRAScheduler]>; + FeaturePostRAScheduler, + FeatureEnableSelectOptimize]>; def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX", "Fujitsu A64FX processors", [ @@ -1024,34 +1042,39 @@ FeatureFuseAES, FeatureFuseAdrpAdd, FeatureLSLFast, - FeaturePostRAScheduler]>; + FeaturePostRAScheduler, + FeatureEnableSelectOptimize]>; def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2", "Neoverse N2 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, FeatureLSLFast, - FeaturePostRAScheduler]>; + FeaturePostRAScheduler, + FeatureEnableSelectOptimize]>; def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Neoverse512TVB", "Neoverse 512-TVB ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, FeatureLSLFast, - FeaturePostRAScheduler]>; + FeaturePostRAScheduler, + FeatureEnableSelectOptimize]>; def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1", "Neoverse V1 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, FeatureLSLFast, - FeaturePostRAScheduler]>; + FeaturePostRAScheduler, + FeatureEnableSelectOptimize]>; def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2", "Neoverse V2 ARM processors", [ FeatureFuseAES, FeatureLSLFast, - FeaturePostRAScheduler]>; + FeaturePostRAScheduler, + FeatureEnableSelectOptimize]>; def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", "Qualcomm Saphira processors", [ @@ -1262,7 +1285,8 @@ // FeatureFuseAdrpAdd is enabled under Generic to allow linker merging // optimizations. def : ProcessorModel<"generic", CortexA55Model, ProcessorFeatures.Generic, - [FeatureFuseAES, FeatureFuseAdrpAdd, FeaturePostRAScheduler]>; + [FeatureFuseAES, FeatureFuseAdrpAdd, FeaturePostRAScheduler, + FeatureEnableSelectOptimize]>; def : ProcessorModel<"cortex-a35", CortexA53Model, ProcessorFeatures.A53, [TuneA35]>; def : ProcessorModel<"cortex-a34", CortexA53Model, ProcessorFeatures.A53, diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -256,7 +256,7 @@ void AArch64AsmPrinter::emitFunctionHeaderComment() { const AArch64FunctionInfo *FI = MF->getInfo(); Optional OutlinerString = FI->getOutliningStyle(); - if (OutlinerString != None) + if (OutlinerString != std::nullopt) OutStreamer->getCommentOS() << ' ' << OutlinerString; } diff --git a/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp b/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp --- a/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp +++ b/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp @@ -78,7 +78,7 @@ // computing a size and bail out. 
if (MI.getOpcode() == AArch64::INLINEASM || MI.getOpcode() == AArch64::INLINEASM_BR) - return None; + return std::nullopt; Size += TII->getInstSizeInBytes(MI); } return Size; diff --git a/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp --- a/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp +++ b/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp @@ -242,7 +242,7 @@ switch (MI.getOpcode()) { default: - return None; + return std::nullopt; case AArch64::LD1i64: case AArch64::LD2i64: @@ -645,7 +645,7 @@ // Loads from the stack pointer don't get prefetched. Register BaseReg = MI.getOperand(BaseRegIdx).getReg(); if (BaseReg == AArch64::SP || BaseReg == AArch64::WSP) - return None; + return std::nullopt; LoadInfo LI; LI.DestReg = DestRegIdx == -1 ? Register() : MI.getOperand(DestRegIdx).getReg(); @@ -665,7 +665,7 @@ Off = 0; else if (LI.OffsetOpnd->isGlobal() || LI.OffsetOpnd->isSymbol() || LI.OffsetOpnd->isCPI()) - return None; + return std::nullopt; else if (LI.OffsetOpnd->isReg()) Off = (1 << 5) | TRI->getEncodingValue(LI.OffsetOpnd->getReg()); else diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -3576,7 +3576,7 @@ *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg, /*PreferFP=*/false, /*ForSimm=*/true); FrameReg = Reg; - FrameRegUpdate = None; + FrameRegUpdate = std::nullopt; mergeMemRefs(TagStores, CombinedMemRefs); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -541,12 +541,19 @@ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::CTPOP, MVT::i32, Custom); - setOperationAction(ISD::CTPOP, MVT::i64, Custom); - setOperationAction(ISD::CTPOP, MVT::i128, Custom); + if (Subtarget->hasCSSC()) { + setOperationAction(ISD::CTPOP, MVT::i32, Legal); + setOperationAction(ISD::CTPOP, MVT::i64, Legal); + setOperationAction(ISD::CTPOP, MVT::i128, Expand); + setOperationAction(ISD::PARITY, MVT::i128, Expand); + } else { + setOperationAction(ISD::CTPOP, MVT::i32, Custom); + setOperationAction(ISD::CTPOP, MVT::i64, Custom); + setOperationAction(ISD::CTPOP, MVT::i128, Custom); - setOperationAction(ISD::PARITY, MVT::i64, Custom); - setOperationAction(ISD::PARITY, MVT::i128, Custom); + setOperationAction(ISD::PARITY, MVT::i64, Custom); + setOperationAction(ISD::PARITY, MVT::i128, Custom); + } setOperationAction(ISD::ABS, MVT::i32, Custom); setOperationAction(ISD::ABS, MVT::i64, Custom); @@ -4329,12 +4336,12 @@ static Optional getConstantLaneNumOfExtractHalfOperand(SDValue &Op) { SDNode *OpNode = Op.getNode(); if (OpNode->getOpcode() != ISD::EXTRACT_VECTOR_ELT) - return None; + return std::nullopt; EVT VT = OpNode->getOperand(0).getValueType(); ConstantSDNode *C = dyn_cast(OpNode->getOperand(1)); if (!VT.isFixedLengthVector() || VT.getVectorNumElements() != 2 || !C) - return None; + return std::nullopt; return C->getZExtValue(); } @@ -4657,7 +4664,7 @@ if (S == "__arm_tpidr2_restore") return SMEAttrs(SMEAttrs::SM_Compatible | SMEAttrs::ZA_Shared); } - return None; + return std::nullopt; } SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, @@ -4817,7 +4824,7 @@ Op.getConstantOperandVal(2) - 
Op.getConstantOperandVal(1); Optional PredPattern = getSVEPredPatternFromNumElements(NumActiveElems); - if ((PredPattern != None) && + if ((PredPattern != std::nullopt) && NumActiveElems <= (MinSVEVectorSize / ElementSize)) return getPTrue(DAG, dl, Op.getValueType(), *PredPattern); } @@ -8413,8 +8420,16 @@ return SDValue(); bool IsParity = Op.getOpcode() == ISD::PARITY; + SDValue Val = Op.getOperand(0); + SDLoc DL(Op); + EVT VT = Op.getValueType(); - // While there is no integer popcount instruction, it can + // for i32, general parity function using EORs is more efficient compared to + // using floating point + if (VT == MVT::i32 && IsParity) + return SDValue(); + + // If there is no CNT instruction available, GPR popcount can // be more efficiently lowered to the following sequence that uses // AdvSIMD registers/instructions as long as the copies to/from // the AdvSIMD registers are cheap. @@ -8422,10 +8437,6 @@ // CNT V0.8B, V0.8B // 8xbyte pop-counts // ADDV B0, V0.8B // sum 8xbyte pop-counts // UMOV X0, V0.B[0] // copy byte result back to integer reg - SDValue Val = Op.getOperand(0); - SDLoc DL(Op); - EVT VT = Op.getValueType(); - if (VT == MVT::i32 || VT == MVT::i64) { if (VT == MVT::i32) Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val); @@ -9003,7 +9014,7 @@ Optional PredPattern; if (Ty.isScalableVector() && IdxVal < 0 && (PredPattern = getSVEPredPatternFromNumElements(std::abs(IdxVal))) != - None) { + std::nullopt) { SDLoc DL(Op); // Create a predicate where all but the last -IdxVal elements are false. @@ -16953,10 +16964,10 @@ // (CSEL 0 1 CC Cond) => !CC static std::optional getCSETCondCode(SDValue Op) { if (Op.getOpcode() != AArch64ISD::CSEL) - return None; + return std::nullopt; auto CC = static_cast(Op.getConstantOperandVal(2)); if (CC == AArch64CC::AL || CC == AArch64CC::NV) - return None; + return std::nullopt; SDValue OpLHS = Op.getOperand(0); SDValue OpRHS = Op.getOperand(1); if (isOneConstant(OpLHS) && isNullConstant(OpRHS)) @@ -16964,7 +16975,7 @@ if (isNullConstant(OpLHS) && isOneConstant(OpRHS)) return getInvertedCondCode(CC); - return None; + return std::nullopt; } // (ADC{S} l r (CMP (CSET HS carry) 1)) => (ADC{S} l r carry) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1652,10 +1652,10 @@ SmallVectorImpl *CCUseInstrs) { MachineBasicBlock *CmpParent = CmpInstr.getParent(); if (MI.getParent() != CmpParent) - return None; + return std::nullopt; if (areCFlagsAliveInSuccessors(CmpParent)) - return None; + return std::nullopt; UsedNZCV NZCVUsedAfterCmp; for (MachineInstr &Instr : instructionsWithoutDebug( @@ -1663,7 +1663,7 @@ if (Instr.readsRegister(AArch64::NZCV, &TRI)) { AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr); if (CC == AArch64CC::Invalid) // Unsupported conditional instruction - return None; + return std::nullopt; NZCVUsedAfterCmp |= getUsedNZCV(CC); if (CCUseInstrs) CCUseInstrs->push_back(&Instr); @@ -2601,10 +2601,10 @@ int64_t Offset; // Filled with the offset of MI. 
bool OffsetIsScalable; if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI)) - return None; + return std::nullopt; if (!Base->isReg()) - return None; + return std::nullopt; ExtAddrMode AM; AM.BaseReg = Base->getReg(); AM.Displacement = Offset; @@ -8097,7 +8097,7 @@ return DestSourcePair{MI.getOperand(0), MI.getOperand(2)}; } - return None; + return std::nullopt; } Optional AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, @@ -8109,11 +8109,11 @@ // destination register. const MachineOperand &Op0 = MI.getOperand(0); if (!Op0.isReg() || Reg != Op0.getReg()) - return None; + return std::nullopt; switch (MI.getOpcode()) { default: - return None; + return std::nullopt; case AArch64::SUBWri: case AArch64::SUBXri: case AArch64::SUBSWri: @@ -8127,7 +8127,7 @@ // TODO: Third operand can be global address (usually some string). if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() || !MI.getOperand(2).isImm()) - return None; + return std::nullopt; int Shift = MI.getOperand(3).getImm(); assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12"); Offset = Sign * (MI.getOperand(2).getImm() << Shift); @@ -8145,7 +8145,7 @@ const TargetRegisterInfo *TRI) { auto DestSrc = TII->isCopyInstr(MI); if (!DestSrc) - return None; + return std::nullopt; Register DestReg = DestSrc->Destination->getReg(); Register SrcReg = DestSrc->Source->getReg(); @@ -8171,7 +8171,7 @@ assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) && "Unhandled ORR[XW]rs copy case"); - return None; + return std::nullopt; } Optional @@ -8185,10 +8185,10 @@ // MOVZWi may be used for producing zero-extended 32-bit immediates in // 64-bit parameters, so we need to consider super-registers. if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg)) - return None; + return std::nullopt; if (!MI.getOperand(1).isImm()) - return None; + return std::nullopt; int64_t Immediate = MI.getOperand(1).getImm(); int Shift = MI.getOperand(2).getImm(); return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift), diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -8529,7 +8529,7 @@ // General Data-Processing Instructions (FEAT_V94_DP) //===----------------------------------------------------------------------===// defm ABS : OneOperandData<0b001000, "abs">, Requires<[HasCSSC]>; -defm CNT : OneOperandData<0b000111, "cnt">, Requires<[HasCSSC]>; +defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>; defm CTZ : OneOperandData<0b000110, "ctz">, Requires<[HasCSSC]>; defm SMAX : ComparisonOp<0, 0, "smax">, Requires<[HasCSSC]>; diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -99,7 +99,7 @@ // If not none, RenameReg can be used to rename the result register of the // first store in a pair. Currently this only works when merging stores // forward. 
- Optional RenameReg = None; + Optional RenameReg = std::nullopt; LdStPairFlags() = default; @@ -110,7 +110,7 @@ int getSExtIdx() const { return SExtIdx; } void setRenameReg(MCPhysReg R) { RenameReg = R; } - void clearRenameReg() { RenameReg = None; } + void clearRenameReg() { RenameReg = std::nullopt; } Optional getRenameReg() const { return RenameReg; } }; @@ -1509,7 +1509,7 @@ } LLVM_DEBUG(dbgs() << "No rename register found from " << TRI->getRegClassName(RegClass) << "\n"); - return None; + return std::nullopt; } /// Scan the instructions looking for a load/store that can be combined with the diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp --- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp +++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp @@ -176,7 +176,7 @@ [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional { if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1)) return std::make_pair(Opc, Opc); - return None; + return std::nullopt; }, [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, unsigned Imm1, Register SrcReg, Register NewTmpReg, @@ -342,7 +342,7 @@ return std::make_pair(PosOpc, PosOpc); if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) return std::make_pair(NegOpc, NegOpc); - return None; + return std::nullopt; }, [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, unsigned Imm1, Register SrcReg, Register NewTmpReg, @@ -375,13 +375,13 @@ else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) OP = NegOpcs; else - return None; + return std::nullopt; // Check conditional uses last since it is expensive for scanning // proceeding instructions MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg()); Optional NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI); if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V) - return None; + return std::nullopt; return OP; }, [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, diff --git a/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp b/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp --- a/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp +++ b/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp @@ -255,7 +255,7 @@ // - Any other instruction may benefit from being pinned to offset 0. LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n"); if (!ClFirstSlot) - return None; + return std::nullopt; DenseMap RetagScore; SlotWithTag MaxScoreST{-1, -1}; @@ -305,7 +305,7 @@ } if (MaxScoreST.FI < 0) - return None; + return std::nullopt; // If FI's tag is already 0, we are done. if (MaxScoreST.Tag == 0) diff --git a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp --- a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp +++ b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp @@ -88,7 +88,7 @@ // If a subtarget does not define resources for STPQi, bail here. 
if (SCDesc->isValid() && !SCDesc->isVariant()) { - unsigned ResLenWithSTP = BBTrace.getResourceLength(None, SCDesc); + unsigned ResLenWithSTP = BBTrace.getResourceLength(std::nullopt, SCDesc); if (ResLenWithSTP > ResLength) { LLVM_DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber() << " resources " << ResLength << " -> " << ResLenWithSTP diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h @@ -17,6 +17,7 @@ #include "AArch64Subtarget.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" +#include namespace llvm { @@ -28,8 +29,9 @@ public: AArch64TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT, bool IsLittleEndian); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT, bool IsLittleEndian); ~AArch64TargetMachine() override; const AArch64Subtarget *getSubtargetImpl(const Function &F) const override; @@ -69,24 +71,26 @@ // class AArch64leTargetMachine : public AArch64TargetMachine { virtual void anchor(); + public: AArch64leTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, CodeGenOpt::Level OL, - bool JIT); + std::optional RM, + std::optional CM, + CodeGenOpt::Level OL, bool JIT); }; // AArch64 big endian target machine. // class AArch64beTargetMachine : public AArch64TargetMachine { virtual void anchor(); + public: AArch64beTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, CodeGenOpt::Level OL, - bool JIT); + std::optional RM, + std::optional CM, + CodeGenOpt::Level OL, bool JIT); }; } // end namespace llvm diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -50,6 +50,7 @@ #include "llvm/Transforms/CFGuard.h" #include "llvm/Transforms/Scalar.h" #include +#include #include using namespace llvm; @@ -130,6 +131,11 @@ cl::desc("Enable optimizations on complex GEPs"), cl::init(false)); +static cl::opt + EnableSelectOpt("aarch64-select-opt", cl::Hidden, + cl::desc("Enable select to branch optimizations"), + cl::init(true)); + static cl::opt BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true), cl::desc("Relax out of range conditional branches")); @@ -269,7 +275,7 @@ } static Reloc::Model getEffectiveRelocModel(const Triple &TT, - Optional RM) { + std::optional RM) { // AArch64 Darwin and Windows are always PIC. 
if (TT.isOSDarwin() || TT.isOSWindows()) return Reloc::PIC_; @@ -282,8 +288,8 @@ } static CodeModel::Model -getEffectiveAArch64CodeModel(const Triple &TT, Optional CM, - bool JIT) { +getEffectiveAArch64CodeModel(const Triple &TT, + std::optional CM, bool JIT) { if (CM) { if (*CM != CodeModel::Small && *CM != CodeModel::Tiny && *CM != CodeModel::Large) { @@ -309,8 +315,8 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT, bool LittleEndian) : LLVMTargetMachine(T, @@ -397,7 +403,7 @@ unsigned MaxSVEVectorSize = 0; Attribute VScaleRangeAttr = F.getFnAttribute(Attribute::VScaleRange); if (VScaleRangeAttr.isValid()) { - Optional VScaleMax = VScaleRangeAttr.getVScaleRangeMax(); + std::optional VScaleMax = VScaleRangeAttr.getVScaleRangeMax(); MinSVEVectorSize = VScaleRangeAttr.getVScaleRangeMin() * 128; MaxSVEVectorSize = VScaleMax ? *VScaleMax * 128 : 0; } else { @@ -448,16 +454,16 @@ AArch64leTargetMachine::AArch64leTargetMachine( const Target &T, const Triple &TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, Optional RM, - Optional CM, CodeGenOpt::Level OL, bool JIT) + const TargetOptions &Options, std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {} void AArch64beTargetMachine::anchor() { } AArch64beTargetMachine::AArch64beTargetMachine( const Target &T, const Triple &TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, Optional RM, - Optional CM, CodeGenOpt::Level OL, bool JIT) + const TargetOptions &Options, std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {} namespace { @@ -586,6 +592,9 @@ TargetPassConfig::addIRPasses(); + if (getOptLevel() == CodeGenOpt::Aggressive && EnableSelectOpt) + addPass(createSelectOptimizePass()); + addPass(createAArch64StackTaggingPass( /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None)); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -25,6 +25,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" #include +#include namespace llvm { @@ -111,10 +112,10 @@ InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind); - Optional instCombineIntrinsic(InstCombiner &IC, - IntrinsicInst &II) const; + std::optional instCombineIntrinsic(InstCombiner &IC, + IntrinsicInst &II) const; - Optional simplifyDemandedVectorEltsIntrinsic( + std::optional simplifyDemandedVectorEltsIntrinsic( InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function @@ -126,7 +127,7 @@ return ST->getMinVectorRegisterBitWidth(); } - Optional getVScaleForTuning() const { + std::optional getVScaleForTuning() const { return ST->getVScaleForTuning(); } @@ -369,14 +370,14 @@ } InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind); InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, - ArrayRef Args = None); + 
ArrayRef Args = std::nullopt); /// Return the cost of the scaling factor used in the addressing /// mode represented by AM for this target, for a load/store @@ -387,6 +388,8 @@ int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const; /// @} + + bool enableSelectOptimize() { return ST->enableSelectOptimize(); } }; } // end namespace llvm diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/InstCombine/InstCombiner.h" #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" #include +#include using namespace llvm; using namespace llvm::PatternMatch; @@ -521,8 +522,8 @@ /// The function will remove redundant reinterprets casting in the presence /// of the control flow -static Optional processPhiNode(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional processPhiNode(InstCombiner &IC, + IntrinsicInst &II) { SmallVector Worklist; auto RequiredType = II.getType(); @@ -531,7 +532,7 @@ // Don't create a new Phi unless we can remove the old one. if (!PN->hasOneUse()) - return None; + return std::nullopt; for (Value *IncValPhi : PN->incoming_values()) { auto *Reinterpret = dyn_cast(IncValPhi); @@ -539,7 +540,7 @@ Reinterpret->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool || RequiredType != Reinterpret->getArgOperand(0)->getType()) - return None; + return std::nullopt; } // Create the new Phi @@ -568,11 +569,11 @@ // and` into a ` and`. This is profitable because // to_svbool must zero the new lanes during widening, whereas // from_svbool is free. -static Optional tryCombineFromSVBoolBinOp(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional +tryCombineFromSVBoolBinOp(InstCombiner &IC, IntrinsicInst &II) { auto BinOp = dyn_cast(II.getOperand(0)); if (!BinOp) - return None; + return std::nullopt; auto IntrinsicID = BinOp->getIntrinsicID(); switch (IntrinsicID) { @@ -585,7 +586,7 @@ case Intrinsic::aarch64_sve_orr_z: break; default: - return None; + return std::nullopt; } auto BinOpPred = BinOp->getOperand(0); @@ -595,12 +596,12 @@ auto PredIntr = dyn_cast(BinOpPred); if (!PredIntr || PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool) - return None; + return std::nullopt; auto PredOp = PredIntr->getOperand(0); auto PredOpTy = cast(PredOp->getType()); if (PredOpTy != II.getType()) - return None; + return std::nullopt; IRBuilder<> Builder(II.getContext()); Builder.SetInsertPoint(&II); @@ -620,8 +621,8 @@ return IC.replaceInstUsesWith(II, NarrowedBinOp); } -static Optional instCombineConvertFromSVBool(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional +instCombineConvertFromSVBool(InstCombiner &IC, IntrinsicInst &II) { // If the reinterpret instruction operand is a PHI Node if (isa(II.getArgOperand(0))) return processPhiNode(IC, II); @@ -663,32 +664,32 @@ // If no viable replacement in the conversion chain was found, there is // nothing to do. 
if (!EarliestReplacement) - return None; + return std::nullopt; return IC.replaceInstUsesWith(II, EarliestReplacement); } -static Optional instCombineSVESel(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional instCombineSVESel(InstCombiner &IC, + IntrinsicInst &II) { IRBuilder<> Builder(&II); auto Select = Builder.CreateSelect(II.getOperand(0), II.getOperand(1), II.getOperand(2)); return IC.replaceInstUsesWith(II, Select); } -static Optional instCombineSVEDup(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional instCombineSVEDup(InstCombiner &IC, + IntrinsicInst &II) { IntrinsicInst *Pg = dyn_cast(II.getArgOperand(1)); if (!Pg) - return None; + return std::nullopt; if (Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue) - return None; + return std::nullopt; const auto PTruePattern = cast(Pg->getOperand(0))->getZExtValue(); if (PTruePattern != AArch64SVEPredPattern::vl1) - return None; + return std::nullopt; // The intrinsic is inserting into lane zero so use an insert instead. auto *IdxTy = Type::getInt64Ty(II.getContext()); @@ -700,8 +701,8 @@ return IC.replaceInstUsesWith(II, Insert); } -static Optional instCombineSVEDupX(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional instCombineSVEDupX(InstCombiner &IC, + IntrinsicInst &II) { // Replace DupX with a regular IR splat. IRBuilder<> Builder(II.getContext()); Builder.SetInsertPoint(&II); @@ -712,8 +713,8 @@ return IC.replaceInstUsesWith(II, Splat); } -static Optional instCombineSVECmpNE(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional instCombineSVECmpNE(InstCombiner &IC, + IntrinsicInst &II) { LLVMContext &Ctx = II.getContext(); IRBuilder<> Builder(Ctx); Builder.SetInsertPoint(&II); @@ -721,49 +722,49 @@ // Check that the predicate is all active auto *Pg = dyn_cast(II.getArgOperand(0)); if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue) - return None; + return std::nullopt; const auto PTruePattern = cast(Pg->getOperand(0))->getZExtValue(); if (PTruePattern != AArch64SVEPredPattern::all) - return None; + return std::nullopt; // Check that we have a compare of zero.. 
auto *SplatValue = dyn_cast_or_null(getSplatValue(II.getArgOperand(2))); if (!SplatValue || !SplatValue->isZero()) - return None; + return std::nullopt; // ..against a dupq auto *DupQLane = dyn_cast(II.getArgOperand(1)); if (!DupQLane || DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane) - return None; + return std::nullopt; // Where the dupq is a lane 0 replicate of a vector insert if (!cast(DupQLane->getArgOperand(1))->isZero()) - return None; + return std::nullopt; auto *VecIns = dyn_cast(DupQLane->getArgOperand(0)); if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert) - return None; + return std::nullopt; // Where the vector insert is a fixed constant vector insert into undef at // index zero if (!isa(VecIns->getArgOperand(0))) - return None; + return std::nullopt; if (!cast(VecIns->getArgOperand(2))->isZero()) - return None; + return std::nullopt; auto *ConstVec = dyn_cast(VecIns->getArgOperand(1)); if (!ConstVec) - return None; + return std::nullopt; auto *VecTy = dyn_cast(ConstVec->getType()); auto *OutTy = dyn_cast(II.getType()); if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements()) - return None; + return std::nullopt; unsigned NumElts = VecTy->getNumElements(); unsigned PredicateBits = 0; @@ -772,7 +773,7 @@ for (unsigned I = 0; I < NumElts; ++I) { auto *Arg = dyn_cast(ConstVec->getAggregateElement(I)); if (!Arg) - return None; + return std::nullopt; if (!Arg->isZero()) PredicateBits |= 1 << (I * (16 / NumElts)); } @@ -797,7 +798,7 @@ // Ensure all relevant bits are set for (unsigned I = 0; I < 16; I += PredSize) if ((PredicateBits & (1 << I)) == 0) - return None; + return std::nullopt; auto *PTruePat = ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all); @@ -813,8 +814,8 @@ return IC.replaceInstUsesWith(II, ConvertFromSVBool); } -static Optional instCombineSVELast(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional instCombineSVELast(InstCombiner &IC, + IntrinsicInst &II) { IRBuilder<> Builder(II.getContext()); Builder.SetInsertPoint(&II); Value *Pg = II.getArgOperand(0); @@ -855,10 +856,10 @@ auto *IntrPG = dyn_cast(Pg); if (!IntrPG) - return None; + return std::nullopt; if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue) - return None; + return std::nullopt; const auto PTruePattern = cast(IntrPG->getOperand(0))->getZExtValue(); @@ -866,7 +867,7 @@ // Can the intrinsic's predicate be converted to a known constant index? unsigned MinNumElts = getNumElementsFromSVEPredPattern(PTruePattern); if (!MinNumElts) - return None; + return std::nullopt; unsigned Idx = MinNumElts - 1; // Increment the index if extracting the element after the last active @@ -879,7 +880,7 @@ // maintain what the user asked for until an alternative is proven faster. auto *PgVTy = cast(Pg->getType()); if (Idx >= PgVTy->getMinNumElements()) - return None; + return std::nullopt; // The intrinsic is extracting a fixed lane so use an extract instead. auto *IdxTy = Type::getInt64Ty(II.getContext()); @@ -889,8 +890,8 @@ return IC.replaceInstUsesWith(II, Extract); } -static Optional instCombineSVECondLast(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional instCombineSVECondLast(InstCombiner &IC, + IntrinsicInst &II) { // The SIMD&FP variant of CLAST[AB] is significantly faster than the scalar // integer variant across a variety of micro-architectures. Replace scalar // integer CLAST[AB] intrinsic with optimal SIMD&FP variant. 
A simple @@ -906,12 +907,12 @@ Type *Ty = II.getType(); if (!Ty->isIntegerTy()) - return None; + return std::nullopt; Type *FPTy; switch (cast(Ty)->getBitWidth()) { default: - return None; + return std::nullopt; case 16: FPTy = Builder.getHalfTy(); break; @@ -933,8 +934,8 @@ return IC.replaceInstUsesWith(II, FPIItoInt); } -static Optional instCombineRDFFR(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional instCombineRDFFR(InstCombiner &IC, + IntrinsicInst &II) { LLVMContext &Ctx = II.getContext(); IRBuilder<> Builder(Ctx); Builder.SetInsertPoint(&II); @@ -950,7 +951,7 @@ return IC.replaceInstUsesWith(II, RDFFR); } -static Optional +static std::optional instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) { const auto Pattern = cast(II.getArgOperand(0))->getZExtValue(); @@ -968,13 +969,13 @@ unsigned MinNumElts = getNumElementsFromSVEPredPattern(Pattern); return MinNumElts && NumElts >= MinNumElts - ? Optional(IC.replaceInstUsesWith( + ? std::optional(IC.replaceInstUsesWith( II, ConstantInt::get(II.getType(), MinNumElts))) - : None; + : std::nullopt; } -static Optional instCombineSVEPTest(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional instCombineSVEPTest(InstCombiner &IC, + IntrinsicInst &II) { Value *PgVal = II.getArgOperand(0); Value *OpVal = II.getArgOperand(1); @@ -1000,7 +1001,7 @@ IntrinsicInst *Op = dyn_cast(OpVal); if (!Pg || !Op) - return None; + return std::nullopt; Intrinsic::ID OpIID = Op->getIntrinsicID(); @@ -1041,11 +1042,11 @@ return IC.replaceInstUsesWith(II, PTest); } - return None; + return std::nullopt; } -static Optional instCombineSVEVectorFMLA(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional +instCombineSVEVectorFMLA(InstCombiner &IC, IntrinsicInst &II) { // fold (fadd p a (fmul p b c)) -> (fma p a b c) Value *P = II.getOperand(0); Value *A = II.getOperand(1); @@ -1053,18 +1054,18 @@ Value *B, *C; if (!match(FMul, m_Intrinsic( m_Specific(P), m_Value(B), m_Value(C)))) - return None; + return std::nullopt; if (!FMul->hasOneUse()) - return None; + return std::nullopt; llvm::FastMathFlags FAddFlags = II.getFastMathFlags(); // Stop the combine when the flags on the inputs differ in case dropping flags // would lead to us missing out on more beneficial optimizations. 
if (FAddFlags != cast(FMul)->getFastMathFlags()) - return None; + return std::nullopt; if (!FAddFlags.allowContract()) - return None; + return std::nullopt; IRBuilder<> Builder(II.getContext()); Builder.SetInsertPoint(&II); @@ -1090,7 +1091,7 @@ m_ConstantInt())); } -static Optional +static std::optional instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) { IRBuilder<> Builder(II.getContext()); Builder.SetInsertPoint(&II); @@ -1113,7 +1114,7 @@ return IC.replaceInstUsesWith(II, MaskedLoad); } -static Optional +static std::optional instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) { IRBuilder<> Builder(II.getContext()); Builder.SetInsertPoint(&II); @@ -1149,14 +1150,14 @@ } } -static Optional instCombineSVEVectorBinOp(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional +instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) { auto *OpPredicate = II.getOperand(0); auto BinOpCode = intrinsicIDToBinOpCode(II.getIntrinsicID()); if (BinOpCode == Instruction::BinaryOpsEnd || !match(OpPredicate, m_Intrinsic( m_ConstantInt()))) - return None; + return std::nullopt; IRBuilder<> Builder(II.getContext()); Builder.SetInsertPoint(&II); Builder.setFastMathFlags(II.getFastMathFlags()); @@ -1165,15 +1166,15 @@ return IC.replaceInstUsesWith(II, BinOp); } -static Optional instCombineSVEVectorFAdd(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional +instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) { if (auto FMLA = instCombineSVEVectorFMLA(IC, II)) return FMLA; return instCombineSVEVectorBinOp(IC, II); } -static Optional instCombineSVEVectorMul(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional instCombineSVEVectorMul(InstCombiner &IC, + IntrinsicInst &II) { auto *OpPredicate = II.getOperand(0); auto *OpMultiplicand = II.getOperand(1); auto *OpMultiplier = II.getOperand(2); @@ -1219,8 +1220,8 @@ return instCombineSVEVectorBinOp(IC, II); } -static Optional instCombineSVEUnpack(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional instCombineSVEUnpack(InstCombiner &IC, + IntrinsicInst &II) { IRBuilder<> Builder(II.getContext()); Builder.SetInsertPoint(&II); Value *UnpackArg = II.getArgOperand(0); @@ -1239,10 +1240,10 @@ return IC.replaceInstUsesWith(II, NewVal); } - return None; + return std::nullopt; } -static Optional instCombineSVETBL(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional instCombineSVETBL(InstCombiner &IC, + IntrinsicInst &II) { auto *OpVal = II.getOperand(0); auto *OpIndices = II.getOperand(1); VectorType *VTy = cast(II.getType()); @@ -1252,7 +1253,7 @@ auto *SplatValue = dyn_cast_or_null(getSplatValue(OpIndices)); if (!SplatValue || SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue())) - return None; + return std::nullopt; // Convert sve_tbl(OpVal sve_dup_x(SplatValue)) to // splat_vector(extractelement(OpVal, SplatValue)) for further optimization. @@ -1266,8 +1267,8 @@ return IC.replaceInstUsesWith(II, VectorSplat); } -static Optional instCombineSVEZip(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional instCombineSVEZip(InstCombiner &IC, + IntrinsicInst &II) { // zip1(uzp1(A, B), uzp2(A, B)) --> A // zip2(uzp1(A, B), uzp2(A, B)) --> B Value *A, *B; @@ -1278,11 +1279,11 @@ return IC.replaceInstUsesWith( II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? 
A : B)); - return None; + return std::nullopt; } -static Optional instCombineLD1GatherIndex(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional +instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II) { Value *Mask = II.getOperand(0); Value *BasePtr = II.getOperand(1); Value *Index = II.getOperand(2); @@ -1302,8 +1303,8 @@ BasePtr->getPointerAlignment(II.getModule()->getDataLayout()); Type *VecPtrTy = PointerType::getUnqual(Ty); - Value *Ptr = Builder.CreateGEP( - cast(Ty)->getElementType(), BasePtr, IndexBase); + Value *Ptr = Builder.CreateGEP(cast(Ty)->getElementType(), + BasePtr, IndexBase); Ptr = Builder.CreateBitCast(Ptr, VecPtrTy); CallInst *MaskedLoad = Builder.CreateMaskedLoad(Ty, Ptr, Alignment, Mask, PassThru); @@ -1311,11 +1312,11 @@ return IC.replaceInstUsesWith(II, MaskedLoad); } - return None; + return std::nullopt; } -static Optional instCombineST1ScatterIndex(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional +instCombineST1ScatterIndex(InstCombiner &IC, IntrinsicInst &II) { Value *Val = II.getOperand(0); Value *Mask = II.getOperand(1); Value *BasePtr = II.getOperand(2); @@ -1334,8 +1335,8 @@ Align Alignment = BasePtr->getPointerAlignment(II.getModule()->getDataLayout()); - Value *Ptr = Builder.CreateGEP( - cast(Ty)->getElementType(), BasePtr, IndexBase); + Value *Ptr = Builder.CreateGEP(cast(Ty)->getElementType(), + BasePtr, IndexBase); Type *VecPtrTy = PointerType::getUnqual(Ty); Ptr = Builder.CreateBitCast(Ptr, VecPtrTy); @@ -1344,11 +1345,11 @@ return IC.eraseInstFromFunction(II); } - return None; + return std::nullopt; } -static Optional instCombineSVESDIV(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional instCombineSVESDIV(InstCombiner &IC, + IntrinsicInst &II) { IRBuilder<> Builder(II.getContext()); Builder.SetInsertPoint(&II); Type *Int32Ty = Builder.getInt32Ty(); @@ -1359,7 +1360,7 @@ Value *SplatValue = getSplatValue(DivVec); ConstantInt *SplatConstantInt = dyn_cast_or_null(SplatValue); if (!SplatConstantInt) - return None; + return std::nullopt; APInt Divisor = SplatConstantInt->getValue(); if (Divisor.isPowerOf2()) { @@ -1378,21 +1379,21 @@ return IC.replaceInstUsesWith(II, NEG); } - return None; + return std::nullopt; } -static Optional instCombineMaxMinNM(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional instCombineMaxMinNM(InstCombiner &IC, + IntrinsicInst &II) { Value *A = II.getArgOperand(0); Value *B = II.getArgOperand(1); if (A == B) return IC.replaceInstUsesWith(II, A); - return None; + return std::nullopt; } -static Optional instCombineSVESrshl(InstCombiner &IC, - IntrinsicInst &II) { +static std::optional instCombineSVESrshl(InstCombiner &IC, + IntrinsicInst &II) { IRBuilder<> Builder(&II); Value *Pred = II.getOperand(0); Value *Vec = II.getOperand(1); @@ -1405,21 +1406,20 @@ !match(Vec, m_Intrinsic( m_Value(MergedValue), m_Value(AbsPred), m_Value()))) - return None; + return std::nullopt; // Transform is valid if any of the following are true: // * The ABS merge value is an undef or non-negative // * The ABS predicate is all active // * The ABS predicate and the SRSHL predicates are the same - if (!isa(MergedValue) && - !match(MergedValue, m_NonNegative()) && + if (!isa(MergedValue) && !match(MergedValue, m_NonNegative()) && AbsPred != Pred && !isAllActivePredicate(AbsPred)) - return None; + return std::nullopt; // Only valid when the shift amount is non-negative, otherwise the rounding // behaviour of SRSHL cannot be ignored. 
if (!match(Shift, m_NonNegative())) - return None; + return std::nullopt; auto LSL = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_lsl, {II.getType()}, {Pred, Vec, Shift}); @@ -1427,7 +1427,7 @@ return IC.replaceInstUsesWith(II, LSL); } -Optional +std::optional AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { Intrinsic::ID IID = II.getIntrinsicID(); @@ -1499,10 +1499,10 @@ return instCombineSVESrshl(IC, II); } - return None; + return std::nullopt; } -Optional AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic( +std::optional AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic( InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function @@ -1525,7 +1525,7 @@ break; } - return None; + return std::nullopt; } TypeSize @@ -2814,7 +2814,7 @@ InstructionCost AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind) { if (TTI::requiresOrderedReduction(FMF)) { if (auto *FixedVTy = dyn_cast(ValTy)) { diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -2631,7 +2631,7 @@ } if (Res == std::make_pair(-1, -1)) - return None; + return std::nullopt; return Optional>(Res); } @@ -7044,8 +7044,7 @@ // ::= .tlsdesccall symbol bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) { StringRef Name; - if (check(getParser().parseIdentifier(Name), L, - "expected symbol after directive") || + if (check(getParser().parseIdentifier(Name), L, "expected symbol") || parseToken(AsmToken::EndOfStatement)) return true; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -1246,7 +1246,7 @@ if (!determineAndHandleAssignments( UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs, MIRBuilder, Info.CallConv, Info.IsVarArg, - UsingReturnedArg ? makeArrayRef(OutArgs[0].Regs) : None)) + UsingReturnedArg ? makeArrayRef(OutArgs[0].Regs) : std::nullopt)) return false; } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp @@ -23,7 +23,7 @@ if (auto Splat = getVectorSplat(MI, MRI)) return Splat; if (MI.getOpcode() != AArch64::G_DUP) - return None; + return std::nullopt; Register Src = MI.getOperand(1).getReg(); if (auto ValAndVReg = getAnyConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) @@ -36,7 +36,7 @@ const MachineRegisterInfo &MRI) { auto Splat = getAArch64VectorSplat(MI, MRI); if (!Splat || Splat->isReg()) - return None; + return std::nullopt; return Splat->getCst(); } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -217,15 +217,15 @@ /// Emit a floating point comparison between \p LHS and \p RHS. /// \p Pred if given is the intended predicate to use. 
- MachineInstr *emitFPCompare(Register LHS, Register RHS, - MachineIRBuilder &MIRBuilder, - Optional = None) const; - - MachineInstr *emitInstr(unsigned Opcode, - std::initializer_list DstOps, - std::initializer_list SrcOps, - MachineIRBuilder &MIRBuilder, - const ComplexRendererFns &RenderFns = None) const; + MachineInstr * + emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder, + Optional = std::nullopt) const; + + MachineInstr * + emitInstr(unsigned Opcode, std::initializer_list DstOps, + std::initializer_list SrcOps, + MachineIRBuilder &MIRBuilder, + const ComplexRendererFns &RenderFns = std::nullopt) const; /// Helper function to emit an add or sub instruction. /// /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above @@ -689,10 +689,10 @@ auto ValAndVReg = getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true); if (!ValAndVReg) - return None; + return std::nullopt; Immed = ValAndVReg->Value.getSExtValue(); } else - return None; + return std::nullopt; return Immed; } @@ -1795,30 +1795,30 @@ static Optional getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) { Optional ShiftImm = getVectorShiftImm(Reg, MRI); if (!ShiftImm) - return None; + return std::nullopt; // Check the immediate is in range for a SHL. int64_t Imm = *ShiftImm; if (Imm < 0) - return None; + return std::nullopt; switch (SrcTy.getElementType().getSizeInBits()) { default: LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift"); - return None; + return std::nullopt; case 8: if (Imm > 7) - return None; + return std::nullopt; break; case 16: if (Imm > 15) - return None; + return std::nullopt; break; case 32: if (Imm > 31) - return None; + return std::nullopt; break; case 64: if (Imm > 63) - return None; + return std::nullopt; break; } return Imm; @@ -4025,8 +4025,8 @@ Register Src1Reg = I.getOperand(1).getReg(); Register Src2Reg = I.getOperand(2).getReg(); auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {}); - MachineInstr *InsMI = - emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB); + MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg, + /* LaneIdx */ 0, RB, MIB); if (!InsMI) return false; MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(), @@ -5216,7 +5216,8 @@ if (DstTy.getSizeInBits() != 128) { assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty"); // This case can be done with TBL1. - MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB); + MachineInstr *Concat = + emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB); if (!Concat) { LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1"); return false; @@ -5388,7 +5389,7 @@ // Note that if our vector is already 128 bits, we end up emitting an extra // register. MachineInstr *InsMI = - emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB); + emitLaneInsert(std::nullopt, SrcReg, EltReg, LaneIdx, EltRB, MIB); if (VecSize < 128) { // If we had to widen to perform the insert, then we have to demote back to @@ -5564,8 +5565,8 @@ for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) { // Note that if we don't do a subregister copy, we can end up making an // extra register. 
- PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB, - MIB); + PrevMI = &*emitLaneInsert(std::nullopt, DstVec, I.getOperand(i).getReg(), + i - 1, RB, MIB); DstVec = PrevMI->getOperand(0).getReg(); } @@ -5956,8 +5957,8 @@ InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const { auto MaybeImmed = getImmedFromMO(Root); - if (MaybeImmed == None || *MaybeImmed > 31) - return None; + if (MaybeImmed == std::nullopt || *MaybeImmed > 31) + return std::nullopt; uint64_t Enc = (32 - *MaybeImmed) & 0x1f; return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; } @@ -5965,8 +5966,8 @@ InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const { auto MaybeImmed = getImmedFromMO(Root); - if (MaybeImmed == None || *MaybeImmed > 31) - return None; + if (MaybeImmed == std::nullopt || *MaybeImmed > 31) + return std::nullopt; uint64_t Enc = 31 - *MaybeImmed; return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; } @@ -5974,8 +5975,8 @@ InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const { auto MaybeImmed = getImmedFromMO(Root); - if (MaybeImmed == None || *MaybeImmed > 63) - return None; + if (MaybeImmed == std::nullopt || *MaybeImmed > 63) + return std::nullopt; uint64_t Enc = (64 - *MaybeImmed) & 0x3f; return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; } @@ -5983,8 +5984,8 @@ InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const { auto MaybeImmed = getImmedFromMO(Root); - if (MaybeImmed == None || *MaybeImmed > 63) - return None; + if (MaybeImmed == std::nullopt || *MaybeImmed > 63) + return std::nullopt; uint64_t Enc = 63 - *MaybeImmed; return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; } @@ -6004,7 +6005,7 @@ ShiftAmt = 12; Immed = Immed >> 12; } else - return None; + return std::nullopt; unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); return {{ @@ -6024,8 +6025,8 @@ // here because the ComplexPattern opcode list is only used in // root-level opcode matching. auto MaybeImmed = getImmedFromMO(Root); - if (MaybeImmed == None) - return None; + if (MaybeImmed == std::nullopt) + return std::nullopt; return select12BitValueWithLeftShift(*MaybeImmed); } @@ -6036,17 +6037,17 @@ // We need a register here, because we need to know if we have a 64 or 32 // bit immediate. if (!Root.isReg()) - return None; + return std::nullopt; auto MaybeImmed = getImmedFromMO(Root); - if (MaybeImmed == None) - return None; + if (MaybeImmed == std::nullopt) + return std::nullopt; uint64_t Immed = *MaybeImmed; // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" // have the opposite effect on the C flag, so this pattern mustn't match under // those circumstances. if (Immed == 0) - return None; + return std::nullopt; // Check if we're dealing with a 32-bit type on the root or a 64-bit type on // the root. @@ -6057,7 +6058,7 @@ Immed = ~Immed + 1ULL; if (Immed & 0xFFFFFFFFFF000000ULL) - return None; + return std::nullopt; Immed &= 0xFFFFFFULL; return select12BitValueWithLeftShift(Immed); @@ -6112,21 +6113,21 @@ if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) { // Try to look through a ZEXT. 
if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt) - return None; + return std::nullopt; OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg()); OffsetOpc = OffsetInst->getOpcode(); LookedThroughZExt = true; if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) - return None; + return std::nullopt; } // Make sure that the memory op is a valid size. int64_t LegalShiftVal = Log2_32(SizeInBytes); if (LegalShiftVal == 0) - return None; + return std::nullopt; if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) - return None; + return std::nullopt; // Now, try to find the specific G_CONSTANT. Start by assuming that the // register we will offset is the LHS, and the register containing the @@ -6138,13 +6139,13 @@ // We didn't get a constant on the RHS. If the opcode is a shift, then // we're done. if (OffsetOpc == TargetOpcode::G_SHL) - return None; + return std::nullopt; // If we have a G_MUL, we can use either register. Try looking at the RHS. std::swap(OffsetReg, ConstantReg); ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI); if (!ValAndVReg) - return None; + return std::nullopt; } // The value must fit into 3 bits, and must be positive. Make sure that is @@ -6155,19 +6156,19 @@ // a power of 2. If we got a multiply, then we need to check this. if (OffsetOpc == TargetOpcode::G_MUL) { if (!isPowerOf2_32(ImmVal)) - return None; + return std::nullopt; // Got a power of 2. So, the amount we'll shift is the log base-2 of that. ImmVal = Log2_32(ImmVal); } if ((ImmVal & 0x7) != ImmVal) - return None; + return std::nullopt; // We are only allowed to shift by LegalShiftVal. This shift value is built // into the instruction, so we can't just use whatever we want. if (ImmVal != LegalShiftVal) - return None; + return std::nullopt; unsigned SignExtend = 0; if (WantsExt) { @@ -6177,12 +6178,12 @@ MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI); auto Ext = getExtendTypeForInst(*ExtInst, MRI, true); if (Ext == AArch64_AM::InvalidShiftExtend) - return None; + return std::nullopt; SignExtend = isSignExtendShiftType(Ext) ? 1 : 0; // We only support SXTW for signed extension here. if (SignExtend && Ext != AArch64_AM::SXTW) - return None; + return std::nullopt; OffsetReg = ExtInst->getOperand(1).getReg(); } @@ -6215,7 +6216,7 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg( MachineOperand &Root, unsigned SizeInBytes) const { if (!Root.isReg()) - return None; + return std::nullopt; MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); // We want to find something like this: @@ -6233,7 +6234,7 @@ MachineInstr *PtrAdd = getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI)) - return None; + return std::nullopt; // Now, try to match an opcode which will match our specific offset. // We want a G_SHL or a G_MUL. @@ -6260,13 +6261,13 @@ // We need a GEP. MachineInstr *Gep = MRI.getVRegDef(Root.getReg()); if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD) - return None; + return std::nullopt; // If this is used more than once, let's not bother folding. // TODO: Check if they are memory ops. If they are, then we can still fold // without having to recompute anything. if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg())) - return None; + return std::nullopt; // Base is the GEP's LHS, offset is its RHS. 
return {{[=](MachineInstrBuilder &MIB) { @@ -6290,11 +6291,11 @@ unsigned SizeInBytes) const { MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); if (!Root.isReg()) - return None; + return std::nullopt; MachineInstr *PtrAdd = getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); if (!PtrAdd) - return None; + return std::nullopt; // Check for an immediates which cannot be encoded in the [base + imm] // addressing mode, and can't be encoded in an add/sub. If this happens, we'll @@ -6319,7 +6320,7 @@ // mode. if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) - return None; + return std::nullopt; // Helper lambda to decide whether or not it is preferable to emit an add. auto isPreferredADD = [](int64_t ImmOff) { @@ -6340,7 +6341,7 @@ // If the immediate can be encoded in a single add/sub, then bail out. if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) - return None; + return std::nullopt; } // Try to fold shifts into the addressing mode. @@ -6367,7 +6368,7 @@ MachineInstr *PtrAdd = getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI)) - return None; + return std::nullopt; MachineOperand &LHS = PtrAdd->getOperand(1); MachineOperand &RHS = PtrAdd->getOperand(2); @@ -6398,13 +6399,13 @@ // e.g. // ldr something, [base_reg, ext_reg, sxtw] if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) - return None; + return std::nullopt; // Check if this is an extend. We'll get an extend type if it is. AArch64_AM::ShiftExtendType Ext = getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true); if (Ext == AArch64_AM::InvalidShiftExtend) - return None; + return std::nullopt; // Need a 32-bit wide register. MachineIRBuilder MIB(*PtrAdd); @@ -6433,28 +6434,28 @@ Root.getParent()->getParent()->getParent()->getRegInfo(); if (!Root.isReg()) - return None; + return std::nullopt; if (!isBaseWithConstantOffset(Root, MRI)) - return None; + return std::nullopt; MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); MachineOperand &OffImm = RootDef->getOperand(2); if (!OffImm.isReg()) - return None; + return std::nullopt; MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg()); if (RHS->getOpcode() != TargetOpcode::G_CONSTANT) - return None; + return std::nullopt; int64_t RHSC; MachineOperand &RHSOp1 = RHS->getOperand(1); if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) - return None; + return std::nullopt; RHSC = RHSOp1.getCImm()->getSExtValue(); // If the offset is valid as a scaled immediate, don't match here. if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) - return None; + return std::nullopt; if (RHSC >= -256 && RHSC < 256) { MachineOperand &Base = RootDef->getOperand(1); return {{ @@ -6462,7 +6463,7 @@ [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }, }}; } - return None; + return std::nullopt; } InstructionSelector::ComplexRendererFns @@ -6470,23 +6471,23 @@ unsigned Size, MachineRegisterInfo &MRI) const { if (RootDef.getOpcode() != AArch64::G_ADD_LOW) - return None; + return std::nullopt; MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg()); if (Adrp.getOpcode() != AArch64::ADRP) - return None; + return std::nullopt; // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG. 
auto Offset = Adrp.getOperand(1).getOffset(); if (Offset % Size != 0) - return None; + return std::nullopt; auto GV = Adrp.getOperand(1).getGlobal(); if (GV->isThreadLocal()) - return None; + return std::nullopt; auto &MF = *RootDef.getParent()->getParent(); if (GV->getPointerAlignment(MF.getDataLayout()) < Size) - return None; + return std::nullopt; unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget()); MachineIRBuilder MIRBuilder(RootDef); @@ -6509,7 +6510,7 @@ MachineRegisterInfo &MRI = MF.getRegInfo(); if (!Root.isReg()) - return None; + return std::nullopt; MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { @@ -6552,7 +6553,7 @@ // Before falling back to our general case, check if the unscaled // instructions can handle this. If so, that's preferable. if (selectAddrModeUnscaled(Root, Size)) - return None; + return std::nullopt; return {{ [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, @@ -6583,7 +6584,7 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root, bool AllowROR) const { if (!Root.isReg()) - return None; + return std::nullopt; MachineRegisterInfo &MRI = Root.getParent()->getParent()->getParent()->getRegInfo(); @@ -6592,17 +6593,17 @@ MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg()); AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst); if (ShType == AArch64_AM::InvalidShiftExtend) - return None; + return std::nullopt; if (ShType == AArch64_AM::ROR && !AllowROR) - return None; + return std::nullopt; if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI)) - return None; + return std::nullopt; // Need an immediate on the RHS. MachineOperand &ShiftRHS = ShiftInst->getOperand(2); auto Immed = getImmedFromMO(ShiftRHS); if (!Immed) - return None; + return std::nullopt; // We have something that we can fold. Fold in the shift's LHS and RHS into // the instruction. @@ -6698,7 +6699,7 @@ AArch64InstructionSelector::selectArithExtendedRegister( MachineOperand &Root) const { if (!Root.isReg()) - return None; + return std::nullopt; MachineRegisterInfo &MRI = Root.getParent()->getParent()->getParent()->getRegInfo(); @@ -6707,10 +6708,10 @@ AArch64_AM::ShiftExtendType Ext; MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI); if (!RootDef) - return None; + return std::nullopt; if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI)) - return None; + return std::nullopt; // Check if we can fold a shift and an extend. if (RootDef->getOpcode() == TargetOpcode::G_SHL) { @@ -6718,24 +6719,24 @@ MachineOperand &RHS = RootDef->getOperand(2); Optional MaybeShiftVal = getImmedFromMO(RHS); if (!MaybeShiftVal) - return None; + return std::nullopt; ShiftVal = *MaybeShiftVal; if (ShiftVal > 4) - return None; + return std::nullopt; // Look for a valid extend instruction on the LHS of the shift. MachineOperand &LHS = RootDef->getOperand(1); MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI); if (!ExtDef) - return None; + return std::nullopt; Ext = getExtendTypeForInst(*ExtDef, MRI); if (Ext == AArch64_AM::InvalidShiftExtend) - return None; + return std::nullopt; ExtReg = ExtDef->getOperand(1).getReg(); } else { // Didn't get a shift. Try just folding an extend. 
Ext = getExtendTypeForInst(*RootDef, MRI); if (Ext == AArch64_AM::InvalidShiftExtend) - return None; + return std::nullopt; ExtReg = RootDef->getOperand(1).getReg(); // If we have a 32 bit instruction which zeroes out the high half of a @@ -6745,7 +6746,7 @@ if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) { MachineInstr *ExtInst = MRI.getVRegDef(ExtReg); if (isDef32(*ExtInst)) - return None; + return std::nullopt; } } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp @@ -117,7 +117,7 @@ // Look for the first non-undef element. auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; }); if (FirstRealElt == M.end()) - return None; + return std::nullopt; // Use APInt to handle overflow when calculating expected element. unsigned MaskBits = APInt(32, NumElts * 2).logBase2(); @@ -128,7 +128,7 @@ if (any_of( make_range(std::next(FirstRealElt), M.end()), [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; })) - return None; + return std::nullopt; // The index of an EXT is the first element if it is not UNDEF. // Watch out for the beginning UNDEFs. The EXT index should be the expected @@ -197,7 +197,7 @@ static std::optional> isINSMask(ArrayRef M, int NumInputElements) { if (M.size() != static_cast(NumInputElements)) - return None; + return std::nullopt; int NumLHSMatch = 0, NumRHSMatch = 0; int LastLHSMismatch = -1, LastRHSMismatch = -1; for (int Idx = 0; Idx < NumInputElements; ++Idx) { @@ -214,7 +214,7 @@ return std::make_pair(true, LastLHSMismatch); if (NumRHSMatch == NumNeededToMatch) return std::make_pair(false, LastRHSMismatch); - return None; + return std::nullopt; } /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a @@ -563,7 +563,7 @@ const MachineRegisterInfo &MRI) { const auto &Ty = MRI.getType(RHS); if (Ty.isVector()) - return None; + return std::nullopt; unsigned Size = Ty.getSizeInBits(); assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?"); @@ -571,16 +571,16 @@ // immediate, then there is nothing to change. auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI); if (!ValAndVReg) - return None; + return std::nullopt; uint64_t C = ValAndVReg->Value.getZExtValue(); if (isLegalArithImmed(C)) - return None; + return std::nullopt; // We have a non-arithmetic immediate. Check if adjusting the immediate and // adjusting the predicate will result in a legal arithmetic immediate. switch (P) { default: - return None; + return std::nullopt; case CmpInst::ICMP_SLT: case CmpInst::ICMP_SGE: // Check for @@ -591,7 +591,7 @@ // When c is not the smallest possible negative number. if ((Size == 64 && static_cast(C) == INT64_MIN) || (Size == 32 && static_cast(C) == INT32_MIN)) - return None; + return std::nullopt; P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT; C -= 1; break; @@ -604,7 +604,7 @@ // // When c is not zero. if (C == 0) - return None; + return std::nullopt; P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT; C -= 1; break; @@ -618,7 +618,7 @@ // When c is not the largest possible signed integer. if ((Size == 32 && static_cast(C) == INT32_MAX) || (Size == 64 && static_cast(C) == INT64_MAX)) - return None; + return std::nullopt; P = (P == CmpInst::ICMP_SLE) ? 
CmpInst::ICMP_SLT : CmpInst::ICMP_SGE; C += 1; break; @@ -632,7 +632,7 @@ // When c is not the largest possible unsigned integer. if ((Size == 32 && static_cast(C) == UINT32_MAX) || (Size == 64 && C == UINT64_MAX)) - return None; + return std::nullopt; P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE; C += 1; break; @@ -643,7 +643,7 @@ if (Size == 32) C = static_cast(C); if (!isLegalArithImmed(C)) - return None; + return std::nullopt; return {{C, P}}; } diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -332,7 +332,7 @@ Optional AArch64AsmBackend::getFixupKind(StringRef Name) const { if (!TheTriple.isOSBinFormatELF()) - return None; + return std::nullopt; unsigned Type = llvm::StringSwitch(Name) #define ELF_RELOC(X, Y) .Case(#X, Y) @@ -344,7 +344,7 @@ .Case("BFD_RELOC_64", ELF::R_AARCH64_ABS64) .Default(-1u); if (Type == -1u) - return None; + return std::nullopt; return static_cast(FirstLiteralRelocationKind + Type); } diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -34,6 +34,8 @@ ~AArch64ELFObjectWriter() override = default; + MCSectionELF *getMemtagRelocsSection(MCContext &Ctx) const override; + protected: unsigned getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; @@ -453,6 +455,12 @@ llvm_unreachable("Unimplemented fixup -> relocation"); } +MCSectionELF * +AArch64ELFObjectWriter::getMemtagRelocsSection(MCContext &Ctx) const { + return Ctx.getELFSection(".memtag.globals.static", + ELF::SHT_AARCH64_MEMTAG_GLOBALS_STATIC, 0); +} + std::unique_ptr llvm::createAArch64ELFObjectWriter(uint8_t OSABI, bool IsILP32) { return std::make_unique(OSABI, IsILP32); diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp --- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp +++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp @@ -34,6 +34,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" +#include using namespace llvm; using namespace llvm::PatternMatch; @@ -284,7 +285,7 @@ return false; unsigned MinVScale = Attr.getVScaleRangeMin(); - Optional MaxVScale = Attr.getVScaleRangeMax(); + std::optional MaxVScale = Attr.getVScaleRangeMax(); // The transform needs to know the exact runtime length of scalable vectors if (!MaxVScale || MinVScale != MaxVScale) return false; @@ -347,7 +348,7 @@ return false; unsigned MinVScale = Attr.getVScaleRangeMin(); - Optional MaxVScale = Attr.getVScaleRangeMax(); + std::optional MaxVScale = Attr.getVScaleRangeMax(); // The transform needs to know the exact runtime length of scalable vectors if (!MaxVScale || MinVScale != MaxVScale) return false; diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -540,7 +540,7 @@ getSVEPredPatternFromNumElements(unsigned MinNumElts) { switch (MinNumElts) { default: - return None; + return std::nullopt; case 1: case 2: case 3: @@ -849,7 +849,7 @@ return 
AArch64PACKey::DA; if (Name == "db") return AArch64PACKey::DB; - return None; + return std::nullopt; } namespace AArch64 { diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp @@ -55,19 +55,19 @@ // and Callee has a streaming body, then we can ignore the interface of // Callee. if (BodyOverridesInterface && Callee.hasStreamingBody()) { - return hasStreamingInterfaceOrBody() ? None : Optional(true); + return hasStreamingInterfaceOrBody() ? std::nullopt : Optional(true); } if (Callee.hasStreamingCompatibleInterface()) - return None; + return std::nullopt; // Both non-streaming if (hasNonStreamingInterfaceAndBody() && Callee.hasNonStreamingInterface()) - return None; + return std::nullopt; // Both streaming if (hasStreamingInterfaceOrBody() && Callee.hasStreamingInterface()) - return None; + return std::nullopt; return Callee.hasStreamingInterface(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -518,7 +518,7 @@ if (AllocSize == 0) continue; - MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : None; + MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : std::nullopt; Align ABIAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy); uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -98,7 +98,8 @@ void emitKernelArg(const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind, unsigned &Offset, - msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign = None, + msgpack::ArrayDocNode Args, + MaybeAlign PointeeAlign = std::nullopt, StringRef Name = "", StringRef TypeName = "", StringRef BaseTypeName = "", StringRef AccQual = "", StringRef TypeQual = ""); @@ -191,7 +192,8 @@ void emitKernelArg(const Argument &Arg); void emitKernelArg(const DataLayout &DL, Type *Ty, Align Alignment, - ValueKind ValueKind, MaybeAlign PointeeAlign = None, + ValueKind ValueKind, + MaybeAlign PointeeAlign = std::nullopt, StringRef Name = "", StringRef TypeName = "", StringRef BaseTypeName = "", StringRef AccQual = "", StringRef TypeQual = ""); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -506,7 +506,7 @@ .Case("read_only", StringRef("read_only")) .Case("write_only", StringRef("write_only")) .Case("read_write", StringRef("read_write")) - .Default(None); + .Default(std::nullopt); } Optional MetadataStreamerMsgPackV3::getAddressSpaceQualifier( @@ -525,7 +525,7 @@ case AMDGPUAS::REGION_ADDRESS: return StringRef("region"); default: - return None; + return std::nullopt; } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp @@ -1080,7 +1080,7 @@ resetEdges(SchedBarrier, DAG); auto InvertedMask = invertSchedBarrierMask((SchedGroupMask)MI.getOperand(0).getImm()); - 
SchedGroup SG(InvertedMask, None, DAG, TII); + SchedGroup SG(InvertedMask, std::nullopt, DAG, TII); SG.initSchedGroup(); // Preserve original instruction ordering relative to the SCHED_BARRIER. SG.link( diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1029,7 +1029,7 @@ Type *BaseArgTy = Arg.getType(); Type *MemArgTy = IsByRef ? Arg.getParamByRefType() : BaseArgTy; Align Alignment = DL.getValueOrABITypeAlignment( - IsByRef ? Arg.getParamAlign() : None, MemArgTy); + IsByRef ? Arg.getParamAlign() : std::nullopt, MemArgTy); MaxAlign = std::max(Alignment, MaxAlign); uint64_t AllocSize = DL.getTypeAllocSize(MemArgTy); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/FloatingPointMode.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" +#include using namespace llvm; @@ -114,14 +115,14 @@ /// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with /// modified arguments (based on OldIntr) and replaces InstToReplace with /// this newly created intrinsic call. -static Optional modifyIntrinsicCall( +static std::optional modifyIntrinsicCall( IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function &, SmallVectorImpl &)> Func) { SmallVector ArgTys; if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys)) - return None; + return std::nullopt; SmallVector Args(OldIntr.args()); @@ -149,7 +150,7 @@ return RetValue; } -static Optional +static std::optional simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC) { @@ -252,7 +253,7 @@ // Try to use A16 or G16 if (!ST->hasA16() && !ST->hasG16()) - return None; + return std::nullopt; // Address is interpreted as float if the instruction has a sampler or as // unsigned int if there is no sampler. @@ -269,7 +270,7 @@ if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) { if (OperandIndex < ImageDimIntr->CoordStart || ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) { - return None; + return std::nullopt; } // All gradients can be converted, so convert only them OnlyDerivatives = true; @@ -295,7 +296,7 @@ if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart == ImageDimIntr->CoordStart)) - return None; + return std::nullopt; Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext()) : Type::getInt16Ty(II.getContext()); @@ -348,7 +349,7 @@ return false; } -Optional +std::optional GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { Intrinsic::ID IID = II.getIntrinsicID(); switch (IID) { @@ -1059,7 +1060,7 @@ } } } - return None; + return std::nullopt; } /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics. 
@@ -1204,7 +1205,7 @@ return Shuffle; } -Optional GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic( +std::optional GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic( InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function @@ -1228,5 +1229,5 @@ break; } } - return None; + return std::nullopt; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3727,7 +3727,7 @@ unsigned Mods; std::tie(Src, Mods) = selectVOP3ModsImpl(Root); if (!isKnownNeverNaN(Src, *MRI)) - return None; + return std::nullopt; return {{ [=](MachineInstrBuilder &MIB) { @@ -3856,7 +3856,7 @@ Register Base; int64_t Offset; if (!selectSmrdOffset(Root, Base, /* SOffset= */ nullptr, &Offset)) - return None; + return std::nullopt; return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); }, [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}}; @@ -3868,14 +3868,14 @@ getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo); if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1) - return None; + return std::nullopt; const GEPInfo &GEPInfo = AddrInfo[0]; Register PtrReg = GEPInfo.SgprParts[0]; Optional EncodedImm = AMDGPU::getSMRDEncodedLiteralOffset32(STI, GEPInfo.Imm); if (!EncodedImm) - return None; + return std::nullopt; return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, @@ -3887,7 +3887,7 @@ AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const { Register Base, SOffset; if (!selectSmrdOffset(Root, Base, &SOffset, /* Offset= */ nullptr)) - return None; + return std::nullopt; return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); }, [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}}; @@ -3898,7 +3898,7 @@ Register Base, SOffset; int64_t Offset; if (!selectSmrdOffset(Root, Base, &SOffset, &Offset)) - return None; + return std::nullopt; return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); }, [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }, @@ -4018,7 +4018,7 @@ !TII.isInlineConstant(APInt(32, ConstOffset & 0xffffffff)) + !TII.isInlineConstant(APInt(32, ConstOffset >> 32)); if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals) - return None; + return std::nullopt; } } } @@ -4053,7 +4053,7 @@ // drop this. if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF || AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg)) - return None; + return std::nullopt; // It's cheaper to materialize a single 32-bit zero for vaddr than the two // moves required to copy a 64-bit SGPR to VGPR. 
@@ -4121,7 +4121,7 @@ } if (!isSGPR(SAddr)) - return None; + return std::nullopt; return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); }, // saddr @@ -4166,17 +4166,17 @@ auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI); if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD) - return None; + return std::nullopt; Register RHS = AddrDef->MI->getOperand(2).getReg(); if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) - return None; + return std::nullopt; Register LHS = AddrDef->MI->getOperand(1).getReg(); auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI); if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset)) - return None; + return std::nullopt; if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) { int FI = LHSDef->MI->getOperand(1).getIndex(); @@ -4188,7 +4188,7 @@ } if (!isSGPR(LHS)) - return None; + return std::nullopt; return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr @@ -4825,7 +4825,7 @@ // getIConstantVRegVal sexts any values, so see if that matters. Optional OffsetVal = getIConstantVRegSExtVal(Reg, MRI); if (!OffsetVal || !isInt<32>(*OffsetVal)) - return None; + return std::nullopt; return Lo_32(*OffsetVal); } @@ -4868,12 +4868,12 @@ std::tie(SOffset, Offset) = AMDGPU::getBaseWithConstantOffset(*MRI, Root.getReg(), KnownBits); if (!SOffset) - return None; + return std::nullopt; Optional EncodedOffset = AMDGPU::getSMRDEncodedOffset(STI, Offset, /* IsBuffer */ true); if (!EncodedOffset) - return None; + return std::nullopt; assert(MRI->getType(SOffset) == LLT::scalar(32)); return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }, diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp @@ -92,7 +92,7 @@ for (Argument &Arg : F.args()) { const bool IsByRef = Arg.hasByRefAttr(); Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType(); - MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : None; + MaybeAlign ParamAlign = IsByRef ? 
Arg.getParamAlign() : std::nullopt; Align ABITypeAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy); uint64_t Size = DL.getTypeSizeInBits(ArgTy); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h @@ -65,7 +65,7 @@ SSID == getSystemOneAddressSpaceSSID()) return 4; - return None; + return std::nullopt; } /// \returns True if \p SSID is restricted to single address space, false @@ -126,7 +126,7 @@ const auto &AIO = getSyncScopeInclusionOrdering(A); const auto &BIO = getSyncScopeInclusionOrdering(B); if (!AIO || !BIO) - return None; + return std::nullopt; bool IsAOneAddressSpace = isOneAddressSpace(A); bool IsBOneAddressSpace = isOneAddressSpace(B); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -282,7 +282,7 @@ Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 2); LoadInst *LoadZU = Builder.CreateAlignedLoad(I32Ty, GEPZU, Align(4)); - MDNode *MD = MDNode::get(Mod->getContext(), None); + MDNode *MD = MDNode::get(Mod->getContext(), std::nullopt); LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD); LoadZU->setMetadata(LLVMContext::MD_invariant_load, MD); ST.makeLIDRangeMetadata(LoadZU); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp @@ -62,7 +62,7 @@ return false; } // Wait until the values are propagated from the predecessors - return None; + return std::nullopt; } public: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -540,7 +540,7 @@ const bool IsByRef = Arg.hasByRefAttr(); Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType(); Align Alignment = DL.getValueOrABITypeAlignment( - IsByRef ? Arg.getParamAlign() : None, ArgTy); + IsByRef ? 
Arg.getParamAlign() : std::nullopt, ArgTy); uint64_t AllocSize = DL.getTypeAllocSize(ArgTy); ExplicitArgBytes = alignTo(ExplicitArgBytes, Alignment) + AllocSize; MaxAlign = std::max(MaxAlign, Alignment); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -17,6 +17,7 @@ #include "GCNSubtarget.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Target/TargetMachine.h" +#include #include namespace llvm { @@ -39,8 +40,8 @@ AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL); ~AMDGPUTargetMachine() override; const TargetSubtargetInfo *getSubtargetImpl() const; @@ -77,8 +78,9 @@ public: GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -57,6 +57,7 @@ #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include "llvm/Transforms/Vectorize.h" +#include using namespace llvm; using namespace llvm::PatternMatch; @@ -521,7 +522,7 @@ return "r600"; } -static Reloc::Model getEffectiveRelocModel(Optional RM) { +static Reloc::Model getEffectiveRelocModel(std::optional RM) { // The AMDGPU toolchain only supports generating shared objects, so we // must always use PIC. 
return Reloc::PIC_; @@ -530,8 +531,8 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OptLevel) : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU), FS, Options, getEffectiveRelocModel(RM), @@ -800,8 +801,8 @@ GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} @@ -1451,7 +1452,7 @@ Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, RegName.Value.size(), SourceMgr::DK_Error, "incorrect register class for field", RegName.Value, - None, None); + std::nullopt, std::nullopt); SourceRange = RegName.SourceRange; return true; }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -19,6 +19,7 @@ #include "AMDGPU.h" #include "llvm/CodeGen/BasicTTIImpl.h" +#include namespace llvm { @@ -132,16 +133,16 @@ unsigned AddrSpace) const; bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const; - Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, - unsigned SrcAddrSpace, unsigned DestAddrSpace, - unsigned SrcAlign, unsigned DestAlign, - Optional AtomicElementSize) const; + Type *getMemcpyLoopLoweringType( + LLVMContext & Context, Value * Length, unsigned SrcAddrSpace, + unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, + std::optional AtomicElementSize) const; void getMemcpyLoopResidualLoweringType( SmallVectorImpl &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, - Optional AtomicCpySize) const; + std::optional AtomicCpySize) const; unsigned getMaxInterleaveFactor(unsigned VF); bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; @@ -188,9 +189,9 @@ bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1, InstCombiner &IC) const; - Optional instCombineIntrinsic(InstCombiner &IC, - IntrinsicInst &II) const; - Optional simplifyDemandedVectorEltsIntrinsic( + std::optional instCombineIntrinsic(InstCombiner &IC, + IntrinsicInst &II) const; + std::optional simplifyDemandedVectorEltsIntrinsic( InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function @@ -202,7 +203,7 @@ ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, - ArrayRef Args = None); + ArrayRef Args = std::nullopt); bool areInlineCompatible(const Function *Caller, const Function *Callee) const; @@ -213,7 +214,7 @@ int getInlinerVectorBonusPercent() { return 0; } InstructionCost getArithmeticReductionCost( - unsigned Opcode, VectorType *Ty, Optional FMF, + unsigned Opcode, VectorType *Ty, std::optional FMF, TTI::TargetCostKind CostKind); InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
@@ -23,6 +23,7 @@ #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/KnownBits.h" +#include using namespace llvm; @@ -401,7 +402,7 @@ Type *GCNTTIImpl::getMemcpyLoopLoweringType( LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, - Optional AtomicElementSize) const { + std::optional AtomicElementSize) const { if (AtomicElementSize) return Type::getIntNTy(Context, *AtomicElementSize * 8); @@ -433,7 +434,7 @@ SmallVectorImpl &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, - Optional AtomicCpySize) const { + std::optional AtomicCpySize) const { assert(RemainingBytes < 16); if (AtomicCpySize) @@ -756,7 +757,7 @@ InstructionCost GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind) { if (TTI::requiresOrderedReduction(FMF)) return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -2888,7 +2888,7 @@ case IS_SGPR: return StringRef(".amdgcn.next_free_sgpr"); default: - return None; + return std::nullopt; } } @@ -4092,7 +4092,7 @@ // with 9-bit operands only. Ignore encodings which do not accept these. const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; if ((Desc.TSFlags & Enc) == 0) - return None; + return std::nullopt; for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); @@ -4112,7 +4112,7 @@ } } - return None; + return std::nullopt; } SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -2232,7 +2232,7 @@ Size = 64; // Size = 64 regardless of success or failure. 
return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address); } - return None; + return std::nullopt; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -933,6 +933,9 @@ } bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) { + if (PressureAfter == PressureBefore) + return false; + if (GCNSchedStage::shouldRevertScheduling(WavesAfter)) return true; @@ -956,6 +959,9 @@ } bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) { + if (PressureAfter == PressureBefore) + return false; + if (GCNSchedStage::shouldRevertScheduling(WavesAfter)) return true; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -168,7 +168,7 @@ .Case(#Name, MCFixupKind(FirstLiteralRelocationKind + Value)) #include "llvm/BinaryFormat/ELFRelocs/AMDGPU.def" #undef ELF_RELOC - .Default(None); + .Default(std::nullopt); } const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo( diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -105,13 +105,13 @@ return TargetID; } void initializeTargetID(const MCSubtargetInfo &STI) { - assert(TargetID == None && "TargetID can only be initialized once"); + assert(TargetID == std::nullopt && "TargetID can only be initialized once"); TargetID.emplace(STI); } void initializeTargetID(const MCSubtargetInfo &STI, StringRef FeatureString) { initializeTargetID(STI); - assert(getTargetID() != None && "TargetID is None"); + assert(getTargetID() != std::nullopt && "TargetID is None"); getTargetID()->setTargetIDFromFeaturesString(FeatureString); } }; diff --git a/llvm/lib/Target/AMDGPU/R600TargetMachine.h b/llvm/lib/Target/AMDGPU/R600TargetMachine.h --- a/llvm/lib/Target/AMDGPU/R600TargetMachine.h +++ b/llvm/lib/Target/AMDGPU/R600TargetMachine.h @@ -17,6 +17,7 @@ #include "AMDGPUTargetMachine.h" #include "R600Subtarget.h" #include "llvm/Target/TargetMachine.h" +#include namespace llvm { @@ -31,8 +32,9 @@ public: R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp b/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp @@ -18,6 +18,7 @@ #include "R600MachineScheduler.h" #include "R600TargetTransformInfo.h" #include "llvm/Transforms/Scalar.h" +#include using namespace llvm; @@ -50,8 +51,8 @@ R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) { setRequiresStructuredCFG(true); diff --git 
a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -1989,12 +1989,12 @@ return Op.getImm(); if (!Op.isReg()) - return None; + return std::nullopt; MachineInstr *Def = MRI->getUniqueVRegDef(Op.getReg()); if (!Def || Def->getOpcode() != AMDGPU::S_MOV_B32 || !Def->getOperand(1).isImm()) - return None; + return std::nullopt; return Def->getOperand(1).getImm(); } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -587,7 +587,7 @@ if (Any) return AI; - return None; + return std::nullopt; } yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( @@ -651,13 +651,13 @@ Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1, SourceMgr::DK_Error, toString(FIOrErr.takeError()), - "", None, None); + "", std::nullopt, std::nullopt); SourceRange = YamlMFI.ScavengeFI->SourceRange; return true; } ScavengeFI = *FIOrErr; } else { - ScavengeFI = None; + ScavengeFI = std::nullopt; } return false; } diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -664,7 +664,7 @@ return std::make_tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false); - return None; + return std::nullopt; } SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const { @@ -711,7 +711,7 @@ if (!IsSyncScopeInclusion) { reportUnsupported(MI, "Unsupported non-inclusive atomic synchronization scope"); - return None; + return std::nullopt; } SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID(); @@ -730,7 +730,7 @@ auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace); if (!ScopeOrNone) { reportUnsupported(MI, "Unsupported atomic synchronization scope"); - return None; + return std::nullopt; } std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) = *ScopeOrNone; @@ -738,7 +738,7 @@ ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) || ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) { reportUnsupported(MI, "Unsupported atomic address space"); - return None; + return std::nullopt; } } return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace, @@ -751,7 +751,7 @@ assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); if (!(MI->mayLoad() && !MI->mayStore())) - return None; + return std::nullopt; // Be conservative if there are no memory operands. if (MI->getNumMemOperands() == 0) @@ -765,7 +765,7 @@ assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); if (!(!MI->mayLoad() && MI->mayStore())) - return None; + return std::nullopt; // Be conservative if there are no memory operands. 
if (MI->getNumMemOperands() == 0) @@ -779,7 +779,7 @@ assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE) - return None; + return std::nullopt; AtomicOrdering Ordering = static_cast(MI->getOperand(0).getImm()); @@ -788,7 +788,7 @@ auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC); if (!ScopeOrNone) { reportUnsupported(MI, "Unsupported atomic synchronization scope"); - return None; + return std::nullopt; } SIAtomicScope Scope = SIAtomicScope::NONE; @@ -800,7 +800,7 @@ if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) || ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) { reportUnsupported(MI, "Unsupported atomic address space"); - return None; + return std::nullopt; } return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC, @@ -812,7 +812,7 @@ assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); if (!(MI->mayLoad() && MI->mayStore())) - return None; + return std::nullopt; // Be conservative if there are no memory operands. if (MI->getNumMemOperands() == 0) diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -504,17 +504,17 @@ const MachineInstr *DefInst = Def.getParent(); if (!TII->isFoldableCopy(*DefInst)) - return None; + return std::nullopt; const MachineOperand &Copied = DefInst->getOperand(1); if (!Copied.isImm()) - return None; + return std::nullopt; return Copied.getImm(); } } - return None; + return std::nullopt; } std::unique_ptr @@ -697,19 +697,19 @@ auto CheckOROperandsForSDWA = [&](const MachineOperand *Op1, const MachineOperand *Op2) -> CheckRetType { if (!Op1 || !Op1->isReg() || !Op2 || !Op2->isReg()) - return CheckRetType(None); + return CheckRetType(std::nullopt); MachineOperand *Op1Def = findSingleRegDef(Op1, MRI); if (!Op1Def) - return CheckRetType(None); + return CheckRetType(std::nullopt); MachineInstr *Op1Inst = Op1Def->getParent(); if (!TII->isSDWA(*Op1Inst)) - return CheckRetType(None); + return CheckRetType(std::nullopt); MachineOperand *Op2Def = findSingleRegDef(Op2, MRI); if (!Op2Def) - return CheckRetType(None); + return CheckRetType(std::nullopt); return CheckRetType(std::make_pair(Op1Def, Op2Def)); }; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -262,15 +262,17 @@ /// /// For subtargets which support it, \p EnableWavefrontSize32 should match /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field. -unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, - Optional EnableWavefrontSize32 = None); +unsigned +getVGPRAllocGranule(const MCSubtargetInfo *STI, + Optional EnableWavefrontSize32 = std::nullopt); /// \returns VGPR encoding granularity for given subtarget \p STI. /// /// For subtargets which support it, \p EnableWavefrontSize32 should match /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field. -unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, - Optional EnableWavefrontSize32 = None); +unsigned +getVGPREncodingGranule(const MCSubtargetInfo *STI, + Optional EnableWavefrontSize32 = std::nullopt); /// \returns Total number of VGPRs for given subtarget \p STI. 
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI); @@ -292,7 +294,7 @@ /// For subtargets which support it, \p EnableWavefrontSize32 should match the /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field. unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs, - Optional EnableWavefrontSize32 = None); + Optional EnableWavefrontSize32 = std::nullopt); } // end namespace IsaInfo diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -100,7 +100,7 @@ Optional getHsaAbiVersion(const MCSubtargetInfo *STI) { if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA) - return None; + return std::nullopt; switch (AmdhsaCodeObjectVersion) { case 2: @@ -2441,25 +2441,26 @@ // The signed version is always a byte offset. if (!IsBuffer && hasSMRDSignedImmOffset(ST)) { assert(hasSMEMByteOffset(ST)); - return isInt<20>(ByteOffset) ? Optional(ByteOffset) : None; + return isInt<20>(ByteOffset) ? Optional(ByteOffset) : std::nullopt; } if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST)) - return None; + return std::nullopt; int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset) ? Optional(EncodedOffset) - : None; + : std::nullopt; } Optional getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset) { if (!isCI(ST) || !isDwordAligned(ByteOffset)) - return None; + return std::nullopt; int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); - return isUInt<32>(EncodedOffset) ? Optional(EncodedOffset) : None; + return isUInt<32>(EncodedOffset) ? Optional(EncodedOffset) + : std::nullopt; } unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed) { diff --git a/llvm/lib/Target/ARC/ARCTargetMachine.h b/llvm/lib/Target/ARC/ARCTargetMachine.h --- a/llvm/lib/Target/ARC/ARCTargetMachine.h +++ b/llvm/lib/Target/ARC/ARCTargetMachine.h @@ -15,6 +15,7 @@ #include "ARCSubtarget.h" #include "llvm/Target/TargetMachine.h" +#include namespace llvm { @@ -27,8 +28,9 @@ public: ARCTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); ~ARCTargetMachine() override; const ARCSubtarget *getSubtargetImpl() const { return &Subtarget; } diff --git a/llvm/lib/Target/ARC/ARCTargetMachine.cpp b/llvm/lib/Target/ARC/ARCTargetMachine.cpp --- a/llvm/lib/Target/ARC/ARCTargetMachine.cpp +++ b/llvm/lib/Target/ARC/ARCTargetMachine.cpp @@ -17,10 +17,11 @@ #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/MC/TargetRegistry.h" +#include using namespace llvm; -static Reloc::Model getRelocModel(Optional RM) { +static Reloc::Model getRelocModel(std::optional RM) { return RM.value_or(Reloc::Static); } @@ -28,8 +29,8 @@ ARCTargetMachine::ARCTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine(T, "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-" diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp 
@@ -1065,7 +1065,7 @@ if (!MI.isMoveReg() || (MI.getOpcode() == ARM::VORRq && MI.getOperand(1).getReg() != MI.getOperand(2).getReg())) - return None; + return std::nullopt; return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; } @@ -1094,7 +1094,7 @@ // We need to produce a fragment description (the call site value of s1 is // /not/ just d8). if (DstReg != Reg) - return None; + return std::nullopt; } return TargetInstrInfo::describeLoadedValue(MI, Reg); } @@ -5565,19 +5565,19 @@ // destination register. const MachineOperand &Op0 = MI.getOperand(0); if (!Op0.isReg() || Reg != Op0.getReg()) - return None; + return std::nullopt; // We describe SUBri or ADDri instructions. if (Opcode == ARM::SUBri) Sign = -1; else if (Opcode != ARM::ADDri) - return None; + return std::nullopt; // TODO: Third operand can be global address (usually some string). Since // strings can be relocated we cannot calculate their offsets for // now. if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm()) - return None; + return std::nullopt; Offset = MI.getOperand(2).getImm() * Sign; return RegImmPair{MI.getOperand(1).getReg(), Offset}; diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp --- a/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -188,10 +188,10 @@ bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt); bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr, - MaybeAlign Alignment = None, bool isZExt = true, + MaybeAlign Alignment = std::nullopt, bool isZExt = true, bool allocReg = true); bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, - MaybeAlign Alignment = None); + MaybeAlign Alignment = std::nullopt); bool ARMComputeAddress(const Value *Obj, Address &Addr); void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3); bool ARMIsMemCpySmall(uint64_t Len); diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3536,7 +3536,7 @@ unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1; unsigned LastOne = A.countTrailingZeros(); if (A.countPopulation() != (FirstOne - LastOne + 1)) - return None; + return std::nullopt; return std::make_pair(FirstOne, LastOne); } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -30,6 +30,7 @@ #include "llvm/IR/InlineAsm.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/MachineValueType.h" +#include #include namespace llvm { @@ -888,16 +889,15 @@ MachineBasicBlock *Entry, const SmallVectorImpl &Exits) const override; - bool - splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, - SDValue *Parts, unsigned NumParts, MVT PartVT, - Optional CC) const override; + bool splitValueIntoRegisterParts( + SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, std::optional CC) + const override; - SDValue - joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, - const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, - Optional CC) const override; + SDValue joinRegisterPartsIntoValue( + SelectionDAG & DAG, const SDLoc &DL, const SDValue *Parts, + unsigned NumParts, MVT PartVT, EVT ValueVT, + std::optional CC) const override; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff 
--git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -110,6 +110,7 @@ #include #include #include +#include #include #include #include @@ -4420,7 +4421,7 @@ bool ARMTargetLowering::splitValueIntoRegisterParts( SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, - unsigned NumParts, MVT PartVT, Optional CC) const { + unsigned NumParts, MVT PartVT, std::optional CC) const { bool IsABIRegCopy = CC.has_value(); EVT ValueVT = Val.getValueType(); if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && @@ -4438,7 +4439,7 @@ SDValue ARMTargetLowering::joinRegisterPartsIntoValue( SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, Optional CC) const { + MVT PartVT, EVT ValueVT, std::optional CC) const { bool IsABIRegCopy = CC.has_value(); if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) { diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.h b/llvm/lib/Target/ARM/ARMTargetMachine.h --- a/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -21,6 +21,7 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" #include +#include namespace llvm { @@ -41,8 +42,9 @@ public: ARMBaseTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool isLittle); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool isLittle); ~ARMBaseTargetMachine() override; const ARMSubtarget *getSubtargetImpl(const Function &F) const override; @@ -86,8 +88,9 @@ public: ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); }; /// ARM/Thumb big endian target machine. @@ -96,8 +99,9 @@ public: ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); }; } // end namespace llvm diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -53,6 +53,7 @@ #include "llvm/Transforms/Scalar.h" #include #include +#include #include using namespace llvm; @@ -195,7 +196,7 @@ } static Reloc::Model getEffectiveRelocModel(const Triple &TT, - Optional RM) { + std::optional RM) { if (!RM) // Default relocation model on Darwin is PIC. return TT.isOSBinFormatMachO() ? 
Reloc::PIC_ : Reloc::Static; @@ -216,8 +217,8 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool isLittle) : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), @@ -316,16 +317,16 @@ ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -26,6 +26,7 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/Function.h" #include "llvm/MC/SubtargetFeature.h" +#include namespace llvm { @@ -118,9 +119,9 @@ return !ST->isTargetDarwin() && !ST->hasMVEFloatOps(); } - Optional instCombineIntrinsic(InstCombiner &IC, - IntrinsicInst &II) const; - Optional simplifyDemandedVectorEltsIntrinsic( + std::optional instCombineIntrinsic(InstCombiner &IC, + IntrinsicInst &II) const; + std::optional simplifyDemandedVectorEltsIntrinsic( InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function @@ -215,7 +216,7 @@ ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, - ArrayRef Args = None); + ArrayRef Args = std::nullopt); bool preferInLoopReduction(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const; @@ -274,11 +275,11 @@ const Instruction *I = nullptr); InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind); InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind); InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *ValTy, diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include using namespace llvm; @@ -117,7 +118,7 @@ return TTI::AMK_None; } -Optional +std::optional ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { using namespace PatternMatch; Intrinsic::ID IID = II.getIntrinsicID(); @@ -243,13 +244,13 @@ return IC.eraseInstFromFunction(*User); } } - return None; + return std::nullopt; } } - return None; + return std::nullopt; } -Optional ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic( +std::optional ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic( InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function @@ -271,7 +272,7 @@ // The other lanes will be defined from the 
inserted elements. UndefElts &= APInt::getSplat(NumElts, !IsTop ? APInt::getLowBitsSet(2, 1) : APInt::getHighBitsSet(2, 1)); - return None; + return std::nullopt; }; switch (II.getIntrinsicID()) { @@ -288,7 +289,7 @@ break; } - return None; + return std::nullopt; } InstructionCost ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, @@ -1653,7 +1654,7 @@ InstructionCost ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind) { if (TTI::requiresOrderedReduction(FMF)) return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind); @@ -1678,7 +1679,7 @@ InstructionCost ARMTTIImpl::getExtendedReductionCost( unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, - Optional FMF, TTI::TargetCostKind CostKind) { + std::optional FMF, TTI::TargetCostKind CostKind) { EVT ValVT = TLI->getValueType(DL, ValTy); EVT ResVT = TLI->getValueType(DL, ResTy); diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -429,15 +429,15 @@ VPTState.CurPosition = ~0U; } - void Note(SMLoc L, const Twine &Msg, SMRange Range = None) { + void Note(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) { return getParser().Note(L, Msg, Range); } - bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) { + bool Warning(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) { return getParser().Warning(L, Msg, Range); } - bool Error(SMLoc L, const Twine &Msg, SMRange Range = None) { + bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) { return getParser().Error(L, Msg, Range); } @@ -11399,7 +11399,7 @@ TagLoc = Parser.getTok().getLoc(); if (Parser.getTok().is(AsmToken::Identifier)) { StringRef Name = Parser.getTok().getIdentifier(); - Optional Ret = ELFAttrs::attrTypeFromString( + std::optional Ret = ELFAttrs::attrTypeFromString( Name, ARMBuildAttrs::getARMAttributeTags()); if (!Ret) { Error(TagLoc, "attribute name not recognised: " + Name); diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -48,7 +48,7 @@ } // end anonymous namespace Optional ARMAsmBackend::getFixupKind(StringRef Name) const { - return None; + return std::nullopt; } Optional ARMAsmBackendELF::getFixupKind(StringRef Name) const { @@ -62,7 +62,7 @@ .Case("BFD_RELOC_32", ELF::R_ARM_ABS32) .Default(-1u); if (Type == -1u) - return None; + return std::nullopt; return static_cast(FirstLiteralRelocationKind + Type); } diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -444,12 +444,12 @@ evaluateMemOpAddrForAddrMode_i12(const MCInst &Inst, const MCInstrDesc &Desc, unsigned MemOpIndex, uint64_t Addr) { if (MemOpIndex + 1 >= Desc.getNumOperands()) - return None; + return std::nullopt; const MCOperand &MO1 = Inst.getOperand(MemOpIndex); const MCOperand &MO2 = Inst.getOperand(MemOpIndex + 1); if (!MO1.isReg() || MO1.getReg() != ARM::PC || !MO2.isImm()) - return None; + return std::nullopt; int32_t OffImm = (int32_t)MO2.getImm(); // Special value for #-0. All others are normal. 
@@ -463,13 +463,13 @@ unsigned MemOpIndex, uint64_t Addr) { if (MemOpIndex + 2 >= Desc.getNumOperands()) - return None; + return std::nullopt; const MCOperand &MO1 = Inst.getOperand(MemOpIndex); const MCOperand &MO2 = Inst.getOperand(MemOpIndex + 1); const MCOperand &MO3 = Inst.getOperand(MemOpIndex + 2); if (!MO1.isReg() || MO1.getReg() != ARM::PC || MO2.getReg() || !MO3.isImm()) - return None; + return std::nullopt; unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); ARM_AM::AddrOpc Op = ARM_AM::getAM3Op(MO3.getImm()); @@ -484,12 +484,12 @@ unsigned MemOpIndex, uint64_t Addr) { if (MemOpIndex + 1 >= Desc.getNumOperands()) - return None; + return std::nullopt; const MCOperand &MO1 = Inst.getOperand(MemOpIndex); const MCOperand &MO2 = Inst.getOperand(MemOpIndex + 1); if (!MO1.isReg() || MO1.getReg() != ARM::PC || !MO2.isImm()) - return None; + return std::nullopt; unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm()); ARM_AM::AddrOpc Op = ARM_AM::getAM5Op(MO2.getImm()); @@ -503,12 +503,12 @@ evaluateMemOpAddrForAddrMode5FP16(const MCInst &Inst, const MCInstrDesc &Desc, unsigned MemOpIndex, uint64_t Addr) { if (MemOpIndex + 1 >= Desc.getNumOperands()) - return None; + return std::nullopt; const MCOperand &MO1 = Inst.getOperand(MemOpIndex); const MCOperand &MO2 = Inst.getOperand(MemOpIndex + 1); if (!MO1.isReg() || MO1.getReg() != ARM::PC || !MO2.isImm()) - return None; + return std::nullopt; unsigned ImmOffs = ARM_AM::getAM5FP16Offset(MO2.getImm()); ARM_AM::AddrOpc Op = ARM_AM::getAM5FP16Op(MO2.getImm()); @@ -523,12 +523,12 @@ evaluateMemOpAddrForAddrModeT2_i8s4(const MCInst &Inst, const MCInstrDesc &Desc, unsigned MemOpIndex, uint64_t Addr) { if (MemOpIndex + 1 >= Desc.getNumOperands()) - return None; + return std::nullopt; const MCOperand &MO1 = Inst.getOperand(MemOpIndex); const MCOperand &MO2 = Inst.getOperand(MemOpIndex + 1); if (!MO1.isReg() || MO1.getReg() != ARM::PC || !MO2.isImm()) - return None; + return std::nullopt; int32_t OffImm = (int32_t)MO2.getImm(); assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); @@ -545,7 +545,7 @@ unsigned MemOpIndex, uint64_t Addr) { const MCOperand &MO1 = Inst.getOperand(MemOpIndex); if (!MO1.isImm()) - return None; + return std::nullopt; int32_t OffImm = (int32_t)MO1.getImm(); @@ -569,14 +569,14 @@ // Only load instructions can have PC-relative memory addressing. if (!Desc.mayLoad()) - return None; + return std::nullopt; // PC-relative addressing does not update the base register. uint64_t TSFlags = Desc.TSFlags; unsigned IndexMode = (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; if (IndexMode != ARMII::IndexModeNone) - return None; + return std::nullopt; // Find the memory addressing operand in the instruction. unsigned OpIndex = Desc.NumDefs; @@ -584,7 +584,7 @@ Desc.OpInfo[OpIndex].OperandType != MCOI::OPERAND_MEMORY) ++OpIndex; if (OpIndex == Desc.getNumOperands()) - return None; + return std::nullopt; // Base address for PC-relative addressing is always 32-bit aligned. 
Addr &= ~0x3; @@ -609,7 +609,7 @@ unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); switch (AddrMode) { default: - return None; + return std::nullopt; case ARMII::AddrMode_i12: return evaluateMemOpAddrForAddrMode_i12(Inst, Desc, OpIndex, Addr); case ARMII::AddrMode3: diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.h b/llvm/lib/Target/AVR/AVRTargetMachine.h --- a/llvm/lib/Target/AVR/AVRTargetMachine.h +++ b/llvm/lib/Target/AVR/AVRTargetMachine.h @@ -22,6 +22,8 @@ #include "AVRSelectionDAGInfo.h" #include "AVRSubtarget.h" +#include + namespace llvm { /// A generic AVR implementation. @@ -29,8 +31,9 @@ public: AVRTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); const AVRSubtarget *getSubtargetImpl() const; const AVRSubtarget *getSubtargetImpl(const Function &) const override; diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/llvm/lib/Target/AVR/AVRTargetMachine.cpp --- a/llvm/lib/Target/AVR/AVRTargetMachine.cpp +++ b/llvm/lib/Target/AVR/AVRTargetMachine.cpp @@ -23,6 +23,8 @@ #include "MCTargetDesc/AVRMCTargetDesc.h" #include "TargetInfo/AVRTargetInfo.h" +#include + namespace llvm { static const char *AVRDataLayout = @@ -37,15 +39,15 @@ return CPU; } -static Reloc::Model getEffectiveRelocModel(Optional RM) { +static Reloc::Model getEffectiveRelocModel(std::optional RM) { return RM.value_or(Reloc::Static); } AVRTargetMachine::AVRTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine(T, AVRDataLayout, TT, getCPU(CPU), FS, Options, getEffectiveRelocModel(RM), diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.h b/llvm/lib/Target/BPF/BPFTargetMachine.h --- a/llvm/lib/Target/BPF/BPFTargetMachine.h +++ b/llvm/lib/Target/BPF/BPFTargetMachine.h @@ -15,6 +15,7 @@ #include "BPFSubtarget.h" #include "llvm/Target/TargetMachine.h" +#include namespace llvm { class BPFTargetMachine : public LLVMTargetMachine { @@ -24,8 +25,9 @@ public: BPFTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); const BPFSubtarget *getSubtargetImpl() const { return &Subtarget; } const BPFSubtarget *getSubtargetImpl(const Function &) const override { diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp --- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp +++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp @@ -27,6 +27,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" +#include using namespace llvm; static cl:: @@ -57,15 +58,15 @@ return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"; } -static Reloc::Model getEffectiveRelocModel(Optional RM) { +static Reloc::Model getEffectiveRelocModel(std::optional RM) { return RM.value_or(Reloc::PIC_); } BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, 
getEffectiveRelocModel(RM), diff --git a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp --- a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp +++ b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp @@ -1637,7 +1637,7 @@ TagLoc = Parser.getTok().getLoc(); if (Parser.getTok().is(AsmToken::Identifier)) { StringRef Name = Parser.getTok().getIdentifier(); - Optional Ret = + std::optional Ret = ELFAttrs::attrTypeFromString(Name, CSKYAttrs::getCSKYAttributeTags()); if (!Ret) { Error(TagLoc, "attribute name not recognised: " + Name); diff --git a/llvm/lib/Target/CSKY/CSKYTargetMachine.h b/llvm/lib/Target/CSKY/CSKYTargetMachine.h --- a/llvm/lib/Target/CSKY/CSKYTargetMachine.h +++ b/llvm/lib/Target/CSKY/CSKYTargetMachine.h @@ -16,6 +16,7 @@ #include "CSKYSubtarget.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" +#include namespace llvm { @@ -26,8 +27,9 @@ public: CSKYTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp b/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp --- a/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp +++ b/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" +#include using namespace llvm; @@ -48,8 +49,8 @@ CSKYTargetMachine::CSKYTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM.value_or(Reloc::Static), diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.h b/llvm/lib/Target/DirectX/DirectXTargetMachine.h --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.h +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.h @@ -13,6 +13,7 @@ #include "DirectXSubtarget.h" #include "llvm/Target/TargetMachine.h" +#include namespace llvm { class Function; @@ -23,8 +24,9 @@ public: DirectXTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); ~DirectXTargetMachine() override; diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -32,6 +32,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include using namespace llvm; @@ -82,8 +83,8 @@ DirectXTargetMachine::DirectXTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine(T, "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-" diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.h b/llvm/lib/Target/Hexagon/HexagonDepArch.h --- a/llvm/lib/Target/Hexagon/HexagonDepArch.h +++ 
b/llvm/lib/Target/Hexagon/HexagonDepArch.h @@ -10,6 +10,7 @@ #define LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPARCH_H #include "llvm/ADT/StringSwitch.h" +#include namespace llvm { namespace Hexagon { @@ -29,8 +30,8 @@ V73 }; -inline Optional getCpu(StringRef CPU) { - return StringSwitch>(CPU) +inline std::optional getCpu(StringRef CPU) { + return StringSwitch>(CPU) .Case("generic", Hexagon::ArchEnum::V5) .Case("hexagonv5", Hexagon::ArchEnum::V5) .Case("hexagonv55", Hexagon::ArchEnum::V55) @@ -45,7 +46,7 @@ .Case("hexagonv71", Hexagon::ArchEnum::V71) .Case("hexagonv71t", Hexagon::ArchEnum::V71) .Case("hexagonv73", Hexagon::ArchEnum::V73) - .Default(None); + .Default(std::nullopt); } } // namespace Hexagon } // namespace llvm diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp --- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -1018,7 +1018,7 @@ if (HasAllocFrame) return HasCall ? It : std::next(It); } - return None; + return std::nullopt; } void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const { diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -57,6 +57,7 @@ #include #include #include +#include #include #include diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -31,6 +31,7 @@ #include #include #include +#include using namespace llvm; @@ -92,7 +93,7 @@ HexagonSubtarget & HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { - Optional ArchVer = Hexagon::getCpu(CPUString); + std::optional ArchVer = Hexagon::getCpu(CPUString); if (ArchVer) HexagonArchVersion = *ArchVer; else diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h @@ -17,6 +17,7 @@ #include "HexagonSubtarget.h" #include "HexagonTargetObjectFile.h" #include "llvm/Target/TargetMachine.h" +#include namespace llvm { @@ -29,8 +30,9 @@ public: HexagonTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); ~HexagonTargetMachine() override; const HexagonSubtarget *getSubtargetImpl(const Function &F) const override; diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -28,6 +28,7 @@ #include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Scalar.h" +#include using namespace llvm; @@ -192,7 +193,7 @@ FunctionPass *createHexagonVExtract(); } // end namespace llvm; -static Reloc::Model getEffectiveRelocModel(Optional RM) { +static Reloc::Model getEffectiveRelocModel(std::optional RM) { return RM.value_or(Reloc::Static); } @@ -222,8 +223,8 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + 
std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) // Specify the vector alignment explicitly. For v512x1, the calculated // alignment would be 512*alignment(i1), which is 512 bytes, instead of diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -129,7 +129,7 @@ ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, Type *SubTp, - ArrayRef Args = None); + ArrayRef Args = std::nullopt); InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -127,7 +126,7 @@ Value *createHvxIntrinsic(IRBuilderBase &Builder, Intrinsic::ID IntID, Type *RetTy, ArrayRef Args, - ArrayRef ArgTys = None) const; + ArrayRef ArgTys = std::nullopt) const; SmallVector splitVectorElements(IRBuilderBase &Builder, Value *Vec, unsigned ToWidth) const; Value *joinVectorElements(IRBuilderBase &Builder, ArrayRef Values, @@ -1381,7 +1380,7 @@ break; } - if (Results.back() == nullptr) + if (Results.empty() || Results.back() == nullptr) return nullptr; Value *Cat = HVC.concat(Builder, Results); @@ -2391,7 +2390,8 @@ BasicBlock::const_iterator To, const T &IgnoreInsts) const -> bool { - auto getLocOrNone = [this](const Instruction &I) -> Optional { + auto getLocOrNone = + [this](const Instruction &I) -> std::optional { if (const auto *II = dyn_cast(&I)) { switch (II->getIntrinsicID()) { case Intrinsic::masked_load: diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h @@ -24,6 +24,7 @@ #include "llvm/Support/SMLoc.h" #include #include +#include #include namespace llvm { @@ -148,12 +149,12 @@ // Number of duplex insns unsigned duplex; unsigned pSlot3Cnt; - Optional PrefSlot3Inst; + std::optional PrefSlot3Inst; unsigned memops; unsigned ReservedSlotMask; SmallVector branchInsts; - Optional Slot1AOKLoc; - Optional NoSlot1StoreLoc; + std::optional Slot1AOKLoc; + std::optional NoSlot1StoreLoc; }; // Insn handles in a bundle. 
HexagonPacket Packet; @@ -179,7 +180,7 @@ const bool DoShuffle); void permitNonSlot(); - Optional tryAuction(HexagonPacketSummary const &Summary); + std::optional tryAuction(HexagonPacketSummary const &Summary); HexagonPacketSummary GetPacketSummary(); bool ValidPacketMemoryOps(HexagonPacketSummary const &Summary) const; diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/raw_ostream.h" #include #include +#include #include #include @@ -316,7 +317,7 @@ } bool HexagonShuffler::ValidResourceUsage(HexagonPacketSummary const &Summary) { - Optional ShuffledPacket = tryAuction(Summary); + std::optional ShuffledPacket = tryAuction(Summary); if (!ShuffledPacket) { reportResourceError(Summary, "slot error"); @@ -623,7 +624,7 @@ return !CheckFailure; } -llvm::Optional +std::optional HexagonShuffler::tryAuction(HexagonPacketSummary const &Summary) { HexagonPacket PacketResult = Packet; HexagonUnitAuction AuctionCore(Summary.ReservedSlotMask); @@ -642,7 +643,7 @@ << llvm::format_hex(ISJ.Core.getUnits(), 4, true) << "\n"; ); - Optional Res; + std::optional Res; if (ValidSlots) Res = PacketResult; diff --git a/llvm/lib/Target/Lanai/LanaiTargetMachine.h b/llvm/lib/Target/Lanai/LanaiTargetMachine.h --- a/llvm/lib/Target/Lanai/LanaiTargetMachine.h +++ b/llvm/lib/Target/Lanai/LanaiTargetMachine.h @@ -18,6 +18,7 @@ #include "LanaiSelectionDAGInfo.h" #include "LanaiSubtarget.h" #include "llvm/Target/TargetMachine.h" +#include namespace llvm { @@ -29,8 +30,8 @@ LanaiTargetMachine(const Target &TheTarget, const Triple &TargetTriple, StringRef Cpu, StringRef FeatureString, const TargetOptions &Options, - Optional RelocationModel, - Optional CodeModel, + std::optional RM, + std::optional CodeModel, CodeGenOpt::Level OptLevel, bool JIT); const LanaiSubtarget * diff --git a/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp b/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp --- a/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp +++ b/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" +#include using namespace llvm; @@ -47,16 +48,15 @@ "-S64"; // 64 bit natural stack alignment } -static Reloc::Model getEffectiveRelocModel(Optional RM) { +static Reloc::Model getEffectiveRelocModel(std::optional RM) { return RM.value_or(Reloc::PIC_); } -LanaiTargetMachine::LanaiTargetMachine(const Target &T, const Triple &TT, - StringRef Cpu, StringRef FeatureString, - const TargetOptions &Options, - Optional RM, - Optional CodeModel, - CodeGenOpt::Level OptLevel, bool JIT) +LanaiTargetMachine::LanaiTargetMachine( + const Target &T, const Triple &TT, StringRef Cpu, StringRef FeatureString, + const TargetOptions &Options, std::optional RM, + std::optional CodeModel, CodeGenOpt::Level OptLevel, + bool JIT) : LLVMTargetMachine(T, computeDataLayout(), TT, Cpu, FeatureString, Options, getEffectiveRelocModel(RM), getEffectiveCodeModel(CodeModel, CodeModel::Medium), diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp --- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp @@ -107,8 +107,6 @@ return true; } return false; - - return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, 
ExtraCode, OS); } bool LoongArchAsmPrinter::runOnMachineFunction(MachineFunction &MF) { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -822,6 +822,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { SDLoc DL(N); + EVT VT = N->getValueType(0); switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to legalize this operation"); @@ -829,7 +830,7 @@ case ISD::SRA: case ISD::SRL: case ISD::ROTR: - assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + assert(VT == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); if (N->getOperand(1).getOpcode() != ISD::Constant) { Results.push_back(customLegalizeToWOp(N, DAG, 2)); @@ -844,32 +845,31 @@ } break; case ISD::FP_TO_SINT: { - assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + assert(VT == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); SDValue Src = N->getOperand(0); - EVT VT = EVT::getFloatingPointVT(N->getValueSizeInBits(0)); + EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0)); if (getTypeAction(*DAG.getContext(), Src.getValueType()) != TargetLowering::TypeSoftenFloat) { - SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, VT, Src); - Results.push_back(DAG.getNode(ISD::BITCAST, DL, N->getValueType(0), Dst)); + SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src); + Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst)); return; } // If the FP type needs to be softened, emit a library call using the 'si' // version. If we left it to default legalization we'd end up with 'di'. 
RTLIB::Libcall LC; - LC = RTLIB::getFPTOSINT(Src.getValueType(), N->getValueType(0)); + LC = RTLIB::getFPTOSINT(Src.getValueType(), VT); MakeLibCallOptions CallOptions; EVT OpVT = Src.getValueType(); - CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + CallOptions.setTypeListBeforeSoften(OpVT, VT, true); SDValue Chain = SDValue(); SDValue Result; std::tie(Result, Chain) = - makeLibCall(DAG, LC, N->getValueType(0), Src, CallOptions, DL, Chain); + makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain); Results.push_back(Result); break; } case ISD::BITCAST: { - EVT VT = N->getValueType(0); SDValue Src = N->getOperand(0); EVT SrcVT = Src.getValueType(); if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() && @@ -881,7 +881,7 @@ break; } case ISD::FP_TO_UINT: { - assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + assert(VT == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); auto &TLI = DAG.getTargetLoweringInfo(); SDValue Tmp1, Tmp2; @@ -891,7 +891,6 @@ } case ISD::BSWAP: { SDValue Src = N->getOperand(0); - EVT VT = N->getValueType(0); assert((VT == MVT::i16 || VT == MVT::i32) && "Unexpected custom legalization"); MVT GRLenVT = Subtarget.getGRLenVT(); @@ -914,7 +913,6 @@ } case ISD::BITREVERSE: { SDValue Src = N->getOperand(0); - EVT VT = N->getValueType(0); assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) && "Unexpected custom legalization"); MVT GRLenVT = Subtarget.getGRLenVT(); @@ -935,15 +933,14 @@ } case ISD::CTLZ: case ISD::CTTZ: { - assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + assert(VT == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); Results.push_back(customLegalizeToWOp(N, DAG, 1)); break; } case ISD::INTRINSIC_W_CHAIN: { - assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + assert(VT == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); - EVT VT = N->getValueType(0); SDValue Op2 = N->getOperand(2); SDValue Op3 = N->getOperand(3); @@ -988,7 +985,7 @@ else DAG.getContext()->emitError( "On LA32, only 32-bit registers can be read."); - Results.push_back(DAG.getUNDEF(N->getValueType(0))); + Results.push_back(DAG.getUNDEF(VT)); Results.push_back(N->getOperand(0)); break; } diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h @@ -15,6 +15,7 @@ #include "LoongArchSubtarget.h" #include "llvm/Target/TargetMachine.h" +#include namespace llvm { @@ -25,9 +26,9 @@ public: LoongArchTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, CodeGenOpt::Level OL, - bool JIT); + std::optional RM, + std::optional CM, + CodeGenOpt::Level OL, bool JIT); ~LoongArchTargetMachine() override; const LoongArchSubtarget *getSubtargetImpl(const Function &F) const override; diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/MC/TargetRegistry.h" +#include using namespace llvm; @@ -39,14 +40,14 @@ } static Reloc::Model getEffectiveRelocModel(const Triple &TT, - Optional RM) { + std::optional 
RM) { return RM.value_or(Reloc::Static); } LoongArchTargetMachine::LoongArchTargetMachine( const Target &T, const Triple &TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, Optional RM, - Optional CM, CodeGenOpt::Level OL, bool JIT) + const TargetOptions &Options, std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), diff --git a/llvm/lib/Target/M68k/M68kTargetMachine.h b/llvm/lib/Target/M68k/M68kTargetMachine.h --- a/llvm/lib/Target/M68k/M68kTargetMachine.h +++ b/llvm/lib/Target/M68k/M68kTargetMachine.h @@ -22,6 +22,8 @@ #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" +#include + namespace llvm { class formatted_raw_ostream; class M68kRegisterInfo; @@ -35,8 +37,9 @@ public: M68kTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); ~M68kTargetMachine() override; diff --git a/llvm/lib/Target/M68k/M68kTargetMachine.cpp b/llvm/lib/Target/M68k/M68kTargetMachine.cpp --- a/llvm/lib/Target/M68k/M68kTargetMachine.cpp +++ b/llvm/lib/Target/M68k/M68kTargetMachine.cpp @@ -27,6 +27,7 @@ #include "llvm/MC/TargetRegistry.h" #include "llvm/PassRegistry.h" #include +#include using namespace llvm; @@ -70,7 +71,7 @@ } Reloc::Model getEffectiveRelocModel(const Triple &TT, - Optional RM) { + std::optional RM) { // If not defined we default to static if (!RM.has_value()) return Reloc::Static; @@ -78,7 +79,7 @@ return *RM; } -CodeModel::Model getEffectiveCodeModel(Optional CM, +CodeModel::Model getEffectiveCodeModel(std::optional CM, bool JIT) { if (!CM) { return CodeModel::Small; @@ -94,8 +95,8 @@ M68kTargetMachine::M68kTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), diff --git a/llvm/lib/Target/MSP430/MSP430TargetMachine.h b/llvm/lib/Target/MSP430/MSP430TargetMachine.h --- a/llvm/lib/Target/MSP430/MSP430TargetMachine.h +++ b/llvm/lib/Target/MSP430/MSP430TargetMachine.h @@ -16,6 +16,7 @@ #include "MSP430Subtarget.h" #include "llvm/Target/TargetMachine.h" +#include namespace llvm { class StringRef; @@ -24,13 +25,14 @@ /// class MSP430TargetMachine : public LLVMTargetMachine { std::unique_ptr TLOF; - MSP430Subtarget Subtarget; + MSP430Subtarget Subtarget; public: MSP430TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); ~MSP430TargetMachine() override; const MSP430Subtarget *getSubtargetImpl(const Function &F) const override { diff --git a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp --- a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/LegacyPassManager.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/TargetRegistry.h" +#include using namespace llvm; extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeMSP430Target() { @@ -26,7 +27,7 @@ RegisterTargetMachine X(getTheMSP430Target()); } -static Reloc::Model getEffectiveRelocModel(Optional RM) { +static Reloc::Model getEffectiveRelocModel(std::optional RM) { return RM.value_or(Reloc::Static); } @@ -38,8 +39,8 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS, Options, getEffectiveRelocModel(RM), diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp --- a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp @@ -100,7 +100,7 @@ Mips16InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { if (MI.isMoveReg()) return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; - return None; + return std::nullopt; } void Mips16InstrInfo::storeRegToStack(MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp --- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -786,7 +786,7 @@ StringRef CPU = MIPS_MC::selectMipsCPU(TT, TM.getTargetCPU()); StringRef FS = TM.getTargetFeatureString(); const MipsTargetMachine &MTM = static_cast(TM); - const MipsSubtarget STI(TT, CPU, FS, MTM.isLittleEndian(), MTM, None); + const MipsSubtarget STI(TT, CPU, FS, MTM.isLittleEndian(), MTM, std::nullopt); bool IsABICalls = STI.isABICalls(); const MipsABIInfo &ABI = MTM.getABI(); diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/llvm/lib/Target/Mips/MipsInstrInfo.cpp --- a/llvm/lib/Target/Mips/MipsInstrInfo.cpp +++ b/llvm/lib/Target/Mips/MipsInstrInfo.cpp @@ -956,7 +956,7 @@ // TODO: Handle cases where the Reg is sub- or super-register of the // DestReg. if (TRI->isSuperRegister(Reg, DestReg) || TRI->isSubRegister(Reg, DestReg)) - return None; + return std::nullopt; } return TargetInstrInfo::describeLoadedValue(MI, Reg); @@ -968,7 +968,7 @@ // destination register. const MachineOperand &Op0 = MI.getOperand(0); if (!Op0.isReg() || Reg != Op0.getReg()) - return None; + return std::nullopt; switch (MI.getOpcode()) { case Mips::ADDiu: @@ -983,5 +983,5 @@ // TODO: Handle case where Sop1 is a frame-index. } } - return None; + return std::nullopt; } diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp --- a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -228,7 +228,7 @@ // from copyPhysReg function. 
if (isReadOrWriteToDSPReg(MI, isDSPControlWrite)) { if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != (1 << 4)) - return None; + return std::nullopt; else if (isDSPControlWrite) { return DestSourcePair{MI.getOperand(2), MI.getOperand(0)}; @@ -238,7 +238,7 @@ } else if (MI.isMoveReg() || isORCopyInst(MI)) { return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; } - return None; + return std::nullopt; } void MipsSEInstrInfo:: diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.h b/llvm/lib/Target/Mips/MipsTargetMachine.h --- a/llvm/lib/Target/Mips/MipsTargetMachine.h +++ b/llvm/lib/Target/Mips/MipsTargetMachine.h @@ -21,6 +21,7 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" #include +#include namespace llvm { @@ -39,8 +40,9 @@ public: MipsTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT, bool isLittle); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT, bool isLittle); ~MipsTargetMachine() override; TargetTransformInfo getTargetTransformInfo(const Function &F) const override; @@ -83,8 +85,9 @@ public: MipsebTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); }; /// Mips32/64 little endian target machine. @@ -95,8 +98,9 @@ public: MipselTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); }; } // end namespace llvm diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/llvm/lib/Target/Mips/MipsTargetMachine.cpp --- a/llvm/lib/Target/Mips/MipsTargetMachine.cpp +++ b/llvm/lib/Target/Mips/MipsTargetMachine.cpp @@ -41,6 +41,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" +#include #include using namespace llvm; @@ -105,7 +106,7 @@ } static Reloc::Model getEffectiveRelocModel(bool JIT, - Optional RM) { + std::optional RM) { if (!RM || JIT) return Reloc::Static; return *RM; @@ -119,8 +120,8 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT, bool isLittle) : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT, @@ -128,11 +129,12 @@ getEffectiveCodeModel(CM, CodeModel::Small), OL), isLittle(isLittle), TLOF(std::make_unique()), ABI(MipsABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)), - Subtarget(nullptr), DefaultSubtarget(TT, CPU, FS, isLittle, *this, None), + Subtarget(nullptr), + DefaultSubtarget(TT, CPU, FS, isLittle, *this, std::nullopt), NoMips16Subtarget(TT, CPU, FS.empty() ? "-mips16" : FS.str() + ",-mips16", - isLittle, *this, None), + isLittle, *this, std::nullopt), Mips16Subtarget(TT, CPU, FS.empty() ? 
"+mips16" : FS.str() + ",+mips16", - isLittle, *this, None) { + isLittle, *this, std::nullopt) { Subtarget = &DefaultSubtarget; initAsmInfo(); @@ -147,8 +149,8 @@ MipsebTargetMachine::MipsebTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {} @@ -157,8 +159,8 @@ MipselTargetMachine::MipselTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {} diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -833,7 +833,7 @@ case MVT::f64: return Opcode_f64; default: - return None; + return std::nullopt; } } @@ -1087,11 +1087,11 @@ NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar); break; case NVPTXISD::LoadV4: - Opcode = - pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar, - NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None, - NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar, - NVPTX::LDV_f32_v4_avar, None); + Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, + NVPTX::LDV_i8_v4_avar, NVPTX::LDV_i16_v4_avar, + NVPTX::LDV_i32_v4_avar, std::nullopt, + NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar, + NVPTX::LDV_f32_v4_avar, std::nullopt); break; } if (!Opcode) @@ -1114,11 +1114,11 @@ NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi); break; case NVPTXISD::LoadV4: - Opcode = - pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi, - NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None, - NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi, - NVPTX::LDV_f32_v4_asi, None); + Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, + NVPTX::LDV_i8_v4_asi, NVPTX::LDV_i16_v4_asi, + NVPTX::LDV_i32_v4_asi, std::nullopt, + NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi, + NVPTX::LDV_f32_v4_asi, std::nullopt); break; } if (!Opcode) @@ -1145,9 +1145,9 @@ case NVPTXISD::LoadV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64, - NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None, + NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt, NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64, - NVPTX::LDV_f32_v4_ari_64, None); + NVPTX::LDV_f32_v4_ari_64, std::nullopt); break; } } else { @@ -1162,11 +1162,11 @@ NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari); break; case NVPTXISD::LoadV4: - Opcode = - pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari, - NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None, - NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari, - NVPTX::LDV_f32_v4_ari, None); + Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, + NVPTX::LDV_i8_v4_ari, NVPTX::LDV_i16_v4_ari, + NVPTX::LDV_i32_v4_ari, std::nullopt, + NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari, + NVPTX::LDV_f32_v4_ari, std::nullopt); break; } } @@ -1193,9 +1193,9 @@ case NVPTXISD::LoadV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64, - NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None, + NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt, NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64, - 
NVPTX::LDV_f32_v4_areg_64, None); + NVPTX::LDV_f32_v4_areg_64, std::nullopt); break; } } else { @@ -1213,9 +1213,9 @@ case NVPTXISD::LoadV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg, - NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None, + NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, std::nullopt, NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg, - NVPTX::LDV_f32_v4_areg, None); + NVPTX::LDV_f32_v4_areg, std::nullopt); break; } } @@ -1347,22 +1347,22 @@ break; case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar, - NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar, - NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None, - NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar, - NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar, - NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None); + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar, + NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar, + NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt, + NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar, + NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar, + NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt); break; case NVPTXISD::LDUV4: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar, - NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar, - NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None, - NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar, - NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar, - NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None); + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar, + NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar, + NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt, + NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar, + NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar, + NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt); break; } if (!Opcode) @@ -1423,22 +1423,22 @@ break; case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None, - NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None); + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64, + NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64, + NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt, + NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64, + NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64, + NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt); break; case NVPTXISD::LDUV4: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None, - NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None); + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64, + NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64, + NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt, + NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64, + NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64, + NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt); break; } } else { @@ -1493,22 +1493,22 @@ break; case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None, - NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32, 
- NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None); + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32, + NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32, + NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt, + NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32, + NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32, + NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt); break; case NVPTXISD::LDUV4: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None, - NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None); + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32, + NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32, + NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt, + NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32, + NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32, + NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt); break; } } @@ -1569,22 +1569,22 @@ break; case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64, - NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64, - NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None, - NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64, - NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64, - NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None); + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64, + NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64, + NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt, + NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64, + NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64, + NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt); break; case NVPTXISD::LDUV4: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64, - NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64, - NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None, - NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64, - NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64, - NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None); + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64, + NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64, + NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt, + NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64, + NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64, + NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt); break; } } else { @@ -1639,22 +1639,22 @@ break; case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32, - NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32, - NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None, - NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32, - NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32, - NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None); + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32, + NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32, + NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt, + NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32, + NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32, + NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt); break; case NVPTXISD::LDUV4: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32, - NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32, - NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None, - NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32, - NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32, - 
NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None); + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32, + NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32, + NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt, + NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32, + NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32, + NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt); break; } } @@ -1955,11 +1955,11 @@ NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar); break; case NVPTXISD::StoreV4: - Opcode = - pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar, - NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None, - NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar, - NVPTX::STV_f32_v4_avar, None); + Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, + NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar, + NVPTX::STV_i32_v4_avar, std::nullopt, + NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar, + NVPTX::STV_f32_v4_avar, std::nullopt); break; } StOps.push_back(Addr); @@ -1976,11 +1976,11 @@ NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi); break; case NVPTXISD::StoreV4: - Opcode = - pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi, - NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None, - NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi, - NVPTX::STV_f32_v4_asi, None); + Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, + NVPTX::STV_i8_v4_asi, NVPTX::STV_i16_v4_asi, + NVPTX::STV_i32_v4_asi, std::nullopt, + NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi, + NVPTX::STV_f32_v4_asi, std::nullopt); break; } StOps.push_back(Base); @@ -2002,9 +2002,9 @@ case NVPTXISD::StoreV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64, - NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None, + NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt, NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64, - NVPTX::STV_f32_v4_ari_64, None); + NVPTX::STV_f32_v4_ari_64, std::nullopt); break; } } else { @@ -2019,11 +2019,11 @@ NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari); break; case NVPTXISD::StoreV4: - Opcode = - pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari, - NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None, - NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari, - NVPTX::STV_f32_v4_ari, None); + Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, + NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari, + NVPTX::STV_i32_v4_ari, std::nullopt, + NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari, + NVPTX::STV_f32_v4_ari, std::nullopt); break; } } @@ -2045,9 +2045,9 @@ case NVPTXISD::StoreV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64, - NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None, + NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt, NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64, - NVPTX::STV_f32_v4_areg_64, None); + NVPTX::STV_f32_v4_areg_64, std::nullopt); break; } } else { @@ -2063,11 +2063,11 @@ NVPTX::STV_f64_v2_areg); break; case NVPTXISD::StoreV4: - Opcode = - pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg, - NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None, - NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg, - NVPTX::STV_f32_v4_areg, None); + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg, + NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, std::nullopt, + NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg, + NVPTX::STV_f32_v4_areg, std::nullopt); break; } } @@ -2136,9 +2136,9 @@ case 4: Opcode = 
pickOpcodeForVT( MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8, - NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None, + NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, std::nullopt, NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2, - NVPTX::LoadParamMemV4F32, None); + NVPTX::LoadParamMemV4F32, std::nullopt); break; } if (!Opcode) @@ -2219,9 +2219,9 @@ case 4: Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16, - NVPTX::StoreRetvalV4I32, None, + NVPTX::StoreRetvalV4I32, std::nullopt, NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2, - NVPTX::StoreRetvalV4F32, None); + NVPTX::StoreRetvalV4F32, std::nullopt); break; } if (!Opcode) @@ -2298,9 +2298,9 @@ case 4: Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16, - NVPTX::StoreParamV4I32, None, + NVPTX::StoreParamV4I32, std::nullopt, NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2, - NVPTX::StoreParamV4F32, None); + NVPTX::StoreParamV4F32, std::nullopt); break; } if (!Opcode) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -1661,7 +1661,7 @@ } GlobalAddressSDNode *Func = dyn_cast(Callee.getNode()); - MaybeAlign retAlignment = None; + MaybeAlign retAlignment = std::nullopt; // Handle Result if (Ins.size() > 0) { diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -16,6 +16,7 @@ #include "ManagedStringPool.h" #include "NVPTXSubtarget.h" #include "llvm/Target/TargetMachine.h" +#include #include namespace llvm { @@ -36,9 +37,9 @@ public: NVPTXTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OP, bool is64bit); - + std::optional RM, + std::optional CM, CodeGenOpt::Level OP, + bool is64bit); ~NVPTXTargetMachine() override; const NVPTXSubtarget *getSubtargetImpl(const Function &) const override { return &Subtarget; @@ -76,20 +77,24 @@ class NVPTXTargetMachine32 : public NVPTXTargetMachine { virtual void anchor(); + public: NVPTXTargetMachine32(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); }; class NVPTXTargetMachine64 : public NVPTXTargetMachine { virtual void anchor(); + public: NVPTXTargetMachine64(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); }; } // end namespace llvm diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -35,6 +35,7 @@ #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Vectorize.h" #include +#include #include using namespace llvm; @@ -111,8 +112,8 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + 
std::optional CM, CodeGenOpt::Level OL, bool is64bit) // The pic relocation model is used regardless of what the client has // specified, as it is the only relocation model currently supported. @@ -138,8 +139,8 @@ NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} @@ -148,8 +149,8 @@ NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} @@ -290,6 +291,7 @@ // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). disablePass(&PrologEpilogCodeInserterID); + disablePass(&MachineLateInstrsCleanupID); disablePass(&MachineCopyPropagationID); disablePass(&TailDuplicateID); disablePass(&StackMapLivenessID); diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -21,6 +21,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/TargetLowering.h" +#include namespace llvm { @@ -53,8 +54,8 @@ AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM; } - Optional instCombineIntrinsic(InstCombiner &IC, - IntrinsicInst &II) const; + std::optional instCombineIntrinsic(InstCombiner &IC, + IntrinsicInst &II) const; // Loads and stores can be vectorized if the alignment is at least as big as // the load/store we want to vectorize. diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -414,12 +414,12 @@ llvm_unreachable("All SpecialCase enumerators should be handled in switch."); } -Optional +std::optional NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { if (Instruction *I = simplifyNvvmIntrinsic(&II, IC)) { return I; } - return None; + return std::nullopt; } InstructionCost NVPTXTTIImpl::getArithmeticInstrCost( diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -268,7 +268,7 @@ if (Type != -1u) return static_cast(FirstLiteralRelocationKind + Type); } - return None; + return std::nullopt; } MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp @@ -200,7 +200,7 @@ const MCSubtargetInfo &STI) { // Need at least two operands. 
if (Inst.getNumOperands() < 2) - return None; + return std::nullopt; unsigned LastOp = Inst.getNumOperands() - 1; // The last operand needs to be an MCExpr and it needs to have a variant kind @@ -208,13 +208,13 @@ // link time GOT PC Rel opt instruction and we can ignore it and return None. const MCOperand &Operand = Inst.getOperand(LastOp); if (!Operand.isExpr()) - return None; + return std::nullopt; // Check for the variant kind VK_PPC_PCREL_OPT in this expression. const MCExpr *Expr = Operand.getExpr(); const MCSymbolRefExpr *SymExpr = static_cast(Expr); if (!SymExpr || SymExpr->getKind() != MCSymbolRefExpr::VK_PPC_PCREL_OPT) - return None; + return std::nullopt; return (Inst.getOpcode() == PPC::PLDpc); } diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td --- a/llvm/lib/Target/PowerPC/P10InstrResources.td +++ b/llvm/lib/Target/PowerPC/P10InstrResources.td @@ -616,7 +616,10 @@ VMUL10EUQ, VSUBCUQ, VSUBUQM, + XSCMPEQQP, XSCMPEXPQP, + XSCMPGEQP, + XSCMPGTQP, XSCMPOQP, XSCMPUQP, XSMAXCQP, diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp --- a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -263,34 +263,13 @@ // Add other inputs for the PHI node. if (ML->isLoopLatch(Exiting)) { - // Normally there must be only two predecessors for the loop header, one is - // the Preheader and the other one is loop latch Exiting. In hardware loop + // There must be only two predecessors for the loop header, one is the + // Preheader and the other one is loop latch Exiting. In hardware loop // insertion pass, the block containing DecreaseCTRloop must dominate all // loop latches. So there must be only one latch. - // But there are some optimizations after ISEL, like tail duplicator, may - // merge the two-predecessor loop header with its successor. If the - // successor happens to be a header of nest loop, then we will have a header - // which has more than 2 predecessors. - assert(llvm::is_contained(ML->getHeader()->predecessors(), Exiting) && - "Loop latch is not loop header predecessor!"); - assert(llvm::is_contained(ML->getHeader()->predecessors(), Preheader) && - "Loop preheader is not loop header predecessor!"); - + assert(ML->getHeader()->pred_size() == 2 && + "Loop header predecessor is not right!"); PHIMIB.addReg(ADDIDef).addMBB(Exiting); - - if (ML->getHeader()->pred_size() > 2) { - Register HeaderIncoming = MRI->createVirtualRegister( - Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass - : &PPC::GPRC_and_GPRC_NOR0RegClass); - BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(), DebugLoc(), - TII->get(TargetOpcode::COPY), HeaderIncoming) - .addReg(PHIDef); - - for (MachineBasicBlock *P : ML->getHeader()->predecessors()) { - if (P != Preheader && P != Exiting) - PHIMIB.addReg(HeaderIncoming).addMBB(P); - } - } } else { // If the block containing DecreaseCTRloop is not a loop latch, we can use // ADDIDef as the value for all other blocks for the PHI. 
In hardware loop diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp --- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -226,7 +226,7 @@ case CmpInst::FCMP_OLE: case CmpInst::FCMP_ONE: default: - return None; + return std::nullopt; case CmpInst::FCMP_OEQ: case CmpInst::ICMP_EQ: diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -260,7 +260,7 @@ /// signed 16-bit immediate. bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, - None) == PPC::AM_DForm; + std::nullopt) == PPC::AM_DForm; } /// SelectPCRelForm - Returns true if address N can be represented by @@ -268,21 +268,22 @@ bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, - None) == PPC::AM_PCRel; + std::nullopt) == PPC::AM_PCRel; } /// SelectPDForm - Returns true if address N can be represented by Prefixed /// DForm addressing mode (a base register, plus a signed 34-bit immediate. bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, - None) == PPC::AM_PrefixDForm; + std::nullopt) == + PPC::AM_PrefixDForm; } /// SelectXForm - Returns true if address N can be represented by the /// addressing mode of XForm instructions (an indexed [r+r] operation). bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG, - None) == PPC::AM_XForm; + std::nullopt) == PPC::AM_XForm; } /// SelectForceXForm - Given the specified address, force it to be @@ -300,7 +301,8 @@ /// bit signed displacement. /// Returns false if it can be represented by [r+imm], which are preferred. bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) { - return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, None); + return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, + std::nullopt); } /// SelectAddrIdx4 - Given the specified address, check to see if it can be @@ -337,7 +339,8 @@ /// displacement. bool SelectAddrImm(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, None); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, + std::nullopt); } /// SelectAddrImmX4 - Returns true if the address N can be represented by diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -29,6 +29,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Type.h" #include "llvm/Support/MachineValueType.h" +#include #include namespace llvm { @@ -854,7 +855,7 @@ /// Returns false if it can be represented by [r+imm], which are preferred. 
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, - MaybeAlign EncodingAlignment = None) const; + MaybeAlign EncodingAlignment = std::nullopt) const; /// SelectAddressRegImm - Returns true if the address N can be represented /// by a base register plus a signed 16-bit displacement [r+imm], and if it @@ -1162,10 +1163,10 @@ PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const; - bool - splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, - SDValue *Parts, unsigned NumParts, MVT PartVT, - Optional CC) const override; + bool splitValueIntoRegisterParts( + SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, std::optional CC) + const override; /// Structure that collects some common arguments that get passed around /// between the functions for call lowering. struct CallFlags { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -96,6 +96,7 @@ #include #include #include +#include #include #include @@ -3044,7 +3045,7 @@ // LDU/STU can only handle immediates that are a multiple of 4. if (VT != MVT::i64) { - if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None)) + if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt)) return false; } else { // LDU/STU need an address with at least 4-byte alignment. @@ -3126,7 +3127,7 @@ SDValue Ops[] = { GA, Reg }; return DAG.getMemIntrinsicNode( PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), None, + MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt, MachineMemOperand::MOLoad); } @@ -18024,7 +18025,7 @@ bool PPCTargetLowering::splitValueIntoRegisterParts( SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, - unsigned NumParts, MVT PartVT, Optional CC) const { + unsigned NumParts, MVT PartVT, std::optional CC) const { EVT ValVT = Val.getValueType(); // If we are splitting a scalar integer into f64 parts (i.e. so they // can be placed into VFRC registers), we need to zero extend and diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2119,7 +2119,11 @@ PPC::ZERO8 : PPC::ZERO; } + LLVM_DEBUG(dbgs() << "Folded immediate zero for: "); + LLVM_DEBUG(UseMI.dump()); UseMI.getOperand(UseIdx).setReg(ZeroReg); + LLVM_DEBUG(dbgs() << "Into: "); + LLVM_DEBUG(UseMI.dump()); return true; } @@ -4808,7 +4812,7 @@ } } - LLVM_DEBUG(dbgs() << "Replacing instruction:\n"); + LLVM_DEBUG(dbgs() << "Replacing constant instruction:\n"); LLVM_DEBUG(MI.dump()); LLVM_DEBUG(dbgs() << "Fed by:\n"); LLVM_DEBUG(DefMI.dump()); @@ -4894,7 +4898,7 @@ ForwardKilledOperandReg = MI.getOperand(III.OpNoForForwarding).getReg(); // Do the transform - LLVM_DEBUG(dbgs() << "Replacing instruction:\n"); + LLVM_DEBUG(dbgs() << "Replacing existing reg+imm instruction:\n"); LLVM_DEBUG(MI.dump()); LLVM_DEBUG(dbgs() << "Fed by:\n"); LLVM_DEBUG(DefMI.dump()); @@ -4982,7 +4986,7 @@ // We know that, the MI and DefMI both meet the pattern, and // the Imm also meet the requirement with the new Imm-form. // It is safe to do the transformation now. 
- LLVM_DEBUG(dbgs() << "Replacing instruction:\n"); + LLVM_DEBUG(dbgs() << "Replacing indexed instruction:\n"); LLVM_DEBUG(MI.dump()); LLVM_DEBUG(dbgs() << "Fed by:\n"); LLVM_DEBUG(DefMI.dump()); @@ -5117,6 +5121,10 @@ bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD || Opc == PPC::SRD_rec; + LLVM_DEBUG(dbgs() << "Replacing reg+reg instruction: "); + LLVM_DEBUG(MI.dump()); + LLVM_DEBUG(dbgs() << "Fed by load-immediate: "); + LLVM_DEBUG(DefMI.dump()); MI.setDesc(get(III.ImmOpcode)); if (ConstantOpNo == III.OpNoForForwarding) { // Converting shifts to immediate form is a bit tricky since they may do @@ -5200,6 +5208,10 @@ // y = XOP reg, ForwardKilledOperandReg(killed) if (ForwardKilledOperandReg != ~0U) fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg); + + LLVM_DEBUG(dbgs() << "With: "); + LLVM_DEBUG(MI.dump()); + LLVM_DEBUG(dbgs() << "\n"); return true; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -1100,6 +1100,12 @@ } } +let Predicates = [IsISA3_1] in { + def XSCMPEQQP : X_VT5_VA5_VB5<63, 68, "xscmpeqqp", []>; + def XSCMPGEQP : X_VT5_VA5_VB5<63, 196, "xscmpgeqp", []>; + def XSCMPGTQP : X_VT5_VA5_VB5<63, 228, "xscmpgtqp", []>; +} + let Predicates = [PCRelativeMemops] in { // Load i32 def : Pat<(i32 (zextloadi1 (PPCmatpcreladdr PCRelForm:$ga))), diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -380,6 +380,10 @@ for (auto RegMBB : PHIOps) NewPHI.add(RegMBB.first).add(RegMBB.second); ChangedPHIMap[PHI] = NewPHI.getInstr(); + LLVM_DEBUG(dbgs() << "Converting PHI: "); + LLVM_DEBUG(PHI->dump()); + LLVM_DEBUG(dbgs() << "To: "); + LLVM_DEBUG(NewPHI.getInstr()->dump()); } } @@ -425,6 +429,8 @@ // If the previous instruction was marked for elimination, // remove it now. 
if (ToErase) { + LLVM_DEBUG(dbgs() << "Deleting instruction: "); + LLVM_DEBUG(ToErase->dump()); ToErase->eraseFromParent(); ToErase = nullptr; } diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp --- a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp +++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp @@ -58,7 +58,7 @@ bool isSupported() const { return Supported; } std::optional depOpIdx() const { if (DepOpIdx < 0) - return None; + return std::nullopt; return DepOpIdx; } diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/llvm/lib/Target/PowerPC/PPCTargetMachine.h --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.h +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -17,6 +17,7 @@ #include "PPCSubtarget.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" +#include namespace llvm { @@ -37,8 +38,9 @@ public: PPCTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); ~PPCTargetMachine() override; diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -46,6 +46,7 @@ #include "llvm/Transforms/Scalar.h" #include #include +#include #include using namespace llvm; @@ -245,7 +246,7 @@ } static Reloc::Model getEffectiveRelocModel(const Triple &TT, - Optional RM) { + std::optional RM) { assert((!TT.isOSAIX() || !RM || *RM == Reloc::PIC_) && "Invalid relocation model for AIX."); @@ -260,9 +261,9 @@ return Reloc::Static; } -static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT, - Optional CM, - bool JIT) { +static CodeModel::Model +getEffectivePPCCodeModel(const Triple &TT, std::optional CM, + bool JIT) { if (CM) { if (*CM == CodeModel::Tiny) report_fatal_error("Target does not support the tiny CodeModel", false); @@ -324,8 +325,8 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU, computeFSAdditions(FS, OL, TT), Options, @@ -500,6 +501,11 @@ } void PPCPassConfig::addMachineSSAOptimization() { + // Run CTR loops pass before any cfg modification pass to prevent the + // canonical form of hardware loop from being destroyed. + if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) + addPass(createPPCCTRLoopsPass()); + // PPCBranchCoalescingPass need to be done before machine sinking // since it merges empty blocks. if (EnableBranchCoalescing && getOptLevel() != CodeGenOpt::None) @@ -540,16 +546,6 @@ if (EnableExtraTOCRegDeps) addPass(createPPCTOCRegDepsPass()); - // Run CTR loops pass before MachinePipeliner pass. - // MachinePipeliner will pipeline all instructions before the terminator, but - // we don't want DecreaseCTRPseudo to be pipelined. - // Note we may lose some MachinePipeliner opportunities if we run CTR loops - // generation pass before MachinePipeliner and the loop is converted back to - // a normal loop. We can revisit this later for running PPCCTRLoops after - // MachinePipeliner and handling DecreaseCTRPseudo in MachinePipeliner pass.
- if (getOptLevel() != CodeGenOpt::None) - addPass(createPPCCTRLoopsPass()); - if (getOptLevel() != CodeGenOpt::None) addPass(&MachinePipelinerID); } diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -20,6 +20,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/TargetLowering.h" +#include namespace llvm { @@ -41,8 +42,8 @@ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} - Optional instCombineIntrinsic(InstCombiner &IC, - IntrinsicInst &II) const; + std::optional instCombineIntrinsic(InstCombiner & IC, + IntrinsicInst & II) const; /// \name Scalar TTI Implementations /// @{ @@ -113,7 +114,7 @@ ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, Type *SubTp, - ArrayRef Args = None); + ArrayRef Args = std::nullopt); InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" #include "llvm/Transforms/Utils/Local.h" +#include using namespace llvm; @@ -60,7 +61,7 @@ return TTI::PSK_Software; } -Optional +std::optional PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { Intrinsic::ID IID = II.getIntrinsicID(); switch (IID) { @@ -160,7 +161,7 @@ } break; } - return None; + return std::nullopt; } InstructionCost PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -2168,7 +2168,7 @@ TagLoc = Parser.getTok().getLoc(); if (Parser.getTok().is(AsmToken::Identifier)) { StringRef Name = Parser.getTok().getIdentifier(); - Optional Ret = + std::optional Ret = ELFAttrs::attrTypeFromString(Name, RISCVAttrs::getRISCVAttributeTags()); if (!Ret) { Error(TagLoc, "attribute name not recognised: " + Name); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -41,7 +41,7 @@ if (Type != -1u) return static_cast(FirstLiteralRelocationKind + Type); } - return None; + return std::nullopt; } const MCFixupKindInfo & diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -19,6 +19,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/Support/TargetParser.h" +#include namespace llvm { class RISCVSubtarget; @@ -551,16 +552,15 @@ MachineMemOperand::Flags Flags = MachineMemOperand::MONone, unsigned *Fast = nullptr) const override; - bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, - SDValue Val, SDValue *Parts, - unsigned NumParts, MVT PartVT, - Optional CC) const override; + 
bool splitValueIntoRegisterParts( + SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, std::optional CC) + const override; - SDValue - joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, - const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, - Optional CC) const override; + SDValue joinRegisterPartsIntoValue( + SelectionDAG & DAG, const SDLoc &DL, const SDValue *Parts, + unsigned NumParts, MVT PartVT, EVT ValueVT, + std::optional CC) const override; static RISCVII::VLMUL getLMUL(MVT VT); inline static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -2183,7 +2183,7 @@ if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) == APFloatBase::opInvalidOp) || !IsExact) - return None; + return std::nullopt; return ValInt.extractBitsAsZExtValue(BitWidth, 0); } @@ -2216,19 +2216,19 @@ if (IsInteger) { // The BUILD_VECTOR must be all constants. if (!isa(Op.getOperand(Idx))) - return None; + return std::nullopt; Val = Op.getConstantOperandVal(Idx) & maskTrailingOnes(EltSizeInBits); } else { // The BUILD_VECTOR must be all constants. if (!isa(Op.getOperand(Idx))) - return None; + return std::nullopt; if (auto ExactInteger = getExactInteger( cast(Op.getOperand(Idx))->getValueAPF(), EltSizeInBits)) Val = *ExactInteger; else - return None; + return std::nullopt; } if (PrevElt) { @@ -2248,7 +2248,7 @@ if (Remainder != ValDiff) { // The difference must cleanly divide the element span. if (Remainder != 0) - return None; + return std::nullopt; ValDiff /= IdxDiff; IdxDiff = 1; } @@ -2256,12 +2256,12 @@ if (!SeqStepNum) SeqStepNum = ValDiff; else if (ValDiff != SeqStepNum) - return None; + return std::nullopt; if (!SeqStepDenom) SeqStepDenom = IdxDiff; else if (IdxDiff != *SeqStepDenom) - return None; + return std::nullopt; } // Record this non-undef element for later. @@ -2271,7 +2271,7 @@ // We need to have logged a step for this to count as a legal index sequence. if (!SeqStepNum || !SeqStepDenom) - return None; + return std::nullopt; // Loop back through the sequence and validate elements we might have skipped // while waiting for a valid step. While doing this, log any sequence addend. @@ -2293,7 +2293,7 @@ if (!SeqAddend) SeqAddend = Addend; else if (Addend != SeqAddend) - return None; + return std::nullopt; } assert(SeqAddend && "Must have an addend if we have a step"); @@ -8843,7 +8843,7 @@ bool AllowZExt) { assert((AllowSExt || AllowZExt) && "Forgot to set what you want?"); if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) - return None; + return std::nullopt; if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt) return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( Root->getOpcode(), /*IsSExt=*/false), @@ -8854,7 +8854,7 @@ Root->getOpcode(), /*IsSExt=*/true), Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/true); - return None; + return std::nullopt; } /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS)) @@ -8878,7 +8878,7 @@ const NodeExtensionHelper &LHS, const NodeExtensionHelper &RHS) { if (!RHS.areVLAndMaskCompatible(Root)) - return None; + return std::nullopt; // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar // sext/zext? 
@@ -8887,12 +8887,12 @@ if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W)) return CombineResult( NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/false), - Root, LHS, /*SExtLHS=*/None, RHS, /*SExtRHS=*/false); + Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/false); if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W)) return CombineResult( NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/true), - Root, LHS, /*SExtLHS=*/None, RHS, /*SExtRHS=*/true); - return None; + Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/true); + return std::nullopt; } /// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS)) @@ -8925,9 +8925,9 @@ const NodeExtensionHelper &LHS, const NodeExtensionHelper &RHS) { if (!LHS.SupportsSExt || !RHS.SupportsZExt) - return None; + return std::nullopt; if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) - return None; + return std::nullopt; return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()), Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false); } @@ -11477,7 +11477,7 @@ if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1) return ArgIdx.index(); } - return None; + return std::nullopt; } void RISCVTargetLowering::analyzeInputArgs( @@ -13351,7 +13351,7 @@ bool RISCVTargetLowering::splitValueIntoRegisterParts( SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, - unsigned NumParts, MVT PartVT, Optional CC) const { + unsigned NumParts, MVT PartVT, std::optional CC) const { bool IsABIRegCopy = CC.has_value(); EVT ValueVT = Val.getValueType(); if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) { @@ -13405,7 +13405,7 @@ SDValue RISCVTargetLowering::joinRegisterPartsIntoValue( SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, Optional CC) const { + MVT PartVT, EVT ValueVT, std::optional CC) const { bool IsABIRegCopy = CC.has_value(); if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) { SDValue Val = Parts[0]; diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -90,7 +90,7 @@ static Optional getEEWForLoadStore(const MachineInstr &MI) { switch (getRVVMCOpcode(MI.getOpcode())) { default: - return None; + return std::nullopt; case RISCV::VLE8_V: case RISCV::VLSE8_V: case RISCV::VSE8_V: diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1124,7 +1124,7 @@ return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; break; } - return None; + return std::nullopt; } void RISCVInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1, @@ -2387,7 +2387,7 @@ RISCV::isRVVSpillForZvlsseg(unsigned Opcode) { switch (Opcode) { default: - return None; + return std::nullopt; case RISCV::PseudoVSPILL2_M1: case RISCV::PseudoVRELOAD2_M1: return std::make_pair(2u, 1u); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -370,7 +370,7 @@ Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>; defm : FPUnaryOpDynFrmAlias_m; -let mayRaiseFPException = 0 in +let Predicates = [HasStdExtF], mayRaiseFPException = 0 in def FMV_X_W : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR32,
"fmv.x.w">, Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]>; @@ -392,7 +392,7 @@ Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>; defm : FPUnaryOpDynFrmAlias_m; -let mayRaiseFPException = 0 in +let Predicates = [HasStdExtF], mayRaiseFPException = 0 in def FMV_W_X : FPUnaryOp_r<0b1111000, 0b00000, 0b000, FPR32, GPR, "fmv.w.x">, Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]>; diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -287,15 +287,22 @@ } if (!IsRVVSpill) { - // TODO: Consider always storing the low bits of the immediate in the - // offset so that large immediate is cheaper to materialize? - if (isInt<12>(Offset.getFixed())) { - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); - Offset = StackOffset::get(0, Offset.getScalable()); - } else { - // Since we're going to materialize the full offset below, clear the - // portion encoded in the immediate. + if (MI.getOpcode() == RISCV::ADDI && !isInt<12>(Offset.getFixed())) { + // We chose to emit the canonical immediate sequence rather than folding + // the offset into the using add under the theory that doing so doesn't + // save dynamic instruction count and some targets may fuse the canonical + // 32 bit immediate sequence. We still need to clear the portion of the + // offset encoded in the immediate. MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0); + } else { + // We can encode an add with a 12 bit signed immediate in the immediate + // operand of our user instruction. As a result, the remaining + // offset can, by construction, be materialized with at worst a LUI and an ADD. + int64_t Val = Offset.getFixed(); + int64_t Lo12 = SignExtend64<12>(Val); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo12); + Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo12, + Offset.getScalable()); } } @@ -306,7 +313,7 @@ else DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); adjustReg(*II->getParent(), II, DL, DestReg, FrameReg, Offset, - MachineInstr::NoFlags, None); + MachineInstr::NoFlags, std::nullopt); MI.getOperand(FIOperandNum).ChangeToRegister(DestReg, /*IsDef*/false, /*IsImp*/false, /*IsKill*/true); @@ -438,14 +445,21 @@ } }; - // For now we support the compressible instructions which can encode all - // registers and have a single register source. - // TODO: Add more compressed instructions. + // These are all of the compressible binary instructions. If an instruction + // needs GPRC register class operands, \p NeedGPRC will be set to true. auto isCompressible = [](const MachineInstr &MI, bool &NeedGPRC) { NeedGPRC = false; switch (MI.getOpcode()) { default: return false; + case RISCV::AND: + case RISCV::OR: + case RISCV::XOR: + case RISCV::SUB: + case RISCV::ADDW: + case RISCV::SUBW: + NeedGPRC = true; + return true; case RISCV::ANDI: NeedGPRC = true; return MI.getOperand(2).isImm() && isInt<6>(MI.getOperand(2).getImm()); @@ -462,18 +476,35 @@ } }; + // Returns true if this operand is compressible. For non-registers it always + // returns true. Immediate range was already checked in isCompressible. + // For registers, it checks if the register is a GPRC register. reg-reg + // instructions that require GPRC need all register operands to be GPRC. + auto isCompressibleOpnd = [&](const MachineOperand &MO) { + if (!MO.isReg()) + return true; + Register Reg = MO.getReg(); + Register PhysReg = + Register::isPhysicalRegister(Reg) ?
Reg : Register(VRM->getPhys(Reg)); + return PhysReg && RISCV::GPRCRegClass.contains(PhysReg); + }; + for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) { const MachineInstr &MI = *MO.getParent(); + unsigned OpIdx = MI.getOperandNo(&MO); bool NeedGPRC; if (isCompressible(MI, NeedGPRC)) { - unsigned OpIdx = MI.getOperandNo(&MO); if (OpIdx == 0 && MI.getOperand(1).isReg()) { - tryAddHint(MO, MI.getOperand(1), NeedGPRC); - if (MI.isCommutable() && MI.getOperand(2).isReg()) + if (!NeedGPRC || isCompressibleOpnd(MI.getOperand(2))) + tryAddHint(MO, MI.getOperand(1), NeedGPRC); + if (MI.isCommutable() && MI.getOperand(2).isReg() && + (!NeedGPRC || isCompressibleOpnd(MI.getOperand(1)))) tryAddHint(MO, MI.getOperand(2), NeedGPRC); - } else if (OpIdx == 1) { + } else if (OpIdx == 1 && + (!NeedGPRC || isCompressibleOpnd(MI.getOperand(2)))) { tryAddHint(MO, MI.getOperand(0), NeedGPRC); - } else if (MI.isCommutable() && OpIdx == 2) { + } else if (MI.isCommutable() && OpIdx == 2 && + (!NeedGPRC || isCompressibleOpnd(MI.getOperand(1)))) { tryAddHint(MO, MI.getOperand(0), NeedGPRC); } } diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/llvm/lib/Target/RISCV/RISCVTargetMachine.h --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.h +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.h @@ -18,6 +18,7 @@ #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" +#include namespace llvm { class RISCVTargetMachine : public LLVMTargetMachine { @@ -27,8 +28,9 @@ public: RISCVTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); const RISCVSubtarget *getSubtargetImpl(const Function &F) const override; // DO NOT IMPLEMENT: There is no such thing as a valid default subtarget, diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -35,6 +35,7 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/IPO.h" +#include using namespace llvm; static cl::opt EnableRedundantCopyElimination( @@ -75,15 +76,15 @@ } static Reloc::Model getEffectiveRelocModel(const Triple &TT, - Optional RM) { + std::optional RM) { return RM.value_or(Reloc::Static); } RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), @@ -285,6 +286,10 @@ void RISCVPassConfig::addPostRegAlloc() { if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination) addPass(createRISCVRedundantCopyEliminationPass()); + + // Temporarily disabled until post-RA pseudo expansion problem is fixed, + // see D123394 and D139169. 
+ disablePass(&MachineLateInstrsCleanupID); } yaml::MachineFunctionInfo * diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -22,6 +22,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/IR/Function.h" +#include namespace llvm { @@ -79,8 +80,8 @@ return ST->hasVInstructions() ? PredicationStyle::Data : PredicationStyle::None; } - Optional getMaxVScale() const; - Optional getVScaleForTuning() const; + std::optional getMaxVScale() const; + std::optional getVScaleForTuning() const; TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const; @@ -115,7 +116,7 @@ ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, - ArrayRef Args = None); + ArrayRef Args = std::nullopt); InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind); @@ -136,12 +137,12 @@ TTI::TargetCostKind CostKind); InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind); InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind); InstructionCost diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/CostTable.h" #include "llvm/CodeGen/TargetLowering.h" #include +#include using namespace llvm; #define DEBUG_TYPE "riscvtti" @@ -196,13 +197,13 @@ } } -Optional RISCVTTIImpl::getMaxVScale() const { +std::optional RISCVTTIImpl::getMaxVScale() const { if (ST->hasVInstructions()) return ST->getRealMaxVLen() / RISCV::RVVBitsPerBlock; return BaseT::getMaxVScale(); } -Optional RISCVTTIImpl::getVScaleForTuning() const { +std::optional RISCVTTIImpl::getVScaleForTuning() const { if (ST->hasVInstructions()) if (unsigned MinVLen = ST->getRealMinVLen(); MinVLen >= RISCV::RVVBitsPerBlock) @@ -235,10 +236,9 @@ std::pair LT = getTypeLegalizationCost(Tp); unsigned Cost = 2; // vslidedown+vslideup. - // TODO: LMUL should increase cost. // TODO: Multiplying by LT.first implies this legalizes into multiple copies // of similar code, but I think we expand through memory. - return Cost * LT.first; + return Cost * LT.first * getLMULCost(LT.second); } InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, @@ -784,7 +784,7 @@ InstructionCost RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind) { if (isa(Ty) && !ST->useRVVForFixedLengthVectors()) return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); @@ -815,7 +815,7 @@ InstructionCost RISCVTTIImpl::getExtendedReductionCost( unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, - Optional FMF, TTI::TargetCostKind CostKind) { + std::optional FMF, TTI::TargetCostKind CostKind) { if (isa(ValTy) && !ST->useRVVForFixedLengthVectors()) return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy, FMF, CostKind); @@ -1120,8 +1120,7 @@ case ISD::FSUB: case ISD::FMUL: case ISD::FNEG: { - // TODO: We should be accounting for LMUL and scaling costs for LMUL > 1. 
- return ConstantMatCost + LT.first * 1; + return ConstantMatCost + getLMULCost(LT.second) * LT.first * 1; } default: return ConstantMatCost + diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h @@ -15,6 +15,7 @@ #include "SPIRVSubtarget.h" #include "llvm/Target/TargetMachine.h" +#include namespace llvm { class SPIRVTargetMachine : public LLVMTargetMachine { @@ -24,8 +25,9 @@ public: SPIRVTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); const SPIRVSubtarget *getSubtargetImpl() const { return &Subtarget; } diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -30,6 +30,7 @@ #include "llvm/MC/TargetRegistry.h" #include "llvm/Pass.h" #include "llvm/Target/TargetOptions.h" +#include using namespace llvm; @@ -52,7 +53,7 @@ "v96:128-v192:256-v256:256-v512:512-v1024:1024"; } -static Reloc::Model getEffectiveRelocModel(Optional RM) { +static Reloc::Model getEffectiveRelocModel(std::optional RM) { if (!RM) return Reloc::PIC_; return *RM; @@ -64,8 +65,8 @@ SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, getEffectiveRelocModel(RM), diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -154,7 +154,7 @@ .Case("BFD_RELOC_64", ELF::R_SPARC_64) .Default(-1u); if (Type == -1u) - return None; + return std::nullopt; return static_cast(FirstLiteralRelocationKind + Type); } diff --git a/llvm/lib/Target/Sparc/SparcTargetMachine.h b/llvm/lib/Target/Sparc/SparcTargetMachine.h --- a/llvm/lib/Target/Sparc/SparcTargetMachine.h +++ b/llvm/lib/Target/Sparc/SparcTargetMachine.h @@ -16,6 +16,7 @@ #include "SparcInstrInfo.h" #include "SparcSubtarget.h" #include "llvm/Target/TargetMachine.h" +#include namespace llvm { @@ -24,11 +25,13 @@ SparcSubtarget Subtarget; bool is64Bit; mutable StringMap> SubtargetMap; + public: SparcTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT, bool is64bit); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT, bool is64bit); ~SparcTargetMachine() override; const SparcSubtarget *getSubtargetImpl() const { return &Subtarget; } @@ -45,22 +48,26 @@ /// class SparcV8TargetMachine : public SparcTargetMachine { virtual void anchor(); + public: SparcV8TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); }; /// Sparc 64-bit target machine /// class SparcV9TargetMachine : public SparcTargetMachine { virtual void anchor(); + public: 
SparcV9TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); }; class SparcelTargetMachine : public SparcTargetMachine { @@ -69,8 +76,9 @@ public: SparcelTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); }; } // end namespace llvm diff --git a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp --- a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/MC/TargetRegistry.h" +#include using namespace llvm; extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSparcTarget() { @@ -54,7 +55,7 @@ return Ret; } -static Reloc::Model getEffectiveRelocModel(Optional RM) { +static Reloc::Model getEffectiveRelocModel(std::optional RM) { return RM.value_or(Reloc::Static); } @@ -69,7 +70,7 @@ // // All code models require that the text segment is smaller than 2GB. static CodeModel::Model -getEffectiveSparcCodeModel(Optional CM, Reloc::Model RM, +getEffectiveSparcCodeModel(std::optional CM, Reloc::Model RM, bool Is64Bit, bool JIT) { if (CM) { if (*CM == CodeModel::Tiny) @@ -87,10 +88,13 @@ } /// Create an ILP32 architecture model -SparcTargetMachine::SparcTargetMachine( - const Target &T, const Triple &TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, Optional RM, - Optional CM, CodeGenOpt::Level OL, bool JIT, bool is64bit) +SparcTargetMachine::SparcTargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + std::optional RM, + std::optional CM, + CodeGenOpt::Level OL, bool JIT, + bool is64bit) : LLVMTargetMachine(T, computeDataLayout(TT, is64bit), TT, CPU, FS, Options, getEffectiveRelocModel(RM), getEffectiveSparcCodeModel( @@ -187,8 +191,8 @@ SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {} @@ -197,8 +201,8 @@ SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {} @@ -207,7 +211,7 @@ SparcelTargetMachine::SparcelTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {} diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -124,7 +124,7 @@ .Default(-1u); if (Type != -1u) return 
static_cast(FirstLiteralRelocationKind + Type); - return None; + return std::nullopt; } const MCFixupKindInfo & diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLowering.h" +#include namespace llvm { namespace SystemZISD { @@ -417,7 +418,7 @@ } unsigned getNumRegisters(LLVMContext &Context, EVT VT, - Optional RegisterVT) const override { + std::optional RegisterVT) const override { // i128 inline assembly operand. if (VT == MVT::i128 && RegisterVT && *RegisterVT == MVT::Untyped) return 1; @@ -556,15 +557,14 @@ const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; bool allowTruncateForTailCall(Type *, Type *) const override; bool mayBeEmittedAsTailCall(const CallInst *CI) const override; - bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, - SDValue Val, SDValue *Parts, - unsigned NumParts, MVT PartVT, - Optional CC) const override; - SDValue - joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, - const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, - Optional CC) const override; + bool splitValueIntoRegisterParts( + SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, std::optional CC) + const override; + SDValue joinRegisterPartsIntoValue( + SelectionDAG & DAG, const SDLoc &DL, const SDValue *Parts, + unsigned NumParts, MVT PartVT, EVT ValueVT, + std::optional CC) const override; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/KnownBits.h" #include +#include using namespace llvm; @@ -1450,7 +1451,7 @@ bool SystemZTargetLowering::splitValueIntoRegisterParts( SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, - unsigned NumParts, MVT PartVT, Optional CC) const { + unsigned NumParts, MVT PartVT, std::optional CC) const { EVT ValueVT = Val.getValueType(); assert((ValueVT != MVT::i128 || ((NumParts == 1 && PartVT == MVT::Untyped) || @@ -1466,7 +1467,7 @@ SDValue SystemZTargetLowering::joinRegisterPartsIntoValue( SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, Optional CC) const { + MVT PartVT, EVT ValueVT, std::optional CC) const { assert((ValueVT != MVT::i128 || ((NumParts == 1 && PartVT == MVT::Untyped) || (NumParts == 2 && PartVT == MVT::i64))) && diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/llvm/lib/Target/SystemZ/SystemZTargetMachine.h --- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.h @@ -21,6 +21,7 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" #include +#include namespace llvm { @@ -32,8 +33,9 @@ public: SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); 
~SystemZTargetMachine() override; const SystemZSubtarget *getSubtargetImpl(const Function &) const override; diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp --- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Transforms/Scalar.h" +#include #include using namespace llvm; @@ -84,7 +85,7 @@ return std::make_unique(); } -static Reloc::Model getEffectiveRelocModel(Optional RM) { +static Reloc::Model getEffectiveRelocModel(std::optional RM) { // Static code is suitable for use in a dynamic executable; there is no // separate DynamicNoPIC model. if (!RM || *RM == Reloc::DynamicNoPIC) @@ -122,8 +123,8 @@ // of copy relocs, so locally-binding data symbols might not be in // the range of LARL. We need the Medium model in that case. static CodeModel::Model -getEffectiveSystemZCodeModel(Optional CM, Reloc::Model RM, - bool JIT) { +getEffectiveSystemZCodeModel(std::optional CM, + Reloc::Model RM, bool JIT) { if (CM) { if (*CM == CodeModel::Tiny) report_fatal_error("Target does not support the tiny CodeModel", false); @@ -139,8 +140,8 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine( T, computeDataLayout(TT), TT, CPU, FS, Options, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -93,7 +93,7 @@ ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, - ArrayRef Args = None); + ArrayRef Args = std::nullopt); unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy); unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy); unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -300,8 +300,8 @@ } if (isa(&I)) { Type *MemAccessTy = I.getOperand(0)->getType(); - NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0, - TTI::TCK_RecipThroughput); + NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, + std::nullopt, 0, TTI::TCK_RecipThroughput); } } diff --git a/llvm/lib/Target/TargetMachineC.cpp b/llvm/lib/Target/TargetMachineC.cpp --- a/llvm/lib/Target/TargetMachineC.cpp +++ b/llvm/lib/Target/TargetMachineC.cpp @@ -24,6 +24,7 @@ #include "llvm/Target/CodeGenCWrappers.h" #include "llvm/Target/TargetMachine.h" #include +#include using namespace llvm; @@ -99,7 +100,7 @@ const char *Triple, const char *CPU, const char *Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc, LLVMCodeModel CodeModel) { - Optional RM; + std::optional RM; switch (Reloc){ case LLVMRelocStatic: RM = Reloc::Static; @@ -124,7 +125,7 @@ } bool JIT; - Optional CM = unwrap(CodeModel, JIT); + std::optional CM = unwrap(CodeModel, JIT); CodeGenOpt::Level OL; switch (Level) { diff --git a/llvm/lib/Target/VE/VECustomDAG.h b/llvm/lib/Target/VE/VECustomDAG.h --- a/llvm/lib/Target/VE/VECustomDAG.h +++ 
b/llvm/lib/Target/VE/VECustomDAG.h @@ -154,7 +154,7 @@ /// getNode { SDValue getNode(unsigned OC, SDVTList VTL, ArrayRef OpV, - Optional Flags = None) const { + Optional Flags = std::nullopt) const { auto N = DAG.getNode(OC, DL, VTL, OpV); if (Flags) N->setFlags(*Flags); @@ -162,7 +162,7 @@ } SDValue getNode(unsigned OC, ArrayRef ResVT, ArrayRef OpV, - Optional Flags = None) const { + Optional Flags = std::nullopt) const { auto N = DAG.getNode(OC, DL, ResVT, OpV); if (Flags) N->setFlags(*Flags); @@ -170,7 +170,7 @@ } SDValue getNode(unsigned OC, EVT ResVT, ArrayRef OpV, - Optional Flags = None) const { + Optional Flags = std::nullopt) const { auto N = DAG.getNode(OC, DL, ResVT, OpV); if (Flags) N->setFlags(*Flags); diff --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp --- a/llvm/lib/Target/VE/VECustomDAG.cpp +++ b/llvm/lib/Target/VE/VECustomDAG.cpp @@ -79,7 +79,7 @@ case ISD::EXPERIMENTAL_VP_STRIDED_STORE: return VEISD::VVP_STORE; } - return None; + return std::nullopt; } bool maySafelyIgnoreMask(SDValue Op) { @@ -185,7 +185,7 @@ return 5; } - return None; + return std::nullopt; } Optional getMaskPos(unsigned Opc) { @@ -208,7 +208,7 @@ return 2; } - return None; + return std::nullopt; } bool isLegalAVL(SDValue AVL) { return AVL->getOpcode() == VEISD::LEGALAVL; } diff --git a/llvm/lib/Target/VE/VEFrameLowering.cpp b/llvm/lib/Target/VE/VEFrameLowering.cpp --- a/llvm/lib/Target/VE/VEFrameLowering.cpp +++ b/llvm/lib/Target/VE/VEFrameLowering.cpp @@ -357,8 +357,8 @@ // Emit stack adjust instructions MaybeAlign RuntimeAlign = - NeedsStackRealignment ? MaybeAlign(MFI.getMaxAlign()) : None; - assert((RuntimeAlign == None || !FuncInfo->isLeafProc()) && + NeedsStackRealignment ? MaybeAlign(MFI.getMaxAlign()) : std::nullopt; + assert((RuntimeAlign == std::nullopt || !FuncInfo->isLeafProc()) && "SP has to be saved in order to align variable sized stack object!"); emitSPAdjustment(MF, MBB, MBBI, -(int64_t)NumBytes, RuntimeAlign); @@ -408,7 +408,7 @@ .addImm(0); } else { // Emit stack adjust instructions. - emitSPAdjustment(MF, MBB, MBBI, NumBytes, None); + emitSPAdjustment(MF, MBB, MBBI, NumBytes, std::nullopt); } // Emit Epilogue instructions to restore multiple registers. 
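Illustrative aside, not part of the patch: the VE hunks above lean on two std::optional behaviours that make the None -> std::nullopt substitution mechanical — std::nullopt works as a default argument for an optional parameter (the VECustomDAG::getNode wrappers), and comparing an optional against std::nullopt is the same as negating has_value() (the VEFrameLowering assert on RuntimeAlign). The standalone sketch below demonstrates both with made-up Flags/getNode stand-ins rather than the real SelectionDAG types.

#include <cstdio>
#include <optional>

// Stand-in for SDNodeFlags; purely illustrative.
struct Flags {
  bool NoNaNs;
};

// Mirrors the VECustomDAG::getNode wrappers: an optional argument that
// defaults to "no flags supplied".
int getNode(int Opcode, std::optional<Flags> F = std::nullopt) {
  if (F)                        // same as F.has_value()
    std::printf("opcode %d with flags (NoNaNs=%d)\n", Opcode, (int)F->NoNaNs);
  else
    std::printf("opcode %d without flags\n", Opcode);
  return Opcode;
}

int main() {
  getNode(1);              // uses the std::nullopt default
  getNode(2, Flags{true}); // engages the optional

  // Comparison against std::nullopt, as in the VEFrameLowering assert.
  std::optional<unsigned> RuntimeAlign = std::nullopt;
  if (RuntimeAlign == std::nullopt) // equivalent to !RuntimeAlign.has_value()
    std::printf("no runtime alignment requested\n");
  return 0;
}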
diff --git a/llvm/lib/Target/VE/VETargetMachine.h b/llvm/lib/Target/VE/VETargetMachine.h --- a/llvm/lib/Target/VE/VETargetMachine.h +++ b/llvm/lib/Target/VE/VETargetMachine.h @@ -16,6 +16,7 @@ #include "VEInstrInfo.h" #include "VESubtarget.h" #include "llvm/Target/TargetMachine.h" +#include namespace llvm { @@ -29,8 +30,9 @@ public: VETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); ~VETargetMachine() override; const VESubtarget *getSubtargetImpl() const { return &Subtarget; } diff --git a/llvm/lib/Target/VE/VETargetMachine.cpp b/llvm/lib/Target/VE/VETargetMachine.cpp --- a/llvm/lib/Target/VE/VETargetMachine.cpp +++ b/llvm/lib/Target/VE/VETargetMachine.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/MC/TargetRegistry.h" +#include using namespace llvm; @@ -60,7 +61,7 @@ return Ret; } -static Reloc::Model getEffectiveRelocModel(Optional RM) { +static Reloc::Model getEffectiveRelocModel(std::optional RM) { return RM.value_or(Reloc::Static); } @@ -79,8 +80,8 @@ VETargetMachine::VETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, getEffectiveRelocModel(RM), diff --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp --- a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -129,21 +129,21 @@ // Start of a code section: we're parsing only the function count. int64_t FunctionCount; if (!nextLEB(FunctionCount, Bytes, Size, false)) - return None; + return std::nullopt; outs() << " # " << FunctionCount << " functions in section."; } else { // Parse the start of a single function. int64_t BodySize, LocalEntryCount; if (!nextLEB(BodySize, Bytes, Size, false) || !nextLEB(LocalEntryCount, Bytes, Size, false)) - return None; + return std::nullopt; if (LocalEntryCount) { outs() << " .local "; for (int64_t I = 0; I < LocalEntryCount; I++) { int64_t Count, Type; if (!nextLEB(Count, Bytes, Size, false) || !nextLEB(Type, Bytes, Size, false)) - return None; + return std::nullopt; for (int64_t J = 0; J < Count; J++) { if (I || J) outs() << ", "; diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp @@ -39,7 +39,7 @@ return wasm::ValType::FUNCREF; if (Type == "externref") return wasm::ValType::EXTERNREF; - return None; + return std::nullopt; } WebAssembly::BlockType WebAssembly::parseBlockType(StringRef Type) { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -63,7 +63,7 @@ // linear memory. 
const AllocaInst *AI = MFI.getObjectAllocation(FrameIndex); if (!AI || !WebAssembly::isWasmVarAddressSpace(AI->getAddressSpace())) - return None; + return std::nullopt; // Otherwise, allocate this object in the named value stack, outside of linear // memory. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -1452,7 +1452,7 @@ static Optional IsWebAssemblyLocal(SDValue Op, SelectionDAG &DAG) { const FrameIndexSDNode *FI = dyn_cast(Op); if (!FI) - return None; + return std::nullopt; auto &MF = DAG.getMachineFunction(); return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex()); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -773,7 +773,7 @@ // Output parameter assignment Label = LabelPHI; EndBB = EndBB1; - LongjmpResult = IRB.CreateCall(GetTempRet0F, None, "longjmp_result"); + LongjmpResult = IRB.CreateCall(GetTempRet0F, std::nullopt, "longjmp_result"); } void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) { @@ -1227,7 +1227,7 @@ CallInst *FMCI = IRB.CreateCall(FMCF, FMCArgs, "fmc"); Value *Poison = PoisonValue::get(LPI->getType()); Value *Pair0 = IRB.CreateInsertValue(Poison, FMCI, 0, "pair0"); - Value *TempRet0 = IRB.CreateCall(GetTempRet0F, None, "tempret0"); + Value *TempRet0 = IRB.CreateCall(GetTempRet0F, std::nullopt, "tempret0"); Value *Pair1 = IRB.CreateInsertValue(Pair0, TempRet0, 1, "pair1"); LPI->replaceAllUsesWith(Pair1); @@ -1355,7 +1355,7 @@ Instruction *NewSetjmpTable = IRB.CreateCall(SaveSetjmpF, Args, "setjmpTable"); Instruction *NewSetjmpTableSize = - IRB.CreateCall(GetTempRet0F, None, "setjmpTableSize"); + IRB.CreateCall(GetTempRet0F, std::nullopt, "setjmpTableSize"); SetjmpTableInsts.push_back(NewSetjmpTable); SetjmpTableSizeInsts.push_back(NewSetjmpTableSize); ToErase.push_back(CI); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h @@ -17,6 +17,7 @@ #include "WebAssemblySubtarget.h" #include "llvm/Target/TargetMachine.h" +#include namespace llvm { @@ -27,9 +28,9 @@ public: WebAssemblyTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, CodeGenOpt::Level OL, - bool JIT); + std::optional RM, + std::optional CM, + CodeGenOpt::Level OL, bool JIT); ~WebAssemblyTargetMachine() override; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -32,6 +32,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LowerAtomicPass.h" #include "llvm/Transforms/Utils.h" +#include using namespace llvm; #define DEBUG_TYPE "wasm" @@ -85,7 +86,7 @@ // WebAssembly Lowering public interface. 
//===----------------------------------------------------------------------===// -static Reloc::Model getEffectiveRelocModel(Optional RM, +static Reloc::Model getEffectiveRelocModel(std::optional RM, const Triple &TT) { if (!RM) { // Default to static relocation model. This should always be more optimial @@ -108,8 +109,8 @@ /// WebAssemblyTargetMachine::WebAssemblyTargetMachine( const Target &T, const Triple &TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, Optional RM, - Optional CM, CodeGenOpt::Level OL, bool JIT) + const TargetOptions &Options, std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine( T, TT.isArch64Bit() @@ -500,6 +501,7 @@ // them. // These functions all require the NoVRegs property. + disablePass(&MachineLateInstrsCleanupID); disablePass(&MachineCopyPropagationID); disablePass(&PostRAMachineSinkingID); disablePass(&PostRASchedulerID); diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1078,7 +1078,7 @@ void setTypeInfo(AsmTypeInfo Type) { CurType = Type; } }; - bool Error(SMLoc L, const Twine &Msg, SMRange Range = None, + bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt, bool MatchingInlineAsm = false) { MCAsmParser &Parser = getParser(); if (MatchingInlineAsm) { @@ -4137,7 +4137,7 @@ bool MatchingInlineAsm) { assert(!Operands.empty() && "Unexpect empty operand list!"); assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!"); - SMRange EmptyRange = None; + SMRange EmptyRange = std::nullopt; // First, handle aliases that expand to multiple instructions. MatchFPUWaitAlias(IDLoc, static_cast(*Operands[0]), Operands, @@ -4396,7 +4396,7 @@ assert(!Operands.empty() && "Unexpect empty operand list!"); assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!"); StringRef Mnemonic = (static_cast(*Operands[0])).getToken(); - SMRange EmptyRange = None; + SMRange EmptyRange = std::nullopt; StringRef Base = (static_cast(*Operands[0])).getToken(); unsigned Prefixes = getPrefixes(Operands); @@ -4887,7 +4887,7 @@ return true; if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); + return TokError("expected end of directive"); getParser().Lex(); getStreamer().emitWinCFIPushReg(Reg, Loc); @@ -4907,7 +4907,7 @@ return true; if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); + return TokError("expected end of directive"); getParser().Lex(); getStreamer().emitWinCFISetFrame(Reg, Off, Loc); @@ -4927,7 +4927,7 @@ return true; if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); + return TokError("expected end of directive"); getParser().Lex(); getStreamer().emitWinCFISaveReg(Reg, Off, Loc); @@ -4947,7 +4947,7 @@ return true; if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); + return TokError("expected end of directive"); getParser().Lex(); getStreamer().emitWinCFISaveXMM(Reg, Off, Loc); @@ -4968,7 +4968,7 @@ } if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); + return TokError("expected end of directive"); getParser().Lex(); getStreamer().emitWinCFIPushFrame(Code, Loc); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp 
b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -613,7 +613,7 @@ .Default(-1u); } if (Type == -1u) - return None; + return std::nullopt; return static_cast(FirstLiteralRelocationKind + Type); } return MCAsmBackend::getFixupKind(Name); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -643,7 +643,7 @@ const MCInstrDesc &MCID = Info->get(Inst.getOpcode()); int MemOpStart = X86II::getMemoryOperandNo(MCID.TSFlags); if (MemOpStart == -1) - return None; + return std::nullopt; MemOpStart += X86II::getOperandBias(MCID); const MCOperand &SegReg = Inst.getOperand(MemOpStart + X86::AddrSegmentReg); @@ -653,24 +653,24 @@ const MCOperand &Disp = Inst.getOperand(MemOpStart + X86::AddrDisp); if (SegReg.getReg() != 0 || IndexReg.getReg() != 0 || ScaleAmt.getImm() != 1 || !Disp.isImm()) - return None; + return std::nullopt; // RIP-relative addressing. if (BaseReg.getReg() == X86::RIP) return Addr + Size + Disp.getImm(); - return None; + return std::nullopt; } Optional X86MCInstrAnalysis::getMemoryOperandRelocationOffset(const MCInst &Inst, uint64_t Size) const { if (Inst.getOpcode() != X86::LEA64r) - return None; + return std::nullopt; const MCInstrDesc &MCID = Info->get(Inst.getOpcode()); int MemOpStart = X86II::getMemoryOperandNo(MCID.TSFlags); if (MemOpStart == -1) - return None; + return std::nullopt; MemOpStart += X86II::getOperandBias(MCID); const MCOperand &SegReg = Inst.getOperand(MemOpStart + X86::AddrSegmentReg); const MCOperand &BaseReg = Inst.getOperand(MemOpStart + X86::AddrBaseReg); @@ -680,7 +680,7 @@ // Must be a simple rip-relative address. if (BaseReg.getReg() != X86::RIP || SegReg.getReg() != 0 || IndexReg.getReg() != 0 || ScaleAmt.getImm() != 1 || !Disp.isImm()) - return None; + return std::nullopt; // rip-relative ModR/M immediate is 32 bits. assert(Size > 4 && "invalid instruction size for rip-relative lea"); return Size - 4; diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h --- a/llvm/lib/Target/X86/X86FrameLowering.h +++ b/llvm/lib/Target/X86/X86FrameLowering.h @@ -54,10 +54,11 @@ /// the number of bytes to probe in RAX/EAX. /// \p InstrNum optionally contains a debug-info instruction number for the /// new stack pointer. - void emitStackProbe( - MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, - Optional InstrNum = None) const; + void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, const DebugLoc &DL, + bool InProlog, + Optional + InstrNum = std::nullopt) const; bool stackProbeFunctionModifiesSP() const override; diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -449,8 +449,8 @@ // Create zero. 
SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32); - SDValue Zero = - SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0); + SDValue Zero = SDValue( + CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, std::nullopt), 0); if (VT == MVT::i64) { Zero = SDValue( CurDAG->getMachineNode( @@ -1375,7 +1375,7 @@ SDVTList VTs = CurDAG->getVTList(MVT::Other); SDValue Ops[] = {N->getOperand(0), N->getOperand(1), MemTmp}; Store = CurDAG->getMemIntrinsicNode(X86ISD::FST, dl, VTs, Ops, MemVT, - MPI, /*Align*/ None, + MPI, /*Align*/ std::nullopt, MachineMemOperand::MOStore); if (N->getFlags().hasNoFPExcept()) { SDNodeFlags Flags = Store->getFlags(); @@ -1393,7 +1393,7 @@ SDValue Ops[] = {Store, MemTmp}; Result = CurDAG->getMemIntrinsicNode( X86ISD::FLD, dl, VTs, Ops, MemVT, MPI, - /*Align*/ None, MachineMemOperand::MOLoad); + /*Align*/ std::nullopt, MachineMemOperand::MOLoad); if (N->getFlags().hasNoFPExcept()) { SDNodeFlags Flags = Result->getFlags(); Flags.setNoFPExcept(true); @@ -3505,11 +3505,11 @@ Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo()); }; auto checkOneUse = [checkUses](SDValue Op, - Optional AllowExtraUses = None) { + Optional AllowExtraUses = std::nullopt) { return checkUses(Op, 1, AllowExtraUses); }; auto checkTwoUse = [checkUses](SDValue Op, - Optional AllowExtraUses = None) { + Optional AllowExtraUses = std::nullopt) { return checkUses(Op, 2, AllowExtraUses); }; @@ -5411,8 +5411,8 @@ } else { // Zero out the high part, effectively zero extending the input. SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32); - SDValue ClrNode = - SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0); + SDValue ClrNode = SDValue( + CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, std::nullopt), 0); switch (NVT.SimpleTy) { case MVT::i16: ClrNode = diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1654,16 +1654,15 @@ MachineBasicBlock *Entry, const SmallVectorImpl &Exits) const override; - bool - splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, - SDValue *Parts, unsigned NumParts, MVT PartVT, - Optional CC) const override; - - SDValue - joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, - const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, - Optional CC) const override; + bool splitValueIntoRegisterParts( + SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, std::optional CC) + const override; + + SDValue joinRegisterPartsIntoValue( + SelectionDAG & DAG, const SDLoc &DL, const SDValue *Parts, + unsigned NumParts, MVT PartVT, EVT ValueVT, + std::optional CC) const override; bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2777,7 +2777,7 @@ bool X86TargetLowering::splitValueIntoRegisterParts( SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, - unsigned NumParts, MVT PartVT, Optional CC) const { + unsigned NumParts, MVT PartVT, std::optional CC) const { bool IsABIRegCopy = CC.has_value(); EVT ValueVT = Val.getValueType(); if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) { @@ -2794,7 +2794,7 @@ SDValue X86TargetLowering::joinRegisterPartsIntoValue( SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned 
NumParts, - MVT PartVT, EVT ValueVT, Optional CC) const { + MVT PartVT, EVT ValueVT, std::optional CC) const { bool IsABIRegCopy = CC.has_value(); if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) { unsigned ValueBits = ValueVT.getSizeInBits(); @@ -3815,14 +3815,14 @@ // in their paired GPR. So we only need to save the GPR to their home // slots. // TODO: __vectorcall will change this. - return None; + return std::nullopt; } bool isSoftFloat = Subtarget.useSoftFloat(); if (isSoftFloat || !Subtarget.hasSSE1()) // Kernel mode asks for SSE to be disabled, so there are no XMM argument // registers. - return None; + return std::nullopt; static const MCPhysReg XMMArgRegs64Bit[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, @@ -22990,13 +22990,14 @@ SDValue Ops[] = { Chain, StackPtr }; Src = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, SrcVT, MPI, - /*Align*/ None, MachineMemOperand::MOLoad); + /*Align*/ std::nullopt, + MachineMemOperand::MOLoad); Chain = Src.getValue(1); } SDValue StoreOps[] = { Chain, Src, StackPtr }; Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, DL, DAG.getVTList(MVT::Other), - StoreOps, DstVT, MPI, /*Align*/ None, + StoreOps, DstVT, MPI, /*Align*/ std::nullopt, MachineMemOperand::MOStore); return DAG.getLoad(DstVT, DL, Chain, StackPtr, MPI); @@ -26453,7 +26454,7 @@ SDValue VAARG = DAG.getMemIntrinsicNode( Subtarget.isTarget64BitLP64() ? X86ISD::VAARG_64 : X86ISD::VAARG_X32, dl, VTs, InstOps, MVT::i64, MachinePointerInfo(SV), - /*Alignment=*/None, + /*Alignment=*/std::nullopt, MachineMemOperand::MOLoad | MachineMemOperand::MOStore); Chain = VAARG.getValue(1); @@ -32353,9 +32354,9 @@ MPI, MaybeAlign(), MachineMemOperand::MOStore); SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue LdOps[] = {Chain, StackPtr}; - SDValue Value = - DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, LdOps, MVT::i64, MPI, - /*Align*/ None, MachineMemOperand::MOLoad); + SDValue Value = DAG.getMemIntrinsicNode( + X86ISD::FILD, dl, Tys, LdOps, MVT::i64, MPI, + /*Align*/ std::nullopt, MachineMemOperand::MOLoad); Chain = Value.getValue(1); // Now use an FIST to do the atomic store. @@ -33939,7 +33940,7 @@ SDValue StoreOps[] = { Chain, Result, StackPtr }; Chain = DAG.getMemIntrinsicNode( X86ISD::FIST, dl, DAG.getVTList(MVT::Other), StoreOps, MVT::i64, - MPI, None /*Align*/, MachineMemOperand::MOStore); + MPI, std::nullopt /*Align*/, MachineMemOperand::MOStore); // Finally load the value back from the stack temporary and return it. // This load is not atomic and doesn't need to be. 
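Another standalone illustration, not something the patch adds: the SystemZ and X86 splitValueIntoRegisterParts/joinRegisterPartsIntoValue hooks model the calling convention as an optional because it is only supplied for ABI register copies, and the implementations test it with CC.has_value() before dereferencing. The sketch below mirrors that shape with a plain unsigned standing in for CallingConv::ID; the names splitValue and CallingConvID are invented for the example.

#include <cstdio>
#include <optional>

// Illustrative stand-in for CallingConv::ID.
using CallingConvID = unsigned;

bool splitValue(unsigned NumParts, std::optional<CallingConvID> CC) {
  // Same idiom as "bool IsABIRegCopy = CC.has_value();" in the hunks above.
  const bool IsABIRegCopy = CC.has_value();
  if (IsABIRegCopy)
    std::printf("splitting for ABI copy, CC=%u, parts=%u\n", *CC, NumParts);
  else
    std::printf("splitting without a calling convention, parts=%u\n", NumParts);
  return IsABIRegCopy;
}

int main() {
  splitValue(2, 0u);           // a plain unsigned converts into the optional
  splitValue(1, std::nullopt); // explicit "no calling convention"
  return 0;
}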
diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp --- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp +++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/IntrinsicsX86.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" +#include using namespace llvm; @@ -924,7 +925,7 @@ return Builder.CreateShuffleVector(V1, makeArrayRef(Indexes, Size)); } -Optional +std::optional X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width, unsigned DemandedWidth) { @@ -1730,10 +1731,10 @@ default: break; } - return None; + return std::nullopt; } -Optional X86TTIImpl::simplifyDemandedUseBitsIntrinsic( +std::optional X86TTIImpl::simplifyDemandedUseBitsIntrinsic( InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const { switch (II.getIntrinsicID()) { @@ -1770,10 +1771,10 @@ break; } } - return None; + return std::nullopt; } -Optional X86TTIImpl::simplifyDemandedVectorEltsIntrinsic( +std::optional X86TTIImpl::simplifyDemandedVectorEltsIntrinsic( InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function @@ -2025,5 +2026,5 @@ UndefElts.setHighBits(VWidth / 2); break; } - return None; + return std::nullopt; } diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3577,7 +3577,7 @@ X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { if (MI.isMoveReg()) return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; - return None; + return std::nullopt; } static unsigned getLoadStoreOpcodeForFP16(bool Load, const X86Subtarget &STI) { @@ -3734,18 +3734,18 @@ const MCInstrDesc &Desc = MemI.getDesc(); int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags); if (MemRefBegin < 0) - return None; + return std::nullopt; MemRefBegin += X86II::getOperandBias(Desc); auto &BaseOp = MemI.getOperand(MemRefBegin + X86::AddrBaseReg); if (!BaseOp.isReg()) // Can be an MO_FrameIndex - return None; + return std::nullopt; const MachineOperand &DispMO = MemI.getOperand(MemRefBegin + X86::AddrDisp); // Displacement can be symbolic if (!DispMO.isImm()) - return None; + return std::nullopt; ExtAddrMode AM; AM.BaseReg = BaseOp.getReg(); @@ -9068,7 +9068,7 @@ // possible. if (MI.getOpcode() == X86::MOV8rr || MI.getOpcode() == X86::MOV16rr || !TRI->isSuperRegister(DestReg, DescribedReg)) - return None; + return std::nullopt; assert(MI.getOpcode() == X86::MOV32rr && "Unexpected super-register case"); return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr); @@ -9087,12 +9087,12 @@ case X86::LEA64_32r: { // We may need to describe a 64-bit parameter with a 32-bit LEA. if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg)) - return None; + return std::nullopt; // Operand 4 could be global address. For now we do not support // such situation. if (!MI.getOperand(4).isImm() || !MI.getOperand(2).isImm()) - return None; + return std::nullopt; const MachineOperand &Op1 = MI.getOperand(1); const MachineOperand &Op2 = MI.getOperand(3); @@ -9103,12 +9103,12 @@ // %rsi = lea %rsi, 4, ... 
if ((Op1.isReg() && Op1.getReg() == MI.getOperand(0).getReg()) || Op2.getReg() == MI.getOperand(0).getReg()) - return None; + return std::nullopt; else if ((Op1.isReg() && Op1.getReg() != X86::NoRegister && TRI->regsOverlap(Op1.getReg(), MI.getOperand(0).getReg())) || (Op2.getReg() != X86::NoRegister && TRI->regsOverlap(Op2.getReg(), MI.getOperand(0).getReg()))) - return None; + return std::nullopt; int64_t Coef = MI.getOperand(2).getImm(); int64_t Offset = MI.getOperand(4).getImm(); @@ -9127,7 +9127,7 @@ if (Op && Op2.getReg() != X86::NoRegister) { int dwarfReg = TRI->getDwarfRegNum(Op2.getReg(), false); if (dwarfReg < 0) - return None; + return std::nullopt; else if (dwarfReg < 32) { Ops.push_back(dwarf::DW_OP_breg0 + dwarfReg); Ops.push_back(0); @@ -9162,14 +9162,14 @@ case X86::MOV8ri: case X86::MOV16ri: // TODO: Handle MOV8ri and MOV16ri. - return None; + return std::nullopt; case X86::MOV32ri: case X86::MOV64ri: case X86::MOV64ri32: // MOV32ri may be used for producing zero-extended 32-bit immediates in // 64-bit parameters, so we need to consider super-registers. if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg)) - return None; + return std::nullopt; return ParamLoadedValue(MI.getOperand(1), Expr); case X86::MOV8rr: case X86::MOV16rr: @@ -9180,10 +9180,10 @@ // 64-bit parameters are zero-materialized using XOR32rr, so also consider // super-registers. if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg)) - return None; + return std::nullopt; if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) return ParamLoadedValue(MachineOperand::CreateImm(0), Expr); - return None; + return std::nullopt; } case X86::MOVSX64rr32: { // We may need to describe the lower 32 bits of the MOVSX; for example, in @@ -9193,7 +9193,7 @@ // $rdi = MOVSX64rr32 $ebx // $esi = MOV32rr $edi if (!TRI->isSubRegisterEq(MI.getOperand(0).getReg(), Reg)) - return None; + return std::nullopt; Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {}); diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp --- a/llvm/lib/Target/X86/X86LowerAMXType.cpp +++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp @@ -246,8 +246,8 @@ Builder.CreateBitCast(LD->getOperand(0), Builder.getInt8PtrTy()); std::array Args = {Row, Col, I8Ptr, Stride}; - Value *NewInst = - Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, None, Args); + Value *NewInst = Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, + std::nullopt, Args); Bitcast->replaceAllUsesWith(NewInst); } @@ -273,7 +273,8 @@ Value *I8Ptr = Builder.CreateBitCast(ST->getOperand(1), Builder.getInt8PtrTy()); std::array Args = {Row, Col, I8Ptr, Stride, Tile}; - Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, None, Args); + Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, std::nullopt, + Args); if (Bitcast->hasOneUse()) return; // %13 = bitcast x86_amx %src to <256 x i32> @@ -323,7 +324,7 @@ std::tie(Row, Col) = getShape(II, OpNo); std::array Args = {Row, Col, I8Ptr, Stride}; Value *NewInst = Builder.CreateIntrinsic( - Intrinsic::x86_tileloadd64_internal, None, Args); + Intrinsic::x86_tileloadd64_internal, std::nullopt, Args); Bitcast->replaceAllUsesWith(NewInst); } else { // %2 = bitcast x86_amx %src to <256 x i32> @@ -340,7 +341,8 @@ Value *Row = II->getOperand(0); Value *Col = II->getOperand(1); std::array Args = {Row, Col, I8Ptr, Stride, Src}; - Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, None, Args); + 
Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, std::nullopt, + Args); Value *NewInst = Builder.CreateLoad(Bitcast->getType(), AllocaAddr); Bitcast->replaceAllUsesWith(NewInst); } @@ -472,8 +474,8 @@ Value *Stride = Builder.getInt64(64); std::array Args = {Row, Col, Ptr, Stride, TileDef}; - Instruction *TileStore = - Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, None, Args); + Instruction *TileStore = Builder.CreateIntrinsic( + Intrinsic::x86_tilestored64_internal, std::nullopt, Args); return TileStore; } @@ -497,8 +499,8 @@ Value *Stride = Builder.getInt64(64); std::array Args = {Row, Col, Ptr, Stride}; - Value *TileLoad = - Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, None, Args); + Value *TileLoad = Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, + std::nullopt, Args); UserI->replaceUsesOfWith(V, TileLoad); } @@ -791,7 +793,7 @@ auto *Block = OldPN->getIncomingBlock(I); BasicBlock::iterator Iter = Block->getTerminator()->getIterator(); Instruction *NewInst = Builder.CreateIntrinsic( - Intrinsic::x86_tilezero_internal, None, {Row, Col}); + Intrinsic::x86_tilezero_internal, std::nullopt, {Row, Col}); NewInst->moveBefore(&*Iter); NewInst = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {IncValue->getType()}, {NewInst}); @@ -936,7 +938,8 @@ Value *I8Ptr = Builder.CreateBitCast(ST->getOperand(1), Builder.getInt8PtrTy()); std::array Args = {Row, Col, I8Ptr, Stride, Tile}; - Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, None, Args); + Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, std::nullopt, + Args); } // %65 = load <256 x i32>, <256 x i32>* %p, align 64 @@ -979,8 +982,8 @@ } std::array Args = {Row, Col, I8Ptr, Stride}; - Value *NewInst = - Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, None, Args); + Value *NewInst = Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, + std::nullopt, Args); Cast->replaceAllUsesWith(NewInst); return EraseLoad; @@ -1158,7 +1161,7 @@ std::array Args = { Row, Col, I8Ptr, Builder.CreateSExt(Col, Builder.getInt64Ty())}; Value *NewInst = Builder.CreateIntrinsic( - Intrinsic::x86_tileloadd64_internal, None, Args); + Intrinsic::x86_tileloadd64_internal, std::nullopt, Args); AMXCast->replaceAllUsesWith(NewInst); AMXCast->eraseFromParent(); } else { @@ -1177,7 +1180,8 @@ Value *Col = II->getOperand(1); std::array Args = { Row, Col, I8Ptr, Builder.CreateSExt(Col, Builder.getInt64Ty()), Src}; - Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, None, Args); + Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, std::nullopt, + Args); Value *NewInst = Builder.CreateLoad(AMXCast->getType(), AllocaAddr); AMXCast->replaceAllUsesWith(NewInst); AMXCast->eraseFromParent(); diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -438,7 +438,7 @@ case MachineOperand::MO_Register: // Ignore all implicit register operands. if (MO.isImplicit()) - return None; + return std::nullopt; return MCOperand::createReg(MO.getReg()); case MachineOperand::MO_Immediate: return MCOperand::createImm(MO.getImm()); @@ -457,7 +457,7 @@ MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress())); case MachineOperand::MO_RegisterMask: // Ignore call clobbers. 
- return None; + return std::nullopt; } } diff --git a/llvm/lib/Target/X86/X86PreAMXConfig.cpp b/llvm/lib/Target/X86/X86PreAMXConfig.cpp --- a/llvm/lib/Target/X86/X86PreAMXConfig.cpp +++ b/llvm/lib/Target/X86/X86PreAMXConfig.cpp @@ -199,7 +199,8 @@ preWriteTileCfg(I8Ptr, Builder, Shapes); - Builder.CreateIntrinsic(Intrinsic::x86_ldtilecfg_internal, None, {I8Ptr}); + Builder.CreateIntrinsic(Intrinsic::x86_ldtilecfg_internal, std::nullopt, + {I8Ptr}); } // Todo: We may need to handle "more than one store" case in the future. diff --git a/llvm/lib/Target/X86/X86TargetMachine.h b/llvm/lib/Target/X86/X86TargetMachine.h --- a/llvm/lib/Target/X86/X86TargetMachine.h +++ b/llvm/lib/Target/X86/X86TargetMachine.h @@ -19,6 +19,7 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" #include +#include namespace llvm { @@ -34,8 +35,9 @@ public: X86TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); ~X86TargetMachine() override; const X86Subtarget *getSubtargetImpl(const Function &F) const override; diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -51,6 +51,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/CFGuard.h" #include +#include #include using namespace llvm; @@ -162,9 +163,8 @@ return Ret; } -static Reloc::Model getEffectiveRelocModel(const Triple &TT, - bool JIT, - Optional RM) { +static Reloc::Model getEffectiveRelocModel(const Triple &TT, bool JIT, + std::optional RM) { bool is64Bit = TT.getArch() == Triple::x86_64; if (!RM) { // JIT codegen should use static relocations by default, since it's @@ -204,8 +204,9 @@ return *RM; } -static CodeModel::Model getEffectiveX86CodeModel(Optional CM, - bool JIT, bool Is64Bit) { +static CodeModel::Model +getEffectiveX86CodeModel(std::optional CM, bool JIT, + bool Is64Bit) { if (CM) { if (*CM == CodeModel::Tiny) report_fatal_error("Target does not support the tiny CodeModel", false); @@ -221,8 +222,8 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine( T, computeDataLayout(TT), TT, CPU, FS, Options, diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -19,6 +19,7 @@ #include "X86TargetMachine.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" +#include namespace llvm { @@ -113,9 +114,9 @@ /// \name Cache TTI Implementation /// @{ - llvm::Optional getCacheSize( + std::optional getCacheSize( TargetTransformInfo::CacheLevel Level) const override; - llvm::Optional getCacheAssociativity( + std::optional getCacheAssociativity( TargetTransformInfo::CacheLevel Level) const override; /// @} @@ -136,7 +137,7 @@ ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, - ArrayRef Args = None); + ArrayRef Args = std::nullopt); InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, @@ -171,13 +172,13 @@ InstructionCost 
getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr); - Optional instCombineIntrinsic(InstCombiner &IC, - IntrinsicInst &II) const; - Optional + std::optional instCombineIntrinsic(InstCombiner &IC, + IntrinsicInst &II) const; + std::optional simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const; - Optional simplifyDemandedVectorEltsIntrinsic( + std::optional simplifyDemandedVectorEltsIntrinsic( InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function @@ -189,7 +190,7 @@ TTI::TargetCostKind CostKind); InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind); InstructionCost getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -56,6 +56,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Debug.h" +#include using namespace llvm; @@ -75,7 +76,7 @@ unsigned CodeSizeCost = ~0U; unsigned SizeAndLatencyCost = ~0U; - llvm::Optional + std::optional operator[](TargetTransformInfo::TargetCostKind Kind) const { unsigned Cost = ~0U; switch (Kind) { @@ -93,7 +94,7 @@ break; } if (Cost == ~0U) - return None; + return std::nullopt; return Cost; } }; @@ -108,7 +109,7 @@ return ST->hasPOPCNT() ? TTI::PSK_FastHardware : TTI::PSK_Software; } -llvm::Optional X86TTIImpl::getCacheSize( +std::optional X86TTIImpl::getCacheSize( TargetTransformInfo::CacheLevel Level) const { switch (Level) { case TargetTransformInfo::CacheLevel::L1D: @@ -138,7 +139,7 @@ llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); } -llvm::Optional X86TTIImpl::getCacheAssociativity( +std::optional X86TTIImpl::getCacheAssociativity( TargetTransformInfo::CacheLevel Level) const { // - Penryn // - Nehalem @@ -1488,8 +1489,8 @@ SubLT.second.getVectorNumElements()); int ExtractIndex = alignDown((Index % NumElts), NumSubElts); InstructionCost ExtractCost = - getShuffleCost(TTI::SK_ExtractSubvector, VecTy, None, CostKind, - ExtractIndex, SubTy); + getShuffleCost(TTI::SK_ExtractSubvector, VecTy, std::nullopt, + CostKind, ExtractIndex, SubTy); // If the original size is 32-bits or more, we can use pshufd. Otherwise // if we have SSSE3 we can use pshufb. @@ -1641,7 +1642,7 @@ InstructionCost NumOfShuffles = (NumOfSrcs - 1) * NumOfDests; return NumOfShuffles * getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, - None, CostKind, 0, nullptr); + std::nullopt, CostKind, 0, nullptr); } return BaseT::getShuffleCost(Kind, BaseTp, Mask, CostKind, Index, SubTp); @@ -4368,8 +4369,8 @@ EVT VT = TLI->getValueType(DL, Val); if (VT.getScalarType() != MScalarTy || VT.getSizeInBits() >= 128) SubTy = FixedVectorType::get(ScalarType, SubNumElts); - ShuffleCost = getShuffleCost(TTI::SK_PermuteTwoSrc, SubTy, None, CostKind, - 0, SubTy); + ShuffleCost = getShuffleCost(TTI::SK_PermuteTwoSrc, SubTy, std::nullopt, + CostKind, 0, SubTy); } int IntOrFpCost = ScalarType->isFloatingPointTy() ? 0 : 1; return ShuffleCost + IntOrFpCost + RegisterFileMoveCost; @@ -4452,8 +4453,8 @@ // FIXME: we don't need to extract if all non-demanded elements // are legalization-inserted padding. 
if (!LaneEltMask.isAllOnes()) - Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, CostKind, - I * NumEltsPerLane, LaneTy); + Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt, + CostKind, I * NumEltsPerLane, LaneTy); Cost += BaseT::getScalarizationOverhead(LaneTy, LaneEltMask, Insert, false); } @@ -4470,8 +4471,8 @@ if (!AffectedLanes[I] || (Lane == 0 && FullyAffectedLegalVectors[LegalVec])) continue; - Cost += getShuffleCost(TTI::SK_InsertSubvector, Ty, None, CostKind, - I * NumEltsPerLane, LaneTy); + Cost += getShuffleCost(TTI::SK_InsertSubvector, Ty, std::nullopt, + CostKind, I * NumEltsPerLane, LaneTy); } } } @@ -4530,8 +4531,8 @@ NumEltsPerLane, I * NumEltsPerLane); if (LaneEltMask.isNullValue()) continue; - Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, CostKind, - I * NumEltsPerLane, LaneTy); + Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt, + CostKind, I * NumEltsPerLane, LaneTy); Cost += BaseT::getScalarizationOverhead(LaneTy, LaneEltMask, false, Extract); } @@ -4650,9 +4651,9 @@ DemandedDstElts.zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors); unsigned NumDstVectorsDemanded = DemandedDstVectors.countPopulation(); - InstructionCost SingleShuffleCost = - getShuffleCost(TTI::SK_PermuteSingleSrc, SingleDstVecTy, /*Mask=*/None, - CostKind, /*Index=*/0, /*SubTp=*/nullptr); + InstructionCost SingleShuffleCost = getShuffleCost( + TTI::SK_PermuteSingleSrc, SingleDstVecTy, /*Mask=*/std::nullopt, CostKind, + /*Index=*/0, /*SubTp=*/nullptr); return NumDstVectorsDemanded * SingleShuffleCost; } @@ -4779,7 +4780,8 @@ if (!Is0thSubVec) Cost += getShuffleCost(IsLoad ? TTI::ShuffleKind::SK_InsertSubvector : TTI::ShuffleKind::SK_ExtractSubvector, - VTy, None, CostKind, NumEltDone(), CurrVecTy); + VTy, std::nullopt, CostKind, NumEltDone(), + CurrVecTy); } // While we can directly load/store ZMM, YMM, and 64-bit halves of XMM, @@ -4858,17 +4860,17 @@ if (VT.isSimple() && LT.second != VT.getSimpleVT() && LT.second.getVectorNumElements() == NumElem) // Promotion requires extend/truncate for data and a shuffle for mask. - Cost += getShuffleCost(TTI::SK_PermuteTwoSrc, SrcVTy, None, CostKind, 0, - nullptr) + - getShuffleCost(TTI::SK_PermuteTwoSrc, MaskTy, None, CostKind, 0, - nullptr); + Cost += getShuffleCost(TTI::SK_PermuteTwoSrc, SrcVTy, std::nullopt, + CostKind, 0, nullptr) + + getShuffleCost(TTI::SK_PermuteTwoSrc, MaskTy, std::nullopt, + CostKind, 0, nullptr); else if (LT.first * LT.second.getVectorNumElements() > NumElem) { auto *NewMaskTy = FixedVectorType::get(MaskTy->getElementType(), LT.second.getVectorNumElements()); // Expanding requires fill mask with zeroes - Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, None, CostKind, - 0, MaskTy); + Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, std::nullopt, + CostKind, 0, MaskTy); } // Pre-AVX512 - each maskmov load costs 2 + store costs ~8. @@ -4909,7 +4911,7 @@ InstructionCost X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, - Optional FMF, + std::optional FMF, TTI::TargetCostKind CostKind) { if (TTI::requiresOrderedReduction(FMF)) return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind); @@ -5113,8 +5115,9 @@ // If we're reducing from 256/512 bits, use an extract_subvector. 
if (Size > 128) { auto *SubTy = FixedVectorType::get(ValVTy->getElementType(), NumVecElts); - ReductionCost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, - CostKind, NumVecElts, SubTy); + ReductionCost += + getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt, CostKind, + NumVecElts, SubTy); Ty = SubTy; } else if (Size == 128) { // Reducing from 128 bits is a permute of v2f64/v2i64. @@ -5126,7 +5129,7 @@ ShufTy = FixedVectorType::get(Type::getInt64Ty(ValVTy->getContext()), 2); ReductionCost += getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, - None, CostKind, 0, nullptr); + std::nullopt, CostKind, 0, nullptr); } else if (Size == 64) { // Reducing from 64 bits is a shuffle of v4f32/v4i32. FixedVectorType *ShufTy; @@ -5137,7 +5140,7 @@ ShufTy = FixedVectorType::get(Type::getInt32Ty(ValVTy->getContext()), 4); ReductionCost += getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, - None, CostKind, 0, nullptr); + std::nullopt, CostKind, 0, nullptr); } else { // Reducing from smaller size is a shift by immediate. auto *ShiftTy = FixedVectorType::get( @@ -5414,8 +5417,8 @@ // If we're reducing from 256/512 bits, use an extract_subvector. if (Size > 128) { auto *SubTy = FixedVectorType::get(ValVTy->getElementType(), NumVecElts); - MinMaxCost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, CostKind, - NumVecElts, SubTy); + MinMaxCost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt, + CostKind, NumVecElts, SubTy); Ty = SubTy; } else if (Size == 128) { // Reducing from 128 bits is a permute of v2f64/v2i64. @@ -5425,8 +5428,8 @@ FixedVectorType::get(Type::getDoubleTy(ValTy->getContext()), 2); else ShufTy = FixedVectorType::get(Type::getInt64Ty(ValTy->getContext()), 2); - MinMaxCost += getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, None, - CostKind, 0, nullptr); + MinMaxCost += getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, + std::nullopt, CostKind, 0, nullptr); } else if (Size == 64) { // Reducing from 64 bits is a shuffle of v4f32/v4i32. FixedVectorType *ShufTy; @@ -5434,8 +5437,8 @@ ShufTy = FixedVectorType::get(Type::getFloatTy(ValTy->getContext()), 4); else ShufTy = FixedVectorType::get(Type::getInt32Ty(ValTy->getContext()), 4); - MinMaxCost += getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, None, - CostKind, 0, nullptr); + MinMaxCost += getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, + std::nullopt, CostKind, 0, nullptr); } else { // Reducing from smaller size is a shift by immediate. auto *ShiftTy = FixedVectorType::get( @@ -6238,8 +6241,8 @@ TTI::ShuffleKind ShuffleKind = (NumOfMemOps > 1) ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc; - InstructionCost ShuffleCost = - getShuffleCost(ShuffleKind, SingleMemOpTy, None, CostKind, 0, nullptr); + InstructionCost ShuffleCost = getShuffleCost( + ShuffleKind, SingleMemOpTy, std::nullopt, CostKind, 0, nullptr); unsigned NumOfLoadsInInterleaveGrp = Indices.size() ? Indices.size() : Factor; @@ -6296,7 +6299,7 @@ // shuffle. unsigned NumOfSources = Factor; // The number of values to be merged. InstructionCost ShuffleCost = getShuffleCost( - TTI::SK_PermuteTwoSrc, SingleMemOpTy, None, CostKind, 0, nullptr); + TTI::SK_PermuteTwoSrc, SingleMemOpTy, std::nullopt, CostKind, 0, nullptr); unsigned NumOfShufflesPerStore = NumOfSources - 1; // The SK_MergeTwoSrc shuffle clobbers one of src operands. 
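A hedged note on the getShuffleCost and getMemoryOpCost call sites above, which pass std::nullopt for parameters that are ArrayRef or MaybeAlign rather than std::optional: that only compiles if the parameter type itself accepts std::nullopt_t, which the LLVM support types are presumed to provide during this migration. The toy IntSpan below shows the mechanism in isolation; it is an invented type for illustration, not LLVM's ArrayRef.

#include <cstddef>
#include <cstdio>
#include <optional>

// Toy analogue of an ArrayRef-style parameter that can be "absent".
// Accepting std::nullopt_t in a constructor is what lets call sites such as
// getShuffleCost(..., std::nullopt, ...) pass a literal "empty" argument
// after switching away from None.
struct IntSpan {
  const int *Data = nullptr;
  size_t Size = 0;
  IntSpan() = default;
  IntSpan(std::nullopt_t) {} // enables passing std::nullopt
  IntSpan(const int *D, size_t N) : Data(D), Size(N) {}
  bool empty() const { return Size == 0; }
};

unsigned shuffleCost(IntSpan Mask = std::nullopt) {
  // An absent mask is treated as a trivial shuffle in this toy model.
  return Mask.empty() ? 1u : static_cast<unsigned>(Mask.Size);
}

int main() {
  const int M[] = {1, 0, 3, 2};
  std::printf("explicit mask cost: %u\n", shuffleCost(IntSpan(M, 4)));
  std::printf("defaulted cost:     %u\n", shuffleCost());
  std::printf("explicit nullopt:   %u\n", shuffleCost(std::nullopt));
  return 0;
}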
diff --git a/llvm/lib/Target/XCore/XCoreTargetMachine.h b/llvm/lib/Target/XCore/XCoreTargetMachine.h --- a/llvm/lib/Target/XCore/XCoreTargetMachine.h +++ b/llvm/lib/Target/XCore/XCoreTargetMachine.h @@ -19,6 +19,7 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" #include +#include namespace llvm { class StringRef; @@ -30,8 +31,9 @@ public: XCoreTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); ~XCoreTargetMachine() override; const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; } diff --git a/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/llvm/lib/Target/XCore/XCoreTargetMachine.cpp --- a/llvm/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/llvm/lib/Target/XCore/XCoreTargetMachine.cpp @@ -22,15 +22,16 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/CodeGen.h" +#include using namespace llvm; -static Reloc::Model getEffectiveRelocModel(Optional RM) { +static Reloc::Model getEffectiveRelocModel(std::optional RM) { return RM.value_or(Reloc::Static); } static CodeModel::Model -getEffectiveXCoreCodeModel(Optional CM) { +getEffectiveXCoreCodeModel(std::optional CM) { if (CM) { if (*CM != CodeModel::Small && *CM != CodeModel::Large) report_fatal_error("Target only supports CodeModel Small or Large"); @@ -44,8 +45,8 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional RM, - Optional CM, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine( T, "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32", diff --git a/llvm/lib/Testing/Support/Annotations.cpp b/llvm/lib/Testing/Support/Annotations.cpp --- a/llvm/lib/Testing/Support/Annotations.cpp +++ b/llvm/lib/Testing/Support/Annotations.cpp @@ -37,15 +37,15 @@ All.push_back( {Code.size(), size_t(-1), Name.value_or(""), Payload.value_or("")}); Points[Name.value_or("")].push_back(All.size() - 1); - Name = llvm::None; - Payload = llvm::None; + Name = std::nullopt; + Payload = std::nullopt; continue; } if (Text.consume_front("[[")) { OpenRanges.push_back( {Code.size(), size_t(-1), Name.value_or(""), Payload.value_or("")}); - Name = llvm::None; - Payload = llvm::None; + Name = std::nullopt; + Payload = std::nullopt; continue; } Require(!Name, "$name should be followed by ^ or [["); diff --git a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp --- a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp +++ b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp @@ -101,7 +101,7 @@ // aarch64-w64-mingw32-llvm-dlltool-10.exe -> aarch64-w64-mingw32 ProgName = ProgName.rtrim("0123456789.-"); if (!ProgName.consume_back_insensitive("dlltool")) - return None; + return std::nullopt; ProgName.consume_back_insensitive("llvm-"); ProgName.consume_back_insensitive("-"); return ProgName.str(); diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp --- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp @@ -107,7 +107,7 @@ // Pull information from the function attributes. 
auto Size = Resume->getParamDereferenceableBytes(0); if (!Size) - return None; + return std::nullopt; return std::make_pair(Size, Resume->getParamAlign(0).valueOrOne()); } diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -892,7 +892,7 @@ RetType = Builder.createPointerType(nullptr, Layout.getTypeSizeInBits(Ty), Layout.getABITypeAlignment(Ty) * CHAR_BIT, - /*DWARFAddressSpace=*/None, Name); + /*DWARFAddressSpace=*/std::nullopt, Name); } else if (Ty->isStructTy()) { auto *DIStruct = Builder.createStructType( Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty), @@ -1149,8 +1149,8 @@ // Add header fields for the resume and destroy functions. // We can rely on these being perfectly packed. - (void)B.addField(FnPtrTy, None, /*header*/ true); - (void)B.addField(FnPtrTy, None, /*header*/ true); + (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true); + (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true); // PromiseAlloca field needs to be explicitly added here because it's // a header field with a fixed offset based on its alignment. Hence it @@ -1164,7 +1164,7 @@ unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size())); Type *IndexType = Type::getIntNTy(C, IndexBits); - SwitchIndexFieldId = B.addField(IndexType, None); + SwitchIndexFieldId = B.addField(IndexType, std::nullopt); } else { assert(PromiseAlloca == nullptr && "lowering doesn't support promises"); } @@ -1189,8 +1189,8 @@ if (const Argument *A = dyn_cast(S.first)) if (A->hasByValAttr()) FieldType = A->getParamByValType(); - FieldIDType Id = - B.addField(FieldType, None, false /*header*/, true /*IsSpillOfValue*/); + FieldIDType Id = B.addField(FieldType, std::nullopt, false /*header*/, + true /*IsSpillOfValue*/); FrameData.setFieldIndex(S.first, Id); } diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -201,8 +201,8 @@ assert(MustTailCallFuncBlock && "Must have a single predecessor block"); auto It = MustTailCallFuncBlock->getTerminator()->getIterator(); auto *MustTailCall = cast(&*std::prev(It)); - CoroEndBlock->getInstList().splice( - End->getIterator(), MustTailCallFuncBlock->getInstList(), MustTailCall); + CoroEndBlock->splice(End->getIterator(), MustTailCallFuncBlock, + MustTailCall->getIterator()); // Insert the return instruction. 
Builder.SetInsertPoint(End); diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -489,7 +489,7 @@ Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, /* AllowNonInbounds */ true); if (Ptr != Arg) - return None; + return std::nullopt; if (Offset.getSignificantBits() >= 64) return false; diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -1079,7 +1079,7 @@ for (auto &CB : SimplificationCallbacks.lookup(IRP)) { Optional SimplifiedV = CB(IRP, &AA, UsedAssumedInformation); if (!SimplifiedV) - return llvm::None; + return std::nullopt; if (isa_and_nonnull(*SimplifiedV)) return cast(*SimplifiedV); return nullptr; @@ -1091,7 +1091,7 @@ AA::ValueScope::Interprocedural, UsedAssumedInformation)) { if (Values.empty()) - return llvm::None; + return std::nullopt; if (auto *C = dyn_cast_or_null( AAPotentialValues::getSingleValue(*this, AA, IRP, Values))) return C; @@ -1113,7 +1113,7 @@ if (!getAssumedSimplifiedValues(IRP, AA, Values, S, UsedAssumedInformation)) return &IRP.getAssociatedValue(); if (Values.empty()) - return llvm::None; + return std::nullopt; if (AA) if (Value *V = AAPotentialValues::getSingleValue(*this, *AA, IRP, Values)) return V; diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -2292,7 +2292,7 @@ return DerefAA.getKnownDereferenceableBytes(); } - Optional Loc = MemoryLocation::getOrNone(I); + std::optional Loc = MemoryLocation::getOrNone(I); if (!Loc || Loc->Ptr != UseV || !Loc->Size.isPrecise() || I->isVolatile()) return 0; @@ -2861,7 +2861,7 @@ // If it is known (which we tested above) but it doesn't have a value, // then we can assume `undef` and hence the instruction is UB. 
KnownUBInsts.insert(I); - return llvm::None; + return std::nullopt; } if (!*SimplifiedV) return nullptr; @@ -2869,7 +2869,7 @@ } if (isa(V)) { KnownUBInsts.insert(I); - return llvm::None; + return std::nullopt; } return V; } @@ -4238,7 +4238,7 @@ if (!UseV->getType()->isPointerTy()) return; - Optional Loc = MemoryLocation::getOrNone(I); + std::optional Loc = MemoryLocation::getOrNone(I); if (!Loc || Loc->Ptr != UseV || !Loc->Size.isPrecise() || I->isVolatile()) return; @@ -5534,7 +5534,7 @@ Optional COpt = AA.getAssumedConstant(A); if (!COpt) { - SimplifiedAssociatedValue = llvm::None; + SimplifiedAssociatedValue = std::nullopt; A.recordDependence(AA, *this, DepClassTy::OPTIONAL); return true; } @@ -6091,7 +6091,7 @@ if (!isa(InitVal)) { IRBuilder<> Builder(Alloca->getNextNode()); // TODO: Use alignment above if align!=1 - Builder.CreateMemSet(Alloca, InitVal, Size, None); + Builder.CreateMemSet(Alloca, InitVal, Size, std::nullopt); } HasChanged = ChangeStatus::CHANGED; } @@ -6108,7 +6108,7 @@ return APInt(64, 0); if (auto *CI = dyn_cast_or_null(SimpleV.value())) return CI->getValue(); - return llvm::None; + return std::nullopt; } Optional getSize(Attributor &A, const AbstractAttribute &AA, @@ -6439,7 +6439,7 @@ namespace { struct AAPrivatizablePtrImpl : public AAPrivatizablePtr { AAPrivatizablePtrImpl(const IRPosition &IRP, Attributor &A) - : AAPrivatizablePtr(IRP, A), PrivatizableType(llvm::None) {} + : AAPrivatizablePtr(IRP, A), PrivatizableType(std::nullopt) {} ChangeStatus indicatePessimisticFixpoint() override { AAPrivatizablePtr::indicatePessimisticFixpoint(); @@ -9630,7 +9630,7 @@ if (Unreachable.count(&Fn)) return false; - return llvm::None; + return std::nullopt; } /// Set of functions that we know for sure is reachable. @@ -9909,7 +9909,7 @@ if (!COpt.has_value()) { A.recordDependence(AA, QueryingAA, DepClassTy::OPTIONAL); - return llvm::None; + return std::nullopt; } if (auto *C = COpt.value()) { A.recordDependence(AA, QueryingAA, DepClassTy::OPTIONAL); @@ -9972,7 +9972,7 @@ return &IRP.getAssociatedValue(); Optional C = askForAssumedConstant(A, AA, IRP, Ty); if (!C) - return llvm::None; + return std::nullopt; if (C.value()) if (auto *CC = AA::getWithType(**C, Ty)) return CC; diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -63,6 +63,7 @@ #include #include #include +#include #include using namespace llvm; @@ -211,7 +212,7 @@ if (MR == ModRefInfo::NoModRef) continue; - Optional Loc = MemoryLocation::getOrNone(&I); + std::optional Loc = MemoryLocation::getOrNone(&I); if (!Loc) { // If no location is known, conservatively assume anything can be // accessed. diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -883,7 +883,7 @@ if (!isa(InitVal)) { IRBuilder<> Builder(CI->getNextNode()); // TODO: Use alignment above if align!=1 - Builder.CreateMemSet(NewGV, InitVal, AllocSize, None); + Builder.CreateMemSet(NewGV, InitVal, AllocSize, std::nullopt); } // Update users of the allocation to use the new global instead. 
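The Attributor hunks above rely on a three-state convention for simplification queries, and the mechanical None → std::nullopt rewrite preserves it: an empty optional means "no answer yet" (the optimistic state), a present-but-null pointer means "known, but not simplifiable", and a present non-null pointer is the simplified value. Below is a minimal, self-contained sketch of that convention for illustration only; the Value stub and querySimplified helper are hypothetical stand-ins, not code from this patch.

#include <cassert>
#include <optional>

struct Value {};

// nullopt = no answer yet; nullptr = known "cannot simplify"; non-null = value.
static std::optional<Value *> querySimplified(bool Known, Value *Simplified) {
  if (!Known)
    return std::nullopt; // spelled `llvm::None` before this migration
  return Simplified;     // may legitimately be nullptr
}

int main() {
  Value V;
  assert(!querySimplified(false, &V).has_value());    // still unknown
  assert(*querySimplified(true, nullptr) == nullptr); // known, not simplifiable
  assert(*querySimplified(true, &V) == &V);           // known, simplified to V
  return 0;
}

Read this way, the `return std::nullopt;` in the undefined-behavior handling above reports "no concrete value" and is deliberately distinct from the `return nullptr;` path.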
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -469,7 +469,7 @@ // See if we have a constants Constant *CST = dyn_cast(V); if (!CST) - return None; + return std::nullopt; // Holds a mapping from a global value number to a Constant. DenseMap::iterator GVNToConstantIt; @@ -684,7 +684,8 @@ Unit /* Context */, F->getName(), MangledNameStream.str(), Unit /* File */, 0 /* Line 0 is reserved for compiler-generated code. */, - DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */ + DB.createSubroutineType( + DB.getOrCreateTypeArray(std::nullopt)), /* void type */ 0, /* Line 0 is reserved for compiler-generated code. */ DINode::DIFlags::FlagArtificial /* Compiler-generated code. */, /* Outlined code is optimized code by definition. */ @@ -1193,7 +1194,7 @@ Optional OGVN = Cand.getGVN(Incoming); if (!OGVN && Blocks.contains(IncomingBlock)) { Region.IgnoreRegion = true; - return None; + return std::nullopt; } // If the incoming block isn't in the region, we don't have to worry about @@ -2013,7 +2014,7 @@ MatchingNum++; } - return None; + return std::nullopt; } /// Remove empty output blocks from the outlined region. diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -50,6 +50,7 @@ #include "llvm/Transforms/Utils/CallGraphUpdater.h" #include +#include using namespace llvm; using namespace omp; @@ -1508,7 +1509,7 @@ continue; auto IsPotentiallyAffectedByBarrier = - [](Optional Loc) { + [](std::optional Loc) { const Value *Obj = (Loc && Loc->Ptr) ? getUnderlyingObject(Loc->Ptr) : nullptr; @@ -1538,11 +1539,12 @@ }; if (MemIntrinsic *MI = dyn_cast(I)) { - Optional Loc = MemoryLocation::getForDest(MI); + std::optional Loc = + MemoryLocation::getForDest(MI); if (IsPotentiallyAffectedByBarrier(Loc)) return false; if (MemTransferInst *MTI = dyn_cast(I)) { - Optional Loc = + std::optional Loc = MemoryLocation::getForSource(MTI); if (IsPotentiallyAffectedByBarrier(Loc)) return false; @@ -1554,7 +1556,7 @@ if (LI->hasMetadata(LLVMContext::MD_invariant_load)) continue; - Optional Loc = MemoryLocation::getOrNone(I); + std::optional Loc = MemoryLocation::getOrNone(I); if (IsPotentiallyAffectedByBarrier(Loc)) return false; } @@ -2340,7 +2342,7 @@ virtual Optional getReplacementValue(InternalControlVar ICV, const Instruction *I, Attributor &A) const { - return None; + return std::nullopt; } /// Return an assumed unique ICV value if a single candidate is found. If @@ -2445,7 +2447,7 @@ const auto *CB = dyn_cast(&I); if (!CB || CB->hasFnAttr("no_openmp") || CB->hasFnAttr("no_openmp_routines")) - return None; + return std::nullopt; auto &OMPInfoCache = static_cast(A.getInfoCache()); auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter]; @@ -2456,7 +2458,7 @@ if (CalledFunction == nullptr) return nullptr; if (CalledFunction == GetterRFI.Declaration) - return None; + return std::nullopt; if (CalledFunction == SetterRFI.Declaration) { if (ICVReplacementValuesMap[ICV].count(&I)) return ICVReplacementValuesMap[ICV].lookup(&I); @@ -2485,7 +2487,7 @@ // We don't check unique value for a function, so return None. Optional getUniqueReplacementValue(InternalControlVar ICV) const override { - return None; + return std::nullopt; } /// Return the value with which \p I can be replaced for specific \p ICV. 
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -814,8 +814,8 @@ for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) { auto *CI = cast(U.getUser()); auto *NewCI = CallInst::Create( - TypeTestFunc, {CI->getArgOperand(0), CI->getArgOperand(1)}, None, "", - CI); + TypeTestFunc, {CI->getArgOperand(0), CI->getArgOperand(1)}, + std::nullopt, "", CI); CI->replaceAllUsesWith(NewCI); CI->eraseFromParent(); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -243,7 +243,7 @@ // Don't allow pointers. Splat vectors are fine. if (!LHS->getOperand(0)->getType()->isIntOrIntVectorTy() || !RHS->getOperand(0)->getType()->isIntOrIntVectorTy()) - return None; + return std::nullopt; // Here comes the tricky part: // LHS might be of the form L11 & L12 == X, X == L21 & L22, @@ -274,7 +274,7 @@ // Bail if LHS was a icmp that can't be decomposed into an equality. if (!ICmpInst::isEquality(PredL)) - return None; + return std::nullopt; Value *R1 = RHS->getOperand(0); Value *R2 = RHS->getOperand(1); @@ -288,7 +288,7 @@ A = R12; D = R11; } else { - return None; + return std::nullopt; } E = R2; R1 = nullptr; @@ -316,7 +316,7 @@ // Bail if RHS was a icmp that can't be decomposed into an equality. if (!ICmpInst::isEquality(PredR)) - return None; + return std::nullopt; // Look for ANDs on the right side of the RHS icmp. if (!Ok) { @@ -336,7 +336,7 @@ E = R1; Ok = true; } else { - return None; + return std::nullopt; } assert(Ok && "Failed to find AND on the right side of the RHS icmp."); @@ -1019,7 +1019,7 @@ static Optional matchIntPart(Value *V) { Value *X; if (!match(V, m_OneUse(m_Trunc(m_Value(X))))) - return None; + return std::nullopt; unsigned NumOriginalBits = X->getType()->getScalarSizeInBits(); unsigned NumExtractedBits = V->getType()->getScalarSizeInBits(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -72,6 +72,7 @@ #include #include #include +#include #include #include @@ -2345,7 +2346,7 @@ Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load && LHS->getType()->isPointerTy() && isValidAssumeForContext(II, LHS, &DT)) { - MDNode *MD = MDNode::get(II->getContext(), None); + MDNode *MD = MDNode::get(II->getContext(), std::nullopt); LHS->setMetadata(LLVMContext::MD_nonnull, MD); return RemoveConditionFromAssume(II); @@ -2827,7 +2828,7 @@ } default: { // Handle target specific intrinsics - Optional V = targetInstCombineIntrinsic(*II); + std::optional V = targetInstCombineIntrinsic(*II); if (V) return V.value(); break; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -18,6 +18,8 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" +#include + using namespace llvm; using namespace PatternMatch; @@ -979,7 +981,7 @@ 
Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) { Attribute Attr = Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange); - if (Optional MaxVScale = Attr.getVScaleRangeMax()) { + if (std::optional MaxVScale = Attr.getVScaleRangeMax()) { if (Log2_32(*MaxVScale) < DestWidth) { Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1)); return replaceInstUsesWith(Trunc, VScale); @@ -1348,7 +1350,7 @@ if (CI.getFunction() && CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) { Attribute Attr = CI.getFunction()->getFnAttribute(Attribute::VScaleRange); - if (Optional MaxVScale = Attr.getVScaleRangeMax()) { + if (std::optional MaxVScale = Attr.getVScaleRangeMax()) { unsigned TypeWidth = Src->getType()->getScalarSizeInBits(); if (Log2_32(*MaxVScale) < TypeWidth) { Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1)); @@ -1622,7 +1624,7 @@ if (CI.getFunction() && CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) { Attribute Attr = CI.getFunction()->getFnAttribute(Attribute::VScaleRange); - if (Optional MaxVScale = Attr.getVScaleRangeMax()) { + if (std::optional MaxVScale = Attr.getVScaleRangeMax()) { if (Log2_32(*MaxVScale) < (SrcBitSize - 1)) { Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1)); return replaceInstUsesWith(CI, VScale); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5873,13 +5873,13 @@ if (auto *CI = dyn_cast(C)) { // Bail out if the constant can't be safely incremented/decremented. if (!ConstantIsOk(CI)) - return llvm::None; + return std::nullopt; } else if (auto *FVTy = dyn_cast(Type)) { unsigned NumElts = FVTy->getNumElements(); for (unsigned i = 0; i != NumElts; ++i) { Constant *Elt = C->getAggregateElement(i); if (!Elt) - return llvm::None; + return std::nullopt; if (isa(Elt)) continue; @@ -5888,14 +5888,14 @@ // know that this constant is min/max. auto *CI = dyn_cast(Elt); if (!CI || !ConstantIsOk(CI)) - return llvm::None; + return std::nullopt; if (!SafeReplacementConstant) SafeReplacementConstant = CI; } } else { // ConstantExpr? - return llvm::None; + return std::nullopt; } // It may not be safe to change a compare predicate in the presence of diff --git a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp @@ -522,7 +522,7 @@ // endless combine looping. for (Instruction *I : llvm::reverse(NewInstructions)) I->eraseFromParent(); - return llvm::None; + return std::nullopt; } return std::make_pair(ArrayRef(NewInstructions), Negated); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -314,47 +314,82 @@ TI->getType()); } - // Cond ? -X : -Y --> -(Cond ? X : Y) - Value *X, *Y; - if (match(TI, m_FNeg(m_Value(X))) && match(FI, m_FNeg(m_Value(Y))) && - (TI->hasOneUse() || FI->hasOneUse())) { - // Intersect FMF from the fneg instructions and union those with the select. 
- FastMathFlags FMF = TI->getFastMathFlags(); - FMF &= FI->getFastMathFlags(); - FMF |= SI.getFastMathFlags(); - Value *NewSel = Builder.CreateSelect(Cond, X, Y, SI.getName() + ".v", &SI); - if (auto *NewSelI = dyn_cast(NewSel)) - NewSelI->setFastMathFlags(FMF); - Instruction *NewFNeg = UnaryOperator::CreateFNeg(NewSel); - NewFNeg->setFastMathFlags(FMF); - return NewFNeg; - } - - // Min/max intrinsic with a common operand can have the common operand pulled - // after the select. This is the same transform as below for binops, but - // specialized for intrinsic matching and without the restrictive uses clause. - auto *TII = dyn_cast(TI); - auto *FII = dyn_cast(FI); - if (TII && FII && TII->getIntrinsicID() == FII->getIntrinsicID() && - (TII->hasOneUse() || FII->hasOneUse())) { - Value *T0, *T1, *F0, *F1; - if (match(TII, m_MaxOrMin(m_Value(T0), m_Value(T1))) && - match(FII, m_MaxOrMin(m_Value(F0), m_Value(F1)))) { - if (T0 == F0) { - Value *NewSel = Builder.CreateSelect(Cond, T1, F1, "minmaxop", &SI); - return CallInst::Create(TII->getCalledFunction(), {NewSel, T0}); - } - if (T0 == F1) { - Value *NewSel = Builder.CreateSelect(Cond, T1, F0, "minmaxop", &SI); - return CallInst::Create(TII->getCalledFunction(), {NewSel, T0}); - } - if (T1 == F0) { - Value *NewSel = Builder.CreateSelect(Cond, T0, F1, "minmaxop", &SI); - return CallInst::Create(TII->getCalledFunction(), {NewSel, T1}); + Value *OtherOpT, *OtherOpF; + bool MatchIsOpZero; + auto getCommonOp = [&](Instruction *TI, Instruction *FI, + bool Commute) -> Value * { + Value *CommonOp = nullptr; + if (TI->getOperand(0) == FI->getOperand(0)) { + CommonOp = TI->getOperand(0); + OtherOpT = TI->getOperand(1); + OtherOpF = FI->getOperand(1); + MatchIsOpZero = true; + } else if (TI->getOperand(1) == FI->getOperand(1)) { + CommonOp = TI->getOperand(1); + OtherOpT = TI->getOperand(0); + OtherOpF = FI->getOperand(0); + MatchIsOpZero = false; + } else if (!Commute) { + return nullptr; + } else if (TI->getOperand(0) == FI->getOperand(1)) { + CommonOp = TI->getOperand(0); + OtherOpT = TI->getOperand(1); + OtherOpF = FI->getOperand(0); + MatchIsOpZero = true; + } else if (TI->getOperand(1) == FI->getOperand(0)) { + CommonOp = TI->getOperand(1); + OtherOpT = TI->getOperand(0); + OtherOpF = FI->getOperand(1); + MatchIsOpZero = true; + } + return CommonOp; + }; + + if (TI->hasOneUse() || FI->hasOneUse()) { + // Cond ? -X : -Y --> -(Cond ? X : Y) + Value *X, *Y; + if (match(TI, m_FNeg(m_Value(X))) && match(FI, m_FNeg(m_Value(Y)))) { + // Intersect FMF from the fneg instructions and union those with the + // select. + FastMathFlags FMF = TI->getFastMathFlags(); + FMF &= FI->getFastMathFlags(); + FMF |= SI.getFastMathFlags(); + Value *NewSel = + Builder.CreateSelect(Cond, X, Y, SI.getName() + ".v", &SI); + if (auto *NewSelI = dyn_cast(NewSel)) + NewSelI->setFastMathFlags(FMF); + Instruction *NewFNeg = UnaryOperator::CreateFNeg(NewSel); + NewFNeg->setFastMathFlags(FMF); + return NewFNeg; + } + + // Min/max intrinsic with a common operand can have the common operand + // pulled after the select. This is the same transform as below for binops, + // but specialized for intrinsic matching and without the restrictive uses + // clause. 
+ auto *TII = dyn_cast(TI); + auto *FII = dyn_cast(FI); + if (TII && FII && TII->getIntrinsicID() == FII->getIntrinsicID()) { + if (match(TII, m_MaxOrMin(m_Value(), m_Value()))) { + if (Value *MatchOp = getCommonOp(TI, FI, true)) { + Value *NewSel = + Builder.CreateSelect(Cond, OtherOpT, OtherOpF, "minmaxop", &SI); + return CallInst::Create(TII->getCalledFunction(), {NewSel, MatchOp}); + } } - if (T1 == F1) { - Value *NewSel = Builder.CreateSelect(Cond, T0, F0, "minmaxop", &SI); - return CallInst::Create(TII->getCalledFunction(), {NewSel, T1}); + } + + // icmp eq/ne with a common operand also can have the common operand + // pulled after the select. + ICmpInst::Predicate TPred, FPred; + if (match(TI, m_ICmp(TPred, m_Value(), m_Value())) && + match(FI, m_ICmp(FPred, m_Value(), m_Value()))) { + if (TPred == FPred && ICmpInst::isEquality(TPred)) { + if (Value *MatchOp = getCommonOp(TI, FI, true)) { + Value *NewSel = Builder.CreateSelect(Cond, OtherOpT, OtherOpF, + SI.getName() + ".v", &SI); + return new ICmpInst(TPred, NewSel, MatchOp); + } } } } @@ -370,33 +405,9 @@ return nullptr; // Figure out if the operations have any operands in common. - Value *MatchOp, *OtherOpT, *OtherOpF; - bool MatchIsOpZero; - if (TI->getOperand(0) == FI->getOperand(0)) { - MatchOp = TI->getOperand(0); - OtherOpT = TI->getOperand(1); - OtherOpF = FI->getOperand(1); - MatchIsOpZero = true; - } else if (TI->getOperand(1) == FI->getOperand(1)) { - MatchOp = TI->getOperand(1); - OtherOpT = TI->getOperand(0); - OtherOpF = FI->getOperand(0); - MatchIsOpZero = false; - } else if (!TI->isCommutative()) { - return nullptr; - } else if (TI->getOperand(0) == FI->getOperand(1)) { - MatchOp = TI->getOperand(0); - OtherOpT = TI->getOperand(1); - OtherOpF = FI->getOperand(0); - MatchIsOpZero = true; - } else if (TI->getOperand(1) == FI->getOperand(0)) { - MatchOp = TI->getOperand(1); - OtherOpT = TI->getOperand(0); - OtherOpF = FI->getOperand(1); - MatchIsOpZero = true; - } else { + Value *MatchOp = getCommonOp(TI, FI, TI->isCommutative()); + if (!MatchOp) return nullptr; - } // If the select condition is a vector, the operands of the original select's // operands also must be vectors. This may not be the case for getelementptr diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -971,7 +971,7 @@ } default: { // Handle target specific intrinsics - Optional V = targetSimplifyDemandedUseBitsIntrinsic( + std::optional V = targetSimplifyDemandedUseBitsIntrinsic( *II, DemandedMask, Known, KnownBitsComputed); if (V) return V.value(); @@ -1696,7 +1696,7 @@ } default: { // Handle target specific intrinsics - Optional V = targetSimplifyDemandedVectorEltsIntrinsic( + std::optional V = targetSimplifyDemandedVectorEltsIntrinsic( *II, DemandedElts, UndefElts, UndefElts2, UndefElts3, simplifyAndSetOp); if (V) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -871,7 +871,7 @@ if (NumAggElts > 2) return nullptr; - static constexpr auto NotFound = None; + static constexpr auto NotFound = std::nullopt; static constexpr auto FoundMismatch = nullptr; // Try to find a value of each element of an aggregate. 
@@ -1032,7 +1032,8 @@ Optional SourceAggregate; // Can we find the source aggregate without looking at predecessors? - SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/None, /*PredBB=*/None); + SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt, + /*PredBB=*/std::nullopt); if (Describe(SourceAggregate) != AggregateDescription::NotFound) { if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch) return nullptr; // Conflicting source aggregates! @@ -1540,13 +1541,21 @@ unsigned NumEltsInScalar = ScalarWidth / VecEltWidth; Value *X = T; - if ((IsBigEndian && IndexC == NumEltsInScalar - 1) || - (!IsBigEndian && IndexC == 0)) { + if (IndexC == (IsBigEndian ? NumEltsInScalar - 1 : 0)) { // The insert is to the LSB end of the vector (depends on endian). // That's all we need. } else { - // TODO: Look through a shift-right and translate the insert index. - return nullptr; + // If not, we must match a right-shift to translate the insert index. + uint64_t ShiftC; + if (!match(T, m_OneUse(m_LShr(m_Value(X), m_ConstantInt(ShiftC))))) + return nullptr; + + // Check the shift amount to see if this can be folded to an identity + // shuffle (assuming we are shuffling with an undef base vector). + // Big endian has MSB at vector index 0, so the insert index is flipped. + if (ShiftC != (IsBigEndian ? (NumEltsInScalar - 1 - IndexC) * VecEltWidth + : IndexC * VecEltWidth)) + return nullptr; } // Bitcast the scalar to a vector type with the destination element type. diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -105,6 +105,7 @@ #include #include #include +#include #include #include @@ -169,16 +170,16 @@ static cl::opt ShouldLowerDbgDeclare("instcombine-lower-dbg-declare", cl::Hidden, cl::init(true)); -Optional +std::optional InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) { // Handle target specific intrinsics if (II.getCalledFunction()->isTargetIntrinsic()) { return TTI.instCombineIntrinsic(*this, II); } - return None; + return std::nullopt; } -Optional InstCombiner::targetSimplifyDemandedUseBitsIntrinsic( +std::optional InstCombiner::targetSimplifyDemandedUseBitsIntrinsic( IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) { // Handle target specific intrinsics @@ -186,10 +187,10 @@ return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known, KnownBitsComputed); } - return None; + return std::nullopt; } -Optional InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic( +std::optional InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic( IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function @@ -200,7 +201,7 @@ *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3, SimplifyAndSetOp); } - return None; + return std::nullopt; } Value *InstCombinerImpl::EmitGEPOffset(User *GEP) { @@ -2709,7 +2710,7 @@ // If the only possible side effect of the call is writing to the alloca, // and the result isn't used, we can safely remove any reads implied by the // call including those which might read the alloca itself. 
- Optional Dest = MemoryLocation::getForDest(&CB, TLI); + std::optional Dest = MemoryLocation::getForDest(&CB, TLI); return Dest && Dest->Ptr == UsedV; } @@ -2887,7 +2888,7 @@ Module *M = II->getModule(); Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing); InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), - None, "", II->getParent()); + std::nullopt, "", II->getParent()); } // Remove debug intrinsics which describe the value contained within the @@ -4003,7 +4004,7 @@ // to allow reload along used path as described below. Otherwise, this // is simply a store to a dead allocation which will be removed. return false; - Optional Dest = MemoryLocation::getForDest(CB, TLI); + std::optional Dest = MemoryLocation::getForDest(CB, TLI); if (!Dest) return false; auto *AI = dyn_cast(getUnderlyingObject(Dest->Ptr)); @@ -4213,7 +4214,7 @@ auto getOptionalSinkBlockForInst = [this](Instruction *I) -> std::optional { if (!EnableCodeSinking) - return None; + return std::nullopt; BasicBlock *BB = I->getParent(); BasicBlock *UserParent = nullptr; @@ -4223,7 +4224,7 @@ if (U->isDroppable()) continue; if (NumUsers > MaxSinkNumUsers) - return None; + return std::nullopt; Instruction *UserInst = cast(U); // Special handling for Phi nodes - get the block the use occurs in. @@ -4234,14 +4235,14 @@ // sophisticated analysis (i.e finding NearestCommonDominator of // these use blocks). if (UserParent && UserParent != PN->getIncomingBlock(i)) - return None; + return std::nullopt; UserParent = PN->getIncomingBlock(i); } } assert(UserParent && "expected to find user block!"); } else { if (UserParent && UserParent != UserInst->getParent()) - return None; + return std::nullopt; UserParent = UserInst->getParent(); } @@ -4251,7 +4252,7 @@ // Try sinking to another block. If that block is unreachable, then do // not bother. SimplifyCFG should handle it. if (UserParent == BB || !DT.isReachableFromEntry(UserParent)) - return None; + return std::nullopt; auto *Term = UserParent->getTerminator(); // See if the user is one of our successors that has only one @@ -4263,7 +4264,7 @@ // - the User will be executed at most once. // So sinking I down to User is always profitable or neutral. if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term)) - return None; + return std::nullopt; assert(DT.dominates(BB, UserParent) && "Dominance relation broken?"); } @@ -4273,7 +4274,7 @@ // No user or only has droppable users. 
if (!UserParent) - return None; + return std::nullopt; return UserParent; }; diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1305,12 +1305,13 @@ if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand())) return; Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true, - RMW->getValOperand()->getType(), None); + RMW->getValOperand()->getType(), std::nullopt); } else if (AtomicCmpXchgInst *XCHG = dyn_cast(I)) { if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand())) return; Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true, - XCHG->getCompareOperand()->getType(), None); + XCHG->getCompareOperand()->getType(), + std::nullopt); } else if (auto CI = dyn_cast(I)) { if (CI->getIntrinsicID() == Intrinsic::masked_load || CI->getIntrinsicID() == Intrinsic::masked_store) { diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -1686,7 +1686,8 @@ for (RegInfo &RI : Scope->RegInfos) { const Region *R = RI.R; unsigned Duplication = getRegionDuplicationCount(R); - dbgs() << "Dup count for R=" << R << " is " << Duplication << "\n"; + CHR_DEBUG(dbgs() << "Dup count for R=" << R << " is " << Duplication + << "\n"); if (Duplication >= CHRDupThreshsold) { CHR_DEBUG(dbgs() << "Reached the dup threshold of " << Duplication << " for this region"); diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1087,8 +1087,8 @@ Type::getInt8PtrTy(*Ctx), IntptrTy}; DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), DFSanSetLabelArgs, /*isVarArg=*/false); - DFSanNonzeroLabelFnTy = - FunctionType::get(Type::getVoidTy(*Ctx), None, /*isVarArg=*/false); + DFSanNonzeroLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), std::nullopt, + /*isVarArg=*/false); DFSanVarargWrapperFnTy = FunctionType::get( Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false); DFSanConditionalCallbackFnTy = diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -757,12 +757,13 @@ if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand())) return; Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true, - RMW->getValOperand()->getType(), None); + RMW->getValOperand()->getType(), std::nullopt); } else if (AtomicCmpXchgInst *XCHG = dyn_cast(I)) { if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand())) return; Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true, - XCHG->getCompareOperand()->getType(), None); + XCHG->getCompareOperand()->getType(), + std::nullopt); } else if (auto CI = dyn_cast(I)) { for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) { if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) || diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp 
b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -271,31 +271,31 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const { // Do not instrument the load fetching the dynamic shadow address. if (DynamicShadowOffset == I) - return None; + return std::nullopt; InterestingMemoryAccess Access; if (LoadInst *LI = dyn_cast(I)) { if (!ClInstrumentReads) - return None; + return std::nullopt; Access.IsWrite = false; Access.AccessTy = LI->getType(); Access.Addr = LI->getPointerOperand(); } else if (StoreInst *SI = dyn_cast(I)) { if (!ClInstrumentWrites) - return None; + return std::nullopt; Access.IsWrite = true; Access.AccessTy = SI->getValueOperand()->getType(); Access.Addr = SI->getPointerOperand(); } else if (AtomicRMWInst *RMW = dyn_cast(I)) { if (!ClInstrumentAtomics) - return None; + return std::nullopt; Access.IsWrite = true; Access.AccessTy = RMW->getValOperand()->getType(); Access.Addr = RMW->getPointerOperand(); } else if (AtomicCmpXchgInst *XCHG = dyn_cast(I)) { if (!ClInstrumentAtomics) - return None; + return std::nullopt; Access.IsWrite = true; Access.AccessTy = XCHG->getCompareOperand()->getType(); Access.Addr = XCHG->getPointerOperand(); @@ -306,14 +306,14 @@ unsigned OpOffset = 0; if (F->getIntrinsicID() == Intrinsic::masked_store) { if (!ClInstrumentWrites) - return None; + return std::nullopt; // Masked store has an initial operand for the value. OpOffset = 1; Access.AccessTy = CI->getArgOperand(0)->getType(); Access.IsWrite = true; } else { if (!ClInstrumentReads) - return None; + return std::nullopt; Access.AccessTy = CI->getType(); Access.IsWrite = false; } @@ -325,20 +325,20 @@ } if (!Access.Addr) - return None; + return std::nullopt; // Do not instrument accesses from different address spaces; we cannot deal // with them. Type *PtrTy = cast(Access.Addr->getType()->getScalarType()); if (PtrTy->getPointerAddressSpace() != 0) - return None; + return std::nullopt; // Ignore swifterror addresses. // swifterror memory addresses are mem2reg promoted by instruction // selection. As such they cannot have regular uses like an instrumentation // function and it makes no sense to track them as memory. if (Access.Addr->isSwiftError()) - return None; + return std::nullopt; // Peel off GEPs and BitCasts. auto *Addr = Access.Addr->stripInBoundsOffsets(); @@ -351,12 +351,12 @@ auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat(); if (SectionName.endswith( getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false))) - return None; + return std::nullopt; } // Do not instrument accesses to LLVM internal variables. 
if (GV->getName().startswith("__llvm")) - return None; + return std::nullopt; } const DataLayout &DL = I->getModule()->getDataLayout(); diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -4128,7 +4128,7 @@ if (ArgOffset + Size > kParamTLSSize) break; const MaybeAlign ParamAlignment(CB.getParamAlign(i)); - MaybeAlign Alignment = llvm::None; + MaybeAlign Alignment = std::nullopt; if (ParamAlignment) Alignment = std::min(*ParamAlignment, kShadowTLSAlignment); Value *AShadowPtr, *AOriginPtr; diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -251,7 +251,7 @@ Type *Ty); void SetNoSanitizeMetadata(Instruction *I) { - I->setMetadata(LLVMContext::MD_nosanitize, MDNode::get(*C, None)); + I->setMetadata(LLVMContext::MD_nosanitize, MDNode::get(*C, std::nullopt)); } std::string getSectionName(const std::string &Section) const; diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp --- a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -472,7 +472,8 @@ RVInstMarker->getString(), /*Constraints=*/"", /*hasSideEffects=*/true); - objcarc::createCallInstWithColors(IA, None, "", Inst, BlockColors); + objcarc::createCallInstWithColors(IA, std::nullopt, "", Inst, + BlockColors); } decline_rv_optimization: return false; diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp --- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -999,7 +999,7 @@ CallInst *NewCall = CallInst::Create(Decl, Call->getArgOperand(0), "", Call); NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), - MDNode::get(C, None)); + MDNode::get(C, std::nullopt)); LLVM_DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) " "since x is otherwise unused.\nOld: " diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp --- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -116,7 +116,7 @@ return Align(DiffUnitsAbs); } - return None; + return std::nullopt; } // There is an address given by an offset OffSCEV from AASCEV which has an diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -86,6 +86,7 @@ #include #include #include +#include #include using namespace llvm; @@ -496,8 +497,8 @@ // FIXME: This should be using the DIExpression in the Alloca's dbg.assign // for the variable, since that could also contain a fragment? 
return *DIExpression::createFragmentExpression( - DIExpression::get(Inst->getContext(), None), DeadFragment.OffsetInBits, - DeadFragment.SizeInBits); + DIExpression::get(Inst->getContext(), std::nullopt), + DeadFragment.OffsetInBits, DeadFragment.SizeInBits); }; // A DIAssignID to use so that the inserted dbg.assign intrinsics do not @@ -878,6 +879,27 @@ CodeMetrics::collectEphemeralValues(&F, &AC, EphValues); } + LocationSize strengthenLocationSize(const Instruction *I, + LocationSize Size) const { + if (auto *CB = dyn_cast(I)) { + LibFunc F; + if (TLI.getLibFunc(*CB, F) && TLI.has(F) && + (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) { + // Use the precise location size specified by the 3rd argument + // for determining KillingI overwrites DeadLoc if it is a memset_chk + // instruction. memset_chk will write either the amount specified as 3rd + // argument or the function will immediately abort and exit the program. + // NOTE: AA may determine NoAlias if it can prove that the access size + // is larger than the allocation size due to that being UB. To avoid + // returning potentially invalid NoAlias results by AA, limit the use of + // the precise location size to isOverwrite. + if (const auto *Len = dyn_cast(CB->getArgOperand(2))) + return LocationSize::precise(Len->getZExtValue()); + } + } + return Size; + } + /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p /// KillingI instruction) completely overwrites a store to the 'DeadLoc' /// location (by \p DeadI instruction). @@ -897,6 +919,8 @@ if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc)) return OW_Unknown; + LocationSize KillingLocSize = + strengthenLocationSize(KillingI, KillingLoc.Size); const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts(); const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts(); const Value *DeadUndObj = getUnderlyingObject(DeadPtr); @@ -904,16 +928,16 @@ // Check whether the killing store overwrites the whole object, in which // case the size/offset of the dead store does not matter. - if (DeadUndObj == KillingUndObj && KillingLoc.Size.isPrecise()) { + if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise()) { uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F); if (KillingUndObjSize != MemoryLocation::UnknownSize && - KillingUndObjSize == KillingLoc.Size.getValue()) + KillingUndObjSize == KillingLocSize.getValue()) return OW_Complete; } // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll // get imprecise values here, though (except for unknown sizes). - if (!KillingLoc.Size.isPrecise() || !DeadLoc.Size.isPrecise()) { + if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) { // In case no constant size is known, try to an IR values for the number // of bytes written and check if they match. 
const auto *KillingMemI = dyn_cast(KillingI); @@ -930,7 +954,7 @@ return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA); } - const uint64_t KillingSize = KillingLoc.Size.getValue(); + const uint64_t KillingSize = KillingLocSize.getValue(); const uint64_t DeadSize = DeadLoc.Size.getValue(); // Query the alias information @@ -1045,9 +1069,9 @@ return !I.first->second; } - Optional getLocForWrite(Instruction *I) const { + std::optional getLocForWrite(Instruction *I) const { if (!I->mayWriteToMemory()) - return None; + return std::nullopt; if (auto *CB = dyn_cast(I)) return MemoryLocation::getForDest(CB, TLI); @@ -1157,7 +1181,7 @@ /// If \p I is a memory terminator like llvm.lifetime.end or free, return a /// pair with the MemoryLocation terminated by \p I and a boolean flag /// indicating whether \p I is a free-like call. - Optional> + std::optional> getLocForTerminator(Instruction *I) const { uint64_t Len; Value *Ptr; @@ -1170,7 +1194,7 @@ return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)}; } - return None; + return std::nullopt; } /// Returns true if \p I is a memory terminator instruction like @@ -1185,7 +1209,7 @@ /// instruction \p AccessI. bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI, Instruction *MaybeTerm) { - Optional> MaybeTermLoc = + std::optional> MaybeTermLoc = getLocForTerminator(MaybeTerm); if (!MaybeTermLoc) @@ -1279,7 +1303,7 @@ bool IsMemTerm, unsigned &PartialLimit) { if (ScanLimit == 0 || WalkerStepLimit == 0) { LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n"); - return None; + return std::nullopt; } MemoryAccess *Current = StartAccess; @@ -1296,7 +1320,7 @@ !KillingI->mayReadFromMemory(); // Find the next clobbering Mod access for DefLoc, starting at StartAccess. - Optional CurrentLoc; + std::optional CurrentLoc; for (;; Current = cast(Current)->getDefiningAccess()) { LLVM_DEBUG({ dbgs() << " visiting " << *Current; @@ -1312,7 +1336,7 @@ if (CanOptimize && Current != KillingDef->getDefiningAccess()) // The first clobbering def is... none. KillingDef->setOptimized(Current); - return None; + return std::nullopt; } // Cost of a step. Accesses in the same block are more likely to be valid @@ -1322,7 +1346,7 @@ : MemorySSAOtherBBStepCost; if (WalkerStepLimit <= StepCost) { LLVM_DEBUG(dbgs() << " ... hit walker step limit\n"); - return None; + return std::nullopt; } WalkerStepLimit -= StepCost; @@ -1347,14 +1371,14 @@ // instructions that block us from DSEing if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) { LLVM_DEBUG(dbgs() << " ... skip, may throw!\n"); - return None; + return std::nullopt; } // Check for anything that looks like it will be a barrier to further // removal if (isDSEBarrier(KillingUndObj, CurrentI)) { LLVM_DEBUG(dbgs() << " ... skip, barrier\n"); - return None; + return std::nullopt; } // If Current is known to be on path that reads DefLoc or is a read @@ -1362,7 +1386,7 @@ // for intrinsic calls, because the code knows how to handle memcpy // intrinsics. if (!isa(CurrentI) && isReadClobber(KillingLoc, CurrentI)) - return None; + return std::nullopt; // Quick check if there are direct uses that are read-clobbers. if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) { @@ -1372,7 +1396,7 @@ return false; })) { LLVM_DEBUG(dbgs() << " ... found a read clobber\n"); - return None; + return std::nullopt; } // If Current does not have an analyzable write location or is not @@ -1466,7 +1490,7 @@ // Bail out if the number of accesses to check exceeds the scan limit. 
if (ScanLimit < (WorkList.size() - I)) { LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n"); - return None; + return std::nullopt; } --ScanLimit; NumDomMemDefChecks++; @@ -1511,14 +1535,14 @@ if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) { LLVM_DEBUG(dbgs() << " ... found throwing instruction\n"); - return None; + return std::nullopt; } // Uses which may read the original MemoryDef mean we cannot eliminate the // original MD. Stop walk. if (isReadClobber(MaybeDeadLoc, UseInst)) { LLVM_DEBUG(dbgs() << " ... found read clobber\n"); - return None; + return std::nullopt; } // If this worklist walks back to the original memory access (and the @@ -1527,7 +1551,7 @@ if (MaybeDeadAccess == UseAccess && !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) { LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n"); - return None; + return std::nullopt; } // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check // if it reads the memory location. @@ -1561,7 +1585,7 @@ } else { LLVM_DEBUG(dbgs() << " ... found preceeding def " << *UseInst << "\n"); - return None; + return std::nullopt; } } else PushMemUses(UseDef); @@ -1591,7 +1615,7 @@ // killing block. if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) { if (!AnyUnreachableExit) - return None; + return std::nullopt; // Fall back to CFG scan starting at all non-unreachable roots if not // all paths to the exit go through CommonPred. @@ -1622,7 +1646,7 @@ if (KillingBlocks.count(Current)) continue; if (Current == MaybeDeadAccess->getBlock()) - return None; + return std::nullopt; // MaybeDeadAccess is reachable from the entry, so we don't have to // explore unreachable blocks further. @@ -1633,7 +1657,7 @@ WorkList.insert(Pred); if (WorkList.size() >= MemorySSAPathCheckLimit) - return None; + return std::nullopt; } NumCFGSuccess++; } @@ -2024,12 +2048,13 @@ continue; Instruction *KillingI = KillingDef->getMemoryInst(); - Optional MaybeKillingLoc; - if (State.isMemTerminatorInst(KillingI)) - MaybeKillingLoc = State.getLocForTerminator(KillingI).transform( - [](const std::pair &P) { return P.first; }); - else + std::optional MaybeKillingLoc; + if (State.isMemTerminatorInst(KillingI)) { + if (auto KillingLoc = State.getLocForTerminator(KillingI)) + MaybeKillingLoc = KillingLoc->first; + } else { MaybeKillingLoc = State.getLocForWrite(KillingI); + } if (!MaybeKillingLoc) { LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for " diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp --- a/llvm/lib/Transforms/Scalar/Float2Int.cpp +++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp @@ -243,7 +243,7 @@ auto OpIt = SeenInsts.find(OI); assert(OpIt != SeenInsts.end() && "def not seen before use!"); if (OpIt->second == unknownRange()) - return None; // Wait until operand range has been calculated. + return std::nullopt; // Wait until operand range has been calculated. OpRanges.push_back(OpIt->second); } else if (ConstantFP *CF = dyn_cast(O)) { // Work out if the floating point number can be losslessly represented diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -763,14 +763,14 @@ OS, MapClassName2PassName); OS << "<"; - if (Options.AllowPRE != None) + if (Options.AllowPRE != std::nullopt) OS << (Options.AllowPRE.value() ? 
"" : "no-") << "pre;"; - if (Options.AllowLoadPRE != None) + if (Options.AllowLoadPRE != std::nullopt) OS << (Options.AllowLoadPRE.value() ? "" : "no-") << "load-pre;"; - if (Options.AllowLoadPRESplitBackedge != None) + if (Options.AllowLoadPRESplitBackedge != std::nullopt) OS << (Options.AllowLoadPRESplitBackedge.value() ? "" : "no-") << "split-backedge-load-pre;"; - if (Options.AllowMemDep != None) + if (Options.AllowMemDep != std::nullopt) OS << (Options.AllowMemDep.value() ? "" : "no-") << "memdep"; OS << ">"; } @@ -1129,12 +1129,12 @@ auto *Sel = dyn_cast_or_null(Address); if (!Sel || DepBB != Sel->getParent()) - return None; + return std::nullopt; LoadInst *L1 = findDominatingLoad(Sel->getOperand(1), LoadTy, Sel, DT); LoadInst *L2 = findDominatingLoad(Sel->getOperand(2), LoadTy, Sel, DT); if (!L1 || !L2) - return None; + return std::nullopt; // Ensure there are no accesses that may modify the locations referenced by // either L1 or L2 between L1, L2 and the specified End iterator. @@ -1145,7 +1145,7 @@ return isModSet(AA->getModRefInfo(&I, L1Loc)) || isModSet(AA->getModRefInfo(&I, L2Loc)); })) - return None; + return std::nullopt; return AvailableValue::getSelect(Sel); } @@ -1204,7 +1204,9 @@ canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL)) { const auto ClobberOff = MD->getClobberOffset(DepLoad); // GVN has no deal with a negative offset. - Offset = (ClobberOff == None || *ClobberOff < 0) ? -1 : *ClobberOff; + Offset = (ClobberOff == std::nullopt || *ClobberOff < 0) + ? -1 + : *ClobberOff; } if (Offset == -1) Offset = diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp --- a/llvm/lib/Transforms/Scalar/GVNSink.cpp +++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp @@ -654,7 +654,7 @@ uint32_t N = VN.lookupOrAdd(I); LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n"); if (N == ~0U) - return None; + return std::nullopt; VNums[N]++; } unsigned VNumToSink = @@ -662,7 +662,7 @@ if (VNums[VNumToSink] == 1) // Can't sink anything! - return None; + return std::nullopt; // Now restrict the number of incoming blocks down to only those with // VNumToSink. @@ -677,7 +677,7 @@ } for (auto *I : NewInsts) if (shouldAvoidSinkingInstruction(I)) - return None; + return std::nullopt; // If we've restricted the incoming blocks, restrict all needed PHIs also // to that set. @@ -715,7 +715,7 @@ // V exists in this PHI, but the whole PHI is different to NewPHI // (else it would have been removed earlier). We cannot continue // because this isn't representable. - return None; + return std::nullopt; // Which operands need PHIs? // FIXME: If any of these fail, we should partition up the candidates to @@ -728,7 +728,7 @@ return I->getNumOperands() != I0->getNumOperands(); }; if (any_of(NewInsts, hasDifferentNumOperands)) - return None; + return std::nullopt; for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) { ModelledPHI PHI(NewInsts, OpNum, ActivePreds); @@ -736,15 +736,15 @@ continue; if (!canReplaceOperandWithVariable(I0, OpNum)) // We can 't create a PHI from this instruction! - return None; + return std::nullopt; if (NeededPHIs.count(PHI)) continue; if (!PHI.areAllIncomingValuesSameType()) - return None; + return std::nullopt; // Don't create indirect calls! The called value is the final operand. 
if ((isa(I0) || isa(I0)) && OpNum == E - 1 && PHI.areAnyIncomingValuesConstant()) - return None; + return std::nullopt; NeededPHIs.reserve(NeededPHIs.size()); NeededPHIs.insert(PHI); diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp --- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -246,7 +246,7 @@ public: InductiveRangeCheckElimination(ScalarEvolution &SE, BranchProbabilityInfo *BPI, DominatorTree &DT, - LoopInfo &LI, GetBFIFunc GetBFI = None) + LoopInfo &LI, GetBFIFunc GetBFI = std::nullopt) : SE(SE), BPI(BPI), DT(DT), LI(LI), GetBFI(GetBFI) {} bool run(Loop *L, function_ref LPMAddNewLoop); @@ -753,7 +753,7 @@ const char *&FailureReason) { if (!L.isLoopSimplifyForm()) { FailureReason = "loop not in LoopSimplify form"; - return None; + return std::nullopt; } BasicBlock *Latch = L.getLoopLatch(); @@ -761,25 +761,25 @@ if (Latch->getTerminator()->getMetadata(ClonedLoopTag)) { FailureReason = "loop has already been cloned"; - return None; + return std::nullopt; } if (!L.isLoopExiting(Latch)) { FailureReason = "no loop latch"; - return None; + return std::nullopt; } BasicBlock *Header = L.getHeader(); BasicBlock *Preheader = L.getLoopPreheader(); if (!Preheader) { FailureReason = "no preheader"; - return None; + return std::nullopt; } BranchInst *LatchBr = dyn_cast(Latch->getTerminator()); if (!LatchBr || LatchBr->isUnconditional()) { FailureReason = "latch terminator not conditional branch"; - return None; + return std::nullopt; } unsigned LatchBrExitIdx = LatchBr->getSuccessor(0) == Header ? 1 : 0; @@ -787,13 +787,13 @@ ICmpInst *ICI = dyn_cast(LatchBr->getCondition()); if (!ICI || !isa(ICI->getOperand(0)->getType())) { FailureReason = "latch terminator branch not conditional on integral icmp"; - return None; + return std::nullopt; } const SCEV *LatchCount = SE.getExitCount(&L, Latch); if (isa(LatchCount)) { FailureReason = "could not compute latch count"; - return None; + return std::nullopt; } ICmpInst::Predicate Pred = ICI->getPredicate(); @@ -812,7 +812,7 @@ Pred = ICmpInst::getSwappedPredicate(Pred); } else { FailureReason = "no add recurrences in the icmp"; - return None; + return std::nullopt; } } @@ -848,22 +848,22 @@ const SCEVAddRecExpr *IndVarBase = cast(LeftSCEV); if (IndVarBase->getLoop() != &L) { FailureReason = "LHS in cmp is not an AddRec for this loop"; - return None; + return std::nullopt; } if (!IndVarBase->isAffine()) { FailureReason = "LHS in icmp not induction variable"; - return None; + return std::nullopt; } const SCEV* StepRec = IndVarBase->getStepRecurrence(SE); if (!isa(StepRec)) { FailureReason = "LHS in icmp not induction variable"; - return None; + return std::nullopt; } ConstantInt *StepCI = cast(StepRec)->getValue(); if (ICI->isEquality() && !HasNoSignedWrap(IndVarBase)) { FailureReason = "LHS in icmp needs nsw for equality predicates"; - return None; + return std::nullopt; } assert(!StepCI->isZero() && "Zero step?"); @@ -926,19 +926,19 @@ if (!FoundExpectedPred) { FailureReason = "expected icmp slt semantically, found something else"; - return None; + return std::nullopt; } IsSignedPredicate = ICmpInst::isSigned(Pred); if (!IsSignedPredicate && !AllowUnsignedLatchCondition) { FailureReason = "unsigned latch conditions are explicitly prohibited"; - return None; + return std::nullopt; } if (!isSafeIncreasingBound(IndVarStart, RightSCEV, Step, Pred, LatchBrExitIdx, &L, SE)) { 
FailureReason = "Unsafe loop bounds"; - return None; + return std::nullopt; } if (LatchBrExitIdx == 0) { // We need to increase the right value unless we have already decreased @@ -989,7 +989,7 @@ if (!FoundExpectedPred) { FailureReason = "expected icmp sgt semantically, found something else"; - return None; + return std::nullopt; } IsSignedPredicate = @@ -997,13 +997,13 @@ if (!IsSignedPredicate && !AllowUnsignedLatchCondition) { FailureReason = "unsigned latch conditions are explicitly prohibited"; - return None; + return std::nullopt; } if (!isSafeDecreasingBound(IndVarStart, RightSCEV, Step, Pred, LatchBrExitIdx, &L, SE)) { FailureReason = "Unsafe bounds"; - return None; + return std::nullopt; } if (LatchBrExitIdx == 0) { @@ -1070,9 +1070,9 @@ // We only support wide range checks and narrow latches. if (!AllowNarrowLatchCondition && RTy != Ty) - return None; + return std::nullopt; if (RTy->getBitWidth() < Ty->getBitWidth()) - return None; + return std::nullopt; LoopConstrainer::SubRanges Result; @@ -1592,9 +1592,9 @@ auto *RCType = dyn_cast(getBegin()->getType()); // Do not work with pointer types. if (!IVType || !RCType) - return None; + return std::nullopt; if (IVType->getBitWidth() > RCType->getBitWidth()) - return None; + return std::nullopt; // IndVar is of the form "A + B * I" (where "I" is the canonical induction // variable, that may or may not exist as a real llvm::Value in the loop) and // this inductive range check is a range check on the "C + D * I" ("C" is @@ -1616,19 +1616,19 @@ // to deal with overflown values. if (!IndVar->isAffine()) - return None; + return std::nullopt; const SCEV *A = NoopOrExtend(IndVar->getStart(), RCType, SE, IsLatchSigned); const SCEVConstant *B = dyn_cast( NoopOrExtend(IndVar->getStepRecurrence(SE), RCType, SE, IsLatchSigned)); if (!B) - return None; + return std::nullopt; assert(!B->isZero() && "Recurrence with zero step?"); const SCEV *C = getBegin(); const SCEVConstant *D = dyn_cast(getStep()); if (D != B) - return None; + return std::nullopt; assert(!D->getValue()->isZero() && "Recurrence with zero step?"); unsigned BitWidth = RCType->getBitWidth(); @@ -1716,7 +1716,7 @@ const Optional &R1, const InductiveRangeCheck::Range &R2) { if (R2.isEmpty(SE, /* IsSigned */ true)) - return None; + return std::nullopt; if (!R1) return R2; auto &R1Value = R1.value(); @@ -1728,7 +1728,7 @@ // TODO: we could widen the smaller range and have this work; but for now we // bail out to keep things simple. if (R1Value.getType() != R2.getType()) - return None; + return std::nullopt; const SCEV *NewBegin = SE.getSMaxExpr(R1Value.getBegin(), R2.getBegin()); const SCEV *NewEnd = SE.getSMinExpr(R1Value.getEnd(), R2.getEnd()); @@ -1736,7 +1736,7 @@ // If the resulting range is empty, just return None. auto Ret = InductiveRangeCheck::Range(NewBegin, NewEnd); if (Ret.isEmpty(SE, /* IsSigned */ true)) - return None; + return std::nullopt; return Ret; } @@ -1745,7 +1745,7 @@ const Optional &R1, const InductiveRangeCheck::Range &R2) { if (R2.isEmpty(SE, /* IsSigned */ false)) - return None; + return std::nullopt; if (!R1) return R2; auto &R1Value = R1.value(); @@ -1757,7 +1757,7 @@ // TODO: we could widen the smaller range and have this work; but for now we // bail out to keep things simple. 
if (R1Value.getType() != R2.getType()) - return None; + return std::nullopt; const SCEV *NewBegin = SE.getUMaxExpr(R1Value.getBegin(), R2.getBegin()); const SCEV *NewEnd = SE.getUMinExpr(R1Value.getEnd(), R2.getEnd()); @@ -1765,7 +1765,7 @@ // If the resulting range is empty, just return None. auto Ret = InductiveRangeCheck::Range(NewBegin, NewEnd); if (Ret.isEmpty(SE, /* IsSigned */ false)) - return None; + return std::nullopt; return Ret; } diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -645,7 +645,7 @@ void collectFusionCandidates(const LoopVector &LV) { for (Loop *L : LV) { TTI::PeelingPreferences PP = - gatherPeelingPreferences(L, SE, TTI, None, None); + gatherPeelingPreferences(L, SE, TTI, std::nullopt, std::nullopt); FusionCandidate CurrCand(L, DT, &PDT, ORE, PP); if (!CurrCand.isEligibleForFusion(SE)) continue; @@ -708,14 +708,14 @@ if (isa(TripCount0)) { UncomputableTripCount++; LLVM_DEBUG(dbgs() << "Trip count of first loop could not be computed!"); - return {false, None}; + return {false, std::nullopt}; } const SCEV *TripCount1 = SE.getBackedgeTakenCount(FC1.L); if (isa(TripCount1)) { UncomputableTripCount++; LLVM_DEBUG(dbgs() << "Trip count of second loop could not be computed!"); - return {false, None}; + return {false, std::nullopt}; } LLVM_DEBUG(dbgs() << "\tTrip counts: " << *TripCount0 << " & " @@ -740,7 +740,7 @@ LLVM_DEBUG(dbgs() << "Loop(s) do not have a single exit point or do not " "have a constant number of iterations. Peeling " "is not benefical\n"); - return {false, None}; + return {false, std::nullopt}; } Optional Difference; diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -1350,11 +1350,10 @@ /// \brief Move all instructions except the terminator from FromBB right before /// InsertBefore static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { - auto &ToList = InsertBefore->getParent()->getInstList(); - auto &FromList = FromBB->getInstList(); + BasicBlock *ToBB = InsertBefore->getParent(); - ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(), - FromBB->getTerminator()->getIterator()); + ToBB->splice(InsertBefore->getIterator(), FromBB, FromBB->begin(), + FromBB->getTerminator()->getIterator()); } /// Swap instructions between \p BB1 and \p BB2 but keep terminators intact. diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -392,10 +392,10 @@ const SCEV *LHSS = SE->getSCEV(LHS); if (isa(LHSS)) - return None; + return std::nullopt; const SCEV *RHSS = SE->getSCEV(RHS); if (isa(RHSS)) - return None; + return std::nullopt; // Canonicalize RHS to be loop invariant bound, LHS - a loop computable IV if (SE->isLoopInvariant(LHSS, L)) { @@ -406,7 +406,7 @@ const SCEVAddRecExpr *AR = dyn_cast(LHSS); if (!AR || AR->getLoop() != L) - return None; + return std::nullopt; return LoopICmp(Pred, AR, RHSS); } @@ -494,9 +494,9 @@ // For now, bail out if latch type is narrower than range type. 
if (DL.getTypeSizeInBits(LatchType).getFixedSize() < DL.getTypeSizeInBits(RangeCheckType).getFixedSize()) - return None; + return std::nullopt; if (!isSafeToTruncateWideIVType(DL, SE, LatchCheck, RangeCheckType)) - return None; + return std::nullopt; // We can now safely identify the truncated version of the IV and limit for // RangeCheckType. LoopICmp NewLatchCheck; @@ -504,7 +504,7 @@ NewLatchCheck.IV = dyn_cast( SE.getTruncateExpr(LatchCheck.IV, RangeCheckType)); if (!NewLatchCheck.IV) - return None; + return std::nullopt; NewLatchCheck.Limit = SE.getTruncateExpr(LatchCheck.Limit, RangeCheckType); LLVM_DEBUG(dbgs() << "IV of type: " << *LatchType << "can be represented as range check type:" @@ -598,12 +598,12 @@ !isLoopInvariantValue(LatchStart) || !isLoopInvariantValue(LatchLimit)) { LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); - return None; + return std::nullopt; } if (!Expander.isSafeToExpandAt(LatchStart, Guard) || !Expander.isSafeToExpandAt(LatchLimit, Guard)) { LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); - return None; + return std::nullopt; } // guardLimit - guardStart + latchStart - 1 @@ -641,12 +641,12 @@ !isLoopInvariantValue(LatchStart) || !isLoopInvariantValue(LatchLimit)) { LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); - return None; + return std::nullopt; } if (!Expander.isSafeToExpandAt(LatchStart, Guard) || !Expander.isSafeToExpandAt(LatchLimit, Guard)) { LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); - return None; + return std::nullopt; } // The decrement of the latch check IV should be the same as the // rangeCheckIV. @@ -655,7 +655,7 @@ LLVM_DEBUG(dbgs() << "Not the same. PostDecLatchCheckIV: " << *PostDecLatchCheckIV << " and RangeCheckIV: " << *RangeCheck.IV << "\n"); - return None; + return std::nullopt; } // Generate the widened condition for CountDownLoop: @@ -701,26 +701,26 @@ auto RangeCheck = parseLoopICmp(ICI); if (!RangeCheck) { LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n"); - return None; + return std::nullopt; } LLVM_DEBUG(dbgs() << "Guard check:\n"); LLVM_DEBUG(RangeCheck->dump()); if (RangeCheck->Pred != ICmpInst::ICMP_ULT) { LLVM_DEBUG(dbgs() << "Unsupported range check predicate(" << RangeCheck->Pred << ")!\n"); - return None; + return std::nullopt; } auto *RangeCheckIV = RangeCheck->IV; if (!RangeCheckIV->isAffine()) { LLVM_DEBUG(dbgs() << "Range check IV is not affine!\n"); - return None; + return std::nullopt; } auto *Step = RangeCheckIV->getStepRecurrence(*SE); // We cannot just compare with latch IV step because the latch and range IVs // may have different types. 
if (!isSupportedStep(Step)) { LLVM_DEBUG(dbgs() << "Range check and latch have IVs different steps!\n"); - return None; + return std::nullopt; } auto *Ty = RangeCheckIV->getType(); auto CurrLatchCheckOpt = generateLoopLatchCheck(*DL, *SE, LatchCheck, Ty); @@ -728,7 +728,7 @@ LLVM_DEBUG(dbgs() << "Failed to generate a loop latch check " "corresponding to range type: " << *Ty << "\n"); - return None; + return std::nullopt; } LoopICmp CurrLatchCheck = *CurrLatchCheckOpt; @@ -739,7 +739,7 @@ "Range and latch steps should be of same type!"); if (Step != CurrLatchCheck.IV->getStepRecurrence(*SE)) { LLVM_DEBUG(dbgs() << "Range and latch have different step values!\n"); - return None; + return std::nullopt; } if (Step->isOne()) @@ -879,13 +879,13 @@ BasicBlock *LoopLatch = L->getLoopLatch(); if (!LoopLatch) { LLVM_DEBUG(dbgs() << "The loop doesn't have a single latch!\n"); - return None; + return std::nullopt; } auto *BI = dyn_cast(LoopLatch->getTerminator()); if (!BI || !BI->isConditional()) { LLVM_DEBUG(dbgs() << "Failed to match the latch terminator!\n"); - return None; + return std::nullopt; } BasicBlock *TrueDest = BI->getSuccessor(0); assert( @@ -895,12 +895,12 @@ auto *ICI = dyn_cast(BI->getCondition()); if (!ICI) { LLVM_DEBUG(dbgs() << "Failed to match the latch condition!\n"); - return None; + return std::nullopt; } auto Result = parseLoopICmp(ICI); if (!Result) { LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n"); - return None; + return std::nullopt; } if (TrueDest != L->getHeader()) @@ -910,13 +910,13 @@ // recurrence. if (!Result->IV->isAffine()) { LLVM_DEBUG(dbgs() << "The induction variable is not affine!\n"); - return None; + return std::nullopt; } auto *Step = Result->IV->getStepRecurrence(*SE); if (!isSupportedStep(Step)) { LLVM_DEBUG(dbgs() << "Unsupported loop stride(" << *Step << ")!\n"); - return None; + return std::nullopt; } auto IsUnsupportedPredicate = [](const SCEV *Step, ICmpInst::Predicate Pred) { @@ -934,7 +934,7 @@ if (IsUnsupportedPredicate(Step, Result->Pred)) { LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred << ")!\n"); - return None; + return std::nullopt; } return Result; diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -6620,17 +6620,17 @@ const LoopInfo &LI) { if (!L->isInnermost()) { LLVM_DEBUG(dbgs() << "Cannot fold on non-innermost loop\n"); - return None; + return std::nullopt; } // Only inspect on simple loop structure if (!L->isLoopSimplifyForm()) { LLVM_DEBUG(dbgs() << "Cannot fold on non-simple loop\n"); - return None; + return std::nullopt; } if (!SE.hasLoopInvariantBackedgeTakenCount(L)) { LLVM_DEBUG(dbgs() << "Cannot fold on backedge that is loop variant\n"); - return None; + return std::nullopt; } BasicBlock *LoopPreheader = L->getLoopPreheader(); @@ -6640,18 +6640,18 @@ // Terminating condition is foldable when it is an eq/ne icmp BranchInst *BI = cast(LoopLatch->getTerminator()); if (BI->isUnconditional()) - return None; + return std::nullopt; Value *TermCond = BI->getCondition(); if (!isa(TermCond) || !cast(TermCond)->isEquality()) { LLVM_DEBUG(dbgs() << "Cannot fold on branching condition that is not an " "ICmpInst::eq / ICmpInst::ne\n"); - return None; + return std::nullopt; } if (!TermCond->hasOneUse()) { LLVM_DEBUG( dbgs() << "Cannot replace terminating condition with more than one use\n"); - return None; + return 
std::nullopt; } // For `IsToFold`, a primary IV can be replaced by other affine AddRec when it @@ -6769,7 +6769,7 @@ << " ToHelpFold: " << *ToHelpFold << "\n"); if (!ToFold || !ToHelpFold) - return None; + return std::nullopt; return {{ToFold, {ToHelpFold, TermValueS}}}; } diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -284,11 +284,11 @@ ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, DependenceInfo &DI, OptimizationRemarkEmitter &ORE, int OptLevel) { - TargetTransformInfo::UnrollingPreferences UP = - gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, ORE, OptLevel, - None, None, None, None, None, None); + TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( + L, SE, TTI, nullptr, nullptr, ORE, OptLevel, std::nullopt, std::nullopt, + std::nullopt, std::nullopt, std::nullopt, std::nullopt); TargetTransformInfo::PeelingPreferences PP = - gatherPeelingPreferences(L, SE, TTI, None, None); + gatherPeelingPreferences(L, SE, TTI, std::nullopt, std::nullopt); TransformationMode EnableMode = hasUnrollAndJamTransformation(L); if (EnableMode & TM_Disable) diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -359,11 +359,11 @@ // Only analyze inner loops. We can't properly estimate cost of nested loops // and we won't visit inner loops again anyway. if (!L->isInnermost()) - return None; + return std::nullopt; // Don't simulate loops with a big or unknown tripcount if (!TripCount || TripCount > MaxIterationsCountToAnalyze) - return None; + return std::nullopt; SmallSetVector BBWorklist; SmallSetVector, 4> ExitWorklist; @@ -559,7 +559,7 @@ const Function *Callee = CI->getCalledFunction(); if (!Callee || TTI.isLoweredToCall(Callee)) { LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n"); - return None; + return std::nullopt; } } @@ -574,7 +574,7 @@ << " UnrolledCost: " << UnrolledCost << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize << "\n"); - return None; + return std::nullopt; } } @@ -632,7 +632,7 @@ if (UnrolledCost == RolledDynamicCost) { LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n" << " UnrolledCost: " << UnrolledCost << "\n"); - return None; + return std::nullopt; } } @@ -798,7 +798,7 @@ return TripCount; // if didn't return until here, should continue to other priorties - return None; + return std::nullopt; } static std::optional shouldFullUnroll( @@ -809,7 +809,7 @@ assert(FullUnrollTripCount && "should be non-zero!"); if (FullUnrollTripCount > UP.FullUnrollMaxCount) - return None; + return std::nullopt; // When computing the unrolled size, note that BEInsns are not replicated // like the rest of the loop body. 
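The gatherUnrollingPreferences/gatherPeelingPreferences call sites above now spell "no user override" as std::nullopt arguments. A small self-contained sketch of the same default-argument shape, with invented names and defaults:

#include <iostream>
#include <optional>

// Hypothetical stand-in for the gatherUnrollingPreferences-style signatures
// touched above: "not provided" is now spelled std::nullopt instead of None.
struct UnrollPrefs {
  unsigned Threshold;
  unsigned Count;
};

static UnrollPrefs
gatherPrefs(std::optional<unsigned> UserThreshold = std::nullopt,
            std::optional<unsigned> UserCount = std::nullopt) {
  UnrollPrefs P{150, 1}; // assumed target defaults, purely illustrative
  // An explicit user value, if present, wins over the default.
  if (UserThreshold)
    P.Threshold = *UserThreshold;
  if (UserCount)
    P.Count = *UserCount;
  return P;
}

int main() {
  UnrollPrefs A = gatherPrefs();                // both knobs left unset
  UnrollPrefs B = gatherPrefs(std::nullopt, 4); // only the count is forced
  std::cout << A.Threshold << " " << A.Count << "\n"; // 150 1
  std::cout << B.Threshold << " " << B.Count << "\n"; // 150 4
  return 0;
}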
@@ -828,7 +828,7 @@ if (Cost->UnrolledCost < UP.Threshold * Boost / 100) return FullUnrollTripCount; } - return None; + return std::nullopt; } static std::optional @@ -837,7 +837,7 @@ const TargetTransformInfo::UnrollingPreferences &UP) { if (!TripCount) - return None; + return std::nullopt; if (!UP.Partial) { LLVM_DEBUG(dbgs() << " will not try to unroll partially because " @@ -1378,13 +1378,15 @@ Optional ProvidedFullUnrollMaxCount; LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false, - bool ForgetAllSCEV = false, Optional Threshold = None, - Optional Count = None, - Optional AllowPartial = None, Optional Runtime = None, - Optional UpperBound = None, - Optional AllowPeeling = None, - Optional AllowProfileBasedPeeling = None, - Optional ProvidedFullUnrollMaxCount = None) + bool ForgetAllSCEV = false, + Optional Threshold = std::nullopt, + Optional Count = std::nullopt, + Optional AllowPartial = std::nullopt, + Optional Runtime = std::nullopt, + Optional UpperBound = std::nullopt, + Optional AllowPeeling = std::nullopt, + Optional AllowProfileBasedPeeling = std::nullopt, + Optional ProvidedFullUnrollMaxCount = std::nullopt) : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced), ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)), ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial), @@ -1456,12 +1458,12 @@ // callers. return new LoopUnroll( OptLevel, OnlyWhenForced, ForgetAllSCEV, - Threshold == -1 ? None : Optional(Threshold), - Count == -1 ? None : Optional(Count), - AllowPartial == -1 ? None : Optional(AllowPartial), - Runtime == -1 ? None : Optional(Runtime), - UpperBound == -1 ? None : Optional(UpperBound), - AllowPeeling == -1 ? None : Optional(AllowPeeling)); + Threshold == -1 ? std::nullopt : Optional(Threshold), + Count == -1 ? std::nullopt : Optional(Count), + AllowPartial == -1 ? std::nullopt : Optional(AllowPartial), + Runtime == -1 ? std::nullopt : Optional(Runtime), + UpperBound == -1 ? std::nullopt : Optional(UpperBound), + AllowPeeling == -1 ? 
std::nullopt : Optional(AllowPeeling)); } Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced, @@ -1489,16 +1491,17 @@ std::string LoopName = std::string(L.getName()); - bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE, - /*BFI*/ nullptr, /*PSI*/ nullptr, - /*PreserveLCSSA*/ true, OptLevel, - OnlyWhenForced, ForgetSCEV, /*Count*/ None, - /*Threshold*/ None, /*AllowPartial*/ false, - /*Runtime*/ false, /*UpperBound*/ false, - /*AllowPeeling*/ true, - /*AllowProfileBasedPeeling*/ false, - /*FullUnrollMaxCount*/ None) != - LoopUnrollResult::Unmodified; + bool Changed = + tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE, + /*BFI*/ nullptr, /*PSI*/ nullptr, + /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced, + ForgetSCEV, /*Count*/ std::nullopt, + /*Threshold*/ std::nullopt, /*AllowPartial*/ false, + /*Runtime*/ false, /*UpperBound*/ false, + /*AllowPeeling*/ true, + /*AllowProfileBasedPeeling*/ false, + /*FullUnrollMaxCount*/ std::nullopt) != + LoopUnrollResult::Unmodified; if (!Changed) return PreservedAnalyses::all(); @@ -1618,9 +1621,9 @@ LoopUnrollResult Result = tryToUnrollLoop( &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI, /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced, - UnrollOpts.ForgetSCEV, /*Count*/ None, - /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime, - UnrollOpts.AllowUpperBound, LocalAllowPeeling, + UnrollOpts.ForgetSCEV, /*Count*/ std::nullopt, + /*Threshold*/ std::nullopt, UnrollOpts.AllowPartial, + UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling, UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount); Changed |= Result != LoopUnrollResult::Unmodified; @@ -1646,18 +1649,18 @@ static_cast *>(this)->printPipeline( OS, MapClassName2PassName); OS << "<"; - if (UnrollOpts.AllowPartial != None) + if (UnrollOpts.AllowPartial != std::nullopt) OS << (UnrollOpts.AllowPartial.value() ? "" : "no-") << "partial;"; - if (UnrollOpts.AllowPeeling != None) + if (UnrollOpts.AllowPeeling != std::nullopt) OS << (UnrollOpts.AllowPeeling.value() ? "" : "no-") << "peeling;"; - if (UnrollOpts.AllowRuntime != None) + if (UnrollOpts.AllowRuntime != std::nullopt) OS << (UnrollOpts.AllowRuntime.value() ? "" : "no-") << "runtime;"; - if (UnrollOpts.AllowUpperBound != None) + if (UnrollOpts.AllowUpperBound != std::nullopt) OS << (UnrollOpts.AllowUpperBound.value() ? "" : "no-") << "upperbound;"; - if (UnrollOpts.AllowProfileBasedPeeling != None) + if (UnrollOpts.AllowProfileBasedPeeling != std::nullopt) OS << (UnrollOpts.AllowProfileBasedPeeling.value() ? 
"" : "no-") << "profile-peeling;"; - if (UnrollOpts.FullUnrollMaxCount != None) + if (UnrollOpts.FullUnrollMaxCount != std::nullopt) OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ";"; OS << "O" << UnrollOpts.OptLevel; OS << ">"; diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -590,7 +590,7 @@ if (!isGuaranteedToTransferExecutionToSuccessor(C)) return false; - bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None)); + bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt)); bool NeedLift = false; if (Args.erase(C)) diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp --- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp +++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp @@ -310,19 +310,19 @@ // other comparisons as we would create an orphan use of the value. if (!CmpI->hasOneUse()) { LLVM_DEBUG(dbgs() << "cmp has several uses\n"); - return None; + return std::nullopt; } if (CmpI->getPredicate() != ExpectedPredicate) - return None; + return std::nullopt; LLVM_DEBUG(dbgs() << "cmp " << (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne") << "\n"); auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0), BaseId); if (!Lhs.BaseId) - return None; + return std::nullopt; auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1), BaseId); if (!Rhs.BaseId) - return None; + return std::nullopt; const auto &DL = CmpI->getModule()->getDataLayout(); return BCECmp(std::move(Lhs), std::move(Rhs), DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()), CmpI); @@ -333,9 +333,11 @@ Optional visitCmpBlock(Value *const Val, BasicBlock *const Block, const BasicBlock *const PhiBlock, BaseIdentifier &BaseId) { - if (Block->empty()) return None; + if (Block->empty()) + return std::nullopt; auto *const BranchI = dyn_cast(Block->getTerminator()); - if (!BranchI) return None; + if (!BranchI) + return std::nullopt; LLVM_DEBUG(dbgs() << "branch\n"); Value *Cond; ICmpInst::Predicate ExpectedPredicate; @@ -351,7 +353,8 @@ // chained). 
const auto *const Const = cast(Val); LLVM_DEBUG(dbgs() << "const\n"); - if (!Const->isZero()) return None; + if (!Const->isZero()) + return std::nullopt; LLVM_DEBUG(dbgs() << "false\n"); assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch"); BasicBlock *const FalseBlock = BranchI->getSuccessor(1); @@ -361,12 +364,13 @@ } auto *CmpI = dyn_cast(Cond); - if (!CmpI) return None; + if (!CmpI) + return std::nullopt; LLVM_DEBUG(dbgs() << "icmp\n"); Optional Result = visitICmp(CmpI, ExpectedPredicate, BaseId); if (!Result) - return None; + return std::nullopt; BCECmpBlock::InstructionSet BlockInsts( {Result->Lhs.LoadI, Result->Rhs.LoadI, Result->CmpI, BranchI}); diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -71,6 +71,7 @@ #include #include #include +#include #include #include #include @@ -301,7 +302,7 @@ if (!DeoptBundle) { assert(AllowStatepointWithNoDeoptInfo && "Found non-leaf call without deopt info!"); - return None; + return std::nullopt; } return DeoptBundle->Inputs; @@ -1626,10 +1627,10 @@ uint32_t Flags = uint32_t(StatepointFlags::None); SmallVector CallArgs(Call->args()); - Optional> DeoptArgs; + std::optional> DeoptArgs; if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_deopt)) DeoptArgs = Bundle->Inputs; - Optional> TransitionArgs; + std::optional> TransitionArgs; if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_gc_transition)) { TransitionArgs = Bundle->Inputs; // TODO: This flag no longer serves a purpose and can be removed later diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -778,10 +778,6 @@ if (!IsOffsetKnown) return PI.setAborted(&LI); - if (LI.isVolatile() && - LI.getPointerAddressSpace() != DL.getAllocaAddrSpace()) - return PI.setAborted(&LI); - if (isa(LI.getType())) return PI.setAborted(&LI); @@ -796,10 +792,6 @@ if (!IsOffsetKnown) return PI.setAborted(&SI); - if (SI.isVolatile() && - SI.getPointerAddressSpace() != DL.getAllocaAddrSpace()) - return PI.setAborted(&SI); - if (isa(ValOp->getType())) return PI.setAborted(&SI); @@ -837,11 +829,6 @@ if (!IsOffsetKnown) return PI.setAborted(&II); - // Don't replace this with a store with a different address space. TODO: - // Use a store with the casted new alloca? - if (II.isVolatile() && II.getDestAddressSpace() != DL.getAllocaAddrSpace()) - return PI.setAborted(&II); - insertUse(II, Offset, Length ? Length->getLimitedValue() : AllocSize - Offset.getLimitedValue(), (bool)Length); @@ -861,13 +848,6 @@ if (!IsOffsetKnown) return PI.setAborted(&II); - // Don't replace this with a load/store with a different address space. - // TODO: Use a store with the casted new alloca? - if (II.isVolatile() && - (II.getDestAddressSpace() != DL.getAllocaAddrSpace() || - II.getSourceAddressSpace() != DL.getAllocaAddrSpace())) - return PI.setAborted(&II); - // This side of the transfer is completely out-of-bounds, and so we can // nuke the entire transfer. However, we also need to nuke the other side // if already added to our partitions. @@ -2335,6 +2315,16 @@ // the insertion point is set to point to the user. IRBuilderTy IRB; + // Return the new alloca, addrspacecasted if required to avoid changing the + // addrspace of a volatile access. 
+ Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) { + if (!IsVolatile || AddrSpace == NewAI.getType()->getPointerAddressSpace()) + return &NewAI; + + Type *AccessTy = NewAI.getAllocatedType()->getPointerTo(AddrSpace); + return IRB.CreateAddrSpaceCast(&NewAI, AccessTy); + } + public: AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROAPass &Pass, AllocaInst &OldAI, AllocaInst &NewAI, @@ -2535,7 +2525,9 @@ (canConvertValue(DL, NewAllocaTy, TargetTy) || (IsLoadPastEnd && NewAllocaTy->isIntegerTy() && TargetTy->isIntegerTy()))) { - LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, + Value *NewPtr = + getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile()); + LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr, NewAI.getAlign(), LI.isVolatile(), LI.getName()); if (AATags) @@ -2726,8 +2718,11 @@ } V = convertValue(DL, IRB, V, NewAllocaTy); + Value *NewPtr = + getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile()); + NewSI = - IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), SI.isVolatile()); + IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile()); } else { unsigned AS = SI.getPointerAddressSpace(); Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo(AS)); @@ -2900,8 +2895,9 @@ V = convertValue(DL, IRB, V, AllocaTy); } + Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile()); StoreInst *New = - IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile()); + IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile()); New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); if (AATags) @@ -3054,14 +3050,22 @@ } OtherPtrTy = OtherTy->getPointerTo(OtherAS); - Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy, + Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy, OtherPtr->getName() + "."); MaybeAlign SrcAlign = OtherAlign; - Value *DstPtr = &NewAI; MaybeAlign DstAlign = SliceAlign; - if (!IsDest) { - std::swap(SrcPtr, DstPtr); + if (!IsDest) std::swap(SrcAlign, DstAlign); + + Value *SrcPtr; + Value *DstPtr; + + if (IsDest) { + DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile()); + SrcPtr = AdjPtr; + } else { + DstPtr = AdjPtr; + SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile()); } Value *Src; @@ -4713,7 +4717,8 @@ bool Changed = false; while (!DeadInsts.empty()) { Instruction *I = dyn_cast_or_null(DeadInsts.pop_back_val()); - if (!I) continue; + if (!I) + continue; LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n"); // If the instruction is an alloca, find the possible dbg.declare connected diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -489,11 +489,11 @@ // Make sure we're dealing with a vector. Layout.VecTy = dyn_cast(Ty); if (!Layout.VecTy) - return None; + return std::nullopt; // Check that we're dealing with full-byte elements. 
Layout.ElemTy = Layout.VecTy->getElementType(); if (!DL.typeSizeEqualsStoreSize(Layout.ElemTy)) - return None; + return std::nullopt; Layout.VecAlign = Alignment; Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy); return Layout; diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -124,8 +124,8 @@ TinyPtrVector Invariants; Optional Cost; NonTrivialUnswitchCandidate(Instruction *TI, ArrayRef Invariants, - Optional Cost = None) - : TI(TI), Invariants(Invariants), Cost(Cost) {}; + Optional Cost = std::nullopt) + : TI(TI), Invariants(Invariants), Cost(Cost){}; }; } // end anonymous namespace. @@ -579,8 +579,7 @@ // If fully unswitching, we can use the existing branch instruction. // Splice it into the old PH to gate reaching the new preheader and re-point // its successors. - OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(), - BI); + OldPH->splice(OldPH->end(), BI.getParent(), BI.getIterator()); BI.setCondition(Cond); if (MSSAU) { // Temporarily clone the terminator, to make MSSA update cheaper by @@ -2251,7 +2250,7 @@ if (FullUnswitch) { // Splice the terminator from the original loop and rewrite its // successors. - SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI); + SplitBB->splice(SplitBB->end(), ParentBB, TI.getIterator()); // Keep a clone of the terminator for MSSA updates. Instruction *NewTI = TI.clone(); diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -267,8 +267,7 @@ Start = PTI; // Move all definitions in the successor to the predecessor... - PredBB->getInstList().splice(PTI->getIterator(), BB->getInstList(), - BB->begin(), STI->getIterator()); + PredBB->splice(PTI->getIterator(), BB, BB->begin(), STI->getIterator()); if (MSSAU) MSSAU->moveAllAfterMergeBlocks(BB, PredBB, Start); @@ -288,7 +287,7 @@ PredBB->back().eraseFromParent(); // Move terminator instruction. - PredBB->getInstList().splice(PredBB->end(), BB->getInstList()); + PredBB->splice(PredBB->end(), BB); // Terminator may be a memory accessing instruction too. if (MSSAU) @@ -428,7 +427,7 @@ VariableMap; for (auto &I : *BB) { if (DbgValueInst *DVI = dyn_cast(&I)) { - DebugVariable Key(DVI->getVariable(), None, + DebugVariable Key(DVI->getVariable(), std::nullopt, DVI->getDebugLoc()->getInlinedAt()); auto VMI = VariableMap.find(Key); auto *DAI = dyn_cast(DVI); @@ -489,7 +488,7 @@ DenseSet SeenDefForAggregate; // Returns the DebugVariable for DVI with no fragment info. 
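The getInstList().splice changes here and in the surrounding files move instructions between blocks by relinking list nodes rather than erasing and re-creating them; BasicBlock::splice is a thin wrapper over that list operation. As an analogy only (plain std::list, not the LLVM API):

#include <iostream>
#include <list>
#include <string>

int main() {
  // Stand-ins for two basic blocks' instruction lists.
  std::list<std::string> Pred = {"p0", "p1", "br"};
  std::list<std::string> Succ = {"s0", "s1", "ret"};

  // Move everything in Succ except its terminator ("ret") to just before
  // Pred's terminator -- the same shape as MergeBlockIntoPredecessor above.
  auto InsertPt = std::prev(Pred.end()); // before "br"
  auto SuccTerm = std::prev(Succ.end()); // "ret" stays put
  Pred.splice(InsertPt, Succ, Succ.begin(), SuccTerm);

  for (const auto &I : Pred)
    std::cout << I << " "; // p0 p1 s0 s1 br
  std::cout << "\n";
  for (const auto &I : Succ)
    std::cout << I << " "; // ret
  std::cout << "\n";
  return 0;
}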
auto GetAggregateVariable = [](DbgValueInst *DVI) { - return DebugVariable(DVI->getVariable(), None, + return DebugVariable(DVI->getVariable(), std::nullopt, DVI->getDebugLoc()->getInlinedAt()); }; diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Support/TypeSize.h" +#include using namespace llvm; @@ -226,7 +227,7 @@ } static bool setAllocSize(Function &F, unsigned ElemSizeArg, - Optional NumElemsArg) { + std::optional NumElemsArg) { if (F.hasFnAttribute(Attribute::AllocSize)) return false; F.addFnAttr(Attribute::getWithAllocSizeArgs(F.getContext(), ElemSizeArg, @@ -441,7 +442,7 @@ break; case LibFunc_aligned_alloc: Changed |= setAlignedAllocParam(F, 0); - Changed |= setAllocSize(F, 1, None); + Changed |= setAllocSize(F, 1, std::nullopt); Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Uninitialized | AllocFnKind::Aligned); [[fallthrough]]; case LibFunc_valloc: @@ -450,7 +451,7 @@ Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_malloc ? "vec_malloc" : "malloc"); Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Uninitialized); - Changed |= setAllocSize(F, 0, None); + Changed |= setAllocSize(F, 0, std::nullopt); Changed |= setOnlyAccessesInaccessibleMemory(F); Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -516,7 +517,7 @@ Changed |= setAllocFamily(F, "malloc"); Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Aligned | AllocFnKind::Uninitialized); - Changed |= setAllocSize(F, 1, None); + Changed |= setAllocSize(F, 1, std::nullopt); Changed |= setAlignedAllocParam(F, 0); Changed |= setOnlyAccessesInaccessibleMemory(F); Changed |= setRetNoUndef(F); @@ -543,7 +544,7 @@ F, TheLibFunc == LibFunc_vec_realloc ? "vec_malloc" : "malloc"); Changed |= setAllocKind(F, AllocFnKind::Realloc); Changed |= setAllocatedPointerParam(F, 0); - Changed |= setAllocSize(F, 1, None); + Changed |= setAllocSize(F, 1, std::nullopt); Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F); Changed |= setRetNoUndef(F); Changed |= setDoesNotThrow(F); diff --git a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp --- a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -357,12 +357,12 @@ VisitedSetTy SetL; ValueRange DividendRange = getValueRange(Dividend, SetL); if (DividendRange == VALRNG_LIKELY_LONG) - return None; + return std::nullopt; VisitedSetTy SetR; ValueRange DivisorRange = getValueRange(Divisor, SetR); if (DivisorRange == VALRNG_LIKELY_LONG) - return None; + return std::nullopt; bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT); bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT); @@ -387,7 +387,7 @@ // If the divisor is not a constant, DAGCombiner will convert it to a // multiplication by a magic constant. It isn't clear if it is worth // introducing control flow to get a narrower multiply. 
- return None; + return std::nullopt; } // After Constant Hoisting pass, long constants may be represented as @@ -397,7 +397,7 @@ if (auto *BCI = dyn_cast(Divisor)) if (BCI->getParent() == SlowDivOrRem->getParent() && isa(BCI->getOperand(0))) - return None; + return std::nullopt; IRBuilder<> Builder(MainBB, MainBB->end()); Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc()); diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp --- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -439,6 +439,18 @@ *FailureReason = "Argument type mismatch"; return false; } + + // MustTail call needs stricter type match. See + // Verifier::verifyMustTailCall(). + if (CB.isMustTailCall()) { + PointerType *PF = dyn_cast(FormalTy); + PointerType *PA = dyn_cast(ActualTy); + if (!PF || !PA || PF->getAddressSpace() != PA->getAddressSpace()) { + if (FailureReason) + *FailureReason = "Musttail call Argument type mismatch"; + return false; + } + } } for (; I < NumArgs; I++) { // Vararg functions can have more arguments than parameters. diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -889,7 +889,7 @@ Dest->replaceAllUsesWith(&*I); // Move all the instructions in the succ to the pred. - I->getInstList().splice(I->end(), Dest->getInstList()); + I->splice(I->end(), Dest); // Remove the dest block. Dest->eraseFromParent(); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1539,7 +1539,8 @@ assert(OldSP->getUnit() && "Missing compile unit for subprogram"); DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolved=*/false, OldSP->getUnit()); - auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None)); + auto SPType = + DIB.createSubroutineType(DIB.getOrCreateTypeArray(std::nullopt)); DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized | DISubprogram::SPFlagLocalToUnit; diff --git a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp --- a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp +++ b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp @@ -129,7 +129,7 @@ // Limitation: can only handle branch instruction currently. 
const BranchInst *BI = dyn_cast(IDom->getTerminator()); if (!BI) - return None; + return std::nullopt; bool Inserted = false; if (PDT.dominates(CurBlock, IDom)) { @@ -149,13 +149,13 @@ Inserted = Conditions.addControlCondition( ControlCondition(BI->getCondition(), false)); } else - return None; + return std::nullopt; if (Inserted) ++NumConditions; if (MaxLookup != 0 && NumConditions > MaxLookup) - return None; + return std::nullopt; CurBlock = IDom; } while (CurBlock != &Dominator); @@ -252,13 +252,13 @@ const Optional BB0Conditions = ControlConditions::collectControlConditions(BB0, *CommonDominator, DT, PDT); - if (BB0Conditions == None) + if (BB0Conditions == std::nullopt) return false; const Optional BB1Conditions = ControlConditions::collectControlConditions(BB1, *CommonDominator, DT, PDT); - if (BB1Conditions == None) + if (BB1Conditions == std::nullopt) return false; return BB0Conditions->isEquivalent(*BB1Conditions); diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp --- a/llvm/lib/Transforms/Utils/Debugify.cpp +++ b/llvm/lib/Transforms/Utils/Debugify.cpp @@ -114,7 +114,8 @@ continue; bool InsertedDbgVal = false; - auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None)); + auto SPType = + DIB.createSubroutineType(DIB.getOrCreateTypeArray(std::nullopt)); DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized; if (F.hasPrivateLinkage() || F.hasInternalLinkage()) diff --git a/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/llvm/lib/Transforms/Utils/FlattenCFG.cpp --- a/llvm/lib/Transforms/Utils/FlattenCFG.cpp +++ b/llvm/lib/Transforms/Utils/FlattenCFG.cpp @@ -285,8 +285,7 @@ CB = PBI->getSuccessor(1 - Idx); // Delete the conditional branch. FirstCondBlock->back().eraseFromParent(); - FirstCondBlock->getInstList() - .splice(FirstCondBlock->end(), CB->getInstList()); + FirstCondBlock->splice(FirstCondBlock->end(), CB); PBI = cast(FirstCondBlock->getTerminator()); Value *CC = PBI->getCondition(); // Merge conditions. @@ -481,8 +480,7 @@ // Merge \param SecondEntryBlock into \param FirstEntryBlock. FirstEntryBlock->back().eraseFromParent(); - FirstEntryBlock->getInstList() - .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList()); + FirstEntryBlock->splice(FirstEntryBlock->end(), SecondEntryBlock); BranchInst *PBI = cast(FirstEntryBlock->getTerminator()); assert(PBI->getCondition() == CInst2); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1083,7 +1083,7 @@ SmallVector DummyNodes; for (const MDNode *I : MD) { - DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), None)); + DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), std::nullopt)); MDMap[I].reset(DummyNodes.back().get()); } @@ -1896,7 +1896,8 @@ BlockFrequencyInfo *CallerBFI) { if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1) return; - auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; + auto CallSiteCount = + PSI ? PSI->getProfileCount(TheCall, CallerBFI) : std::nullopt; int64_t CallCount = std::min(CallSiteCount.value_or(0), CalleeEntryCount.getCount()); updateProfileCallee(Callee, -CallCount, &VMap); @@ -2411,8 +2412,8 @@ // Transfer all of the allocas over in a block. 
Using splice means // that the instructions aren't removed from the symbol table, then // reinserted. - Caller->getEntryBlock().getInstList().splice( - InsertPoint, FirstNewBlock->getInstList(), AI->getIterator(), I); + Caller->getEntryBlock().splice(InsertPoint, &*FirstNewBlock, + AI->getIterator(), I); } } @@ -2755,8 +2756,8 @@ // the calling basic block. if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { // Move all of the instructions right before the call. - OrigBB->getInstList().splice(CB.getIterator(), FirstNewBlock->getInstList(), - FirstNewBlock->begin(), FirstNewBlock->end()); + OrigBB->splice(CB.getIterator(), &*FirstNewBlock, FirstNewBlock->begin(), + FirstNewBlock->end()); // Remove the cloned basic block. Caller->getBasicBlockList().pop_back(); @@ -2896,8 +2897,7 @@ // Splice the code from the return block into the block that it will return // to, which contains the code that was after the call. - AfterCallBB->getInstList().splice(AfterCallBB->begin(), - ReturnBB->getInstList()); + AfterCallBB->splice(AfterCallBB->begin(), ReturnBB); if (CreatedBranchToNormalDest) CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); @@ -2927,7 +2927,7 @@ // Splice the code entry block into calling block, right before the // unconditional branch. CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes - OrigBB->getInstList().splice(Br->getIterator(), CalleeEntry->getInstList()); + OrigBB->splice(Br->getIterator(), CalleeEntry); // Remove the unconditional branch. Br->eraseFromParent(); diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -489,7 +489,8 @@ } if (auto *FPI = dyn_cast(I)) { - Optional ExBehavior = FPI->getExceptionBehavior(); + std::optional ExBehavior = + FPI->getExceptionBehavior(); return *ExBehavior != fp::ebStrict; } } @@ -799,7 +800,7 @@ // Splice all the instructions from PredBB to DestBB. PredBB->getTerminator()->eraseFromParent(); - DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); + DestBB->splice(DestBB->begin(), PredBB); new UnreachableInst(PredBB->getContext(), PredBB); // If the PredBB is the entry block of the function, move DestBB up to @@ -1204,8 +1205,7 @@ // Copy over any phi, debug or lifetime instruction. BB->getTerminator()->eraseFromParent(); - Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(), - BB->getInstList()); + Succ->splice(Succ->getFirstNonPHI()->getIterator(), BB); } else { while (PHINode *PN = dyn_cast(&BB->front())) { // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. @@ -2175,7 +2175,7 @@ // Without knowing signedness, sign/zero extension isn't possible. 
auto Signedness = Var->getSignedness(); if (!Signedness) - return None; + return std::nullopt; bool Signed = *Signedness == DIBasicType::Signedness::Signed; return DIExpression::appendExt(DII.getExpression(), ToBits, FromBits, @@ -2913,7 +2913,7 @@ unsigned BitWidth = DL.getPointerTypeSizeInBits(NewTy); if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) { - MDNode *NN = MDNode::get(OldLI.getContext(), None); + MDNode *NN = MDNode::get(OldLI.getContext(), std::nullopt); NewLI.setMetadata(LLVMContext::MD_nonnull, NN); } } @@ -2962,9 +2962,8 @@ I->setDebugLoc(InsertPt->getDebugLoc()); ++II; } - DomBlock->getInstList().splice(InsertPt->getIterator(), BB->getInstList(), - BB->begin(), - BB->getTerminator()->getIterator()); + DomBlock->splice(InsertPt->getIterator(), BB, BB->begin(), + BB->getTerminator()->getIterator()); } namespace { @@ -3023,7 +3022,7 @@ if (I != BPS.end()) return I->second; - auto &Result = BPS[V] = None; + auto &Result = BPS[V] = std::nullopt; auto BitWidth = V->getType()->getScalarSizeInBits(); // Can't do integer/elements > 128 bits. @@ -3059,7 +3058,7 @@ if (A->Provenance[BitIdx] != BitPart::Unset && B->Provenance[BitIdx] != BitPart::Unset && A->Provenance[BitIdx] != B->Provenance[BitIdx]) - return Result = None; + return Result = std::nullopt; if (A->Provenance[BitIdx] == BitPart::Unset) Result->Provenance[BitIdx] = B->Provenance[BitIdx]; diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -165,7 +165,7 @@ protected: using PeelCounter = std::optional; - const PeelCounter Unknown = None; + const PeelCounter Unknown = std::nullopt; // Add 1 respecting Unknown and return Unknown if result over MaxIterations PeelCounter addOne(PeelCounter PC) const { @@ -250,7 +250,7 @@ } } assert((Iterations <= MaxIterations) && "bad result in phi analysis"); - return Iterations ? Optional(Iterations) : None; + return Iterations ? Optional(Iterations) : std::nullopt; } } // unnamed namespace diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -703,7 +703,7 @@ if (CompletelyUnroll) { if (PreserveOnlyFirst) { if (i == 0) - return None; + return std::nullopt; return j == 0; } // Complete (but possibly inexact) unrolling @@ -711,7 +711,7 @@ return true; if (Info.TripCount && j != Info.TripCount) return false; - return None; + return std::nullopt; } if (ULO.Runtime) { @@ -719,7 +719,7 @@ // exits may be stale. if (IsLatch && j != 0) return false; - return None; + return std::nullopt; } if (j != Info.BreakoutTrip && @@ -728,7 +728,7 @@ // unconditional branch for some iterations. 
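LoopPeel's PeelCounter above treats std::nullopt as "iteration count unknown" and makes the increment helper propagate that state. A compact sketch of the pattern; MaxIterations and the concrete values are assumptions for illustration only:

#include <iostream>
#include <optional>

// Sketch of the PeelCounter idiom from LoopPeel.cpp above: std::nullopt is
// the "unknown" value, and incrementing both respects a cap and keeps
// unknown-ness sticky.
using PeelCounter = std::optional<unsigned>;
constexpr unsigned MaxIterations = 8; // assumed cap, for illustration

static PeelCounter addOne(PeelCounter PC) {
  if (!PC)
    return std::nullopt; // unknown stays unknown
  return (*PC + 1 <= MaxIterations) ? PeelCounter(*PC + 1) : std::nullopt;
}

int main() {
  PeelCounter C = 0;
  for (int I = 0; I < 3; ++I)
    C = addOne(C);
  if (C)
    std::cout << "counted " << *C << " iterations\n"; // counted 3 iterations

  PeelCounter U = std::nullopt; // the analysis gave up somewhere earlier
  if (!addOne(U))
    std::cout << "still unknown\n"; // unknown-ness propagates
  return 0;
}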
return false; } - return None; + return std::nullopt; }; // Fold branches for iterations where we know that they will exit or not diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -258,7 +258,7 @@ return ElementCount::get(*Width, IsScalable.value_or(false)); } - return None; + return std::nullopt; } Optional llvm::makeFollowupLoopID( @@ -267,7 +267,7 @@ if (!OrigLoopID) { if (AlwaysNew) return nullptr; - return None; + return std::nullopt; } assert(OrigLoopID->getOperand(0) == OrigLoopID); @@ -326,7 +326,7 @@ // Attributes of the followup loop not specified explicity, so signal to the // transformation pass to add suitable attributes. if (!AlwaysNew && !HasAnyFollowup) - return None; + return std::nullopt; // If no attributes were added or remove, the previous loop Id can be reused. if (!AlwaysNew && !Changed) @@ -790,14 +790,14 @@ // we exited the loop. uint64_t LoopWeight, ExitWeight; if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight)) - return None; + return std::nullopt; if (L->contains(ExitingBranch->getSuccessor(1))) std::swap(LoopWeight, ExitWeight); if (!ExitWeight) // Don't have a way to return predicated infinite - return None; + return std::nullopt; OrigExitWeight = ExitWeight; @@ -824,7 +824,7 @@ return *EstTripCount; } } - return None; + return std::nullopt; } bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp --- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -13,16 +13,15 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/MDBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include using namespace llvm; -void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, - Value *DstAddr, ConstantInt *CopyLen, - Align SrcAlign, Align DstAlign, - bool SrcIsVolatile, bool DstIsVolatile, - bool CanOverlap, - const TargetTransformInfo &TTI, - Optional AtomicElementSize) { +void llvm::createMemCpyLoopKnownSize( + Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, + ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, + bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, + std::optional AtomicElementSize) { // No need to expand zero length copies. 
if (CopyLen->isZero()) return; @@ -173,13 +172,11 @@ "Bytes copied should match size in the call!"); } -void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, - Value *SrcAddr, Value *DstAddr, - Value *CopyLen, Align SrcAlign, - Align DstAlign, bool SrcIsVolatile, - bool DstIsVolatile, bool CanOverlap, - const TargetTransformInfo &TTI, - Optional AtomicElementSize) { +void llvm::createMemCpyLoopUnknownSize( + Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, + Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, + bool CanOverlap, const TargetTransformInfo &TTI, + std::optional AtomicElementSize) { BasicBlock *PreLoopBB = InsertBefore->getParent(); BasicBlock *PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion"); diff --git a/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp b/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp --- a/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp +++ b/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp @@ -146,7 +146,7 @@ static Optional getSizeInBytes(Optional SizeInBits) { if (!SizeInBits || *SizeInBits % 8 != 0) - return None; + return std::nullopt; return *SizeInBits / 8; } @@ -300,7 +300,7 @@ static Optional nameOrNone(const Value *V) { if (V->hasName()) return V->getName(); - return None; + return std::nullopt; } void MemoryOpRemark::visitVariable(const Value *V, @@ -341,7 +341,7 @@ // If not, get it from the alloca. Optional TySize = AI->getAllocationSizeInBits(DL); Optional Size = - TySize ? getSizeInBytes(TySize->getFixedSize()) : None; + TySize ? getSizeInBytes(TySize->getFixedSize()) : std::nullopt; VariableInfo Var{nameOrNone(AI), Size}; if (!Var.isEmpty()) Result.push_back(std::move(Var)); @@ -361,7 +361,7 @@ uint64_t Size = Ptr->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed); if (!Size) return; - VIs.push_back({None, Size}); + VIs.push_back({std::nullopt, Size}); } R << (IsRead ? "\n Read Variables: " : "\n Written Variables: "); diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp --- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -800,7 +800,7 @@ CmpInst *Cmp = dyn_cast(Condition); if (!Cmp) { // TODO: Make this an assertion once RenamedOp is fully accurate. - return None; + return std::nullopt; } CmpInst::Predicate Pred; @@ -813,7 +813,7 @@ OtherOp = Cmp->getOperand(0); } else { // TODO: Make this an assertion once RenamedOp is fully accurate. - return None; + return std::nullopt; } // Invert predicate along false edge. @@ -825,7 +825,7 @@ case PT_Switch: if (Condition != RenamedOp) { // TODO: Make this an assertion once RenamedOp is fully accurate. - return None; + return std::nullopt; } return {{CmpInst::ICMP_EQ, cast(this)->CaseValue}}; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1603,16 +1603,13 @@ // The debug location is an integral part of a debug info intrinsic // and can't be separated from it or replaced. Instead of attempting // to merge locations, simply hoist both copies of the intrinsic. 
- BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), - I1); - BIParent->getInstList().splice(BI->getIterator(), BB2->getInstList(), - I2); + BIParent->splice(BI->getIterator(), BB1, I1->getIterator()); + BIParent->splice(BI->getIterator(), BB2, I2->getIterator()); } else { // For a normal instruction, we just move one to right before the // branch, then replace all uses of the other with the first. Finally, // we remove the now redundant second instruction. - BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), - I1); + BIParent->splice(BI->getIterator(), BB1, I1->getIterator()); if (!I2->use_empty()) I2->replaceAllUsesWith(I1); I1->andIRFlags(I2); @@ -3040,8 +3037,8 @@ } // Hoist the instructions. - BB->getInstList().splice(BI->getIterator(), ThenBB->getInstList(), - ThenBB->begin(), std::prev(ThenBB->end())); + BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(), + std::prev(ThenBB->end())); // Insert selects and rewrite the PHI operands. IRBuilder Builder(BI); @@ -3270,7 +3267,7 @@ MergeBlockIntoPredecessor(EdgeBB, DTU); // Signal repeat, simplifying any other constants. - return None; + return std::nullopt; } return false; @@ -3285,8 +3282,8 @@ do { // Note that None means "we changed things, but recurse further." Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC); - EverChanged |= Result == None || *Result; - } while (Result == None); + EverChanged |= Result == std::nullopt || *Result; + } while (Result == std::nullopt); return EverChanged; } @@ -3561,7 +3558,7 @@ if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely) return {{Instruction::Or, true}}; } - return None; + return std::nullopt; } static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -1042,9 +1042,8 @@ Instruction *UseI) { DefUserPair Key(Def, UseI); auto It = PostIncRangeInfos.find(Key); - return It == PostIncRangeInfos.end() - ? Optional(None) - : Optional(It->second); + return It == PostIncRangeInfos.end() ? Optional(std::nullopt) + : Optional(It->second); } void calculatePostIncRanges(PHINode *OrigPhi); diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -3855,7 +3855,7 @@ // st[rp]cpy_chk call which may fail at runtime if the size is too long. // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. 
- if (isFortifiedCallFoldable(CI, 2, None, 1)) { + if (isFortifiedCallFoldable(CI, 2, std::nullopt, 1)) { if (Func == LibFunc_strcpy_chk) return copyFlags(*CI, emitStrCpy(Dst, Src, B, TLI)); else @@ -3886,7 +3886,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrLenChk(CallInst *CI, IRBuilderBase &B) { - if (isFortifiedCallFoldable(CI, 1, None, 0)) + if (isFortifiedCallFoldable(CI, 1, std::nullopt, 0)) return copyFlags(*CI, emitStrLen(CI->getArgOperand(0), B, CI->getModule()->getDataLayout(), TLI)); return nullptr; @@ -3921,7 +3921,7 @@ Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI, IRBuilderBase &B) { - if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) { + if (isFortifiedCallFoldable(CI, 3, 1, std::nullopt, 2)) { SmallVector VariadicArgs(drop_begin(CI->args(), 5)); return copyFlags(*CI, emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), @@ -3933,7 +3933,7 @@ Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI, IRBuilderBase &B) { - if (isFortifiedCallFoldable(CI, 2, None, None, 1)) { + if (isFortifiedCallFoldable(CI, 2, std::nullopt, std::nullopt, 1)) { SmallVector VariadicArgs(drop_begin(CI->args(), 4)); return copyFlags(*CI, emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), @@ -3984,7 +3984,7 @@ Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI, IRBuilderBase &B) { - if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) + if (isFortifiedCallFoldable(CI, 3, 1, std::nullopt, 2)) return copyFlags( *CI, emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4), CI->getArgOperand(5), B, TLI)); @@ -3994,7 +3994,7 @@ Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI, IRBuilderBase &B) { - if (isFortifiedCallFoldable(CI, 2, None, None, 1)) + if (isFortifiedCallFoldable(CI, 2, std::nullopt, std::nullopt, 1)) return copyFlags(*CI, emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), CI->getArgOperand(4), B, TLI)); diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp --- a/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -391,8 +391,9 @@ // ensures metadata operands only reference defined SSA values. return (Flags & RF_IgnoreMissingLocals) ? nullptr - : MetadataAsValue::get(V->getContext(), - MDTuple::get(V->getContext(), None)); + : MetadataAsValue::get( + V->getContext(), + MDTuple::get(V->getContext(), std::nullopt)); } if (auto *AL = dyn_cast(MD)) { SmallVector MappedArgs; @@ -578,7 +579,7 @@ const MDNode &N = *cast(Op); if (N.isDistinct()) return mapDistinctNode(N); - return None; + return std::nullopt; } MDNode *MDNodeMapper::mapDistinctNode(const MDNode &N) { @@ -619,7 +620,7 @@ if (auto *CMD = dyn_cast(Op)) return wrapConstantAsMetadata(*CMD, M.getVM().lookup(CMD->getValue())); - return None; + return std::nullopt; } Metadata &MDNodeMapper::UniquedGraph::getFwdReference(MDNode &Op) { @@ -848,7 +849,7 @@ assert(isa(MD) && "Expected a metadata node"); - return None; + return std::nullopt; } Metadata *Mapper::mapMetadata(const Metadata *MD) { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -417,7 +417,7 @@ if (unsigned ExpectedTC = SE.getSmallConstantMaxTripCount(L)) return ExpectedTC; - return None; + return std::nullopt; } // Forward declare GeneratedRTChecks. 
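The FoldCondBranchOnValueKnownInPredecessor driver above uses std::optional<bool> as a three-way result: false for "no change", true for "changed", and std::nullopt for "changed, run the folding again". A self-contained sketch of that loop shape with an invented worker:

#include <iostream>
#include <optional>

// Invented worker: "simplifies" by halving a counter. It reports
//   std::nullopt -> made a change, caller should run it again
//   true         -> made a final change
//   false        -> nothing left to do
static std::optional<bool> simplifyOnce(int &State) {
  if (State > 1) {
    State /= 2;
    return std::nullopt; // changed, but more work may remain
  }
  if (State == 1) {
    State = 0;
    return true; // changed for the last time
  }
  return false;  // no change
}

int main() {
  int State = 8;
  bool EverChanged = false;
  std::optional<bool> Result;
  do {
    // Same accounting as the SimplifyCFG loop above: std::nullopt still
    // counts as "changed" for the caller, and also forces another pass.
    Result = simplifyOnce(State);
    EverChanged |= Result == std::nullopt || *Result;
  } while (Result == std::nullopt);
  std::cout << std::boolalpha << EverChanged << " state=" << State << "\n";
  // prints: true state=0
  return 0;
}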
@@ -1581,7 +1581,7 @@ /// Convenience function that returns the value of vscale_range iff /// vscale_range.min == vscale_range.max or otherwise returns the value /// returned by the corresponding TLI method. - Optional getVScaleForTuning() const; + std::optional getVScaleForTuning() const; private: unsigned NumPredStores = 0; @@ -4906,7 +4906,7 @@ return MaxScalableVF; // Limit MaxScalableVF by the maximum safe dependence distance. - Optional MaxVScale = TTI.getMaxVScale(); + std::optional MaxVScale = TTI.getMaxVScale(); if (!MaxVScale && TheFunction->hasFnAttribute(Attribute::VScaleRange)) MaxVScale = TheFunction->getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax(); @@ -5268,7 +5268,7 @@ return MaxVF; } -Optional LoopVectorizationCostModel::getVScaleForTuning() const { +std::optional LoopVectorizationCostModel::getVScaleForTuning() const { if (TheFunction->hasFnAttribute(Attribute::VScaleRange)) { auto Attr = TheFunction->getFnAttribute(Attribute::VScaleRange); auto Min = Attr.getVScaleRangeMin(); @@ -5304,7 +5304,7 @@ // Improve estimate for the vector width if it is scalable. unsigned EstimatedWidthA = A.Width.getKnownMinValue(); unsigned EstimatedWidthB = B.Width.getKnownMinValue(); - if (Optional VScale = getVScaleForTuning()) { + if (std::optional VScale = getVScaleForTuning()) { if (A.Width.isScalable()) EstimatedWidthA *= VScale.value(); if (B.Width.isScalable()) @@ -5354,7 +5354,7 @@ #ifndef NDEBUG unsigned AssumedMinimumVscale = 1; - if (Optional VScale = getVScaleForTuning()) + if (std::optional VScale = getVScaleForTuning()) AssumedMinimumVscale = *VScale; unsigned Width = Candidate.Width.isScalable() @@ -5577,7 +5577,7 @@ ElementCount EstimatedRuntimeVF = MainLoopVF; if (MainLoopVF.isScalable()) { EstimatedRuntimeVF = ElementCount::getFixed(MainLoopVF.getKnownMinValue()); - if (Optional VScale = getVScaleForTuning()) + if (std::optional VScale = getVScaleForTuning()) EstimatedRuntimeVF *= *VScale; } @@ -6452,8 +6452,8 @@ bool Reverse = ConsecutiveStride < 0; if (Reverse) - Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, None, - CostKind, 0); + Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, + std::nullopt, CostKind, 0); return Cost; } @@ -6537,8 +6537,8 @@ assert(!Legal->isMaskRequired(I) && "Reverse masked interleaved access not supported."); Cost += Group->getNumMembers() * - TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, None, - CostKind, 0); + TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, + std::nullopt, CostKind, 0); } return Cost; } @@ -6548,7 +6548,7 @@ using namespace llvm::PatternMatch; // Early exit for no inloop reductions if (InLoopReductionChains.empty() || VF.isScalar() || !isa(Ty)) - return None; + return std::nullopt; auto *VectorTy = cast(Ty); // We are looking for a pattern of, and finding the minimal acceptable cost: @@ -6566,7 +6566,7 @@ Instruction *RetI = I; if (match(RetI, m_ZExtOrSExt(m_Value()))) { if (!RetI->hasOneUser()) - return None; + return std::nullopt; RetI = RetI->user_back(); } @@ -6578,7 +6578,7 @@ // Test if the found instruction is a reduction, and if not return an invalid // cost specifying the parent to use the original cost modelling. if (!InLoopReductionImmediateChains.count(RetI)) - return None; + return std::nullopt; // Find the reduction this chain is a part of and calculate the basic cost of // the reduction on its own. @@ -6712,7 +6712,7 @@ } } - return I == RetI ? Optional(BaseCost) : None; + return I == RetI ? 
Optional(BaseCost) : std::nullopt; } InstructionCost @@ -7498,7 +7498,7 @@ assert(OrigLoop->isInnermost() && "Inner loop expected."); FixedScalableVFPair MaxFactors = CM.computeMaxVF(UserVF, UserIC); if (!MaxFactors) // Cases that should not to be vectorized nor interleaved. - return None; + return std::nullopt; // Invalidate interleave groups if all blocks of loop will be predicated. if (CM.blockNeedsPredicationForAnyReason(OrigLoop->getHeader()) && @@ -10042,7 +10042,7 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks, VectorizationFactor &VF, - Optional VScale, Loop *L, + std::optional VScale, Loop *L, ScalarEvolution &SE) { InstructionCost CheckCost = Checks.getCost(); if (!CheckCost.isValid()) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -293,12 +293,12 @@ if (const auto *IE = dyn_cast(InsertInst)) { const auto *VT = dyn_cast(IE->getType()); if (!VT) - return None; + return std::nullopt; const auto *CI = dyn_cast(IE->getOperand(2)); if (!CI) - return None; + return std::nullopt; if (CI->getValue().uge(VT->getNumElements())) - return None; + return std::nullopt; Index *= VT->getNumElements(); Index += CI->getZExtValue(); return Index; @@ -314,7 +314,7 @@ Index *= AT->getNumElements(); CurrentType = AT->getElementType(); } else { - return None; + return std::nullopt; } Index += I; } @@ -326,7 +326,7 @@ /// elements actually mask the insertelement buildvector, if any. template static SmallBitVector isUndefVector(const Value *V, - ArrayRef ShuffleMask = None) { + ArrayRef ShuffleMask = std::nullopt) { SmallBitVector Res(ShuffleMask.empty() ? 1 : ShuffleMask.size(), true); using T = std::conditional_t; if (isa(V)) @@ -417,10 +417,10 @@ const auto *It = find_if(VL, [](Value *V) { return isa(V); }); if (It == VL.end()) - return None; + return std::nullopt; auto *EI0 = cast(*It); if (isa(EI0->getVectorOperandType())) - return None; + return std::nullopt; unsigned Size = cast(EI0->getVectorOperandType())->getNumElements(); Value *Vec1 = nullptr; @@ -434,19 +434,19 @@ continue; auto *EI = cast(VL[I]); if (isa(EI->getVectorOperandType())) - return None; + return std::nullopt; auto *Vec = EI->getVectorOperand(); // We can extractelement from undef or poison vector. if (isUndefVector(Vec).all()) continue; // All vector operands must have the same number of vector elements. if (cast(Vec->getType())->getNumElements() != Size) - return None; + return std::nullopt; if (isa(EI->getIndexOperand())) continue; auto *Idx = dyn_cast(EI->getIndexOperand()); if (!Idx) - return None; + return std::nullopt; // Undefined behavior if Idx is negative or >= Size. if (Idx->getValue().uge(Size)) continue; @@ -460,7 +460,7 @@ Vec2 = Vec; Mask[I] += Size; } else { - return None; + return std::nullopt; } if (CommonShuffleMode == Permute) continue; @@ -737,12 +737,12 @@ if (Opcode == Instruction::ExtractElement) { auto *CI = dyn_cast(E->getOperand(1)); if (!CI) - return None; + return std::nullopt; return CI->getZExtValue(); } ExtractValueInst *EI = cast(E); if (EI->getNumIndices() != 1) - return None; + return std::nullopt; return *EI->idx_begin(); } @@ -980,7 +980,7 @@ /// \returns the vectorization cost of the subtree that starts at \p VL. /// A negative number means that this is profitable. 
- InstructionCost getTreeCost(ArrayRef VectorizedVals = None); + InstructionCost getTreeCost(ArrayRef VectorizedVals = std::nullopt); /// Construct a vectorizable tree that starts at \p Roots, ignoring users for /// the purpose of scheduling and extraction in the \p UserIgnoreLst. @@ -1420,7 +1420,7 @@ // Recursively calculate the cost at each level int TmpScore = getScoreAtLevelRec(I1->getOperand(OpIdx1), I2->getOperand(OpIdx2), - I1, I2, CurrLevel + 1, None); + I1, I2, CurrLevel + 1, std::nullopt); // Look for the best score. if (TmpScore > LookAheadHeuristics::ScoreFail && TmpScore > MaxTmpScore) { @@ -1585,7 +1585,7 @@ auto *IdxLaneI = dyn_cast(IdxLaneV); if (!IdxLaneI || !isa(OpIdxLaneV)) return 0; - return R.areAllUsersVectorized(IdxLaneI, None) + return R.areAllUsersVectorized(IdxLaneI, std::nullopt) ? LookAheadHeuristics::ScoreAllUserVectorized : 0; } @@ -1653,7 +1653,7 @@ // Our strategy mode for OpIdx. ReorderingMode RMode = ReorderingModes[OpIdx]; if (RMode == ReorderingMode::Failed) - return None; + return std::nullopt; // The linearized opcode of the operand at OpIdx, Lane. bool OpIdxAPO = getData(OpIdx, Lane).APO; @@ -1662,7 +1662,7 @@ // Sometimes we have more than one option (e.g., Opcode and Undefs), so we // are using the score to differentiate between the two. struct BestOpData { - Optional Idx = None; + Optional Idx = std::nullopt; unsigned Score = 0; } BestOp; BestOp.Score = @@ -1722,7 +1722,7 @@ return BestOp.Idx; } // If we could not find a good match return None. - return None; + return std::nullopt; } /// Helper for reorderOperandVecs. @@ -2153,7 +2153,7 @@ int Score = LookAhead.getScoreAtLevelRec(Candidates[I].first, Candidates[I].second, /*U1=*/nullptr, /*U2=*/nullptr, - /*Level=*/1, None); + /*Level=*/1, std::nullopt); if (Score > BestScore) { BestScore = Score; Index = I; @@ -2655,8 +2655,8 @@ TreeEntry *newTreeEntry(ArrayRef VL, Optional Bundle, const InstructionsState &S, const EdgeInfo &UserTreeIdx, - ArrayRef ReuseShuffleIndices = None, - ArrayRef ReorderIndices = None) { + ArrayRef ReuseShuffleIndices = std::nullopt, + ArrayRef ReorderIndices = std::nullopt) { TreeEntry::EntryState EntryState = Bundle ? TreeEntry::Vectorize : TreeEntry::NeedToGather; return newTreeEntry(VL, EntryState, Bundle, S, UserTreeIdx, @@ -2668,8 +2668,8 @@ Optional Bundle, const InstructionsState &S, const EdgeInfo &UserTreeIdx, - ArrayRef ReuseShuffleIndices = None, - ArrayRef ReorderIndices = None) { + ArrayRef ReuseShuffleIndices = std::nullopt, + ArrayRef ReorderIndices = std::nullopt) { assert(((!Bundle && EntryState == TreeEntry::NeedToGather) || (Bundle && EntryState != TreeEntry::NeedToGather)) && "Need to vectorize gather entry?"); @@ -3580,11 +3580,11 @@ STE = LocalSTE; else if (STE != LocalSTE) // Take the order only from the single vector node. - return None; + return std::nullopt; unsigned Lane = std::distance(STE->Scalars.begin(), find(STE->Scalars, V)); if (Lane >= NumScalars) - return None; + return std::nullopt; if (CurrentOrder[Lane] != NumScalars) { if (Lane != I) continue; @@ -3623,7 +3623,7 @@ } return CurrentOrder; } - return None; + return std::nullopt; } namespace { @@ -3808,14 +3808,14 @@ for (Value *V : TE.Scalars) { auto *L = dyn_cast(V); if (!L || !L->isSimple()) - return None; + return std::nullopt; Ptrs.push_back(L->getPointerOperand()); } BoUpSLP::OrdersType Order; if (clusterSortPtrAccesses(Ptrs, ScalarTy, *DL, *SE, Order)) return Order; - return None; + return std::nullopt; } /// Check if two insertelement instructions are from the same buildvector. 
@@ -3835,7 +3835,7 @@ auto *IE2 = V; Optional Idx1 = getInsertIndex(IE1); Optional Idx2 = getInsertIndex(IE2); - if (Idx1 == None || Idx2 == None) + if (Idx1 == std::nullopt || Idx2 == std::nullopt) return false; // Go through the vector operand of insertelement instructions trying to find // either VU as the original vector for IE2 or V as the original vector for @@ -3878,7 +3878,7 @@ unsigned Sz = TE.Scalars.size(); if (!ShuffleVectorInst::isOneUseSingleSourceMask(TE.ReuseShuffleIndices, Sz)) - return None; + return std::nullopt; unsigned VF = TE.getVectorFactor(); // Try build correct order for extractelement instructions. SmallVector ReusedMask(TE.ReuseShuffleIndices.begin(), @@ -3940,7 +3940,7 @@ return false; Optional Idx1 = getInsertIndex(IE1); Optional Idx2 = getInsertIndex(IE2); - if (Idx1 == None || Idx2 == None) + if (Idx1 == std::nullopt || Idx2 == std::nullopt) return false; return *Idx1 < *Idx2; } @@ -3950,7 +3950,7 @@ return false; Optional Idx1 = getExtractIndex(EE1); Optional Idx2 = getExtractIndex(EE2); - if (Idx1 == None || Idx2 == None) + if (Idx1 == std::nullopt || Idx2 == std::nullopt) return false; return *Idx1 < *Idx2; } @@ -4011,7 +4011,7 @@ if (Optional Order = findPartiallyOrderedLoads(TE)) return Order; } - return None; + return std::nullopt; } /// Checks if the given mask is a "clustered" mask with the same clusters of @@ -4961,7 +4961,7 @@ })) || !llvm::isPowerOf2_32(NumUniqueScalarValues)) { LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx); return false; } VL = UniqueValues; @@ -4985,7 +4985,7 @@ })))) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); if (TryToFindDuplicates(S)) - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); return; } @@ -4996,7 +4996,7 @@ cast(S.OpValue)->getVectorOperandType())) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n"); if (TryToFindDuplicates(S)) - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); return; } @@ -5005,14 +5005,14 @@ if (S.OpValue->getType()->isVectorTy() && !isa(S.OpValue)) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n"); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx); return; } if (StoreInst *SI = dyn_cast(S.OpValue)) if (SI->getValueOperand()->getType()->isVectorTy()) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n"); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx); return; } @@ -5100,7 +5100,7 @@ NotProfitableForVectorization(VL)) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. 
\n"); if (TryToFindDuplicates(S)) - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); return; } @@ -5114,7 +5114,7 @@ if (EphValues.count(V)) { LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V << ") is ephemeral.\n"); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx); return; } } @@ -5126,7 +5126,7 @@ if (!E->isSame(VL)) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n"); if (TryToFindDuplicates(S)) - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); return; } @@ -5146,7 +5146,7 @@ LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V << ") is already in tree.\n"); if (TryToFindDuplicates(S)) - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); return; } @@ -5158,7 +5158,7 @@ if (UserIgnoreList && UserIgnoreList->contains(V)) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); if (TryToFindDuplicates(S)) - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); return; } @@ -5189,7 +5189,7 @@ // Don't go into unreachable blocks. They may contain instructions with // dependency cycles which confuse the final scheduling. LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n"); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx); return; } @@ -5198,7 +5198,7 @@ // place to insert a shuffle if we need to, so just avoid that issue. if (isa(BB->getTerminator())) { LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n"); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx); return; } @@ -5222,7 +5222,7 @@ assert((!BS.getScheduleData(VL0) || !BS.getScheduleData(VL0)->isPartOfBundle()) && "tryScheduleBundle should cancelScheduling on failure"); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); return; } @@ -5242,7 +5242,7 @@ LLVM_DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (terminator use).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); return; } @@ -5309,7 +5309,7 @@ return; } LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n"); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); BS.cancelScheduling(VL, VL0); return; @@ -5322,7 +5322,8 @@ ValueSet SourceVectors; for (Value *V : VL) { SourceVectors.insert(cast(V)->getOperand(0)); - assert(getInsertIndex(V) != None && "Non-constant or undef index?"); + assert(getInsertIndex(V) != std::nullopt && + "Non-constant or undef index?"); } if (count_if(VL, [&SourceVectors](Value *V) { @@ -5331,7 +5332,7 @@ // Found 2nd source vector - cancel. 
LLVM_DEBUG(dbgs() << "SLP: Gather of insertelement vectors with " "different source vectors.\n"); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx); BS.cancelScheduling(VL, VL0); return; } @@ -5357,7 +5358,7 @@ if (IsIdentity) CurrentOrder.clear(); TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, - None, CurrentOrder); + std::nullopt, CurrentOrder); LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n"); constexpr int NumOps = 2; @@ -5408,7 +5409,7 @@ break; case LoadsState::Gather: BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); #ifndef NDEBUG Type *ScalarTy = VL0->getType(); @@ -5443,7 +5444,7 @@ Type *Ty = cast(V)->getOperand(0)->getType(); if (Ty != SrcTy || !isValidElementType(Ty)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n"); @@ -5476,7 +5477,7 @@ if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) || Cmp->getOperand(0)->getType() != ComparedTy) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n"); @@ -5568,7 +5569,7 @@ if (I->getNumOperands() != 2) { LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); return; } @@ -5586,7 +5587,7 @@ LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); return; } @@ -5608,7 +5609,7 @@ LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); return; } @@ -5676,7 +5677,7 @@ if (DL->getTypeSizeInBits(ScalarTy) != DL->getTypeAllocSizeInBits(ScalarTy)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering stores of non-packed type.\n"); return; @@ -5691,7 +5692,7 @@ auto *SI = cast(V); if (!SI->isSimple()) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple stores.\n"); return; @@ -5739,7 +5740,7 @@ } BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n"); return; @@ -5757,7 +5758,7 @@ if (!VecFunc && !isTriviallyVectorizable(ID)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, None /*not vectorized*/, 
S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); return; @@ -5776,7 +5777,7 @@ VecFunc != VFDatabase(*CI2).getVectorizedFunction(Shape)) || !CI->hasIdenticalOperandBundleSchema(*CI2)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *V << "\n"); @@ -5789,7 +5790,7 @@ Value *A1J = CI2->getArgOperand(j); if (ScalarArgs[j] != A1J) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI << " argument " << ScalarArgs[j] << "!=" << A1J @@ -5804,7 +5805,7 @@ CI->op_begin() + CI->getBundleOperandsEndIndex(), CI2->op_begin() + CI2->getBundleOperandsStartIndex())) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI << "!=" << *V << '\n'); @@ -5835,7 +5836,7 @@ // then do not vectorize this instruction. if (!S.isAltShuffle()) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); return; @@ -5898,7 +5899,7 @@ } default: BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n"); return; @@ -6336,20 +6337,22 @@ unsigned Idx = (Data.second / NumElts) * NumElts; unsigned EENumElts = EEVTy->getNumElements(); if (Idx + NumElts <= EENumElts) { - Cost += TTI->getShuffleCost(TargetTransformInfo::SK_ExtractSubvector, - EEVTy, None, CostKind, Idx, VecTy); + Cost += + TTI->getShuffleCost(TargetTransformInfo::SK_ExtractSubvector, + EEVTy, std::nullopt, CostKind, Idx, VecTy); } else { // Need to round up the subvector type vectorization factor to avoid a // crash in cost model functions. Make SubVT so that Idx + VF of SubVT // <= EENumElts. auto *SubVT = FixedVectorType::get(VecTy->getElementType(), EENumElts - Idx); - Cost += TTI->getShuffleCost(TargetTransformInfo::SK_ExtractSubvector, - EEVTy, None, CostKind, Idx, SubVT); + Cost += + TTI->getShuffleCost(TargetTransformInfo::SK_ExtractSubvector, + EEVTy, std::nullopt, CostKind, Idx, SubVT); } } else { Cost += TTI->getShuffleCost(TargetTransformInfo::SK_InsertSubvector, - VecTy, None, CostKind, 0, EEVTy); + VecTy, std::nullopt, CostKind, 0, EEVTy); } } }; @@ -6417,7 +6420,7 @@ assert(VecTy == FinalVecTy && "No reused scalars expected for broadcast."); return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, - /*Mask=*/None, CostKind, /*Index=*/0, + /*Mask=*/std::nullopt, CostKind, /*Index=*/0, /*SubTp=*/nullptr, /*Args=*/VL[0]); } InstructionCost ReuseShuffleCost = 0; @@ -6507,8 +6510,9 @@ if (NeedInsertSubvectorAnalysis) { // Add the cost for the subvectors insert. 
for (int I = VF, E = VL.size(); I < E; I += VF) - GatherCost += TTI->getShuffleCost(TTI::SK_InsertSubvector, VecTy, - None, CostKind, I, LoadTy); + GatherCost += + TTI->getShuffleCost(TTI::SK_InsertSubvector, VecTy, + std::nullopt, CostKind, I, LoadTy); } return ReuseShuffleCost + GatherCost - ScalarsCost; } @@ -6721,8 +6725,9 @@ if (InsertVecSz != VecSz) { auto *ActualVecTy = FixedVectorType::get(SrcVecTy->getElementType(), VecSz); - Cost += TTI->getShuffleCost(TTI::SK_InsertSubvector, ActualVecTy, None, - CostKind, OffsetBeg - Offset, InsertVecTy); + Cost += TTI->getShuffleCost(TTI::SK_InsertSubvector, ActualVecTy, + std::nullopt, CostKind, OffsetBeg - Offset, + InsertVecTy); } else { for (unsigned I = 0, End = OffsetBeg - Offset; I < End; ++I) Mask[I] = InMask.test(I) ? UndefMaskElem : I; @@ -7693,7 +7698,7 @@ if (const TreeEntry *VTE = getTreeEntry(V)) VToTEs.insert(VTE); if (VToTEs.empty()) - return None; + return std::nullopt; if (UsedTEs.empty()) { // The first iteration, just insert the list of nodes to vector. UsedTEs.push_back(VToTEs); @@ -7722,7 +7727,7 @@ // If the number of input vectors is greater than 2 - not a permutation, // fallback to the regular gather. if (UsedTEs.size() == 2) - return None; + return std::nullopt; UsedTEs.push_back(SavedVToTEs); Idx = UsedTEs.size() - 1; } @@ -7733,7 +7738,7 @@ if (UsedTEs.empty()) { assert(all_of(TE->Scalars, UndefValue::classof) && "Expected vector of undefs only."); - return None; + return std::nullopt; } unsigned VF = 0; @@ -7767,7 +7772,7 @@ // No 2 source vectors with the same vector factor - give up and do regular // gather. if (Entries.empty()) - return None; + return std::nullopt; } // Build a shuffle mask for better cost estimation and vector emission. @@ -7782,7 +7787,7 @@ // Extra check required by isSingleSourceMaskImpl function (called by // ShuffleVectorInst::isSingleSourceMask). if (Mask[I] >= 2 * E) - return None; + return std::nullopt; } switch (Entries.size()) { case 1: @@ -7792,7 +7797,7 @@ default: break; } - return None; + return std::nullopt; } InstructionCost BoUpSLP::getGatherCost(FixedVectorType *Ty, @@ -9744,7 +9749,7 @@ // dependencies and emit instruction in the wrong order at the actual // scheduling. 
TryScheduleBundleImpl(/*ReSchedule=*/false, nullptr); - return None; + return std::nullopt; } } @@ -9774,7 +9779,7 @@ TryScheduleBundleImpl(ReSchedule, Bundle); if (!Bundle->isReady()) { cancelScheduling(VL, S.OpValue); - return None; + return std::nullopt; } return Bundle; } @@ -12259,7 +12264,7 @@ if (auto *ST = dyn_cast(CurrentType)) { for (auto *Elt : ST->elements()) if (Elt != ST->getElementType(0)) // check homogeneity - return None; + return std::nullopt; AggregateSize *= ST->getNumElements(); CurrentType = ST->getElementType(0); } else if (auto *AT = dyn_cast(CurrentType)) { @@ -12271,7 +12276,7 @@ } else if (CurrentType->isSingleValueType()) { return AggregateSize; } else { - return None; + return std::nullopt; } } while (true); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp @@ -193,7 +193,7 @@ if (any_of(Values, [Opcode](VPValue *V) { return cast(V)->getOpcode() != Opcode; })) - return None; + return std::nullopt; return {Opcode}; } diff --git a/llvm/lib/WindowsDriver/MSVCPaths.cpp b/llvm/lib/WindowsDriver/MSVCPaths.cpp --- a/llvm/lib/WindowsDriver/MSVCPaths.cpp +++ b/llvm/lib/WindowsDriver/MSVCPaths.cpp @@ -94,9 +94,9 @@ } static bool getWindowsSDKDirViaCommandLine( - llvm::vfs::FileSystem &VFS, llvm::Optional WinSdkDir, - llvm::Optional WinSdkVersion, - llvm::Optional WinSysRoot, std::string &Path, int &Major, + llvm::vfs::FileSystem &VFS, std::optional WinSdkDir, + std::optional WinSdkVersion, + std::optional WinSysRoot, std::string &Path, int &Major, std::string &Version) { if (WinSdkDir || WinSysRoot) { // Don't validate the input; trust the value supplied by the user. @@ -393,9 +393,9 @@ return !VFS.exists(TestPath); } -bool getWindowsSDKDir(vfs::FileSystem &VFS, Optional WinSdkDir, - Optional WinSdkVersion, - Optional WinSysRoot, std::string &Path, +bool getWindowsSDKDir(vfs::FileSystem &VFS, std::optional WinSdkDir, + std::optional WinSdkVersion, + std::optional WinSysRoot, std::string &Path, int &Major, std::string &WindowsSDKIncludeVersion, std::string &WindowsSDKLibVersion) { // Trust /winsdkdir and /winsdkversion if present. @@ -448,10 +448,11 @@ return false; } -bool getUniversalCRTSdkDir(vfs::FileSystem &VFS, Optional WinSdkDir, - Optional WinSdkVersion, - Optional WinSysRoot, std::string &Path, - std::string &UCRTVersion) { +bool getUniversalCRTSdkDir(vfs::FileSystem &VFS, + std::optional WinSdkDir, + std::optional WinSdkVersion, + std::optional WinSysRoot, + std::string &Path, std::string &UCRTVersion) { // If /winsdkdir is passed, use it as location for the UCRT too. // FIXME: Should there be a dedicated /ucrtdir to override /winsdkdir? int Major; @@ -473,9 +474,9 @@ } bool findVCToolChainViaCommandLine(vfs::FileSystem &VFS, - Optional VCToolsDir, - Optional VCToolsVersion, - Optional WinSysRoot, + std::optional VCToolsDir, + std::optional VCToolsVersion, + std::optional WinSysRoot, std::string &Path, ToolsetLayout &VSLayout) { // Don't validate the input; trust the value supplied by the user. // The primary motivation is to prevent unnecessary file and registry access. 
diff --git a/llvm/lib/XRay/InstrumentationMap.cpp b/llvm/lib/XRay/InstrumentationMap.cpp --- a/llvm/lib/XRay/InstrumentationMap.cpp +++ b/llvm/lib/XRay/InstrumentationMap.cpp @@ -38,14 +38,14 @@ auto I = FunctionIds.find(Addr); if (I != FunctionIds.end()) return I->second; - return None; + return std::nullopt; } Optional InstrumentationMap::getFunctionAddr(int32_t FuncId) const { auto I = FunctionAddresses.find(FuncId); if (I != FunctionAddresses.end()) return I->second; - return None; + return std::nullopt; } using RelocMap = DenseMap; diff --git a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll --- a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s -define i32 @test1(i32 %i, i32* %a) { +define i32 @test1(i32 %i, ptr %a) { ; CHECK: Printing analysis {{.*}} for function 'test1' entry: br label %body @@ -9,8 +9,8 @@ body: %iv = phi i32 [ 0, %entry ], [ %next, %body ] %base = phi i32 [ 0, %entry ], [ %sum, %body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv - %0 = load i32, i32* %arrayidx + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv + %0 = load i32, ptr %arrayidx %sum = add nsw i32 %0, %base %next = add i32 %iv, 1 %exitcond = icmp eq i32 %next, %i @@ -197,7 +197,7 @@ !3 = !{!"branch_weights", i32 100, i32 1} -define i32 @test_cold_call_sites(i32* %a) { +define i32 @test_cold_call_sites(ptr %a) { ; Test that edges to blocks post-dominated by cold call sites ; are marked as not expected to be taken. ; TODO(dnovillo) The calls to regular_function should not be merged, but @@ -209,8 +209,8 @@ ; CHECK: edge entry -> else probability is 0x78787f1d / 0x80000000 = 94.12% [HOT edge] entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp ugt i32 %val1, 1 br i1 %cond1, label %then, label %else @@ -220,8 +220,8 @@ br label %exit else: - %gep2 = getelementptr i32, i32* %a, i32 2 - %val2 = load i32, i32* %gep2 + %gep2 = getelementptr i32, ptr %a, i32 2 + %val2 = load i32, ptr %gep2 %val3 = call i32 @regular_function(i32 %val2) br label %exit @@ -231,7 +231,7 @@ } ; CHECK-LABEL: test_invoke_code_callsite1 -define i32 @test_invoke_code_callsite1(i1 %c) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define i32 @test_invoke_code_callsite1(i1 %c) personality ptr @__gxx_personality_v0 { entry: br i1 %c, label %if.then, label %if.end ; Edge "entry->if.end" should have higher probability based on the cold call @@ -251,7 +251,7 @@ br label %if.end lpad: - %ll = landingpad { i8*, i32 } + %ll = landingpad { ptr, i32 } cleanup br label %if.end @@ -260,7 +260,7 @@ } ; CHECK-LABEL: test_invoke_code_callsite2 -define i32 @test_invoke_code_callsite2(i1 %c) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define i32 @test_invoke_code_callsite2(i1 %c) personality ptr @__gxx_personality_v0 { entry: br i1 %c, label %if.then, label %if.end @@ -277,7 +277,7 @@ br label %if.end lpad: - %ll = landingpad { i8*, i32 } + %ll = landingpad { ptr, i32 } cleanup call void @ColdFunc() #0 br label %if.end @@ -287,7 +287,7 @@ } ; CHECK-LABEL: test_invoke_code_callsite3 -define i32 @test_invoke_code_callsite3(i1 %c) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define i32 @test_invoke_code_callsite3(i1 %c) personality ptr @__gxx_personality_v0 { 
entry: br i1 %c, label %if.then, label %if.end ; CHECK: edge entry -> if.then probability is 0x078780e3 / 0x80000000 = 5.88% @@ -306,7 +306,7 @@ br label %if.end lpad: - %ll = landingpad { i8*, i32 } + %ll = landingpad { ptr, i32 } cleanup call void @ColdFunc() #0 br label %if.end @@ -316,7 +316,7 @@ } ; CHECK-LABEL: test_invoke_code_profiled -define void @test_invoke_code_profiled(i1 %c) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define void @test_invoke_code_profiled(i1 %c) personality ptr @__gxx_personality_v0 { entry: ; CHECK: edge entry -> invoke.to0 probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] ; CHECK: edge entry -> lpad probability is 0x00000800 / 0x80000000 = 0.00% @@ -339,7 +339,7 @@ ret void lpad: - %ll = landingpad { i8*, i32 } + %ll = landingpad { ptr, i32 } cleanup ret void } diff --git a/llvm/test/Analysis/BranchProbabilityInfo/deopt-invoke.ll b/llvm/test/Analysis/BranchProbabilityInfo/deopt-invoke.ll --- a/llvm/test/Analysis/BranchProbabilityInfo/deopt-invoke.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/deopt-invoke.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s -declare i32* @"personality_function"() #1 +declare ptr @"personality_function"() #1 declare void @foo(i32) declare void @bar() declare void @llvm.experimental.deoptimize.isVoid(...) @@ -8,7 +8,7 @@ ; Even though the likeliness of 'invoke' to throw an exception is assessed as low ; all other paths are even less likely. Check that hot paths leads to excepion handler. -define void @test1(i32 %0) personality i32* ()* @"personality_function" !prof !1 { +define void @test1(i32 %0) personality ptr @"personality_function" !prof !1 { ;CHECK: edge entry -> unreached probability is 0x00000001 / 0x80000000 = 0.00% ;CHECK: edge entry -> invoke probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] ;CHECK: edge invoke -> invoke.cont.unreached probability is 0x00000000 / 0x80000000 = 0.00% @@ -28,9 +28,9 @@ unreachable land.pad: - %v20 = landingpad { i8*, i32 } + %v20 = landingpad { ptr, i32 } cleanup - %v21 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(256)* inttoptr (i64 8 to i8 addrspace(1)* addrspace(256)*), align 8 + %v21 = load ptr addrspace(1), ptr addrspace(256) inttoptr (i64 8 to ptr addrspace(256)), align 8 br label %exit exit: @@ -38,7 +38,7 @@ ret void } -define void @test2(i32 %0) personality i32* ()* @"personality_function" { +define void @test2(i32 %0) personality ptr @"personality_function" { ;CHECK: edge entry -> unreached probability is 0x00000000 / 0x80000000 = 0.00% ;CHECK: edge entry -> invoke probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] ;CHECK: edge invoke -> invoke.cont.cold probability is 0x7fff8000 / 0x80000000 = 100.00% [HOT edge] @@ -58,9 +58,9 @@ unreachable land.pad: - %v20 = landingpad { i8*, i32 } + %v20 = landingpad { ptr, i32 } cleanup - %v21 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(256)* inttoptr (i64 8 to i8 addrspace(1)* addrspace(256)*), align 8 + %v21 = load ptr addrspace(1), ptr addrspace(256) inttoptr (i64 8 to ptr addrspace(256)), align 8 br label %exit exit: @@ -68,7 +68,7 @@ ret void } -define void @test3(i32 %0) personality i32* ()* @"personality_function" { +define void @test3(i32 %0) personality ptr @"personality_function" { ;CHECK: edge entry -> unreached probability is 0x00000000 / 0x80000000 = 0.00% ;CHECK: edge entry -> invoke probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] ;CHECK: edge invoke -> invoke.cont.cold probability is 0x7fff8000 / 
0x80000000 = 100.00% [HOT edge] @@ -87,9 +87,9 @@ unreachable land.pad: - %v20 = landingpad { i8*, i32 } + %v20 = landingpad { ptr, i32 } cleanup - %v21 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(256)* inttoptr (i64 8 to i8 addrspace(1)* addrspace(256)*), align 8 + %v21 = load ptr addrspace(1), ptr addrspace(256) inttoptr (i64 8 to ptr addrspace(256)), align 8 call void @cold() br label %exit diff --git a/llvm/test/Analysis/BranchProbabilityInfo/libfunc_call.ll b/llvm/test/Analysis/BranchProbabilityInfo/libfunc_call.ll --- a/llvm/test/Analysis/BranchProbabilityInfo/libfunc_call.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/libfunc_call.ll @@ -1,21 +1,21 @@ ; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s -declare i32 @strcmp(i8*, i8*) -declare i32 @strncmp(i8*, i8*, i32) -declare i32 @strcasecmp(i8*, i8*) -declare i32 @strncasecmp(i8*, i8*, i32) -declare i32 @memcmp(i8*, i8*) -declare i32 @bcmp(i8*, i8*) -declare i32 @nonstrcmp(i8*, i8*) +declare i32 @strcmp(ptr, ptr) +declare i32 @strncmp(ptr, ptr, i32) +declare i32 @strcasecmp(ptr, ptr) +declare i32 @strncasecmp(ptr, ptr, i32) +declare i32 @memcmp(ptr, ptr) +declare i32 @bcmp(ptr, ptr) +declare i32 @nonstrcmp(ptr, ptr) ; Check that the result of strcmp is considered more likely to be nonzero than ; zero, and equally likely to be (nonzero) positive or negative. -define i32 @test_strcmp_eq(i8* %p, i8* %q) { +define i32 @test_strcmp_eq(ptr %p, ptr %q) { ; CHECK: Printing analysis {{.*}} for function 'test_strcmp_eq' entry: - %val = call i32 @strcmp(i8* %p, i8* %q) + %val = call i32 @strcmp(ptr %p, ptr %q) %cond = icmp eq i32 %val, 0 br i1 %cond, label %then, label %else ; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50% @@ -34,10 +34,10 @@ ret i32 %result } -define i32 @test_strcmp_eq5(i8* %p, i8* %q) { +define i32 @test_strcmp_eq5(ptr %p, ptr %q) { ; CHECK: Printing analysis {{.*}} for function 'test_strcmp_eq5' entry: - %val = call i32 @strcmp(i8* %p, i8* %q) + %val = call i32 @strcmp(ptr %p, ptr %q) %cond = icmp eq i32 %val, 5 br i1 %cond, label %then, label %else ; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50% @@ -56,10 +56,10 @@ ret i32 %result } -define i32 @test_strcmp_ne(i8* %p, i8* %q) { +define i32 @test_strcmp_ne(ptr %p, ptr %q) { ; CHECK: Printing analysis {{.*}} for function 'test_strcmp_ne' entry: - %val = call i32 @strcmp(i8* %p, i8* %q) + %val = call i32 @strcmp(ptr %p, ptr %q) %cond = icmp ne i32 %val, 0 br i1 %cond, label %then, label %else ; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50% @@ -78,10 +78,10 @@ ret i32 %result } -define i32 @test_strcmp_sgt(i8* %p, i8* %q) { +define i32 @test_strcmp_sgt(ptr %p, ptr %q) { ; CHECK: Printing analysis {{.*}} for function 'test_strcmp_sgt' entry: - %val = call i32 @strcmp(i8* %p, i8* %q) + %val = call i32 @strcmp(ptr %p, ptr %q) %cond = icmp sgt i32 %val, 0 br i1 %cond, label %then, label %else ; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% @@ -100,10 +100,10 @@ ret i32 %result } -define i32 @test_strcmp_slt(i8* %p, i8* %q) { +define i32 @test_strcmp_slt(ptr %p, ptr %q) { ; CHECK: Printing analysis {{.*}} for function 'test_strcmp_slt' entry: - %val = call i32 @strcmp(i8* %p, i8* %q) + %val = call i32 @strcmp(ptr %p, ptr %q) %cond = icmp slt i32 %val, 0 br i1 %cond, label %then, label %else ; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% @@ -125,10 +125,10 @@ ; Similarly check other library functions that have the 
same behaviour -define i32 @test_strncmp_sgt(i8* %p, i8* %q) { +define i32 @test_strncmp_sgt(ptr %p, ptr %q) { ; CHECK: Printing analysis {{.*}} for function 'test_strncmp_sgt' entry: - %val = call i32 @strncmp(i8* %p, i8* %q, i32 4) + %val = call i32 @strncmp(ptr %p, ptr %q, i32 4) %cond = icmp sgt i32 %val, 0 br i1 %cond, label %then, label %else ; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% @@ -147,10 +147,10 @@ ret i32 %result } -define i32 @test_strcasecmp_sgt(i8* %p, i8* %q) { +define i32 @test_strcasecmp_sgt(ptr %p, ptr %q) { ; CHECK: Printing analysis {{.*}} for function 'test_strcasecmp_sgt' entry: - %val = call i32 @strcasecmp(i8* %p, i8* %q) + %val = call i32 @strcasecmp(ptr %p, ptr %q) %cond = icmp sgt i32 %val, 0 br i1 %cond, label %then, label %else ; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% @@ -169,10 +169,10 @@ ret i32 %result } -define i32 @test_strncasecmp_sgt(i8* %p, i8* %q) { +define i32 @test_strncasecmp_sgt(ptr %p, ptr %q) { ; CHECK: Printing analysis {{.*}} for function 'test_strncasecmp_sgt' entry: - %val = call i32 @strncasecmp(i8* %p, i8* %q, i32 4) + %val = call i32 @strncasecmp(ptr %p, ptr %q, i32 4) %cond = icmp sgt i32 %val, 0 br i1 %cond, label %then, label %else ; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% @@ -191,10 +191,10 @@ ret i32 %result } -define i32 @test_memcmp_sgt(i8* %p, i8* %q) { +define i32 @test_memcmp_sgt(ptr %p, ptr %q) { ; CHECK: Printing analysis {{.*}} for function 'test_memcmp_sgt' entry: - %val = call i32 @memcmp(i8* %p, i8* %q) + %val = call i32 @memcmp(ptr %p, ptr %q) %cond = icmp sgt i32 %val, 0 br i1 %cond, label %then, label %else ; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% @@ -218,10 +218,10 @@ ; heuristic is applied, i.e. positive more likely than negative, nonzero more ; likely than zero. 
-define i32 @test_nonstrcmp_eq(i8* %p, i8* %q) { +define i32 @test_nonstrcmp_eq(ptr %p, ptr %q) { ; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_eq' entry: - %val = call i32 @nonstrcmp(i8* %p, i8* %q) + %val = call i32 @nonstrcmp(ptr %p, ptr %q) %cond = icmp eq i32 %val, 0 br i1 %cond, label %then, label %else ; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50% @@ -240,10 +240,10 @@ ret i32 %result } -define i32 @test_nonstrcmp_ne(i8* %p, i8* %q) { +define i32 @test_nonstrcmp_ne(ptr %p, ptr %q) { ; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_ne' entry: - %val = call i32 @nonstrcmp(i8* %p, i8* %q) + %val = call i32 @nonstrcmp(ptr %p, ptr %q) %cond = icmp ne i32 %val, 0 br i1 %cond, label %then, label %else ; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50% @@ -262,10 +262,10 @@ ret i32 %result } -define i32 @test_nonstrcmp_sgt(i8* %p, i8* %q) { +define i32 @test_nonstrcmp_sgt(ptr %p, ptr %q) { ; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_sgt' entry: - %val = call i32 @nonstrcmp(i8* %p, i8* %q) + %val = call i32 @nonstrcmp(ptr %p, ptr %q) %cond = icmp sgt i32 %val, 0 br i1 %cond, label %then, label %else ; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50% @@ -285,10 +285,10 @@ } -define i32 @test_bcmp_eq(i8* %p, i8* %q) { +define i32 @test_bcmp_eq(ptr %p, ptr %q) { ; CHECK: Printing analysis {{.*}} for function 'test_bcmp_eq' entry: - %val = call i32 @bcmp(i8* %p, i8* %q) + %val = call i32 @bcmp(ptr %p, ptr %q) %cond = icmp eq i32 %val, 0 br i1 %cond, label %then, label %else ; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50% @@ -307,10 +307,10 @@ ret i32 %result } -define i32 @test_bcmp_eq5(i8* %p, i8* %q) { +define i32 @test_bcmp_eq5(ptr %p, ptr %q) { ; CHECK: Printing analysis {{.*}} for function 'test_bcmp_eq5' entry: - %val = call i32 @bcmp(i8* %p, i8* %q) + %val = call i32 @bcmp(ptr %p, ptr %q) %cond = icmp eq i32 %val, 5 br i1 %cond, label %then, label %else ; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50% @@ -331,10 +331,10 @@ -define i32 @test_bcmp_ne(i8* %p, i8* %q) { +define i32 @test_bcmp_ne(ptr %p, ptr %q) { ; CHECK: Printing analysis {{.*}} for function 'test_bcmp_ne' entry: - %val = call i32 @bcmp(i8* %p, i8* %q) + %val = call i32 @bcmp(ptr %p, ptr %q) %cond = icmp ne i32 %val, 0 br i1 %cond, label %then, label %else ; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50% diff --git a/llvm/test/Analysis/BranchProbabilityInfo/loop.ll b/llvm/test/Analysis/BranchProbabilityInfo/loop.ll --- a/llvm/test/Analysis/BranchProbabilityInfo/loop.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/loop.ll @@ -81,7 +81,7 @@ ret void } -define void @test3(i32 %a, i32 %b, i32* %c) { +define void @test3(i32 %a, i32 %b, ptr %c) { entry: br label %do.body ; CHECK: edge entry -> do.body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] @@ -89,7 +89,7 @@ do.body: %i.0 = phi i32 [ 0, %entry ], [ %inc4, %if.end ] call void @g1() - %0 = load i32, i32* %c, align 4 + %0 = load i32, ptr %c, align 4 %cmp = icmp slt i32 %0, 42 br i1 %cmp, label %do.body1, label %if.end ; CHECK: edge do.body -> do.body1 probability is 0x40000000 / 0x80000000 = 50.00% @@ -117,7 +117,7 @@ ret void } -define void @test4(i32 %a, i32 %b, i32* %c) { +define void @test4(i32 %a, i32 %b, ptr %c) { entry: br label %do.body ; CHECK: edge entry -> do.body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] @@ -125,7 +125,7 @@ 
do.body: %i.0 = phi i32 [ 0, %entry ], [ %inc4, %do.end ] call void @g1() - %0 = load i32, i32* %c, align 4 + %0 = load i32, ptr %c, align 4 %cmp = icmp slt i32 %0, 42 br i1 %cmp, label %return, label %do.body1 ; CHECK: edge do.body -> return probability is 0x04000000 / 0x80000000 = 3.12% @@ -157,7 +157,7 @@ ret void } -define void @test5(i32 %a, i32 %b, i32* %c) { +define void @test5(i32 %a, i32 %b, ptr %c) { entry: br label %do.body ; CHECK: edge entry -> do.body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] @@ -170,7 +170,7 @@ do.body1: %j.0 = phi i32 [ 0, %do.body ], [ %inc, %if.end ] - %0 = load i32, i32* %c, align 4 + %0 = load i32, ptr %c, align 4 %cmp = icmp slt i32 %0, 42 br i1 %cmp, label %return, label %if.end ; CHECK: edge do.body1 -> return probability is 0x04000000 / 0x80000000 = 3.12% @@ -201,7 +201,7 @@ ret void } -define void @test6(i32 %a, i32 %b, i32* %c) { +define void @test6(i32 %a, i32 %b, ptr %c) { entry: br label %do.body ; CHECK: edge entry -> do.body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] @@ -215,7 +215,7 @@ do.body1: %j.0 = phi i32 [ 0, %do.body ], [ %inc, %do.cond ] call void @g2() - %0 = load i32, i32* %c, align 4 + %0 = load i32, ptr %c, align 4 %cmp = icmp slt i32 %0, 42 br i1 %cmp, label %return, label %do.cond ; CHECK: edge do.body1 -> return probability is 0x04000000 / 0x80000000 = 3.12% @@ -245,7 +245,7 @@ ret void } -define void @test7(i32 %a, i32 %b, i32* %c) { +define void @test7(i32 %a, i32 %b, ptr %c) { entry: %cmp10 = icmp sgt i32 %a, 0 br i1 %cmp10, label %for.body.lr.ph, label %for.end7 @@ -259,7 +259,7 @@ for.body: %i.011 = phi i32 [ 0, %for.body.lr.ph ], [ %inc6, %for.inc5 ] - %0 = load i32, i32* %c, align 4 + %0 = load i32, ptr %c, align 4 %cmp1 = icmp eq i32 %0, %i.011 br i1 %cmp1, label %for.inc5, label %if.end ; CHECK: edge for.body -> for.inc5 probability is 0x40000000 / 0x80000000 = 50.00% @@ -297,7 +297,7 @@ ret void } -define void @test8(i32 %a, i32 %b, i32* %c) { +define void @test8(i32 %a, i32 %b, ptr %c) { entry: %cmp18 = icmp sgt i32 %a, 0 br i1 %cmp18, label %for.body.lr.ph, label %for.end15 @@ -306,8 +306,8 @@ for.body.lr.ph: %cmp216 = icmp sgt i32 %b, 0 - %arrayidx5 = getelementptr inbounds i32, i32* %c, i64 1 - %arrayidx9 = getelementptr inbounds i32, i32* %c, i64 2 + %arrayidx5 = getelementptr inbounds i32, ptr %c, i64 1 + %arrayidx9 = getelementptr inbounds i32, ptr %c, i64 2 br label %for.body ; CHECK: edge for.body.lr.ph -> for.body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] @@ -320,21 +320,21 @@ for.body3: %j.017 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] - %0 = load i32, i32* %c, align 4 + %0 = load i32, ptr %c, align 4 %cmp4 = icmp eq i32 %0, %j.017 br i1 %cmp4, label %for.inc, label %if.end ; CHECK: edge for.body3 -> for.inc probability is 0x40000000 / 0x80000000 = 50.00% ; CHECK: edge for.body3 -> if.end probability is 0x40000000 / 0x80000000 = 50.00% if.end: - %1 = load i32, i32* %arrayidx5, align 4 + %1 = load i32, ptr %arrayidx5, align 4 %cmp6 = icmp eq i32 %1, %j.017 br i1 %cmp6, label %for.inc, label %if.end8 ; CHECK: edge if.end -> for.inc probability is 0x40000000 / 0x80000000 = 50.00% ; CHECK: edge if.end -> if.end8 probability is 0x40000000 / 0x80000000 = 50.00% if.end8: - %2 = load i32, i32* %arrayidx9, align 4 + %2 = load i32, ptr %arrayidx9, align 4 %cmp10 = icmp eq i32 %2, %j.017 br i1 %cmp10, label %for.inc, label %if.end12 ; CHECK: edge if.end8 -> for.inc probability is 0x40000000 / 0x80000000 = 50.00% @@ -403,7 +403,7 @@ ; Check that the for.body -> 
if.then edge is considered unlikely due to making ; the if-condition false for the next iteration of the loop. -define i32 @test10(i32 %n, i32* %p) { +define i32 @test10(i32 %n, ptr %p) { entry: br label %for.cond ; CHECK: edge entry -> for.cond probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] @@ -421,8 +421,8 @@ ret i32 %sum.0 for.body: - %arrayidx = getelementptr inbounds i32, i32* %p, i32 %i.0 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %p, i32 %i.0 + %0 = load i32, ptr %arrayidx, align 4 %add = add nsw i32 %sum.0, %0 %inc = add nsw i32 %count.0, 1 %cmp1 = icmp sgt i32 %count.0, 6 @@ -430,7 +430,7 @@ ; CHECK: edge for.body -> if.then probability is 0x2aaaa8e4 / 0x80000000 = 33.33% ; CHECK: edge for.body -> for.inc probability is 0x5555571c / 0x80000000 = 66.67% if.then: - store i32 %add, i32* %arrayidx, align 4 + store i32 %add, ptr %arrayidx, align 4 br label %for.inc ; CHECK: edge if.then -> for.inc probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] @@ -444,7 +444,7 @@ ; Each successor to for.body makes itself not be taken in the next iteration, so ; both should be equally likely -define i32 @test11(i32 %n, i32* %p) { +define i32 @test11(i32 %n, ptr %p) { entry: br label %for.cond ; CHECK: edge entry -> for.cond probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] @@ -463,15 +463,15 @@ for.body: %tobool = icmp eq i32 %flip.0, 0 - %arrayidx1 = getelementptr inbounds i32, i32* %p, i32 %i.0 - %0 = load i32, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %p, i32 %i.0 + %0 = load i32, ptr %arrayidx1, align 4 br i1 %tobool, label %if.else, label %if.then ; CHECK: edge for.body -> if.else probability is 0x40000000 / 0x80000000 = 50.00% ; CHECK: edge for.body -> if.then probability is 0x40000000 / 0x80000000 = 50.00% if.then: %add = add nsw i32 %0, %sum.0 - store i32 %add, i32* %arrayidx1, align 4 + store i32 %add, ptr %arrayidx1, align 4 br label %for.inc ; CHECK: edge if.then -> for.inc probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] @@ -510,7 +510,7 @@ ; CHECK: edge invoke.cont -> exit probability is 0x04000000 / 0x80000000 = 3.12% lpad: - %ll = landingpad { i8*, i32 } + %ll = landingpad { ptr, i32 } cleanup br label %exit diff --git a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll --- a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll @@ -103,11 +103,11 @@ ret i32 %b } -@_ZTIi = external global i8* +@_ZTIi = external global ptr ; CHECK-LABEL: throwSmallException ; CHECK-NOT: invoke i32 @smallFunction -define i32 @throwSmallException(i32 %idx, i32 %limit) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define i32 @throwSmallException(i32 %idx, i32 %limit) #0 personality ptr @__gxx_personality_v0 { entry: %cmp = icmp sge i32 %idx, %limit br i1 %cmp, label %if.then, label %if.end @@ -115,18 +115,18 @@ ; CHECK: edge entry -> if.end probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] if.then: ; preds = %entry - %exception = call i8* @__cxa_allocate_exception(i64 1) #0 + %exception = call ptr @__cxa_allocate_exception(i64 1) #0 invoke i32 @smallFunction(i32 %idx) to label %invoke.cont unwind label %lpad ; CHECK: edge if.then -> invoke.cont probability is 0x40000000 / 0x80000000 = 50.00% ; CHECK: edge if.then -> lpad probability is 0x40000000 / 0x80000000 = 50.00% invoke.cont: ; preds = %if.then - call void @__cxa_throw(i8* %exception, 
i8* bitcast (i8** @_ZTIi to i8*), i8* null) #1 + call void @__cxa_throw(ptr %exception, ptr @_ZTIi, ptr null) #1 unreachable lpad: ; preds = %if.then - %ll = landingpad { i8*, i32 } + %ll = landingpad { ptr, i32 } cleanup ret i32 %idx @@ -137,13 +137,13 @@ @a = global i32 4 define i32 @smallFunction(i32 %a) { entry: - %r = load volatile i32, i32* @a + %r = load volatile i32, ptr @a ret i32 %r } attributes #0 = { nounwind } attributes #1 = { noreturn } -declare i8* @__cxa_allocate_exception(i64) +declare ptr @__cxa_allocate_exception(i64) declare i32 @__gxx_personality_v0(...) -declare void @__cxa_throw(i8*, i8*, i8*) +declare void @__cxa_throw(ptr, ptr, ptr) diff --git a/llvm/test/Analysis/BranchProbabilityInfo/pointer_heuristics.ll b/llvm/test/Analysis/BranchProbabilityInfo/pointer_heuristics.ll --- a/llvm/test/Analysis/BranchProbabilityInfo/pointer_heuristics.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/pointer_heuristics.ll @@ -1,19 +1,19 @@ ; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s -define i32 @cmp1(i32* readnone %0, i32* readnone %1) { +define i32 @cmp1(ptr readnone %0, ptr readnone %1) { ; CHECK: Printing analysis results of BPI for function 'cmp1': - %3 = icmp eq i32* %0, %1 + %3 = icmp eq ptr %0, %1 br i1 %3, label %4, label %6 ; CHECK: edge -> probability is 0x30000000 / 0x80000000 = 37.50% ; CHECK: edge -> probability is 0x50000000 / 0x80000000 = 62.50% 4: ; preds = %2 - %5 = tail call i32 bitcast (i32 (...)* @f to i32 ()*)() #2 + %5 = tail call i32 @f() #2 br label %8 ; CHECK: edge -> probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] 6: ; preds = %2 - %7 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)() #2 + %7 = tail call i32 @g() #2 br label %8 ; CHECK: edge -> probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] @@ -22,20 +22,20 @@ ret i32 %9 } -define i32 @cmp2(i32* readnone %0, i32* readnone %1) { +define i32 @cmp2(ptr readnone %0, ptr readnone %1) { ; CHECK: Printing analysis results of BPI for function 'cmp2': - %3 = icmp eq i32* %0, %1 + %3 = icmp eq ptr %0, %1 br i1 %3, label %6, label %4 ; CHECK: edge -> probability is 0x30000000 / 0x80000000 = 37.50% ; CHECK: edge -> probability is 0x50000000 / 0x80000000 = 62.50% 4: ; preds = %2 - %5 = tail call i32 bitcast (i32 (...)* @f to i32 ()*)() #2 + %5 = tail call i32 @f() #2 br label %8 ; CHECK: edge -> probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] 6: ; preds = %2 - %7 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)() #2 + %7 = tail call i32 @g() #2 br label %8 ; CHECK: edge -> probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] @@ -45,19 +45,19 @@ } ; CHECK: Printing analysis results of BPI for function 'cmp3': -define i32 @cmp3(i32* readnone %0) { - %2 = icmp eq i32* %0, null +define i32 @cmp3(ptr readnone %0) { + %2 = icmp eq ptr %0, null br i1 %2, label %3, label %5 ; CHECK: edge -> probability is 0x30000000 / 0x80000000 = 37.50% ; CHECK: edge -> probability is 0x50000000 / 0x80000000 = 62.50% 3: ; preds = %1 - %4 = tail call i32 bitcast (i32 (...)* @f to i32 ()*)() #2 + %4 = tail call i32 @f() #2 br label %7 ; CHECK: edge -> probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] 5: ; preds = %1 - %6 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)() #2 + %6 = tail call i32 @g() #2 br label %7 ; CHECK: edge -> probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] diff --git a/llvm/test/Analysis/BranchProbabilityInfo/pr18705.ll b/llvm/test/Analysis/BranchProbabilityInfo/pr18705.ll --- 
a/llvm/test/Analysis/BranchProbabilityInfo/pr18705.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/pr18705.ll @@ -7,7 +7,7 @@ ; CHECK: edge while.body -> if.then probability is 0x50000000 / 0x80000000 = 62.50% ; CHECK: edge while.body -> if.else probability is 0x30000000 / 0x80000000 = 37.50% -define void @foo1(i32 %n, i32* nocapture %b, i32* nocapture %c, i32* nocapture %d, float* nocapture readonly %f0, float* nocapture readonly %f1) { +define void @foo1(i32 %n, ptr nocapture %b, ptr nocapture %c, ptr nocapture %d, ptr nocapture readonly %f0, ptr nocapture readonly %f1) { entry: %tobool8 = icmp eq i32 %n, 0 br i1 %tobool8, label %while.end, label %while.body.lr.ph @@ -18,36 +18,36 @@ while.body: %indvars.iv = phi i64 [ %0, %while.body.lr.ph ], [ %indvars.iv.next, %if.end ] - %b.addr.011 = phi i32* [ %b, %while.body.lr.ph ], [ %b.addr.1, %if.end ] - %d.addr.010 = phi i32* [ %d, %while.body.lr.ph ], [ %incdec.ptr4, %if.end ] - %c.addr.09 = phi i32* [ %c, %while.body.lr.ph ], [ %c.addr.1, %if.end ] + %b.addr.011 = phi ptr [ %b, %while.body.lr.ph ], [ %b.addr.1, %if.end ] + %d.addr.010 = phi ptr [ %d, %while.body.lr.ph ], [ %incdec.ptr4, %if.end ] + %c.addr.09 = phi ptr [ %c, %while.body.lr.ph ], [ %c.addr.1, %if.end ] %indvars.iv.next = add nsw i64 %indvars.iv, -1 - %arrayidx = getelementptr inbounds float, float* %f0, i64 %indvars.iv.next - %1 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %f1, i64 %indvars.iv.next - %2 = load float, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %f0, i64 %indvars.iv.next + %1 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %f1, i64 %indvars.iv.next + %2 = load float, ptr %arrayidx2, align 4 %cmp = fcmp une float %1, %2 br i1 %cmp, label %if.then, label %if.else if.then: - %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.011, i64 1 - %3 = load i32, i32* %b.addr.011, align 4 + %incdec.ptr = getelementptr inbounds i32, ptr %b.addr.011, i64 1 + %3 = load i32, ptr %b.addr.011, align 4 %add = add nsw i32 %3, 12 - store i32 %add, i32* %b.addr.011, align 4 + store i32 %add, ptr %b.addr.011, align 4 br label %if.end if.else: - %incdec.ptr3 = getelementptr inbounds i32, i32* %c.addr.09, i64 1 - %4 = load i32, i32* %c.addr.09, align 4 + %incdec.ptr3 = getelementptr inbounds i32, ptr %c.addr.09, i64 1 + %4 = load i32, ptr %c.addr.09, align 4 %sub = add nsw i32 %4, -13 - store i32 %sub, i32* %c.addr.09, align 4 + store i32 %sub, ptr %c.addr.09, align 4 br label %if.end if.end: - %c.addr.1 = phi i32* [ %c.addr.09, %if.then ], [ %incdec.ptr3, %if.else ] - %b.addr.1 = phi i32* [ %incdec.ptr, %if.then ], [ %b.addr.011, %if.else ] - %incdec.ptr4 = getelementptr inbounds i32, i32* %d.addr.010, i64 1 - store i32 14, i32* %d.addr.010, align 4 + %c.addr.1 = phi ptr [ %c.addr.09, %if.then ], [ %incdec.ptr3, %if.else ] + %b.addr.1 = phi ptr [ %incdec.ptr, %if.then ], [ %b.addr.011, %if.else ] + %incdec.ptr4 = getelementptr inbounds i32, ptr %d.addr.010, i64 1 + store i32 14, ptr %d.addr.010, align 4 %5 = trunc i64 %indvars.iv.next to i32 %tobool = icmp eq i32 %5, 0 br i1 %tobool, label %while.end, label %while.body diff --git a/llvm/test/Analysis/BranchProbabilityInfo/pr22718.ll b/llvm/test/Analysis/BranchProbabilityInfo/pr22718.ll --- a/llvm/test/Analysis/BranchProbabilityInfo/pr22718.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/pr22718.ll @@ -16,15 +16,15 @@ entry: %retval = alloca i32, align 4 %i = alloca i64, align 8 - store i32 0, i32* %retval - store 
i64 0, i64* @y, align 8
-  store i64 0, i64* @x, align 8
+  store i32 0, ptr %retval
+  store i64 0, ptr @y, align 8
+  store i64 0, ptr @x, align 8
   call void @srand(i32 422304) #3
-  store i64 0, i64* %i, align 8
+  store i64 0, ptr %i, align 8
   br label %for.cond
 for.cond: ; preds = %for.inc, %entry
-  %0 = load i64, i64* %i, align 8
+  %0 = load i64, ptr %i, align 8
   %cmp = icmp ult i64 %0, 13000000000
   br i1 %cmp, label %for.body, label %for.end, !prof !1
@@ -37,30 +37,30 @@
   br i1 %cmp1, label %if.then, label %if.else, !prof !2
 if.then: ; preds = %for.body
-  %1 = load i64, i64* @x, align 8
+  %1 = load i64, ptr @x, align 8
   %inc = add i64 %1, 1
-  store i64 %inc, i64* @x, align 8
+  store i64 %inc, ptr @x, align 8
   br label %if.end
 if.else: ; preds = %for.body
-  %2 = load i64, i64* @y, align 8
+  %2 = load i64, ptr @y, align 8
   %inc3 = add i64 %2, 1
-  store i64 %inc3, i64* @y, align 8
+  store i64 %inc3, ptr @y, align 8
   br label %if.end
 if.end: ; preds = %if.else, %if.then
   br label %for.inc
 for.inc: ; preds = %if.end
-  %3 = load i64, i64* %i, align 8
+  %3 = load i64, ptr %i, align 8
   %inc4 = add i64 %3, 1
-  store i64 %inc4, i64* %i, align 8
+  store i64 %inc4, ptr %i, align 8
   br label %for.cond
 for.end: ; preds = %for.cond
-  %4 = load i64, i64* @x, align 8
-  %5 = load i64, i64* @y, align 8
-  %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str, i32 0, i32 0), i64 %4, i64 %5)
+  %4 = load i64, ptr @x, align 8
+  %5 = load i64, ptr @y, align 8
+  %call5 = call i32 (ptr, ...) @printf(ptr @.str, i64 %4, i64 %5)
   ret i32 0
 }
@@ -70,7 +70,7 @@
 ; Function Attrs: nounwind
 declare i32 @rand() #1
-declare i32 @printf(i8*, ...) #2
+declare i32 @printf(ptr, ...) #2
 attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/Analysis/BranchProbabilityInfo/zero_heuristics.ll b/llvm/test/Analysis/BranchProbabilityInfo/zero_heuristics.ll
--- a/llvm/test/Analysis/BranchProbabilityInfo/zero_heuristics.ll
+++ b/llvm/test/Analysis/BranchProbabilityInfo/zero_heuristics.ll
@@ -4,10 +4,10 @@
 @B = global i32 0, align 4
 ; CHECK-LABEL: eq_opaque_minus_one
-define void @eq_opaque_minus_one(i32* %base) {
+define void @eq_opaque_minus_one(ptr %base) {
 entry:
   %const = bitcast i32 -1 to i32
-  %tmp1 = load i32, i32* @B, align 4
+  %tmp1 = load i32, ptr @B, align 4
   br label %for.body
 ; CHECK: edge for.body -> if.then probability is 0x30000000 / 0x80000000 = 37.50%
@@ -15,15 +15,15 @@
 for.body:
   %tmp4 = phi i32 [ %tmp1, %entry ], [ %tmp7, %for.inc ]
   %inc.iv = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
-  %storemerge176.in = getelementptr inbounds i32, i32* %base, i32 %inc.iv
-  %storemerge176 = load i32, i32* %storemerge176.in, align 4
-  store i32 %storemerge176, i32* @A, align 4
+  %storemerge176.in = getelementptr inbounds i32, ptr %base, i32 %inc.iv
+  %storemerge176 = load i32, ptr %storemerge176.in, align 4
+  store i32 %storemerge176, ptr @A, align 4
   %cmp20 = icmp eq i32 %storemerge176, %const
   br i1 %cmp20, label %if.then, label %for.inc
 if.then:
   %lnot.ext = zext i1 %cmp20 to i32
-  store i32 %lnot.ext, i32* @B, align 4
+  store i32 %lnot.ext, ptr @B, align 4
   br label %for.inc
 for.inc:
@@ -37,10 +37,10 @@
 }
 ; CHECK-LABEL: ne_opaque_minus_one
-define void @ne_opaque_minus_one(i32* %base) {
+define void @ne_opaque_minus_one(ptr %base) {
 entry:
   %const = bitcast i32 -1 to i32
-  %tmp1 = load i32, i32* @B, align 4
+  %tmp1 = load i32, ptr @B, align 4
   br label %for.body
 ; CHECK: edge for.body -> if.then probability is 0x50000000 / 0x80000000 = 62.50%
@@ -48,15 +48,15 @@
 for.body:
   %tmp4 = phi i32 [ %tmp1, %entry ], [ %tmp7, %for.inc ]
   %inc.iv = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
-  %storemerge176.in = getelementptr inbounds i32, i32* %base, i32 %inc.iv
-  %storemerge176 = load i32, i32* %storemerge176.in, align 4
-  store i32 %storemerge176, i32* @A, align 4
+  %storemerge176.in = getelementptr inbounds i32, ptr %base, i32 %inc.iv
+  %storemerge176 = load i32, ptr %storemerge176.in, align 4
+  store i32 %storemerge176, ptr @A, align 4
   %cmp20 = icmp ne i32 %storemerge176, %const
   br i1 %cmp20, label %if.then, label %for.inc
 if.then:
   %lnot.ext = zext i1 %cmp20 to i32
-  store i32 %lnot.ext, i32* @B, align 4
+  store i32 %lnot.ext, ptr @B, align 4
   br label %for.inc
 for.inc:
@@ -70,10 +70,10 @@
 }
 ; CHECK-LABEL: sgt_opaque_minus_one
-define void @sgt_opaque_minus_one(i32* %base) {
+define void @sgt_opaque_minus_one(ptr %base) {
 entry:
   %const = bitcast i32 -1 to i32
-  %tmp1 = load i32, i32* @B, align 4
+  %tmp1 = load i32, ptr @B, align 4
   br label %for.body
 ; CHECK: edge for.body -> if.then probability is 0x50000000 / 0x80000000 = 62.50%
@@ -81,15 +81,15 @@
 for.body:
   %tmp4 = phi i32 [ %tmp1, %entry ], [ %tmp7, %for.inc ]
   %inc.iv = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
-  %storemerge176.in = getelementptr inbounds i32, i32* %base, i32 %inc.iv
-  %storemerge176 = load i32, i32* %storemerge176.in, align 4
-  store i32 %storemerge176, i32* @A, align 4
+  %storemerge176.in = getelementptr inbounds i32, ptr %base, i32 %inc.iv
+  %storemerge176 = load i32, ptr %storemerge176.in, align 4
+  store i32 %storemerge176, ptr @A, align 4
   %cmp20 = icmp sgt i32 %storemerge176, %const
   br i1 %cmp20, label %if.then, label %for.inc
 if.then:
   %lnot.ext = zext i1 %cmp20 to i32
-  store i32 %lnot.ext, i32* @B, align 4
+  store i32 %lnot.ext, ptr @B, align 4
   br label %for.inc
 for.inc:
diff --git a/llvm/test/Analysis/CallGraph/2008-09-09-DirectCall.ll b/llvm/test/Analysis/CallGraph/2008-09-09-DirectCall.ll
--- a/llvm/test/Analysis/CallGraph/2008-09-09-DirectCall.ll
+++ b/llvm/test/Analysis/CallGraph/2008-09-09-DirectCall.ll
@@ -12,6 +12,6 @@
 define void @caller() {
 entry:
-  call void (...) @callee( void (...)* @callee )
+  call void (...) @callee( ptr @callee )
   unreachable
 }
diff --git a/llvm/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll b/llvm/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll
--- a/llvm/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll
+++ b/llvm/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -print-callgraph -disable-output 2>&1 | FileCheck %s
-@a = global void ()* @f ; [#uses=0]
+@a = global ptr @f ; [#uses=0]
 ; CHECK: calls function 'f'
diff --git a/llvm/test/Analysis/CallGraph/callback-calls.ll b/llvm/test/Analysis/CallGraph/callback-calls.ll
--- a/llvm/test/Analysis/CallGraph/callback-calls.ll
+++ b/llvm/test/Analysis/CallGraph/callback-calls.ll
@@ -5,16 +5,16 @@
 ; CHECK-NEXT: CS<{{.*}}> calls function 'broker'
 ; CHECK-NEXT: CS calls function 'callback'
-define void @caller(i32* %arg) {
-  call void @broker(void (i32*)* @callback, i32* %arg)
+define void @caller(ptr %arg) {
+  call void @broker(ptr @callback, ptr %arg)
   ret void
 }
-define void @callback(i32* %arg) {
+define void @callback(ptr %arg) {
   ret void
 }
-declare !callback !0 void @broker(void (i32*)*, i32*)
+declare !callback !0 void @broker(ptr, ptr)
 !0 = !{!1}
 !1 = !{i64 0, i64 1, i1 false}
diff --git a/llvm/test/Analysis/CallGraph/ignore-callback-uses.ll b/llvm/test/Analysis/CallGraph/ignore-callback-uses.ll
--- a/llvm/test/Analysis/CallGraph/ignore-callback-uses.ll
+++ b/llvm/test/Analysis/CallGraph/ignore-callback-uses.ll
@@ -4,10 +4,10 @@
 ; CHECK-NEXT: CS<{{.*}}> calls function '__kmpc_fork_call'
 ; CHECK-EMPTY:
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
 @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
-@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
 ; Function Attrs: noinline nounwind optnone uwtable
 define dso_local void @f() {
@@ -15,7 +15,7 @@
   br label %omp_parallel
 omp_parallel: ; preds = %entry
-  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @f..omp_par to void (i32*, i32*, ...)*))
+  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 0, ptr @f..omp_par)
   br label %omp.par.exit.split
 omp.par.exit.split: ; preds = %omp_parallel
@@ -23,12 +23,12 @@
 }
 ; Function Attrs: norecurse nounwind
-define internal void @f..omp_par(i32* noalias %tid.addr, i32* noalias %zero.addr) {
+define internal void @f..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr) {
 omp.par.entry:
   %tid.addr.local = alloca i32, align 4
-  %0 = load i32, i32* %tid.addr, align 4
-  store i32 %0, i32* %tid.addr.local, align 4
-  %tid = load i32, i32* %tid.addr.local, align 4
+  %0 = load i32, ptr %tid.addr, align 4
+  store i32 %0, ptr %tid.addr.local, align 4
+  %tid = load i32, ptr %tid.addr.local, align 4
   br label %omp.par.region
 omp.par.exit.split.exitStub: ; preds = %omp.par.outlined.exit
@@ -45,7 +45,7 @@
 }
 ; Function Attrs: nounwind
-declare !callback !2 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #2
+declare !callback !2 void @__kmpc_fork_call(ptr, i32, ptr, ...) #2
 !2 = !{!3}
 !3 = !{i64 2, i64 -1, i64 -1, i1 true}
diff --git a/llvm/test/Analysis/CallGraph/llvm-used.ll b/llvm/test/Analysis/CallGraph/llvm-used.ll
--- a/llvm/test/Analysis/CallGraph/llvm-used.ll
+++ b/llvm/test/Analysis/CallGraph/llvm-used.ll
@@ -17,9 +17,9 @@
 ; CHECK-NEXT: Call graph node for function: 'used2'<<{{.*}}>> #uses=1
 ; CHECK-EMPTY:
-@llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @used1 to i8*)]
-@llvm.compiler.used = appending global [1 x void()*] [void ()* @used2]
-@array = appending global [1 x i8*] [i8* bitcast (void ()* @unused to i8*)]
+@llvm.used = appending global [1 x ptr] [ptr @used1]
+@llvm.compiler.used = appending global [1 x ptr] [ptr @used2]
+@array = appending global [1 x ptr] [ptr @unused]
 define internal void @used1() {
 entry:
diff --git a/llvm/test/Analysis/CallGraph/no-intrinsics.ll b/llvm/test/Analysis/CallGraph/no-intrinsics.ll
--- a/llvm/test/Analysis/CallGraph/no-intrinsics.ll
+++ b/llvm/test/Analysis/CallGraph/no-intrinsics.ll
@@ -3,10 +3,10 @@
 ; Check that intrinsics aren't added to the call graph
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1)
-define void @f(i8* %out, i8* %in) {
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %out, i8* align 4 %in, i32 100, i1 false)
+define void @f(ptr %out, ptr %in) {
+  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %out, ptr align 4 %in, i32 100, i1 false)
   ret void
 }
diff --git a/llvm/test/Analysis/CallGraph/non-leaf-intrinsics.ll b/llvm/test/Analysis/CallGraph/non-leaf-intrinsics.ll
--- a/llvm/test/Analysis/CallGraph/non-leaf-intrinsics.ll
+++ b/llvm/test/Analysis/CallGraph/non-leaf-intrinsics.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -S -print-callgraph -disable-output < %s 2>&1 | FileCheck %s
 declare void @llvm.experimental.patchpoint.void(i64, i32, ptr, i32, ...)
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, ptr, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0(i64, i32, ptr, i32, i32, ...)
 define private void @f() {
   ret void
@@ -10,7 +10,7 @@
 define void @calls_statepoint(ptr addrspace(1) %arg) gc "statepoint-example" {
 entry:
   %safepoint_token = call token (i64, i32, ptr, i32, i32, ...)
- @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, ptr elementtype(void ()) @f, i32 0, i32 0, i32 0, i32 0) ["gc-live"(ptr addrspace(1) %arg, ptr addrspace(1) %arg, ptr addrspace(1) %arg, ptr addrspace(1) %arg), "deopt" (i32 0, i32 0, i32 0, i32 10, i32 0)]
+ @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @f, i32 0, i32 0, i32 0, i32 0) ["gc-live"(ptr addrspace(1) %arg, ptr addrspace(1) %arg, ptr addrspace(1) %arg, ptr addrspace(1) %arg), "deopt" (i32 0, i32 0, i32 0, i32 10, i32 0)]
   ret void
 }
diff --git a/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll b/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll
--- a/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll
@@ -3,28 +3,28 @@
 define void @get_lane_mask() {
 ; CHECK-LABEL: 'get_lane_mask'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_nxv16i1_i64 = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv8i1_i64 = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv4i1_i64 = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv2i1_i64 = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %mask_nxv16i1_i64 = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %mask_nxv8i1_i64 = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %mask_nxv4i1_i64 = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask_nxv2i1_i64 = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv1i1_i64 = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 undef, i64 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv16i1_i32 = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv8i1_i32 = call @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv4i1_i32 = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %mask_nxv16i1_i32 = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %mask_nxv8i1_i32 = call @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask_nxv4i1_i32 = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv2i1_i32 = call @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv1i1_i32 = call @llvm.get.active.lane.mask.nxv1i1.i32(i32 undef, i32 undef)
-;
CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mask_nxv32i1_i64 = call @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv16i1_i16 = call @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %mask_nxv32i1_i64 = call @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %mask_nxv16i1_i16 = call @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %mask_nxv16i1_i64 = call 
@llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef) diff --git a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll --- a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll @@ -17,9 +17,9 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV32F16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F32 = fadd <1 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fadd <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef @@ -27,17 +27,17 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fadd <16 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F32 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F32 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F32 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F32 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16F32 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F32 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F32 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F32 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = fadd <1 x double> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fadd <8 x double> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F64 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F64 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F64 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F64 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F64 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F64 = fadd undef, undef 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F64 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F16 = fadd half undef, undef @@ -97,9 +97,9 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = fsub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = fsub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV32F16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = fsub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F32 = fsub <1 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef @@ -107,17 +107,17 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F32 = fsub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F32 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F32 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F32 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16F32 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F32 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F32 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F32 = fsub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = fsub <1 x double> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F64 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F64 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F64 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F64 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F64 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F64 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 
8 for instruction: %NXV8F64 = fsub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F16 = fsub half undef, undef @@ -177,9 +177,9 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = fmul undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = fmul undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV32F16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = fmul undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F32 = fmul <1 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fmul <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef @@ -187,17 +187,17 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F32 = fmul undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F32 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F32 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F32 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16F32 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F32 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F32 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F32 = fmul undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = fmul <1 x double> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F64 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F64 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F64 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F64 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F64 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F64 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F64 = fmul undef, undef ; 
CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F16 = fmul half undef, undef @@ -417,9 +417,9 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = fneg undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = fneg undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F16 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16F16 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV32F16 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = fneg undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F32 = fneg <1 x float> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef @@ -427,17 +427,17 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F32 = fneg undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F32 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F32 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F32 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16F32 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F32 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F32 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F32 = fneg undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = fneg <1 x double> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F64 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F64 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F64 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8F64 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F64 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F64 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F64 = fneg undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F16 = fneg half undef diff --git a/llvm/test/Analysis/CostModel/RISCV/arith-int.ll b/llvm/test/Analysis/CostModel/RISCV/arith-int.ll --- a/llvm/test/Analysis/CostModel/RISCV/arith-int.ll 
+++ b/llvm/test/Analysis/CostModel/RISCV/arith-int.ll @@ -17,9 +17,9 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I16 = add undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I16 = add undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I16 = add undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8I16 = add undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16I16 = add undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV32I16 = add undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8I16 = add undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16I16 = add undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32I16 = add undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I32 = add <1 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = add <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = add <4 x i32> undef, undef @@ -27,17 +27,17 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = add <16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I32 = add undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I32 = add undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I32 = add undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8I32 = add undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16I32 = add undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4I32 = add undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8I32 = add undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16I32 = add undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = add <1 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = add <2 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = add <4 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = add <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I64 = add undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I64 = add undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I64 = add undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8I64 = add undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2I64 = add undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I64 = add undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I64 = add undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I16 = add i16 undef, undef @@ -97,9 +97,9 @@ ; CHECK-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %NXV1I16 = sub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I16 = sub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I16 = sub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8I16 = sub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16I16 = sub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV32I16 = sub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8I16 = sub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16I16 = sub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32I16 = sub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I32 = sub <1 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = sub <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sub <4 x i32> undef, undef @@ -107,17 +107,17 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = sub <16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I32 = sub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I32 = sub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I32 = sub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8I32 = sub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16I32 = sub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4I32 = sub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8I32 = sub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16I32 = sub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = sub <1 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = sub <2 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = sub <4 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = sub <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I64 = sub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I64 = sub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I64 = sub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8I64 = sub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2I64 = sub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I64 = sub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I64 = sub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I16 = sub i16 undef, undef @@ -177,9 +177,9 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I16 = mul undef, undef ; CHECK-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %NXV2I16 = mul undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I16 = mul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8I16 = mul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16I16 = mul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV32I16 = mul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8I16 = mul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16I16 = mul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32I16 = mul undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I32 = mul <1 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = mul <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = mul <4 x i32> undef, undef @@ -187,17 +187,17 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = mul <16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I32 = mul undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I32 = mul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I32 = mul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8I32 = mul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16I32 = mul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4I32 = mul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8I32 = mul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16I32 = mul undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = mul <1 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = mul <2 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = mul <4 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = mul <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I64 = mul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I64 = mul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I64 = mul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8I64 = mul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2I64 = mul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I64 = mul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I64 = mul undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I16 = mul i16 undef, undef @@ -257,9 +257,9 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I16 = shl undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I16 = shl undef, undef ; CHECK-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %NXV4I16 = shl undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8I16 = shl undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16I16 = shl undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV32I16 = shl undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8I16 = shl undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16I16 = shl undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32I16 = shl undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I32 = shl <1 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = shl <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = shl <4 x i32> undef, undef @@ -267,17 +267,17 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = shl <16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I32 = shl undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I32 = shl undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I32 = shl undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8I32 = shl undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16I32 = shl undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4I32 = shl undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8I32 = shl undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16I32 = shl undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = shl <1 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = shl <2 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = shl <4 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = shl <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I64 = shl undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I64 = shl undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I64 = shl undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8I64 = shl undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2I64 = shl undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I64 = shl undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I64 = shl undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I16 = shl i16 undef, undef @@ -337,9 +337,9 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I16 = lshr undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I16 = lshr undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I16 = lshr undef, undef -; CHECK-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %NXV8I16 = lshr undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16I16 = lshr undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV32I16 = lshr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8I16 = lshr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16I16 = lshr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32I16 = lshr undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I32 = lshr <1 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = lshr <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = lshr <4 x i32> undef, undef @@ -347,17 +347,17 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = lshr <16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I32 = lshr undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I32 = lshr undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I32 = lshr undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8I32 = lshr undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16I32 = lshr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4I32 = lshr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8I32 = lshr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16I32 = lshr undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = lshr <1 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = lshr <2 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = lshr <4 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = lshr <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I64 = lshr undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I64 = lshr undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I64 = lshr undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8I64 = lshr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2I64 = lshr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I64 = lshr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I64 = lshr undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I16 = lshr i16 undef, undef @@ -417,9 +417,9 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I16 = ashr undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I16 = ashr undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I16 = ashr undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8I16 = ashr undef, undef -; 
CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16I16 = ashr undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV32I16 = ashr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8I16 = ashr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16I16 = ashr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32I16 = ashr undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I32 = ashr <1 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = ashr <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = ashr <4 x i32> undef, undef @@ -427,17 +427,17 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = ashr <16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I32 = ashr undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I32 = ashr undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I32 = ashr undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8I32 = ashr undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV16I32 = ashr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4I32 = ashr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8I32 = ashr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16I32 = ashr undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = ashr <1 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = ashr <2 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = ashr <4 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = ashr <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1I64 = ashr undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2I64 = ashr undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4I64 = ashr undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV8I64 = ashr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2I64 = ashr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4I64 = ashr undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8I64 = ashr undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I16 = ashr i16 undef, undef diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll --- a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll @@ -15,7 +15,7 @@ define void @powi( %vec) { ; CHECK-LABEL: 'powi' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 42) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for 
instruction: %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 42) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 42) diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll --- a/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll @@ -106,17 +106,17 @@ define void @vector_splice() { ; CHECK-LABEL: 'vector_splice' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv16i8 = call @llvm.experimental.vector.splice.nxv16i8( zeroinitializer, zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv32i8 = call @llvm.experimental.vector.splice.nxv32i8( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv16i8 = call @llvm.experimental.vector.splice.nxv16i8( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv32i8 = call @llvm.experimental.vector.splice.nxv32i8( zeroinitializer, zeroinitializer, i32 1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv2i16 = call @llvm.experimental.vector.splice.nxv2i16( zeroinitializer, zeroinitializer, i32 1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i16 = call @llvm.experimental.vector.splice.nxv4i16( zeroinitializer, zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv8i16 = call @llvm.experimental.vector.splice.nxv8i16( zeroinitializer, zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv16i16 = call @llvm.experimental.vector.splice.nxv16i16( zeroinitializer, zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i32 = call @llvm.experimental.vector.splice.nxv4i32( zeroinitializer, zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv8i32 = call @llvm.experimental.vector.splice.nxv8i32( zeroinitializer, zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv2i64 = call @llvm.experimental.vector.splice.nxv2i64( zeroinitializer, zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i64 = call @llvm.experimental.vector.splice.nxv4i64( zeroinitializer, zeroinitializer, i32 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv16i1 = call @llvm.experimental.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv8i16 = call @llvm.experimental.vector.splice.nxv8i16( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv16i16 = call @llvm.experimental.vector.splice.nxv16i16( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv4i32 = call @llvm.experimental.vector.splice.nxv4i32( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv8i32 = call @llvm.experimental.vector.splice.nxv8i32( zeroinitializer, zeroinitializer, i32 1) 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv2i64 = call @llvm.experimental.vector.splice.nxv2i64( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv4i64 = call @llvm.experimental.vector.splice.nxv4i64( zeroinitializer, zeroinitializer, i32 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv16i1 = call @llvm.experimental.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv8i1 = call @llvm.experimental.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i1 = call @llvm.experimental.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv2i1 = call @llvm.experimental.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 1) diff --git a/llvm/test/Analysis/CostModel/RISCV/splice.ll b/llvm/test/Analysis/CostModel/RISCV/splice.ll --- a/llvm/test/Analysis/CostModel/RISCV/splice.ll +++ b/llvm/test/Analysis/CostModel/RISCV/splice.ll @@ -7,51 +7,51 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i8 = call @llvm.experimental.vector.splice.nxv2i8( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i8 = call @llvm.experimental.vector.splice.nxv4i8( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i8 = call @llvm.experimental.vector.splice.nxv8i8( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16i8 = call @llvm.experimental.vector.splice.nxv16i8( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv32i8 = call @llvm.experimental.vector.splice.nxv32i8( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv64i8 = call @llvm.experimental.vector.splice.nxv64i8( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv16i8 = call @llvm.experimental.vector.splice.nxv16i8( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv32i8 = call @llvm.experimental.vector.splice.nxv32i8( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv64i8 = call @llvm.experimental.vector.splice.nxv64i8( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i16 = call @llvm.experimental.vector.splice.nxv1i16( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i16 = call @llvm.experimental.vector.splice.nxv2i16( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i16 = call @llvm.experimental.vector.splice.nxv4i16( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i16 = call 
@llvm.experimental.vector.splice.nxv8i16( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16i16 = call @llvm.experimental.vector.splice.nxv16i16( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv32i16 = call @llvm.experimental.vector.splice.nxv32i16( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv64i16 = call @llvm.experimental.vector.splice.nxv64i16( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv8i16 = call @llvm.experimental.vector.splice.nxv8i16( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv16i16 = call @llvm.experimental.vector.splice.nxv16i16( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv32i16 = call @llvm.experimental.vector.splice.nxv32i16( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv64i16 = call @llvm.experimental.vector.splice.nxv64i16( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1i32 = call @llvm.experimental.vector.splice.nxv1i32( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i32 = call @llvm.experimental.vector.splice.nxv2i32( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i32 = call @llvm.experimental.vector.splice.nxv4i32( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i32 = call @llvm.experimental.vector.splice.nxv8i32( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16i32 = call @llvm.experimental.vector.splice.nxv16i32( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv32i32 = call @llvm.experimental.vector.splice.nxv32i32( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv64i32 = call @llvm.experimental.vector.splice.nxv64i32( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv4i32 = call @llvm.experimental.vector.splice.nxv4i32( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv8i32 = call @llvm.experimental.vector.splice.nxv8i32( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv16i32 = call @llvm.experimental.vector.splice.nxv16i32( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv32i32 = call @llvm.experimental.vector.splice.nxv32i32( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv64i32 = call @llvm.experimental.vector.splice.nxv64i32( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %splice.nxv1i64 = call @llvm.experimental.vector.splice.nxv1i64( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2i64 = call @llvm.experimental.vector.splice.nxv2i64( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4i64 = call @llvm.experimental.vector.splice.nxv4i64( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8i64 = call @llvm.experimental.vector.splice.nxv8i64( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv16i64 = call @llvm.experimental.vector.splice.nxv16i64( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv32i64 = call @llvm.experimental.vector.splice.nxv32i64( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv64i64 = call @llvm.experimental.vector.splice.nxv64i64( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv2i64 = call @llvm.experimental.vector.splice.nxv2i64( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv4i64 = call @llvm.experimental.vector.splice.nxv4i64( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv8i64 = call @llvm.experimental.vector.splice.nxv8i64( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv16i64 = call @llvm.experimental.vector.splice.nxv16i64( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv32i64 = call @llvm.experimental.vector.splice.nxv32i64( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %splice.nxv64i64 = call @llvm.experimental.vector.splice.nxv64i64( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f16 = call @llvm.experimental.vector.splice.nxv1f16( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f16 = call @llvm.experimental.vector.splice.nxv2f16( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f16 = call @llvm.experimental.vector.splice.nxv4f16( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8f16 = call @llvm.experimental.vector.splice.nxv8f16( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16f16 = call @llvm.experimental.vector.splice.nxv16f16( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv32f16 = call @llvm.experimental.vector.splice.nxv32f16( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv64f16 = call 
@llvm.experimental.vector.splice.nxv64f16( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv8f16 = call @llvm.experimental.vector.splice.nxv8f16( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv16f16 = call @llvm.experimental.vector.splice.nxv16f16( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv32f16 = call @llvm.experimental.vector.splice.nxv32f16( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv64f16 = call @llvm.experimental.vector.splice.nxv64f16( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f32 = call @llvm.experimental.vector.splice.nxv1f32( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f32 = call @llvm.experimental.vector.splice.nxv2f32( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f32 = call @llvm.experimental.vector.splice.nxv4f32( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv8f32 = call @llvm.experimental.vector.splice.nxv8f32( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv16f32 = call @llvm.experimental.vector.splice.nxv16f32( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv32f32 = call @llvm.experimental.vector.splice.nxv32f32( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv64f32 = call @llvm.experimental.vector.splice.nxv64f32( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv4f32 = call @llvm.experimental.vector.splice.nxv4f32( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv8f32 = call @llvm.experimental.vector.splice.nxv8f32( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv16f32 = call @llvm.experimental.vector.splice.nxv16f32( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv32f32 = call @llvm.experimental.vector.splice.nxv32f32( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv64f32 = call @llvm.experimental.vector.splice.nxv64f32( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f64 = call @llvm.experimental.vector.splice.nxv1f64( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f64 = call @llvm.experimental.vector.splice.nxv2f64( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f64 = call @llvm.experimental.vector.splice.nxv4f64( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %splice.nxv8f64 = call @llvm.experimental.vector.splice.nxv8f64( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv16f64 = call @llvm.experimental.vector.splice.nxv16f64( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv32f64 = call @llvm.experimental.vector.splice.nxv32f64( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv64f64 = call @llvm.experimental.vector.splice.nxv64f64( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv2f64 = call @llvm.experimental.vector.splice.nxv2f64( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv4f64 = call @llvm.experimental.vector.splice.nxv4f64( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv8f64 = call @llvm.experimental.vector.splice.nxv8f64( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv16f64 = call @llvm.experimental.vector.splice.nxv16f64( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv32f64 = call @llvm.experimental.vector.splice.nxv32f64( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %splice.nxv64f64 = call @llvm.experimental.vector.splice.nxv64f64( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %splice.nxv1i8 = call @llvm.experimental.vector.splice.nxv1i8( zeroinitializer, zeroinitializer, i32 -1) diff --git a/llvm/test/Analysis/CostModel/X86/strided-load-i16.ll b/llvm/test/Analysis/CostModel/X86/strided-load-i16.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Analysis/CostModel/X86/strided-load-i32.ll b/llvm/test/Analysis/CostModel/X86/strided-load-i32.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Analysis/CostModel/X86/strided-load-i64.ll b/llvm/test/Analysis/CostModel/X86/strided-load-i64.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Analysis/CostModel/X86/strided-load-i8.ll b/llvm/test/Analysis/CostModel/X86/strided-load-i8.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Analysis/DDG/basic-a.ll b/llvm/test/Analysis/DDG/basic-a.ll --- a/llvm/test/Analysis/DDG/basic-a.ll +++ b/llvm/test/Analysis/DDG/basic-a.ll @@ -29,14 +29,14 @@ ; CHECK: Node Address:[[N4]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02 +; CHECK-NEXT: %arrayidx1 = getelementptr inbounds float, ptr %a, i64 %i.02 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N6:0x[0-9a-f]*]] ; CHECK: Node Address:[[N3]]:multi-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 -; CHECK-NEXT: %0 = load float, float* %arrayidx, align 4 +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, ptr %b, i64 %i.02 +; CHECK-NEXT: %0 = load float, ptr %arrayidx, align 4 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N7:0x[0-9a-f]*]] @@ -54,29 +54,29 @@ ; CHECK: Node Address:[[N6]]:single-instruction ; 
CHECK-NEXT: Instructions: -; CHECK-NEXT: store float %add, float* %arrayidx1, align 4 +; CHECK-NEXT: store float %add, ptr %arrayidx1, align 4 ; CHECK-NEXT: Edges:none! ;; No memory dependencies. -;; void test1(unsigned long n, float * restrict a, float * restrict b) { +;; void test1(unsigned long n, ptr restrict a, ptr restrict b) { ;; for (unsigned long i = 0; i < n; i++) ;; a[i] = b[i] + n; ;; } -define void @test1(i64 %n, float* noalias %a, float* noalias %b) { +define void @test1(i64 %n, ptr noalias %a, ptr noalias %b) { entry: %exitcond1 = icmp ne i64 0, %n br i1 %exitcond1, label %test1.for.body, label %for.end test1.for.body: ; preds = %entry, %test1.for.body %i.02 = phi i64 [ %inc, %test1.for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %i.02 + %0 = load float, ptr %arrayidx, align 4 %conv = uitofp i64 %n to float %add = fadd float %0, %conv - %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02 - store float %add, float* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %a, i64 %i.02 + store float %add, ptr %arrayidx1, align 4 %inc = add i64 %i.02, 1 %exitcond = icmp ne i64 %inc, %n br i1 %exitcond, label %test1.for.body, label %for.end @@ -116,22 +116,22 @@ ; CHECK: Node Address:[[N5]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx2 = getelementptr inbounds float, float* %a, i64 %i.02 +; CHECK-NEXT: %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %i.02 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N7:0x[0-9a-f]*]] ; CHECK: Node Address:[[N4]]:multi-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02 -; CHECK-NEXT: %1 = load float, float* %arrayidx1, align 4 +; CHECK-NEXT: %arrayidx1 = getelementptr inbounds float, ptr %a, i64 %i.02 +; CHECK-NEXT: %1 = load float, ptr %arrayidx1, align 4 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] ; CHECK-NEXT: [memory] to [[N7]] ; CHECK: Node Address:[[N3]]:multi-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 -; CHECK-NEXT: %0 = load float, float* %arrayidx, align 4 +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, ptr %b, i64 %i.02 +; CHECK-NEXT: %0 = load float, ptr %arrayidx, align 4 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N8]] @@ -143,31 +143,31 @@ ; CHECK: Node Address:[[N7]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: store float %add, float* %arrayidx2, align 4 +; CHECK-NEXT: store float %add, ptr %arrayidx2, align 4 ; CHECK-NEXT: Edges:none! ;; Loop-independent memory dependencies. 
-;; void test2(unsigned long n, float * restrict a, float * restrict b) { +;; void test2(unsigned long n, ptr restrict a, ptr restrict b) { ;; for (unsigned long i = 0; i < n; i++) ;; a[i] = b[i] + a[i]; ;; } -define void @test2(i64 %n, float* noalias %a, float* noalias %b) { +define void @test2(i64 %n, ptr noalias %a, ptr noalias %b) { entry: %exitcond1 = icmp ne i64 0, %n br i1 %exitcond1, label %test2.for.body, label %for.end test2.for.body: ; preds = %entry, %test2.for.body %i.02 = phi i64 [ %inc, %test2.for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 - %0 = load float, float* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.02 - %1 = load float, float* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %i.02 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %a, i64 %i.02 + %1 = load float, ptr %arrayidx1, align 4 %add = fadd float %0, %1 - %arrayidx2 = getelementptr inbounds float, float* %a, i64 %i.02 - store float %add, float* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %i.02 + store float %add, ptr %arrayidx2, align 4 %inc = add i64 %i.02, 1 %exitcond = icmp ne i64 %inc, %n br i1 %exitcond, label %test2.for.body, label %for.end diff --git a/llvm/test/Analysis/DDG/basic-b.ll b/llvm/test/Analysis/DDG/basic-b.ll --- a/llvm/test/Analysis/DDG/basic-b.ll +++ b/llvm/test/Analysis/DDG/basic-b.ll @@ -30,21 +30,21 @@ ; CHECK: Node Address:[[N6]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx3 = getelementptr inbounds float, float* %a, i64 %i.02 +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %i.02 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] ; CHECK: Node Address:[[N5]]:multi-instruction ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %sub1 = add i64 %i.02, -1 -; CHECK-NEXT: %arrayidx2 = getelementptr inbounds float, float* %a, i64 %sub1 +; CHECK-NEXT: %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %sub1 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N8]] ; CHECK: Node Address:[[N4]]:multi-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 -; CHECK-NEXT: %0 = load float, float* %arrayidx, align 4 +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, ptr %b, i64 %i.02 +; CHECK-NEXT: %0 = load float, ptr %arrayidx, align 4 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N8]] @@ -52,7 +52,7 @@ ; CHECK-NEXT: --- start of nodes in pi-block --- ; CHECK: Node Address:[[N9:0x[0-9a-f]*]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %1 = load float, float* %arrayidx2, align 4 +; CHECK-NEXT: %1 = load float, ptr %arrayidx2, align 4 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N10:0x[0-9a-f]*]] @@ -64,7 +64,7 @@ ; CHECK: Node Address:[[N11]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: store float %add, float* %arrayidx3, align 4 +; CHECK-NEXT: store float %add, ptr %arrayidx3, align 4 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [memory] to [[N9]] ; CHECK-NEXT:--- end of nodes in pi-block --- @@ -74,12 +74,12 @@ ;; Loop-carried dependence requiring edge-reversal to expose a cycle ;; in the graph. 
-;; void test(unsigned long n, float * restrict a, float * restrict b) { +;; void test(unsigned long n, ptr restrict a, ptr restrict b) { ;; for (unsigned long i = 1; i < n-1; i++) ;; a[i] = b[i] + a[i-1]; ;; } -define void @test1(i64 %n, float* noalias %a, float* noalias %b) { +define void @test1(i64 %n, ptr noalias %a, ptr noalias %b) { entry: %sub = add i64 %n, -1 %cmp1 = icmp ult i64 1, %sub @@ -87,14 +87,14 @@ test1.for.body: ; preds = %entry, %test1.for.body %i.02 = phi i64 [ %inc, %test1.for.body ], [ 1, %entry ] - %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %i.02 + %0 = load float, ptr %arrayidx, align 4 %sub1 = add i64 %i.02, -1 - %arrayidx2 = getelementptr inbounds float, float* %a, i64 %sub1 - %1 = load float, float* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %sub1 + %1 = load float, ptr %arrayidx2, align 4 %add = fadd float %0, %1 - %arrayidx3 = getelementptr inbounds float, float* %a, i64 %i.02 - store float %add, float* %arrayidx3, align 4 + %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %i.02 + store float %add, ptr %arrayidx3, align 4 %inc = add i64 %i.02, 1 %cmp = icmp ult i64 %inc, %sub br i1 %cmp, label %test1.for.body, label %for.end @@ -134,23 +134,23 @@ ; CHECK: Node Address:[[N6]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx3 = getelementptr inbounds float, float* %a, i64 %i.02 +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %i.02 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N8:0x[0-9a-f]*]] ; CHECK: Node Address:[[N5]]:multi-instruction ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %add1 = add i64 %i.02, 1 -; CHECK-NEXT: %arrayidx2 = getelementptr inbounds float, float* %a, i64 %add1 -; CHECK-NEXT: %1 = load float, float* %arrayidx2, align 4 +; CHECK-NEXT: %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %add1 +; CHECK-NEXT: %1 = load float, ptr %arrayidx2, align 4 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N9:0x[0-9a-f]*]] ; CHECK-NEXT: [memory] to [[N8]] ; CHECK: Node Address:[[N4]]:multi-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 -; CHECK-NEXT: %0 = load float, float* %arrayidx, align 4 +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, ptr %b, i64 %i.02 +; CHECK-NEXT: %0 = load float, ptr %arrayidx, align 4 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N9]] @@ -162,17 +162,17 @@ ; CHECK: Node Address:[[N8]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: store float %add, float* %arrayidx3, align 4 +; CHECK-NEXT: store float %add, ptr %arrayidx3, align 4 ; CHECK-NEXT: Edges:none! ;; Forward loop-carried dependence *not* causing a cycle. 
-;; void test2(unsigned long n, float * restrict a, float * restrict b) { +;; void test2(unsigned long n, ptr restrict a, ptr restrict b) { ;; for (unsigned long i = 1; i < n-1; i++) ;; a[i] = b[i] + a[i+1]; ;; } -define void @test2(i64 %n, float* noalias %a, float* noalias %b) { +define void @test2(i64 %n, ptr noalias %a, ptr noalias %b) { entry: %sub = add i64 %n, -1 %cmp1 = icmp ult i64 1, %sub @@ -180,14 +180,14 @@ test2.for.body: ; preds = %entry, %test2.for.body %i.02 = phi i64 [ %inc, %test2.for.body ], [ 1, %entry ] - %arrayidx = getelementptr inbounds float, float* %b, i64 %i.02 - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %i.02 + %0 = load float, ptr %arrayidx, align 4 %add1 = add i64 %i.02, 1 - %arrayidx2 = getelementptr inbounds float, float* %a, i64 %add1 - %1 = load float, float* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %add1 + %1 = load float, ptr %arrayidx2, align 4 %add = fadd float %0, %1 - %arrayidx3 = getelementptr inbounds float, float* %a, i64 %i.02 - store float %add, float* %arrayidx3, align 4 + %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %i.02 + store float %add, ptr %arrayidx3, align 4 %inc = add i64 %i.02, 1 %cmp = icmp ult i64 %inc, %sub br i1 %cmp, label %test2.for.body, label %for.end diff --git a/llvm/test/Analysis/DDG/basic-loopnest.ll b/llvm/test/Analysis/DDG/basic-loopnest.ll --- a/llvm/test/Analysis/DDG/basic-loopnest.ll +++ b/llvm/test/Analysis/DDG/basic-loopnest.ll @@ -58,40 +58,40 @@ ; CHECK: Node Address:[[N14]]:multi-instruction ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %4 = mul nsw i64 %i.04, %n -; CHECK-NEXT: %arrayidx10 = getelementptr inbounds float, float* %a, i64 %4 +; CHECK-NEXT: %arrayidx10 = getelementptr inbounds float, ptr %a, i64 %4 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N6]] ; CHECK: Node Address:[[N6]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx11 = getelementptr inbounds float, float* %arrayidx10, i64 %j.02 +; CHECK-NEXT: %arrayidx11 = getelementptr inbounds float, ptr %arrayidx10, i64 %j.02 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N18:0x[0-9a-f]*]] ; CHECK: Node Address:[[N13]]:multi-instruction ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %2 = mul nsw i64 %i.04, %n -; CHECK-NEXT: %arrayidx6 = getelementptr inbounds float, float* %a, i64 %2 +; CHECK-NEXT: %arrayidx6 = getelementptr inbounds float, ptr %a, i64 %2 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N8]] ; CHECK: Node Address:[[N8]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx8 = getelementptr inbounds float, float* %arrayidx6, i64 %sub7 +; CHECK-NEXT: %arrayidx8 = getelementptr inbounds float, ptr %arrayidx6, i64 %sub7 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N18]] ; CHECK: Node Address:[[N12]]:multi-instruction ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %0 = mul nsw i64 %i.04, %n -; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %0 +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, ptr %b, i64 %0 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N4]] ; CHECK: Node Address:[[N4]]:multi-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx5 = getelementptr inbounds float, float* %arrayidx, i64 %j.02 -; CHECK-NEXT: %1 = load float, float* %arrayidx5, align 4 +; CHECK-NEXT: %arrayidx5 = getelementptr inbounds float, ptr %arrayidx, i64 %j.02 +; CHECK-NEXT: %1 = load float, ptr %arrayidx5, align 4 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N18]] @@ 
-99,7 +99,7 @@ ; CHECK-NEXT:--- start of nodes in pi-block --- ; CHECK: Node Address:[[N22:0x[0-9a-f]*]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %3 = load float, float* %arrayidx8, align 4 +; CHECK-NEXT: %3 = load float, ptr %arrayidx8, align 4 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N23:0x[0-9a-f]*]] @@ -111,7 +111,7 @@ ; CHECK: Node Address:[[N24]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: store float %add, float* %arrayidx11, align 4 +; CHECK-NEXT: store float %add, ptr %arrayidx11, align 4 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [memory] to [[N22]] ; CHECK-NEXT:--- end of nodes in pi-block --- @@ -154,7 +154,7 @@ ;; a[i][j] = b[i][j] + a[i][j-1]; ;; } -define void @test1(i64 %n, float* noalias %a, float* noalias %b) { +define void @test1(i64 %n, ptr noalias %a, ptr noalias %b) { entry: %exitcond3 = icmp ne i64 0, %n br i1 %exitcond3, label %test1.for.cond1.preheader, label %for.end14 @@ -168,19 +168,19 @@ for.body4: ; preds = %test1.for.cond1.preheader, %for.body4 %j.02 = phi i64 [ %inc, %for.body4 ], [ 1, %test1.for.cond1.preheader ] %0 = mul nsw i64 %i.04, %n - %arrayidx = getelementptr inbounds float, float* %b, i64 %0 - %arrayidx5 = getelementptr inbounds float, float* %arrayidx, i64 %j.02 - %1 = load float, float* %arrayidx5, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %0 + %arrayidx5 = getelementptr inbounds float, ptr %arrayidx, i64 %j.02 + %1 = load float, ptr %arrayidx5, align 4 %2 = mul nsw i64 %i.04, %n - %arrayidx6 = getelementptr inbounds float, float* %a, i64 %2 + %arrayidx6 = getelementptr inbounds float, ptr %a, i64 %2 %sub7 = add i64 %j.02, -1 - %arrayidx8 = getelementptr inbounds float, float* %arrayidx6, i64 %sub7 - %3 = load float, float* %arrayidx8, align 4 + %arrayidx8 = getelementptr inbounds float, ptr %arrayidx6, i64 %sub7 + %3 = load float, ptr %arrayidx8, align 4 %add = fadd float %1, %3 %4 = mul nsw i64 %i.04, %n - %arrayidx10 = getelementptr inbounds float, float* %a, i64 %4 - %arrayidx11 = getelementptr inbounds float, float* %arrayidx10, i64 %j.02 - store float %add, float* %arrayidx11, align 4 + %arrayidx10 = getelementptr inbounds float, ptr %a, i64 %4 + %arrayidx11 = getelementptr inbounds float, ptr %arrayidx10, i64 %j.02 + store float %add, ptr %arrayidx11, align 4 %inc = add i64 %j.02, 1 %cmp2 = icmp ult i64 %inc, %sub br i1 %cmp2, label %for.body4, label %for.inc12 @@ -253,27 +253,27 @@ ; CHECK: Node Address:[[N13]]:multi-instruction ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %4 = mul nsw i64 %i.04, %n -; CHECK-NEXT: %arrayidx10 = getelementptr inbounds float, float* %a, i64 %4 +; CHECK-NEXT: %arrayidx10 = getelementptr inbounds float, ptr %a, i64 %4 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N5]] ; CHECK: Node Address:[[N5]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx11 = getelementptr inbounds float, float* %arrayidx10, i64 %j.02 +; CHECK-NEXT: %arrayidx11 = getelementptr inbounds float, ptr %arrayidx10, i64 %j.02 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N17:0x[0-9a-f]*]] ; CHECK: Node Address:[[N12]]:multi-instruction ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %2 = mul nsw i64 %i.04, %n -; CHECK-NEXT: %arrayidx6 = getelementptr inbounds float, float* %a, i64 %2 +; CHECK-NEXT: %arrayidx6 = getelementptr inbounds float, ptr %a, i64 %2 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N7]] ; CHECK: Node Address:[[N7]]:multi-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx8 = getelementptr inbounds float, float* %arrayidx6, 
i64 %add7 -; CHECK-NEXT: %3 = load float, float* %arrayidx8, align 4 +; CHECK-NEXT: %arrayidx8 = getelementptr inbounds float, ptr %arrayidx6, i64 %add7 +; CHECK-NEXT: %3 = load float, ptr %arrayidx8, align 4 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N20:0x[0-9a-f]*]] ; CHECK-NEXT: [memory] to [[N17]] @@ -281,14 +281,14 @@ ; CHECK: Node Address:[[N11]]:multi-instruction ; CHECK-NEXT: Instructions: ; CHECK-NEXT: %0 = mul nsw i64 %i.04, %n -; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %b, i64 %0 +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, ptr %b, i64 %0 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N3]] ; CHECK: Node Address:[[N3]]:multi-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: %arrayidx5 = getelementptr inbounds float, float* %arrayidx, i64 %j.02 -; CHECK-NEXT: %1 = load float, float* %arrayidx5, align 4 +; CHECK-NEXT: %arrayidx5 = getelementptr inbounds float, ptr %arrayidx, i64 %j.02 +; CHECK-NEXT: %1 = load float, ptr %arrayidx5, align 4 ; CHECK-NEXT: Edges: ; CHECK-NEXT: [def-use] to [[N20]] @@ -300,7 +300,7 @@ ; CHECK: Node Address:[[N17]]:single-instruction ; CHECK-NEXT: Instructions: -; CHECK-NEXT: store float %add, float* %arrayidx11, align 4 +; CHECK-NEXT: store float %add, ptr %arrayidx11, align 4 ; CHECK-NEXT: Edges:none! ; CHECK: Node Address:[[N23:0x[0-9a-f]*]]:single-instruction @@ -340,7 +340,7 @@ ;; a[i][j] = b[i][j] + a[i][j+1]; ;; } -define void @test2(i64 %n, float* noalias %a, float* noalias %b) { +define void @test2(i64 %n, ptr noalias %a, ptr noalias %b) { entry: %exitcond3 = icmp ne i64 0, %n br i1 %exitcond3, label %test2.for.cond1.preheader, label %for.end14 @@ -354,19 +354,19 @@ for.body4: ; preds = %test2.for.cond1.preheader, %for.body4 %j.02 = phi i64 [ %inc, %for.body4 ], [ 1, %test2.for.cond1.preheader ] %0 = mul nsw i64 %i.04, %n - %arrayidx = getelementptr inbounds float, float* %b, i64 %0 - %arrayidx5 = getelementptr inbounds float, float* %arrayidx, i64 %j.02 - %1 = load float, float* %arrayidx5, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %0 + %arrayidx5 = getelementptr inbounds float, ptr %arrayidx, i64 %j.02 + %1 = load float, ptr %arrayidx5, align 4 %2 = mul nsw i64 %i.04, %n - %arrayidx6 = getelementptr inbounds float, float* %a, i64 %2 + %arrayidx6 = getelementptr inbounds float, ptr %a, i64 %2 %add7 = add i64 %j.02, 1 - %arrayidx8 = getelementptr inbounds float, float* %arrayidx6, i64 %add7 - %3 = load float, float* %arrayidx8, align 4 + %arrayidx8 = getelementptr inbounds float, ptr %arrayidx6, i64 %add7 + %3 = load float, ptr %arrayidx8, align 4 %add = fadd float %1, %3 %4 = mul nsw i64 %i.04, %n - %arrayidx10 = getelementptr inbounds float, float* %a, i64 %4 - %arrayidx11 = getelementptr inbounds float, float* %arrayidx10, i64 %j.02 - store float %add, float* %arrayidx11, align 4 + %arrayidx10 = getelementptr inbounds float, ptr %a, i64 %4 + %arrayidx11 = getelementptr inbounds float, ptr %arrayidx10, i64 %j.02 + store float %add, ptr %arrayidx11, align 4 %inc = add i64 %j.02, 1 %cmp2 = icmp ult i64 %inc, %sub br i1 %cmp2, label %for.body4, label %for.inc12 diff --git a/llvm/test/Analysis/DDG/print-dot-ddg.ll b/llvm/test/Analysis/DDG/print-dot-ddg.ll --- a/llvm/test/Analysis/DDG/print-dot-ddg.ll +++ b/llvm/test/Analysis/DDG/print-dot-ddg.ll @@ -10,7 +10,7 @@ ; printed properly and that multiple memory dependencies on a single edge ; are shown in the full dot graph. 
; -; void foo(float * restrict A, float * restrict B, int n) { +; void foo(ptr restrict A, ptr restrict B, int n) { ; for (int i = 0; i < n; i++) { ; A[i] = A[i] + B[i]; ; B[i+1] = A[i] + 1; @@ -24,22 +24,22 @@ ; CHECK: {{Node0x.*}} -> {{Node0x.*}}[label="[rooted]"] ; CHECK-COUNT-6: {{Node0x.*}} -> {{Node0x.*}}[label="[def-use]"] ; CHECK-NOT: {{Node0x.*}} -> {{Node0x.*}}[label="[def-use]"] -; CHECK: [shape=record,label="{\\n %arrayidx10 = getelementptr inbounds float, float* %B, i64 %indvars.iv.next\n}"]; -; CHECK: [shape=record,label="{\\n %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv\n %0 = load float, float* %arrayidx, align 4\n}"]; +; CHECK: [shape=record,label="{\\n %arrayidx10 = getelementptr inbounds float, ptr %B, i64 %indvars.iv.next\n}"]; +; CHECK: [shape=record,label="{\\n %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv\n %0 = load float, ptr %arrayidx, align 4\n}"]; ; CHECK: {{Node0x.*}} -> {{Node0x.*}}[label="[consistent anti [0|<]!, consistent input [0|<]!]"] -; CHECK: [shape=record,label="{\\n--- start of nodes in pi-block ---\n\\n %1 = load float, float* %arrayidx2, align 4\n\n\\n %add = fadd fast float %0, %1\n\n\\n store float %add, float* %arrayidx4, align 4\n\n\\n %2 = load float, float* %arrayidx6, align 4\n %add7 = fadd fast float %2, 1.000000e+00\n\n\\n store float %add7, float* %arrayidx10, align 4\n--- end of nodes in pi-block ---\n}"]; +; CHECK: [shape=record,label="{\\n--- start of nodes in pi-block ---\n\\n %1 = load float, ptr %arrayidx2, align 4\n\n\\n %add = fadd fast float %0, %1\n\n\\n store float %add, ptr %arrayidx4, align 4\n\n\\n %2 = load float, ptr %arrayidx6, align 4\n %add7 = fadd fast float %2, 1.000000e+00\n\n\\n store float %add7, ptr %arrayidx10, align 4\n--- end of nodes in pi-block ---\n}"]; ; CHECK-ONLY: digraph "DDG for 'foo.for.body'" ; CHECK-ONLY-NEXT: label="DDG for 'foo.for.body'"; ; CHECK-ONLY: [shape=record,label="{pi-block\nwith\n2 nodes\n}"]; ; CHECK-ONLY-COUNT-6: {{Node0x.*}} -> {{Node0x.*}}[label="[def-use]"]; ; CHECK-NOT: {{Node0x.*}} -> {{Node0x.*}}[label="[def-use]"]; -; CHECK-ONLY: [shape=record,label="{ %arrayidx10 = getelementptr inbounds float, float* %B, i64 %indvars.iv.next\n}"]; -; CHECK-ONLY: [shape=record,label="{ %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv\n %0 = load float, float* %arrayidx, align 4\n}"]; +; CHECK-ONLY: [shape=record,label="{ %arrayidx10 = getelementptr inbounds float, ptr %B, i64 %indvars.iv.next\n}"]; +; CHECK-ONLY: [shape=record,label="{ %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv\n %0 = load float, ptr %arrayidx, align 4\n}"]; ; CHECK-ONLY: {{Node0x.*}} -> {{Node0x.*}}[label="[memory]"] ; CHECK-ONLY: [shape=record,label="{pi-block\nwith\n5 nodes\n}"]; -define void @foo(float* noalias %A, float* noalias %B, i32 signext %n) { +define void @foo(ptr noalias %A, ptr noalias %B, i32 signext %n) { entry: %cmp1 = icmp sgt i32 %n, 0 br i1 %cmp1, label %for.body.preheader, label %for.end @@ -50,19 +50,19 @@ for.body: ; preds = %for.body.preheader, %for.body %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv - %1 = load float, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, 
ptr %B, i64 %indvars.iv + %1 = load float, ptr %arrayidx2, align 4 %add = fadd fast float %0, %1 - %arrayidx4 = getelementptr inbounds float, float* %A, i64 %indvars.iv - store float %add, float* %arrayidx4, align 4 - %arrayidx6 = getelementptr inbounds float, float* %A, i64 %indvars.iv - %2 = load float, float* %arrayidx6, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %A, i64 %indvars.iv + store float %add, ptr %arrayidx4, align 4 + %arrayidx6 = getelementptr inbounds float, ptr %A, i64 %indvars.iv + %2 = load float, ptr %arrayidx6, align 4 %add7 = fadd fast float %2, 1.000000e+00 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %arrayidx10 = getelementptr inbounds float, float* %B, i64 %indvars.iv.next - store float %add7, float* %arrayidx10, align 4 + %arrayidx10 = getelementptr inbounds float, ptr %B, i64 %indvars.iv.next + store float %add7, ptr %arrayidx10, align 4 %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count br i1 %exitcond, label %for.body, label %for.end.loopexit diff --git a/llvm/test/Analysis/DDG/root-node.ll b/llvm/test/Analysis/DDG/root-node.ll --- a/llvm/test/Analysis/DDG/root-node.ll +++ b/llvm/test/Analysis/DDG/root-node.ll @@ -14,14 +14,14 @@ ; CHECK: %i2.03 = phi i64 [ 0, %for.body.lr.ph ], [ %inc2, %test1.for.body ] ;; // Two separate components in the graph. Root node must link to both. -;; void test1(unsigned long n, float * restrict a, float * restrict b) { +;; void test1(unsigned long n, ptr restrict a, ptr restrict b) { ;; for (unsigned long i1 = 0, i2 = 0; i1 < n; i1++, i2++) { ;; a[i1] = 1; ;; b[i2] = -1; ;; } ;; } -define void @test1(i64 %n, float* noalias %a, float* noalias %b) { +define void @test1(i64 %n, ptr noalias %a, ptr noalias %b) { entry: %cmp1 = icmp ult i64 0, %n br i1 %cmp1, label %for.body.lr.ph, label %for.end @@ -32,10 +32,10 @@ test1.for.body: ; preds = %for.body.lr.ph, %test1.for.body %i2.03 = phi i64 [ 0, %for.body.lr.ph ], [ %inc2, %test1.for.body ] %i1.02 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %test1.for.body ] - %arrayidx = getelementptr inbounds float, float* %a, i64 %i1.02 - store float 1.000000e+00, float* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds float, float* %b, i64 %i2.03 - store float -1.000000e+00, float* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i64 %i1.02 + store float 1.000000e+00, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %b, i64 %i2.03 + store float -1.000000e+00, ptr %arrayidx1, align 4 %inc = add i64 %i1.02, 1 %inc2 = add i64 %i2.03, 1 %cmp = icmp ult i64 %inc, %n diff --git a/llvm/test/Analysis/GlobalsModRef/volatile-instrs.ll b/llvm/test/Analysis/GlobalsModRef/volatile-instrs.ll --- a/llvm/test/Analysis/GlobalsModRef/volatile-instrs.ll +++ b/llvm/test/Analysis/GlobalsModRef/volatile-instrs.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -dse -S | FileCheck %s +; RUN: opt < %s -passes=dse -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/llvm/test/Analysis/MemorySSA/pr40749.ll b/llvm/test/Analysis/MemorySSA/pr40749.ll --- a/llvm/test/Analysis/MemorySSA/pr40749.ll +++ b/llvm/test/Analysis/MemorySSA/pr40749.ll @@ -1,4 +1,4 @@ -; RUN: opt -licm -verify-memoryssa -S < %s | FileCheck %s +; RUN: opt -passes=licm -verify-memoryssa -S < %s | FileCheck %s ; REQUIRES: asserts target datalayout = 
"E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" diff --git a/llvm/test/Analysis/MemorySSA/pr40754.ll b/llvm/test/Analysis/MemorySSA/pr40754.ll --- a/llvm/test/Analysis/MemorySSA/pr40754.ll +++ b/llvm/test/Analysis/MemorySSA/pr40754.ll @@ -1,4 +1,4 @@ -; RUN: opt -licm -verify-memoryssa -S < %s | FileCheck %s +; RUN: opt -passes=licm -verify-memoryssa -S < %s | FileCheck %s ; REQUIRES: asserts target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" diff --git a/llvm/test/Analysis/MemorySSA/pr41254.ll b/llvm/test/Analysis/MemorySSA/pr41254.ll --- a/llvm/test/Analysis/MemorySSA/pr41254.ll +++ b/llvm/test/Analysis/MemorySSA/pr41254.ll @@ -1,4 +1,4 @@ -; RUN: opt -licm -verify-memoryssa -S < %s | FileCheck %s +; RUN: opt -passes=licm -verify-memoryssa -S < %s | FileCheck %s ; REQUIRES: asserts target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" diff --git a/llvm/test/Analysis/MemorySSA/pr42940.ll b/llvm/test/Analysis/MemorySSA/pr42940.ll --- a/llvm/test/Analysis/MemorySSA/pr42940.ll +++ b/llvm/test/Analysis/MemorySSA/pr42940.ll @@ -1,4 +1,4 @@ -; RUN: opt -licm -verify-memoryssa -S %s | FileCheck %s +; RUN: opt -passes=licm -verify-memoryssa -S %s | FileCheck %s ; REQUIRES: asserts target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" diff --git a/llvm/test/Analysis/MemorySSA/pr43320.ll b/llvm/test/Analysis/MemorySSA/pr43320.ll --- a/llvm/test/Analysis/MemorySSA/pr43320.ll +++ b/llvm/test/Analysis/MemorySSA/pr43320.ll @@ -1,4 +1,4 @@ -; RUN: opt -licm -verify-memoryssa -S < %s | FileCheck %s +; RUN: opt -passes=licm -verify-memoryssa -S < %s | FileCheck %s ; REQUIRES: asserts target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/Analysis/MemorySSA/pr43426.ll b/llvm/test/Analysis/MemorySSA/pr43426.ll --- a/llvm/test/Analysis/MemorySSA/pr43426.ll +++ b/llvm/test/Analysis/MemorySSA/pr43426.ll @@ -1,4 +1,4 @@ -; RUN: opt -licm -S %s | FileCheck %s +; RUN: opt -passes=licm -S %s | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/MemorySSA/pr43493.ll b/llvm/test/Analysis/MemorySSA/pr43493.ll --- a/llvm/test/Analysis/MemorySSA/pr43493.ll +++ b/llvm/test/Analysis/MemorySSA/pr43493.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-rotate -verify-memoryssa -S %s | FileCheck %s +; RUN: opt -passes=loop-rotate -verify-memoryssa -S %s | FileCheck %s ; REQUIRES: asserts ; CHECK-LABEL: @func_35() diff --git a/llvm/test/Analysis/MemorySSA/pr43540.ll b/llvm/test/Analysis/MemorySSA/pr43540.ll --- a/llvm/test/Analysis/MemorySSA/pr43540.ll +++ b/llvm/test/Analysis/MemorySSA/pr43540.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -licm %s | FileCheck %s +; RUN: opt -S -passes=licm %s | FileCheck %s @v_1 = global i8 0, align 1 @v_2 = global i8 0, align 1 diff --git a/llvm/test/Analysis/MemorySSA/pr43541.ll b/llvm/test/Analysis/MemorySSA/pr43541.ll --- a/llvm/test/Analysis/MemorySSA/pr43541.ll +++ b/llvm/test/Analysis/MemorySSA/pr43541.ll @@ -1,4 +1,4 @@ -; RUN: opt -gvn-hoist -S < %s | FileCheck %s +; RUN: opt -passes=gvn-hoist -S < %s | FileCheck %s ; REQUIRES: asserts %struct.job_pool.6.7 = type { i32 } diff --git a/llvm/test/Analysis/MemorySSA/pr44027.ll b/llvm/test/Analysis/MemorySSA/pr44027.ll --- a/llvm/test/Analysis/MemorySSA/pr44027.ll +++ b/llvm/test/Analysis/MemorySSA/pr44027.ll @@ -1,4 +1,4 @@ -; RUN: opt -gvn-hoist -verify-memoryssa -S < %s | FileCheck %s +; RUN: opt 
-passes=gvn-hoist -verify-memoryssa -S < %s | FileCheck %s ; REQUIRES: asserts target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Analysis/MemorySSA/pr45976.ll b/llvm/test/Analysis/MemorySSA/pr45976.ll --- a/llvm/test/Analysis/MemorySSA/pr45976.ll +++ b/llvm/test/Analysis/MemorySSA/pr45976.ll @@ -1,4 +1,4 @@ -; RUN: opt -licm -S < %s | FileCheck %s +; RUN: opt -passes=licm -S < %s | FileCheck %s ; REQUIRES: asserts @global1 = external global i64, align 8 diff --git a/llvm/test/Analysis/MemorySSA/renamephis.ll b/llvm/test/Analysis/MemorySSA/renamephis.ll --- a/llvm/test/Analysis/MemorySSA/renamephis.ll +++ b/llvm/test/Analysis/MemorySSA/renamephis.ll @@ -1,4 +1,4 @@ -; RUN: opt -licm -verify-memoryssa -S %s | FileCheck %s +; RUN: opt -passes=licm -verify-memoryssa -S %s | FileCheck %s ; REQUIRES: asserts target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/MemorySSA/unreachable.ll b/llvm/test/Analysis/MemorySSA/unreachable.ll --- a/llvm/test/Analysis/MemorySSA/unreachable.ll +++ b/llvm/test/Analysis/MemorySSA/unreachable.ll @@ -1,4 +1,4 @@ -; RUN: opt -licm -verify-memoryssa %s -S | FileCheck %s +; RUN: opt -passes=licm -verify-memoryssa %s -S | FileCheck %s ; REQUIRES: asserts ; Ensure verification doesn't fail with unreachable blocks. diff --git a/llvm/test/Analysis/MustExecute/infinite_loops.ll b/llvm/test/Analysis/MustExecute/infinite_loops.ll --- a/llvm/test/Analysis/MustExecute/infinite_loops.ll +++ b/llvm/test/Analysis/MustExecute/infinite_loops.ll @@ -72,7 +72,7 @@ } ; Make sure that sdiv is NOT marked as mustexec. -define void @test_impossible_exit_in_untaken_block(i1 %cond, i32 %a, i32 %b, i32* %p) { +define void @test_impossible_exit_in_untaken_block(i1 %cond, i32 %a, i32 %b, ptr %p) { ; CHECK-LABEL: @test_impossible_exit_in_untaken_block( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -82,7 +82,7 @@ ; CHECK: maybe_taken: ; CHECK-NOT: mustexec ; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: store i32 [[DIV]], i32* [[P:%.*]] +; CHECK-NEXT: store i32 [[DIV]], ptr [[P:%.*]] ; CHECK-NEXT: br i1 true, label [[BACKEDGE]], label [[EXIT:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; (mustexec in: loop) @@ -99,7 +99,7 @@ maybe_taken: %div = sdiv i32 %a, %b - store i32 %div, i32* %p + store i32 %div, ptr %p br i1 true, label %backedge, label %exit backedge: diff --git a/llvm/test/Analysis/MustExecute/loop-header.ll b/llvm/test/Analysis/MustExecute/loop-header.ll --- a/llvm/test/Analysis/MustExecute/loop-header.ll +++ b/llvm/test/Analysis/MustExecute/loop-header.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -disable-output -print-mustexecute %s 2>&1 | FileCheck %s -define i1 @header_with_icf(i32* noalias %p, i32 %high) { +define i1 @header_with_icf(ptr noalias %p, i32 %high) { ; CHECK-LABEL: @header_with_icf( ; CHECK-LABEL: loop: ; CHECK: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; (mustexec in: loop) -; CHECK: %v = load i32, i32* %p, align 4 ; (mustexec in: loop) +; CHECK: %v = load i32, ptr %p, align 4 ; (mustexec in: loop) ; CHECK: call void @maythrow_and_use(i32 %v) ; (mustexec in: loop) ; CHECK-NOT: mustexec @@ -14,7 +14,7 @@ loop: %iv = phi i32 [0, %entry], [%iv.next, %loop] - %v = load i32, i32* %p + %v = load i32, ptr %p call void @maythrow_and_use(i32 %v) %iv.next = add nsw nuw i32 %iv, 1 %exit.test = icmp 
slt i32 %iv, %high @@ -24,11 +24,11 @@ ret i1 false } -define i1 @split_header(i32* noalias %p, i32 %high) { +define i1 @split_header(ptr noalias %p, i32 %high) { ; CHECK-LABEL: @split_header( ; CHECK-LABEL: loop: ; CHECK: %iv = phi i32 [ 0, %entry ], [ %iv.next, %next ] ; (mustexec in: loop) -; CHECK: %v = load i32, i32* %p, align 4 ; (mustexec in: loop) +; CHECK: %v = load i32, ptr %p, align 4 ; (mustexec in: loop) ; CHECK: br label %next ; (mustexec in: loop) ; CHECK-NOT: mustexec entry: @@ -36,7 +36,7 @@ loop: %iv = phi i32 [0, %entry], [%iv.next, %next] - %v = load i32, i32* %p + %v = load i32, ptr %p br label %next next: call void @maythrow_and_use(i32 %v) @@ -50,13 +50,13 @@ ; FIXME: everything in inner loop header should be must execute ; for outer as well -define i1 @nested(i32* noalias %p, i32 %high) { +define i1 @nested(ptr noalias %p, i32 %high) { ; CHECK-LABEL: @nested ; CHECK-LABEL: loop: ; preds = %next ; CHECK: %iv = phi i32 [ 0, %entry ], [ %iv.next, %next ] ; (mustexec in: loop) ; CHECK: br label %inner_loop ; (mustexec in: loop) ; CHECK-LABEL: inner_loop: -; CHECK: %v = load i32, i32* %p, align 4 ; (mustexec in: inner_loop) +; CHECK: %v = load i32, ptr %p, align 4 ; (mustexec in: inner_loop) ; CHECK: %inner.test = icmp eq i32 %v, 0 ; (mustexec in: inner_loop) ; CHECK: br i1 %inner.test, label %inner_loop, label %next ; (mustexec in: inner_loop) ; CHECK-NOT: mustexec @@ -69,7 +69,7 @@ br label %inner_loop inner_loop: - %v = load i32, i32* %p + %v = load i32, ptr %p %inner.test = icmp eq i32 %v, 0 br i1 %inner.test, label %inner_loop, label %next @@ -83,13 +83,13 @@ ret i1 false } -define i1 @nested_no_throw(i32* noalias %p, i32 %high) { +define i1 @nested_no_throw(ptr noalias %p, i32 %high) { ; CHECK-LABEL: @nested_no_throw ; CHECK-LABEL: loop: ; preds = %next ; CHECK: %iv = phi i32 [ 0, %entry ], [ %iv.next, %next ] ; (mustexec in: loop) ; CHECK: br label %inner_loop ; (mustexec in: loop) ; CHECK-LABEL: inner_loop: -; CHECK: %v = load i32, i32* %p, align 4 ; (mustexec in 2 loops: inner_loop, loop) +; CHECK: %v = load i32, ptr %p, align 4 ; (mustexec in 2 loops: inner_loop, loop) ; CHECK: %inner.test = icmp eq i32 %v, 0 ; (mustexec in 2 loops: inner_loop, loop) ; CHECK: br i1 %inner.test, label %inner_loop, label %next ; (mustexec in 2 loops: inner_loop, loop) ; CHECK-LABEL: next: @@ -105,7 +105,7 @@ br label %inner_loop inner_loop: - %v = load i32, i32* %p + %v = load i32, ptr %p %inner.test = icmp eq i32 %v, 0 br i1 %inner.test, label %inner_loop, label %next @@ -121,13 +121,13 @@ ; Since all the instructions in the loop dominate the only exit ; and there's no implicit control flow in the loop, all must execute ; FIXME: handled by loop safety info, test it -define i1 @nothrow_loop(i32* noalias %p, i32 %high) { +define i1 @nothrow_loop(ptr noalias %p, i32 %high) { ; CHECK-LABEL: @nothrow_loop( ; CHECK-LABEL: loop: ; CHECK: %iv = phi i32 [ 0, %entry ], [ %iv.next, %next ] ; (mustexec in: loop) ; CHECK: br label %next ; (mustexec in: loop) ; CHECK-LABEL: next: -; CHECK: %v = load i32, i32* %p, align 4 ; (mustexec in: loop) +; CHECK: %v = load i32, ptr %p, align 4 ; (mustexec in: loop) ; CHECK: %iv.next = add nuw nsw i32 %iv, 1 ; (mustexec in: loop) ; CHECK: %exit.test = icmp slt i32 %iv, %high ; (mustexec in: loop) ; CHECK: br i1 %exit.test, label %exit, label %loop ; (mustexec in: loop) @@ -140,7 +140,7 @@ %iv = phi i32 [0, %entry], [%iv.next, %next] br label %next next: - %v = load i32, i32* %p + %v = load i32, ptr %p %iv.next = add nsw nuw i32 %iv, 1 %exit.test = icmp 
slt i32 %iv, %high br i1 %exit.test, label %exit, label %loop diff --git a/llvm/test/Analysis/MustExecute/must_be_executed_context.ll b/llvm/test/Analysis/MustExecute/must_be_executed_context.ll --- a/llvm/test/Analysis/MustExecute/must_be_executed_context.ll +++ b/llvm/test/Analysis/MustExecute/must_be_executed_context.ll @@ -386,37 +386,37 @@ declare void @G() nounwind willreturn -declare i32 @g(i32*) nounwind willreturn +declare i32 @g(ptr) nounwind willreturn -declare void @h(i32*) nounwind willreturn +declare void @h(ptr) nounwind willreturn -define i32 @nonnull_exec_ctx_1(i32* %a, i32 %b) { +define i32 @nonnull_exec_ctx_1(ptr %a, i32 %b) { ; MBEC: -- Explore context of: %tmp3 = icmp eq i32 %b, 0 ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp3 = icmp eq i32 %b, 0 ; MBEC-NEXT: [F: nonnull_exec_ctx_1] br i1 %tmp3, label %ex, label %hd ; MBEC-NEXT: -- Explore context of: br i1 %tmp3, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_1] br i1 %tmp3, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp3 = icmp eq i32 %b, 0 -; MBEC-NEXT: -- Explore context of: %tmp5 = tail call i32 @g(i32* nonnull %a) -; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp5 = tail call i32 @g(i32* nonnull %a) +; MBEC-NEXT: -- Explore context of: %tmp5 = tail call i32 @g(ptr nonnull %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp5 = tail call i32 @g(ptr nonnull %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_1] ret i32 %tmp5 ; MBEC-NEXT: [F: nonnull_exec_ctx_1] br i1 %tmp3, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp3 = icmp eq i32 %b, 0 ; MBEC-NEXT: -- Explore context of: ret i32 %tmp5 ; MBEC-NEXT: [F: nonnull_exec_ctx_1] ret i32 %tmp5 -; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp5 = tail call i32 @g(i32* nonnull %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp5 = tail call i32 @g(ptr nonnull %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_1] br i1 %tmp3, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp3 = icmp eq i32 %b, 0 ; MBEC-NEXT: -- Explore context of: %tmp7 = phi i32 [ %tmp8, %hd ], [ 0, %en ] ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp7 = phi i32 [ %tmp8, %hd ], [ 0, %en ] -; MBEC-NEXT: [F: nonnull_exec_ctx_1] tail call void @h(i32* %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_1] tail call void @h(ptr %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp8 = add nuw i32 %tmp7, 1 ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp9 = icmp eq i32 %tmp8, %b ; MBEC-NEXT: [F: nonnull_exec_ctx_1] br i1 %tmp9, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_1] br i1 %tmp3, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp3 = icmp eq i32 %b, 0 -; MBEC-NEXT: -- Explore context of: tail call void @h(i32* %a) -; MBEC-NEXT: [F: nonnull_exec_ctx_1] tail call void @h(i32* %a) +; MBEC-NEXT: -- Explore context of: tail call void @h(ptr %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_1] tail call void @h(ptr %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp8 = add nuw i32 %tmp7, 1 ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp9 = icmp eq i32 %tmp8, %b ; MBEC-NEXT: [F: nonnull_exec_ctx_1] br i1 %tmp9, label %ex, label %hd @@ -427,7 +427,7 @@ ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp8 = add nuw i32 %tmp7, 1 ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp9 = icmp eq i32 %tmp8, %b ; MBEC-NEXT: [F: nonnull_exec_ctx_1] br i1 %tmp9, label %ex, label %hd -; MBEC-NEXT: [F: nonnull_exec_ctx_1] tail call void @h(i32* %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_1] tail call void @h(ptr %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp7 = phi i32 [ %tmp8, %hd ], [ 0, %en ] ; MBEC-NEXT: [F: nonnull_exec_ctx_1] br i1 %tmp3, label %ex, label %hd ; MBEC-NEXT: [F: 
nonnull_exec_ctx_1] %tmp3 = icmp eq i32 %b, 0 @@ -435,7 +435,7 @@ ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp9 = icmp eq i32 %tmp8, %b ; MBEC-NEXT: [F: nonnull_exec_ctx_1] br i1 %tmp9, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp8 = add nuw i32 %tmp7, 1 -; MBEC-NEXT: [F: nonnull_exec_ctx_1] tail call void @h(i32* %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_1] tail call void @h(ptr %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp7 = phi i32 [ %tmp8, %hd ], [ 0, %en ] ; MBEC-NEXT: [F: nonnull_exec_ctx_1] br i1 %tmp3, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp3 = icmp eq i32 %b, 0 @@ -443,7 +443,7 @@ ; MBEC-NEXT: [F: nonnull_exec_ctx_1] br i1 %tmp9, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp9 = icmp eq i32 %tmp8, %b ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp8 = add nuw i32 %tmp7, 1 -; MBEC-NEXT: [F: nonnull_exec_ctx_1] tail call void @h(i32* %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_1] tail call void @h(ptr %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp7 = phi i32 [ %tmp8, %hd ], [ 0, %en ] ; MBEC-NEXT: [F: nonnull_exec_ctx_1] br i1 %tmp3, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_1] %tmp3 = icmp eq i32 %b, 0 @@ -452,54 +452,54 @@ br i1 %tmp3, label %ex, label %hd ex: - %tmp5 = tail call i32 @g(i32* nonnull %a) + %tmp5 = tail call i32 @g(ptr nonnull %a) ret i32 %tmp5 hd: %tmp7 = phi i32 [ %tmp8, %hd ], [ 0, %en ] - tail call void @h(i32* %a) + tail call void @h(ptr %a) %tmp8 = add nuw i32 %tmp7, 1 %tmp9 = icmp eq i32 %tmp8, %b br i1 %tmp9, label %ex, label %hd } -define i32 @nonnull_exec_ctx_2(i32* %a, i32 %b) nounwind willreturn { +define i32 @nonnull_exec_ctx_2(ptr %a, i32 %b) nounwind willreturn { ; MBEC: -- Explore context of: %tmp3 = icmp eq i32 %b, 0 ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp3 = icmp eq i32 %b, 0 ; MBEC-NEXT: [F: nonnull_exec_ctx_2] br i1 %tmp3, label %ex, label %hd -; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(i32* nonnull %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(ptr nonnull %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_2] ret i32 %tmp5 ; MBEC-NEXT: -- Explore context of: br i1 %tmp3, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_2] br i1 %tmp3, label %ex, label %hd -; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(i32* nonnull %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(ptr nonnull %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_2] ret i32 %tmp5 ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp3 = icmp eq i32 %b, 0 -; MBEC-NEXT: -- Explore context of: %tmp5 = tail call i32 @g(i32* nonnull %a) -; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(i32* nonnull %a) +; MBEC-NEXT: -- Explore context of: %tmp5 = tail call i32 @g(ptr nonnull %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(ptr nonnull %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_2] ret i32 %tmp5 ; MBEC-NEXT: [F: nonnull_exec_ctx_2] br i1 %tmp3, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp3 = icmp eq i32 %b, 0 ; MBEC-NEXT: -- Explore context of: ret i32 %tmp5 ; MBEC-NEXT: [F: nonnull_exec_ctx_2] ret i32 %tmp5 -; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(i32* nonnull %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(ptr nonnull %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_2] br i1 %tmp3, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp3 = icmp eq i32 %b, 0 ; MBEC-NEXT: -- Explore context of: %tmp7 = phi i32 [ %tmp8, %hd ], [ 0, %en ] ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp7 = phi i32 [ %tmp8, %hd ], [ 0, %en ] -; 
MBEC-NEXT: [F: nonnull_exec_ctx_2] tail call void @h(i32* %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_2] tail call void @h(ptr %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp8 = add nuw i32 %tmp7, 1 ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp9 = icmp eq i32 %tmp8, %b ; MBEC-NEXT: [F: nonnull_exec_ctx_2] br i1 %tmp9, label %ex, label %hd -; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(i32* nonnull %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(ptr nonnull %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_2] ret i32 %tmp5 ; MBEC-NEXT: [F: nonnull_exec_ctx_2] br i1 %tmp3, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp3 = icmp eq i32 %b, 0 -; MBEC-NEXT: -- Explore context of: tail call void @h(i32* %a) -; MBEC-NEXT: [F: nonnull_exec_ctx_2] tail call void @h(i32* %a) +; MBEC-NEXT: -- Explore context of: tail call void @h(ptr %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_2] tail call void @h(ptr %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp8 = add nuw i32 %tmp7, 1 ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp9 = icmp eq i32 %tmp8, %b ; MBEC-NEXT: [F: nonnull_exec_ctx_2] br i1 %tmp9, label %ex, label %hd -; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(i32* nonnull %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(ptr nonnull %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_2] ret i32 %tmp5 ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp7 = phi i32 [ %tmp8, %hd ], [ 0, %en ] ; MBEC-NEXT: [F: nonnull_exec_ctx_2] br i1 %tmp3, label %ex, label %hd @@ -508,29 +508,29 @@ ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp8 = add nuw i32 %tmp7, 1 ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp9 = icmp eq i32 %tmp8, %b ; MBEC-NEXT: [F: nonnull_exec_ctx_2] br i1 %tmp9, label %ex, label %hd -; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(i32* nonnull %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(ptr nonnull %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_2] ret i32 %tmp5 -; MBEC-NEXT: [F: nonnull_exec_ctx_2] tail call void @h(i32* %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_2] tail call void @h(ptr %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp7 = phi i32 [ %tmp8, %hd ], [ 0, %en ] ; MBEC-NEXT: [F: nonnull_exec_ctx_2] br i1 %tmp3, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp3 = icmp eq i32 %b, 0 ; MBEC-NEXT: -- Explore context of: %tmp9 = icmp eq i32 %tmp8, %b ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp9 = icmp eq i32 %tmp8, %b ; MBEC-NEXT: [F: nonnull_exec_ctx_2] br i1 %tmp9, label %ex, label %hd -; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(i32* nonnull %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(ptr nonnull %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_2] ret i32 %tmp5 ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp8 = add nuw i32 %tmp7, 1 -; MBEC-NEXT: [F: nonnull_exec_ctx_2] tail call void @h(i32* %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_2] tail call void @h(ptr %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp7 = phi i32 [ %tmp8, %hd ], [ 0, %en ] ; MBEC-NEXT: [F: nonnull_exec_ctx_2] br i1 %tmp3, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp3 = icmp eq i32 %b, 0 ; MBEC-NEXT: -- Explore context of: br i1 %tmp9, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_2] br i1 %tmp9, label %ex, label %hd -; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(i32* nonnull %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp5 = tail call i32 @g(ptr nonnull %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_2] ret i32 %tmp5 ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp9 = icmp eq i32 %tmp8, %b ; MBEC-NEXT: [F: nonnull_exec_ctx_2] 
%tmp8 = add nuw i32 %tmp7, 1 -; MBEC-NEXT: [F: nonnull_exec_ctx_2] tail call void @h(i32* %a) +; MBEC-NEXT: [F: nonnull_exec_ctx_2] tail call void @h(ptr %a) ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp7 = phi i32 [ %tmp8, %hd ], [ 0, %en ] ; MBEC-NEXT: [F: nonnull_exec_ctx_2] br i1 %tmp3, label %ex, label %hd ; MBEC-NEXT: [F: nonnull_exec_ctx_2] %tmp3 = icmp eq i32 %b, 0 @@ -539,12 +539,12 @@ br i1 %tmp3, label %ex, label %hd ex: - %tmp5 = tail call i32 @g(i32* nonnull %a) + %tmp5 = tail call i32 @g(ptr nonnull %a) ret i32 %tmp5 hd: %tmp7 = phi i32 [ %tmp8, %hd ], [ 0, %en ] - tail call void @h(i32* %a) + tail call void @h(ptr %a) %tmp8 = add nuw i32 %tmp7, 1 %tmp9 = icmp eq i32 %tmp8, %b br i1 %tmp9, label %ex, label %hd diff --git a/llvm/test/Analysis/PhiValues/basic.ll b/llvm/test/Analysis/PhiValues/basic.ll --- a/llvm/test/Analysis/PhiValues/basic.ll +++ b/llvm/test/Analysis/PhiValues/basic.ll @@ -3,7 +3,7 @@ @X = common global i32 0 ; CHECK-LABEL: PHI Values for function: simple -define void @simple(i32* %ptr) { +define void @simple(ptr %ptr) { entry: br i1 undef, label %if, label %else @@ -21,7 +21,7 @@ ; CHECK: PHI %phi2 has values: ; CHECK-DAG: @X ; CHECK-DAG: %ptr - %phi2 = phi i32* [ @X, %if ], [ %ptr, %else ] + %phi2 = phi ptr [ @X, %if ], [ %ptr, %else ] ret void } diff --git a/llvm/test/Analysis/PhiValues/big_phi.ll b/llvm/test/Analysis/PhiValues/big_phi.ll --- a/llvm/test/Analysis/PhiValues/big_phi.ll +++ b/llvm/test/Analysis/PhiValues/big_phi.ll @@ -5,15 +5,15 @@ ; analysis doesn't repeatedly add a phis values to itself until it segfaults. ; CHECK-LABEL: PHI Values for function: fn -define void @fn(i8* %arg) { +define void @fn(ptr %arg) { entry: br label %for.body for.body: ; CHECK: PHI %phi1 has values: -; CHECK-DAG: i8* %arg -; CHECK-DAG: i8* undef - %phi1 = phi i8* [ %arg, %entry ], [ %phi2, %end ] +; CHECK-DAG: ptr %arg +; CHECK-DAG: ptr undef + %phi1 = phi ptr [ %arg, %entry ], [ %phi2, %end ] switch i32 undef, label %end [ i32 1, label %bb1 i32 2, label %bb2 @@ -71,8 +71,8 @@ end: ; CHECK: PHI %phi2 has values: -; CHECK-DAG: i8* %arg -; CHECK-DAG: i8* undef - %phi2 = phi i8* [ %phi1, %for.body ], [ %phi1, %bb1 ], [ %phi1, %bb2 ], [ %phi1, %bb3 ], [ %phi1, %bb4 ], [ %phi1, %bb5 ], [ %phi1, %bb6 ], [ %phi1, %bb7 ], [ undef, %bb8 ], [ %phi1, %bb9 ], [ %phi1, %bb10 ], [ %phi1, %bb11 ], [ %phi1, %bb12 ], [ %phi1, %bb13 ] +; CHECK-DAG: ptr %arg +; CHECK-DAG: ptr undef + %phi2 = phi ptr [ %phi1, %for.body ], [ %phi1, %bb1 ], [ %phi1, %bb2 ], [ %phi1, %bb3 ], [ %phi1, %bb4 ], [ %phi1, %bb5 ], [ %phi1, %bb6 ], [ %phi1, %bb7 ], [ undef, %bb8 ], [ %phi1, %bb9 ], [ %phi1, %bb10 ], [ %phi1, %bb11 ], [ %phi1, %bb12 ], [ %phi1, %bb13 ] br label %for.body } diff --git a/llvm/test/Analysis/PhiValues/long_phi_chain.ll b/llvm/test/Analysis/PhiValues/long_phi_chain.ll --- a/llvm/test/Analysis/PhiValues/long_phi_chain.ll +++ b/llvm/test/Analysis/PhiValues/long_phi_chain.ll @@ -4,14 +4,14 @@ ; phi values analysis to segfault if it's not careful about that kind of thing. 
; CHECK-LABEL: PHI Values for function: fn -define void @fn(i32* %arg) { +define void @fn(ptr %arg) { entry: br label %while1.cond while1.cond: ; CHECK: PHI %phi1 has values: -; CHECK: i32* %arg - %phi1 = phi i32* [ %arg, %entry ], [ %phi2, %while1.then ] +; CHECK: ptr %arg + %phi1 = phi ptr [ %arg, %entry ], [ %phi2, %while1.then ] br i1 undef, label %while1.end, label %while1.body while1.body: @@ -22,8 +22,8 @@ while1.then: ; CHECK: PHI %phi2 has values: -; CHECK: i32* %arg - %phi2 = phi i32* [ %arg, %while1.if ], [ %phi1, %while1.body ] +; CHECK: ptr %arg + %phi2 = phi ptr [ %arg, %while1.if ], [ %phi1, %while1.body ] br label %while1.cond while1.end: @@ -31,8 +31,8 @@ while2.cond1: ; CHECK: PHI %phi3 has values: -; CHECK: i32* %arg - %phi3 = phi i32* [ %phi1, %while1.end ], [ %phi5, %while2.then ] +; CHECK: ptr %arg + %phi3 = phi ptr [ %phi1, %while1.end ], [ %phi5, %while2.then ] br i1 undef, label %while2.end, label %while2.body1 while2.body1: @@ -40,8 +40,8 @@ while2.cond2: ; CHECK: PHI %phi4 has values: -; CHECK: i32* %arg - %phi4 = phi i32* [ %phi3, %while2.body1 ], [ %phi4, %while2.if ] +; CHECK: ptr %arg + %phi4 = phi ptr [ %phi3, %while2.body1 ], [ %phi4, %while2.if ] br i1 undef, label %while2.then, label %while2.if while2.if: @@ -49,8 +49,8 @@ while2.then: ; CHECK: PHI %phi5 has values: -; CHECK: i32* %arg - %phi5 = phi i32* [ %phi3, %while2.body1 ], [ %phi4, %while2.cond2 ] +; CHECK: ptr %arg + %phi5 = phi ptr [ %phi3, %while2.body1 ], [ %phi4, %while2.cond2 ] br label %while2.cond1 while2.end: @@ -58,14 +58,14 @@ while3.cond1: ; CHECK: PHI %phi6 has values: -; CHECK: i32* %arg - %phi6 = phi i32* [ %phi3, %while2.end ], [ %phi7, %while3.cond2 ] +; CHECK: ptr %arg + %phi6 = phi ptr [ %phi3, %while2.end ], [ %phi7, %while3.cond2 ] br i1 undef, label %while3.end, label %while3.cond2 while3.cond2: ; CHECK: PHI %phi7 has values: -; CHECK: i32* %arg - %phi7 = phi i32* [ %phi6, %while3.cond1 ], [ %phi7, %while3.body ] +; CHECK: ptr %arg + %phi7 = phi ptr [ %phi6, %while3.cond1 ], [ %phi7, %while3.body ] br i1 undef, label %while3.cond1, label %while3.body while3.body: @@ -76,8 +76,8 @@ while4.cond1: ; CHECK: PHI %phi8 has values: -; CHECK: i32* %arg - %phi8 = phi i32* [ %phi6, %while3.end ], [ %phi10, %while4.then ] +; CHECK: ptr %arg + %phi8 = phi ptr [ %phi6, %while3.end ], [ %phi10, %while4.then ] br i1 undef, label %while4.end, label %while4.if while4.if: @@ -85,8 +85,8 @@ while4.cond2: ; CHECK: PHI %phi9 has values: -; CHECK: i32* %arg - %phi9 = phi i32* [ %phi8, %while4.if ], [ %phi9, %while4.body ] +; CHECK: ptr %arg + %phi9 = phi ptr [ %phi8, %while4.if ], [ %phi9, %while4.body ] br i1 undef, label %while4.then, label %while4.body while4.body: @@ -94,8 +94,8 @@ while4.then: ; CHECK: PHI %phi10 has values: -; CHECK: i32* %arg - %phi10 = phi i32* [ %phi8, %while4.if ], [ %phi9, %while4.cond2 ] +; CHECK: ptr %arg + %phi10 = phi ptr [ %phi8, %while4.if ], [ %phi9, %while4.cond2 ] br label %while4.cond1 while4.end: @@ -103,8 +103,8 @@ while5.cond: ; CHECK: PHI %phi11 has values: -; CHECK: i32* %arg - %phi11 = phi i32* [ %phi8, %while4.end ], [ %phi13, %while5.then ] +; CHECK: ptr %arg + %phi11 = phi ptr [ %phi8, %while4.end ], [ %phi13, %while5.then ] br i1 undef, label %while5.end, label %while5.body1 while5.body1: @@ -112,8 +112,8 @@ while5.if: ; CHECK: PHI %phi12 has values: -; CHECK: i32* %arg - %phi12 = phi i32* [ %phi11, %while5.body1 ], [ %phi12, %while5.body2 ] +; CHECK: ptr %arg + %phi12 = phi ptr [ %phi11, %while5.body1 ], [ %phi12, %while5.body2 ] br i1 undef, label 
%while5.then, label %while5.body2 while5.body2: @@ -121,8 +121,8 @@ while5.then: ; CHECK: PHI %phi13 has values: -; CHECK: i32* %arg - %phi13 = phi i32* [ %phi11, %while5.body1 ], [ %phi12, %while5.if ] +; CHECK: ptr %arg + %phi13 = phi ptr [ %phi11, %while5.body1 ], [ %phi12, %while5.if ] br label %while5.cond while5.end: @@ -130,13 +130,13 @@ while6.cond1: ; CHECK: PHI %phi14 has values: -; CHECK: i32* %arg - %phi14 = phi i32* [ %phi11, %while5.end ], [ %phi14, %while6.cond1 ] +; CHECK: ptr %arg + %phi14 = phi ptr [ %phi11, %while5.end ], [ %phi14, %while6.cond1 ] br i1 undef, label %while6.cond2, label %while6.cond1 while6.cond2: ; CHECK: PHI %phi15 has values: -; CHECK: i32* %arg - %phi15 = phi i32* [ %phi14, %while6.cond1 ], [ %phi15, %while6.cond2 ] +; CHECK: ptr %arg + %phi15 = phi ptr [ %phi14, %while6.cond1 ], [ %phi15, %while6.cond2 ] br label %while6.cond2 } diff --git a/llvm/test/Analysis/PostDominators/infinite-loop.ll b/llvm/test/Analysis/PostDominators/infinite-loop.ll --- a/llvm/test/Analysis/PostDominators/infinite-loop.ll +++ b/llvm/test/Analysis/PostDominators/infinite-loop.ll @@ -4,7 +4,7 @@ define void @fn1() { entry: - store i32 5, i32* @a, align 4 + store i32 5, ptr @a, align 4 %call = call i32 (...) @foo() %tobool = icmp ne i32 %call, 0 br i1 %tobool, label %if.then, label %if.end @@ -16,7 +16,7 @@ br label %loop if.end: ; preds = %entry - store i32 6, i32* @a, align 4 + store i32 6, ptr @a, align 4 ret void } diff --git a/llvm/test/Analysis/PostDominators/infinite-loop2.ll b/llvm/test/Analysis/PostDominators/infinite-loop2.ll --- a/llvm/test/Analysis/PostDominators/infinite-loop2.ll +++ b/llvm/test/Analysis/PostDominators/infinite-loop2.ll @@ -4,7 +4,7 @@ define void @fn1() { entry: - store i32 5, i32* @a, align 4 + store i32 5, ptr @a, align 4 %call = call i32 (...) @foo() %tobool = icmp ne i32 %call, 0 br i1 %tobool, label %if.then, label %if.end @@ -13,12 +13,12 @@ br label %loop loop: ; preds = %loop, %if.then - %0 = load i32, i32* @a, align 4 + %0 = load i32, ptr @a, align 4 call void @bar(i32 %0) br label %loop if.end: ; preds = %entry - store i32 6, i32* @a, align 4 + store i32 6, ptr @a, align 4 ret void } diff --git a/llvm/test/Analysis/PostDominators/infinite-loop3.ll b/llvm/test/Analysis/PostDominators/infinite-loop3.ll --- a/llvm/test/Analysis/PostDominators/infinite-loop3.ll +++ b/llvm/test/Analysis/PostDominators/infinite-loop3.ll @@ -4,7 +4,7 @@ define void @fn1() { entry: - store i32 5, i32* @a, align 4 + store i32 5, ptr @a, align 4 %call = call i32 (...) 
@foo() %tobool = icmp ne i32 %call, 0 br i1 %tobool, label %if.then, label %if.end @@ -13,12 +13,12 @@ br label %loop loop: ; preds = %loop, %if.then - %0 = load i32, i32* @a, align 4 + %0 = load i32, ptr @a, align 4 call void @bar(i32 %0) br i1 true, label %loop, label %if.then if.end: ; preds = %entry - store i32 6, i32* @a, align 4 + store i32 6, ptr @a, align 4 ret void } diff --git a/llvm/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll b/llvm/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll --- a/llvm/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll +++ b/llvm/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -indvars -S | FileCheck %s +; RUN: opt < %s -passes=indvars -S | FileCheck %s ; PR1798 ; CHECK: printd(i32 1206807378) diff --git a/llvm/test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll b/llvm/test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll --- a/llvm/test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll +++ b/llvm/test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll @@ -1,4 +1,4 @@ -; RUN: opt -indvars < %s +; RUN: opt -passes=indvars < %s ; PR9424: Attempt to use a SCEVCouldNotCompute object! ; The inner loop computes the Step and Start of the outer loop. ; Call that Vexit. The outer End value is max(2,Vexit), because diff --git a/llvm/test/Analysis/ScalarEvolution/expander-replace-congruent-ivs.ll b/llvm/test/Analysis/ScalarEvolution/expander-replace-congruent-ivs.ll --- a/llvm/test/Analysis/ScalarEvolution/expander-replace-congruent-ivs.ll +++ b/llvm/test/Analysis/ScalarEvolution/expander-replace-congruent-ivs.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -indvars < %s | FileCheck %s +; RUN: opt -S -passes=indvars < %s | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" diff --git a/llvm/test/Analysis/ScalarEvolution/ext-antecedent.ll b/llvm/test/Analysis/ScalarEvolution/ext-antecedent.ll --- a/llvm/test/Analysis/ScalarEvolution/ext-antecedent.ll +++ b/llvm/test/Analysis/ScalarEvolution/ext-antecedent.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -indvars < %s | FileCheck %s +; RUN: opt -S -passes=indvars < %s | FileCheck %s declare void @use(i1) diff --git a/llvm/test/Analysis/ScalarEvolution/guards.ll b/llvm/test/Analysis/ScalarEvolution/guards.ll --- a/llvm/test/Analysis/ScalarEvolution/guards.ll +++ b/llvm/test/Analysis/ScalarEvolution/guards.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -indvars < %s | FileCheck %s +; RUN: opt -S -passes=indvars < %s | FileCheck %s ; Check that SCEV is able to recognize and use guards to prove ; conditions gaurding loop entries and backedges. 
This isn't intended diff --git a/llvm/test/Analysis/ScalarEvolution/implied-via-addition.ll b/llvm/test/Analysis/ScalarEvolution/implied-via-addition.ll --- a/llvm/test/Analysis/ScalarEvolution/implied-via-addition.ll +++ b/llvm/test/Analysis/ScalarEvolution/implied-via-addition.ll @@ -1,4 +1,4 @@ -; RUN: opt -indvars -S < %s | FileCheck %s +; RUN: opt -passes=indvars -S < %s | FileCheck %s declare void @use(i1) diff --git a/llvm/test/Analysis/ScalarEvolution/infer-via-ranges.ll b/llvm/test/Analysis/ScalarEvolution/infer-via-ranges.ll --- a/llvm/test/Analysis/ScalarEvolution/infer-via-ranges.ll +++ b/llvm/test/Analysis/ScalarEvolution/infer-via-ranges.ll @@ -1,4 +1,4 @@ -; RUN: opt -indvars -S < %s | FileCheck %s +; RUN: opt -passes=indvars -S < %s | FileCheck %s define void @infer_via_ranges(i32 *%arr, i32 %n) { ; CHECK-LABEL: @infer_via_ranges diff --git a/llvm/test/Analysis/ScalarEvolution/latch-dominating-conditions.ll b/llvm/test/Analysis/ScalarEvolution/latch-dominating-conditions.ll --- a/llvm/test/Analysis/ScalarEvolution/latch-dominating-conditions.ll +++ b/llvm/test/Analysis/ScalarEvolution/latch-dominating-conditions.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -indvars < %s | FileCheck %s +; RUN: opt -S -passes=indvars < %s | FileCheck %s declare void @side_effect(i1) diff --git a/llvm/test/Analysis/ScalarEvolution/nw-sub-is-not-nw-add.ll b/llvm/test/Analysis/ScalarEvolution/nw-sub-is-not-nw-add.ll --- a/llvm/test/Analysis/ScalarEvolution/nw-sub-is-not-nw-add.ll +++ b/llvm/test/Analysis/ScalarEvolution/nw-sub-is-not-nw-add.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -indvars < %s | FileCheck %s +; RUN: opt -S -passes=indvars < %s | FileCheck %s ; Check that SCEV does not assume sub nuw X Y == add nuw X, -Y define void @f(i32* %loc) { diff --git a/llvm/test/Analysis/ScalarEvolution/pr18606-min-zeros.ll b/llvm/test/Analysis/ScalarEvolution/pr18606-min-zeros.ll --- a/llvm/test/Analysis/ScalarEvolution/pr18606-min-zeros.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr18606-min-zeros.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -indvars < %s | FileCheck %s +; RUN: opt -S -passes=indvars < %s | FileCheck %s ; CHECK: @test ; CHECK: %5 = add i32 %local_6_, %local_0_ diff --git a/llvm/test/Analysis/ScalarEvolution/pr18606.ll b/llvm/test/Analysis/ScalarEvolution/pr18606.ll --- a/llvm/test/Analysis/ScalarEvolution/pr18606.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr18606.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -indvars < %s | FileCheck %s +; RUN: opt -S -passes=indvars < %s | FileCheck %s ; CHECK: @main ; CHECK: %mul.lcssa5 = phi i32 [ %a.promoted4, %entry ], [ %mul.30, %for.body3 ] diff --git a/llvm/test/Analysis/ScalarEvolution/pr35890.ll b/llvm/test/Analysis/ScalarEvolution/pr35890.ll --- a/llvm/test/Analysis/ScalarEvolution/pr35890.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr35890.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -scalar-evolution-max-arith-depth=0 -indvars -S | FileCheck %s +; RUN: opt < %s -scalar-evolution-max-arith-depth=0 -passes=indvars -S | FileCheck %s target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128-ni:1" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/ScalarEvolution/pr3909.ll b/llvm/test/Analysis/ScalarEvolution/pr3909.ll --- a/llvm/test/Analysis/ScalarEvolution/pr3909.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr3909.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -indvars -disable-output +; RUN: opt < %s -passes=indvars -disable-output ; PR 3909 diff --git a/llvm/test/Analysis/ScalarEvolution/pr44605.ll b/llvm/test/Analysis/ScalarEvolution/pr44605.ll --- 
a/llvm/test/Analysis/ScalarEvolution/pr44605.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr44605.ll @@ -2,7 +2,7 @@ ; NOTE: Only %local_3_4 is important here. ; All other instructions are needed to lure LLVM into executing ; specific code to trigger a bug. -; RUN: opt < %s -indvars -S | FileCheck %s +; RUN: opt < %s -passes=indvars -S | FileCheck %s define i32 @test() { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: diff --git a/llvm/test/Analysis/ScalarEvolution/scev-canonical-mode.ll b/llvm/test/Analysis/ScalarEvolution/scev-canonical-mode.ll --- a/llvm/test/Analysis/ScalarEvolution/scev-canonical-mode.ll +++ b/llvm/test/Analysis/ScalarEvolution/scev-canonical-mode.ll @@ -1,6 +1,6 @@ ; PR26529: Check the assumption of IndVarSimplify to do SCEV expansion in literal mode ; instead of CanonicalMode is properly maintained in SCEVExpander::expand. -; RUN: opt -indvars < %s +; RUN: opt -passes=indvars < %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/ScalarEvolution/scev-expander-incorrect-nowrap.ll b/llvm/test/Analysis/ScalarEvolution/scev-expander-incorrect-nowrap.ll --- a/llvm/test/Analysis/ScalarEvolution/scev-expander-incorrect-nowrap.ll +++ b/llvm/test/Analysis/ScalarEvolution/scev-expander-incorrect-nowrap.ll @@ -1,4 +1,4 @@ -; RUN: opt -indvars -S < %s | FileCheck %s +; RUN: opt -passes=indvars -S < %s | FileCheck %s declare void @use(i32) declare void @use.i8(i8) diff --git a/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll b/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll --- a/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll +++ b/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll @@ -5,13 +5,13 @@ %tmp = alloca i8 %dst = alloca i8 %src = alloca i8 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 1, i1 false), !alias.scope ![[SCOPE:[0-9]+]] - call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %src), !noalias !4 - store i8 %input, i8* %src - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 1, i1 false), !alias.scope !0 - call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %src), !noalias !4 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 1, i1 false), !alias.scope !4 - %ret_value = load i8, i8* %dst +; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %src, i64 1, i1 false), !alias.scope ![[SCOPE:[0-9]+]] + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %src), !noalias !4 + store i8 %input, ptr %src + call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 1, i1 false), !alias.scope !0 + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %src), !noalias !4 + call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp, i64 1, i1 false), !alias.scope !4 + %ret_value = load i8, ptr %dst ret i8 %ret_value } @@ -20,9 +20,9 @@ ; CHECK-DAG: ![[CALLEE0_B:[0-9]+]] = distinct !{!{{[0-9]+}}, !{{[0-9]+}}, !"callee0: %b"} ; CHECK-DAG: ![[SCOPE]] = !{![[CALLEE0_A]], ![[CALLEE0_B]]} -declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) -declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) +declare void @llvm.lifetime.start.p0(i64, ptr nocapture) +declare void @llvm.lifetime.end.p0(i64, ptr nocapture) +declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) !0 = !{!1, !7} !1 = distinct !{!1, !3, !"callee0: %a"} diff --git 
a/llvm/test/Analysis/ScopedNoAliasAA/basic-domains.ll b/llvm/test/Analysis/ScopedNoAliasAA/basic-domains.ll --- a/llvm/test/Analysis/ScopedNoAliasAA/basic-domains.ll +++ b/llvm/test/Analysis/ScopedNoAliasAA/basic-domains.ll @@ -2,20 +2,20 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -define void @foo1(float* nocapture %a, float* nocapture readonly %c) #0 { +define void @foo1(ptr nocapture %a, ptr nocapture readonly %c) #0 { entry: ; CHECK-LABEL: Function: foo1 - %0 = load float, float* %c, align 4, !alias.scope !0 - %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 - store float %0, float* %arrayidx.i, align 4, !noalias !6 + %0 = load float, ptr %c, align 4, !alias.scope !0 + %arrayidx.i = getelementptr inbounds float, ptr %a, i64 5 + store float %0, ptr %arrayidx.i, align 4, !noalias !6 - %1 = load float, float* %c, align 4, !alias.scope !7 - %arrayidx.i2 = getelementptr inbounds float, float* %a, i64 15 - store float %1, float* %arrayidx.i2, align 4, !noalias !6 + %1 = load float, ptr %c, align 4, !alias.scope !7 + %arrayidx.i2 = getelementptr inbounds float, ptr %a, i64 15 + store float %1, ptr %arrayidx.i2, align 4, !noalias !6 - %2 = load float, float* %c, align 4, !alias.scope !6 - %arrayidx.i3 = getelementptr inbounds float, float* %a, i64 16 - store float %2, float* %arrayidx.i3, align 4, !noalias !7 + %2 = load float, ptr %c, align 4, !alias.scope !6 + %arrayidx.i3 = getelementptr inbounds float, ptr %a, i64 16 + store float %2, ptr %arrayidx.i3, align 4, !noalias !7 ret void } @@ -40,16 +40,16 @@ ; A list of scopes from both domains. !0 = !{!1, !3, !4} -; CHECK: NoAlias: %0 = load float, float* %c, align 4, !alias.scope !0 <-> store float %0, float* %arrayidx.i, align 4, !noalias !6 -; CHECK: NoAlias: %0 = load float, float* %c, align 4, !alias.scope !0 <-> store float %1, float* %arrayidx.i2, align 4, !noalias !6 -; CHECK: MayAlias: %0 = load float, float* %c, align 4, !alias.scope !0 <-> store float %2, float* %arrayidx.i3, align 4, !noalias !7 -; CHECK: NoAlias: %1 = load float, float* %c, align 4, !alias.scope !7 <-> store float %0, float* %arrayidx.i, align 4, !noalias !6 -; CHECK: NoAlias: %1 = load float, float* %c, align 4, !alias.scope !7 <-> store float %1, float* %arrayidx.i2, align 4, !noalias !6 -; CHECK: NoAlias: %1 = load float, float* %c, align 4, !alias.scope !7 <-> store float %2, float* %arrayidx.i3, align 4, !noalias !7 -; CHECK: NoAlias: %2 = load float, float* %c, align 4, !alias.scope !6 <-> store float %0, float* %arrayidx.i, align 4, !noalias !6 -; CHECK: NoAlias: %2 = load float, float* %c, align 4, !alias.scope !6 <-> store float %1, float* %arrayidx.i2, align 4, !noalias !6 -; CHECK: MayAlias: %2 = load float, float* %c, align 4, !alias.scope !6 <-> store float %2, float* %arrayidx.i3, align 4, !noalias !7 -; CHECK: NoAlias: store float %1, float* %arrayidx.i2, align 4, !noalias !6 <-> store float %0, float* %arrayidx.i, align 4, !noalias !6 -; CHECK: NoAlias: store float %2, float* %arrayidx.i3, align 4, !noalias !7 <-> store float %0, float* %arrayidx.i, align 4, !noalias !6 -; CHECK: NoAlias: store float %2, float* %arrayidx.i3, align 4, !noalias !7 <-> store float %1, float* %arrayidx.i2, align 4, !noalias !6 +; CHECK: NoAlias: %0 = load float, ptr %c, align 4, !alias.scope !0 <-> store float %0, ptr %arrayidx.i, align 4, !noalias !6 +; CHECK: NoAlias: %0 = load float, 
ptr %c, align 4, !alias.scope !0 <-> store float %1, ptr %arrayidx.i2, align 4, !noalias !6 +; CHECK: MayAlias: %0 = load float, ptr %c, align 4, !alias.scope !0 <-> store float %2, ptr %arrayidx.i3, align 4, !noalias !7 +; CHECK: NoAlias: %1 = load float, ptr %c, align 4, !alias.scope !7 <-> store float %0, ptr %arrayidx.i, align 4, !noalias !6 +; CHECK: NoAlias: %1 = load float, ptr %c, align 4, !alias.scope !7 <-> store float %1, ptr %arrayidx.i2, align 4, !noalias !6 +; CHECK: NoAlias: %1 = load float, ptr %c, align 4, !alias.scope !7 <-> store float %2, ptr %arrayidx.i3, align 4, !noalias !7 +; CHECK: NoAlias: %2 = load float, ptr %c, align 4, !alias.scope !6 <-> store float %0, ptr %arrayidx.i, align 4, !noalias !6 +; CHECK: NoAlias: %2 = load float, ptr %c, align 4, !alias.scope !6 <-> store float %1, ptr %arrayidx.i2, align 4, !noalias !6 +; CHECK: MayAlias: %2 = load float, ptr %c, align 4, !alias.scope !6 <-> store float %2, ptr %arrayidx.i3, align 4, !noalias !7 +; CHECK: NoAlias: store float %1, ptr %arrayidx.i2, align 4, !noalias !6 <-> store float %0, ptr %arrayidx.i, align 4, !noalias !6 +; CHECK: NoAlias: store float %2, ptr %arrayidx.i3, align 4, !noalias !7 <-> store float %0, ptr %arrayidx.i, align 4, !noalias !6 +; CHECK: NoAlias: store float %2, ptr %arrayidx.i3, align 4, !noalias !7 <-> store float %1, ptr %arrayidx.i2, align 4, !noalias !6 diff --git a/llvm/test/Analysis/ScopedNoAliasAA/basic.ll b/llvm/test/Analysis/ScopedNoAliasAA/basic.ll --- a/llvm/test/Analysis/ScopedNoAliasAA/basic.ll +++ b/llvm/test/Analysis/ScopedNoAliasAA/basic.ll @@ -2,22 +2,22 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -define void @foo1(float* nocapture %a, float* nocapture readonly %c) #0 { +define void @foo1(ptr nocapture %a, ptr nocapture readonly %c) #0 { entry: ; CHECK-LABEL: Function: foo1 - %0 = load float, float* %c, align 4, !alias.scope !2 - %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 - store float %0, float* %arrayidx.i, align 4, !noalias !2 - %1 = load float, float* %c, align 4 - %arrayidx = getelementptr inbounds float, float* %a, i64 7 - store float %1, float* %arrayidx, align 4 + %0 = load float, ptr %c, align 4, !alias.scope !2 + %arrayidx.i = getelementptr inbounds float, ptr %a, i64 5 + store float %0, ptr %arrayidx.i, align 4, !noalias !2 + %1 = load float, ptr %c, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i64 7 + store float %1, ptr %arrayidx, align 4 ret void -; CHECK: NoAlias: %0 = load float, float* %c, align 4, !alias.scope !0 <-> store float %0, float* %arrayidx.i, align 4, !noalias !0 -; CHECK: MayAlias: %0 = load float, float* %c, align 4, !alias.scope !0 <-> store float %1, float* %arrayidx, align 4 -; CHECK: MayAlias: %1 = load float, float* %c, align 4 <-> store float %0, float* %arrayidx.i, align 4, !noalias !0 -; CHECK: MayAlias: %1 = load float, float* %c, align 4 <-> store float %1, float* %arrayidx, align 4 -; CHECK: NoAlias: store float %1, float* %arrayidx, align 4 <-> store float %0, float* %arrayidx.i, align 4, !noalias !0 +; CHECK: NoAlias: %0 = load float, ptr %c, align 4, !alias.scope !0 <-> store float %0, ptr %arrayidx.i, align 4, !noalias !0 +; CHECK: MayAlias: %0 = load float, ptr %c, align 4, !alias.scope !0 <-> store float %1, ptr %arrayidx, align 4 +; CHECK: MayAlias: %1 = load float, ptr %c, align 4 <-> store float %0, ptr %arrayidx.i, 
align 4, !noalias !0 +; CHECK: MayAlias: %1 = load float, ptr %c, align 4 <-> store float %1, ptr %arrayidx, align 4 +; CHECK: NoAlias: store float %1, ptr %arrayidx, align 4 <-> store float %0, ptr %arrayidx.i, align 4, !noalias !0 } attributes #0 = { nounwind uwtable } diff --git a/llvm/test/Analysis/ScopedNoAliasAA/basic2.ll b/llvm/test/Analysis/ScopedNoAliasAA/basic2.ll --- a/llvm/test/Analysis/ScopedNoAliasAA/basic2.ll +++ b/llvm/test/Analysis/ScopedNoAliasAA/basic2.ll @@ -2,31 +2,31 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +define void @foo2(ptr nocapture %a, ptr nocapture %b, ptr nocapture readonly %c) #0 { entry: ; CHECK-LABEL: Function: foo2 - %0 = load float, float* %c, align 4, !alias.scope !0 - %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 - store float %0, float* %arrayidx.i, align 4, !alias.scope !5, !noalias !4 - %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8 - store float %0, float* %arrayidx1.i, align 4, !alias.scope !0, !noalias !5 - %1 = load float, float* %c, align 4 - %arrayidx = getelementptr inbounds float, float* %a, i64 7 - store float %1, float* %arrayidx, align 4 + %0 = load float, ptr %c, align 4, !alias.scope !0 + %arrayidx.i = getelementptr inbounds float, ptr %a, i64 5 + store float %0, ptr %arrayidx.i, align 4, !alias.scope !5, !noalias !4 + %arrayidx1.i = getelementptr inbounds float, ptr %b, i64 8 + store float %0, ptr %arrayidx1.i, align 4, !alias.scope !0, !noalias !5 + %1 = load float, ptr %c, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i64 7 + store float %1, ptr %arrayidx, align 4 ret void -; CHECK: MayAlias: %0 = load float, float* %c, align 4, !alias.scope !0 <-> store float %0, float* %arrayidx.i, align 4, !alias.scope !4, !noalia +; CHECK: MayAlias: %0 = load float, ptr %c, align 4, !alias.scope !0 <-> store float %0, ptr %arrayidx.i, align 4, !alias.scope !4, !noalia ; CHECK: s !5 -; CHECK: MayAlias: %0 = load float, float* %c, align 4, !alias.scope !0 <-> store float %0, float* %arrayidx1.i, align 4, !alias.scope !0, !noali +; CHECK: MayAlias: %0 = load float, ptr %c, align 4, !alias.scope !0 <-> store float %0, ptr %arrayidx1.i, align 4, !alias.scope !0, !noali ; CHECK: as !4 -; CHECK: MayAlias: %0 = load float, float* %c, align 4, !alias.scope !0 <-> store float %1, float* %arrayidx, align 4 -; CHECK: MayAlias: %1 = load float, float* %c, align 4 <-> store float %0, float* %arrayidx.i, align 4, !alias.scope !4, !noalias !5 -; CHECK: MayAlias: %1 = load float, float* %c, align 4 <-> store float %0, float* %arrayidx1.i, align 4, !alias.scope !0, !noalias !4 -; CHECK: MayAlias: %1 = load float, float* %c, align 4 <-> store float %1, float* %arrayidx, align 4 -; CHECK: NoAlias: store float %0, float* %arrayidx1.i, align 4, !alias.scope !0, !noalias !4 <-> store float %0, float* %arrayidx.i, align +; CHECK: MayAlias: %0 = load float, ptr %c, align 4, !alias.scope !0 <-> store float %1, ptr %arrayidx, align 4 +; CHECK: MayAlias: %1 = load float, ptr %c, align 4 <-> store float %0, ptr %arrayidx.i, align 4, !alias.scope !4, !noalias !5 +; CHECK: MayAlias: %1 = load float, ptr %c, align 4 <-> store float %0, ptr %arrayidx1.i, align 4, !alias.scope !0, !noalias !4 +; CHECK: MayAlias: %1 = load float, ptr %c, align 4 <-> store float %1, ptr 
%arrayidx, align 4 +; CHECK: NoAlias: store float %0, ptr %arrayidx1.i, align 4, !alias.scope !0, !noalias !4 <-> store float %0, ptr %arrayidx.i, align ; CHECK: 4, !alias.scope !4, !noalias !5 -; CHECK: NoAlias: store float %1, float* %arrayidx, align 4 <-> store float %0, float* %arrayidx.i, align 4, !alias.scope !4, !noalias !5 -; CHECK: MayAlias: store float %1, float* %arrayidx, align 4 <-> store float %0, float* %arrayidx1.i, align 4, !alias.scope !0, !noalias ! +; CHECK: NoAlias: store float %1, ptr %arrayidx, align 4 <-> store float %0, ptr %arrayidx.i, align 4, !alias.scope !4, !noalias !5 +; CHECK: MayAlias: store float %1, ptr %arrayidx, align 4 <-> store float %0, ptr %arrayidx1.i, align 4, !alias.scope !0, !noalias ! ; CHECK: 4 } diff --git a/llvm/test/Analysis/StackSafetyAnalysis/Inputs/ipa-alias.ll b/llvm/test/Analysis/StackSafetyAnalysis/Inputs/ipa-alias.ll --- a/llvm/test/Analysis/StackSafetyAnalysis/Inputs/ipa-alias.ll +++ b/llvm/test/Analysis/StackSafetyAnalysis/Inputs/ipa-alias.ll @@ -1,21 +1,21 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux" -attributes #0 = { noinline sanitize_memtag "target-features"="+mte,+neon" } - -@InterposableAliasWrite1 = linkonce dso_local alias void(i8*), void(i8*)* @Write1 +@InterposableAliasWrite1 = linkonce dso_local alias void(ptr), ptr @Write1 -@PreemptableAliasWrite1 = dso_preemptable alias void(i8*), void(i8*)* @Write1 -@AliasToPreemptableAliasWrite1 = dso_local alias void(i8*), void(i8*)* @PreemptableAliasWrite1 +@PreemptableAliasWrite1 = dso_preemptable alias void(ptr), ptr @Write1 +@AliasToPreemptableAliasWrite1 = dso_local alias void(ptr), ptr @PreemptableAliasWrite1 -@AliasWrite1 = dso_local alias void(i8*), void(i8*)* @Write1 +@AliasWrite1 = dso_local alias void(ptr), ptr @Write1 -@BitcastAliasWrite1 = dso_local alias void(i32*), bitcast (void(i8*)* @Write1 to void(i32*)*) -@AliasToBitcastAliasWrite1 = dso_local alias void(i8*), bitcast (void(i32*)* @BitcastAliasWrite1 to void(i8*)*) +@BitcastAliasWrite1 = dso_local alias void(ptr), ptr @Write1 +@AliasToBitcastAliasWrite1 = dso_local alias void(ptr), ptr @BitcastAliasWrite1 -define dso_local void @Write1(i8* %p) #0 { +define dso_local void @Write1(ptr %p) #0 { entry: - store i8 0, i8* %p, align 1 + store i8 0, ptr %p, align 1 ret void } + +attributes #0 = { noinline sanitize_memtag "target-features"="+mte,+neon" } diff --git a/llvm/test/Analysis/StackSafetyAnalysis/Inputs/ipa.ll b/llvm/test/Analysis/StackSafetyAnalysis/Inputs/ipa.ll --- a/llvm/test/Analysis/StackSafetyAnalysis/Inputs/ipa.ll +++ b/llvm/test/Analysis/StackSafetyAnalysis/Inputs/ipa.ll @@ -3,99 +3,95 @@ attributes #0 = { noinline sanitize_memtag "target-features"="+mte,+neon" } -define dso_local void @Write1(i8* %p) #0 { +define dso_local void @Write1(ptr %p) #0 { entry: - store i8 0, i8* %p, align 1 + store i8 0, ptr %p, align 1 ret void } -define dso_local void @Write4(i8* %p) #0 { +define dso_local void @Write4(ptr %p) #0 { entry: - %0 = bitcast i8* %p to i32* - store i32 0, i32* %0, align 1 + store i32 0, ptr %p, align 1 ret void } -define dso_local void @Write4_2(i8* %p, i8* %q) #0 { +define dso_local void @Write4_2(ptr %p, ptr %q) #0 { entry: - %0 = bitcast i8* %p to i32* - store i32 0, i32* %0, align 1 - %1 = bitcast i8* %q to i32* - store i32 0, i32* %1, align 1 + store i32 0, ptr %p, align 1 + store i32 0, ptr %q, align 1 ret void } -define dso_local void @Write8(i8* %p) #0 { +define dso_local void @Write8(ptr %p) #0 { entry: - %0 = bitcast 
i8* %p to i64* - store i64 0, i64* %0, align 1 + store i64 0, ptr %p, align 1 ret void } -define dso_local i8* @WriteAndReturn8(i8* %p) #0 { +define dso_local ptr @WriteAndReturn8(ptr %p) #0 { entry: - store i8 0, i8* %p, align 1 - ret i8* %p + store i8 0, ptr %p, align 1 + ret ptr %p } -declare dso_local void @ExternalCall(i8* %p) +declare dso_local void @ExternalCall(ptr %p) -define dso_preemptable void @PreemptableWrite1(i8* %p) #0 { +define dso_preemptable void @PreemptableWrite1(ptr %p) #0 { entry: - store i8 0, i8* %p, align 1 + store i8 0, ptr %p, align 1 ret void } -define linkonce dso_local void @InterposableWrite1(i8* %p) #0 { +define linkonce dso_local void @InterposableWrite1(ptr %p) #0 { entry: - store i8 0, i8* %p, align 1 + store i8 0, ptr %p, align 1 ret void } -define dso_local i8* @ReturnDependent(i8* %p) #0 { +define dso_local ptr @ReturnDependent(ptr %p) #0 { entry: - %p2 = getelementptr i8, i8* %p, i64 2 - ret i8* %p2 + %p2 = getelementptr i8, ptr %p, i64 2 + ret ptr %p2 } ; access range [2, 6) -define dso_local void @Rec0(i8* %p) #0 { +define dso_local void @Rec0(ptr %p) #0 { entry: - %p1 = getelementptr i8, i8* %p, i64 2 - call void @Write4(i8* %p1) + %p1 = getelementptr i8, ptr %p, i64 2 + call void @Write4(ptr %p1) ret void } ; access range [3, 7) -define dso_local void @Rec1(i8* %p) #0 { +define dso_local void @Rec1(ptr %p) #0 { entry: - %p1 = getelementptr i8, i8* %p, i64 1 - call void @Rec0(i8* %p1) + %p1 = getelementptr i8, ptr %p, i64 1 + call void @Rec0(ptr %p1) ret void } ; access range [-2, 2) -define dso_local void @Rec2(i8* %p) #0 { +define dso_local void @Rec2(ptr %p) #0 { entry: - %p1 = getelementptr i8, i8* %p, i64 -5 - call void @Rec1(i8* %p1) + %p1 = getelementptr i8, ptr %p, i64 -5 + call void @Rec1(ptr %p1) ret void } ; Recursive function that passes %acc unchanged => access range [0, 4). -define dso_local void @RecursiveNoOffset(i32* %p, i32 %size, i32* %acc) { +define dso_local void @RecursiveNoOffset(ptr %p, i32 %size, ptr %acc) { entry: %cmp = icmp eq i32 %size, 0 br i1 %cmp, label %return, label %if.end if.end: - %0 = load i32, i32* %p, align 4 - %1 = load i32, i32* %acc, align 4 - %add = add nsw i32 %1, %0 - store i32 %add, i32* %acc, align 4 - %add.ptr = getelementptr inbounds i32, i32* %p, i64 1 + %load0 = load i32, ptr %p, align 4 + %load1 = load i32, ptr %acc, align 4 + %add = add nsw i32 %load1, %load0 + store i32 %add, ptr %acc, align 4 + %add.ptr = getelementptr inbounds i32, ptr %p, i64 1 %sub = add nsw i32 %size, -1 - tail call void @RecursiveNoOffset(i32* %add.ptr, i32 %sub, i32* %acc) + tail call void @RecursiveNoOffset(ptr %add.ptr, i32 %sub, ptr %acc) ret void return: @@ -103,65 +99,65 @@ } ; Recursive function that advances %acc on each iteration => access range unlimited. 
-define dso_local void @RecursiveWithOffset(i32 %size, i32* %acc) { +define dso_local void @RecursiveWithOffset(i32 %size, ptr %acc) { entry: %cmp = icmp eq i32 %size, 0 br i1 %cmp, label %return, label %if.end if.end: - store i32 0, i32* %acc, align 4 - %acc2 = getelementptr inbounds i32, i32* %acc, i64 1 + store i32 0, ptr %acc, align 4 + %acc2 = getelementptr inbounds i32, ptr %acc, i64 1 %sub = add nsw i32 %size, -1 - tail call void @RecursiveWithOffset(i32 %sub, i32* %acc2) + tail call void @RecursiveWithOffset(i32 %sub, ptr %acc2) ret void return: ret void } -define dso_local i64* @ReturnAlloca() { +define dso_local ptr @ReturnAlloca() { entry: %x = alloca i64, align 4 - ret i64* %x + ret ptr %x } -define dso_local void @Write1Private(i8* %p) #0 { +define dso_local void @Write1Private(ptr %p) #0 { entry: - call void @Private(i8* %p) + call void @Private(ptr %p) ret void } -define dso_local void @Write1SameModule(i8* %p) #0 { +define dso_local void @Write1SameModule(ptr %p) #0 { entry: - call void @Write1(i8* %p) + call void @Write1(ptr %p) ret void } -declare void @Write1Module0(i8* %p) +declare void @Write1Module0(ptr %p) -define dso_local void @Write1DiffModule(i8* %p) #0 { +define dso_local void @Write1DiffModule(ptr %p) #0 { entry: - call void @Write1Module0(i8* %p) + call void @Write1Module0(ptr %p) ret void } -define private dso_local void @Private(i8* %p) #0 { +define private dso_local void @Private(ptr %p) #0 { entry: - %p1 = getelementptr i8, i8* %p, i64 -1 - store i8 0, i8* %p1, align 1 + %p1 = getelementptr i8, ptr %p, i64 -1 + store i8 0, ptr %p1, align 1 ret void } -define dso_local void @Write1Weak(i8* %p) #0 { +define dso_local void @Write1Weak(ptr %p) #0 { entry: - call void @Weak(i8* %p) + call void @Weak(ptr %p) ret void } -define weak dso_local void @Weak(i8* %p) #0 { +define weak dso_local void @Weak(ptr %p) #0 { entry: - %p1 = getelementptr i8, i8* %p, i64 -1 - store i8 0, i8* %p1, align 1 + %p1 = getelementptr i8, ptr %p, i64 -1 + store i8 0, ptr %p1, align 1 ret void } diff --git a/llvm/test/Analysis/StackSafetyAnalysis/i386-bug-fix.ll b/llvm/test/Analysis/StackSafetyAnalysis/i386-bug-fix.ll --- a/llvm/test/Analysis/StackSafetyAnalysis/i386-bug-fix.ll +++ b/llvm/test/Analysis/StackSafetyAnalysis/i386-bug-fix.ll @@ -14,11 +14,11 @@ target triple = "i386-pc-linux-gnu" ; Function Attrs: mustprogress norecurse sanitize_address uwtable -define dso_local i32 @main(i32 %argc, i8** %argv) { +define dso_local i32 @main(i32 %argc, ptr %argv) { entry: %0 = alloca i32, align 4 %1 = alloca i8, i64 32, align 32 - %2 = ptrtoint i8* %1 to i32 - store i32 %2, i32* %0, align 4 + %2 = ptrtoint ptr %1 to i32 + store i32 %2, ptr %0, align 4 ret i32 0 } diff --git a/llvm/test/Analysis/StackSafetyAnalysis/ipa-alias.ll b/llvm/test/Analysis/StackSafetyAnalysis/ipa-alias.ll --- a/llvm/test/Analysis/StackSafetyAnalysis/ipa-alias.ll +++ b/llvm/test/Analysis/StackSafetyAnalysis/ipa-alias.ll @@ -33,7 +33,7 @@ ; RUN: -r %t.summ1.bc,PreemptableAliasWrite1,px \ ; RUN: -r %t.summ1.bc,Write1,px -; RUN: llvm-lto2 run -opaque-pointers=0 %t.summ0.bc %t.summ1.bc -o %t.lto -stack-safety-print -stack-safety-run -save-temps -thinlto-threads 1 -O0 \ +; RUN: llvm-lto2 run %t.summ0.bc %t.summ1.bc -o %t.lto -stack-safety-print -stack-safety-run -save-temps -thinlto-threads 1 -O0 \ ; RUN: $(cat %t.res.txt) \ ; RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK,GLOBAL,LTO @@ -42,16 +42,16 @@ attributes #0 = { noinline sanitize_memtag "target-features"="+mte,+neon" } -declare void @PreemptableAliasWrite1(i8* %p) 
-declare void @AliasToPreemptableAliasWrite1(i8* %p) +declare void @PreemptableAliasWrite1(ptr %p) +declare void @AliasToPreemptableAliasWrite1(ptr %p) -declare void @InterposableAliasWrite1(i8* %p) +declare void @InterposableAliasWrite1(ptr %p) ; Aliases to interposable aliases are not allowed -declare void @AliasWrite1(i8* %p) +declare void @AliasWrite1(ptr %p) -declare void @BitcastAliasWrite1(i32* %p) -declare void @AliasToBitcastAliasWrite1(i8* %p) +declare void @BitcastAliasWrite1(ptr %p) +declare void @AliasToBitcastAliasWrite1(ptr %p) ; Call to dso_preemptable alias to a dso_local aliasee define void @PreemptableAliasCall() #0 { @@ -66,11 +66,11 @@ ; CHECK-EMPTY: entry: %x1 = alloca i8 - call void @PreemptableAliasWrite1(i8* %x1) + call void @PreemptableAliasWrite1(ptr %x1) %x2 = alloca i8 ; Alias to a preemptable alias is not preemptable - call void @AliasToPreemptableAliasWrite1(i8* %x2) + call void @AliasToPreemptableAliasWrite1(ptr %x2) ret void } @@ -87,7 +87,7 @@ entry: %x = alloca i8 ; ThinLTO can resolve the prevailing implementation for interposable definitions. - call void @InterposableAliasWrite1(i8* %x) + call void @InterposableAliasWrite1(ptr %x) ret void } @@ -102,7 +102,7 @@ ; CHECK-EMPTY: entry: %x = alloca i8 - call void @AliasWrite1(i8* %x) + call void @AliasWrite1(ptr %x) ret void } @@ -119,9 +119,9 @@ ; CHECK-EMPTY: entry: %x1 = alloca i32 - call void @BitcastAliasWrite1(i32* %x1) + call void @BitcastAliasWrite1(ptr %x1) %x2 = alloca i8 - call void @AliasToBitcastAliasWrite1(i8* %x2) + call void @AliasToBitcastAliasWrite1(ptr %x2) ret void } @@ -132,5 +132,5 @@ ; CHECK-NEXT: p[]: [0,1){{$}} ; CHECK-NEXT: allocas uses: ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i8 0, i8* %p, align 1 +; GLOBAL-NEXT: store i8 0, ptr %p, align 1 ; CHECK-EMPTY: diff --git a/llvm/test/Analysis/StackSafetyAnalysis/ipa.ll b/llvm/test/Analysis/StackSafetyAnalysis/ipa.ll --- a/llvm/test/Analysis/StackSafetyAnalysis/ipa.ll +++ b/llvm/test/Analysis/StackSafetyAnalysis/ipa.ll @@ -83,7 +83,7 @@ ; RUN: -r %t.summ1.bc,Write8,px \ ; RUN: -r %t.summ1.bc,WriteAndReturn8,px -; RUN: llvm-lto2 run -opaque-pointers=0 %t.summ0.bc %t.summ1.bc -o %t.lto -stack-safety-print -stack-safety-run -save-temps -thinlto-threads 1 -O0 \ +; RUN: llvm-lto2 run %t.summ0.bc %t.summ1.bc -o %t.lto -stack-safety-print -stack-safety-run -save-temps -thinlto-threads 1 -O0 \ ; RUN: $(cat %t.res.txt) \ ; RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK,GLOBAL,LTO @@ -95,22 +95,22 @@ attributes #0 = { noinline sanitize_memtag "target-features"="+mte,+neon" } -declare void @Write1(i8* %p) -declare void @Write4(i8* %p) -declare void @Write4_2(i8* %p, i8* %q) -declare void @Write8(i8* %p) -declare dso_local i8* @WriteAndReturn8(i8* %p) -declare dso_local void @ExternalCall(i8* %p) -declare void @PreemptableWrite1(i8* %p) -declare void @InterposableWrite1(i8* %p) -declare i8* @ReturnDependent(i8* %p) -declare void @Rec2(i8* %p) -declare void @RecursiveNoOffset(i32* %p, i32 %size, i32* %acc) -declare void @RecursiveWithOffset(i32 %size, i32* %acc) -declare void @Write1SameModule(i8* %p) -declare void @Write1DiffModule(i8* %p) -declare void @Write1Private(i8* %p) -declare void @Write1Weak(i8* %p) +declare void @Write1(ptr %p) +declare void @Write4(ptr %p) +declare void @Write4_2(ptr %p, ptr %q) +declare void @Write8(ptr %p) +declare dso_local ptr @WriteAndReturn8(ptr %p) +declare dso_local void @ExternalCall(ptr %p) +declare void @PreemptableWrite1(ptr %p) +declare void @InterposableWrite1(ptr %p) +declare ptr 
@ReturnDependent(ptr %p) +declare void @Rec2(ptr %p) +declare void @RecursiveNoOffset(ptr %p, i32 %size, ptr %acc) +declare void @RecursiveWithOffset(i32 %size, ptr %acc) +declare void @Write1SameModule(ptr %p) +declare void @Write1DiffModule(ptr %p) +declare void @Write1Private(ptr %p) +declare void @Write1Weak(ptr %p) ; Basic out-of-bounds. define void @f1() #0 { @@ -123,8 +123,7 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - call void @Write8(i8* %x1) + call void @Write8(ptr %x) ret void } @@ -139,8 +138,7 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - call void @Write1(i8* %x1) + call void @Write1(ptr %x) ret void } @@ -155,8 +153,7 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - call void @Write4(i8* %x1) + call void @Write4(ptr %x) ret void } @@ -171,9 +168,8 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 1 - call void @Write1(i8* %x2) + %x2 = getelementptr i8, ptr %x, i64 1 + call void @Write1(ptr %x2) ret void } @@ -188,9 +184,8 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 1 - call void @Write4(i8* %x2) + %x2 = getelementptr i8, ptr %x, i64 1 + call void @Write4(ptr %x2) ret void } @@ -205,8 +200,7 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - call void @ExternalCall(i8* %x1) + call void @ExternalCall(ptr %x) ret void } @@ -221,8 +215,7 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - call void @PreemptableWrite1(i8* %x1) + call void @PreemptableWrite1(ptr %x) ret void } @@ -238,8 +231,7 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - call void @InterposableWrite1(i8* %x1) + call void @InterposableWrite1(ptr %x) ret void } @@ -254,21 +246,20 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - call void @PrivateWrite1(i8* %x1) + call void @PrivateWrite1(ptr %x) ret void } -define private void @PrivateWrite1(i8* %p) #0 { +define private void @PrivateWrite1(ptr %p) #0 { ; CHECK-LABEL: @PrivateWrite1{{$}} ; CHECK-NEXT: args uses: ; CHECK-NEXT: p[]: [0,1){{$}} ; CHECK-NEXT: allocas uses: ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i8 0, i8* %p, align 1 +; GLOBAL-NEXT: store i8 0, ptr %p, align 1 ; CHECK-EMPTY: entry: - store i8 0, i8* %p, align 1 + store i8 0, ptr %p, align 1 ret void } @@ -284,8 +275,7 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - %x2 = call i8* @ReturnDependent(i8* %x1) + %x2 = call ptr @ReturnDependent(ptr %x) ret void } @@ -299,10 +289,9 @@ ; CHECK-EMPTY: entry: %x = alloca i64, align 4 - %x1 = bitcast i64* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 2 + %x2 = getelementptr i8, ptr %x, i64 2 ; 2 + [-2, 2) = [0, 4) => OK - call void @Rec2(i8* %x2) + call void @Rec2(ptr %x2) ret void } @@ -316,10 +305,9 @@ ; CHECK-EMPTY: entry: %x = alloca i64, align 4 - %x1 = bitcast i64* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 6 + %x2 = getelementptr i8, ptr %x, i64 6 ; 6 + [-2, 2) = [4, 8) => OK - call void @Rec2(i8* %x2) + call void @Rec2(ptr %x2) ret void } @@ -333,10 +321,9 @@ ; CHECK-EMPTY: entry: %x = alloca i64, align 4 - %x1 = bitcast i64* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 1 + %x2 = getelementptr i8, ptr %x, i64 1 ; 1 + [-2, 2) = [-1, 3) => NOT OK - call void @Rec2(i8* %x2) + call void @Rec2(ptr %x2) ret void } @@ 
-350,10 +337,9 @@ ; CHECK-EMPTY: entry: %x = alloca i64, align 4 - %x1 = bitcast i64* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 7 + %x2 = getelementptr i8, ptr %x, i64 7 ; 7 + [-2, 2) = [5, 9) => NOT OK - call void @Rec2(i8* %x2) + call void @Rec2(ptr %x2) ret void } @@ -367,9 +353,8 @@ ; CHECK-EMPTY: entry: %x = alloca i64, align 4 - %x1 = bitcast i64* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 4 - call void @Write4_2(i8* %x2, i8* %x1) + %x2 = getelementptr i8, ptr %x, i64 4 + call void @Write4_2(ptr %x2, ptr %x) ret void } @@ -383,9 +368,8 @@ ; CHECK-EMPTY: entry: %x = alloca i64, align 4 - %x1 = bitcast i64* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 5 - call void @Write4_2(i8* %x2, i8* %x1) + %x2 = getelementptr i8, ptr %x, i64 5 + call void @Write4_2(ptr %x2, ptr %x) ret void } @@ -399,10 +383,9 @@ ; CHECK-EMPTY: entry: %x = alloca i64, align 4 - %x0 = bitcast i64* %x to i8* - %x1 = getelementptr i8, i8* %x0, i64 -1 - %x2 = getelementptr i8, i8* %x0, i64 4 - call void @Write4_2(i8* %x2, i8* %x1) + %x1 = getelementptr i8, ptr %x, i64 -1 + %x2 = getelementptr i8, ptr %x, i64 4 + call void @Write4_2(ptr %x2, ptr %x1) ret void } @@ -416,14 +399,13 @@ ; CHECK-EMPTY: entry: %x = alloca i64, align 4 - %x0 = bitcast i64* %x to i8* - %x1 = getelementptr i8, i8* %x0, i64 -1 - %x2 = getelementptr i8, i8* %x0, i64 5 - call void @Write4_2(i8* %x2, i8* %x1) + %x1 = getelementptr i8, ptr %x, i64 -1 + %x2 = getelementptr i8, ptr %x, i64 5 + call void @Write4_2(ptr %x2, ptr %x1) ret void } -define i32 @TestRecursiveNoOffset(i32* %p, i32 %size) #0 { +define i32 @TestRecursiveNoOffset(ptr %p, i32 %size) #0 { ; CHECK-LABEL: @TestRecursiveNoOffset dso_preemptable{{$}} ; CHECK-NEXT: args uses: ; LOCAL-NEXT: p[]: empty-set, @RecursiveNoOffset(arg0, [0,1)){{$}} @@ -431,16 +413,15 @@ ; CHECK-NEXT: allocas uses: ; CHECK-NEXT: sum[4]: [0,4), @RecursiveNoOffset(arg2, [0,1)){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i32 0, i32* %sum, align 4 -; GLOBAL-NEXT: %1 = load i32, i32* %sum, align 4 +; GLOBAL-NEXT: store i32 0, ptr %sum, align 4 +; GLOBAL-NEXT: %load = load i32, ptr %sum, align 4 ; CHECK-EMPTY: entry: %sum = alloca i32, align 4 - %0 = bitcast i32* %sum to i8* - store i32 0, i32* %sum, align 4 - call void @RecursiveNoOffset(i32* %p, i32 %size, i32* %sum) - %1 = load i32, i32* %sum, align 4 - ret i32 %1 + store i32 0, ptr %sum, align 4 + call void @RecursiveNoOffset(ptr %p, i32 %size, ptr %sum) + %load = load i32, ptr %sum, align 4 + ret i32 %load } define void @TestRecursiveWithOffset(i32 %size) #0 { @@ -453,7 +434,7 @@ ; CHECK-EMPTY: entry: %sum = alloca i32, i64 16, align 4 - call void @RecursiveWithOffset(i32 %size, i32* %sum) + call void @RecursiveWithOffset(i32 %size, ptr %sum) ret void } @@ -468,7 +449,7 @@ ; CHECK-EMPTY: entry: %x = alloca i8, i64 16, align 4 - %0 = call i8* @WriteAndReturn8(i8* %x) + %0 = call ptr @WriteAndReturn8(ptr %x) ret void } @@ -482,7 +463,7 @@ ; CHECK-EMPTY: entry: %y = alloca i8, align 4 - call void @Write1SameModule(i8* %y) + call void @Write1SameModule(ptr %y) ret void } @@ -496,7 +477,7 @@ ; CHECK-EMPTY: entry: %z = alloca i8, align 4 - call void @Write1DiffModule(i8* %z) + call void @Write1DiffModule(ptr %z) ret void } @@ -510,7 +491,7 @@ ; CHECK-EMPTY: entry: %x = alloca i8, align 4 - call void @Write1Private(i8* %x) + call void @Write1Private(ptr %x) ret void } @@ -526,27 +507,27 @@ ; CHECK-EMPTY: entry: %x = alloca i8, align 4 - call void @Write1Weak(i8* %x) + call void @Write1Weak(ptr %x) ret void } -define private dso_local void 
@Private(i8* %p) #0 { +define private dso_local void @Private(ptr %p) #0 { entry: - %p1 = getelementptr i8, i8* %p, i64 1 - store i8 0, i8* %p1, align 1 + %p1 = getelementptr i8, ptr %p, i64 1 + store i8 0, ptr %p1, align 1 ret void } -define dso_local void @Write1Module0(i8* %p) #0 { +define dso_local void @Write1Module0(ptr %p) #0 { entry: - store i8 0, i8* %p, align 1 + store i8 0, ptr %p, align 1 ret void } -define dso_local void @Weak(i8* %p) #0 { +define dso_local void @Weak(ptr %p) #0 { entry: - %p1 = getelementptr i8, i8* %p, i64 1 - store i8 0, i8* %p1, align 1 + %p1 = getelementptr i8, ptr %p, i64 1 + store i8 0, ptr %p1, align 1 ret void } @@ -557,7 +538,7 @@ ; CHECK-NEXT: p[]: [0,1){{$}} ; CHECK-NEXT: allocas uses: ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i8 0, i8* %p, align 1 +; GLOBAL-NEXT: store i8 0, ptr %p, align 1 ; CHECK-EMPTY: ; CHECK-LABEL: @Write4{{$}} @@ -565,7 +546,7 @@ ; CHECK-NEXT: p[]: [0,4){{$}} ; CHECK-NEXT: allocas uses: ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i32 0, i32* %0, align 1 +; GLOBAL-NEXT: store i32 0, ptr %p, align 1 ; CHECK-EMPTY: ; CHECK-LABEL: @Write4_2{{$}} @@ -574,8 +555,8 @@ ; CHECK-NEXT: q[]: [0,4){{$}} ; CHECK-NEXT: allocas uses: ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i32 0, i32* %0, align 1 -; GLOBAL-NEXT: store i32 0, i32* %1, align 1 +; GLOBAL-NEXT: store i32 0, ptr %p, align 1 +; GLOBAL-NEXT: store i32 0, ptr %q, align 1 ; CHECK-EMPTY: ; CHECK-LABEL: @Write8{{$}} @@ -583,7 +564,7 @@ ; CHECK-NEXT: p[]: [0,8){{$}} ; CHECK-NEXT: allocas uses: ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i64 0, i64* %0, align 1 +; GLOBAL-NEXT: store i64 0, ptr %p, align 1 ; CHECK-EMPTY: ; CHECK-LABEL: @WriteAndReturn8{{$}} @@ -591,7 +572,7 @@ ; CHECK-NEXT: p[]: full-set{{$}} ; CHECK-NEXT: allocas uses: ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i8 0, i8* %p, align 1 +; GLOBAL-NEXT: store i8 0, ptr %p, align 1 ; CHECK-EMPTY: ; CHECK-LABEL: @PreemptableWrite1 dso_preemptable{{$}} @@ -599,7 +580,7 @@ ; CHECK-NEXT: p[]: [0,1){{$}} ; CHECK-NEXT: allocas uses: ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i8 0, i8* %p, align 1 +; GLOBAL-NEXT: store i8 0, ptr %p, align 1 ; CHECK-EMPTY: ; CHECK-LABEL: @InterposableWrite1 interposable{{$}} @@ -607,7 +588,7 @@ ; CHECK-NEXT: p[]: [0,1){{$}} ; CHECK-NEXT: allocas uses: ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i8 0, i8* %p, align 1 +; GLOBAL-NEXT: store i8 0, ptr %p, align 1 ; CHECK-EMPTY: ; CHECK-LABEL: @ReturnDependent{{$}} @@ -648,9 +629,9 @@ ; CHECK-NEXT: acc[]: [0,4), @RecursiveNoOffset(arg2, [0,1)){{$}} ; CHECK-NEXT: allocas uses: ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: %0 = load i32, i32* %p, align 4 -; GLOBAL-NEXT: %1 = load i32, i32* %acc, align 4 -; GLOBAL-NEXT: store i32 %add, i32* %acc, align 4 +; GLOBAL-NEXT: %load0 = load i32, ptr %p, align 4 +; GLOBAL-NEXT: %load1 = load i32, ptr %acc, align 4 +; GLOBAL-NEXT: store i32 %add, ptr %acc, align 4 ; CHECK-EMPTY: ; CHECK-LABEL: @RecursiveWithOffset{{$}} @@ -659,7 +640,7 @@ ; GLOBAL-NEXT: acc[]: full-set, @RecursiveWithOffset(arg1, [4,5)){{$}} ; CHECK-NEXT: allocas uses: ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i32 0, i32* %acc, align 4 +; GLOBAL-NEXT: store i32 0, ptr %acc, align 4 ; CHECK-EMPTY: ; CHECK-LABEL: @ReturnAlloca diff --git a/llvm/test/Analysis/StackSafetyAnalysis/lifetime.ll b/llvm/test/Analysis/StackSafetyAnalysis/lifetime.ll --- a/llvm/test/Analysis/StackSafetyAnalysis/lifetime.ll +++ b/llvm/test/Analysis/StackSafetyAnalysis/lifetime.ll @@ -11,34 +11,31 @@ ; 
CHECK: %y = alloca i32, align 4 ; CHECK-NEXT: Alive: <> %z = alloca i32, align 4 - %x0 = bitcast i32* %x to i8* - %y0 = bitcast i32* %y to i8* - %z0 = bitcast i32* %z to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %z0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %z0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %z) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %z) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: - call void @capture32(i32* %x) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) + call void @capture32(ptr %x) + call void @llvm.lifetime.end.p0(i64 -1, ptr %x) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %y) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y) ; CHECK-NEXT: Alive: - call void @capture32(i32* %y) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %y0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %y0) + call void @capture32(ptr %y) + call void @llvm.lifetime.end.p0(i64 -1, ptr %y) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %y) ; CHECK-NEXT: Alive: - call void @capture32(i32* %z) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %z0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %z0) + call void @capture32(ptr %z) + call void @llvm.lifetime.end.p0(i64 -1, ptr %z) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %z) ; CHECK-NEXT: Alive: <> ret void @@ -51,17 +48,16 @@ ; CHECK-NEXT: Alive: %x = alloca i32, align 4 %y = alloca i32, align 4 - %x0 = bitcast i32* %x to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: - call void @capture32(i32* %x) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) + call void @capture32(ptr %x) + call void @llvm.lifetime.end.p0(i64 -1, ptr %x) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: - call void @capture32(i32* %y) + call void @capture32(ptr %y) ret void } @@ -73,34 +69,31 @@ %x = alloca i32, align 4 %y = alloca i32, align 4 %z = alloca i64, align 4 - %x0 = bitcast i32* %x to i8* - %y0 = bitcast i32* %y to i8* - %z0 = bitcast i64* %z to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %y) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y) ; CHECK-NEXT: Alive: - call void @capture32(i32* %x) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) + call void @capture32(ptr %x) + call void @llvm.lifetime.end.p0(i64 -1, ptr %x) +; CHECK: call void 
@llvm.lifetime.end.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: - call void @capture32(i32* %y) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %y0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %y0) + call void @capture32(ptr %y) + call void @llvm.lifetime.end.p0(i64 -1, ptr %y) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %y) ; CHECK-NEXT: Alive: <> - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %z0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %z0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %z) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %z) ; CHECK-NEXT: Alive: - call void @capture64(i64* %z) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %z0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %z0) + call void @capture64(ptr %z) + call void @llvm.lifetime.end.p0(i64 -1, ptr %z) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %z) ; CHECK-NEXT: Alive: <> ret void @@ -118,34 +111,31 @@ ; CHECK-NEXT: Alive: <> %z = alloca i64, align 4 %y = alloca i32, align 4 - %x0 = bitcast i32* %x to i8* - %y0 = bitcast i32* %y to i8* - %z0 = bitcast i64* %z to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %y) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %z0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %z0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %z) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %z) ; CHECK-NEXT: Alive: - call void @capture32(i32* %x) - call void @capture32(i32* %y) - call void @capture64(i64* %z) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) + call void @capture32(ptr %x) + call void @capture32(ptr %y) + call void @capture64(ptr %z) + call void @llvm.lifetime.end.p0(i64 -1, ptr %x) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %y0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %y0) + call void @llvm.lifetime.end.p0(i64 -1, ptr %y) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %y) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %z0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %z0) + call void @llvm.lifetime.end.p0(i64 -1, ptr %z) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %z) ; CHECK-NEXT: Alive: <> ret void @@ -164,18 +154,16 @@ %z = alloca i64, align 8 %z1 = alloca i64, align 8 %z2 = alloca i64, align 8 - %0 = bitcast i64* %x1 to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %x1) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x1) ; CHECK-NEXT: Alive: - %1 = bitcast i64* %x2 to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %1) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %1) + call void @llvm.lifetime.start.p0(i64 -1, ptr %x2) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x2) ; CHECK-NEXT: Alive: - call void @capture64(i64* nonnull %x1) - call void @capture64(i64* nonnull %x2) 
+ call void @capture64(ptr nonnull %x1) + call void @capture64(ptr nonnull %x2) br i1 %a, label %if.then, label %if.else4 ; CHECK: br i1 %a, label %if.then, label %if.else4 ; CHECK-NEXT: Alive: @@ -183,25 +171,23 @@ if.then: ; preds = %entry ; CHECK: if.then: ; CHECK-NEXT: Alive: - %2 = bitcast i64* %y to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %2) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %2) + call void @llvm.lifetime.start.p0(i64 -1, ptr %y) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y) ; CHECK-NEXT: Alive: - call void @capture64(i64* nonnull %y) + call void @capture64(ptr nonnull %y) br i1 %b, label %if.then3, label %if.else if.then3: ; preds = %if.then ; CHECK: if.then3: ; CHECK-NEXT: Alive: - %3 = bitcast i64* %y1 to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %3) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %3) + call void @llvm.lifetime.start.p0(i64 -1, ptr %y1) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y1) ; CHECK-NEXT: Alive: - call void @capture64(i64* nonnull %y1) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %3) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %3) + call void @capture64(ptr nonnull %y1) + call void @llvm.lifetime.end.p0(i64 -1, ptr %y1) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %y1) ; CHECK-NEXT: Alive: br label %if.end @@ -209,14 +195,13 @@ if.else: ; preds = %if.then ; CHECK: if.else: ; CHECK-NEXT: Alive: - %4 = bitcast i64* %y2 to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %4) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %4) + call void @llvm.lifetime.start.p0(i64 -1, ptr %y2) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y2) ; CHECK-NEXT: Alive: - call void @capture64(i64* nonnull %y2) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %4) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %4) + call void @capture64(ptr nonnull %y2) + call void @llvm.lifetime.end.p0(i64 -1, ptr %y2) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %y2) ; CHECK-NEXT: Alive: br label %if.end @@ -224,8 +209,8 @@ if.end: ; preds = %if.else, %if.then3 ; CHECK: if.end: ; CHECK-NEXT: Alive: - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %2) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %2) + call void @llvm.lifetime.end.p0(i64 -1, ptr %y) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %y) ; CHECK-NEXT: Alive: br label %if.end9 @@ -233,29 +218,27 @@ if.else4: ; preds = %entry ; CHECK: if.else4: ; CHECK-NEXT: Alive: - - %5 = bitcast i64* %z to i8* -; CHECK: %5 = bitcast i64* %z to i8* + %z.cast = bitcast ptr %z to ptr +; CHECK: %z.cast = bitcast ptr %z to ptr ; CHECK-NEXT: Alive: - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %5) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %5) + call void @llvm.lifetime.start.p0(i64 -1, ptr %z) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %z) ; CHECK-NEXT: Alive: - call void @capture64(i64* nonnull %z) + call void @capture64(ptr nonnull %z) br i1 %b, label %if.then6, label %if.else7 if.then6: ; preds = %if.else4 ; CHECK: if.then6: ; CHECK-NEXT: Alive: - %6 = bitcast i64* %z1 to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %6) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %6) + call void @llvm.lifetime.start.p0(i64 -1, ptr %z1) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %z1) ; CHECK-NEXT: Alive: - call void @capture64(i64* nonnull %z1) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %6) -; CHECK: call void 
@llvm.lifetime.end.p0i8(i64 -1, i8* %6) + call void @capture64(ptr nonnull %z1) + call void @llvm.lifetime.end.p0(i64 -1, ptr %z1) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %z1) ; CHECK-NEXT: Alive: br label %if.end8 @@ -263,14 +246,13 @@ if.else7: ; preds = %if.else4 ; CHECK: if.else7: ; CHECK-NEXT: Alive: - %7 = bitcast i64* %z2 to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %7) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %7) + call void @llvm.lifetime.start.p0(i64 -1, ptr %z2) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %z2) ; CHECK-NEXT: Alive: - call void @capture64(i64* nonnull %z2) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %7) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %7) + call void @capture64(ptr nonnull %z2) + call void @llvm.lifetime.end.p0(i64 -1, ptr %z2) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %z2) ; CHECK-NEXT: Alive: br label %if.end8 @@ -278,8 +260,8 @@ if.end8: ; preds = %if.else7, %if.then6 ; CHECK: if.end8: ; CHECK-NEXT: Alive: - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %5) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %5) + call void @llvm.lifetime.end.p0(i64 -1, ptr %z) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %z) ; CHECK-NEXT: Alive: br label %if.end9 @@ -287,12 +269,12 @@ if.end9: ; preds = %if.end8, %if.end ; CHECK: if.end9: ; CHECK-NEXT: Alive: - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %1) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %1) + call void @llvm.lifetime.end.p0(i64 -1, ptr %x2) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x2) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %0) + call void @llvm.lifetime.end.p0(i64 -1, ptr %x1) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x1) ; CHECK-NEXT: Alive: <> ret void @@ -305,29 +287,27 @@ ; CHECK-NEXT: Alive: <> %x = alloca i32, align 4 %y = alloca i32, align 4 - %x0 = bitcast i32* %x to i8* - %y0 = bitcast i32* %y to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: - call void @capture32(i32* %x) + call void @capture32(ptr %x) br i1 %d, label %bb2, label %bb3 bb2: ; preds = %entry ; CHECK: bb2: ; CHECK-NEXT: Alive: - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %y) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y) ; CHECK-NEXT: Alive: - call void @capture32(i32* %y) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %y0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %y0) + call void @capture32(ptr %y) + call void @llvm.lifetime.end.p0(i64 -1, ptr %y) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %y) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) + call void @llvm.lifetime.end.p0(i64 -1, ptr %x) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: <> ret void @@ -335,8 +315,8 @@ bb3: ; preds = %entry ; CHECK: bb3: ; CHECK-NEXT: Alive: - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) + call void @llvm.lifetime.end.p0(i64 -1, ptr %x) +; CHECK: call void 
@llvm.lifetime.end.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: <> ret void @@ -349,15 +329,13 @@ ; CHECK-NEXT: Alive: <> %x = alloca i32, align 4 %y = alloca i32, align 4 - %x0 = bitcast i32* %x to i8* - %y0 = bitcast i32* %y to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: - call void @capture32(i32* %x) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) + call void @capture32(ptr %x) + call void @llvm.lifetime.end.p0(i64 -1, ptr %x) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: <> br i1 %d, label %bb2, label %bb3 @@ -365,13 +343,13 @@ bb2: ; preds = %entry ; CHECK: bb2: ; CHECK-NEXT: Alive: <> - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %y) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y) ; CHECK-NEXT: Alive: - call void @capture32(i32* %y) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %y0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %y0) + call void @capture32(ptr %y) + call void @llvm.lifetime.end.p0(i64 -1, ptr %y) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %y) ; CHECK-NEXT: Alive: <> ret void @@ -389,15 +367,13 @@ ; CHECK-NEXT: Alive: <> %x = alloca i32, align 4 %y = alloca i32, align 4 - %x0 = bitcast i32* %x to i8* - %y0 = bitcast i32* %y to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: - call void @capture32(i32* %x) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) + call void @capture32(ptr %x) + call void @llvm.lifetime.end.p0(i64 -1, ptr %x) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: <> br i1 %d, label %bb2, label %bb3 @@ -405,11 +381,11 @@ bb2: ; preds = %entry ; CHECK: bb2: ; CHECK-NEXT: Alive: <> - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %y) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y) ; CHECK-NEXT: Alive: - call void @capture32(i32* %y) + call void @capture32(ptr %y) ret void bb3: ; preds = %entry @@ -425,27 +401,25 @@ ; CHECK-NEXT: Alive: <> %x = alloca i32, align 4 %y = alloca i32, align 4 - %x0 = bitcast i32* %x to i8* - %y0 = bitcast i32* %y to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: - call void @capture32(i32* %x) + call void @capture32(ptr %x) br i1 %d, label %bb2, label %bb3 bb2: ; preds = %entry ; CHECK: bb2: ; CHECK-NEXT: Alive: - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) + call void @llvm.lifetime.end.p0(i64 -1, ptr %x) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: <> - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0) -; CHECK: call void 
@llvm.lifetime.start.p0i8(i64 -1, i8* %y0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %y) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y) ; CHECK-NEXT: Alive: - call void @capture32(i32* %y) + call void @capture32(ptr %y) ret void bb3: ; preds = %entry @@ -461,11 +435,9 @@ ; CHECK-NEXT: Alive: %x = alloca i32, align 4 %y = alloca i32, align 4 - %x0 = bitcast i32* %x to i8* - %y0 = bitcast i32* %y to i8* - call void @capture32(i32* %x) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0) + call void @capture32(ptr %x) + call void @llvm.lifetime.end.p0(i64 -1, ptr %x) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %x) ; CHECK-NEXT: Alive: br i1 %d, label %bb2, label %bb3 @@ -473,11 +445,11 @@ bb2: ; preds = %entry ; CHECK: bb2: ; CHECK-NEXT: Alive: - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %y) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %y) ; CHECK-NEXT: Alive: - call void @capture32(i32* %y) + call void @capture32(ptr %y) ret void bb3: ; preds = %entry @@ -495,47 +467,43 @@ %B.i2 = alloca [100 x i32], align 4 %A.i = alloca [100 x i32], align 4 %B.i = alloca [100 x i32], align 4 - %0 = bitcast [100 x i32]* %A.i to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %A.i) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %A.i) ; CHECK-NEXT: Alive: - %1 = bitcast [100 x i32]* %B.i to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %1) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %1) + call void @llvm.lifetime.start.p0(i64 -1, ptr %B.i) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %B.i) ; CHECK-NEXT: Alive: - call void @capture100x32([100 x i32]* %A.i) -; CHECK: call void @capture100x32([100 x i32]* %A.i) + call void @capture100x32(ptr %A.i) +; CHECK: call void @capture100x32(ptr %A.i) ; CHECK-NEXT: Alive: - call void @capture100x32([100 x i32]* %B.i) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %0) + call void @capture100x32(ptr %B.i) + call void @llvm.lifetime.end.p0(i64 -1, ptr %A.i) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %A.i) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %1) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %1) + call void @llvm.lifetime.end.p0(i64 -1, ptr %B.i) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %B.i) ; CHECK-NEXT: Alive: <> - %2 = bitcast [100 x i32]* %A.i1 to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %2) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %2) + call void @llvm.lifetime.start.p0(i64 -1, ptr %A.i1) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %A.i1) ; CHECK-NEXT: Alive: - %3 = bitcast [100 x i32]* %B.i2 to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %3) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %3) + call void @llvm.lifetime.start.p0(i64 -1, ptr %B.i2) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %B.i2) ; CHECK-NEXT: Alive: - call void @capture100x32([100 x i32]* %A.i1) - call void @capture100x32([100 x i32]* %B.i2) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %2) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %2) + call void @capture100x32(ptr %A.i1) + call void @capture100x32(ptr %B.i2) 
+ call void @llvm.lifetime.end.p0(i64 -1, ptr %A.i1) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %A.i1) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %3) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %3) + call void @llvm.lifetime.end.p0(i64 -1, ptr %B.i2) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %B.i2) ; CHECK-NEXT: Alive: <> ret void @@ -548,24 +516,24 @@ ; CHECK-NEXT: Alive: <> %buf1 = alloca i8, i32 100000, align 16 %buf2 = alloca i8, i32 100000, align 16 - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %buf1) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %buf1) + call void @llvm.lifetime.start.p0(i64 -1, ptr %buf1) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %buf1) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %buf1) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %buf1) + call void @llvm.lifetime.end.p0(i64 -1, ptr %buf1) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %buf1) ; CHECK-NEXT: Alive: <> - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %buf1) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %buf1) + call void @llvm.lifetime.start.p0(i64 -1, ptr %buf1) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %buf1) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %buf2) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %buf2) + call void @llvm.lifetime.start.p0(i64 -1, ptr %buf2) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %buf2) ; CHECK-NEXT: Alive: - call void @capture8(i8* %buf1) - call void @capture8(i8* %buf2) + call void @capture8(ptr %buf1) + call void @capture8(ptr %buf2) ret void } @@ -578,24 +546,22 @@ %B.i2 = alloca [100 x i32], align 4 %A.i = alloca [100 x i32], align 4 %B.i = alloca [100 x i32], align 4 - %0 = bitcast [100 x i32]* %A.i to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %0) + call void @llvm.lifetime.start.p0(i64 -1, ptr %A.i) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %A.i) ; CHECK-NEXT: Alive: - %1 = bitcast [100 x i32]* %B.i to i8* - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %1) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %1) + call void @llvm.lifetime.start.p0(i64 -1, ptr %B.i) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %B.i) ; CHECK-NEXT: Alive: - call void @capture100x32([100 x i32]* %A.i) - call void @capture100x32([100 x i32]* %B.i) - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %0) + call void @capture100x32(ptr %A.i) + call void @capture100x32(ptr %B.i) + call void @llvm.lifetime.end.p0(i64 -1, ptr %A.i) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %A.i) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %1) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %1) + call void @llvm.lifetime.end.p0(i64 -1, ptr %B.i) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %B.i) ; CHECK-NEXT: Alive: br label %block2 @@ -603,40 +569,37 @@ block2: ; preds = %entry ; CHECK: block2: ; CHECK-NEXT: Alive: - call void @capture100x32([100 x i32]* %A.i) - call void @capture100x32([100 x i32]* %B.i) + call void @capture100x32(ptr %A.i) + call void @capture100x32(ptr %B.i) ret void } %struct.Klass = type { i32, i32 } -define i32 @shady_range(i32 %argc, i8** nocapture %argv) { -; CHECK-LABEL: define i32 @shady_range(i32 %argc, i8** nocapture %argv) +define i32 @shady_range(i32 %argc, 
ptr nocapture %argv) { +; CHECK-LABEL: define i32 @shady_range(i32 %argc, ptr nocapture %argv) entry: ; CHECK: entry: ; CHECK-NEXT: Alive: <> %a.i = alloca [4 x %struct.Klass], align 16 %b.i = alloca [4 x %struct.Klass], align 16 - %a8 = bitcast [4 x %struct.Klass]* %a.i to i8* - %b8 = bitcast [4 x %struct.Klass]* %b.i to i8* - %z2 = getelementptr inbounds [4 x %struct.Klass], [4 x %struct.Klass]* %a.i, i64 0, i64 0, i32 0 - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %a8) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %a8) + call void @llvm.lifetime.start.p0(i64 -1, ptr %a.i) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %a.i) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.start.p0i8(i64 -1, i8* %b8) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %b8) + call void @llvm.lifetime.start.p0(i64 -1, ptr %b.i) +; CHECK: call void @llvm.lifetime.start.p0(i64 -1, ptr %b.i) ; CHECK-NEXT: Alive: - call void @capture8(i8* %a8) - call void @capture8(i8* %b8) - %z3 = load i32, i32* %z2, align 16 - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %a8) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %a8) + call void @capture8(ptr %a.i) + call void @capture8(ptr %b.i) + %z3 = load i32, ptr %a.i, align 16 + call void @llvm.lifetime.end.p0(i64 -1, ptr %a.i) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %a.i) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.end.p0i8(i64 -1, i8* %b8) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %b8) + call void @llvm.lifetime.end.p0(i64 -1, ptr %b.i) +; CHECK: call void @llvm.lifetime.end.p0(i64 -1, ptr %b.i) ; CHECK-NEXT: Alive: <> ret i32 %z3 @@ -648,8 +611,8 @@ ; CHECK: entry: ; CHECK-NEXT: Alive: <> %x = alloca i8, align 4 - call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) + call void @llvm.lifetime.start.p0(i64 1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x) ; CHECK-NEXT: Alive: br label %l2 @@ -658,9 +621,9 @@ ; CHECK: l2: ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: <> - call void @capture8(i8* %x) - call void @llvm.lifetime.end.p0i8(i64 1, i8* %x) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* %x) + call void @capture8(ptr %x) + call void @llvm.lifetime.end.p0(i64 1, ptr %x) +; CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr %x) ; CHECK-NEXT: Alive: <> br label %l2 @@ -673,8 +636,8 @@ ; CHECK-NEXT: Alive: <> %x = alloca i8, align 4 %y = alloca i8, align 4 - call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) + call void @llvm.lifetime.start.p0(i64 1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x) ; CHECK-NEXT: Alive: br label %l2 @@ -682,20 +645,20 @@ l2: ; preds = %l2, %entry ; CHECK: l2: ; CHECK-NEXT: Alive: - call void @llvm.lifetime.start.p0i8(i64 1, i8* %y) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %y) + call void @llvm.lifetime.start.p0(i64 1, ptr %y) +; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %y) ; CHECK-NEXT: Alive: - call void @capture8(i8* %y) - call void @llvm.lifetime.end.p0i8(i64 1, i8* %y) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* %y) + call void @capture8(ptr %y) + call void @llvm.lifetime.end.p0(i64 1, ptr %y) +; CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr %y) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) + call void @llvm.lifetime.start.p0(i64 1, ptr %x) +; CHECK: call void 
@llvm.lifetime.start.p0(i64 1, ptr %x) ; CHECK-NEXT: Alive: - call void @capture8(i8* %x) + call void @capture8(ptr %x) br label %l2 } @@ -714,26 +677,24 @@ if.then: ; preds = %entry ; CHECK: if.then: ; CHECK-NEXT: Alive: <> - %0 = getelementptr inbounds %struct.char_array, %struct.char_array* %a, i64 0, i32 0, i64 0 - call void @llvm.lifetime.start.p0i8(i64 500, i8* nonnull %0) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 500, i8* nonnull %0) + call void @llvm.lifetime.start.p0(i64 500, ptr nonnull %a) +; CHECK: call void @llvm.lifetime.start.p0(i64 500, ptr nonnull %a) ; CHECK-NEXT: Alive: - tail call void @capture8(i8* %0) - call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %0) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %0) + tail call void @capture8(ptr %a) + call void @llvm.lifetime.end.p0(i64 500, ptr nonnull %a) +; CHECK: call void @llvm.lifetime.end.p0(i64 500, ptr nonnull %a) ; CHECK-NEXT: Alive: <> br label %if.end if.else: ; preds = %entry ; CHECK: if.else: ; CHECK-NEXT: Alive: <> - %1 = getelementptr inbounds %struct.char_array, %struct.char_array* %b, i64 0, i32 0, i64 0 - call void @llvm.lifetime.start.p0i8(i64 500, i8* nonnull %1) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 500, i8* nonnull %1) + call void @llvm.lifetime.start.p0(i64 500, ptr nonnull %b) +; CHECK: call void @llvm.lifetime.start.p0(i64 500, ptr nonnull %b) ; CHECK-NEXT: Alive: - tail call void @capture8(i8* %1) - call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %1) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 500, i8* nonnull %1) + tail call void @capture8(ptr %b) + call void @llvm.lifetime.end.p0(i64 500, ptr nonnull %b) +; CHECK: call void @llvm.lifetime.end.p0(i64 500, ptr nonnull %b) ; CHECK-NEXT: Alive: <> br label %if.end @@ -758,8 +719,8 @@ if.then: ; CHECK: if.then: ; CHECK-NEXT: Alive: <> - call void @llvm.lifetime.start.p0i8(i64 1, i8* %y) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %y) + call void @llvm.lifetime.start.p0(i64 1, ptr %y) +; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %y) ; CHECK-NEXT: Alive: br label %if.end @@ -769,12 +730,12 @@ if.else: ; CHECK: if.else: ; CHECK-NEXT: Alive: <> - call void @llvm.lifetime.start.p0i8(i64 1, i8* %y) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %y) + call void @llvm.lifetime.start.p0(i64 1, ptr %y) +; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %y) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) + call void @llvm.lifetime.start.p0(i64 1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x) ; CHECK-NEXT: Alive: br label %if.end @@ -797,12 +758,12 @@ %x = alloca i8, align 4 %y = alloca i8, align 4 - call void @llvm.lifetime.start.p0i8(i64 1, i8* %y) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %y) + call void @llvm.lifetime.start.p0(i64 1, ptr %y) +; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %y) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) + call void @llvm.lifetime.start.p0(i64 1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x) ; CHECK-NEXT: Alive: br label %end @@ -812,7 +773,7 @@ dead: ; CHECK: dead: ; CHECK-NOT: Alive: - call void @llvm.lifetime.start.p0i8(i64 4, i8* %y) + call void @llvm.lifetime.start.p0(i64 4, ptr %y) br label %end ; CHECK: br label %end @@ -825,7 +786,7 @@ ret void } -define void @non_alloca(i8* %p) { +define void 
@non_alloca(ptr %p) { ; CHECK-LABEL: define void @non_alloca entry: ; CHECK: entry: @@ -834,18 +795,18 @@ %x = alloca i8, align 4 %y = alloca i8, align 4 - call void @llvm.lifetime.start.p0i8(i64 4, i8* %p) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 4, i8* %p) + call void @llvm.lifetime.start.p0(i64 4, ptr %p) +; CHECK: call void @llvm.lifetime.start.p0(i64 4, ptr %p) ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: <> - call void @llvm.lifetime.start.p0i8(i64 4, i8* %x) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 4, i8* %x) + call void @llvm.lifetime.start.p0(i64 4, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 4, ptr %x) ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: <> - call void @llvm.lifetime.end.p0i8(i64 4, i8* %p) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 4, i8* %p) + call void @llvm.lifetime.end.p0(i64 4, ptr %p) +; CHECK: call void @llvm.lifetime.end.p0(i64 4, ptr %p) ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: <> @@ -860,20 +821,20 @@ ; MUST-NEXT: Alive: <> %x = alloca i8, align 4 %y = alloca i8, align 4 - %cxcy = select i1 %v, i8* %x, i8* %y + %cxcy = select i1 %v, ptr %x, ptr %y - call void @llvm.lifetime.start.p0i8(i64 1, i8* %cxcy) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %cxcy) + call void @llvm.lifetime.start.p0(i64 1, ptr %cxcy) +; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %cxcy) ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: <> - call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) + call void @llvm.lifetime.start.p0(i64 1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x) ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: <> - call void @llvm.lifetime.end.p0i8(i64 1, i8* %x) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* %x) + call void @llvm.lifetime.end.p0(i64 1, ptr %x) +; CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr %x) ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: <> @@ -887,15 +848,15 @@ ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: <> %x = alloca [5 x i32], align 4 - %x2 = getelementptr [5 x i32], [5 x i32]* %x, i64 0, i64 1 + %x2 = getelementptr [5 x i32], ptr %x, i64 0, i64 1 - call void @llvm.lifetime.start.p0i32(i64 20, i32* %x2) -; CHECK: call void @llvm.lifetime.start.p0i32(i64 20, i32* %x2) + call void @llvm.lifetime.start.p0(i64 20, ptr %x2) +; CHECK: call void @llvm.lifetime.start.p0(i64 20, ptr %x2) ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: <> - call void @llvm.lifetime.end.p0i32(i64 20, i32* %x2) -; CHECK: call void @llvm.lifetime.end.p0i32(i64 20, i32* %x2) + call void @llvm.lifetime.end.p0(i64 20, ptr %x2) +; CHECK: call void @llvm.lifetime.end.p0(i64 20, ptr %x2) ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: <> @@ -909,15 +870,14 @@ ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: <> %x = alloca [5 x i32], align 4 - %x2 = getelementptr [5 x i32], [5 x i32]* %x, i64 0, i64 0 - call void @llvm.lifetime.start.p0i32(i64 15, i32* %x2) -; CHECK: call void @llvm.lifetime.start.p0i32(i64 15, i32* %x2) + call void @llvm.lifetime.start.p0(i64 15, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 15, ptr %x) ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: <> - call void @llvm.lifetime.end.p0i32(i64 15, i32* %x2) -; CHECK: call void @llvm.lifetime.end.p0i32(i64 15, i32* %x2) + call void @llvm.lifetime.end.p0(i64 15, ptr %x) +; CHECK: call void @llvm.lifetime.end.p0(i64 15, ptr %x) ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: <> @@ -930,20 +890,20 @@ ; CHECK: entry: ; CHECK-NEXT: Alive: <> %x = alloca i8 - call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 
1, i8* %x) + call void @llvm.lifetime.start.p0(i64 1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) + call void @llvm.lifetime.start.p0(i64 1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.end.p0i8(i64 1, i8* %x) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* %x) + call void @llvm.lifetime.end.p0(i64 1, ptr %x) +; CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr %x) ; CHECK-NEXT: Alive: <> - call void @llvm.lifetime.end.p0i8(i64 1, i8* %x) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* %x) + call void @llvm.lifetime.end.p0(i64 1, ptr %x) +; CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr %x) ; CHECK-NEXT: Alive: <> ret void @@ -965,8 +925,8 @@ ; CHECK: if.then: ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: <> - call void @llvm.lifetime.end.p0i8(i64 1, i8* %y) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* %y) + call void @llvm.lifetime.end.p0(i64 1, ptr %y) +; CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr %y) ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: <> @@ -978,12 +938,12 @@ if.else: ; CHECK: if.else: ; CHECK-NEXT: Alive: <> - call void @llvm.lifetime.start.p0i8(i64 1, i8* %y) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %y) + call void @llvm.lifetime.start.p0(i64 1, ptr %y) +; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %y) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) + call void @llvm.lifetime.start.p0(i64 1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x) ; CHECK-NEXT: Alive: br label %if.then @@ -1006,8 +966,8 @@ %x = alloca i8, align 4 %y = alloca i8, align 4 - call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) + call void @llvm.lifetime.start.p0(i64 1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x) ; CHECK-NEXT: Alive: br i1 %a, label %if.then, label %if.else @@ -1018,8 +978,8 @@ ; CHECK: if.then: ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: <> - call void @llvm.lifetime.start.p0i8(i64 1, i8* %y) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %y) + call void @llvm.lifetime.start.p0(i64 1, ptr %y) +; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %y) ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: @@ -1031,12 +991,12 @@ if.else: ; CHECK: if.else: ; CHECK-NEXT: Alive: - call void @llvm.lifetime.end.p0i8(i64 1, i8* %y) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* %y) + call void @llvm.lifetime.end.p0(i64 1, ptr %y) +; CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr %y) ; CHECK-NEXT: Alive: - call void @llvm.lifetime.end.p0i8(i64 1, i8* %x) -; CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* %x) + call void @llvm.lifetime.end.p0(i64 1, ptr %x) +; CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr %x) ; CHECK-NEXT: Alive: <> br label %if.then @@ -1059,8 +1019,8 @@ %x = alloca i8, align 4 %y = alloca i8, align 4 - call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) -; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) + call void @llvm.lifetime.start.p0(i64 1, ptr %x) +; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %x) ; CHECK-NEXT: Alive: br i1 %a, label %if.then, label %if.end @@ -1071,8 +1031,8 @@ ; CHECK: if.then: ; MAY-NEXT: Alive: ; MUST-NEXT: Alive: - call void @llvm.lifetime.start.p0i8(i64 1, i8* %y) -; 
CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %y)
+  call void @llvm.lifetime.start.p0(i64 1, ptr %y)
+; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr %y)
 ; CHECK-NEXT: Alive:
 
   br i1 %a, label %if.then, label %if.end
@@ -1087,11 +1047,9 @@
   ret void
 }
 
-declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
-declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
-declare void @llvm.lifetime.start.p0i32(i64, i32* nocapture)
-declare void @llvm.lifetime.end.p0i32(i64, i32* nocapture)
-declare void @capture8(i8*)
-declare void @capture32(i32*)
-declare void @capture64(i64*)
-declare void @capture100x32([100 x i32]*)
+declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
+declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+declare void @capture8(ptr)
+declare void @capture32(ptr)
+declare void @capture64(ptr)
+declare void @capture100x32(ptr)
diff --git a/llvm/test/Analysis/StackSafetyAnalysis/local.ll b/llvm/test/Analysis/StackSafetyAnalysis/local.ll
--- a/llvm/test/Analysis/StackSafetyAnalysis/local.ll
+++ b/llvm/test/Analysis/StackSafetyAnalysis/local.ll
@@ -4,15 +4,15 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-@sink = global i8* null, align 8
+@sink = global ptr null, align 8
 
-declare void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 %len, i1 %isvolatile)
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 %isvolatile)
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 %isvolatile)
-declare void @llvm.memset.p0i8.i64(i8* %dest, i8 %val, i64 %len, i1 %isvolatile)
+declare void @llvm.memset.p0.i32(ptr %dest, i8 %val, i32 %len, i1 %isvolatile)
+declare void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 %len, i1 %isvolatile)
+declare void @llvm.memmove.p0.p0.i32(ptr %dest, ptr %src, i32 %len, i1 %isvolatile)
+declare void @llvm.memset.p0.i64(ptr %dest, i8 %val, i64 %len, i1 %isvolatile)
 
-declare void @unknown_call(i8* %dest)
-declare i8* @retptr(i8* returned)
+declare void @unknown_call(ptr %dest)
+declare ptr @retptr(ptr returned)
 
 ; Address leaked.
define void @LeakAddress() { @@ -24,8 +24,7 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - store i8* %x1, i8** @sink, align 8 + store ptr %x, ptr @sink, align 8 ret void } @@ -35,12 +34,11 @@ ; CHECK-NEXT: allocas uses: ; CHECK-NEXT: x[4]: [0,1){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i8 0, i8* %x1, align 1 +; GLOBAL-NEXT: store i8 0, ptr %x, align 1 ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - store i8 0, i8* %x1, align 1 + store i8 0, ptr %x, align 1 ret void } @@ -50,11 +48,10 @@ ; CHECK-NEXT: allocas uses: ; CHECK-NEXT: x[4]: full-set{{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i8 0, i8* %x2, align 1 +; GLOBAL-NEXT: store i8 0, ptr %x2, align 1 ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* %c1 = icmp sge i64 %i, 0 %c2 = icmp slt i64 %i, 4 br i1 %c1, label %c1.true, label %false @@ -63,8 +60,8 @@ br i1 %c2, label %c2.true, label %false c2.true: - %x2 = getelementptr i8, i8* %x1, i64 %i - store i8 0, i8* %x2, align 1 + %x2 = getelementptr i8, ptr %x, i64 %i + store i8 0, ptr %x2, align 1 br label %false false: @@ -77,17 +74,16 @@ ; CHECK-NEXT: allocas uses: ; CHECK-NEXT: x[4]: [0,4){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i8 0, i8* %x2, align 1 +; GLOBAL-NEXT: store i8 0, ptr %x2, align 1 ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* %c1 = icmp sge i64 %i, 0 %i1 = select i1 %c1, i64 %i, i64 0 %c2 = icmp slt i64 %i1, 3 %i2 = select i1 %c2, i64 %i1, i64 3 - %x2 = getelementptr i8, i8* %x1, i64 %i2 - store i8 0, i8* %x2, align 1 + %x2 = getelementptr i8, ptr %x, i64 %i2 + store i8 0, ptr %x2, align 1 ret void } @@ -97,11 +93,11 @@ ; CHECK-NEXT: allocas uses: ; CHECK-NEXT: x[4]: [0,4){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i32 0, i32* %x, align 4 +; GLOBAL-NEXT: store i32 0, ptr %x, align 4 ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - store i32 0, i32* %x, align 4 + store i32 0, ptr %x, align 4 ret void } @@ -111,13 +107,12 @@ ; CHECK-NEXT: allocas uses: ; CHECK-NEXT: x[4]: [2,3){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i8 0, i8* %x2, align 1 +; GLOBAL-NEXT: store i8 0, ptr %x2, align 1 ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 2 - store i8 0, i8* %x2, align 1 + %x2 = getelementptr i8, ptr %x, i64 2 + store i8 0, ptr %x2, align 1 ret void } @@ -131,10 +126,10 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = ptrtoint i32* %x to i64 + %x1 = ptrtoint ptr %x to i64 %x2 = add i64 %x1, 2 - %x3 = inttoptr i64 %x2 to i8* - store i8 0, i8* %x3, align 1 + %x3 = inttoptr i64 %x2 to ptr + store i8 0, ptr %x3, align 1 ret void } @@ -145,46 +140,45 @@ ; GLOBAL-NEXT: x[4]: full-set, @retptr(arg0, [0,1)){{$}} ; LOCAL-NEXT: x[4]: [0,1), @retptr(arg0, [0,1)){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i8 0, i8* %x2, align 1 +; GLOBAL-NEXT: store i8 0, ptr %x2, align 1 ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - %x2 = call i8* @retptr(i8* %x1) - store i8 0, i8* %x2, align 1 + %x2 = call ptr @retptr(ptr %x) + store i8 0, ptr %x2, align 1 ret void } -define dso_local void @WriteMinMax(i8* %p) { +define dso_local void @WriteMinMax(ptr %p) { ; CHECK-LABEL: @WriteMinMax{{$}} ; CHECK-NEXT: args uses: ; CHECK-NEXT: p[]: full-set ; CHECK-NEXT: allocas uses: ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i8 0, i8* %p1, align 1 -; GLOBAL-NEXT: store i8 0, i8* %p2, 
align 1 +; GLOBAL-NEXT: store i8 0, ptr %p1, align 1 +; GLOBAL-NEXT: store i8 0, ptr %p2, align 1 ; CHECK-EMPTY: entry: - %p1 = getelementptr i8, i8* %p, i64 9223372036854775805 - store i8 0, i8* %p1, align 1 - %p2 = getelementptr i8, i8* %p, i64 -9223372036854775805 - store i8 0, i8* %p2, align 1 + %p1 = getelementptr i8, ptr %p, i64 9223372036854775805 + store i8 0, ptr %p1, align 1 + %p2 = getelementptr i8, ptr %p, i64 -9223372036854775805 + store i8 0, ptr %p2, align 1 ret void } -define dso_local void @WriteMax(i8* %p) { +define dso_local void @WriteMax(ptr %p) { ; CHECK-LABEL: @WriteMax{{$}} ; CHECK-NEXT: args uses: ; CHECK-NEXT: p[]: [-9223372036854775807,9223372036854775806) ; CHECK-NEXT: allocas uses: ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: call void @llvm.memset.p0i8.i64(i8* %p, i8 1, i64 9223372036854775806, i1 false) -; GLOBAL-NEXT: call void @llvm.memset.p0i8.i64(i8* %p2, i8 1, i64 9223372036854775806, i1 false) +; GLOBAL-NEXT: call void @llvm.memset.p0.i64(ptr %p, i8 1, i64 9223372036854775806, i1 false) +; GLOBAL-NEXT: call void @llvm.memset.p0.i64(ptr %p2, i8 1, i64 9223372036854775806, i1 false) ; CHECK-EMPTY: entry: - call void @llvm.memset.p0i8.i64(i8* %p, i8 1, i64 9223372036854775806, i1 0) - %p2 = getelementptr i8, i8* %p, i64 -9223372036854775807 - call void @llvm.memset.p0i8.i64(i8* %p2, i8 1, i64 9223372036854775806, i1 0) + call void @llvm.memset.p0.i64(ptr %p, i8 1, i64 9223372036854775806, i1 0) + %p2 = getelementptr i8, ptr %p, i64 -9223372036854775807 + call void @llvm.memset.p0.i64(ptr %p2, i8 1, i64 9223372036854775806, i1 0) ret void } @@ -197,10 +191,8 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 2 - %x3 = bitcast i8* %x2 to i32* - store i32 0, i32* %x3, align 1 + %x2 = getelementptr i8, ptr %x, i64 2 + store i32 0, ptr %x2, align 1 ret void } @@ -213,7 +205,6 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* %c1 = icmp sge i64 %i, 0 %c2 = icmp slt i64 %i, 5 br i1 %c1, label %c1.true, label %false @@ -222,8 +213,8 @@ br i1 %c2, label %c2.true, label %false c2.true: - %x2 = getelementptr i8, i8* %x1, i64 %i - store i8 0, i8* %x2, align 1 + %x2 = getelementptr i8, ptr %x, i64 %i + store i8 0, ptr %x2, align 1 br label %false false: @@ -239,13 +230,12 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* %c2 = icmp slt i64 %i, 5 br i1 %c2, label %c2.true, label %false c2.true: - %x2 = getelementptr i8, i8* %x1, i64 %i - store i8 0, i8* %x2, align 1 + %x2 = getelementptr i8, ptr %x, i64 %i + store i8 0, ptr %x2, align 1 br label %false false: @@ -262,11 +252,9 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 2 - %x3 = call i8* @retptr(i8* %x2) - %x4 = bitcast i8* %x3 to i32* - store i32 0, i32* %x4, align 1 + %x2 = getelementptr i8, ptr %x, i64 2 + %x3 = call ptr @retptr(ptr %x2) + store i32 0, ptr %x3, align 1 ret void } @@ -277,12 +265,11 @@ ; CHECK-NEXT: allocas uses: ; CHECK-NEXT: x[4]: [0,1){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: %v = load i8, i8* %x1, align 1 +; GLOBAL-NEXT: %v = load i8, ptr %x, align 1 ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - %v = load i8, i8* %x1, align 1 + %v = load i8, ptr %x, align 1 ret void } @@ -295,15 +282,13 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 2 - %x3 = bitcast i8* %x2 to i32* - %v = load 
i32, i32* %x3, align 1 + %x2 = getelementptr i8, ptr %x, i64 2 + %v = load i32, ptr %x2, align 1 ret void } ; Leak through ret. -define i8* @Ret() { +define ptr @Ret() { ; CHECK-LABEL: @Ret dso_preemptable{{$}} ; CHECK-NEXT: args uses: ; CHECK-NEXT: allocas uses: @@ -312,12 +297,11 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 2 - ret i8* %x2 + %x2 = getelementptr i8, ptr %x, i64 2 + ret ptr %x2 } -declare void @Foo(i16* %p) +declare void @Foo(ptr %p) define void @DirectCall() { ; CHECK-LABEL: @DirectCall dso_preemptable{{$}} @@ -329,15 +313,14 @@ ; CHECK-EMPTY: entry: %x = alloca i64, align 4 - %x1 = bitcast i64* %x to i16* - %x2 = getelementptr i16, i16* %x1, i64 1 - call void @Foo(i16* %x2); + %x2 = getelementptr i16, ptr %x, i64 1 + call void @Foo(ptr %x2); ret void } ; Indirect calls can not be analyzed (yet). ; FIXME: %p[]: full-set looks invalid -define void @IndirectCall(void (i8*)* %p) { +define void @IndirectCall(ptr %p) { ; CHECK-LABEL: @IndirectCall dso_preemptable{{$}} ; CHECK-NEXT: args uses: ; CHECK-NEXT: p[]: full-set{{$}} @@ -347,8 +330,7 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - call void %p(i8* %x1); + call void %p(ptr %x); ret void } @@ -359,14 +341,13 @@ ; FIXME: SCEV can't look through selects. ; CHECK-NEXT: x[4]: [0,4){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i8 0, i8* %x2, align 1 +; GLOBAL-NEXT: store i8 0, ptr %x2, align 1 ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* %idx = select i1 %z, i64 1, i64 2 - %x2 = getelementptr i8, i8* %x1, i64 %idx - store i8 0, i8* %x2, align 1 + %x2 = getelementptr i8, ptr %x, i64 %idx + store i8 0, ptr %x2, align 1 ret void } @@ -379,8 +360,8 @@ ; CHECK-EMPTY: entry: %x = alloca i32, i32 10, align 4 - %x2 = getelementptr i32, i32* %x, i64 -400000000000 - store i32 0, i32* %x2, align 1 + %x2 = getelementptr i32, ptr %x, i64 -400000000000 + store i32 0, ptr %x2, align 1 ret void } @@ -393,8 +374,8 @@ ; CHECK-EMPTY: entry: %x = alloca i32, i32 10, align 4 - %x2 = getelementptr i32, i32* %x, i16 %z - store i32 0, i32* %x2, align 1 + %x2 = getelementptr i32, ptr %x, i16 %z + store i32 0, ptr %x2, align 1 ret void } @@ -407,10 +388,9 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* %idx = select i1 %z, i64 1, i64 4 - %x2 = getelementptr i8, i8* %x1, i64 %idx - store i8 0, i8* %x2, align 1 + %x2 = getelementptr i8, ptr %x, i64 %idx + store i8 0, ptr %x2, align 1 ret void } @@ -420,14 +400,12 @@ ; CHECK-NEXT: allocas uses: ; CHECK-NEXT: x[40]: [36,40){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i32 0, i32* %x3, align 1 +; GLOBAL-NEXT: store i32 0, ptr %x2, align 1 ; CHECK-EMPTY: entry: %x = alloca i32, i32 10, align 4 - %x1 = bitcast i32* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 36 - %x3 = bitcast i8* %x2 to i32* - store i32 0, i32* %x3, align 1 + %x2 = getelementptr i8, ptr %x, i64 36 + store i32 0, ptr %x2, align 1 ret void } @@ -440,10 +418,8 @@ ; CHECK-EMPTY: entry: %x = alloca i32, i32 10, align 4 - %x1 = bitcast i32* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 37 - %x3 = bitcast i8* %x2 to i32* - store i32 0, i32* %x3, align 1 + %x2 = getelementptr i8, ptr %x, i64 37 + store i32 0, ptr %x2, align 1 ret void } @@ -469,7 +445,7 @@ ; CHECK-EMPTY: entry: %x = alloca i32, i64 %size, align 16 - store i32 0, i32* %x, align 1 + store i32 0, ptr %x, align 1 ret void } @@ -485,7 +461,7 @@ entry: %size = select i1 %z, i64 3, 
i64 5 %x = alloca i32, i64 %size, align 16 - store i32 0, i32* %x, align 1 + store i32 0, ptr %x, align 1 ret void } @@ -495,21 +471,20 @@ ; CHECK-NEXT: allocas uses: ; CHECK-NEXT: x[10]: [0,10){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: %1 = load volatile i8, i8* %p.09, align 1 +; GLOBAL-NEXT: %load = load volatile i8, ptr %p.09, align 1 ; CHECK-EMPTY: entry: %x = alloca [10 x i8], align 1 - %0 = getelementptr inbounds [10 x i8], [10 x i8]* %x, i64 0, i64 0 - %lftr.limit = getelementptr inbounds [10 x i8], [10 x i8]* %x, i64 0, i64 10 + %lftr.limit = getelementptr inbounds [10 x i8], ptr %x, i64 0, i64 10 br label %for.body for.body: %sum.010 = phi i8 [ 0, %entry ], [ %add, %for.body ] - %p.09 = phi i8* [ %0, %entry ], [ %incdec.ptr, %for.body ] - %incdec.ptr = getelementptr inbounds i8, i8* %p.09, i64 1 - %1 = load volatile i8, i8* %p.09, align 1 - %add = add i8 %1, %sum.010 - %exitcond = icmp eq i8* %incdec.ptr, %lftr.limit + %p.09 = phi ptr [ %x, %entry ], [ %incdec.ptr, %for.body ] + %incdec.ptr = getelementptr inbounds i8, ptr %p.09, i64 1 + %load = load volatile i8, ptr %p.09, align 1 + %add = add i8 %load, %sum.010 + %exitcond = icmp eq ptr %incdec.ptr, %lftr.limit br i1 %exitcond, label %for.cond.cleanup, label %for.body for.cond.cleanup: @@ -526,18 +501,17 @@ ; CHECK-EMPTY: entry: %x = alloca [10 x i8], align 1 - %0 = getelementptr inbounds [10 x i8], [10 x i8]* %x, i64 0, i64 0 ; 11 iterations - %lftr.limit = getelementptr inbounds [10 x i8], [10 x i8]* %x, i64 0, i64 11 + %lftr.limit = getelementptr inbounds [10 x i8], ptr %x, i64 0, i64 11 br label %for.body for.body: %sum.010 = phi i8 [ 0, %entry ], [ %add, %for.body ] - %p.09 = phi i8* [ %0, %entry ], [ %incdec.ptr, %for.body ] - %incdec.ptr = getelementptr inbounds i8, i8* %p.09, i64 1 - %1 = load volatile i8, i8* %p.09, align 1 - %add = add i8 %1, %sum.010 - %exitcond = icmp eq i8* %incdec.ptr, %lftr.limit + %p.09 = phi ptr [ %x, %entry ], [ %incdec.ptr, %for.body ] + %incdec.ptr = getelementptr inbounds i8, ptr %p.09, i64 1 + %load = load volatile i8, ptr %p.09, align 1 + %add = add i8 %load, %sum.010 + %exitcond = icmp eq ptr %incdec.ptr, %lftr.limit br i1 %exitcond, label %for.cond.cleanup, label %for.body for.cond.cleanup: @@ -553,12 +527,11 @@ ; CHECK-EMPTY: entry: %x1 = alloca [128 x i8], align 16 - %x1.sub = getelementptr inbounds [128 x i8], [128 x i8]* %x1, i64 0, i64 0 %cmp = icmp slt i32 %sz, 129 br i1 %cmp, label %if.then, label %if.end if.then: - call void @llvm.memset.p0i8.i32(i8* nonnull align 16 %x1.sub, i8 0, i32 %sz, i1 false) + call void @llvm.memset.p0.i32(ptr nonnull align 16 %x1, i8 0, i32 %sz, i1 false) br label %if.end if.end: @@ -567,7 +540,7 @@ ; FIXME: scalable allocas are considered to be of size zero, and scalable accesses to be full-range. ; This effectively disables safety analysis for scalable allocations. 
-define void @Scalable(* %p, * %unused, %v) {
+define void @Scalable(ptr %p, ptr %unused, %v) {
 ; CHECK-LABEL: @Scalable dso_preemptable{{$}}
 ; CHECK-NEXT: args uses:
 ; CHECK-NEXT: p[]: full-set
@@ -575,34 +548,33 @@
 ; CHECK-NEXT: allocas uses:
 ; CHECK-NEXT: x[0]: [0,1){{$}}
 ; GLOBAL-NEXT: safe accesses:
-; GLOBAL-NEXT: store %v, * %p, align 4
+; GLOBAL-NEXT: store %v, ptr %p, align 4
 ; CHECK-EMPTY:
 entry:
   %x = alloca , align 4
-  %x1 = bitcast * %x to i8*
-  store i8 0, i8* %x1, align 1
-  store %v, * %p, align 4
+  store i8 0, ptr %x, align 1
+  store %v, ptr %p, align 4
   ret void
 }
 
 %zerosize_type = type {}
 
-define void @ZeroSize(%zerosize_type *%p) {
+define void @ZeroSize(ptr %p) {
 ; CHECK-LABEL: @ZeroSize dso_preemptable{{$}}
 ; CHECK-NEXT: args uses:
 ; CHECK-NEXT: p[]: empty-set
 ; CHECK-NEXT: allocas uses:
 ; CHECK-NEXT: x[0]: empty-set
 ; GLOBAL-NEXT: safe accesses:
-; GLOBAL-NEXT: store %zerosize_type undef, %zerosize_type* %x, align 4
-; GLOBAL-NEXT: store %zerosize_type undef, %zerosize_type* undef, align 4
-; GLOBAL-NEXT: load %zerosize_type, %zerosize_type* %p, align
+; GLOBAL-NEXT: store %zerosize_type undef, ptr %x, align 4
+; GLOBAL-NEXT: store %zerosize_type undef, ptr undef, align 4
+; GLOBAL-NEXT: load %zerosize_type, ptr %p, align
 ; CHECK-EMPTY:
 entry:
   %x = alloca %zerosize_type, align 4
-  store %zerosize_type undef, %zerosize_type* %x, align 4
-  store %zerosize_type undef, %zerosize_type* undef, align 4
-  %val = load %zerosize_type, %zerosize_type* %p, align 4
+  store %zerosize_type undef, ptr %x, align 4
+  store %zerosize_type undef, ptr undef, align 4
+  %val = load %zerosize_type, ptr %p, align 4
   ret void
 }
@@ -615,11 +587,11 @@
 ; CHECK-EMPTY:
 entry:
   %a = alloca i32, align 4
-  call void @LeakAddress() ["unknown"(i32* %a)]
+  call void @LeakAddress() ["unknown"(ptr %a)]
   ret void
 }
 
-define void @ByVal(i16* byval(i16) %p) {
+define void @ByVal(ptr byval(i16) %p) {
 ; CHECK-LABEL: @ByVal dso_preemptable{{$}}
 ; CHECK-NEXT: args uses:
 ; CHECK-NEXT: allocas uses:
@@ -636,21 +608,20 @@
 ; CHECK-NEXT: x[2]: [0,2)
 ; CHECK-NEXT: y[8]: [0,2)
 ; GLOBAL-NEXT: safe accesses:
-; GLOBAL-NEXT: call void @ByVal(i16* byval(i16) %x)
-; GLOBAL-NEXT: call void @ByVal(i16* byval(i16) %y1)
+; GLOBAL-NEXT: call void @ByVal(ptr byval(i16) %x)
+; GLOBAL-NEXT: call void @ByVal(ptr byval(i16) %y)
 ; CHECK-EMPTY:
 entry:
   %x = alloca i16, align 4
-  call void @ByVal(i16* byval(i16) %x)
+  call void @ByVal(ptr byval(i16) %x)
 
   %y = alloca i64, align 4
-  %y1 = bitcast i64* %y to i16*
-  call void @ByVal(i16* byval(i16) %y1)
+  call void @ByVal(ptr byval(i16) %y)
 
   ret void
 }
 
-declare void @ByValArray([100000 x i64]* byval([100000 x i64]) %p)
+declare void @ByValArray(ptr byval([100000 x i64]) %p)
 
 define void @TestByValArray() {
 ; CHECK-LABEL: @TestByValArray dso_preemptable{{$}}
@@ -661,23 +632,21 @@
 ; CHECK-EMPTY:
 entry:
   %z = alloca [100000 x i64], align 4
-  %z1 = bitcast [100000 x i64]* %z to i8*
-  %z2 = getelementptr i8, i8* %z1, i64 500000
-  %z3 = bitcast i8* %z2 to [100000 x i64]*
-  call void @ByValArray([100000 x i64]* byval([100000 x i64]) %z3)
+  %z2 = getelementptr i8, ptr %z, i64 500000
+  call void @ByValArray(ptr byval([100000 x i64]) %z2)
   ret void
 }
 
-define dso_local i8 @LoadMinInt64(i8* %p) {
+define dso_local i8 @LoadMinInt64(ptr %p) {
  ; CHECK-LABEL: @LoadMinInt64{{$}}
  ; CHECK-NEXT: args uses:
  ; CHECK-NEXT: p[]: [-9223372036854775808,-9223372036854775807){{$}}
  ; CHECK-NEXT: allocas uses:
  ; GLOBAL-NEXT: safe accesses:
- ; GLOBAL-NEXT: load i8, i8* %p2, align 1
+ ; GLOBAL-NEXT: load i8, ptr %p2, align 1
 ;
CHECK-EMPTY: - %p2 = getelementptr i8, i8* %p, i64 -9223372036854775808 - %v = load i8, i8* %p2, align 1 + %p2 = getelementptr i8, ptr %p, i64 -9223372036854775808 + %v = load i8, ptr %p2, align 1 ret i8 %v } @@ -691,28 +660,28 @@ ; CHECK-EMPTY: entry: %x = alloca i8, align 4 - %x2 = getelementptr i8, i8* %x, i64 -9223372036854775808 - %v = call i8 @LoadMinInt64(i8* %x2) + %x2 = getelementptr i8, ptr %x, i64 -9223372036854775808 + %v = call i8 @LoadMinInt64(ptr %x2) ret void } -define void @DeadBlock(i64* %p) { +define void @DeadBlock(ptr %p) { ; CHECK-LABEL: @DeadBlock dso_preemptable{{$}} ; CHECK-NEXT: args uses: ; CHECK-NEXT: p[]: empty-set{{$}} ; CHECK-NEXT: allocas uses: ; CHECK-NEXT: x[1]: empty-set{{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i8 5, i8* %x -; GLOBAL-NEXT: store i64 -5, i64* %p +; GLOBAL-NEXT: store i8 5, ptr %x +; GLOBAL-NEXT: store i64 -5, ptr %p ; CHECK-EMPTY: entry: %x = alloca i8, align 4 br label %end dead: - store i8 5, i8* %x - store i64 -5, i64* %p + store i8 5, ptr %x + store i64 -5, ptr %p br label %end end: @@ -733,13 +702,13 @@ %y = alloca i8, align 4 %z = alloca i8, align 4 - store i8 5, i8* %x - %n = load i8, i8* %y - call void @llvm.memset.p0i8.i32(i8* nonnull %z, i8 0, i32 1, i1 false) + store i8 5, ptr %x + %n = load i8, ptr %y + call void @llvm.memset.p0.i32(ptr nonnull %z, i8 0, i32 1, i1 false) - call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) - call void @llvm.lifetime.start.p0i8(i64 1, i8* %y) - call void @llvm.lifetime.start.p0i8(i64 1, i8* %z) + call void @llvm.lifetime.start.p0(i64 1, ptr %x) + call void @llvm.lifetime.start.p0(i64 1, ptr %y) + call void @llvm.lifetime.start.p0(i64 1, ptr %z) ret void } @@ -752,22 +721,22 @@ ; CHECK: y[1]: [0,1){{$}} ; CHECK: z[1]: [0,1){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: store i8 5, i8* %x -; GLOBAL-NEXT: %n = load i8, i8* %y -; GLOBAL-NEXT: call void @llvm.memset.p0i8.i32(i8* nonnull %z, i8 0, i32 1, i1 false) +; GLOBAL-NEXT: store i8 5, ptr %x +; GLOBAL-NEXT: %n = load i8, ptr %y +; GLOBAL-NEXT: call void @llvm.memset.p0.i32(ptr nonnull %z, i8 0, i32 1, i1 false) ; CHECK-EMPTY: entry: %x = alloca i8, align 4 %y = alloca i8, align 4 %z = alloca i8, align 4 - call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) - call void @llvm.lifetime.start.p0i8(i64 1, i8* %y) - call void @llvm.lifetime.start.p0i8(i64 1, i8* %z) + call void @llvm.lifetime.start.p0(i64 1, ptr %x) + call void @llvm.lifetime.start.p0(i64 1, ptr %y) + call void @llvm.lifetime.start.p0(i64 1, ptr %z) - store i8 5, i8* %x - %n = load i8, i8* %y - call void @llvm.memset.p0i8.i32(i8* nonnull %z, i8 0, i32 1, i1 false) + store i8 5, ptr %x + %n = load i8, ptr %y + call void @llvm.memset.p0.i32(ptr nonnull %z, i8 0, i32 1, i1 false) ret void } @@ -786,17 +755,17 @@ %y = alloca i8, align 4 %z = alloca i8, align 4 - call void @llvm.lifetime.start.p0i8(i64 1, i8* %x) - call void @llvm.lifetime.start.p0i8(i64 1, i8* %y) - call void @llvm.lifetime.start.p0i8(i64 1, i8* %z) + call void @llvm.lifetime.start.p0(i64 1, ptr %x) + call void @llvm.lifetime.start.p0(i64 1, ptr %y) + call void @llvm.lifetime.start.p0(i64 1, ptr %z) - call void @llvm.lifetime.end.p0i8(i64 1, i8* %x) - call void @llvm.lifetime.end.p0i8(i64 1, i8* %y) - call void @llvm.lifetime.end.p0i8(i64 1, i8* %z) + call void @llvm.lifetime.end.p0(i64 1, ptr %x) + call void @llvm.lifetime.end.p0(i64 1, ptr %y) + call void @llvm.lifetime.end.p0(i64 1, ptr %z) - store i8 5, i8* %x - %n = load i8, i8* %y - call void @llvm.memset.p0i8.i32(i8* nonnull %z, i8 0, i32 1, i1 
false) + store i8 5, ptr %x + %n = load i8, ptr %y + call void @llvm.memset.p0.i32(ptr nonnull %z, i8 0, i32 1, i1 false) ret void } @@ -808,13 +777,12 @@ ; CHECK: a[4]: [0,1){{$}} ; CHECK: y[1]: [0,1){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %y, i8* %x, i32 1, i1 false) +; GLOBAL-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr %y, ptr %a, i32 1, i1 false) ; CHECK-EMPTY: entry: %a = alloca i32, align 4 - %x = bitcast i32* %a to i8* %y = alloca i8, align 4 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %y, i8* %x, i32 1, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr %y, ptr %a, i32 1, i1 false) ret void } @@ -828,9 +796,8 @@ ; CHECK-EMPTY: entry: %a = alloca i32, align 4 - %x = bitcast i32* %a to i8* %y = alloca i8, align 4 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %y, i8* %x, i32 4, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr %y, ptr %a, i32 4, i1 false) ret void } @@ -844,9 +811,8 @@ ; CHECK-EMPTY: entry: %a = alloca i32, align 4 - %x = bitcast i32* %a to i8* %y = alloca i8, align 4 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x, i8* %y, i32 4, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr %a, ptr %y, i32 4, i1 false) ret void } @@ -860,9 +826,8 @@ ; CHECK-EMPTY: entry: %a = alloca i32, align 4 - %x = bitcast i32* %a to i8* %y = alloca i8, align 4 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %y, i8* %x, i32 5, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr %y, ptr %a, i32 5, i1 false) ret void } @@ -872,13 +837,12 @@ ; CHECK-NEXT: allocas uses: ; CHECK: a[4]: [0,5){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: call void @llvm.memset.p0i8.i32(i8* %x, i8 1, i32 4, i1 false) +; GLOBAL-NEXT: call void @llvm.memset.p0.i32(ptr %a, i8 1, i32 4, i1 false) ; CHECK-EMPTY: entry: %a = alloca i32, align 4 - %x = bitcast i32* %a to i8* - call void @llvm.memset.p0i8.i32(i8* %x, i8 1, i32 5, i1 false) - call void @llvm.memset.p0i8.i32(i8* %x, i8 1, i32 4, i1 false) + call void @llvm.memset.p0.i32(ptr %a, i8 1, i32 5, i1 false) + call void @llvm.memset.p0.i32(ptr %a, i8 1, i32 4, i1 false) ret void } @@ -888,30 +852,28 @@ ; CHECK-NEXT: allocas uses: ; CHECK: a[4]: [0,8){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: load i32, i32* %a, align 4 +; GLOBAL-NEXT: load i32, ptr %a, align 4 ; CHECK-EMPTY: entry: %a = alloca i32, align 4 - %a1 = bitcast i32* %a to i64* - %n1 = load i64, i64* %a1, align 4 - %n2 = load i32, i32* %a, align 4 + %n1 = load i64, ptr %a, align 4 + %n2 = load i32, ptr %a, align 4 ret void } -define void @MixedAccesses3(void (i8*)* %func) { +define void @MixedAccesses3(ptr %func) { ; CHECK-LABEL: @MixedAccesses3 ; CHECK-NEXT: args uses: ; CHECK-NEXT: func[]: full-set ; CHECK-NEXT: allocas uses: ; CHECK: a[4]: full-set{{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: load i32, i32* %a, align 4 +; GLOBAL-NEXT: load i32, ptr %a, align 4 ; CHECK-EMPTY: entry: %a = alloca i32, align 4 - %x = bitcast i32* %a to i8* - %n2 = load i32, i32* %a, align 4 - call void %func(i8* %x) + %n2 = load i32, ptr %a, align 4 + call void %func(ptr %a) ret void } @@ -922,52 +884,51 @@ ; CHECK: a[4]: full-set{{$}} ; CHECK: a1[8]: [0,8){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: load i32, i32* %a, align 4 +; GLOBAL-NEXT: load i32, ptr %a, align 4 ; CHECK-EMPTY: entry: %a = alloca i32, align 4 - %a1 = alloca i32*, align 4 - %n2 = load i32, i32* %a, align 4 - store i32* %a, i32** %a1 + %a1 = alloca ptr, align 4 + %n2 = load i32, ptr %a, align 4 + store ptr %a, ptr %a1 ret void } -define i32* @MixedAccesses5(i1 %x, i32* %y) { +define ptr 
@MixedAccesses5(i1 %x, ptr %y) { ; CHECK-LABEL: @MixedAccesses5 ; CHECK-NEXT: args uses: ; CHECK: y[]: full-set ; CHECK-NEXT: allocas uses: ; CHECK: a[4]: full-set{{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: load i32, i32* %a, align 4 +; GLOBAL-NEXT: load i32, ptr %a, align 4 ; CHECK-EMPTY: entry: %a = alloca i32, align 4 br i1 %x, label %tlabel, label %flabel flabel: - %n = load i32, i32* %a, align 4 - ret i32* %y + %n = load i32, ptr %a, align 4 + ret ptr %y tlabel: - ret i32* %a + ret ptr %a } -define void @MixedAccesses6(i8* %arg) { +define void @MixedAccesses6(ptr %arg) { ; CHECK-LABEL: @MixedAccesses6 ; CHECK-NEXT: args uses: ; CHECK-NEXT: arg[]: [0,4) ; CHECK-NEXT: allocas uses: ; CHECK: a[4]: [0,4) ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x, i8* %arg, i32 4, i1 false) +; GLOBAL-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr %a, ptr %arg, i32 4, i1 false) ; CHECK-EMPTY: entry: %a = alloca i32, align 4 - %x = bitcast i32* %a to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x, i8* %arg, i32 4, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr %a, ptr %arg, i32 4, i1 false) ret void } -define void @MixedAccesses7(i1 %cond, i8* %arg) { +define void @MixedAccesses7(i1 %cond, ptr %arg) { ; SCEV doesn't support select, so we consider this non-stack-safe, even though ; it is. ; @@ -980,13 +941,12 @@ ; CHECK-EMPTY: entry: %a = alloca i32, align 4 - %x = bitcast i32* %a to i8* - %x1 = select i1 %cond, i8* %arg, i8* %x - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x1, i8* %arg, i32 4, i1 false) + %x1 = select i1 %cond, ptr %arg, ptr %a + call void @llvm.memcpy.p0.p0.i32(ptr %x1, ptr %arg, i32 4, i1 false) ret void } -define void @NoStackAccess(i8* %arg1, i8* %arg2) { +define void @NoStackAccess(ptr %arg1, ptr %arg2) { ; CHECK-LABEL: @NoStackAccess ; CHECK-NEXT: args uses: ; CHECK-NEXT: arg1[]: [0,4) @@ -994,12 +954,11 @@ ; CHECK-NEXT: allocas uses: ; CHECK: a[4]: empty-set{{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %arg1, i8* %arg2, i32 4, i1 false) +; GLOBAL-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr %arg1, ptr %arg2, i32 4, i1 false) ; CHECK-EMPTY: entry: %a = alloca i32, align 4 - %x = bitcast i32* %a to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %arg1, i8* %arg2, i32 4, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr %arg1, ptr %arg2, i32 4, i1 false) ret void } @@ -1009,18 +968,17 @@ ; CHECK-NEXT: allocas uses: ; CHECK: a[4]: full-set{{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: call void @llvm.memset.p0i8.i32(i8* %x, i8 1, i32 4, i1 false) +; GLOBAL-NEXT: call void @llvm.memset.p0.i32(ptr %a, i8 1, i32 4, i1 false) ; CHECK-EMPTY: entry: %a = alloca i32, align 4 - %x = bitcast i32* %a to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* %x) - call void @llvm.lifetime.end.p0i8(i64 4, i8* %x) - call void @llvm.memset.p0i8.i32(i8* %x, i8 1, i32 4, i1 true) - - call void @llvm.lifetime.start.p0i8(i64 4, i8* %x) - call void @llvm.memset.p0i8.i32(i8* %x, i8 1, i32 4, i1 false) - call void @llvm.lifetime.end.p0i8(i64 4, i8* %x) + call void @llvm.lifetime.start.p0(i64 4, ptr %a) + call void @llvm.lifetime.end.p0(i64 4, ptr %a) + call void @llvm.memset.p0.i32(ptr %a, i8 1, i32 4, i1 true) + + call void @llvm.lifetime.start.p0(i64 4, ptr %a) + call void @llvm.memset.p0.i32(ptr %a, i8 1, i32 4, i1 false) + call void @llvm.lifetime.end.p0(i64 4, ptr %a) ret void } @@ -1030,18 +988,17 @@ ; CHECK-NEXT: allocas uses: ; CHECK: a[4]: full-set{{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT:
call void @llvm.memset.p0i8.i32(i8* %x, i8 1, i32 4, i1 false) +; GLOBAL-NEXT: call void @llvm.memset.p0.i32(ptr %a, i8 1, i32 4, i1 false) ; CHECK-EMPTY: entry: %a = alloca i32, align 4 - %x = bitcast i32* %a to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* %x) - call void @llvm.lifetime.end.p0i8(i64 4, i8* %x) - %n = load i32, i32* %a - - call void @llvm.lifetime.start.p0i8(i64 4, i8* %x) - call void @llvm.memset.p0i8.i32(i8* %x, i8 1, i32 4, i1 false) - call void @llvm.lifetime.end.p0i8(i64 4, i8* %x) + call void @llvm.lifetime.start.p0(i64 4, ptr %a) + call void @llvm.lifetime.end.p0(i64 4, ptr %a) + %n = load i32, ptr %a + + call void @llvm.lifetime.start.p0(i64 4, ptr %a) + call void @llvm.memset.p0.i32(ptr %a, i8 1, i32 4, i1 false) + call void @llvm.lifetime.end.p0(i64 4, ptr %a) ret void } @@ -1051,18 +1008,17 @@ ; CHECK-NEXT: allocas uses: ; CHECK: a[4]: full-set{{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: call void @llvm.memset.p0i8.i32(i8* %x, i8 1, i32 4, i1 false) +; GLOBAL-NEXT: call void @llvm.memset.p0.i32(ptr %a, i8 1, i32 4, i1 false) ; CHECK-EMPTY: entry: %a = alloca i32, align 4 - %x = bitcast i32* %a to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* %x) - call void @llvm.lifetime.end.p0i8(i64 4, i8* %x) - store i32 5, i32* %a - - call void @llvm.lifetime.start.p0i8(i64 4, i8* %x) - call void @llvm.memset.p0i8.i32(i8* %x, i8 1, i32 4, i1 false) - call void @llvm.lifetime.end.p0i8(i64 4, i8* %x) + call void @llvm.lifetime.start.p0(i64 4, ptr %a) + call void @llvm.lifetime.end.p0(i64 4, ptr %a) + store i32 5, ptr %a + + call void @llvm.lifetime.start.p0(i64 4, ptr %a) + call void @llvm.memset.p0.i32(ptr %a, i8 1, i32 4, i1 false) + call void @llvm.lifetime.end.p0(i64 4, ptr %a) ret void } @@ -1072,17 +1028,16 @@ ; CHECK-NEXT: allocas uses: ; CHECK: a[4]: full-set{{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: call void @llvm.memset.p0i8.i32(i8* %x, i8 1, i32 4, i1 false) +; GLOBAL-NEXT: call void @llvm.memset.p0.i32(ptr %a, i8 1, i32 4, i1 false) ; CHECK-EMPTY: entry: %a = alloca i32, align 4 - %x = bitcast i32* %a to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* %x) - call void @llvm.memset.p0i8.i32(i8* %x, i8 1, i32 4, i1 false) - call void @llvm.lifetime.end.p0i8(i64 4, i8* %x) - call void @unknown_call(i8* %x) + call void @llvm.lifetime.start.p0(i64 4, ptr %a) + call void @llvm.memset.p0.i32(ptr %a, i8 1, i32 4, i1 false) + call void @llvm.lifetime.end.p0(i64 4, ptr %a) + call void @unknown_call(ptr %a) ret void } -declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) -declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) +declare void @llvm.lifetime.start.p0(i64, ptr nocapture) +declare void @llvm.lifetime.end.p0(i64, ptr nocapture) diff --git a/llvm/test/Analysis/StackSafetyAnalysis/memintrin.ll b/llvm/test/Analysis/StackSafetyAnalysis/memintrin.ll --- a/llvm/test/Analysis/StackSafetyAnalysis/memintrin.ll +++ b/llvm/test/Analysis/StackSafetyAnalysis/memintrin.ll @@ -4,10 +4,10 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -declare void @llvm.memset.p0i8.i64(i8* %dest, i8 %val, i64 %len, i1 %isvolatile) -declare void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 %len, i1 %isvolatile) -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 %isvolatile) -declare void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 %isvolatile) +declare void @llvm.memset.p0.i64(ptr %dest, i8 %val, i64 %len, i1 %isvolatile) +declare void 
@llvm.memset.p0.i32(ptr %dest, i8 %val, i32 %len, i1 %isvolatile) +declare void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 %len, i1 %isvolatile) +declare void @llvm.memmove.p0.p0.i32(ptr %dest, ptr %src, i32 %len, i1 %isvolatile) define void @MemsetInBounds() { ; CHECK-LABEL: MemsetInBounds dso_preemptable{{$}} @@ -15,12 +15,11 @@ ; CHECK-NEXT: allocas uses: ; CHECK-NEXT: x[4]: [0,4){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: call void @llvm.memset.p0i8.i32(i8* %x1, i8 42, i32 4, i1 false) +; GLOBAL-NEXT: call void @llvm.memset.p0.i32(ptr %x, i8 42, i32 4, i1 false) ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - call void @llvm.memset.p0i8.i32(i8* %x1, i8 42, i32 4, i1 false) + call void @llvm.memset.p0.i32(ptr %x, i8 42, i32 4, i1 false) ret void } @@ -31,12 +30,11 @@ ; CHECK-NEXT: allocas uses: ; CHECK-NEXT: x[4]: [0,4){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: call void @llvm.memset.p0i8.i32(i8* %x1, i8 42, i32 4, i1 true) +; GLOBAL-NEXT: call void @llvm.memset.p0.i32(ptr %x, i8 42, i32 4, i1 true) ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - call void @llvm.memset.p0i8.i32(i8* %x1, i8 42, i32 4, i1 true) + call void @llvm.memset.p0.i32(ptr %x, i8 42, i32 4, i1 true) ret void } @@ -49,8 +47,7 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - call void @llvm.memset.p0i8.i32(i8* %x1, i8 42, i32 5, i1 false) + call void @llvm.memset.p0.i32(ptr %x, i8 42, i32 5, i1 false) ret void } @@ -63,8 +60,7 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - call void @llvm.memset.p0i8.i32(i8* %x1, i8 42, i32 %size, i1 false) + call void @llvm.memset.p0.i32(ptr %x, i8 42, i32 %size, i1 false) ret void } @@ -79,9 +75,8 @@ ; CHECK-EMPTY: entry: %x = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* %size = select i1 %z, i32 3, i32 4 - call void @llvm.memset.p0i8.i32(i8* %x1, i8 42, i32 %size, i1 false) + call void @llvm.memset.p0.i32(ptr %x, i8 42, i32 %size, i1 false) ret void } @@ -96,11 +91,10 @@ entry: %x = alloca i32, align 4 %y = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - %xint = ptrtoint i32* %x to i32 - %yint = ptrtoint i32* %y to i32 + %xint = ptrtoint ptr %x to i32 + %yint = ptrtoint ptr %y to i32 %d = sub i32 %xint, %yint - call void @llvm.memset.p0i8.i32(i8* %x1, i8 42, i32 %d, i1 false) + call void @llvm.memset.p0.i32(ptr %x, i8 42, i32 %d, i1 false) ret void } @@ -111,14 +105,12 @@ ; CHECK-NEXT: x[4]: [0,4){{$}} ; CHECK-NEXT: y[4]: [0,4){{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x1, i8* %y1, i32 4, i1 false) +; GLOBAL-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr %x, ptr %y, i32 4, i1 false) ; CHECK-EMPTY: entry: %x = alloca i32, align 4 %y = alloca i32, align 4 - %x1 = bitcast i32* %x to i8* - %y1 = bitcast i32* %y to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x1, i8* %y1, i32 4, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr %x, ptr %y, i32 4, i1 false) ret void } @@ -133,9 +125,7 @@ entry: %x = alloca i64, align 4 %y = alloca i32, align 4 - %x1 = bitcast i64* %x to i8* - %y1 = bitcast i32* %y to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x1, i8* %y1, i32 5, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr %x, ptr %y, i32 5, i1 false) ret void } @@ -150,9 +140,7 @@ entry: %x = alloca i32, align 4 %y = alloca i64, align 4 - %x1 = bitcast i32* %x to i8* - %y1 = bitcast i64* %y to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x1, i8* %y1, i32 5, i1 false) + call void 
@llvm.memcpy.p0.p0.i32(ptr %x, ptr %y, i32 5, i1 false) ret void } @@ -167,9 +155,7 @@ entry: %x = alloca i32, align 4 %y = alloca i64, align 4 - %x1 = bitcast i32* %x to i8* - %y1 = bitcast i64* %y to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x1, i8* %y1, i32 9, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr %x, ptr %y, i32 9, i1 false) ret void } @@ -179,13 +165,12 @@ ; CHECK-NEXT: allocas uses: ; CHECK-NEXT: x[8]: [0,8){{$}} ; GLOBAL-NEXT: safe accesses -; GLOBAL-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x1, i8* %x2, i32 3, i1 false) +; GLOBAL-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr %x, ptr %x2, i32 3, i1 false) ; CHECK-EMPTY: entry: %x = alloca i64, align 4 - %x1 = bitcast i64* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 5 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x1, i8* %x2, i32 3, i1 false) + %x2 = getelementptr i8, ptr %x, i64 5 + call void @llvm.memcpy.p0.p0.i32(ptr %x, ptr %x2, i32 3, i1 false) ret void } @@ -198,9 +183,8 @@ ; CHECK-EMPTY: entry: %x = alloca i64, align 4 - %x1 = bitcast i64* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 5 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x1, i8* %x2, i32 4, i1 false) + %x2 = getelementptr i8, ptr %x, i64 5 + call void @llvm.memcpy.p0.p0.i32(ptr %x, ptr %x2, i32 4, i1 false) ret void } @@ -213,9 +197,8 @@ ; CHECK-EMPTY: entry: %x = alloca i64, align 4 - %x1 = bitcast i64* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 5 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x2, i8* %x1, i32 4, i1 false) + %x2 = getelementptr i8, ptr %x, i64 5 + call void @llvm.memcpy.p0.p0.i32(ptr %x2, ptr %x, i32 4, i1 false) ret void } @@ -228,9 +211,8 @@ ; CHECK-EMPTY: entry: %x = alloca i64, align 4 - %x1 = bitcast i64* %x to i8* - %x2 = getelementptr i8, i8* %x1, i64 5 - call void @llvm.memmove.p0i8.p0i8.i32(i8* %x1, i8* %x2, i32 9, i1 false) + %x2 = getelementptr i8, ptr %x, i64 5 + call void @llvm.memmove.p0.p0.i32(ptr %x, ptr %x2, i32 9, i1 false) ret void } @@ -241,14 +223,13 @@ ; CHECK-NEXT: x[4]: [0,4){{$}} ; CHECK-NEXT: y[1]: empty-set{{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: call void @llvm.memset.p0i8.i32(i8* %x1, i8 %yint, i32 4, i1 false) +; GLOBAL-NEXT: call void @llvm.memset.p0.i32(ptr %x, i8 %yint, i32 4, i1 false) ; CHECK-EMPTY: entry: %x = alloca i32, align 4 %y = alloca i8, align 1 - %x1 = bitcast i32* %x to i8* - %yint = ptrtoint i8* %y to i8 - call void @llvm.memset.p0i8.i32(i8* %x1, i8 %yint, i32 4, i1 false) + %yint = ptrtoint ptr %y to i8 + call void @llvm.memset.p0.i32(ptr %x, i8 %yint, i32 4, i1 false) ret void } @@ -260,15 +241,13 @@ ; CHECK-NEXT: y[256]: [0,255){{$}} ; CHECK-NEXT: z[1]: empty-set{{$}} ; GLOBAL-NEXT: safe accesses: -; GLOBAL-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x1, i8* %y1, i32 %zint32, i1 false) +; GLOBAL-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr %x, ptr %y, i32 %zint32, i1 false) ; CHECK-EMPTY: entry: %x = alloca [256 x i8], align 4 %y = alloca [256 x i8], align 4 %z = alloca i8, align 1 - %x1 = bitcast [256 x i8]* %x to i8* - %y1 = bitcast [256 x i8]* %y to i8* %zint32 = zext i8 %zint8 to i32 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x1, i8* %y1, i32 %zint32, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr %x, ptr %y, i32 %zint32, i1 false) ret void } diff --git a/llvm/test/Analysis/ValueTracking/non-negative-phi-bits.ll b/llvm/test/Analysis/ValueTracking/non-negative-phi-bits.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Assembler/2002-04-07-HexFloatConstants.ll b/llvm/test/Assembler/2002-04-07-HexFloatConstants.ll --- 
a/llvm/test/Assembler/2002-04-07-HexFloatConstants.ll +++ b/llvm/test/Assembler/2002-04-07-HexFloatConstants.ll @@ -5,8 +5,8 @@ ; of the bug that was causing the Olden Health benchmark to output incorrect ; results! ; -; RUN: opt -instsimplify -S > %t.1 < %s -; RUN: llvm-as < %s | llvm-dis | llvm-as | opt -instsimplify | \ +; RUN: opt -passes=instsimplify -S > %t.1 < %s +; RUN: llvm-as < %s | llvm-dis | llvm-as | opt -passes=instsimplify | \ ; RUN: llvm-dis > %t.2 ; RUN: diff %t.1 %t.2 ; RUN: verify-uselistorder %s diff --git a/llvm/test/Assembler/2002-04-29-NameBinding.ll b/llvm/test/Assembler/2002-04-29-NameBinding.ll --- a/llvm/test/Assembler/2002-04-29-NameBinding.ll +++ b/llvm/test/Assembler/2002-04-29-NameBinding.ll @@ -4,7 +4,7 @@ ; Check by running globaldce, which will remove the constant if there are ; no references to it! ; -; RUN: opt < %s -globaldce -S | \ +; RUN: opt < %s -passes=globaldce -S | \ ; RUN: not grep constant ; ; RUN: verify-uselistorder %s diff --git a/llvm/test/Assembler/2002-08-19-BytecodeReader.ll b/llvm/test/Assembler/2002-08-19-BytecodeReader.ll --- a/llvm/test/Assembler/2002-08-19-BytecodeReader.ll +++ b/llvm/test/Assembler/2002-08-19-BytecodeReader.ll @@ -1,7 +1,7 @@ ; Testcase that seems to break the bytecode reader. This comes from the ; "crafty" spec benchmark. ; -; RUN: opt < %s -instcombine | llvm-dis +; RUN: opt < %s -passes=instcombine | llvm-dis ; RUN: verify-uselistorder %s %CHESS_POSITION = type { i32, i32 } diff --git a/llvm/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll b/llvm/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll --- a/llvm/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll +++ b/llvm/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -simplifycfg -S | not grep br +; RUN: opt < %s -passes=instcombine,simplifycfg -S | not grep br ; RUN: verify-uselistorder %s @.str_1 = internal constant [6 x i8] c"_Bool\00" ; <[6 x i8]*> [#uses=2] diff --git a/llvm/test/Assembler/2009-02-28-StripOpaqueName.ll b/llvm/test/Assembler/2009-02-28-StripOpaqueName.ll --- a/llvm/test/Assembler/2009-02-28-StripOpaqueName.ll +++ b/llvm/test/Assembler/2009-02-28-StripOpaqueName.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -strip -S | llvm-as | llvm-dis +; RUN: opt < %s -passes=strip -S | llvm-as | llvm-dis ; RUN: verify-uselistorder %s ; Stripping the name from A should not break references to it. diff --git a/llvm/test/Assembler/ifunc-stripPointerCastsAndAliases.ll b/llvm/test/Assembler/ifunc-stripPointerCastsAndAliases.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Assembler/ifunc-stripPointerCastsAndAliases.ll @@ -0,0 +1,23 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +; Check constantexprs which ifunc looks through to find the resolver +; function. 
+ +@ifunc_addrspacecast_as1_to_as0 = ifunc void (), ptr addrspacecast (ptr addrspace(1) @resolver_as1 to ptr) + +; CHECK: @alias_resolver = internal alias i32 (i32), ptr @resolver +@alias_resolver = internal alias i32 (i32), ptr @resolver + +; CHECK: @ifunc_resolver_is_alias = internal ifunc i32 (i32), ptr @alias_resolver +@ifunc_resolver_is_alias = internal ifunc i32 (i32), ptr @alias_resolver + + +; CHECK: define ptr @resolver_as1() addrspace(1) { +define ptr @resolver_as1() addrspace(1) { + ret ptr null +} + +; CHECK: define internal ptr @resolver() { +define internal ptr @resolver() { + ret ptr null +} diff --git a/llvm/test/Bitcode/DISubprogram-v5.ll.bc b/llvm/test/Bitcode/DISubprogram-v5.ll.bc old mode 100755 new mode 100644 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll @@ -5,7 +5,7 @@ ; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -O0 -mattr=+lse -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-CAS-O0 @var = global i128 0 -define void @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) { +define void @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) { ; CHECK-LLSC-O1-LABEL: val_compare_and_swap: ; CHECK-LLSC-O1: // %bb.0: ; CHECK-LLSC-O1-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1 @@ -84,13 +84,13 @@ ; CHECK-CAS-O0-NEXT: add sp, sp, #16 ; CHECK-CAS-O0-NEXT: ret -%pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire +%pair = cmpxchg ptr %p, i128 %oldval, i128 %newval acquire acquire %val = extractvalue { i128, i1 } %pair, 0 - store i128 %val, i128* %p + store i128 %val, ptr %p ret void } -define void @val_compare_and_swap_monotonic_seqcst(i128* %p, i128 %oldval, i128 %newval) { +define void @val_compare_and_swap_monotonic_seqcst(ptr %p, i128 %oldval, i128 %newval) { ; CHECK-LLSC-O1-LABEL: val_compare_and_swap_monotonic_seqcst: ; CHECK-LLSC-O1: // %bb.0: ; CHECK-LLSC-O1-NEXT: .LBB1_1: // =>This Inner Loop Header: Depth=1 @@ -169,13 +169,13 @@ ; CHECK-CAS-O0-NEXT: add sp, sp, #16 ; CHECK-CAS-O0-NEXT: ret - %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval monotonic seq_cst + %pair = cmpxchg ptr %p, i128 %oldval, i128 %newval monotonic seq_cst %val = extractvalue { i128, i1 } %pair, 0 - store i128 %val, i128* %p + store i128 %val, ptr %p ret void } -define void @val_compare_and_swap_release_acquire(i128* %p, i128 %oldval, i128 %newval) { +define void @val_compare_and_swap_release_acquire(ptr %p, i128 %oldval, i128 %newval) { ; CHECK-LLSC-O1-LABEL: val_compare_and_swap_release_acquire: ; CHECK-LLSC-O1: // %bb.0: ; CHECK-LLSC-O1-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1 @@ -254,13 +254,13 @@ ; CHECK-CAS-O0-NEXT: add sp, sp, #16 ; CHECK-CAS-O0-NEXT: ret - %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval release acquire + %pair = cmpxchg ptr %p, i128 %oldval, i128 %newval release acquire %val = extractvalue { i128, i1 } %pair, 0 - store i128 %val, i128* %p + store i128 %val, ptr %p ret void } -define void @val_compare_and_swap_monotonic(i128* %p, i128 %oldval, i128 %newval) { +define void @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval) { ; CHECK-LLSC-O1-LABEL: val_compare_and_swap_monotonic: ; CHECK-LLSC-O1: // %bb.0: ; CHECK-LLSC-O1-NEXT: .LBB3_1: // =>This Inner Loop Header: Depth=1 @@ -338,13 +338,13 @@ ; CHECK-CAS-O0-NEXT: str q0, [x0] ; CHECK-CAS-O0-NEXT: add sp, sp, 
#16 ; CHECK-CAS-O0-NEXT: ret - %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval release acquire + %pair = cmpxchg ptr %p, i128 %oldval, i128 %newval release acquire %val = extractvalue { i128, i1 } %pair, 0 - store i128 %val, i128* %p + store i128 %val, ptr %p ret void } -define void @atomic_load_relaxed(i64, i64, i128* %p, i128* %p2) { +define void @atomic_load_relaxed(i64, i64, ptr %p, ptr %p2) { ; CHECK-LLSC-O1-LABEL: atomic_load_relaxed: ; CHECK-LLSC-O1: // %bb.0: ; CHECK-LLSC-O1-NEXT: .LBB4_1: // %atomicrmw.start @@ -411,12 +411,12 @@ ; CHECK-CAS-O0-NEXT: str q0, [x3] ; CHECK-CAS-O0-NEXT: ret - %r = load atomic i128, i128* %p monotonic, align 16 - store i128 %r, i128* %p2 + %r = load atomic i128, ptr %p monotonic, align 16 + store i128 %r, ptr %p2 ret void } -define i128 @val_compare_and_swap_return(i128* %p, i128 %oldval, i128 %newval) { +define i128 @val_compare_and_swap_return(ptr %p, i128 %oldval, i128 %newval) { ; CHECK-LLSC-O1-LABEL: val_compare_and_swap_return: ; CHECK-LLSC-O1: // %bb.0: ; CHECK-LLSC-O1-NEXT: .LBB5_1: // =>This Inner Loop Header: Depth=1 @@ -482,7 +482,7 @@ ; CHECK-CAS-O0-NEXT: mov x1, x3 ; CHECK-CAS-O0-NEXT: ret - %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire + %pair = cmpxchg ptr %p, i128 %oldval, i128 %newval acquire acquire %val = extractvalue { i128, i1 } %pair, 0 ret i128 %val } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -mcpu=apple-a13 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE-O1 ; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -mcpu=apple-a13 -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE-O0 -define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 { +define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) #0 { ; CHECK-NOLSE-O1-LABEL: val_compare_and_swap: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB0_1: ; %cmpxchg.start @@ -49,12 +49,12 @@ ; CHECK-LSE-O0-NEXT: mov x0, x1 ; CHECK-LSE-O0-NEXT: casa w0, w2, [x8] ; CHECK-LSE-O0-NEXT: ret - %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire + %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acquire acquire %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val } -define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 { +define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) #0 { ; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_from_load: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: ldr w9, [x2] @@ -103,13 +103,13 @@ ; CHECK-LSE-O0-NEXT: ldr w8, [x2] ; CHECK-LSE-O0-NEXT: casa w0, w8, [x9] ; CHECK-LSE-O0-NEXT: ret - %new = load i32, i32* %pnew - %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire + %new = load i32, ptr %pnew + %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acquire acquire %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val } -define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 { +define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) #0 { ; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_rel: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB2_1: ; %cmpxchg.start @@ -154,12 +154,12 @@ ; CHECK-LSE-O0-NEXT: mov x0, x1 ; CHECK-LSE-O0-NEXT: casal w0, w2, [x8] ; CHECK-LSE-O0-NEXT: ret - %pair = cmpxchg i32* %p, i32 %cmp, i32 
%new acq_rel monotonic + %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acq_rel monotonic %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val } -define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 { +define i64 @val_compare_and_swap_64(ptr %p, i64 %cmp, i64 %new) #0 { ; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_64: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB3_1: ; %cmpxchg.start @@ -204,12 +204,12 @@ ; CHECK-LSE-O0-NEXT: mov x0, x1 ; CHECK-LSE-O0-NEXT: cas x0, x2, [x8] ; CHECK-LSE-O0-NEXT: ret - %pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic + %pair = cmpxchg ptr %p, i64 %cmp, i64 %new monotonic monotonic %val = extractvalue { i64, i1 } %pair, 0 ret i64 %val } -define i64 @val_compare_and_swap_64_monotonic_seqcst(i64* %p, i64 %cmp, i64 %new) #0 { +define i64 @val_compare_and_swap_64_monotonic_seqcst(ptr %p, i64 %cmp, i64 %new) #0 { ; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_64_monotonic_seqcst: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB4_1: ; %cmpxchg.start @@ -254,12 +254,12 @@ ; CHECK-LSE-O0-NEXT: mov x0, x1 ; CHECK-LSE-O0-NEXT: casal x0, x2, [x8] ; CHECK-LSE-O0-NEXT: ret - %pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic seq_cst + %pair = cmpxchg ptr %p, i64 %cmp, i64 %new monotonic seq_cst %val = extractvalue { i64, i1 } %pair, 0 ret i64 %val } -define i64 @val_compare_and_swap_64_release_acquire(i64* %p, i64 %cmp, i64 %new) #0 { +define i64 @val_compare_and_swap_64_release_acquire(ptr %p, i64 %cmp, i64 %new) #0 { ; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_64_release_acquire: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB5_1: ; %cmpxchg.start @@ -304,12 +304,12 @@ ; CHECK-LSE-O0-NEXT: mov x0, x1 ; CHECK-LSE-O0-NEXT: casal x0, x2, [x8] ; CHECK-LSE-O0-NEXT: ret - %pair = cmpxchg i64* %p, i64 %cmp, i64 %new release acquire + %pair = cmpxchg ptr %p, i64 %cmp, i64 %new release acquire %val = extractvalue { i64, i1 } %pair, 0 ret i64 %val } -define i32 @fetch_and_nand(i32* %p) #0 { +define i32 @fetch_and_nand(ptr %p) #0 { ; CHECK-NOLSE-O1-LABEL: fetch_and_nand: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB6_1: ; %atomicrmw.start @@ -398,11 +398,11 @@ ; CHECK-LSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload ; CHECK-LSE-O0-NEXT: add sp, sp, #32 ; CHECK-LSE-O0-NEXT: ret - %val = atomicrmw nand i32* %p, i32 7 release + %val = atomicrmw nand ptr %p, i32 7 release ret i32 %val } -define i64 @fetch_and_nand_64(i64* %p) #0 { +define i64 @fetch_and_nand_64(ptr %p) #0 { ; CHECK-NOLSE-O1-LABEL: fetch_and_nand_64: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB7_1: ; %atomicrmw.start @@ -491,11 +491,11 @@ ; CHECK-LSE-O0-NEXT: ldr x0, [sp, #8] ; 8-byte Folded Reload ; CHECK-LSE-O0-NEXT: add sp, sp, #32 ; CHECK-LSE-O0-NEXT: ret - %val = atomicrmw nand i64* %p, i64 7 acq_rel + %val = atomicrmw nand ptr %p, i64 7 acq_rel ret i64 %val } -define i32 @fetch_and_or(i32* %p) #0 { +define i32 @fetch_and_or(ptr %p) #0 { ; CHECK-NOLSE-O1-LABEL: fetch_and_or: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: mov w9, #5 @@ -557,11 +557,11 @@ ; CHECK-LSE-O0-NEXT: mov w8, #5 ; CHECK-LSE-O0-NEXT: ldsetal w8, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %val = atomicrmw or i32* %p, i32 5 seq_cst + %val = atomicrmw or ptr %p, i32 5 seq_cst ret i32 %val } -define i64 @fetch_and_or_64(i64* %p) #0 { +define i64 @fetch_and_or_64(ptr %p) #0 { ; CHECK-NOLSE-O1-LABEL: fetch_and_or_64: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB9_1: ; %atomicrmw.start @@ -622,7 +622,7 @@ ; CHECK-LSE-O0-NEXT: ; kill: def $x8 killed $w8 ; CHECK-LSE-O0-NEXT: 
ldset x8, x0, [x0] ; CHECK-LSE-O0-NEXT: ret - %val = atomicrmw or i64* %p, i64 7 monotonic + %val = atomicrmw or ptr %p, i64 7 monotonic ret i64 %val } @@ -683,7 +683,7 @@ ret void } -define i32 @atomic_load(i32* %p) #0 { +define i32 @atomic_load(ptr %p) #0 { ; CHECK-NOLSE-LABEL: atomic_load: ; CHECK-NOLSE: ; %bb.0: ; CHECK-NOLSE-NEXT: ldar w0, [x0] @@ -698,11 +698,11 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: ldar w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %r = load atomic i32, i32* %p seq_cst, align 4 + %r = load atomic i32, ptr %p seq_cst, align 4 ret i32 %r } -define i8 @atomic_load_relaxed_8(i8* %p, i32 %off32) #0 { +define i8 @atomic_load_relaxed_8(ptr %p, i32 %off32) #0 { ; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_8: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 @@ -754,25 +754,25 @@ ; CHECK-LSE-O0-NEXT: ldrb w9, [x9] ; CHECK-LSE-O0-NEXT: add w0, w8, w9, uxtb ; CHECK-LSE-O0-NEXT: ret - %ptr_unsigned = getelementptr i8, i8* %p, i32 4095 - %val_unsigned = load atomic i8, i8* %ptr_unsigned monotonic, align 1 + %ptr_unsigned = getelementptr i8, ptr %p, i32 4095 + %val_unsigned = load atomic i8, ptr %ptr_unsigned monotonic, align 1 - %ptr_regoff = getelementptr i8, i8* %p, i32 %off32 - %val_regoff = load atomic i8, i8* %ptr_regoff unordered, align 1 + %ptr_regoff = getelementptr i8, ptr %p, i32 %off32 + %val_regoff = load atomic i8, ptr %ptr_regoff unordered, align 1 %tot1 = add i8 %val_unsigned, %val_regoff - %ptr_unscaled = getelementptr i8, i8* %p, i32 -256 - %val_unscaled = load atomic i8, i8* %ptr_unscaled monotonic, align 1 + %ptr_unscaled = getelementptr i8, ptr %p, i32 -256 + %val_unscaled = load atomic i8, ptr %ptr_unscaled monotonic, align 1 %tot2 = add i8 %tot1, %val_unscaled - %ptr_random = getelementptr i8, i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm) - %val_random = load atomic i8, i8* %ptr_random unordered, align 1 + %ptr_random = getelementptr i8, ptr %p, i32 1191936 ; 0x123000 (i.e. ADD imm) + %val_random = load atomic i8, ptr %ptr_random unordered, align 1 %tot3 = add i8 %tot2, %val_random ret i8 %tot3 } -define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) #0 { +define i16 @atomic_load_relaxed_16(ptr %p, i32 %off32) #0 { ; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_16: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 @@ -824,25 +824,25 @@ ; CHECK-LSE-O0-NEXT: ldrh w9, [x9] ; CHECK-LSE-O0-NEXT: add w0, w8, w9, uxth ; CHECK-LSE-O0-NEXT: ret - %ptr_unsigned = getelementptr i16, i16* %p, i32 4095 - %val_unsigned = load atomic i16, i16* %ptr_unsigned monotonic, align 2 + %ptr_unsigned = getelementptr i16, ptr %p, i32 4095 + %val_unsigned = load atomic i16, ptr %ptr_unsigned monotonic, align 2 - %ptr_regoff = getelementptr i16, i16* %p, i32 %off32 - %val_regoff = load atomic i16, i16* %ptr_regoff unordered, align 2 + %ptr_regoff = getelementptr i16, ptr %p, i32 %off32 + %val_regoff = load atomic i16, ptr %ptr_regoff unordered, align 2 %tot1 = add i16 %val_unsigned, %val_regoff - %ptr_unscaled = getelementptr i16, i16* %p, i32 -128 - %val_unscaled = load atomic i16, i16* %ptr_unscaled monotonic, align 2 + %ptr_unscaled = getelementptr i16, ptr %p, i32 -128 + %val_unscaled = load atomic i16, ptr %ptr_unscaled monotonic, align 2 %tot2 = add i16 %tot1, %val_unscaled - %ptr_random = getelementptr i16, i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm) - %val_random = load atomic i16, i16* %ptr_random unordered, align 2 + %ptr_random = getelementptr i16, ptr %p, i32 595968 ; 0x123000/2 (i.e. 
ADD imm) + %val_random = load atomic i16, ptr %ptr_random unordered, align 2 %tot3 = add i16 %tot2, %val_random ret i16 %tot3 } -define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) #0 { +define i32 @atomic_load_relaxed_32(ptr %p, i32 %off32) #0 { ; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_32: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 @@ -890,25 +890,25 @@ ; CHECK-LSE-O0-NEXT: ldr w9, [x9] ; CHECK-LSE-O0-NEXT: add w0, w8, w9 ; CHECK-LSE-O0-NEXT: ret - %ptr_unsigned = getelementptr i32, i32* %p, i32 4095 - %val_unsigned = load atomic i32, i32* %ptr_unsigned monotonic, align 4 + %ptr_unsigned = getelementptr i32, ptr %p, i32 4095 + %val_unsigned = load atomic i32, ptr %ptr_unsigned monotonic, align 4 - %ptr_regoff = getelementptr i32, i32* %p, i32 %off32 - %val_regoff = load atomic i32, i32* %ptr_regoff unordered, align 4 + %ptr_regoff = getelementptr i32, ptr %p, i32 %off32 + %val_regoff = load atomic i32, ptr %ptr_regoff unordered, align 4 %tot1 = add i32 %val_unsigned, %val_regoff - %ptr_unscaled = getelementptr i32, i32* %p, i32 -64 - %val_unscaled = load atomic i32, i32* %ptr_unscaled monotonic, align 4 + %ptr_unscaled = getelementptr i32, ptr %p, i32 -64 + %val_unscaled = load atomic i32, ptr %ptr_unscaled monotonic, align 4 %tot2 = add i32 %tot1, %val_unscaled - %ptr_random = getelementptr i32, i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm) - %val_random = load atomic i32, i32* %ptr_random unordered, align 4 + %ptr_random = getelementptr i32, ptr %p, i32 297984 ; 0x123000/4 (i.e. ADD imm) + %val_random = load atomic i32, ptr %ptr_random unordered, align 4 %tot3 = add i32 %tot2, %val_random ret i32 %tot3 } -define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) #0 { +define i64 @atomic_load_relaxed_64(ptr %p, i32 %off32) #0 { ; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_64: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 @@ -956,26 +956,26 @@ ; CHECK-LSE-O0-NEXT: ldr x9, [x9] ; CHECK-LSE-O0-NEXT: add x0, x8, x9 ; CHECK-LSE-O0-NEXT: ret - %ptr_unsigned = getelementptr i64, i64* %p, i32 4095 - %val_unsigned = load atomic i64, i64* %ptr_unsigned monotonic, align 8 + %ptr_unsigned = getelementptr i64, ptr %p, i32 4095 + %val_unsigned = load atomic i64, ptr %ptr_unsigned monotonic, align 8 - %ptr_regoff = getelementptr i64, i64* %p, i32 %off32 - %val_regoff = load atomic i64, i64* %ptr_regoff unordered, align 8 + %ptr_regoff = getelementptr i64, ptr %p, i32 %off32 + %val_regoff = load atomic i64, ptr %ptr_regoff unordered, align 8 %tot1 = add i64 %val_unsigned, %val_regoff - %ptr_unscaled = getelementptr i64, i64* %p, i32 -32 - %val_unscaled = load atomic i64, i64* %ptr_unscaled monotonic, align 8 + %ptr_unscaled = getelementptr i64, ptr %p, i32 -32 + %val_unscaled = load atomic i64, ptr %ptr_unscaled monotonic, align 8 %tot2 = add i64 %tot1, %val_unscaled - %ptr_random = getelementptr i64, i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm) - %val_random = load atomic i64, i64* %ptr_random unordered, align 8 + %ptr_random = getelementptr i64, ptr %p, i32 148992 ; 0x123000/8 (i.e. 
ADD imm) + %val_random = load atomic i64, ptr %ptr_random unordered, align 8 %tot3 = add i64 %tot2, %val_random ret i64 %tot3 } -define void @atomc_store(i32* %p) #0 { +define void @atomc_store(ptr %p) #0 { ; CHECK-NOLSE-LABEL: atomc_store: ; CHECK-NOLSE: ; %bb.0: ; CHECK-NOLSE-NEXT: mov w8, #4 @@ -993,11 +993,11 @@ ; CHECK-LSE-O0-NEXT: mov w8, #4 ; CHECK-LSE-O0-NEXT: stlr w8, [x0] ; CHECK-LSE-O0-NEXT: ret - store atomic i32 4, i32* %p seq_cst, align 4 + store atomic i32 4, ptr %p seq_cst, align 4 ret void } -define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 { +define void @atomic_store_relaxed_8(ptr %p, i32 %off32, i8 %val) #0 { ; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_8: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 @@ -1033,22 +1033,22 @@ ; CHECK-LSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936 ; CHECK-LSE-O0-NEXT: strb w2, [x8] ; CHECK-LSE-O0-NEXT: ret - %ptr_unsigned = getelementptr i8, i8* %p, i32 4095 - store atomic i8 %val, i8* %ptr_unsigned monotonic, align 1 + %ptr_unsigned = getelementptr i8, ptr %p, i32 4095 + store atomic i8 %val, ptr %ptr_unsigned monotonic, align 1 - %ptr_regoff = getelementptr i8, i8* %p, i32 %off32 - store atomic i8 %val, i8* %ptr_regoff unordered, align 1 + %ptr_regoff = getelementptr i8, ptr %p, i32 %off32 + store atomic i8 %val, ptr %ptr_regoff unordered, align 1 - %ptr_unscaled = getelementptr i8, i8* %p, i32 -256 - store atomic i8 %val, i8* %ptr_unscaled monotonic, align 1 + %ptr_unscaled = getelementptr i8, ptr %p, i32 -256 + store atomic i8 %val, ptr %ptr_unscaled monotonic, align 1 - %ptr_random = getelementptr i8, i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm) - store atomic i8 %val, i8* %ptr_random unordered, align 1 + %ptr_random = getelementptr i8, ptr %p, i32 1191936 ; 0x123000 (i.e. ADD imm) + store atomic i8 %val, ptr %ptr_random unordered, align 1 ret void } -define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) #0 { +define void @atomic_store_relaxed_16(ptr %p, i32 %off32, i16 %val) #0 { ; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_16: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 @@ -1084,22 +1084,22 @@ ; CHECK-LSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936 ; CHECK-LSE-O0-NEXT: strh w2, [x8] ; CHECK-LSE-O0-NEXT: ret - %ptr_unsigned = getelementptr i16, i16* %p, i32 4095 - store atomic i16 %val, i16* %ptr_unsigned monotonic, align 2 + %ptr_unsigned = getelementptr i16, ptr %p, i32 4095 + store atomic i16 %val, ptr %ptr_unsigned monotonic, align 2 - %ptr_regoff = getelementptr i16, i16* %p, i32 %off32 - store atomic i16 %val, i16* %ptr_regoff unordered, align 2 + %ptr_regoff = getelementptr i16, ptr %p, i32 %off32 + store atomic i16 %val, ptr %ptr_regoff unordered, align 2 - %ptr_unscaled = getelementptr i16, i16* %p, i32 -128 - store atomic i16 %val, i16* %ptr_unscaled monotonic, align 2 + %ptr_unscaled = getelementptr i16, ptr %p, i32 -128 + store atomic i16 %val, ptr %ptr_unscaled monotonic, align 2 - %ptr_random = getelementptr i16, i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm) - store atomic i16 %val, i16* %ptr_random unordered, align 2 + %ptr_random = getelementptr i16, ptr %p, i32 595968 ; 0x123000/2 (i.e. 
ADD imm) + store atomic i16 %val, ptr %ptr_random unordered, align 2 ret void } -define void @atomic_store_relaxed_32(i32* %p, i32 %off32, i32 %val) #0 { +define void @atomic_store_relaxed_32(ptr %p, i32 %off32, i32 %val) #0 { ; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_32: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 @@ -1135,22 +1135,22 @@ ; CHECK-LSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936 ; CHECK-LSE-O0-NEXT: str w2, [x8] ; CHECK-LSE-O0-NEXT: ret - %ptr_unsigned = getelementptr i32, i32* %p, i32 4095 - store atomic i32 %val, i32* %ptr_unsigned monotonic, align 4 + %ptr_unsigned = getelementptr i32, ptr %p, i32 4095 + store atomic i32 %val, ptr %ptr_unsigned monotonic, align 4 - %ptr_regoff = getelementptr i32, i32* %p, i32 %off32 - store atomic i32 %val, i32* %ptr_regoff unordered, align 4 + %ptr_regoff = getelementptr i32, ptr %p, i32 %off32 + store atomic i32 %val, ptr %ptr_regoff unordered, align 4 - %ptr_unscaled = getelementptr i32, i32* %p, i32 -64 - store atomic i32 %val, i32* %ptr_unscaled monotonic, align 4 + %ptr_unscaled = getelementptr i32, ptr %p, i32 -64 + store atomic i32 %val, ptr %ptr_unscaled monotonic, align 4 - %ptr_random = getelementptr i32, i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm) - store atomic i32 %val, i32* %ptr_random unordered, align 4 + %ptr_random = getelementptr i32, ptr %p, i32 297984 ; 0x123000/4 (i.e. ADD imm) + store atomic i32 %val, ptr %ptr_random unordered, align 4 ret void } -define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) #0 { +define void @atomic_store_relaxed_64(ptr %p, i32 %off32, i64 %val) #0 { ; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_64: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 @@ -1186,22 +1186,22 @@ ; CHECK-LSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936 ; CHECK-LSE-O0-NEXT: str x2, [x8] ; CHECK-LSE-O0-NEXT: ret - %ptr_unsigned = getelementptr i64, i64* %p, i32 4095 - store atomic i64 %val, i64* %ptr_unsigned monotonic, align 8 + %ptr_unsigned = getelementptr i64, ptr %p, i32 4095 + store atomic i64 %val, ptr %ptr_unsigned monotonic, align 8 - %ptr_regoff = getelementptr i64, i64* %p, i32 %off32 - store atomic i64 %val, i64* %ptr_regoff unordered, align 8 + %ptr_regoff = getelementptr i64, ptr %p, i32 %off32 + store atomic i64 %val, ptr %ptr_regoff unordered, align 8 - %ptr_unscaled = getelementptr i64, i64* %p, i32 -32 - store atomic i64 %val, i64* %ptr_unscaled monotonic, align 8 + %ptr_unscaled = getelementptr i64, ptr %p, i32 -32 + store atomic i64 %val, ptr %ptr_unscaled monotonic, align 8 - %ptr_random = getelementptr i64, i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm) - store atomic i64 %val, i64* %ptr_random unordered, align 8 + %ptr_random = getelementptr i64, ptr %p, i32 148992 ; 0x123000/8 (i.e. 
ADD imm) + store atomic i64 %val, ptr %ptr_random unordered, align 8 ret void } -define i32 @load_zext(i8* %p8, i16* %p16) { +define i32 @load_zext(ptr %p8, ptr %p16) { ; CHECK-NOLSE-O1-LABEL: load_zext: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: ldarb w8, [x0] @@ -1229,17 +1229,17 @@ ; CHECK-LSE-O0-NEXT: ldrh w8, [x1] ; CHECK-LSE-O0-NEXT: add w0, w8, w9, uxtb ; CHECK-LSE-O0-NEXT: ret - %val1.8 = load atomic i8, i8* %p8 acquire, align 1 + %val1.8 = load atomic i8, ptr %p8 acquire, align 1 %val1 = zext i8 %val1.8 to i32 - %val2.16 = load atomic i16, i16* %p16 unordered, align 2 + %val2.16 = load atomic i16, ptr %p16 unordered, align 2 %val2 = zext i16 %val2.16 to i32 %res = add i32 %val1, %val2 ret i32 %res } -define { i32, i64 } @load_acq(i32* %p32, i64* %p64) { +define { i32, i64 } @load_acq(ptr %p32, ptr %p64) { ; CHECK-NOLSE-LABEL: load_acq: ; CHECK-NOLSE: ; %bb.0: ; CHECK-NOLSE-NEXT: ldar w0, [x0] @@ -1257,16 +1257,16 @@ ; CHECK-LSE-O0-NEXT: ldar w0, [x0] ; CHECK-LSE-O0-NEXT: ldapr x1, [x1] ; CHECK-LSE-O0-NEXT: ret - %val32 = load atomic i32, i32* %p32 seq_cst, align 4 + %val32 = load atomic i32, ptr %p32 seq_cst, align 4 %tmp = insertvalue { i32, i64 } undef, i32 %val32, 0 - %val64 = load atomic i64, i64* %p64 acquire, align 8 + %val64 = load atomic i64, ptr %p64 acquire, align 8 %res = insertvalue { i32, i64 } %tmp, i64 %val64, 1 ret { i32, i64 } %res } -define i32 @load_sext(i8* %p8, i16* %p16) { +define i32 @load_sext(ptr %p8, ptr %p16) { ; CHECK-NOLSE-O1-LABEL: load_sext: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: ldarb w8, [x0] @@ -1298,17 +1298,17 @@ ; CHECK-LSE-O0-NEXT: sxth w8, w8 ; CHECK-LSE-O0-NEXT: add w0, w8, w9, sxtb ; CHECK-LSE-O0-NEXT: ret - %val1.8 = load atomic i8, i8* %p8 acquire, align 1 + %val1.8 = load atomic i8, ptr %p8 acquire, align 1 %val1 = sext i8 %val1.8 to i32 - %val2.16 = load atomic i16, i16* %p16 unordered, align 2 + %val2.16 = load atomic i16, ptr %p16 unordered, align 2 %val2 = sext i16 %val2.16 to i32 %res = add i32 %val1, %val2 ret i32 %res } -define void @store_trunc(i32 %val, i8* %p8, i16* %p16) { +define void @store_trunc(i32 %val, ptr %p8, ptr %p16) { ; CHECK-NOLSE-LABEL: store_trunc: ; CHECK-NOLSE: ; %bb.0: ; CHECK-NOLSE-NEXT: stlrb w0, [x1] @@ -1327,15 +1327,15 @@ ; CHECK-LSE-O0-NEXT: strh w0, [x2] ; CHECK-LSE-O0-NEXT: ret %val8 = trunc i32 %val to i8 - store atomic i8 %val8, i8* %p8 seq_cst, align 1 + store atomic i8 %val8, ptr %p8 seq_cst, align 1 %val16 = trunc i32 %val to i16 - store atomic i16 %val16, i16* %p16 monotonic, align 2 + store atomic i16 %val16, ptr %p16 monotonic, align 2 ret void } -define i8 @atomicrmw_add_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_add_i8: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB27_1: ; %atomicrmw.start @@ -1397,11 +1397,11 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: ldaddalb w1, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw add i8* %ptr, i8 %rhs seq_cst + %res = atomicrmw add ptr %ptr, i8 %rhs seq_cst ret i8 %res } -define i8 @atomicrmw_xchg_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i8: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 @@ -1462,11 +1462,11 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: swpb w1, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw xchg i8* %ptr, i8 %rhs monotonic + %res = atomicrmw xchg ptr %ptr, i8 %rhs monotonic ret i8 %res } -define i8 @atomicrmw_sub_i8(i8* %ptr, 
i8 %rhs) { +define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_sub_i8: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB29_1: ; %atomicrmw.start @@ -1530,11 +1530,11 @@ ; CHECK-LSE-O0-NEXT: neg w8, w1 ; CHECK-LSE-O0-NEXT: ldaddab w8, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw sub i8* %ptr, i8 %rhs acquire + %res = atomicrmw sub ptr %ptr, i8 %rhs acquire ret i8 %res } -define i8 @atomicrmw_and_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_and_i8: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB30_1: ; %atomicrmw.start @@ -1598,11 +1598,11 @@ ; CHECK-LSE-O0-NEXT: mvn w8, w1 ; CHECK-LSE-O0-NEXT: ldclrlb w8, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw and i8* %ptr, i8 %rhs release + %res = atomicrmw and ptr %ptr, i8 %rhs release ret i8 %res } -define i8 @atomicrmw_or_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_or_i8: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB31_1: ; %atomicrmw.start @@ -1664,11 +1664,11 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: ldsetalb w1, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw or i8* %ptr, i8 %rhs seq_cst + %res = atomicrmw or ptr %ptr, i8 %rhs seq_cst ret i8 %res } -define i8 @atomicrmw_xor_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_xor_i8: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB32_1: ; %atomicrmw.start @@ -1730,11 +1730,11 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: ldeorb w1, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw xor i8* %ptr, i8 %rhs monotonic + %res = atomicrmw xor ptr %ptr, i8 %rhs monotonic ret i8 %res } -define i8 @atomicrmw_min_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_min_i8: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB33_1: ; %atomicrmw.start @@ -1803,11 +1803,11 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: ldsminab w1, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw min i8* %ptr, i8 %rhs acquire + %res = atomicrmw min ptr %ptr, i8 %rhs acquire ret i8 %res } -define i8 @atomicrmw_max_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_max_i8: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB34_1: ; %atomicrmw.start @@ -1876,11 +1876,11 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: ldsmaxlb w1, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw max i8* %ptr, i8 %rhs release + %res = atomicrmw max ptr %ptr, i8 %rhs release ret i8 %res } -define i8 @atomicrmw_umin_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i8: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xff @@ -1950,11 +1950,11 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: lduminalb w1, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw umin i8* %ptr, i8 %rhs seq_cst + %res = atomicrmw umin ptr %ptr, i8 %rhs seq_cst ret i8 %res } -define i8 @atomicrmw_umax_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i8: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xff @@ -2024,11 +2024,11 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: ldumaxb w1, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw umax i8* %ptr, i8 %rhs monotonic + %res = atomicrmw umax ptr %ptr, i8 %rhs monotonic ret i8 %res 
} -define i16 @atomicrmw_add_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_add_i16: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB37_1: ; %atomicrmw.start @@ -2090,11 +2090,11 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: ldaddalh w1, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw add i16* %ptr, i16 %rhs seq_cst + %res = atomicrmw add ptr %ptr, i16 %rhs seq_cst ret i16 %res } -define i16 @atomicrmw_xchg_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i16: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 @@ -2155,11 +2155,11 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: swph w1, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw xchg i16* %ptr, i16 %rhs monotonic + %res = atomicrmw xchg ptr %ptr, i16 %rhs monotonic ret i16 %res } -define i16 @atomicrmw_sub_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_sub_i16: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB39_1: ; %atomicrmw.start @@ -2223,11 +2223,11 @@ ; CHECK-LSE-O0-NEXT: neg w8, w1 ; CHECK-LSE-O0-NEXT: ldaddah w8, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw sub i16* %ptr, i16 %rhs acquire + %res = atomicrmw sub ptr %ptr, i16 %rhs acquire ret i16 %res } -define i16 @atomicrmw_and_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_and_i16: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB40_1: ; %atomicrmw.start @@ -2291,11 +2291,11 @@ ; CHECK-LSE-O0-NEXT: mvn w8, w1 ; CHECK-LSE-O0-NEXT: ldclrlh w8, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw and i16* %ptr, i16 %rhs release + %res = atomicrmw and ptr %ptr, i16 %rhs release ret i16 %res } -define i16 @atomicrmw_or_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_or_i16: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB41_1: ; %atomicrmw.start @@ -2357,11 +2357,11 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: ldsetalh w1, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw or i16* %ptr, i16 %rhs seq_cst + %res = atomicrmw or ptr %ptr, i16 %rhs seq_cst ret i16 %res } -define i16 @atomicrmw_xor_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_xor_i16: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB42_1: ; %atomicrmw.start @@ -2423,11 +2423,11 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: ldeorh w1, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw xor i16* %ptr, i16 %rhs monotonic + %res = atomicrmw xor ptr %ptr, i16 %rhs monotonic ret i16 %res } -define i16 @atomicrmw_min_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_min_i16: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB43_1: ; %atomicrmw.start @@ -2496,11 +2496,11 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: ldsminah w1, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw min i16* %ptr, i16 %rhs acquire + %res = atomicrmw min ptr %ptr, i16 %rhs acquire ret i16 %res } -define i16 @atomicrmw_max_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_max_i16: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: LBB44_1: ; %atomicrmw.start @@ -2569,11 +2569,11 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: 
ldsmaxlh w1, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw max i16* %ptr, i16 %rhs release + %res = atomicrmw max ptr %ptr, i16 %rhs release ret i16 %res } -define i16 @atomicrmw_umin_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i16: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xffff @@ -2643,11 +2643,11 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: lduminalh w1, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw umin i16* %ptr, i16 %rhs seq_cst + %res = atomicrmw umin ptr %ptr, i16 %rhs seq_cst ret i16 %res } -define i16 @atomicrmw_umax_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i16: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xffff @@ -2717,11 +2717,11 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: ldumaxh w1, w0, [x0] ; CHECK-LSE-O0-NEXT: ret - %res = atomicrmw umax i16* %ptr, i16 %rhs monotonic + %res = atomicrmw umax ptr %ptr, i16 %rhs monotonic ret i16 %res } -define { i8, i1 } @cmpxchg_i8(i8* %ptr, i8 %desired, i8 %new) { +define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) { ; CHECK-NOLSE-O1-LABEL: cmpxchg_i8: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: mov x8, x0 @@ -2783,11 +2783,11 @@ ; CHECK-LSE-O0-NEXT: cset w8, eq ; CHECK-LSE-O0-NEXT: and w1, w8, #0x1 ; CHECK-LSE-O0-NEXT: ret - %res = cmpxchg i8* %ptr, i8 %desired, i8 %new monotonic monotonic + %res = cmpxchg ptr %ptr, i8 %desired, i8 %new monotonic monotonic ret { i8, i1 } %res } -define { i16, i1 } @cmpxchg_i16(i16* %ptr, i16 %desired, i16 %new) { +define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) { ; CHECK-NOLSE-O1-LABEL: cmpxchg_i16: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: mov x8, x0 @@ -2849,11 +2849,11 @@ ; CHECK-LSE-O0-NEXT: cset w8, eq ; CHECK-LSE-O0-NEXT: and w1, w8, #0x1 ; CHECK-LSE-O0-NEXT: ret - %res = cmpxchg i16* %ptr, i16 %desired, i16 %new monotonic monotonic + %res = cmpxchg ptr %ptr, i16 %desired, i16 %new monotonic monotonic ret { i16, i1 } %res } -define internal double @bitcast_to_double(i64* %ptr) { +define internal double @bitcast_to_double(ptr %ptr) { ; CHECK-NOLSE-LABEL: bitcast_to_double: ; CHECK-NOLSE: ; %bb.0: ; CHECK-NOLSE-NEXT: ldar x8, [x0] @@ -2871,12 +2871,12 @@ ; CHECK-LSE-O0-NEXT: ldar x8, [x0] ; CHECK-LSE-O0-NEXT: fmov d0, x8 ; CHECK-LSE-O0-NEXT: ret - %load = load atomic i64, i64* %ptr seq_cst, align 8 + %load = load atomic i64, ptr %ptr seq_cst, align 8 %bitcast = bitcast i64 %load to double ret double %bitcast } -define internal float @bitcast_to_float(i32* %ptr) { +define internal float @bitcast_to_float(ptr %ptr) { ; CHECK-NOLSE-LABEL: bitcast_to_float: ; CHECK-NOLSE: ; %bb.0: ; CHECK-NOLSE-NEXT: ldar w8, [x0] @@ -2894,12 +2894,12 @@ ; CHECK-LSE-O0-NEXT: ldar w8, [x0] ; CHECK-LSE-O0-NEXT: fmov s0, w8 ; CHECK-LSE-O0-NEXT: ret - %load = load atomic i32, i32* %ptr seq_cst, align 8 + %load = load atomic i32, ptr %ptr seq_cst, align 8 %bitcast = bitcast i32 %load to float ret float %bitcast } -define internal half @bitcast_to_half(i16* %ptr) { +define internal half @bitcast_to_half(ptr %ptr) { ; CHECK-NOLSE-LABEL: bitcast_to_half: ; CHECK-NOLSE: ; %bb.0: ; CHECK-NOLSE-NEXT: ldarh w8, [x0] @@ -2920,12 +2920,12 @@ ; CHECK-LSE-O0-NEXT: fmov s0, w8 ; CHECK-LSE-O0-NEXT: ; kill: def $h0 killed $h0 killed $s0 ; CHECK-LSE-O0-NEXT: ret - %load = load atomic i16, i16* %ptr seq_cst, align 8 + %load = load atomic i16, ptr %ptr seq_cst, align 8 %bitcast = 
bitcast i16 %load to half ret half %bitcast } -define internal i64* @inttoptr(i64* %ptr) { +define internal ptr @inttoptr(ptr %ptr) { ; CHECK-NOLSE-LABEL: inttoptr: ; CHECK-NOLSE: ; %bb.0: ; CHECK-NOLSE-NEXT: ldar x0, [x0] @@ -2940,12 +2940,12 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: ldar x0, [x0] ; CHECK-LSE-O0-NEXT: ret - %load = load atomic i64, i64* %ptr seq_cst, align 8 - %bitcast = inttoptr i64 %load to i64* - ret i64* %bitcast + %load = load atomic i64, ptr %ptr seq_cst, align 8 + %bitcast = inttoptr i64 %load to ptr + ret ptr %bitcast } -define internal i64* @load_ptr(i64** %ptr) { +define internal ptr @load_ptr(ptr %ptr) { ; CHECK-NOLSE-LABEL: load_ptr: ; CHECK-NOLSE: ; %bb.0: ; CHECK-NOLSE-NEXT: ldar x0, [x0] @@ -2960,8 +2960,8 @@ ; CHECK-LSE-O0: ; %bb.0: ; CHECK-LSE-O0-NEXT: ldar x0, [x0] ; CHECK-LSE-O0-NEXT: ret - %load = load atomic i64*, i64** %ptr seq_cst, align 8 - ret i64* %load + %load = load atomic ptr, ptr %ptr seq_cst, align 8 + ret ptr %load } attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll @@ -43,7 +43,7 @@ } -define i8* @args_ptrs(i8* %x0, i16* %x1, <2 x i8>* %x2, {i8, i16, i32}* %x3, +define ptr @args_ptrs(ptr %x0, ptr %x1, ptr %x2, ptr %x3, ; CHECK-LABEL: name: args_ptrs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7 @@ -58,8 +58,8 @@ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p0) = COPY $x7 ; CHECK-NEXT: $x0 = COPY [[COPY]](p0) ; CHECK-NEXT: RET_ReallyLR implicit $x0 - [3 x float]* %x4, double* %x5, i8* %x6, i8* %x7) { - ret i8* %x0 + ptr %x4, ptr %x5, ptr %x6, ptr %x7) { + ret ptr %x0 } define [1 x double] @args_arr([1 x double] %d0) { @@ -134,7 +134,7 @@ } ; Check that we can lower incoming i128 types into constituent s64 gprs. -define void @callee_s128(i128 %a, i128 %b, i128 *%ptr) { +define void @callee_s128(i128 %a, i128 %b, ptr %ptr) { ; CHECK-LABEL: name: callee_s128 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4 @@ -148,12 +148,12 @@ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY $x4 ; CHECK-NEXT: G_STORE [[MV1]](s128), [[COPY4]](p0) :: (store (s128) into %ir.ptr) ; CHECK-NEXT: RET_ReallyLR - store i128 %b, i128 *%ptr + store i128 %b, ptr %ptr ret void } ; Check we can lower outgoing s128 arguments into s64 gprs. 
-define void @caller_s128(i128 *%ptr) { +define void @caller_s128(ptr %ptr) { ; CHECK-LABEL: name: caller_s128 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $x0 @@ -171,8 +171,8 @@ ; CHECK-NEXT: BL @callee_s128, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; CHECK-NEXT: RET_ReallyLR - %v = load i128, i128 *%ptr - call void @callee_s128(i128 %v, i128 %v, i128 *%ptr) + %v = load i128, ptr %ptr + call void @callee_s128(i128 %v, i128 %v, ptr %ptr) ret void } @@ -237,9 +237,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](<2 x s64>), [[DEF]](p0) :: (store (<2 x s64>) into `<2 x i64>* undef`) + ; CHECK-NEXT: G_STORE [[COPY]](<2 x s64>), [[DEF]](p0) :: (store (<2 x s64>) into `ptr undef`) ; CHECK-NEXT: RET_ReallyLR - store <2 x i64> %arg, <2 x i64>* undef + store <2 x i64> %arg, ptr undef ret void } @@ -254,9 +254,9 @@ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s64>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>), [[COPY3]](<2 x s64>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s64>), [[DEF]](p0) :: (store (<8 x s64>) into `<8 x i64>* undef`) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s64>), [[DEF]](p0) :: (store (<8 x s64>) into `ptr undef`) ; CHECK-NEXT: RET_ReallyLR - store <8 x i64> %arg, <8 x i64>* undef + store <8 x i64> %arg, ptr undef ret void } @@ -268,9 +268,9 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[DEF]](p0) :: (store (<4 x s32>) into `<4 x float>* undef`) + ; CHECK-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[DEF]](p0) :: (store (<4 x s32>) into `ptr undef`) ; CHECK-NEXT: RET_ReallyLR - store <4 x float> %arg, <4 x float>* undef + store <4 x float> %arg, ptr undef ret void } @@ -289,8 +289,8 @@ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY3]](<2 x s64>) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[BITCAST]](<4 x s32>), [[BITCAST1]](<4 x s32>), [[BITCAST2]](<4 x s32>), [[BITCAST3]](<4 x s32>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s32>), [[DEF]](p0) :: (store (<16 x s32>) into `<16 x float>* undef`) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s32>), [[DEF]](p0) :: (store (<16 x s32>) into `ptr undef`) ; CHECK-NEXT: RET_ReallyLR - store <16 x float> %arg, <16 x float>* undef + store <16 x float> %arg, ptr undef ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -24,7 +24,7 @@ ret void } -@_ZTIi = external global i8* +@_ZTIi = external global ptr declare i32 @__gxx_personality_v0(...) 
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %2:_(<2 x p0>) = G_INSERT_VECTOR_ELT %0:_, %{{[0-9]+}}:_(p0), %{{[0-9]+}}:_(s32) (in function: vector_of_pointers_insertelement) @@ -34,20 +34,20 @@ br label %end block: - %dummy = insertelement <2 x i16*> %vec, i16* null, i32 0 - store <2 x i16*> %dummy, <2 x i16*>* undef + %dummy = insertelement <2 x ptr> %vec, ptr null, i32 0 + store <2 x ptr> %dummy, ptr undef ret void end: - %vec = load <2 x i16*>, <2 x i16*>* undef + %vec = load <2 x ptr>, ptr undef br label %block } ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: RET_ReallyLR implicit $x0 (in function: strict_align_feature) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for strict_align_feature ; FALLBACK-WITH-REPORT-OUT-LABEL: strict_align_feature -define i64 @strict_align_feature(i64* %p) #0 { - %x = load i64, i64* %p, align 1 +define i64 @strict_align_feature(ptr %p) #0 { + %x = load i64, ptr %p, align 1 ret i64 %x } @@ -64,24 +64,24 @@ ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to lower function{{.*}}scalable_arg ; FALLBACK-WITH-REPORT-OUT-LABEL: scalable_arg -define <vscale x 16 x i8> @scalable_arg(<vscale x 16 x i1> %pred, i8* %addr) #1 { - %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pred, i8* %addr) +define <vscale x 16 x i8> @scalable_arg(<vscale x 16 x i1> %pred, ptr %addr) #1 { + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pred, ptr %addr) ret <vscale x 16 x i8> %res } ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to lower function{{.*}}scalable_ret ; FALLBACK-WITH-REPORT-OUT-LABEL: scalable_ret -define <vscale x 16 x i8> @scalable_ret(i8* %addr) #1 { +define <vscale x 16 x i8> @scalable_ret(ptr %addr) #1 { %pred = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 0) - %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pred, i8* %addr) + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pred, ptr %addr) ret <vscale x 16 x i8> %res } ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate instruction{{.*}}scalable_call ; FALLBACK-WITH-REPORT-OUT-LABEL: scalable_call -define i8 @scalable_call(i8* %addr) #1 { +define i8 @scalable_call(ptr %addr) #1 { %pred = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 0) - %vec = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pred, i8* %addr) + %vec = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pred, ptr %addr) %res = extractelement <vscale x 16 x i8> %vec, i32 0 ret i8 %res } @@ -90,7 +90,7 @@ ; FALLBACK-WITH-REPORT-OUT-LABEL: scalable_alloca define void @scalable_alloca() #1 { %local0 = alloca <vscale x 16 x i8> - load volatile <vscale x 16 x i8>, <vscale x 16 x i8>* %local0 + load volatile <vscale x 16 x i8>, ptr %local0 ret void } @@ -98,9 +98,9 @@ ; FALLBACK-WITH-REPORT-OUT-LABEL: asm_indirect_output define void @asm_indirect_output() { entry: - %ap = alloca i8*, align 8 - %0 = load i8*, i8** %ap, align 8 - call void asm sideeffect "", "=*r|m,0,~{memory}"(i8** elementtype(i8*) %ap, i8* %0) + %ap = alloca ptr, align 8 + %0 = load ptr, ptr %ap, align 8 + call void asm sideeffect "", "=*r|m,0,~{memory}"(ptr elementtype(ptr) %ap, ptr %0) ret void } @@ -109,22 +109,20 @@ ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate instruction:{{.*}}ld64b{{.*}}asm_output_ls64 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for asm_output_ls64 ; FALLBACK-WITH-REPORT-OUT-LABEL: asm_output_ls64 -define void @asm_output_ls64(%struct.foo* %output, i8* %addr) #2 { +define void @asm_output_ls64(ptr %output, ptr %addr) #2 { entry: - %val = call i512 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(i8* %addr) - %outcast = bitcast %struct.foo* %output to i512* - store i512 %val, i512* %outcast, align 8 + %val = call i512 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(ptr %addr) + store i512 %val, ptr %output, align 8 ret
void } ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate instruction:{{.*}}st64b{{.*}}asm_input_ls64 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for asm_input_ls64 ; FALLBACK-WITH-REPORT-OUT-LABEL: asm_input_ls64 -define void @asm_input_ls64(%struct.foo* %input, i8* %addr) #2 { +define void @asm_input_ls64(ptr %input, ptr %addr) #2 { entry: - %incast = bitcast %struct.foo* %input to i512* - %val = load i512, i512* %incast, align 8 - call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 %val, i8* %addr) + %val = load i512, ptr %input, align 8 + call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 %val, ptr %addr) ret void } @@ -132,12 +130,12 @@ ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for umul_s128 ; FALLBACK-WITH-REPORT-OUT-LABEL: umul_s128 declare {i128, i1} @llvm.umul.with.overflow.i128(i128, i128) nounwind readnone -define zeroext i1 @umul_s128(i128 %v1, i128* %res) { +define zeroext i1 @umul_s128(i128 %v1, ptr %res) { entry: %t = call {i128, i1} @llvm.umul.with.overflow.i128(i128 %v1, i128 2) %val = extractvalue {i128, i1} %t, 0 %obit = extractvalue {i128, i1} %t, 1 - store i128 %val, i128* %res + store i128 %val, ptr %res ret i1 %obit } @@ -145,13 +143,13 @@ ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for gc_intr ; FALLBACK-WITH-REPORT-OUT-LABEL: gc_intr -declare token @llvm.experimental.gc.statepoint.p0(i64 immarg, i32 immarg, i32()*, i32 immarg, i32 immarg, ...) +declare token @llvm.experimental.gc.statepoint.p0(i64 immarg, i32 immarg, ptr, i32 immarg, i32 immarg, ...) declare i32 @llvm.experimental.gc.result(token) declare i32 @extern_returning_i32() define i32 @gc_intr() gc "statepoint-example" { - %statepoint_token = call token (i64, i32, i32()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, i32()* elementtype(i32 ()) @extern_returning_i32, i32 0, i32 0, i32 0, i32 0) [ "deopt"() ] + %statepoint_token = call token (i64, i32, ptr, i32, i32, ...)
@llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(i32 ()) @extern_returning_i32, i32 0, i32 0, i32 0, i32 0) [ "deopt"() ] %ret = call i32 (token) @llvm.experimental.gc.result(token %statepoint_token) ret i32 %ret } @@ -160,4 +158,4 @@ attributes #2 = { "target-features"="+ls64" } declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 %pattern) -declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1>, i8*) +declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1>, ptr) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll @@ -4,7 +4,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "aarch64--" -define i32 @cse_gep([4 x i32]* %ptr, i32 %idx) { +define i32 @cse_gep(ptr %ptr, i32 %idx) { ; O0-LABEL: name: cse_gep ; O0: bb.1 (%ir-block.0): ; O0-NEXT: liveins: $w1, $x0 @@ -44,10 +44,10 @@ ; O3-NEXT: $w0 = COPY [[ADD]](s32) ; O3-NEXT: RET_ReallyLR implicit $w0 %sidx = sext i32 %idx to i64 - %gep1 = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i64 %sidx, i64 0 - %v1 = load i32, i32* %gep1 - %gep2 = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i64 %sidx, i64 1 - %v2 = load i32, i32* %gep2 + %gep1 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 0 + %v1 = load i32, ptr %gep1 + %gep2 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 1 + %v2 = load i32, ptr %gep2 %res = add i32 %v1, %v2 ret i32 %res } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll @@ -13,11 +13,11 @@ ; CHECK: [[GUARD_SLOT:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.StackGuardSlot ; CHECK: [[GUARD:%[0-9]+]]:gpr64sp(p0) = LOAD_STACK_GUARD :: (dereferenceable invariant load (p0) from @__stack_chk_guard) ; CHECK: G_STORE [[GUARD]](p0), [[GUARD_SLOT]](p0) :: (volatile store (p0) into %stack.0.StackGuardSlot) -declare void @llvm.stackprotector(i8*, i8**) +declare void @llvm.stackprotector(ptr, ptr) define void @test_stack_guard_remat2() { - %StackGuardSlot = alloca i8* - call void @llvm.stackprotector(i8* undef, i8** %StackGuardSlot) + %StackGuardSlot = alloca ptr + call void @llvm.stackprotector(ptr undef, ptr %StackGuardSlot) ret void } -@__stack_chk_guard = external global i64* +@__stack_chk_guard = external global ptr diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll @@ -1,40 +1,42 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -mtriple aarch64 -O0 -aarch64-enable-atomic-cfg-tidy=0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s +; RUN: llc -global-isel -mtriple aarch64 -O0 -aarch64-enable-atomic-cfg-tidy=0 -stop-after=irtranslator -simplify-mir -verify-machineinstrs %s -o - 2>&1 | FileCheck %s define i32 @switch(i32 %argc) { ; CHECK-LABEL: name: switch ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.3(0x40000000), %bb.6(0x40000000) - ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) =
COPY $w0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.3 - ; CHECK: G_BR %bb.6 - ; CHECK: bb.6.entry: - ; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000) - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]] - ; CHECK: G_BRCOND [[ICMP1]](s1), %bb.4 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2.default: - ; CHECK: successors: %bb.5(0x80000000) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C4]] - ; CHECK: G_BR %bb.5 - ; CHECK: bb.3.case100: - ; CHECK: successors: %bb.5(0x80000000) - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C3]] - ; CHECK: G_BR %bb.5 - ; CHECK: bb.4.case200: - ; CHECK: successors: %bb.5(0x80000000) - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C2]] - ; CHECK: bb.5.return: - ; CHECK: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.2, [[ADD1]](s32), %bb.3, [[ADD2]](s32), %bb.4 - ; CHECK: $w0 = COPY [[PHI]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.3 + ; CHECK-NEXT: G_BR %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.entry: + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP1]](s1), %bb.4 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.default: + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C4]] + ; CHECK-NEXT: G_BR %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.case100: + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C3]] + ; CHECK-NEXT: G_BR %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.case200: + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C2]] + ; CHECK-NEXT: G_BR %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.return: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.2, [[ADD1]](s32), %bb.3, [[ADD2]](s32), %bb.4 + ; CHECK-NEXT: $w0 = COPY [[PHI]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 entry: switch i32 %argc, label %default [ i32 100, label %case100 @@ -61,31 +63,33 @@ define i32 @test_cfg_remap(i32 %in) { ; CHECK-LABEL: name: test_cfg_remap ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.2(0x40000000), %bb.5(0x40000000) - ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 57 - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.2 - ; CHECK: G_BR %bb.5 - ; CHECK: bb.5.entry: - ; CHECK: successors: %bb.3(0x40000000), %bb.4(0x40000000) - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]] - ; CHECK: G_BRCOND [[ICMP1]](s1), %bb.3 - 
; CHECK: G_BR %bb.4 - ; CHECK: bb.2.next: - ; CHECK: successors: %bb.4(0x80000000) - ; CHECK: G_BR %bb.4 - ; CHECK: bb.3.other: - ; CHECK: $w0 = COPY [[DEF]](s32) - ; CHECK: RET_ReallyLR implicit $w0 - ; CHECK: bb.4.phi.block: - ; CHECK: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.5, [[C2]](s32), %bb.2 - ; CHECK: $w0 = COPY [[PHI]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 57 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.entry: + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP1]](s1), %bb.3 + ; CHECK-NEXT: G_BR %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.next: + ; CHECK-NEXT: G_BR %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.other: + ; CHECK-NEXT: $w0 = COPY [[DEF]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.phi.block: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.5, [[C2]](s32), %bb.2 + ; CHECK-NEXT: $w0 = COPY [[PHI]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 entry: switch i32 %in, label %phi.block [i32 1, label %next i32 57, label %other] @@ -104,43 +108,47 @@ define i32 @test_cfg_remap_multiple_preds(i32 %in) { ; CHECK-LABEL: name: test_cfg_remap_multiple_preds ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.3(0x40000000), %bb.6(0x40000000) - ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 57 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.3 - ; CHECK: G_BR %bb.6 - ; CHECK: bb.6.entry: - ; CHECK: successors: %bb.4(0x40000000), %bb.7(0x40000000) - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]] - ; CHECK: G_BRCOND [[ICMP1]](s1), %bb.4 - ; CHECK: G_BR %bb.7 - ; CHECK: bb.7.entry: - ; CHECK: successors: %bb.5(0x40000000), %bb.8(0x40000000) - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C2]] - ; CHECK: G_BRCOND [[ICMP2]](s1), %bb.5 - ; CHECK: G_BR %bb.8 - ; CHECK: bb.8.entry: - ; CHECK: successors: %bb.5(0x80000000) - ; CHECK: G_BR %bb.5 - ; CHECK: bb.2.odd: - ; CHECK: successors: + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 57 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.3 + ; CHECK-NEXT: G_BR %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.entry: + ; 
CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP1]](s1), %bb.4 + ; CHECK-NEXT: G_BR %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7.entry: + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C2]] + ; CHECK-NEXT: G_BRCOND [[ICMP2]](s1), %bb.5 + ; CHECK-NEXT: G_BR %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8.entry: + ; CHECK-NEXT: G_BR %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.odd: + ; CHECK-NEXT: successors: + ; CHECK: {{ $}} + ; CHECK: {{ $}} ; CHECK: bb.3.next: - ; CHECK: successors: %bb.5(0x80000000) - ; CHECK: G_BR %bb.5 - ; CHECK: bb.4.other: - ; CHECK: $w0 = COPY [[DEF]](s32) - ; CHECK: RET_ReallyLR implicit $w0 - ; CHECK: bb.5.phi.block: - ; CHECK: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.7, [[C]](s32), %bb.8, [[C4]](s32), %bb.3 - ; CHECK: $w0 = COPY [[C3]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: G_BR %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.other: + ; CHECK-NEXT: $w0 = COPY [[DEF]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.phi.block: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.7, [[C]](s32), %bb.8, [[C4]](s32), %bb.3 + ; CHECK-NEXT: $w0 = COPY [[C3]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 entry: switch i32 %in, label %odd [i32 1, label %next i32 57, label %other @@ -163,34 +171,38 @@ define i32 @jt_test(i32 %x) { ; CHECK-LABEL: name: jt_test ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.4(0x40000000), %bb.5(0x40000000) - ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 71 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[C4]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[SUB]](s32) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[ZEXT]](s64), [[ZEXT1]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.4 - ; CHECK: bb.5.entry: - ; CHECK: successors: %bb.3(0x2aaaaaab), %bb.4(0x2aaaaaab), %bb.2(0x2aaaaaab) - ; CHECK: [[JUMP_TABLE:%[0-9]+]]:_(p0) = G_JUMP_TABLE %jump-table.0 - ; CHECK: G_BRJT [[JUMP_TABLE]](p0), %jump-table.0, [[ZEXT]](s64) - ; CHECK: bb.2.sw.bb: - ; CHECK: successors: %bb.4(0x80000000) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[COPY]], [[C2]] - ; CHECK: G_BR %bb.4 - ; CHECK: bb.3.sw.bb1: - ; CHECK: successors: %bb.4(0x80000000) - ; CHECK: [[MUL:%[0-9]+]]:_(s32) = nsw G_MUL [[COPY]], [[C1]] - ; CHECK: bb.4.return: - ; CHECK: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[MUL]](s32), %bb.3, [[ADD]](s32), %bb.2, [[C3]](s32), %bb.1, [[C3]](s32), %bb.5 - ; CHECK: $w0 = COPY [[PHI]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 71 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[C4]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[SUB]](s32) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP 
intpred(ugt), [[ZEXT]](s64), [[ZEXT1]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.entry: + ; CHECK-NEXT: successors: %bb.3, %bb.4, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[JUMP_TABLE:%[0-9]+]]:_(p0) = G_JUMP_TABLE %jump-table.0 + ; CHECK-NEXT: G_BRJT [[JUMP_TABLE]](p0), %jump-table.0, [[ZEXT]](s64) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.sw.bb: + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[COPY]], [[C2]] + ; CHECK-NEXT: G_BR %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.sw.bb1: + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = nsw G_MUL [[COPY]], [[C1]] + ; CHECK-NEXT: G_BR %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.return: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[MUL]](s32), %bb.3, [[ADD]](s32), %bb.2, [[C3]](s32), %bb.1, [[C3]](s32), %bb.5 + ; CHECK-NEXT: $w0 = COPY [[PHI]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 entry: switch i32 %x, label %return [ i32 75, label %sw.bb @@ -216,612 +228,614 @@ ret i32 %retval.0 } -%0 = type { i32, i32* } -%1 = type { i32*, i32, i32 } +%0 = type { i32, ptr } +%1 = type { ptr, i32, i32 } @global = external hidden constant [55 x %0], align 8 -define void @jt_multiple_jump_tables(%1* %arg, i32 %arg1, i32* %arg2) { +define void @jt_multiple_jump_tables(ptr %arg, i32 %arg1, ptr %arg2) { ; CHECK-LABEL: name: jt_multiple_jump_tables ; CHECK: bb.1.bb: - ; CHECK: successors: %bb.56(0x40000000), %bb.61(0x40000000) - ; CHECK: liveins: $w1, $x0, $x2 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 - ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 - ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 - ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 - ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 - ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 - ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 - ; CHECK: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; CHECK: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 - ; CHECK: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 - ; CHECK: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 34 - ; CHECK: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 35 - ; CHECK: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 - ; CHECK: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 37 - ; CHECK: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 38 - ; CHECK: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 39 - ; CHECK: 
[[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 41 - ; CHECK: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 43 - ; CHECK: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 - ; CHECK: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 45 - ; CHECK: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 46 - ; CHECK: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 47 - ; CHECK: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; CHECK: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 49 - ; CHECK: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 50 - ; CHECK: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 - ; CHECK: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 - ; CHECK: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 53 - ; CHECK: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 54 - ; CHECK: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 55 - ; CHECK: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 4352 - ; CHECK: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 4353 - ; CHECK: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 4354 - ; CHECK: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 4355 - ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @global - ; CHECK: [[C55:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 - ; CHECK: [[C56:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[C57:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK: [[C58:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK: [[C59:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CHECK: [[C60:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK: [[C61:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CHECK: [[C62:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CHECK: [[C63:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; CHECK: [[C64:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[C65:%[0-9]+]]:_(s64) = G_CONSTANT i64 9 - ; CHECK: [[C66:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 - ; CHECK: [[C67:%[0-9]+]]:_(s64) = G_CONSTANT i64 11 - ; CHECK: [[C68:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 - ; CHECK: [[C69:%[0-9]+]]:_(s64) = G_CONSTANT i64 13 - ; CHECK: [[C70:%[0-9]+]]:_(s64) = G_CONSTANT i64 14 - ; CHECK: [[C71:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 - ; CHECK: [[C72:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[C73:%[0-9]+]]:_(s64) = G_CONSTANT i64 17 - ; CHECK: [[C74:%[0-9]+]]:_(s64) = G_CONSTANT i64 18 - ; CHECK: [[C75:%[0-9]+]]:_(s64) = G_CONSTANT i64 19 - ; CHECK: [[C76:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; CHECK: [[C77:%[0-9]+]]:_(s64) = G_CONSTANT i64 21 - ; CHECK: [[C78:%[0-9]+]]:_(s64) = G_CONSTANT i64 22 - ; CHECK: [[C79:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 - ; CHECK: [[C80:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK: [[C81:%[0-9]+]]:_(s64) = G_CONSTANT i64 25 - ; CHECK: [[C82:%[0-9]+]]:_(s64) = G_CONSTANT i64 26 - ; CHECK: [[C83:%[0-9]+]]:_(s64) = G_CONSTANT i64 27 - ; CHECK: [[C84:%[0-9]+]]:_(s64) = G_CONSTANT i64 28 - ; CHECK: [[C85:%[0-9]+]]:_(s64) = G_CONSTANT i64 29 - ; CHECK: [[C86:%[0-9]+]]:_(s64) = G_CONSTANT i64 30 - ; CHECK: [[C87:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 - ; CHECK: [[C88:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK: [[C89:%[0-9]+]]:_(s64) = G_CONSTANT i64 33 - ; CHECK: [[C90:%[0-9]+]]:_(s64) = G_CONSTANT i64 34 - ; CHECK: [[C91:%[0-9]+]]:_(s64) = G_CONSTANT i64 35 - ; CHECK: [[C92:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; CHECK: [[C93:%[0-9]+]]:_(s64) = G_CONSTANT i64 37 - ; CHECK: [[C94:%[0-9]+]]:_(s64) = G_CONSTANT i64 38 - ; CHECK: [[C95:%[0-9]+]]:_(s64) = G_CONSTANT i64 39 - ; CHECK: [[C96:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; CHECK: [[C97:%[0-9]+]]:_(s64) = G_CONSTANT i64 41 - ; CHECK: [[C98:%[0-9]+]]:_(s64) = G_CONSTANT i64 42 - ; CHECK: [[C99:%[0-9]+]]:_(s64) = G_CONSTANT i64 43 - ; CHECK: [[C100:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 - 
; CHECK: [[C101:%[0-9]+]]:_(s64) = G_CONSTANT i64 45 - ; CHECK: [[C102:%[0-9]+]]:_(s64) = G_CONSTANT i64 46 - ; CHECK: [[C103:%[0-9]+]]:_(s64) = G_CONSTANT i64 47 - ; CHECK: [[C104:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK: [[C105:%[0-9]+]]:_(s64) = G_CONSTANT i64 49 - ; CHECK: [[C106:%[0-9]+]]:_(s64) = G_CONSTANT i64 50 - ; CHECK: [[C107:%[0-9]+]]:_(s64) = G_CONSTANT i64 51 - ; CHECK: [[C108:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 - ; CHECK: [[C109:%[0-9]+]]:_(s64) = G_CONSTANT i64 53 - ; CHECK: [[C110:%[0-9]+]]:_(s64) = G_CONSTANT i64 54 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.tmp - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.56 - ; CHECK: G_BR %bb.61 - ; CHECK: bb.61.bb: - ; CHECK: successors: %bb.2(0x40000000), %bb.62(0x40000000) - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] - ; CHECK: G_BRCOND [[ICMP1]](s1), %bb.2 - ; CHECK: G_BR %bb.62 - ; CHECK: bb.62.bb: - ; CHECK: successors: %bb.3(0x40000000), %bb.63(0x40000000) - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] - ; CHECK: G_BRCOND [[ICMP2]](s1), %bb.3 - ; CHECK: G_BR %bb.63 - ; CHECK: bb.63.bb: - ; CHECK: successors: %bb.4(0x40000000), %bb.64(0x40000000) - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C3]] - ; CHECK: G_BRCOND [[ICMP3]](s1), %bb.4 - ; CHECK: G_BR %bb.64 - ; CHECK: bb.64.bb: - ; CHECK: successors: %bb.5(0x40000000), %bb.65(0x40000000) - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C4]] - ; CHECK: G_BRCOND [[ICMP4]](s1), %bb.5 - ; CHECK: G_BR %bb.65 - ; CHECK: bb.65.bb: - ; CHECK: successors: %bb.6(0x40000000), %bb.66(0x40000000) - ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C5]] - ; CHECK: G_BRCOND [[ICMP5]](s1), %bb.6 - ; CHECK: G_BR %bb.66 - ; CHECK: bb.66.bb: - ; CHECK: successors: %bb.7(0x40000000), %bb.67(0x40000000) - ; CHECK: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] - ; CHECK: G_BRCOND [[ICMP6]](s1), %bb.7 - ; CHECK: G_BR %bb.67 - ; CHECK: bb.67.bb: - ; CHECK: successors: %bb.8(0x40000000), %bb.68(0x40000000) - ; CHECK: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C7]] - ; CHECK: G_BRCOND [[ICMP7]](s1), %bb.8 - ; CHECK: G_BR %bb.68 - ; CHECK: bb.68.bb: - ; CHECK: successors: %bb.9(0x40000000), %bb.69(0x40000000) - ; CHECK: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C8]] - ; CHECK: G_BRCOND [[ICMP8]](s1), %bb.9 - ; CHECK: G_BR %bb.69 - ; CHECK: bb.69.bb: - ; CHECK: successors: %bb.10(0x40000000), %bb.70(0x40000000) - ; CHECK: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C9]] - ; CHECK: G_BRCOND [[ICMP9]](s1), %bb.10 - ; CHECK: G_BR %bb.70 - ; CHECK: bb.70.bb: - ; CHECK: successors: %bb.11(0x40000000), %bb.71(0x40000000) - ; CHECK: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C10]] - ; CHECK: G_BRCOND [[ICMP10]](s1), %bb.11 - ; CHECK: G_BR %bb.71 - ; CHECK: bb.71.bb: - ; CHECK: successors: %bb.12(0x40000000), %bb.72(0x40000000) - ; CHECK: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C11]] - ; CHECK: G_BRCOND [[ICMP11]](s1), %bb.12 - ; CHECK: G_BR %bb.72 - ; CHECK: bb.72.bb: - ; CHECK: successors: %bb.13(0x40000000), %bb.73(0x40000000) - ; CHECK: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C12]] - ; CHECK: G_BRCOND [[ICMP12]](s1), %bb.13 - ; CHECK: G_BR %bb.73 - ; CHECK: bb.73.bb: - ; CHECK: successors: %bb.14(0x40000000), 
%bb.74(0x40000000) - ; CHECK: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C13]] - ; CHECK: G_BRCOND [[ICMP13]](s1), %bb.14 - ; CHECK: G_BR %bb.74 - ; CHECK: bb.74.bb: - ; CHECK: successors: %bb.15(0x40000000), %bb.75(0x40000000) - ; CHECK: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C14]] - ; CHECK: G_BRCOND [[ICMP14]](s1), %bb.15 - ; CHECK: G_BR %bb.75 - ; CHECK: bb.75.bb: - ; CHECK: successors: %bb.16(0x40000000), %bb.76(0x40000000) - ; CHECK: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C15]] - ; CHECK: G_BRCOND [[ICMP15]](s1), %bb.16 - ; CHECK: G_BR %bb.76 - ; CHECK: bb.76.bb: - ; CHECK: successors: %bb.17(0x40000000), %bb.77(0x40000000) - ; CHECK: [[ICMP16:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C16]] - ; CHECK: G_BRCOND [[ICMP16]](s1), %bb.17 - ; CHECK: G_BR %bb.77 - ; CHECK: bb.77.bb: - ; CHECK: successors: %bb.18(0x40000000), %bb.78(0x40000000) - ; CHECK: [[ICMP17:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C17]] - ; CHECK: G_BRCOND [[ICMP17]](s1), %bb.18 - ; CHECK: G_BR %bb.78 - ; CHECK: bb.78.bb: - ; CHECK: successors: %bb.19(0x40000000), %bb.79(0x40000000) - ; CHECK: [[ICMP18:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C18]] - ; CHECK: G_BRCOND [[ICMP18]](s1), %bb.19 - ; CHECK: G_BR %bb.79 - ; CHECK: bb.79.bb: - ; CHECK: successors: %bb.20(0x40000000), %bb.80(0x40000000) - ; CHECK: [[ICMP19:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C19]] - ; CHECK: G_BRCOND [[ICMP19]](s1), %bb.20 - ; CHECK: G_BR %bb.80 - ; CHECK: bb.80.bb: - ; CHECK: successors: %bb.21(0x40000000), %bb.81(0x40000000) - ; CHECK: [[ICMP20:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C20]] - ; CHECK: G_BRCOND [[ICMP20]](s1), %bb.21 - ; CHECK: G_BR %bb.81 - ; CHECK: bb.81.bb: - ; CHECK: successors: %bb.22(0x40000000), %bb.82(0x40000000) - ; CHECK: [[ICMP21:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C21]] - ; CHECK: G_BRCOND [[ICMP21]](s1), %bb.22 - ; CHECK: G_BR %bb.82 - ; CHECK: bb.82.bb: - ; CHECK: successors: %bb.23(0x40000000), %bb.83(0x40000000) - ; CHECK: [[ICMP22:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C22]] - ; CHECK: G_BRCOND [[ICMP22]](s1), %bb.23 - ; CHECK: G_BR %bb.83 - ; CHECK: bb.83.bb: - ; CHECK: successors: %bb.24(0x40000000), %bb.84(0x40000000) - ; CHECK: [[ICMP23:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C23]] - ; CHECK: G_BRCOND [[ICMP23]](s1), %bb.24 - ; CHECK: G_BR %bb.84 - ; CHECK: bb.84.bb: - ; CHECK: successors: %bb.25(0x40000000), %bb.85(0x40000000) - ; CHECK: [[ICMP24:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C24]] - ; CHECK: G_BRCOND [[ICMP24]](s1), %bb.25 - ; CHECK: G_BR %bb.85 - ; CHECK: bb.85.bb: - ; CHECK: successors: %bb.26(0x40000000), %bb.86(0x40000000) - ; CHECK: [[ICMP25:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C25]] - ; CHECK: G_BRCOND [[ICMP25]](s1), %bb.26 - ; CHECK: G_BR %bb.86 - ; CHECK: bb.86.bb: - ; CHECK: successors: %bb.27(0x40000000), %bb.87(0x40000000) - ; CHECK: [[ICMP26:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C26]] - ; CHECK: G_BRCOND [[ICMP26]](s1), %bb.27 - ; CHECK: G_BR %bb.87 - ; CHECK: bb.87.bb: - ; CHECK: successors: %bb.28(0x40000000), %bb.88(0x40000000) - ; CHECK: [[ICMP27:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C27]] - ; CHECK: G_BRCOND [[ICMP27]](s1), %bb.28 - ; CHECK: G_BR %bb.88 - ; CHECK: bb.88.bb: - ; CHECK: successors: %bb.29(0x40000000), %bb.89(0x40000000) - ; CHECK: [[ICMP28:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), 
[[COPY1]](s32), [[C28]] - ; CHECK: G_BRCOND [[ICMP28]](s1), %bb.29 - ; CHECK: G_BR %bb.89 - ; CHECK: bb.89.bb: - ; CHECK: successors: %bb.30(0x40000000), %bb.90(0x40000000) - ; CHECK: [[ICMP29:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C29]] - ; CHECK: G_BRCOND [[ICMP29]](s1), %bb.30 - ; CHECK: G_BR %bb.90 - ; CHECK: bb.90.bb: - ; CHECK: successors: %bb.31(0x40000000), %bb.91(0x40000000) - ; CHECK: [[ICMP30:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C30]] - ; CHECK: G_BRCOND [[ICMP30]](s1), %bb.31 - ; CHECK: G_BR %bb.91 - ; CHECK: bb.91.bb: - ; CHECK: successors: %bb.32(0x40000000), %bb.92(0x40000000) - ; CHECK: [[ICMP31:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C31]] - ; CHECK: G_BRCOND [[ICMP31]](s1), %bb.32 - ; CHECK: G_BR %bb.92 - ; CHECK: bb.92.bb: - ; CHECK: successors: %bb.33(0x40000000), %bb.93(0x40000000) - ; CHECK: [[ICMP32:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C32]] - ; CHECK: G_BRCOND [[ICMP32]](s1), %bb.33 - ; CHECK: G_BR %bb.93 - ; CHECK: bb.93.bb: - ; CHECK: successors: %bb.34(0x40000000), %bb.94(0x40000000) - ; CHECK: [[ICMP33:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C33]] - ; CHECK: G_BRCOND [[ICMP33]](s1), %bb.34 - ; CHECK: G_BR %bb.94 - ; CHECK: bb.94.bb: - ; CHECK: successors: %bb.35(0x40000000), %bb.95(0x40000000) - ; CHECK: [[ICMP34:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C34]] - ; CHECK: G_BRCOND [[ICMP34]](s1), %bb.35 - ; CHECK: G_BR %bb.95 - ; CHECK: bb.95.bb: - ; CHECK: successors: %bb.36(0x40000000), %bb.96(0x40000000) - ; CHECK: [[ICMP35:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C35]] - ; CHECK: G_BRCOND [[ICMP35]](s1), %bb.36 - ; CHECK: G_BR %bb.96 - ; CHECK: bb.96.bb: - ; CHECK: successors: %bb.37(0x40000000), %bb.97(0x40000000) - ; CHECK: [[ICMP36:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C36]] - ; CHECK: G_BRCOND [[ICMP36]](s1), %bb.37 - ; CHECK: G_BR %bb.97 - ; CHECK: bb.97.bb: - ; CHECK: successors: %bb.38(0x40000000), %bb.98(0x40000000) - ; CHECK: [[ICMP37:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C37]] - ; CHECK: G_BRCOND [[ICMP37]](s1), %bb.38 - ; CHECK: G_BR %bb.98 - ; CHECK: bb.98.bb: - ; CHECK: successors: %bb.39(0x40000000), %bb.99(0x40000000) - ; CHECK: [[ICMP38:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C38]] - ; CHECK: G_BRCOND [[ICMP38]](s1), %bb.39 - ; CHECK: G_BR %bb.99 - ; CHECK: bb.99.bb: - ; CHECK: successors: %bb.40(0x40000000), %bb.100(0x40000000) - ; CHECK: [[ICMP39:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C39]] - ; CHECK: G_BRCOND [[ICMP39]](s1), %bb.40 - ; CHECK: G_BR %bb.100 - ; CHECK: bb.100.bb: - ; CHECK: successors: %bb.41(0x40000000), %bb.101(0x40000000) - ; CHECK: [[ICMP40:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C40]] - ; CHECK: G_BRCOND [[ICMP40]](s1), %bb.41 - ; CHECK: G_BR %bb.101 - ; CHECK: bb.101.bb: - ; CHECK: successors: %bb.42(0x40000000), %bb.102(0x40000000) - ; CHECK: [[ICMP41:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C41]] - ; CHECK: G_BRCOND [[ICMP41]](s1), %bb.42 - ; CHECK: G_BR %bb.102 - ; CHECK: bb.102.bb: - ; CHECK: successors: %bb.43(0x40000000), %bb.103(0x40000000) - ; CHECK: [[ICMP42:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C42]] - ; CHECK: G_BRCOND [[ICMP42]](s1), %bb.43 - ; CHECK: G_BR %bb.103 - ; CHECK: bb.103.bb: - ; CHECK: successors: %bb.44(0x40000000), %bb.104(0x40000000) - ; CHECK: [[ICMP43:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C43]] - ; CHECK: G_BRCOND [[ICMP43]](s1), %bb.44 - ; 
CHECK: G_BR %bb.104 - ; CHECK: bb.104.bb: - ; CHECK: successors: %bb.45(0x40000000), %bb.105(0x40000000) - ; CHECK: [[ICMP44:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C44]] - ; CHECK: G_BRCOND [[ICMP44]](s1), %bb.45 - ; CHECK: G_BR %bb.105 - ; CHECK: bb.105.bb: - ; CHECK: successors: %bb.46(0x40000000), %bb.106(0x40000000) - ; CHECK: [[ICMP45:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C45]] - ; CHECK: G_BRCOND [[ICMP45]](s1), %bb.46 - ; CHECK: G_BR %bb.106 - ; CHECK: bb.106.bb: - ; CHECK: successors: %bb.47(0x40000000), %bb.107(0x40000000) - ; CHECK: [[ICMP46:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C46]] - ; CHECK: G_BRCOND [[ICMP46]](s1), %bb.47 - ; CHECK: G_BR %bb.107 - ; CHECK: bb.107.bb: - ; CHECK: successors: %bb.48(0x40000000), %bb.108(0x40000000) - ; CHECK: [[ICMP47:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C47]] - ; CHECK: G_BRCOND [[ICMP47]](s1), %bb.48 - ; CHECK: G_BR %bb.108 - ; CHECK: bb.108.bb: - ; CHECK: successors: %bb.49(0x40000000), %bb.109(0x40000000) - ; CHECK: [[ICMP48:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C48]] - ; CHECK: G_BRCOND [[ICMP48]](s1), %bb.49 - ; CHECK: G_BR %bb.109 - ; CHECK: bb.109.bb: - ; CHECK: successors: %bb.50(0x40000000), %bb.110(0x40000000) - ; CHECK: [[ICMP49:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C49]] - ; CHECK: G_BRCOND [[ICMP49]](s1), %bb.50 - ; CHECK: G_BR %bb.110 - ; CHECK: bb.110.bb: - ; CHECK: successors: %bb.51(0x40000000), %bb.111(0x40000000) - ; CHECK: [[ICMP50:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C50]] - ; CHECK: G_BRCOND [[ICMP50]](s1), %bb.51 - ; CHECK: G_BR %bb.111 - ; CHECK: bb.111.bb: - ; CHECK: successors: %bb.52(0x40000000), %bb.112(0x40000000) - ; CHECK: [[ICMP51:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C51]] - ; CHECK: G_BRCOND [[ICMP51]](s1), %bb.52 - ; CHECK: G_BR %bb.112 - ; CHECK: bb.112.bb: - ; CHECK: successors: %bb.53(0x40000000), %bb.113(0x40000000) - ; CHECK: [[ICMP52:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C52]] - ; CHECK: G_BRCOND [[ICMP52]](s1), %bb.53 - ; CHECK: G_BR %bb.113 - ; CHECK: bb.113.bb: - ; CHECK: successors: %bb.54(0x40000000), %bb.114(0x40000000) - ; CHECK: [[ICMP53:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C53]] - ; CHECK: G_BRCOND [[ICMP53]](s1), %bb.54 - ; CHECK: G_BR %bb.114 - ; CHECK: bb.114.bb: - ; CHECK: successors: %bb.55(0x40000000), %bb.60(0x40000000) - ; CHECK: [[ICMP54:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C54]] - ; CHECK: G_BRCOND [[ICMP54]](s1), %bb.55 - ; CHECK: G_BR %bb.60 - ; CHECK: bb.2.bb3: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.3.bb4: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.4.bb5: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.5.bb6: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.6.bb7: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.7.bb8: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.8.bb9: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.9.bb10: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.10.bb11: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.11.bb12: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.12.bb13: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; 
CHECK: bb.13.bb14: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.14.bb15: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.15.bb16: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.16.bb17: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.17.bb18: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.18.bb19: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.19.bb20: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.20.bb21: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.21.bb22: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.22.bb23: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.23.bb24: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.24.bb25: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.25.bb26: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.26.bb27: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.27.bb28: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.28.bb29: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.29.bb30: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.30.bb31: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.31.bb32: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.32.bb33: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.33.bb34: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.34.bb35: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.35.bb36: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.36.bb37: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.37.bb38: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.38.bb39: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.39.bb40: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.40.bb41: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.41.bb42: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.42.bb43: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.43.bb44: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.44.bb45: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.45.bb46: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.46.bb47: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.47.bb48: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.48.bb49: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.49.bb50: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.50.bb51: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.51.bb52: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.52.bb53: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.53.bb54: - ; CHECK: 
successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.54.bb55: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: G_BR %bb.56 - ; CHECK: bb.55.bb56: - ; CHECK: successors: %bb.56(0x80000000) - ; CHECK: bb.56.bb57: - ; CHECK: successors: %bb.59(0x80000000) - ; CHECK: [[PHI:%[0-9]+]]:_(s64) = G_PHI [[C56]](s64), %bb.1, [[C57]](s64), %bb.2, [[C58]](s64), %bb.3, [[C59]](s64), %bb.4, [[C60]](s64), %bb.5, [[C61]](s64), %bb.6, [[C62]](s64), %bb.7, [[C63]](s64), %bb.8, [[C64]](s64), %bb.9, [[C65]](s64), %bb.10, [[C66]](s64), %bb.11, [[C67]](s64), %bb.12, [[C68]](s64), %bb.13, [[C69]](s64), %bb.14, [[C70]](s64), %bb.15, [[C71]](s64), %bb.16, [[C72]](s64), %bb.17, [[C73]](s64), %bb.18, [[C74]](s64), %bb.19, [[C75]](s64), %bb.20, [[C76]](s64), %bb.21, [[C77]](s64), %bb.22, [[C78]](s64), %bb.23, [[C79]](s64), %bb.24, [[C80]](s64), %bb.25, [[C81]](s64), %bb.26, [[C82]](s64), %bb.27, [[C83]](s64), %bb.28, [[C84]](s64), %bb.29, [[C85]](s64), %bb.30, [[C86]](s64), %bb.31, [[C87]](s64), %bb.32, [[C88]](s64), %bb.33, [[C89]](s64), %bb.34, [[C90]](s64), %bb.35, [[C91]](s64), %bb.36, [[C92]](s64), %bb.37, [[C93]](s64), %bb.38, [[C94]](s64), %bb.39, [[C95]](s64), %bb.40, [[C96]](s64), %bb.41, [[C97]](s64), %bb.42, [[C98]](s64), %bb.43, [[C99]](s64), %bb.44, [[C100]](s64), %bb.45, [[C101]](s64), %bb.46, [[C102]](s64), %bb.47, [[C103]](s64), %bb.48, [[C104]](s64), %bb.49, [[C105]](s64), %bb.50, [[C106]](s64), %bb.51, [[C107]](s64), %bb.52, [[C108]](s64), %bb.53, [[C109]](s64), %bb.54, [[C110]](s64), %bb.55 - ; CHECK: [[C111:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[PHI]], [[C111]] - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[MUL]](s64) - ; CHECK: [[C112:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C112]](s64) - ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD1]](p0) :: (load (p0) from %ir.tmp59) - ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK: $x0 = COPY [[COPY]](p0) - ; CHECK: $x1 = COPY [[LOAD]](p0) - ; CHECK: BL @wibble, csr_aarch64_aapcs_thisreturn, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1 - ; CHECK: [[COPY3:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; CHECK: G_BR %bb.59 - ; CHECK: bb.57.bb62: - ; CHECK: successors: %bb.59(0x80000000) - ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK: $x0 = COPY [[COPY]](p0) - ; CHECK: $x1 = COPY [[COPY2]](p0) - ; CHECK: BL @wibble, csr_aarch64_aapcs_thisreturn, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1 - ; CHECK: [[COPY4:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; CHECK: G_BR %bb.59 - ; CHECK: bb.58.bb64: - ; CHECK: successors: %bb.59(0x80000000) - ; CHECK: [[COPY5:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0) - ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK: $w0 = COPY [[COPY1]](s32) - ; CHECK: $x1 = COPY [[COPY5]](p0) - ; CHECK: BL @baz, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $x1 - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK: $x0 = COPY [[COPY]](p0) - ; CHECK: $x1 = COPY [[COPY5]](p0) - ; CHECK: BL @wibble, csr_aarch64_aapcs_thisreturn, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1 - ; CHECK: [[COPY6:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, 
implicit $sp - ; CHECK: bb.59.bb68: - ; CHECK: RET_ReallyLR - ; CHECK: bb.60.bb69: - ; CHECK: successors: %bb.58(0x40000000), %bb.57(0x40000000) - ; CHECK: [[ICMP55:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](p0), [[C55]] - ; CHECK: G_BRCOND [[ICMP55]](s1), %bb.58 - ; CHECK: G_BR %bb.57 + ; CHECK-NEXT: liveins: $w1, $x0, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 + ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 + ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 + ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 + ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 33 + ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 34 + ; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 35 + ; CHECK-NEXT: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 + ; CHECK-NEXT: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 37 + ; CHECK-NEXT: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 38 + ; CHECK-NEXT: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 39 + ; CHECK-NEXT: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 41 + ; CHECK-NEXT: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 43 + ; CHECK-NEXT: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 + ; CHECK-NEXT: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 45 + ; CHECK-NEXT: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 46 + ; CHECK-NEXT: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 47 + ; CHECK-NEXT: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; CHECK-NEXT: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 49 + ; CHECK-NEXT: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 50 + ; CHECK-NEXT: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 + ; CHECK-NEXT: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 + ; CHECK-NEXT: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 53 + ; CHECK-NEXT: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 54 + ; CHECK-NEXT: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 55 + ; CHECK-NEXT: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 
4352 + ; CHECK-NEXT: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 4353 + ; CHECK-NEXT: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 4354 + ; CHECK-NEXT: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 4355 + ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @global + ; CHECK-NEXT: [[C55:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C56:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C57:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[C58:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[C59:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK-NEXT: [[C60:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[C61:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 + ; CHECK-NEXT: [[C62:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CHECK-NEXT: [[C63:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 + ; CHECK-NEXT: [[C64:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[C65:%[0-9]+]]:_(s64) = G_CONSTANT i64 9 + ; CHECK-NEXT: [[C66:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; CHECK-NEXT: [[C67:%[0-9]+]]:_(s64) = G_CONSTANT i64 11 + ; CHECK-NEXT: [[C68:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CHECK-NEXT: [[C69:%[0-9]+]]:_(s64) = G_CONSTANT i64 13 + ; CHECK-NEXT: [[C70:%[0-9]+]]:_(s64) = G_CONSTANT i64 14 + ; CHECK-NEXT: [[C71:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; CHECK-NEXT: [[C72:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[C73:%[0-9]+]]:_(s64) = G_CONSTANT i64 17 + ; CHECK-NEXT: [[C74:%[0-9]+]]:_(s64) = G_CONSTANT i64 18 + ; CHECK-NEXT: [[C75:%[0-9]+]]:_(s64) = G_CONSTANT i64 19 + ; CHECK-NEXT: [[C76:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 + ; CHECK-NEXT: [[C77:%[0-9]+]]:_(s64) = G_CONSTANT i64 21 + ; CHECK-NEXT: [[C78:%[0-9]+]]:_(s64) = G_CONSTANT i64 22 + ; CHECK-NEXT: [[C79:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 + ; CHECK-NEXT: [[C80:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; CHECK-NEXT: [[C81:%[0-9]+]]:_(s64) = G_CONSTANT i64 25 + ; CHECK-NEXT: [[C82:%[0-9]+]]:_(s64) = G_CONSTANT i64 26 + ; CHECK-NEXT: [[C83:%[0-9]+]]:_(s64) = G_CONSTANT i64 27 + ; CHECK-NEXT: [[C84:%[0-9]+]]:_(s64) = G_CONSTANT i64 28 + ; CHECK-NEXT: [[C85:%[0-9]+]]:_(s64) = G_CONSTANT i64 29 + ; CHECK-NEXT: [[C86:%[0-9]+]]:_(s64) = G_CONSTANT i64 30 + ; CHECK-NEXT: [[C87:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 + ; CHECK-NEXT: [[C88:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[C89:%[0-9]+]]:_(s64) = G_CONSTANT i64 33 + ; CHECK-NEXT: [[C90:%[0-9]+]]:_(s64) = G_CONSTANT i64 34 + ; CHECK-NEXT: [[C91:%[0-9]+]]:_(s64) = G_CONSTANT i64 35 + ; CHECK-NEXT: [[C92:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 + ; CHECK-NEXT: [[C93:%[0-9]+]]:_(s64) = G_CONSTANT i64 37 + ; CHECK-NEXT: [[C94:%[0-9]+]]:_(s64) = G_CONSTANT i64 38 + ; CHECK-NEXT: [[C95:%[0-9]+]]:_(s64) = G_CONSTANT i64 39 + ; CHECK-NEXT: [[C96:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 + ; CHECK-NEXT: [[C97:%[0-9]+]]:_(s64) = G_CONSTANT i64 41 + ; CHECK-NEXT: [[C98:%[0-9]+]]:_(s64) = G_CONSTANT i64 42 + ; CHECK-NEXT: [[C99:%[0-9]+]]:_(s64) = G_CONSTANT i64 43 + ; CHECK-NEXT: [[C100:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 + ; CHECK-NEXT: [[C101:%[0-9]+]]:_(s64) = G_CONSTANT i64 45 + ; CHECK-NEXT: [[C102:%[0-9]+]]:_(s64) = G_CONSTANT i64 46 + ; CHECK-NEXT: [[C103:%[0-9]+]]:_(s64) = G_CONSTANT i64 47 + ; CHECK-NEXT: [[C104:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[C105:%[0-9]+]]:_(s64) = G_CONSTANT i64 49 + ; CHECK-NEXT: [[C106:%[0-9]+]]:_(s64) = G_CONSTANT i64 50 + ; CHECK-NEXT: [[C107:%[0-9]+]]:_(s64) = G_CONSTANT i64 51 + ; CHECK-NEXT: [[C108:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 + ; CHECK-NEXT: [[C109:%[0-9]+]]:_(s64) = G_CONSTANT i64 53 + ; CHECK-NEXT: [[C110:%[0-9]+]]:_(s64) = G_CONSTANT i64 54 + 
; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.tmp + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.56 + ; CHECK-NEXT: G_BR %bb.61 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.61.bb: + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP1]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.62 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.62.bb: + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] + ; CHECK-NEXT: G_BRCOND [[ICMP2]](s1), %bb.3 + ; CHECK-NEXT: G_BR %bb.63 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.63.bb: + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C3]] + ; CHECK-NEXT: G_BRCOND [[ICMP3]](s1), %bb.4 + ; CHECK-NEXT: G_BR %bb.64 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.64.bb: + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C4]] + ; CHECK-NEXT: G_BRCOND [[ICMP4]](s1), %bb.5 + ; CHECK-NEXT: G_BR %bb.65 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.65.bb: + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C5]] + ; CHECK-NEXT: G_BRCOND [[ICMP5]](s1), %bb.6 + ; CHECK-NEXT: G_BR %bb.66 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.66.bb: + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C6]] + ; CHECK-NEXT: G_BRCOND [[ICMP6]](s1), %bb.7 + ; CHECK-NEXT: G_BR %bb.67 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.67.bb: + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C7]] + ; CHECK-NEXT: G_BRCOND [[ICMP7]](s1), %bb.8 + ; CHECK-NEXT: G_BR %bb.68 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.68.bb: + ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C8]] + ; CHECK-NEXT: G_BRCOND [[ICMP8]](s1), %bb.9 + ; CHECK-NEXT: G_BR %bb.69 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.69.bb: + ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C9]] + ; CHECK-NEXT: G_BRCOND [[ICMP9]](s1), %bb.10 + ; CHECK-NEXT: G_BR %bb.70 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.70.bb: + ; CHECK-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C10]] + ; CHECK-NEXT: G_BRCOND [[ICMP10]](s1), %bb.11 + ; CHECK-NEXT: G_BR %bb.71 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.71.bb: + ; CHECK-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C11]] + ; CHECK-NEXT: G_BRCOND [[ICMP11]](s1), %bb.12 + ; CHECK-NEXT: G_BR %bb.72 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.72.bb: + ; CHECK-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C12]] + ; CHECK-NEXT: G_BRCOND [[ICMP12]](s1), %bb.13 + ; CHECK-NEXT: G_BR %bb.73 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.73.bb: + ; CHECK-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C13]] + ; CHECK-NEXT: G_BRCOND [[ICMP13]](s1), %bb.14 + ; CHECK-NEXT: G_BR %bb.74 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.74.bb: + ; CHECK-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C14]] + ; CHECK-NEXT: G_BRCOND [[ICMP14]](s1), %bb.15 + ; CHECK-NEXT: G_BR %bb.75 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.75.bb: + ; CHECK-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C15]] + ; CHECK-NEXT: G_BRCOND [[ICMP15]](s1), %bb.16 + ; CHECK-NEXT: G_BR %bb.76 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.76.bb: + ; CHECK-NEXT: [[ICMP16:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C16]] + ; CHECK-NEXT: G_BRCOND [[ICMP16]](s1), 
%bb.17 + ; CHECK-NEXT: G_BR %bb.77 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.77.bb: + ; CHECK-NEXT: [[ICMP17:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C17]] + ; CHECK-NEXT: G_BRCOND [[ICMP17]](s1), %bb.18 + ; CHECK-NEXT: G_BR %bb.78 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.78.bb: + ; CHECK-NEXT: [[ICMP18:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C18]] + ; CHECK-NEXT: G_BRCOND [[ICMP18]](s1), %bb.19 + ; CHECK-NEXT: G_BR %bb.79 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.79.bb: + ; CHECK-NEXT: [[ICMP19:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C19]] + ; CHECK-NEXT: G_BRCOND [[ICMP19]](s1), %bb.20 + ; CHECK-NEXT: G_BR %bb.80 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.80.bb: + ; CHECK-NEXT: [[ICMP20:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C20]] + ; CHECK-NEXT: G_BRCOND [[ICMP20]](s1), %bb.21 + ; CHECK-NEXT: G_BR %bb.81 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.81.bb: + ; CHECK-NEXT: [[ICMP21:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C21]] + ; CHECK-NEXT: G_BRCOND [[ICMP21]](s1), %bb.22 + ; CHECK-NEXT: G_BR %bb.82 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.82.bb: + ; CHECK-NEXT: [[ICMP22:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C22]] + ; CHECK-NEXT: G_BRCOND [[ICMP22]](s1), %bb.23 + ; CHECK-NEXT: G_BR %bb.83 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.83.bb: + ; CHECK-NEXT: [[ICMP23:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C23]] + ; CHECK-NEXT: G_BRCOND [[ICMP23]](s1), %bb.24 + ; CHECK-NEXT: G_BR %bb.84 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.84.bb: + ; CHECK-NEXT: [[ICMP24:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C24]] + ; CHECK-NEXT: G_BRCOND [[ICMP24]](s1), %bb.25 + ; CHECK-NEXT: G_BR %bb.85 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.85.bb: + ; CHECK-NEXT: [[ICMP25:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C25]] + ; CHECK-NEXT: G_BRCOND [[ICMP25]](s1), %bb.26 + ; CHECK-NEXT: G_BR %bb.86 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.86.bb: + ; CHECK-NEXT: [[ICMP26:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C26]] + ; CHECK-NEXT: G_BRCOND [[ICMP26]](s1), %bb.27 + ; CHECK-NEXT: G_BR %bb.87 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.87.bb: + ; CHECK-NEXT: [[ICMP27:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C27]] + ; CHECK-NEXT: G_BRCOND [[ICMP27]](s1), %bb.28 + ; CHECK-NEXT: G_BR %bb.88 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.88.bb: + ; CHECK-NEXT: [[ICMP28:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C28]] + ; CHECK-NEXT: G_BRCOND [[ICMP28]](s1), %bb.29 + ; CHECK-NEXT: G_BR %bb.89 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.89.bb: + ; CHECK-NEXT: [[ICMP29:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C29]] + ; CHECK-NEXT: G_BRCOND [[ICMP29]](s1), %bb.30 + ; CHECK-NEXT: G_BR %bb.90 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.90.bb: + ; CHECK-NEXT: [[ICMP30:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C30]] + ; CHECK-NEXT: G_BRCOND [[ICMP30]](s1), %bb.31 + ; CHECK-NEXT: G_BR %bb.91 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.91.bb: + ; CHECK-NEXT: [[ICMP31:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C31]] + ; CHECK-NEXT: G_BRCOND [[ICMP31]](s1), %bb.32 + ; CHECK-NEXT: G_BR %bb.92 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.92.bb: + ; CHECK-NEXT: [[ICMP32:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C32]] + ; CHECK-NEXT: G_BRCOND [[ICMP32]](s1), %bb.33 + ; CHECK-NEXT: G_BR %bb.93 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.93.bb: + ; CHECK-NEXT: [[ICMP33:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), 
[[C33]] + ; CHECK-NEXT: G_BRCOND [[ICMP33]](s1), %bb.34 + ; CHECK-NEXT: G_BR %bb.94 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.94.bb: + ; CHECK-NEXT: [[ICMP34:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C34]] + ; CHECK-NEXT: G_BRCOND [[ICMP34]](s1), %bb.35 + ; CHECK-NEXT: G_BR %bb.95 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.95.bb: + ; CHECK-NEXT: [[ICMP35:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C35]] + ; CHECK-NEXT: G_BRCOND [[ICMP35]](s1), %bb.36 + ; CHECK-NEXT: G_BR %bb.96 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.96.bb: + ; CHECK-NEXT: [[ICMP36:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C36]] + ; CHECK-NEXT: G_BRCOND [[ICMP36]](s1), %bb.37 + ; CHECK-NEXT: G_BR %bb.97 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.97.bb: + ; CHECK-NEXT: [[ICMP37:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C37]] + ; CHECK-NEXT: G_BRCOND [[ICMP37]](s1), %bb.38 + ; CHECK-NEXT: G_BR %bb.98 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.98.bb: + ; CHECK-NEXT: [[ICMP38:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C38]] + ; CHECK-NEXT: G_BRCOND [[ICMP38]](s1), %bb.39 + ; CHECK-NEXT: G_BR %bb.99 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.99.bb: + ; CHECK-NEXT: [[ICMP39:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C39]] + ; CHECK-NEXT: G_BRCOND [[ICMP39]](s1), %bb.40 + ; CHECK-NEXT: G_BR %bb.100 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.100.bb: + ; CHECK-NEXT: [[ICMP40:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C40]] + ; CHECK-NEXT: G_BRCOND [[ICMP40]](s1), %bb.41 + ; CHECK-NEXT: G_BR %bb.101 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.101.bb: + ; CHECK-NEXT: [[ICMP41:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C41]] + ; CHECK-NEXT: G_BRCOND [[ICMP41]](s1), %bb.42 + ; CHECK-NEXT: G_BR %bb.102 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.102.bb: + ; CHECK-NEXT: [[ICMP42:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C42]] + ; CHECK-NEXT: G_BRCOND [[ICMP42]](s1), %bb.43 + ; CHECK-NEXT: G_BR %bb.103 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.103.bb: + ; CHECK-NEXT: [[ICMP43:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C43]] + ; CHECK-NEXT: G_BRCOND [[ICMP43]](s1), %bb.44 + ; CHECK-NEXT: G_BR %bb.104 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.104.bb: + ; CHECK-NEXT: [[ICMP44:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C44]] + ; CHECK-NEXT: G_BRCOND [[ICMP44]](s1), %bb.45 + ; CHECK-NEXT: G_BR %bb.105 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.105.bb: + ; CHECK-NEXT: [[ICMP45:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C45]] + ; CHECK-NEXT: G_BRCOND [[ICMP45]](s1), %bb.46 + ; CHECK-NEXT: G_BR %bb.106 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.106.bb: + ; CHECK-NEXT: [[ICMP46:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C46]] + ; CHECK-NEXT: G_BRCOND [[ICMP46]](s1), %bb.47 + ; CHECK-NEXT: G_BR %bb.107 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.107.bb: + ; CHECK-NEXT: [[ICMP47:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C47]] + ; CHECK-NEXT: G_BRCOND [[ICMP47]](s1), %bb.48 + ; CHECK-NEXT: G_BR %bb.108 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.108.bb: + ; CHECK-NEXT: [[ICMP48:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C48]] + ; CHECK-NEXT: G_BRCOND [[ICMP48]](s1), %bb.49 + ; CHECK-NEXT: G_BR %bb.109 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.109.bb: + ; CHECK-NEXT: [[ICMP49:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C49]] + ; CHECK-NEXT: G_BRCOND [[ICMP49]](s1), %bb.50 + ; CHECK-NEXT: G_BR %bb.110 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.110.bb: + ; 
CHECK-NEXT: [[ICMP50:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C50]] + ; CHECK-NEXT: G_BRCOND [[ICMP50]](s1), %bb.51 + ; CHECK-NEXT: G_BR %bb.111 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.111.bb: + ; CHECK-NEXT: [[ICMP51:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C51]] + ; CHECK-NEXT: G_BRCOND [[ICMP51]](s1), %bb.52 + ; CHECK-NEXT: G_BR %bb.112 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.112.bb: + ; CHECK-NEXT: [[ICMP52:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C52]] + ; CHECK-NEXT: G_BRCOND [[ICMP52]](s1), %bb.53 + ; CHECK-NEXT: G_BR %bb.113 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.113.bb: + ; CHECK-NEXT: [[ICMP53:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C53]] + ; CHECK-NEXT: G_BRCOND [[ICMP53]](s1), %bb.54 + ; CHECK-NEXT: G_BR %bb.114 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.114.bb: + ; CHECK-NEXT: [[ICMP54:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C54]] + ; CHECK-NEXT: G_BRCOND [[ICMP54]](s1), %bb.55 + ; CHECK-NEXT: G_BR %bb.60 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.bb3: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.bb4: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.bb5: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.bb6: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.bb7: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7.bb8: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8.bb9: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.9.bb10: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.10.bb11: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.11.bb12: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.12.bb13: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.13.bb14: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.14.bb15: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.15.bb16: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.16.bb17: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.17.bb18: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.18.bb19: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.19.bb20: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.20.bb21: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.21.bb22: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.22.bb23: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.23.bb24: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.24.bb25: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.25.bb26: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.26.bb27: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.27.bb28: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.28.bb29: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.29.bb30: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.30.bb31: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.31.bb32: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.32.bb33: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.33.bb34: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: bb.34.bb35: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.35.bb36: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.36.bb37: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.37.bb38: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.38.bb39: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.39.bb40: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.40.bb41: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.41.bb42: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.42.bb43: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.43.bb44: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.44.bb45: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.45.bb46: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.46.bb47: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.47.bb48: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.48.bb49: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.49.bb50: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.50.bb51: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.51.bb52: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.52.bb53: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.53.bb54: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.54.bb55: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.55.bb56: + ; CHECK-NEXT: G_BR %bb.56 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.56.bb57: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI [[C56]](s64), %bb.1, [[C57]](s64), %bb.2, [[C58]](s64), %bb.3, [[C59]](s64), %bb.4, [[C60]](s64), %bb.5, [[C61]](s64), %bb.6, [[C62]](s64), %bb.7, [[C63]](s64), %bb.8, [[C64]](s64), %bb.9, [[C65]](s64), %bb.10, [[C66]](s64), %bb.11, [[C67]](s64), %bb.12, [[C68]](s64), %bb.13, [[C69]](s64), %bb.14, [[C70]](s64), %bb.15, [[C71]](s64), %bb.16, [[C72]](s64), %bb.17, [[C73]](s64), %bb.18, [[C74]](s64), %bb.19, [[C75]](s64), %bb.20, [[C76]](s64), %bb.21, [[C77]](s64), %bb.22, [[C78]](s64), %bb.23, [[C79]](s64), %bb.24, [[C80]](s64), %bb.25, [[C81]](s64), %bb.26, [[C82]](s64), %bb.27, [[C83]](s64), %bb.28, [[C84]](s64), %bb.29, [[C85]](s64), %bb.30, [[C86]](s64), %bb.31, [[C87]](s64), %bb.32, [[C88]](s64), %bb.33, [[C89]](s64), %bb.34, [[C90]](s64), %bb.35, [[C91]](s64), %bb.36, [[C92]](s64), %bb.37, [[C93]](s64), %bb.38, [[C94]](s64), %bb.39, [[C95]](s64), %bb.40, [[C96]](s64), %bb.41, [[C97]](s64), %bb.42, [[C98]](s64), %bb.43, [[C99]](s64), %bb.44, [[C100]](s64), %bb.45, [[C101]](s64), %bb.46, [[C102]](s64), %bb.47, [[C103]](s64), %bb.48, [[C104]](s64), %bb.49, [[C105]](s64), %bb.50, [[C106]](s64), %bb.51, [[C107]](s64), %bb.52, [[C108]](s64), %bb.53, [[C109]](s64), %bb.54, [[C110]](s64), %bb.55 + ; CHECK-NEXT: [[C111:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[PHI]], [[C111]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[MUL]](s64) + ; CHECK-NEXT: [[C112:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C112]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD1]](p0) :: (load (p0) from %ir.tmp59) + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: $x0 = COPY 
[[COPY]](p0) + ; CHECK-NEXT: $x1 = COPY [[LOAD]](p0) + ; CHECK-NEXT: BL @wibble, csr_aarch64_aapcs_thisreturn, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: G_BR %bb.59 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.57.bb62: + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: $x0 = COPY [[COPY]](p0) + ; CHECK-NEXT: $x1 = COPY [[COPY2]](p0) + ; CHECK-NEXT: BL @wibble, csr_aarch64_aapcs_thisreturn, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: G_BR %bb.59 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.58.bb64: + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: $w0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: $x1 = COPY [[FRAME_INDEX]](p0) + ; CHECK-NEXT: BL @baz, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $x1 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: $x0 = COPY [[COPY]](p0) + ; CHECK-NEXT: $x1 = COPY [[FRAME_INDEX]](p0) + ; CHECK-NEXT: BL @wibble, csr_aarch64_aapcs_thisreturn, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: G_BR %bb.59 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.59.bb68: + ; CHECK-NEXT: RET_ReallyLR + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.60.bb69: + ; CHECK-NEXT: [[ICMP55:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](p0), [[C55]] + ; CHECK-NEXT: G_BRCOND [[ICMP55]](s1), %bb.58 + ; CHECK-NEXT: G_BR %bb.57 bb: %tmp = alloca [16 x i32], align 4 switch i32 %arg1, label %bb69 [ @@ -1046,33 +1060,31 @@ bb57: ; preds = %bb56, %bb55, %bb54, %bb53, %bb52, %bb51, %bb50, %bb49, %bb48, %bb47, %bb46, %bb45, %bb44, %bb43, %bb42, %bb41, %bb40, %bb39, %bb38, %bb37, %bb36, %bb35, %bb34, %bb33, %bb32, %bb31, %bb30, %bb29, %bb28, %bb27, %bb26, %bb25, %bb24, %bb23, %bb22, %bb21, %bb20, %bb19, %bb18, %bb17, %bb16, %bb15, %bb14, %bb13, %bb12, %bb11, %bb10, %bb9, %bb8, %bb7, %bb6, %bb5, %bb4, %bb3, %bb %tmp58 = phi i64 [ 0, %bb ], [ 1, %bb3 ], [ 2, %bb4 ], [ 3, %bb5 ], [ 4, %bb6 ], [ 5, %bb7 ], [ 6, %bb8 ], [ 7, %bb9 ], [ 8, %bb10 ], [ 9, %bb11 ], [ 10, %bb12 ], [ 11, %bb13 ], [ 12, %bb14 ], [ 13, %bb15 ], [ 14, %bb16 ], [ 15, %bb17 ], [ 16, %bb18 ], [ 17, %bb19 ], [ 18, %bb20 ], [ 19, %bb21 ], [ 20, %bb22 ], [ 21, %bb23 ], [ 22, %bb24 ], [ 23, %bb25 ], [ 24, %bb26 ], [ 25, %bb27 ], [ 26, %bb28 ], [ 27, %bb29 ], [ 28, %bb30 ], [ 29, %bb31 ], [ 30, %bb32 ], [ 31, %bb33 ], [ 32, %bb34 ], [ 33, %bb35 ], [ 34, %bb36 ], [ 35, %bb37 ], [ 36, %bb38 ], [ 37, %bb39 ], [ 38, %bb40 ], [ 39, %bb41 ], [ 40, %bb42 ], [ 41, %bb43 ], [ 42, %bb44 ], [ 43, %bb45 ], [ 44, %bb46 ], [ 45, %bb47 ], [ 46, %bb48 ], [ 47, %bb49 ], [ 48, %bb50 ], [ 49, %bb51 ], [ 50, %bb52 ], [ 51, %bb53 ], [ 52, %bb54 ], [ 53, %bb55 ], [ 54, %bb56 ] - %tmp59 = getelementptr inbounds [55 x %0], [55 x %0]* @global, i64 0, i64 %tmp58, i32 1 - %tmp60 = load i32*, i32** %tmp59, align 8 - %tmp61 = call %1* @wibble(%1* %arg, i32* %tmp60) + %tmp59 = getelementptr inbounds [55 x %0], ptr @global, i64 0, i64 %tmp58, i32 1 + %tmp60 = load ptr, ptr %tmp59, align 8 + %tmp61 = call ptr @wibble(ptr 
%arg, ptr %tmp60) br label %bb68 bb62: ; preds = %bb69 - %tmp63 = call %1* @wibble(%1* %arg, i32* nonnull %arg2) + %tmp63 = call ptr @wibble(ptr %arg, ptr nonnull %arg2) br label %bb68 bb64: ; preds = %bb69 - %tmp65 = bitcast [16 x i32]* %tmp to i8* - %tmp66 = getelementptr inbounds [16 x i32], [16 x i32]* %tmp, i64 0, i64 0 - call void @baz(i32 %arg1, i32* %tmp66) - %tmp67 = call %1* @wibble(%1* %arg, i32* %tmp66) + call void @baz(i32 %arg1, ptr %tmp) + %tmp67 = call ptr @wibble(ptr %arg, ptr %tmp) br label %bb68 bb68: ; preds = %bb64, %bb62, %bb57 ret void bb69: ; preds = %bb - %tmp70 = icmp eq i32* %arg2, null + %tmp70 = icmp eq ptr %arg2, null br i1 %tmp70, label %bb64, label %bb62 } -declare %1* @wibble(%1* returned, i32*) +declare ptr @wibble(ptr returned, ptr) -declare void @baz(i32, i32*) +declare void @baz(i32, ptr) ; Check that with 2 jump tables, the phi node doesn't lose the edge from the @@ -1080,146 +1092,161 @@ define void @jt_2_tables_phi_edge_from_second() { ; CHECK-LABEL: name: jt_2_tables_phi_edge_from_second ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.6(0x40000000), %bb.19(0x40000000) - ; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 123 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 263 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 265 - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 270 - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 279 - ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 37 - ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 43 - ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 45 - ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 - ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 278 - ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 280 - ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 281 - ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 283 - ; CHECK: [[DEF2:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF - ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32) from `i32* undef`, align 8) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.6 - ; CHECK: G_BR %bb.19 - ; CHECK: bb.19.entry: - ; CHECK: successors: %bb.4(0x40000000), %bb.20(0x40000000) - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C1]] - ; CHECK: G_BRCOND [[ICMP1]](s1), %bb.4 - ; CHECK: G_BR %bb.20 - ; CHECK: bb.20.entry: - ; CHECK: successors: %bb.7(0x40000000), %bb.21(0x40000000) - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C2]] - ; CHECK: G_BRCOND [[ICMP2]](s1), %bb.7 - ; CHECK: G_BR %bb.21 - ; CHECK: bb.21.entry: - ; CHECK: successors: %bb.2(0x40000000), %bb.22(0x40000000) - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C3]] - ; CHECK: G_BRCOND [[ICMP3]](s1), %bb.2 - ; CHECK: G_BR %bb.22 - ; CHECK: bb.22.entry: - ; CHECK: successors: %bb.5(0x40000000), %bb.23(0x40000000) - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C4]] - ; 
CHECK: G_BRCOND [[ICMP4]](s1), %bb.5 - ; CHECK: G_BR %bb.23 - ; CHECK: bb.23.entry: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: G_BR %bb.3 - ; CHECK: bb.2.if.then: - ; CHECK: successors: + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 123 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 263 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 265 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 270 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 279 + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 37 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 43 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 45 + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 278 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 280 + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 281 + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 283 + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32) from `ptr undef`, align 8) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.6 + ; CHECK-NEXT: G_BR %bb.19 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.19.entry: + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP1]](s1), %bb.4 + ; CHECK-NEXT: G_BR %bb.20 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.20.entry: + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C2]] + ; CHECK-NEXT: G_BRCOND [[ICMP2]](s1), %bb.7 + ; CHECK-NEXT: G_BR %bb.21 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.21.entry: + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C3]] + ; CHECK-NEXT: G_BRCOND [[ICMP3]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.22 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.22.entry: + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C4]] + ; CHECK-NEXT: G_BRCOND [[ICMP4]](s1), %bb.5 + ; CHECK-NEXT: G_BR %bb.23 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.23.entry: + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.then: + ; CHECK-NEXT: successors: + ; CHECK: {{ $}} + ; CHECK: {{ $}} ; CHECK: bb.3.sw.bb2.i41: - ; CHECK: successors: + ; CHECK-NEXT: successors: + ; CHECK: {{ $}} + ; CHECK: {{ $}} ; CHECK: bb.4.sw.bb7.i44: - ; CHECK: successors: + ; CHECK-NEXT: successors: + ; CHECK: {{ $}} + ; CHECK: {{ $}} ; CHECK: bb.5.sw.bb8.i45: - ; CHECK: successors: + ; CHECK-NEXT: successors: + ; CHECK: {{ $}} + ; CHECK: {{ $}} ; CHECK: bb.6.sw.bb13.i47: ; CHECK: successors: + ; CHECK: {{ $}} + ; CHECK: {{ $}} ; CHECK: bb.7.sw.bb14.i48: - ; CHECK: successors: %bb.10(0x40000000), %bb.24(0x40000000) - ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF1]](s32), [[C5]] - ; CHECK: 
G_BRCOND [[ICMP5]](s1), %bb.10 - ; CHECK: G_BR %bb.24 - ; CHECK: bb.24.sw.bb14.i48: - ; CHECK: successors: %bb.16(0x40000000), %bb.25(0x40000000) - ; CHECK: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF1]](s32), [[C6]] - ; CHECK: G_BRCOND [[ICMP6]](s1), %bb.16 - ; CHECK: G_BR %bb.25 - ; CHECK: bb.25.sw.bb14.i48: - ; CHECK: successors: %bb.9(0x40000000), %bb.26(0x40000000) - ; CHECK: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF1]](s32), [[C7]] - ; CHECK: G_BRCOND [[ICMP7]](s1), %bb.9 - ; CHECK: G_BR %bb.26 - ; CHECK: bb.26.sw.bb14.i48: - ; CHECK: successors: %bb.14(0x40000000), %bb.27(0x40000000) - ; CHECK: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF1]](s32), [[C8]] - ; CHECK: G_BRCOND [[ICMP8]](s1), %bb.14 - ; CHECK: G_BR %bb.27 - ; CHECK: bb.27.sw.bb14.i48: - ; CHECK: successors: %bb.11(0x40000000), %bb.28(0x40000000) - ; CHECK: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF1]](s32), [[C9]] - ; CHECK: G_BRCOND [[ICMP9]](s1), %bb.11 - ; CHECK: G_BR %bb.28 - ; CHECK: bb.28.sw.bb14.i48: - ; CHECK: successors: %bb.13(0x40000000), %bb.29(0x40000000) - ; CHECK: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF1]](s32), [[C10]] - ; CHECK: G_BRCOND [[ICMP10]](s1), %bb.13 - ; CHECK: G_BR %bb.29 - ; CHECK: bb.29.sw.bb14.i48: - ; CHECK: successors: %bb.15(0x40000000), %bb.30(0x40000000) - ; CHECK: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF1]](s32), [[C11]] - ; CHECK: G_BRCOND [[ICMP11]](s1), %bb.15 - ; CHECK: G_BR %bb.30 - ; CHECK: bb.30.sw.bb14.i48: - ; CHECK: successors: %bb.12(0x40000000), %bb.18(0x40000000) - ; CHECK: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF1]](s32), [[C12]] - ; CHECK: G_BRCOND [[ICMP12]](s1), %bb.12 - ; CHECK: G_BR %bb.18 - ; CHECK: bb.8.sw.default.i49: - ; CHECK: successors: + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF1]](s32), [[C5]] + ; CHECK-NEXT: G_BRCOND [[ICMP5]](s1), %bb.10 + ; CHECK-NEXT: G_BR %bb.24 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.24.sw.bb14.i48: + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF1]](s32), [[C6]] + ; CHECK-NEXT: G_BRCOND [[ICMP6]](s1), %bb.16 + ; CHECK-NEXT: G_BR %bb.25 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.25.sw.bb14.i48: + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF1]](s32), [[C7]] + ; CHECK-NEXT: G_BRCOND [[ICMP7]](s1), %bb.9 + ; CHECK-NEXT: G_BR %bb.26 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.26.sw.bb14.i48: + ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF1]](s32), [[C8]] + ; CHECK-NEXT: G_BRCOND [[ICMP8]](s1), %bb.14 + ; CHECK-NEXT: G_BR %bb.27 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.27.sw.bb14.i48: + ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF1]](s32), [[C9]] + ; CHECK-NEXT: G_BRCOND [[ICMP9]](s1), %bb.11 + ; CHECK-NEXT: G_BR %bb.28 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.28.sw.bb14.i48: + ; CHECK-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF1]](s32), [[C10]] + ; CHECK-NEXT: G_BRCOND [[ICMP10]](s1), %bb.13 + ; CHECK-NEXT: G_BR %bb.29 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.29.sw.bb14.i48: + ; CHECK-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF1]](s32), [[C11]] + ; CHECK-NEXT: G_BRCOND [[ICMP11]](s1), %bb.15 + ; CHECK-NEXT: G_BR %bb.30 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.30.sw.bb14.i48: + ; CHECK-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF1]](s32), [[C12]] + ; CHECK-NEXT: G_BRCOND [[ICMP12]](s1), %bb.12 + ; CHECK-NEXT: G_BR %bb.18 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8.sw.default.i49: + ; CHECK-NEXT: successors: + ; CHECK: 
{{ $}} + ; CHECK: {{ $}} ; CHECK: bb.9.sw.bb1.i: - ; CHECK: successors: %bb.16(0x80000000) - ; CHECK: G_BR %bb.16 - ; CHECK: bb.10.sw.bb4.i: - ; CHECK: successors: %bb.16(0x80000000) - ; CHECK: G_BR %bb.16 - ; CHECK: bb.11.sw.bb6.i: - ; CHECK: successors: %bb.16(0x80000000) - ; CHECK: G_BR %bb.16 - ; CHECK: bb.12.sw.bb7.i: - ; CHECK: successors: %bb.16(0x80000000) - ; CHECK: G_BR %bb.16 - ; CHECK: bb.13.sw.bb8.i: - ; CHECK: successors: %bb.16(0x80000000) - ; CHECK: G_BR %bb.16 - ; CHECK: bb.14.sw.bb9.i: - ; CHECK: successors: %bb.16(0x80000000) - ; CHECK: G_BR %bb.16 - ; CHECK: bb.15.sw.bb12.i: - ; CHECK: successors: %bb.16(0x80000000) - ; CHECK: bb.16.land.rhs.lr.ph: - ; CHECK: successors: %bb.17(0x40000000), %bb.18(0x40000000) - ; CHECK: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C13]](s32), %bb.24, [[C14]](s32), %bb.9, [[C15]](s32), %bb.10, [[C16]](s32), %bb.11, [[C17]](s32), %bb.12, [[C18]](s32), %bb.13, [[C19]](s32), %bb.14, [[C20]](s32), %bb.15 - ; CHECK: G_BRCOND [[DEF2]](s1), %bb.17 - ; CHECK: G_BR %bb.18 - ; CHECK: bb.17.while.body: - ; CHECK: successors: + ; CHECK-NEXT: G_BR %bb.16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.10.sw.bb4.i: + ; CHECK-NEXT: G_BR %bb.16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.11.sw.bb6.i: + ; CHECK-NEXT: G_BR %bb.16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.12.sw.bb7.i: + ; CHECK-NEXT: G_BR %bb.16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.13.sw.bb8.i: + ; CHECK-NEXT: G_BR %bb.16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.14.sw.bb9.i: + ; CHECK-NEXT: G_BR %bb.16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.15.sw.bb12.i: + ; CHECK-NEXT: G_BR %bb.16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.16.land.rhs.lr.ph: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C13]](s32), %bb.24, [[C14]](s32), %bb.9, [[C15]](s32), %bb.10, [[C16]](s32), %bb.11, [[C17]](s32), %bb.12, [[C18]](s32), %bb.13, [[C19]](s32), %bb.14, [[C20]](s32), %bb.15 + ; CHECK-NEXT: G_BRCOND [[DEF2]](s1), %bb.17 + ; CHECK-NEXT: G_BR %bb.18 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.17.while.body: + ; CHECK-NEXT: successors: + ; CHECK: {{ $}} ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK: BL @jt_2_tables_phi_edge_from_second, csr_aarch64_aapcs, implicit-def $lr, implicit $sp - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; CHECK: bb.18.while.end: - ; CHECK: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C21]](s32), %bb.30, [[PHI]](s32), %bb.16 - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: BL @jt_2_tables_phi_edge_from_second, csr_aarch64_aapcs, implicit-def $lr, implicit $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.18.while.end: + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C21]](s32), %bb.30, [[PHI]](s32), %bb.16 + ; CHECK-NEXT: RET_ReallyLR entry: - %0 = load i32, i32* undef, align 8 + %0 = load i32, ptr undef, align 8 switch i32 %0, label %sw.default.i49 [ i32 270, label %if.then i32 265, label %sw.bb14.i48 @@ -1297,35 +1324,37 @@ define i32 @range_test(i32 %x) { ; CHECK-LABEL: name: range_test ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.3(0x40000000), %bb.5(0x40000000) - ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), 
%bb.3 - ; CHECK: G_BR %bb.5 - ; CHECK: bb.5.entry: - ; CHECK: successors: %bb.2(0x40000000), %bb.4(0x40000000) - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[C1]] - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ule), [[SUB]](s32), [[C5]] - ; CHECK: G_BRCOND [[ICMP1]](s1), %bb.2 - ; CHECK: G_BR %bb.4 - ; CHECK: bb.2.sw.bb: - ; CHECK: successors: %bb.4(0x80000000) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[COPY]], [[C3]] - ; CHECK: G_BR %bb.4 - ; CHECK: bb.3.sw.bb1: - ; CHECK: successors: %bb.4(0x80000000) - ; CHECK: [[MUL:%[0-9]+]]:_(s32) = nsw G_MUL [[COPY]], [[C2]] - ; CHECK: bb.4.return: - ; CHECK: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[MUL]](s32), %bb.3, [[ADD]](s32), %bb.2, [[C4]](s32), %bb.5 - ; CHECK: $w0 = COPY [[PHI]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.3 + ; CHECK-NEXT: G_BR %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.entry: + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[C1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ule), [[SUB]](s32), [[C5]] + ; CHECK-NEXT: G_BRCOND [[ICMP1]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.sw.bb: + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[COPY]], [[C3]] + ; CHECK-NEXT: G_BR %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.sw.bb1: + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = nsw G_MUL [[COPY]], [[C2]] + ; CHECK-NEXT: G_BR %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.return: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[MUL]](s32), %bb.3, [[ADD]](s32), %bb.2, [[C4]](s32), %bb.5 + ; CHECK-NEXT: $w0 = COPY [[PHI]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 entry: switch i32 %x, label %return [ i32 24, label %sw.bb @@ -1347,45 +1376,46 @@ ret i32 %retval.0 } -define i64* @test_range_phi_switch_cycle() { +define ptr @test_range_phi_switch_cycle() { ; CHECK-LABEL: name: test_range_phi_switch_cycle ; CHECK: bb.1.bb: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 305 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 307 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 497 - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: bb.2.bb1: - ; CHECK: successors: %bb.2(0x40000000), %bb.6(0x40000000) - ; CHECK: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[DEF]](s32), %bb.1, [[C3]](s32), %bb.3, [[C4]](s32), %bb.4, [[C5]](s32), %bb.2 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.2 - ; CHECK: G_BR %bb.6 - ; CHECK: bb.6.bb1: - ; CHECK: successors: %bb.3(0x40000000), %bb.7(0x40000000) - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF]](s32), [[C1]] - ; CHECK: G_BRCOND [[ICMP1]](s1), %bb.3 - ; CHECK: G_BR %bb.7 - ; CHECK: bb.7.bb1: - ; CHECK: successors: %bb.4(0x40000000), 
%bb.5(0x40000000) - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF]](s32), [[C2]] - ; CHECK: G_BRCOND [[ICMP2]](s1), %bb.4 - ; CHECK: G_BR %bb.5 - ; CHECK: bb.3.bb2: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.4.bb3: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.5.bb4: - ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK: $w0 = COPY [[PHI]](s32) - ; CHECK: BL @ham, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit-def $x0 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 305 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 307 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 497 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.bb1: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[DEF]](s32), %bb.1, [[C3]](s32), %bb.3, [[C4]](s32), %bb.4, [[C5]](s32), %bb.2 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.bb1: + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP1]](s1), %bb.3 + ; CHECK-NEXT: G_BR %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7.bb1: + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[DEF]](s32), [[C2]] + ; CHECK-NEXT: G_BRCOND [[ICMP2]](s1), %bb.4 + ; CHECK-NEXT: G_BR %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.bb2: + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.bb3: + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.bb4: + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: $w0 = COPY [[PHI]](s32) + ; CHECK-NEXT: BL @ham, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit-def $x0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp bb: br label %bb1 @@ -1404,11 +1434,11 @@ br label %bb1 bb4: ; preds = %bb1 - %tmp5 = tail call i64* @ham(i32 %tmp) + %tmp5 = tail call ptr @ham(i32 %tmp) unreachable } -declare i64* @ham(i32) +declare ptr @ham(i32) define internal void @bar() unnamed_addr #1 { ; CHECK-LABEL: name: bar @@ -1419,27 +1449,30 @@ define i1 @i1_value_cmp_is_signed(i1) { ; CHECK-LABEL: name: i1_value_cmp_is_signed ; CHECK: bb.1.Entry: - ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) - ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8) - ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; CHECK: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[TRUNC1]](s1), [[C1]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.3 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2.BadValue: - ; CHECK: successors: + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC 
[[COPY]](s32) + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 false + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[TRUNC1]](s1), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.3 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.BadValue: + ; CHECK-NEXT: successors: + ; CHECK: {{ $}} ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK: BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; CHECK: bb.3.OkValue: - ; CHECK: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[TRUNC1]](s1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8) - ; CHECK: $w0 = COPY [[ANYEXT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.OkValue: + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[TRUNC1]](s1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 Entry: switch i1 %0, label %BadValue [ i1 false, label %OkValue diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -118,8 +118,8 @@ ; CHECK-NEXT: RET_ReallyLR ; CHECK: [[FALSE]].{{[a-zA-Z0-9.]+}}: ; CHECK-NEXT: RET_ReallyLR -define void @condbr(i1* %tstaddr) { - %tst = load i1, i1* %tstaddr +define void @condbr(ptr %tstaddr) { + %tst = load i1, ptr %tstaddr br i1 %tst, label %true, label %false true: ret void @@ -146,7 +146,7 @@ ; CHECK: [[BB_L2]].{{[a-zA-Z0-9.]+}} (ir-block-address-taken %ir-block.{{[a-zA-Z0-9.]+}}): ; CHECK-NEXT: RET_ReallyLR -@indirectbr.L = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@indirectbr, %L1), i8* blockaddress(@indirectbr, %L2), i8* null], align 8 +@indirectbr.L = internal unnamed_addr constant [3 x ptr] [ptr blockaddress(@indirectbr, %L1), ptr blockaddress(@indirectbr, %L2), ptr null], align 8 define void @indirectbr() { entry: @@ -155,9 +155,9 @@ %i = phi i32 [ 0, %entry ], [ %inc, %L1 ] %inc = add i32 %i, 1 %idxprom = zext i32 %i to i64 - %arrayidx = getelementptr inbounds [3 x i8*], [3 x i8*]* @indirectbr.L, i64 0, i64 %idxprom - %brtarget = load i8*, i8** %arrayidx, align 8 - indirectbr i8* %brtarget, [label %L1, label %L2] + %arrayidx = getelementptr inbounds [3 x ptr], ptr @indirectbr.L, i64 0, i64 %idxprom + %brtarget = load ptr, ptr %arrayidx, align 8 + indirectbr ptr %brtarget, [label %L1, label %L2] L2: ; preds = %L1 ret void } @@ -259,8 +259,8 @@ ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_PTRTOINT [[ARG1]] ; CHECK: $x0 = COPY [[RES]] ; CHECK: RET_ReallyLR implicit $x0 -define i64 @ptrtoint(i64* %a) { - %val = ptrtoint i64* %a to i64 +define i64 @ptrtoint(ptr %a) { + %val = ptrtoint ptr %a to i64 ret i64 %val } @@ -269,22 +269,21 @@ ; CHECK: [[RES:%[0-9]+]]:_(p0) = G_INTTOPTR [[ARG1]] ; CHECK: $x0 = COPY [[RES]] ; CHECK: RET_ReallyLR implicit $x0 -define i64* @inttoptr(i64 %a) { - %val = inttoptr i64 %a to i64* - ret i64* %val +define ptr @inttoptr(i64 %a) { + %val = inttoptr i64 %a to ptr + ret ptr 
%val } ; CHECK-LABEL: name: trivial_bitcast ; CHECK: [[ARG1:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: $x0 = COPY [[ARG1]] ; CHECK: RET_ReallyLR implicit $x0 -define i64* @trivial_bitcast(i8* %a) { - %val = bitcast i8* %a to i64* - ret i64* %val +define ptr @trivial_bitcast(ptr %a) { + ret ptr %a } ; CHECK-LABEL: name: trivial_bitcast_with_copy -; CHECK: [[A:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[A:%[0-9]+]]:_(s64) = COPY $d0 ; CHECK: G_BR %[[CAST:bb\.[0-9]+]] ; CHECK: [[END:bb\.[0-9]+]].{{[a-zA-Z0-9.]+}}: @@ -292,14 +291,14 @@ ; CHECK: [[CAST]].{{[a-zA-Z0-9.]+}}: ; CHECK: G_BR %[[END]] -define i64* @trivial_bitcast_with_copy(i8* %a) { +define i64 @trivial_bitcast_with_copy(double %a) { br label %cast end: - ret i64* %val + ret i64 %val cast: - %val = bitcast i8* %a to i64* + %val = bitcast double %a to i64 br label %end } @@ -321,10 +320,10 @@ ; CHECK: [[RES2:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[RES1]] ; CHECK: $x0 = COPY [[RES2]] ; CHECK: RET_ReallyLR implicit $x0 -define i64* @addrspacecast(i32 addrspace(1)* %a) { - %res1 = addrspacecast i32 addrspace(1)* %a to i64 addrspace(2)* - %res2 = addrspacecast i64 addrspace(2)* %res1 to i64* - ret i64* %res2 +define ptr @addrspacecast(ptr addrspace(1) %a) { + %res1 = addrspacecast ptr addrspace(1) %a to ptr addrspace(2) + %res2 = addrspacecast ptr addrspace(2) %res1 to ptr + ret ptr %res2 } ; CHECK-LABEL: name: trunc @@ -334,7 +333,7 @@ ; CHECK: [[RES2:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[VEC]] define void @trunc(i64 %a) { %vecptr = alloca <4 x i32> - %vec = load <4 x i32>, <4 x i32>* %vecptr + %vec = load <4 x i32>, ptr %vecptr %res1 = trunc i64 %a to i8 %res2 = trunc <4 x i32> %vec to <4 x i16> ret void @@ -352,16 +351,16 @@ ; CHECK: [[SUM4:%[0-9]+]]:_(s64) = G_ADD [[SUM3]], [[VAL4]] ; CHECK: $x0 = COPY [[SUM4]] ; CHECK: RET_ReallyLR implicit $x0 -define i64 @load(i64* %addr, i64 addrspace(42)* %addr42) { - %val1 = load i64, i64* %addr, align 16 +define i64 @load(ptr %addr, ptr addrspace(42) %addr42) { + %val1 = load i64, ptr %addr, align 16 - %val2 = load i64, i64 addrspace(42)* %addr42 + %val2 = load i64, ptr addrspace(42) %addr42 %sum2 = add i64 %val1, %val2 - %val3 = load volatile i64, i64* %addr + %val3 = load volatile i64, ptr %addr %sum3 = add i64 %sum2, %val3 - %val4 = load i64, i64* %addr, !range !0 + %val4 = load i64, ptr %addr, !range !0 %sum4 = add i64 %sum3, %val4 ret i64 %sum4 } @@ -375,10 +374,10 @@ ; CHECK: G_STORE [[VAL2]](s64), [[ADDR42]](p42) :: (store (s64) into %ir.addr42, addrspace 42) ; CHECK: G_STORE [[VAL1]](s64), [[ADDR]](p0) :: (volatile store (s64) into %ir.addr) ; CHECK: RET_ReallyLR -define void @store(i64* %addr, i64 addrspace(42)* %addr42, i64 %val1, i64 %val2) { - store i64 %val1, i64* %addr, align 16 - store i64 %val2, i64 addrspace(42)* %addr42 - store volatile i64 %val1, i64* %addr +define void @store(ptr %addr, ptr addrspace(42) %addr42, i64 %val1, i64 %val2) { + store i64 %val1, ptr %addr, align 16 + store i64 %val2, ptr addrspace(42) %addr42 + store volatile i64 %val1, ptr %addr %sum = add i64 %val1, %val2 ret void } @@ -391,14 +390,14 @@ ; CHECK: [[VEC:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_VEC]] ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), [[VEC]](<8 x s8>), [[VEC]](<8 x s8>), [[PTR]](p0) ; CHECK: RET_ReallyLR -declare i8* @llvm.returnaddress(i32) -declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) -declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>*) +declare ptr @llvm.returnaddress(i32) +declare void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8>, 
<8 x i8>, ptr) +declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr) define void @intrinsics(i32 %cur, i32 %bits) { - %ptr = call i8* @llvm.returnaddress(i32 0) + %ptr = call ptr @llvm.returnaddress(i32 0) %ptr.vec = alloca <8 x i8> - %vec = load <8 x i8>, <8 x i8>* %ptr.vec - call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec, <8 x i8> %vec, i8* %ptr) + %vec = load <8 x i8>, ptr %ptr.vec + call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %vec, <8 x i8> %vec, ptr %ptr) ret void } @@ -414,15 +413,15 @@ ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_PHI [[RES1]](s32), %[[TRUE]], [[RES2]](s32), %[[FALSE]] ; CHECK: $w0 = COPY [[RES]] -define i32 @test_phi(i32* %addr1, i32* %addr2, i1 %tst) { +define i32 @test_phi(ptr %addr1, ptr %addr2, i1 %tst) { br i1 %tst, label %true, label %false true: - %res1 = load i32, i32* %addr1 + %res1 = load i32, ptr %addr1 br label %end false: - %res2 = load i32, i32* %addr2 + %res2 = load i32, ptr %addr2 br label %end end: @@ -481,8 +480,8 @@ ; CHECK: [[ONE:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ONE]] ; CHECK: $x0 = COPY [[PTR]] -define i8* @test_constant_inttoptr() { - ret i8* inttoptr(i64 1 to i8*) +define ptr @test_constant_inttoptr() { + ret ptr inttoptr(i64 1 to ptr) } ; This failed purely because the Constant -> VReg map was kept across @@ -593,8 +592,8 @@ ; CHECK-LABEL: name: test_constant_null ; CHECK: [[NULL:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; CHECK: $x0 = COPY [[NULL]] -define i8* @test_constant_null() { - ret i8* null +define ptr @test_constant_null() { + ret ptr null } ; CHECK-LABEL: name: test_struct_memops @@ -606,9 +605,9 @@ ; CHECK: G_STORE [[VAL1]](s8), [[ADDR]](p0) :: (store (s8) into %ir.addr, align 4) ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST1]](s64) ; CHECK: G_STORE [[VAL2]](s32), [[GEP2]](p0) :: (store (s32) into %ir.addr + 4) -define void @test_struct_memops({ i8, i32 }* %addr) { - %val = load { i8, i32 }, { i8, i32 }* %addr - store { i8, i32 } %val, { i8, i32 }* %addr +define void @test_struct_memops(ptr %addr) { + %val = load { i8, i32 }, ptr %addr + store { i8, i32 } %val, ptr %addr ret void } @@ -616,9 +615,9 @@ ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[VAL:%[0-9]+]]:_(s1) = G_LOAD [[ADDR]](p0) :: (load (s1) from %ir.addr) ; CHECK: G_STORE [[VAL]](s1), [[ADDR]](p0) :: (store (s1) into %ir.addr) -define void @test_i1_memops(i1* %addr) { - %val = load i1, i1* %addr - store i1 %val, i1* %addr +define void @test_i1_memops(ptr %addr) { + %val = load i1, ptr %addr + store i1 %val, ptr %addr ret void } @@ -628,9 +627,9 @@ ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[TST:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LHS]](s32), [[RHS]] ; CHECK: G_STORE [[TST]](s1), [[ADDR]](p0) -define void @int_comparison(i32 %a, i32 %b, i1* %addr) { +define void @int_comparison(i32 %a, i32 %b, ptr %addr) { %res = icmp ne i32 %a, %b - store i1 %res, i1* %addr + store i1 %res, ptr %addr ret void } @@ -640,9 +639,9 @@ ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[TST:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LHS]](p0), [[RHS]] ; CHECK: G_STORE [[TST]](s1), [[ADDR]](p0) -define void @ptr_comparison(i8* %a, i8* %b, i1* %addr) { - %res = icmp eq i8* %a, %b - store i1 %res, i1* %addr +define void @ptr_comparison(ptr %a, ptr %b, ptr %addr) { + %res = icmp eq ptr %a, %b + store i1 %res, ptr %addr ret void } @@ -711,9 +710,9 @@ ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.addr + 4, align 
4) declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) -define void @test_sadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { +define void @test_sadd_overflow(i32 %lhs, i32 %rhs, ptr %addr) { %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %lhs, i32 %rhs) - store { i32, i1 } %res, { i32, i1 }* %addr + store { i32, i1 } %res, ptr %addr ret void } @@ -727,9 +726,9 @@ ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.addr + 4, align 4) declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) -define void @test_uadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { +define void @test_uadd_overflow(i32 %lhs, i32 %rhs, ptr %addr) { %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %lhs, i32 %rhs) - store { i32, i1 } %res, { i32, i1 }* %addr + store { i32, i1 } %res, ptr %addr ret void } @@ -743,9 +742,9 @@ ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.subr + 4, align 4) declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) -define void @test_ssub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { +define void @test_ssub_overflow(i32 %lhs, i32 %rhs, ptr %subr) { %res = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %lhs, i32 %rhs) - store { i32, i1 } %res, { i32, i1 }* %subr + store { i32, i1 } %res, ptr %subr ret void } @@ -759,9 +758,9 @@ ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.subr + 4, align 4) declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) -define void @test_usub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { +define void @test_usub_overflow(i32 %lhs, i32 %rhs, ptr %subr) { %res = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %lhs, i32 %rhs) - store { i32, i1 } %res, { i32, i1 }* %subr + store { i32, i1 } %res, ptr %subr ret void } @@ -775,9 +774,9 @@ ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.addr + 4, align 4) declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) -define void @test_smul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { +define void @test_smul_overflow(i32 %lhs, i32 %rhs, ptr %addr) { %res = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %lhs, i32 %rhs) - store { i32, i1 } %res, { i32, i1 }* %addr + store { i32, i1 } %res, ptr %addr ret void } @@ -791,9 +790,9 @@ ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.addr + 4, align 4) declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) -define void @test_umul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { +define void @test_umul_overflow(i32 %lhs, i32 %rhs, ptr %addr) { %res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %lhs, i32 %rhs) - store { i32, i1 } %res, { i32, i1 }* %addr + store { i32, i1 } %res, ptr %addr ret void } @@ -811,8 +810,8 @@ ; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load (s32) from %ir.addr + 12) ; CHECK: $w0 = COPY [[LD3]](s32) %struct.nested = type {i8, { i8, i32 }, i32} -define i32 @test_extractvalue(%struct.nested* %addr) { - %struct = load %struct.nested, %struct.nested* %addr +define i32 @test_extractvalue(ptr %addr) { + %struct = load %struct.nested, ptr %addr %res = extractvalue %struct.nested %struct, 1, 1 ret i32 %res } @@ -833,10 +832,10 @@ ; CHECK: 
G_STORE [[LD2]](s8), %1(p0) :: (store (s8) into %ir.addr2, align 4) ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD %1, [[CST1]](s64) ; CHECK: G_STORE [[LD3]](s32), [[GEP4]](p0) :: (store (s32) into %ir.addr2 + 4) -define void @test_extractvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) { - %struct = load %struct.nested, %struct.nested* %addr +define void @test_extractvalue_agg(ptr %addr, ptr %addr2) { + %struct = load %struct.nested, ptr %addr %res = extractvalue %struct.nested %struct, 1 - store {i8, i32} %res, {i8, i32}* %addr2 + store {i8, i32} %res, ptr %addr2 ret void } @@ -845,9 +844,9 @@ ; CHECK: [[VAL32:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[VAL:%[0-9]+]]:_(s8) = G_TRUNC [[VAL32]] ; CHECK: G_STORE [[VAL]](s8), [[STRUCT]](p0) -define void @test_trivial_extract_ptr([1 x i8*] %s, i8 %val) { - %addr = extractvalue [1 x i8*] %s, 0 - store i8 %val, i8* %addr +define void @test_trivial_extract_ptr([1 x ptr] %s, i8 %val) { + %addr = extractvalue [1 x ptr] %s, 0 + store i8 %val, ptr %addr ret void } @@ -871,10 +870,10 @@ ; CHECK: G_STORE %1(s32), [[GEP5]](p0) :: (store (s32) into %ir.addr + 8) ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST3]](s64) ; CHECK: G_STORE [[LD4]](s32), [[GEP6]](p0) :: (store (s32) into %ir.addr + 12) -define void @test_insertvalue(%struct.nested* %addr, i32 %val) { - %struct = load %struct.nested, %struct.nested* %addr +define void @test_insertvalue(ptr %addr, i32 %val) { + %struct = load %struct.nested, ptr %addr %newstruct = insertvalue %struct.nested %struct, i32 %val, 1, 1 - store %struct.nested %newstruct, %struct.nested* %addr + store %struct.nested %newstruct, ptr %addr ret void } @@ -887,13 +886,13 @@ ret [1 x i64] %res } -define [1 x i8*] @test_trivial_insert_ptr([1 x i8*] %s, i8* %val) { +define [1 x ptr] @test_trivial_insert_ptr([1 x ptr] %s, ptr %val) { ; CHECK-LABEL: name: test_trivial_insert_ptr ; CHECK: [[STRUCT:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[VAL:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: $x0 = COPY [[VAL]] - %res = insertvalue [1 x i8*] %s, i8* %val, 0 - ret [1 x i8*] %res + %res = insertvalue [1 x ptr] %s, ptr %val, 0 + ret [1 x ptr] %res } ; CHECK-LABEL: name: test_insertvalue_agg @@ -919,11 +918,11 @@ ; CHECK: G_STORE [[LD2]](s32), [[GEP6]](p0) :: (store (s32) into %ir.addr + 8) ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST4]](s64) ; CHECK: G_STORE [[LD6]](s32), [[GEP7]](p0) :: (store (s32) into %ir.addr + 12) -define void @test_insertvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) { - %smallstruct = load {i8, i32}, {i8, i32}* %addr2 - %struct = load %struct.nested, %struct.nested* %addr +define void @test_insertvalue_agg(ptr %addr, ptr %addr2) { + %smallstruct = load {i8, i32}, ptr %addr2 + %struct = load %struct.nested, ptr %addr %res = insertvalue %struct.nested %struct, {i8, i32} %smallstruct, 1 - store %struct.nested %res, %struct.nested* %addr + store %struct.nested %res, ptr %addr ret void } @@ -977,9 +976,9 @@ ; CHECK: [[TST:%[0-9]+]]:_(s1) = G_TRUNC [[TSTASSERT]] ; CHECK: [[RES:%[0-9]+]]:_(p0) = G_SELECT [[TST]](s1), [[LHS]], [[RHS]] ; CHECK: $x0 = COPY [[RES]] -define i8* @test_select_ptr(i1 %tst, i8* %lhs, i8* %rhs) { - %res = select i1 %tst, i8* %lhs, i8* %rhs - ret i8* %res +define ptr @test_select_ptr(i1 %tst, ptr %lhs, ptr %rhs) { + %res = select i1 %tst, ptr %lhs, ptr %rhs + ret ptr %res } ; CHECK-LABEL: name: test_select_vec @@ -1014,8 +1013,8 @@ ; CHECK: [[FP:%[0-9]+]]:_(s32) = G_LOAD [[FPADDR]](p0) ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_FPTOSI [[FP]](s32) ; CHECK: $x0 = COPY [[RES]] -define i64 
@test_fptosi(float* %fp.addr) { - %fp = load float, float* %fp.addr +define i64 @test_fptosi(ptr %fp.addr) { + %fp = load float, ptr %fp.addr %res = fptosi float %fp to i64 ret i64 %res } @@ -1025,8 +1024,8 @@ ; CHECK: [[FP:%[0-9]+]]:_(s32) = G_LOAD [[FPADDR]](p0) ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_FPTOUI [[FP]](s32) ; CHECK: $x0 = COPY [[RES]] -define i64 @test_fptoui(float* %fp.addr) { - %fp = load float, float* %fp.addr +define i64 @test_fptoui(ptr %fp.addr) { + %fp = load float, ptr %fp.addr %res = fptoui float %fp to i64 ret i64 %res } @@ -1036,9 +1035,9 @@ ; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[FP:%[0-9]+]]:_(s64) = G_SITOFP [[IN]](s32) ; CHECK: G_STORE [[FP]](s64), [[ADDR]](p0) -define void @test_sitofp(double* %addr, i32 %in) { +define void @test_sitofp(ptr %addr, i32 %in) { %fp = sitofp i32 %in to double - store double %fp, double* %addr + store double %fp, ptr %addr ret void } @@ -1047,9 +1046,9 @@ ; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[FP:%[0-9]+]]:_(s64) = G_UITOFP [[IN]](s32) ; CHECK: G_STORE [[FP]](s64), [[ADDR]](p0) -define void @test_uitofp(double* %addr, i32 %in) { +define void @test_uitofp(ptr %addr, i32 %in) { %fp = uitofp i32 %in to double - store double %fp, double* %addr + store double %fp, ptr %addr ret void } @@ -1075,8 +1074,8 @@ ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[TMP:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+00 ; CHECK: G_STORE [[TMP]](s32), [[ADDR]](p0) -define void @test_constant_float(float* %addr) { - store float 1.5, float* %addr +define void @test_constant_float(ptr %addr) { + store float 1.5, ptr %addr ret void } @@ -1088,11 +1087,11 @@ ; CHECK: [[RHS:%[0-9]+]]:_(s32) = G_LOAD [[RHSADDR]](p0) ; CHECK: [[TST:%[0-9]+]]:_(s1) = nnan ninf nsz arcp contract afn reassoc G_FCMP floatpred(oge), [[LHS]](s32), [[RHS]] ; CHECK: G_STORE [[TST]](s1), [[BOOLADDR]](p0) -define void @float_comparison(float* %a.addr, float* %b.addr, i1* %bool.addr) { - %a = load float, float* %a.addr - %b = load float, float* %b.addr +define void @float_comparison(ptr %a.addr, ptr %b.addr, ptr %bool.addr) { + %a = load float, ptr %a.addr + %b = load float, ptr %b.addr %res = fcmp nnan ninf nsz arcp contract afn reassoc oge float %a, %b - store i1 %res, i1* %bool.addr + store i1 %res, ptr %bool.addr ret void } @@ -1111,93 +1110,93 @@ @var = global i32 0 -define i32* @test_global() { +define ptr @test_global() { ; CHECK-LABEL: name: test_global ; CHECK: [[TMP:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var{{$}} ; CHECK: $x0 = COPY [[TMP]](p0) - ret i32* @var + ret ptr @var } @var1 = addrspace(42) global i32 0 -define i32 addrspace(42)* @test_global_addrspace() { +define ptr addrspace(42) @test_global_addrspace() { ; CHECK-LABEL: name: test_global ; CHECK: [[TMP:%[0-9]+]]:_(p42) = G_GLOBAL_VALUE @var1{{$}} ; CHECK: $x0 = COPY [[TMP]](p42) - ret i32 addrspace(42)* @var1 + ret ptr addrspace(42) @var1 } -define void()* @test_global_func() { +define ptr @test_global_func() { ; CHECK-LABEL: name: test_global_func ; CHECK: [[TMP:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @allocai64{{$}} ; CHECK: $x0 = COPY [[TMP]](p0) - ret void()* @allocai64 + ret ptr @allocai64 } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) -define void @test_memcpy(i8* %dst, i8* %src, i64 %size) { +declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) +define void @test_memcpy(ptr %dst, ptr %src, i64 %size) { ; CHECK-LABEL: name: test_memcpy ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK: 
G_MEMCPY [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 0 :: (store (s8) into %ir.dst), (load (s8) from %ir.src) - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0) + call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 0) ret void } -define void @test_memcpy_tail(i8* %dst, i8* %src, i64 %size) { +define void @test_memcpy_tail(ptr %dst, ptr %src, i64 %size) { ; CHECK-LABEL: name: test_memcpy_tail ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK: G_MEMCPY [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 1 :: (store (s8) into %ir.dst), (load (s8) from %ir.src) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0) + tail call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 0) ret void } -declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)*, i8 addrspace(1)*, i64, i1) -define void @test_memcpy_nonzero_as(i8 addrspace(1)* %dst, i8 addrspace(1) * %src, i64 %size) { +declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1), ptr addrspace(1), i64, i1) +define void @test_memcpy_nonzero_as(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %size) { ; CHECK-LABEL: name: test_memcpy_nonzero_as ; CHECK: [[DST:%[0-9]+]]:_(p1) = COPY $x0 ; CHECK: [[SRC:%[0-9]+]]:_(p1) = COPY $x1 ; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK: G_MEMCPY [[DST]](p1), [[SRC]](p1), [[SIZE]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 1) - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %size, i1 0) + call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %size, i1 0) ret void } -declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i1) -define void @test_memmove(i8* %dst, i8* %src, i64 %size) { +declare void @llvm.memmove.p0.p0.i64(ptr, ptr, i64, i1) +define void @test_memmove(ptr %dst, ptr %src, i64 %size) { ; CHECK-LABEL: name: test_memmove ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK: G_MEMMOVE [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 0 :: (store (s8) into %ir.dst), (load (s8) from %ir.src) - call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0) + call void @llvm.memmove.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 0) ret void } -declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1) -define void @test_memset(i8* %dst, i8 %val, i64 %size) { +declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) +define void @test_memset(ptr %dst, i8 %val, i64 %size) { ; CHECK-LABEL: name: test_memset ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[SRC_C:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[SRC:%[0-9]+]]:_(s8) = G_TRUNC [[SRC_C]] ; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK: G_MEMSET [[DST]](p0), [[SRC]](s8), [[SIZE]](s64), 0 :: (store (s8) into %ir.dst) - call void @llvm.memset.p0i8.i64(i8* %dst, i8 %val, i64 %size, i1 0) + call void @llvm.memset.p0.i64(ptr %dst, i8 %val, i64 %size, i1 0) ret void } -define void @test_large_const(i128* %addr) { +define void @test_large_const(ptr %addr) { ; CHECK-LABEL: name: test_large_const ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[VAL:%[0-9]+]]:_(s128) = G_CONSTANT i128 42 ; CHECK: G_STORE [[VAL]](s128), [[ADDR]](p0) - store i128 42, i128* %addr + store i128 42, ptr %addr ret void } @@ -1205,7 +1204,7 @@ ; to insert the constants at the end of the block, even if they were encountered ; after the block's 
terminators had been emitted. Also make sure the order is ; correct. -define i8* @test_const_placement() { +define ptr @test_const_placement() { ; CHECK-LABEL: name: test_const_placement ; CHECK: bb.{{[0-9]+}} (%ir-block.{{[0-9]+}}): ; CHECK: [[VAL_INT:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 @@ -1214,29 +1213,29 @@ br label %next next: - ret i8* inttoptr(i32 42 to i8*) + ret ptr inttoptr(i32 42 to ptr) } -declare void @llvm.va_end(i8*) -define void @test_va_end(i8* %list) { +declare void @llvm.va_end(ptr) +define void @test_va_end(ptr %list) { ; CHECK-LABEL: name: test_va_end ; CHECK-NOT: va_end ; CHECK-NOT: INTRINSIC ; CHECK: RET_ReallyLR - call void @llvm.va_end(i8* %list) + call void @llvm.va_end(ptr %list) ret void } -define void @test_va_arg(i8* %list) { +define void @test_va_arg(ptr %list) { ; CHECK-LABEL: test_va_arg ; CHECK: [[LIST:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: G_VAARG [[LIST]](p0), 8 ; CHECK: G_VAARG [[LIST]](p0), 1 ; CHECK: G_VAARG [[LIST]](p0), 16 - %v0 = va_arg i8* %list, i64 - %v1 = va_arg i8* %list, i8 - %v2 = va_arg i8* %list, i128 + %v0 = va_arg ptr %list, i64 + %v1 = va_arg ptr %list, i8 + %v2 = va_arg ptr %list, i128 ret void } @@ -1450,8 +1449,8 @@ ret i32 %res } -declare void @llvm.lifetime.start.p0i8(i64, i8*) -declare void @llvm.lifetime.end.p0i8(i64, i8*) +declare void @llvm.lifetime.start.p0(i64, ptr) +declare void @llvm.lifetime.end.p0(i64, ptr) define void @test_lifetime_intrin() { ; CHECK-LABEL: name: test_lifetime_intrin ; CHECK: RET_ReallyLR @@ -1462,13 +1461,13 @@ ; O3-NEXT: LIFETIME_END %stack.0.slot ; O3-NEXT: RET_ReallyLR %slot = alloca i8, i32 4 - call void @llvm.lifetime.start.p0i8(i64 0, i8* %slot) - store volatile i8 10, i8* %slot - call void @llvm.lifetime.end.p0i8(i64 0, i8* %slot) + call void @llvm.lifetime.start.p0(i64 0, ptr %slot) + store volatile i8 10, ptr %slot + call void @llvm.lifetime.end.p0(i64 0, ptr %slot) ret void } -define void @test_load_store_atomics(i8* %addr) { +define void @test_load_store_atomics(ptr %addr) { ; CHECK-LABEL: name: test_load_store_atomics ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[V0:%[0-9]+]]:_(s8) = G_LOAD [[ADDR]](p0) :: (load unordered (s8) from %ir.addr) @@ -1477,14 +1476,14 @@ ; CHECK: G_STORE [[V1]](s8), [[ADDR]](p0) :: (store release (s8) into %ir.addr) ; CHECK: [[V2:%[0-9]+]]:_(s8) = G_LOAD [[ADDR]](p0) :: (load syncscope("singlethread") seq_cst (s8) from %ir.addr) ; CHECK: G_STORE [[V2]](s8), [[ADDR]](p0) :: (store syncscope("singlethread") monotonic (s8) into %ir.addr) - %v0 = load atomic i8, i8* %addr unordered, align 1 - store atomic i8 %v0, i8* %addr monotonic, align 1 + %v0 = load atomic i8, ptr %addr unordered, align 1 + store atomic i8 %v0, ptr %addr monotonic, align 1 - %v1 = load atomic i8, i8* %addr acquire, align 1 - store atomic i8 %v1, i8* %addr release, align 1 + %v1 = load atomic i8, ptr %addr acquire, align 1 + store atomic i8 %v1, ptr %addr release, align 1 - %v2 = load atomic i8, i8* %addr syncscope("singlethread") seq_cst, align 1 - store atomic i8 %v2, i8* %addr syncscope("singlethread") monotonic, align 1 + %v2 = load atomic i8, ptr %addr syncscope("singlethread") seq_cst, align 1 + store atomic i8 %v2, ptr %addr syncscope("singlethread") monotonic, align 1 ret void } @@ -1803,28 +1802,28 @@ ret <4 x half> } -define i32 @test_target_mem_intrinsic(i32* %addr) { +define i32 @test_target_mem_intrinsic(ptr %addr) { ; CHECK-LABEL: name: test_target_mem_intrinsic ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[VAL:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS 
intrinsic(@llvm.aarch64.ldxr), [[ADDR]](p0) :: (volatile load (s32) from %ir.addr) ; CHECK: G_TRUNC [[VAL]](s64) - %val = call i64 @llvm.aarch64.ldxr.p0i32(i32* elementtype(i32) %addr) + %val = call i64 @llvm.aarch64.ldxr.p0(ptr elementtype(i32) %addr) %trunc = trunc i64 %val to i32 ret i32 %trunc } -declare i64 @llvm.aarch64.ldxr.p0i32(i32*) nounwind +declare i64 @llvm.aarch64.ldxr.p0(ptr) nounwind %zerosize_type = type {} -define %zerosize_type @test_empty_load_store(%zerosize_type *%ptr, %zerosize_type %in) noinline optnone { +define %zerosize_type @test_empty_load_store(ptr %ptr, %zerosize_type %in) noinline optnone { ; CHECK-LABEL: name: test_empty_load_store ; CHECK-NOT: G_STORE ; CHECK-NOT: G_LOAD ; CHECK: RET_ReallyLR entry: - store %zerosize_type undef, %zerosize_type* undef, align 4 - %val = load %zerosize_type, %zerosize_type* %ptr, align 4 + store %zerosize_type undef, ptr undef, align 4 + %val = load %zerosize_type, ptr %ptr, align 4 ret %zerosize_type %in } @@ -1868,7 +1867,7 @@ ret i64 %res } -define void @test_phi_diamond({ i8, i16, i32 }* %a.ptr, { i8, i16, i32 }* %b.ptr, i1 %selector, { i8, i16, i32 }* %dst) { +define void @test_phi_diamond(ptr %a.ptr, ptr %b.ptr, i1 %selector, ptr %dst) { ; CHECK-LABEL: name: test_phi_diamond ; CHECK: [[ARG1:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[ARG2:%[0-9]+]]:_(p0) = COPY $x1 @@ -1913,16 +1912,16 @@ br i1 %selector, label %store.a, label %store.b store.a: - %a = load { i8, i16, i32 }, { i8, i16, i32 }* %a.ptr + %a = load { i8, i16, i32 }, ptr %a.ptr br label %join store.b: - %b = load { i8, i16, i32 }, { i8, i16, i32 }* %b.ptr + %b = load { i8, i16, i32 }, ptr %b.ptr br label %join join: %v = phi { i8, i16, i32 } [ %a, %store.a ], [ %b, %store.b ] - store { i8, i16, i32 } %v, { i8, i16, i32 }* %dst + store { i8, i16, i32 } %v, ptr %dst ret void } @@ -1930,7 +1929,7 @@ %agg.inner = type {i16, i8, %agg.inner.inner } %agg.nested = type {i32, i32, %agg.inner, i32} -define void @test_nested_aggregate_const(%agg.nested *%ptr) { +define void @test_nested_aggregate_const(ptr %ptr) { ; CHECK-LABEL: name: test_nested_aggregate_const ; CHECK: [[BASE:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[CST1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -1958,7 +1957,7 @@ ; CHECK: [[CST12:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST12]](s64) ; CHECK: G_STORE [[CST6]](s32), [[GEP6]](p0) :: (store (s32) into %ir.ptr + 32, align 8) - store %agg.nested { i32 1, i32 1, %agg.inner { i16 2, i8 3, %agg.inner.inner {i64 5, i64 8} }, i32 13}, %agg.nested *%ptr + store %agg.nested { i32 1, i32 1, %agg.inner { i16 2, i8 3, %agg.inner.inner {i64 5, i64 8} }, i32 13}, ptr %ptr ret void } @@ -1974,7 +1973,7 @@ } ; Try one cmpxchg -define i32 @test_atomic_cmpxchg_1(i32* %addr) { +define i32 @test_atomic_cmpxchg_1(ptr %addr) { ; CHECK-LABEL: name: test_atomic_cmpxchg_1 ; CHECK: bb.1.entry: ; CHECK-NEXT: successors: %bb.{{[^)]+}} @@ -1991,7 +1990,7 @@ entry: br label %repeat repeat: - %val_success = cmpxchg i32* %addr, i32 0, i32 1 monotonic monotonic + %val_success = cmpxchg ptr %addr, i32 0, i32 1 monotonic monotonic %value_loaded = extractvalue { i32, i1 } %val_success, 0 %success = extractvalue { i32, i1 } %val_success, 1 br i1 %success, label %done, label %repeat @@ -2000,7 +1999,7 @@ } ; Try one cmpxchg -define i32 @test_weak_atomic_cmpxchg_1(i32* %addr) { +define i32 @test_weak_atomic_cmpxchg_1(ptr %addr) { ; CHECK-LABEL: name: test_weak_atomic_cmpxchg_1 ; CHECK: bb.1.entry: ; CHECK-NEXT: successors: %bb.{{[^)]+}} @@ -2017,7 
+2016,7 @@ entry: br label %repeat repeat: - %val_success = cmpxchg weak i32* %addr, i32 0, i32 1 monotonic monotonic + %val_success = cmpxchg weak ptr %addr, i32 0, i32 1 monotonic monotonic %value_loaded = extractvalue { i32, i1 } %val_success, 0 %success = extractvalue { i32, i1 } %val_success, 1 br i1 %success, label %done, label %repeat @@ -2026,7 +2025,7 @@ } ; Try one cmpxchg with a small type and high atomic ordering. -define i16 @test_atomic_cmpxchg_2(i16* %addr) { +define i16 @test_atomic_cmpxchg_2(ptr %addr) { ; CHECK-LABEL: name: test_atomic_cmpxchg_2 ; CHECK: bb.1.entry: ; CHECK-NEXT: successors: %bb.2({{[^)]+}}) @@ -2043,7 +2042,7 @@ entry: br label %repeat repeat: - %val_success = cmpxchg i16* %addr, i16 0, i16 1 seq_cst seq_cst + %val_success = cmpxchg ptr %addr, i16 0, i16 1 seq_cst seq_cst %value_loaded = extractvalue { i16, i1 } %val_success, 0 %success = extractvalue { i16, i1 } %val_success, 1 br i1 %success, label %done, label %repeat @@ -2052,7 +2051,7 @@ } ; Try one cmpxchg where the success order and failure order differ. -define i64 @test_atomic_cmpxchg_3(i64* %addr) { +define i64 @test_atomic_cmpxchg_3(ptr %addr) { ; CHECK-LABEL: name: test_atomic_cmpxchg_3 ; CHECK: bb.1.entry: ; CHECK-NEXT: successors: %bb.2({{[^)]+}}) @@ -2069,7 +2068,7 @@ entry: br label %repeat repeat: - %val_success = cmpxchg i64* %addr, i64 0, i64 1 seq_cst acquire + %val_success = cmpxchg ptr %addr, i64 0, i64 1 seq_cst acquire %value_loaded = extractvalue { i64, i1 } %val_success, 0 %success = extractvalue { i64, i1 } %val_success, 1 br i1 %success, label %done, label %repeat @@ -2079,7 +2078,7 @@ ; Try a monotonic atomicrmw xchg ; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. -define i32 @test_atomicrmw_xchg(i256* %addr) { +define i32 @test_atomicrmw_xchg(ptr %addr) { ; CHECK-LABEL: name: test_atomicrmw_xchg ; CHECK: bb.1 (%ir-block.{{[0-9]+}}): ; CHECK-NEXT: liveins: $x0 @@ -2087,7 +2086,7 @@ ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 ; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_XCHG [[ADDR]](p0), [[VAL]] :: (load store monotonic (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] - %oldval = atomicrmw xchg i256* %addr, i256 1 monotonic + %oldval = atomicrmw xchg ptr %addr, i256 1 monotonic ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this ; test so work around it by truncating to i32 for now. %oldval.trunc = trunc i256 %oldval to i32 @@ -2096,7 +2095,7 @@ ; Try an acquire atomicrmw add ; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. -define i32 @test_atomicrmw_add(i256* %addr) { +define i32 @test_atomicrmw_add(ptr %addr) { ; CHECK-LABEL: name: test_atomicrmw_add ; CHECK: bb.1 (%ir-block.{{[0-9]+}}): ; CHECK-NEXT: liveins: $x0 @@ -2104,7 +2103,7 @@ ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 ; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_ADD [[ADDR]](p0), [[VAL]] :: (load store acquire (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] - %oldval = atomicrmw add i256* %addr, i256 1 acquire + %oldval = atomicrmw add ptr %addr, i256 1 acquire ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this ; test so work around it by truncating to i32 for now. 
%oldval.trunc = trunc i256 %oldval to i32 @@ -2113,7 +2112,7 @@ ; Try a release atomicrmw sub ; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. -define i32 @test_atomicrmw_sub(i256* %addr) { +define i32 @test_atomicrmw_sub(ptr %addr) { ; CHECK-LABEL: name: test_atomicrmw_sub ; CHECK: bb.1 (%ir-block.{{[0-9]+}}): ; CHECK-NEXT: liveins: $x0 @@ -2121,7 +2120,7 @@ ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 ; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_SUB [[ADDR]](p0), [[VAL]] :: (load store release (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] - %oldval = atomicrmw sub i256* %addr, i256 1 release + %oldval = atomicrmw sub ptr %addr, i256 1 release ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this ; test so work around it by truncating to i32 for now. %oldval.trunc = trunc i256 %oldval to i32 @@ -2130,7 +2129,7 @@ ; Try an acq_rel atomicrmw and ; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. -define i32 @test_atomicrmw_and(i256* %addr) { +define i32 @test_atomicrmw_and(ptr %addr) { ; CHECK-LABEL: name: test_atomicrmw_and ; CHECK: bb.1 (%ir-block.{{[0-9]+}}): ; CHECK-NEXT: liveins: $x0 @@ -2138,7 +2137,7 @@ ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 ; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_AND [[ADDR]](p0), [[VAL]] :: (load store acq_rel (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] - %oldval = atomicrmw and i256* %addr, i256 1 acq_rel + %oldval = atomicrmw and ptr %addr, i256 1 acq_rel ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this ; test so work around it by truncating to i32 for now. %oldval.trunc = trunc i256 %oldval to i32 @@ -2147,7 +2146,7 @@ ; Try an seq_cst atomicrmw nand ; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. -define i32 @test_atomicrmw_nand(i256* %addr) { +define i32 @test_atomicrmw_nand(ptr %addr) { ; CHECK-LABEL: name: test_atomicrmw_nand ; CHECK: bb.1 (%ir-block.{{[0-9]+}}): ; CHECK-NEXT: liveins: $x0 @@ -2155,7 +2154,7 @@ ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 ; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_NAND [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] - %oldval = atomicrmw nand i256* %addr, i256 1 seq_cst + %oldval = atomicrmw nand ptr %addr, i256 1 seq_cst ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this ; test so work around it by truncating to i32 for now. %oldval.trunc = trunc i256 %oldval to i32 @@ -2164,7 +2163,7 @@ ; Try an seq_cst atomicrmw or ; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. 
-define i32 @test_atomicrmw_or(i256* %addr) { +define i32 @test_atomicrmw_or(ptr %addr) { ; CHECK-LABEL: name: test_atomicrmw_or ; CHECK: bb.1 (%ir-block.{{[0-9]+}}): ; CHECK-NEXT: liveins: $x0 @@ -2172,7 +2171,7 @@ ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 ; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_OR [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] - %oldval = atomicrmw or i256* %addr, i256 1 seq_cst + %oldval = atomicrmw or ptr %addr, i256 1 seq_cst ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this ; test so work around it by truncating to i32 for now. %oldval.trunc = trunc i256 %oldval to i32 @@ -2181,7 +2180,7 @@ ; Try an seq_cst atomicrmw xor ; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. -define i32 @test_atomicrmw_xor(i256* %addr) { +define i32 @test_atomicrmw_xor(ptr %addr) { ; CHECK-LABEL: name: test_atomicrmw_xor ; CHECK: bb.1 (%ir-block.{{[0-9]+}}): ; CHECK-NEXT: liveins: $x0 @@ -2189,7 +2188,7 @@ ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 ; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_XOR [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] - %oldval = atomicrmw xor i256* %addr, i256 1 seq_cst + %oldval = atomicrmw xor ptr %addr, i256 1 seq_cst ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this ; test so work around it by truncating to i32 for now. %oldval.trunc = trunc i256 %oldval to i32 @@ -2198,7 +2197,7 @@ ; Try an seq_cst atomicrmw min ; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. -define i32 @test_atomicrmw_min(i256* %addr) { +define i32 @test_atomicrmw_min(ptr %addr) { ; CHECK-LABEL: name: test_atomicrmw_min ; CHECK: bb.1 (%ir-block.{{[0-9]+}}): ; CHECK-NEXT: liveins: $x0 @@ -2206,7 +2205,7 @@ ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 ; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_MIN [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] - %oldval = atomicrmw min i256* %addr, i256 1 seq_cst + %oldval = atomicrmw min ptr %addr, i256 1 seq_cst ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this ; test so work around it by truncating to i32 for now. %oldval.trunc = trunc i256 %oldval to i32 @@ -2215,7 +2214,7 @@ ; Try an seq_cst atomicrmw max ; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. -define i32 @test_atomicrmw_max(i256* %addr) { +define i32 @test_atomicrmw_max(ptr %addr) { ; CHECK-LABEL: name: test_atomicrmw_max ; CHECK: bb.1 (%ir-block.{{[0-9]+}}): ; CHECK-NEXT: liveins: $x0 @@ -2223,7 +2222,7 @@ ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 ; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_MAX [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] - %oldval = atomicrmw max i256* %addr, i256 1 seq_cst + %oldval = atomicrmw max ptr %addr, i256 1 seq_cst ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this ; test so work around it by truncating to i32 for now. 
%oldval.trunc = trunc i256 %oldval to i32 @@ -2232,7 +2231,7 @@ ; Try an seq_cst atomicrmw unsigned min ; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. -define i32 @test_atomicrmw_umin(i256* %addr) { +define i32 @test_atomicrmw_umin(ptr %addr) { ; CHECK-LABEL: name: test_atomicrmw_umin ; CHECK: bb.1 (%ir-block.{{[0-9]+}}): ; CHECK-NEXT: liveins: $x0 @@ -2240,7 +2239,7 @@ ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 ; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_UMIN [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] - %oldval = atomicrmw umin i256* %addr, i256 1 seq_cst + %oldval = atomicrmw umin ptr %addr, i256 1 seq_cst ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this ; test so work around it by truncating to i32 for now. %oldval.trunc = trunc i256 %oldval to i32 @@ -2249,7 +2248,7 @@ ; Try an seq_cst atomicrmw unsigned max ; AArch64 will expand some atomicrmw's at the LLVM-IR level so we use a wide type to avoid this. -define i32 @test_atomicrmw_umax(i256* %addr) { +define i32 @test_atomicrmw_umax(ptr %addr) { ; CHECK-LABEL: name: test_atomicrmw_umax ; CHECK: bb.1 (%ir-block.{{[0-9]+}}): ; CHECK-NEXT: liveins: $x0 @@ -2257,36 +2256,35 @@ ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 ; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_UMAX [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] - %oldval = atomicrmw umax i256* %addr, i256 1 seq_cst + %oldval = atomicrmw umax ptr %addr, i256 1 seq_cst ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this ; test so work around it by truncating to i32 for now. 
%oldval.trunc = trunc i256 %oldval to i32 ret i32 %oldval.trunc } -@addr = global i8* null +@addr = global ptr null define void @test_blockaddress() { ; CHECK-LABEL: name: test_blockaddress ; CHECK: [[BADDR:%[0-9]+]]:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) ; CHECK: G_STORE [[BADDR]](p0) - store i8* blockaddress(@test_blockaddress, %block), i8** @addr - indirectbr i8* blockaddress(@test_blockaddress, %block), [label %block] + store ptr blockaddress(@test_blockaddress, %block), ptr @addr + indirectbr ptr blockaddress(@test_blockaddress, %block), [label %block] block: ret void } %t = type { i32 } -declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) readonly nounwind -declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind +declare ptr @llvm.invariant.start.p0(i64, ptr nocapture) readonly nounwind +declare void @llvm.invariant.end.p0(ptr, i64, ptr nocapture) nounwind define void @test_invariant_intrin() { ; CHECK-LABEL: name: test_invariant_intrin ; CHECK: %{{[0-9]+}}:_(s64) = G_IMPLICIT_DEF ; CHECK-NEXT: RET_ReallyLR %x = alloca %t - %y = bitcast %t* %x to i8* - %inv = call {}* @llvm.invariant.start.p0i8(i64 8, i8* %y) - call void @llvm.invariant.end.p0i8({}* %inv, i64 8, i8* %y) + %inv = call ptr @llvm.invariant.start.p0(i64 8, ptr %x) + call void @llvm.invariant.end.p0(ptr %inv, i64 8, ptr %x) ret void } @@ -2373,14 +2371,14 @@ ; CHECK-LABEL: name: test_llvm.aarch64.neon.ld3.v4i32.p0i32 ; CHECK: %1:_(<4 x s32>), %2:_(<4 x s32>), %3:_(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld3), %0(p0) :: (load (s384) from %ir.ptr, align 64) -define void @test_llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %ptr) { - %arst = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %ptr) +define void @test_llvm.aarch64.neon.ld3.v4i32.p0i32(ptr %ptr) { + %arst = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %ptr) ret void } -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*) #3 +declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr) #3 -define void @test_i1_arg_zext(void (i1)* %f) { +define void @test_i1_arg_zext(ptr %f) { ; CHECK-LABEL: name: test_i1_arg_zext ; CHECK: [[I1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; CHECK: [[ZEXT0:%[0-9]+]]:_(s8) = G_ZEXT [[I1]](s1) @@ -2390,15 +2388,15 @@ ret void } -declare i8* @llvm.stacksave() -declare void @llvm.stackrestore(i8*) +declare ptr @llvm.stacksave() +declare void @llvm.stackrestore(ptr) define void @test_stacksaverestore() { ; CHECK-LABEL: name: test_stacksaverestore ; CHECK: [[SAVE:%[0-9]+]]:_(p0) = COPY $sp ; CHECK-NEXT: $sp = COPY [[SAVE]](p0) ; CHECK-NEXT: RET_ReallyLR - %sp = call i8* @llvm.stacksave() - call void @llvm.stackrestore(i8* %sp) + %sp = call ptr @llvm.stacksave() + call void @llvm.stackrestore(ptr %sp) ret void } @@ -2420,7 +2418,7 @@ } declare void @llvm.experimental.noalias.scope.decl(metadata) -define void @test.llvm.noalias.scope.decl(i8* %P, i8* %Q) nounwind ssp { +define void @test.llvm.noalias.scope.decl(ptr %P, ptr %Q) nounwind ssp { tail call void @llvm.experimental.noalias.scope.decl(metadata !3) ; CHECK-LABEL: name: test.llvm.noalias.scope.decl ; CHECK-NOT: llvm.experimental.noalias.scope.decl @@ -2442,12 +2440,12 @@ ret void } -declare void @llvm.var.annotation(i8*, i8*, i8*, i32, i8*) -define void @test_var_annotation(i8*, i8*, i8*, i32) { +declare void @llvm.var.annotation(ptr, ptr, ptr, i32, ptr) +define void @test_var_annotation(ptr, ptr, ptr, 
i32) { ; CHECK-LABEL: name: test_var_annotation ; CHECK-NOT: llvm.var.annotation ; CHECK: RET_ReallyLR - call void @llvm.var.annotation(i8* %0, i8* %1, i8* %2, i32 %3, i8* null) + call void @llvm.var.annotation(ptr %0, ptr %1, ptr %2, i32 %3, ptr null) ret void } @@ -2471,7 +2469,7 @@ ret i64 %res } -define {i8, i32} @test_freeze_struct({ i8, i32 }* %addr) { +define {i8, i32} @test_freeze_struct(ptr %addr) { ; CHECK-LABEL: name: test_freeze_struct ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) @@ -2484,7 +2482,7 @@ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]] ; CHECK-NEXT: $w1 = COPY [[FREEZE1]] ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1 - %load = load { i8, i32 }, { i8, i32 }* %addr + %load = load { i8, i32 }, ptr %addr %res = freeze {i8, i32} %load ret {i8, i32} %res } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=1 -stop-after=aarch64-expand-pseudo -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-NOLSE ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=1 -stop-after=aarch64-expand-pseudo -mattr=+rcpc -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-LDAPR -define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) { +define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) { ; CHECK-LABEL: name: val_compare_and_swap ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -36,12 +36,12 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire, !pcsections !0 + %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acquire acquire, !pcsections !0 %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val } -define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) { +define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) { ; CHECK-LABEL: name: val_compare_and_swap_from_load ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -76,13 +76,13 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %new = load i32, i32* %pnew, !pcsections !0 - %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire, !pcsections !0 + %new = load i32, ptr %pnew, !pcsections !0 + %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acquire acquire, !pcsections !0 %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val } -define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) { +define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) { ; CHECK-LABEL: name: val_compare_and_swap_rel ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -116,12 +116,12 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic, !pcsections !0 + %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acq_rel monotonic, !pcsections !0 %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val } -define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) { +define i64 @val_compare_and_swap_64(ptr 
%p, i64 %cmp, i64 %new) { ; CHECK-LABEL: name: val_compare_and_swap_64 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -155,12 +155,12 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $x0 = ORRXrs $xzr, killed $x8, 0 ; CHECK-NEXT: RET undef $lr, implicit $x0 - %pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic, !pcsections !0 + %pair = cmpxchg ptr %p, i64 %cmp, i64 %new monotonic monotonic, !pcsections !0 %val = extractvalue { i64, i1 } %pair, 0 ret i64 %val } -define i64 @val_compare_and_swap_64_monotonic_seqcst(i64* %p, i64 %cmp, i64 %new) { +define i64 @val_compare_and_swap_64_monotonic_seqcst(ptr %p, i64 %cmp, i64 %new) { ; CHECK-LABEL: name: val_compare_and_swap_64_monotonic_seqcst ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -194,12 +194,12 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $x0 = ORRXrs $xzr, killed $x8, 0 ; CHECK-NEXT: RET undef $lr, implicit $x0 - %pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic seq_cst, !pcsections !0 + %pair = cmpxchg ptr %p, i64 %cmp, i64 %new monotonic seq_cst, !pcsections !0 %val = extractvalue { i64, i1 } %pair, 0 ret i64 %val } -define i64 @val_compare_and_swap_64_release_acquire(i64* %p, i64 %cmp, i64 %new) { +define i64 @val_compare_and_swap_64_release_acquire(ptr %p, i64 %cmp, i64 %new) { ; CHECK-LABEL: name: val_compare_and_swap_64_release_acquire ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -233,12 +233,12 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $x0 = ORRXrs $xzr, killed $x8, 0 ; CHECK-NEXT: RET undef $lr, implicit $x0 - %pair = cmpxchg i64* %p, i64 %cmp, i64 %new release acquire, !pcsections !0 + %pair = cmpxchg ptr %p, i64 %cmp, i64 %new release acquire, !pcsections !0 %val = extractvalue { i64, i1 } %pair, 0 ret i64 %val } -define i32 @fetch_and_nand(i32* %p) { +define i32 @fetch_and_nand(ptr %p) { ; CHECK-LABEL: name: fetch_and_nand ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -260,11 +260,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %val = atomicrmw nand i32* %p, i32 7 release, !pcsections !0 + %val = atomicrmw nand ptr %p, i32 7 release, !pcsections !0 ret i32 %val } -define i64 @fetch_and_nand_64(i64* %p) { +define i64 @fetch_and_nand_64(ptr %p) { ; CHECK-LABEL: name: fetch_and_nand_64 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -286,11 +286,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $x0 = ORRXrs $xzr, killed $x8, 0 ; CHECK-NEXT: RET undef $lr, implicit $x0 - %val = atomicrmw nand i64* %p, i64 7 acq_rel, !pcsections !0 + %val = atomicrmw nand ptr %p, i64 7 acq_rel, !pcsections !0 ret i64 %val } -define i32 @fetch_and_or(i32* %p) { +define i32 @fetch_and_or(ptr %p) { ; CHECK-LABEL: name: fetch_and_or ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -312,11 +312,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %val = atomicrmw or i32* %p, i32 5 seq_cst, !pcsections !0 + %val = atomicrmw or ptr %p, i32 5 seq_cst, !pcsections !0 ret i32 %val } -define i64 @fetch_and_or_64(i64* %p) { +define i64 @fetch_and_or_64(ptr %p) { ; CHECK-LABEL: name: fetch_and_or_64 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -337,7 +337,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $x0 = ORRXrs $xzr, killed $x8, 0 ; CHECK-NEXT: RET undef $lr, implicit $x0 - %val = atomicrmw or i64* %p, i64 7 
monotonic, !pcsections !0 + %val = atomicrmw or ptr %p, i64 7 monotonic, !pcsections !0 ret i64 %val } @@ -368,18 +368,18 @@ ret void } -define i32 @atomic_load(i32* %p) { +define i32 @atomic_load(ptr %p) { ; CHECK-LABEL: name: atomic_load ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w0 = LDARW killed renamable $x0, pcsections !0 :: (load seq_cst (s32) from %ir.p) ; CHECK-NEXT: RET undef $lr, implicit $w0 - %r = load atomic i32, i32* %p seq_cst, align 4, !pcsections !0 + %r = load atomic i32, ptr %p seq_cst, align 4, !pcsections !0 ret i32 %r } -define i8 @atomic_load_relaxed_8(i8* %p, i32 %off32) { +define i8 @atomic_load_relaxed_8(ptr %p, i32 %off32) { ; CHECK-LABEL: name: atomic_load_relaxed_8 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: liveins: $w1, $x0 @@ -393,25 +393,25 @@ ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w10, killed renamable $w9, 0 ; CHECK-NEXT: $w0 = ADDWrs killed renamable $w9, killed renamable $w8, 0, pcsections !0 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %ptr_unsigned = getelementptr i8, i8* %p, i32 4095 - %val_unsigned = load atomic i8, i8* %ptr_unsigned monotonic, align 1, !pcsections !0 + %ptr_unsigned = getelementptr i8, ptr %p, i32 4095 + %val_unsigned = load atomic i8, ptr %ptr_unsigned monotonic, align 1, !pcsections !0 - %ptr_regoff = getelementptr i8, i8* %p, i32 %off32 - %val_regoff = load atomic i8, i8* %ptr_regoff unordered, align 1, !pcsections !0 + %ptr_regoff = getelementptr i8, ptr %p, i32 %off32 + %val_regoff = load atomic i8, ptr %ptr_regoff unordered, align 1, !pcsections !0 %tot1 = add i8 %val_unsigned, %val_regoff, !pcsections !0 - %ptr_unscaled = getelementptr i8, i8* %p, i32 -256 - %val_unscaled = load atomic i8, i8* %ptr_unscaled monotonic, align 1, !pcsections !0 + %ptr_unscaled = getelementptr i8, ptr %p, i32 -256 + %val_unscaled = load atomic i8, ptr %ptr_unscaled monotonic, align 1, !pcsections !0 %tot2 = add i8 %tot1, %val_unscaled, !pcsections !0 - %ptr_random = getelementptr i8, i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm) - %val_random = load atomic i8, i8* %ptr_random unordered, align 1, !pcsections !0 + %ptr_random = getelementptr i8, ptr %p, i32 1191936 ; 0x123000 (i.e. 
ADD imm) + %val_random = load atomic i8, ptr %ptr_random unordered, align 1, !pcsections !0 %tot3 = add i8 %tot2, %val_random, !pcsections !0 ret i8 %tot3 } -define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) { +define i16 @atomic_load_relaxed_16(ptr %p, i32 %off32) { ; CHECK-LABEL: name: atomic_load_relaxed_16 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: liveins: $w1, $x0 @@ -425,25 +425,25 @@ ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w10, killed renamable $w9, 0 ; CHECK-NEXT: $w0 = ADDWrs killed renamable $w9, killed renamable $w8, 0, pcsections !0 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %ptr_unsigned = getelementptr i16, i16* %p, i32 4095 - %val_unsigned = load atomic i16, i16* %ptr_unsigned monotonic, align 2, !pcsections !0 + %ptr_unsigned = getelementptr i16, ptr %p, i32 4095 + %val_unsigned = load atomic i16, ptr %ptr_unsigned monotonic, align 2, !pcsections !0 - %ptr_regoff = getelementptr i16, i16* %p, i32 %off32 - %val_regoff = load atomic i16, i16* %ptr_regoff unordered, align 2, !pcsections !0 + %ptr_regoff = getelementptr i16, ptr %p, i32 %off32 + %val_regoff = load atomic i16, ptr %ptr_regoff unordered, align 2, !pcsections !0 %tot1 = add i16 %val_unsigned, %val_regoff, !pcsections !0 - %ptr_unscaled = getelementptr i16, i16* %p, i32 -128 - %val_unscaled = load atomic i16, i16* %ptr_unscaled monotonic, align 2, !pcsections !0 + %ptr_unscaled = getelementptr i16, ptr %p, i32 -128 + %val_unscaled = load atomic i16, ptr %ptr_unscaled monotonic, align 2, !pcsections !0 %tot2 = add i16 %tot1, %val_unscaled, !pcsections !0 - %ptr_random = getelementptr i16, i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm) - %val_random = load atomic i16, i16* %ptr_random unordered, align 2, !pcsections !0 + %ptr_random = getelementptr i16, ptr %p, i32 595968 ; 0x123000/2 (i.e. ADD imm) + %val_random = load atomic i16, ptr %ptr_random unordered, align 2, !pcsections !0 %tot3 = add i16 %tot2, %val_random, !pcsections !0 ret i16 %tot3 } -define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) { +define i32 @atomic_load_relaxed_32(ptr %p, i32 %off32) { ; CHECK-LABEL: name: atomic_load_relaxed_32 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: liveins: $w1, $x0 @@ -457,25 +457,25 @@ ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w10, killed renamable $w9, 0 ; CHECK-NEXT: $w0 = ADDWrs killed renamable $w9, killed renamable $w8, 0, pcsections !0 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %ptr_unsigned = getelementptr i32, i32* %p, i32 4095 - %val_unsigned = load atomic i32, i32* %ptr_unsigned monotonic, align 4, !pcsections !0 + %ptr_unsigned = getelementptr i32, ptr %p, i32 4095 + %val_unsigned = load atomic i32, ptr %ptr_unsigned monotonic, align 4, !pcsections !0 - %ptr_regoff = getelementptr i32, i32* %p, i32 %off32 - %val_regoff = load atomic i32, i32* %ptr_regoff unordered, align 4, !pcsections !0 + %ptr_regoff = getelementptr i32, ptr %p, i32 %off32 + %val_regoff = load atomic i32, ptr %ptr_regoff unordered, align 4, !pcsections !0 %tot1 = add i32 %val_unsigned, %val_regoff, !pcsections !0 - %ptr_unscaled = getelementptr i32, i32* %p, i32 -64 - %val_unscaled = load atomic i32, i32* %ptr_unscaled monotonic, align 4, !pcsections !0 + %ptr_unscaled = getelementptr i32, ptr %p, i32 -64 + %val_unscaled = load atomic i32, ptr %ptr_unscaled monotonic, align 4, !pcsections !0 %tot2 = add i32 %tot1, %val_unscaled, !pcsections !0 - %ptr_random = getelementptr i32, i32* %p, i32 297984 ; 0x123000/4 (i.e. 
ADD imm) - %val_random = load atomic i32, i32* %ptr_random unordered, align 4, !pcsections !0 + %ptr_random = getelementptr i32, ptr %p, i32 297984 ; 0x123000/4 (i.e. ADD imm) + %val_random = load atomic i32, ptr %ptr_random unordered, align 4, !pcsections !0 %tot3 = add i32 %tot2, %val_random, !pcsections !0 ret i32 %tot3 } -define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) { +define i64 @atomic_load_relaxed_64(ptr %p, i32 %off32) { ; CHECK-LABEL: name: atomic_load_relaxed_64 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: liveins: $w1, $x0 @@ -489,26 +489,26 @@ ; CHECK-NEXT: $x9 = ADDXrs killed renamable $x10, killed renamable $x9, 0 ; CHECK-NEXT: $x0 = ADDXrs killed renamable $x9, killed renamable $x8, 0, pcsections !0 ; CHECK-NEXT: RET undef $lr, implicit $x0 - %ptr_unsigned = getelementptr i64, i64* %p, i32 4095 - %val_unsigned = load atomic i64, i64* %ptr_unsigned monotonic, align 8, !pcsections !0 + %ptr_unsigned = getelementptr i64, ptr %p, i32 4095 + %val_unsigned = load atomic i64, ptr %ptr_unsigned monotonic, align 8, !pcsections !0 - %ptr_regoff = getelementptr i64, i64* %p, i32 %off32 - %val_regoff = load atomic i64, i64* %ptr_regoff unordered, align 8, !pcsections !0 + %ptr_regoff = getelementptr i64, ptr %p, i32 %off32 + %val_regoff = load atomic i64, ptr %ptr_regoff unordered, align 8, !pcsections !0 %tot1 = add i64 %val_unsigned, %val_regoff, !pcsections !0 - %ptr_unscaled = getelementptr i64, i64* %p, i32 -32 - %val_unscaled = load atomic i64, i64* %ptr_unscaled monotonic, align 8, !pcsections !0 + %ptr_unscaled = getelementptr i64, ptr %p, i32 -32 + %val_unscaled = load atomic i64, ptr %ptr_unscaled monotonic, align 8, !pcsections !0 %tot2 = add i64 %tot1, %val_unscaled, !pcsections !0 - %ptr_random = getelementptr i64, i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm) - %val_random = load atomic i64, i64* %ptr_random unordered, align 8, !pcsections !0 + %ptr_random = getelementptr i64, ptr %p, i32 148992 ; 0x123000/8 (i.e. 
ADD imm) + %val_random = load atomic i64, ptr %ptr_random unordered, align 8, !pcsections !0 %tot3 = add i64 %tot2, %val_random, !pcsections !0 ret i64 %tot3 } -define void @atomc_store(i32* %p) { +define void @atomc_store(ptr %p) { ; CHECK-LABEL: name: atomc_store ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: liveins: $x0 @@ -516,11 +516,11 @@ ; CHECK-NEXT: renamable $w8 = MOVZWi 4, 0 ; CHECK-NEXT: STLRW killed renamable $w8, killed renamable $x0, pcsections !0 :: (store seq_cst (s32) into %ir.p) ; CHECK-NEXT: RET undef $lr - store atomic i32 4, i32* %p seq_cst, align 4, !pcsections !0 + store atomic i32 4, ptr %p seq_cst, align 4, !pcsections !0 ret void } -define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) { +define void @atomic_store_relaxed_8(ptr %p, i32 %off32, i8 %val) { ; CHECK-LABEL: name: atomic_store_relaxed_8 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: liveins: $w1, $w2, $x0 @@ -531,22 +531,22 @@ ; CHECK-NEXT: STURBBi renamable $w2, killed renamable $x0, -256, pcsections !0 :: (store monotonic (s8) into %ir.ptr_unscaled) ; CHECK-NEXT: STRBBui killed renamable $w2, killed renamable $x8, 0, pcsections !0 :: (store unordered (s8) into %ir.ptr_random) ; CHECK-NEXT: RET undef $lr - %ptr_unsigned = getelementptr i8, i8* %p, i32 4095 - store atomic i8 %val, i8* %ptr_unsigned monotonic, align 1, !pcsections !0 + %ptr_unsigned = getelementptr i8, ptr %p, i32 4095 + store atomic i8 %val, ptr %ptr_unsigned monotonic, align 1, !pcsections !0 - %ptr_regoff = getelementptr i8, i8* %p, i32 %off32 - store atomic i8 %val, i8* %ptr_regoff unordered, align 1, !pcsections !0 + %ptr_regoff = getelementptr i8, ptr %p, i32 %off32 + store atomic i8 %val, ptr %ptr_regoff unordered, align 1, !pcsections !0 - %ptr_unscaled = getelementptr i8, i8* %p, i32 -256 - store atomic i8 %val, i8* %ptr_unscaled monotonic, align 1, !pcsections !0 + %ptr_unscaled = getelementptr i8, ptr %p, i32 -256 + store atomic i8 %val, ptr %ptr_unscaled monotonic, align 1, !pcsections !0 - %ptr_random = getelementptr i8, i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm) - store atomic i8 %val, i8* %ptr_random unordered, align 1, !pcsections !0 + %ptr_random = getelementptr i8, ptr %p, i32 1191936 ; 0x123000 (i.e. 
ADD imm) + store atomic i8 %val, ptr %ptr_random unordered, align 1, !pcsections !0 ret void } -define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) { +define void @atomic_store_relaxed_16(ptr %p, i32 %off32, i16 %val) { ; CHECK-LABEL: name: atomic_store_relaxed_16 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: liveins: $w1, $w2, $x0 @@ -557,22 +557,22 @@ ; CHECK-NEXT: STURHHi renamable $w2, killed renamable $x0, -256, pcsections !0 :: (store monotonic (s16) into %ir.ptr_unscaled) ; CHECK-NEXT: STRHHui killed renamable $w2, killed renamable $x8, 0, pcsections !0 :: (store unordered (s16) into %ir.ptr_random) ; CHECK-NEXT: RET undef $lr - %ptr_unsigned = getelementptr i16, i16* %p, i32 4095 - store atomic i16 %val, i16* %ptr_unsigned monotonic, align 2, !pcsections !0 + %ptr_unsigned = getelementptr i16, ptr %p, i32 4095 + store atomic i16 %val, ptr %ptr_unsigned monotonic, align 2, !pcsections !0 - %ptr_regoff = getelementptr i16, i16* %p, i32 %off32 - store atomic i16 %val, i16* %ptr_regoff unordered, align 2, !pcsections !0 + %ptr_regoff = getelementptr i16, ptr %p, i32 %off32 + store atomic i16 %val, ptr %ptr_regoff unordered, align 2, !pcsections !0 - %ptr_unscaled = getelementptr i16, i16* %p, i32 -128 - store atomic i16 %val, i16* %ptr_unscaled monotonic, align 2, !pcsections !0 + %ptr_unscaled = getelementptr i16, ptr %p, i32 -128 + store atomic i16 %val, ptr %ptr_unscaled monotonic, align 2, !pcsections !0 - %ptr_random = getelementptr i16, i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm) - store atomic i16 %val, i16* %ptr_random unordered, align 2, !pcsections !0 + %ptr_random = getelementptr i16, ptr %p, i32 595968 ; 0x123000/2 (i.e. ADD imm) + store atomic i16 %val, ptr %ptr_random unordered, align 2, !pcsections !0 ret void } -define void @atomic_store_relaxed_32(i32* %p, i32 %off32, i32 %val) { +define void @atomic_store_relaxed_32(ptr %p, i32 %off32, i32 %val) { ; CHECK-LABEL: name: atomic_store_relaxed_32 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: liveins: $w1, $w2, $x0 @@ -583,22 +583,22 @@ ; CHECK-NEXT: STURWi renamable $w2, killed renamable $x0, -256, pcsections !0 :: (store monotonic (s32) into %ir.ptr_unscaled) ; CHECK-NEXT: STRWui killed renamable $w2, killed renamable $x8, 0, pcsections !0 :: (store unordered (s32) into %ir.ptr_random) ; CHECK-NEXT: RET undef $lr - %ptr_unsigned = getelementptr i32, i32* %p, i32 4095 - store atomic i32 %val, i32* %ptr_unsigned monotonic, align 4, !pcsections !0 + %ptr_unsigned = getelementptr i32, ptr %p, i32 4095 + store atomic i32 %val, ptr %ptr_unsigned monotonic, align 4, !pcsections !0 - %ptr_regoff = getelementptr i32, i32* %p, i32 %off32 - store atomic i32 %val, i32* %ptr_regoff unordered, align 4, !pcsections !0 + %ptr_regoff = getelementptr i32, ptr %p, i32 %off32 + store atomic i32 %val, ptr %ptr_regoff unordered, align 4, !pcsections !0 - %ptr_unscaled = getelementptr i32, i32* %p, i32 -64 - store atomic i32 %val, i32* %ptr_unscaled monotonic, align 4, !pcsections !0 + %ptr_unscaled = getelementptr i32, ptr %p, i32 -64 + store atomic i32 %val, ptr %ptr_unscaled monotonic, align 4, !pcsections !0 - %ptr_random = getelementptr i32, i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm) - store atomic i32 %val, i32* %ptr_random unordered, align 4, !pcsections !0 + %ptr_random = getelementptr i32, ptr %p, i32 297984 ; 0x123000/4 (i.e. 
ADD imm) + store atomic i32 %val, ptr %ptr_random unordered, align 4, !pcsections !0 ret void } -define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) { +define void @atomic_store_relaxed_64(ptr %p, i32 %off32, i64 %val) { ; CHECK-LABEL: name: atomic_store_relaxed_64 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: liveins: $w1, $x0, $x2 @@ -609,22 +609,22 @@ ; CHECK-NEXT: STURXi renamable $x2, killed renamable $x0, -256, pcsections !0 :: (store monotonic (s64) into %ir.ptr_unscaled) ; CHECK-NEXT: STRXui killed renamable $x2, killed renamable $x8, 0, pcsections !0 :: (store unordered (s64) into %ir.ptr_random) ; CHECK-NEXT: RET undef $lr - %ptr_unsigned = getelementptr i64, i64* %p, i32 4095 - store atomic i64 %val, i64* %ptr_unsigned monotonic, align 8, !pcsections !0 + %ptr_unsigned = getelementptr i64, ptr %p, i32 4095 + store atomic i64 %val, ptr %ptr_unsigned monotonic, align 8, !pcsections !0 - %ptr_regoff = getelementptr i64, i64* %p, i32 %off32 - store atomic i64 %val, i64* %ptr_regoff unordered, align 8, !pcsections !0 + %ptr_regoff = getelementptr i64, ptr %p, i32 %off32 + store atomic i64 %val, ptr %ptr_regoff unordered, align 8, !pcsections !0 - %ptr_unscaled = getelementptr i64, i64* %p, i32 -32 - store atomic i64 %val, i64* %ptr_unscaled monotonic, align 8, !pcsections !0 + %ptr_unscaled = getelementptr i64, ptr %p, i32 -32 + store atomic i64 %val, ptr %ptr_unscaled monotonic, align 8, !pcsections !0 - %ptr_random = getelementptr i64, i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm) - store atomic i64 %val, i64* %ptr_random unordered, align 8, !pcsections !0 + %ptr_random = getelementptr i64, ptr %p, i32 148992 ; 0x123000/8 (i.e. ADD imm) + store atomic i64 %val, ptr %ptr_random unordered, align 8, !pcsections !0 ret void } -define i32 @load_zext(i8* %p8, i16* %p16) { +define i32 @load_zext(ptr %p8, ptr %p16) { ; CHECK-NOLSE-LABEL: name: load_zext ; CHECK-NOLSE: bb.0 (%ir-block.0): ; CHECK-NOLSE-NEXT: liveins: $x0, $x1 @@ -641,17 +641,17 @@ ; CHECK-LDAPR-NEXT: renamable $w9 = LDRHHui killed renamable $x1, 0, pcsections !0 :: (load unordered (s16) from %ir.p16) ; CHECK-LDAPR-NEXT: renamable $w0 = ADDWrx killed renamable $w9, killed renamable $w8, 0, pcsections !0 ; CHECK-LDAPR-NEXT: RET undef $lr, implicit $w0 - %val1.8 = load atomic i8, i8* %p8 acquire, align 1, !pcsections !0 + %val1.8 = load atomic i8, ptr %p8 acquire, align 1, !pcsections !0 %val1 = zext i8 %val1.8 to i32 - %val2.16 = load atomic i16, i16* %p16 unordered, align 2, !pcsections !0 + %val2.16 = load atomic i16, ptr %p16 unordered, align 2, !pcsections !0 %val2 = zext i16 %val2.16 to i32 %res = add i32 %val1, %val2, !pcsections !0 ret i32 %res } -define { i32, i64 } @load_acq(i32* %p32, i64* %p64) { +define { i32, i64 } @load_acq(ptr %p32, ptr %p64) { ; CHECK-NOLSE-LABEL: name: load_acq ; CHECK-NOLSE: bb.0 (%ir-block.0): ; CHECK-NOLSE-NEXT: liveins: $x0, $x1 @@ -666,16 +666,16 @@ ; CHECK-LDAPR-NEXT: renamable $w0 = LDARW killed renamable $x0, pcsections !0 :: (load seq_cst (s32) from %ir.p32) ; CHECK-LDAPR-NEXT: renamable $x1 = LDAPRX killed renamable $x1, pcsections !0 :: (load acquire (s64) from %ir.p64) ; CHECK-LDAPR-NEXT: RET undef $lr, implicit $w0, implicit $x1 - %val32 = load atomic i32, i32* %p32 seq_cst, align 4, !pcsections !0 + %val32 = load atomic i32, ptr %p32 seq_cst, align 4, !pcsections !0 %tmp = insertvalue { i32, i64 } undef, i32 %val32, 0 - %val64 = load atomic i64, i64* %p64 acquire, align 8, !pcsections !0 + %val64 = load atomic i64, ptr %p64 acquire, align 8, !pcsections !0 %res = 
insertvalue { i32, i64 } %tmp, i64 %val64, 1 ret { i32, i64 } %res } -define i32 @load_sext(i8* %p8, i16* %p16) { +define i32 @load_sext(ptr %p8, ptr %p16) { ; CHECK-NOLSE-LABEL: name: load_sext ; CHECK-NOLSE: bb.0 (%ir-block.0): ; CHECK-NOLSE-NEXT: liveins: $x0, $x1 @@ -694,17 +694,17 @@ ; CHECK-LDAPR-NEXT: renamable $w9 = SBFMWri killed renamable $w9, 0, 15 ; CHECK-LDAPR-NEXT: renamable $w0 = ADDWrx killed renamable $w9, killed renamable $w8, 32, pcsections !0 ; CHECK-LDAPR-NEXT: RET undef $lr, implicit $w0 - %val1.8 = load atomic i8, i8* %p8 acquire, align 1, !pcsections !0 + %val1.8 = load atomic i8, ptr %p8 acquire, align 1, !pcsections !0 %val1 = sext i8 %val1.8 to i32 - %val2.16 = load atomic i16, i16* %p16 unordered, align 2, !pcsections !0 + %val2.16 = load atomic i16, ptr %p16 unordered, align 2, !pcsections !0 %val2 = sext i16 %val2.16 to i32 %res = add i32 %val1, %val2, !pcsections !0 ret i32 %res } -define void @store_trunc(i32 %val, i8* %p8, i16* %p16) { +define void @store_trunc(i32 %val, ptr %p8, ptr %p16) { ; CHECK-LABEL: name: store_trunc ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: liveins: $w0, $x1, $x2 @@ -713,15 +713,15 @@ ; CHECK-NEXT: STRHHui killed renamable $w0, killed renamable $x2, 0, pcsections !0 :: (store monotonic (s16) into %ir.p16) ; CHECK-NEXT: RET undef $lr %val8 = trunc i32 %val to i8 - store atomic i8 %val8, i8* %p8 seq_cst, align 1, !pcsections !0 + store atomic i8 %val8, ptr %p8 seq_cst, align 1, !pcsections !0 %val16 = trunc i32 %val to i16 - store atomic i16 %val16, i16* %p16 monotonic, align 2, !pcsections !0 + store atomic i16 %val16, ptr %p16 monotonic, align 2, !pcsections !0 ret void } -define i8 @atomicrmw_add_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) { ; CHECK-LABEL: name: atomicrmw_add_i8 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -742,11 +742,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw add i8* %ptr, i8 %rhs seq_cst, !pcsections !0 + %res = atomicrmw add ptr %ptr, i8 %rhs seq_cst, !pcsections !0 ret i8 %res } -define i8 @atomicrmw_xchg_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) { ; CHECK-LABEL: name: atomicrmw_xchg_i8 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -767,11 +767,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw xchg i8* %ptr, i8 %rhs monotonic, !pcsections !0 + %res = atomicrmw xchg ptr %ptr, i8 %rhs monotonic, !pcsections !0 ret i8 %res } -define i8 @atomicrmw_sub_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) { ; CHECK-LABEL: name: atomicrmw_sub_i8 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -792,11 +792,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw sub i8* %ptr, i8 %rhs acquire, !pcsections !0 + %res = atomicrmw sub ptr %ptr, i8 %rhs acquire, !pcsections !0 ret i8 %res } -define i8 @atomicrmw_and_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) { ; CHECK-LABEL: name: atomicrmw_and_i8 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -817,11 +817,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw and i8* 
%ptr, i8 %rhs release, !pcsections !0 + %res = atomicrmw and ptr %ptr, i8 %rhs release, !pcsections !0 ret i8 %res } -define i8 @atomicrmw_or_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) { ; CHECK-LABEL: name: atomicrmw_or_i8 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -842,11 +842,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw or i8* %ptr, i8 %rhs seq_cst, !pcsections !0 + %res = atomicrmw or ptr %ptr, i8 %rhs seq_cst, !pcsections !0 ret i8 %res } -define i8 @atomicrmw_xor_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) { ; CHECK-LABEL: name: atomicrmw_xor_i8 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -867,11 +867,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw xor i8* %ptr, i8 %rhs monotonic, !pcsections !0 + %res = atomicrmw xor ptr %ptr, i8 %rhs monotonic, !pcsections !0 ret i8 %res } -define i8 @atomicrmw_min_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) { ; CHECK-LABEL: name: atomicrmw_min_i8 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -894,11 +894,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw min i8* %ptr, i8 %rhs acquire, !pcsections !0 + %res = atomicrmw min ptr %ptr, i8 %rhs acquire, !pcsections !0 ret i8 %res } -define i8 @atomicrmw_max_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) { ; CHECK-LABEL: name: atomicrmw_max_i8 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -921,11 +921,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw max i8* %ptr, i8 %rhs release, !pcsections !0 + %res = atomicrmw max ptr %ptr, i8 %rhs release, !pcsections !0 ret i8 %res } -define i8 @atomicrmw_umin_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; CHECK-LABEL: name: atomicrmw_umin_i8 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -949,11 +949,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw umin i8* %ptr, i8 %rhs seq_cst, !pcsections !0 + %res = atomicrmw umin ptr %ptr, i8 %rhs seq_cst, !pcsections !0 ret i8 %res } -define i8 @atomicrmw_umax_i8(i8* %ptr, i8 %rhs) { +define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) { ; CHECK-LABEL: name: atomicrmw_umax_i8 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -977,11 +977,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw umax i8* %ptr, i8 %rhs monotonic, !pcsections !0 + %res = atomicrmw umax ptr %ptr, i8 %rhs monotonic, !pcsections !0 ret i8 %res } -define i16 @atomicrmw_add_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) { ; CHECK-LABEL: name: atomicrmw_add_i16 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -1002,11 +1002,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw add i16* %ptr, i16 %rhs seq_cst, 
!pcsections !0 + %res = atomicrmw add ptr %ptr, i16 %rhs seq_cst, !pcsections !0 ret i16 %res } -define i16 @atomicrmw_xchg_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) { ; CHECK-LABEL: name: atomicrmw_xchg_i16 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -1027,11 +1027,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw xchg i16* %ptr, i16 %rhs monotonic, !pcsections !0 + %res = atomicrmw xchg ptr %ptr, i16 %rhs monotonic, !pcsections !0 ret i16 %res } -define i16 @atomicrmw_sub_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) { ; CHECK-LABEL: name: atomicrmw_sub_i16 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -1052,11 +1052,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw sub i16* %ptr, i16 %rhs acquire, !pcsections !0 + %res = atomicrmw sub ptr %ptr, i16 %rhs acquire, !pcsections !0 ret i16 %res } -define i16 @atomicrmw_and_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) { ; CHECK-LABEL: name: atomicrmw_and_i16 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -1077,11 +1077,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw and i16* %ptr, i16 %rhs release, !pcsections !0 + %res = atomicrmw and ptr %ptr, i16 %rhs release, !pcsections !0 ret i16 %res } -define i16 @atomicrmw_or_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) { ; CHECK-LABEL: name: atomicrmw_or_i16 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -1102,11 +1102,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw or i16* %ptr, i16 %rhs seq_cst, !pcsections !0 + %res = atomicrmw or ptr %ptr, i16 %rhs seq_cst, !pcsections !0 ret i16 %res } -define i16 @atomicrmw_xor_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) { ; CHECK-LABEL: name: atomicrmw_xor_i16 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -1127,11 +1127,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw xor i16* %ptr, i16 %rhs monotonic, !pcsections !0 + %res = atomicrmw xor ptr %ptr, i16 %rhs monotonic, !pcsections !0 ret i16 %res } -define i16 @atomicrmw_min_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) { ; CHECK-LABEL: name: atomicrmw_min_i16 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -1154,11 +1154,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw min i16* %ptr, i16 %rhs acquire, !pcsections !0 + %res = atomicrmw min ptr %ptr, i16 %rhs acquire, !pcsections !0 ret i16 %res } -define i16 @atomicrmw_max_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) { ; CHECK-LABEL: name: atomicrmw_max_i16 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -1181,11 +1181,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, 
implicit $w0 - %res = atomicrmw max i16* %ptr, i16 %rhs release, !pcsections !0 + %res = atomicrmw max ptr %ptr, i16 %rhs release, !pcsections !0 ret i16 %res } -define i16 @atomicrmw_umin_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { ; CHECK-LABEL: name: atomicrmw_umin_i16 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -1209,11 +1209,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw umin i16* %ptr, i16 %rhs seq_cst, !pcsections !0 + %res = atomicrmw umin ptr %ptr, i16 %rhs seq_cst, !pcsections !0 ret i16 %res } -define i16 @atomicrmw_umax_i16(i16* %ptr, i16 %rhs) { +define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) { ; CHECK-LABEL: name: atomicrmw_umax_i16 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -1237,11 +1237,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8 ; CHECK-NEXT: RET undef $lr, implicit $w0 - %res = atomicrmw umax i16* %ptr, i16 %rhs monotonic, !pcsections !0 + %res = atomicrmw umax ptr %ptr, i16 %rhs monotonic, !pcsections !0 ret i16 %res } -define { i8, i1 } @cmpxchg_i8(i8* %ptr, i8 %desired, i8 %new) { +define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) { ; CHECK-LABEL: name: cmpxchg_i8 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -1280,11 +1280,11 @@ ; CHECK-NEXT: CLREX 15, pcsections !0 ; CHECK-NEXT: $w0 = KILL renamable $w0, implicit killed $x0 ; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1 - %res = cmpxchg i8* %ptr, i8 %desired, i8 %new monotonic monotonic, !pcsections !0 + %res = cmpxchg ptr %ptr, i8 %desired, i8 %new monotonic monotonic, !pcsections !0 ret { i8, i1 } %res } -define { i16, i1 } @cmpxchg_i16(i16* %ptr, i16 %desired, i16 %new) { +define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) { ; CHECK-LABEL: name: cmpxchg_i16 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -1323,7 +1323,7 @@ ; CHECK-NEXT: CLREX 15, pcsections !0 ; CHECK-NEXT: $w0 = KILL renamable $w0, implicit killed $x0 ; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1 - %res = cmpxchg i16* %ptr, i16 %desired, i16 %new monotonic monotonic, !pcsections !0 + %res = cmpxchg ptr %ptr, i16 %desired, i16 %new monotonic monotonic, !pcsections !0 ret { i16, i1 } %res } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/assert-align.ll b/llvm/test/CodeGen/AArch64/GlobalISel/assert-align.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/assert-align.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/assert-align.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s -declare i8* @foo() +declare ptr @foo() define void @call_assert_align() { ; CHECK-LABEL: call_assert_align: @@ -13,16 +13,16 @@ ; CHECK-NEXT: strb wzr, [x0] ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %ptr = call align 8 i8* @foo() - store i8 0, i8* %ptr + %ptr = call align 8 ptr @foo() + store i8 0, ptr %ptr ret void } -define i8* @tailcall_assert_align() { +define ptr @tailcall_assert_align() { ; CHECK-LABEL: tailcall_assert_align: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: b foo entry: - %call = tail call align 4 i8* @foo() - ret i8* %call + %call = tail call align 4 ptr @foo() + ret ptr %call } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/builtin-return-address-pacret.ll 
b/llvm/test/CodeGen/AArch64/GlobalISel/builtin-return-address-pacret.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/builtin-return-address-pacret.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/builtin-return-address-pacret.ll @@ -1,19 +1,19 @@ ;; RUN: llc -mtriple aarch64 -global-isel -O0 %s -o - | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-NOP ;; RUN: llc -mtriple aarch64 -mattr=+v8.3a -global-isel -O0 %s -o - | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-V83 declare void @g0() #1 -declare void @g1(i8*) #1 -declare void @g2(i32, i8*) #1 +declare void @g1(ptr) #1 +declare void @g2(i32, ptr) #1 -declare i8* @llvm.returnaddress(i32 immarg) #2 +declare ptr @llvm.returnaddress(i32 immarg) #2 -define i8* @f0() #0 { +define ptr @f0() #0 { entry: - %0 = call i8* @llvm.returnaddress(i32 0) - call void @g1(i8* %0) - %1 = call i8* @llvm.returnaddress(i32 1) - call void @g2(i32 1, i8* %1) - %2 = call i8* @llvm.returnaddress(i32 2) - ret i8* %2 + %0 = call ptr @llvm.returnaddress(i32 0) + call void @g1(ptr %0) + %1 = call ptr @llvm.returnaddress(i32 1) + call void @g2(i32 1, ptr %1) + %2 = call ptr @llvm.returnaddress(i32 2) + ret ptr %2 } ;; CHECK-LABEL: f0: ;; CHECK-NOT: {{(mov|ldr)}} x30 @@ -35,14 +35,14 @@ ;; CHECK-V83-NEXT: ldr x0, [x[[T1]], #8] ;; CHECK-V83-NEXT: xpaci x0 -define i8* @f1() #0 { +define ptr @f1() #0 { entry: - %0 = call i8* @llvm.returnaddress(i32 1) - call void @g1(i8* %0) - %1 = call i8* @llvm.returnaddress(i32 2) - call void @g2(i32 1, i8* %1) - %2 = call i8* @llvm.returnaddress(i32 0) - ret i8* %2 + %0 = call ptr @llvm.returnaddress(i32 1) + call void @g1(ptr %0) + %1 = call ptr @llvm.returnaddress(i32 2) + call void @g2(i32 1, ptr %1) + %2 = call ptr @llvm.returnaddress(i32 0) + ret ptr %2 } ;; CHECK-LABEL: f1: ;; CHECK-DAG: ldr x[[T0:[0-9]+]], [x29] @@ -71,11 +71,11 @@ ;; CHECK-NOT: x0 ;; CHECK: ret -define i8* @f2() #0 { +define ptr @f2() #0 { entry: - call void bitcast (void ()* @g0 to void ()*)() - %0 = call i8* @llvm.returnaddress(i32 0) - ret i8* %0 + call void @g0() + %0 = call ptr @llvm.returnaddress(i32 0) + ret ptr %0 } ;; CHECK-LABEL: f2 ;; CHECK: bl g0 @@ -88,10 +88,10 @@ ;; CHECK-NOT: x0 ;; CHECK: ret -define i8* @f3() #0 { +define ptr @f3() #0 { entry: - %0 = call i8* @llvm.returnaddress(i32 0) - ret i8* %0 + %0 = call ptr @llvm.returnaddress(i32 0) + ret ptr %0 } ;; CHECK-LABEL: f3: ;; CHECK-NOP: str x30, [sp, diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll b/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s -declare void @byval_i32(i32* byval(i32) %ptr) +declare void @byval_i32(ptr byval(i32) %ptr) -define void @call_byval_i32(i32* %incoming) uwtable { +define void @call_byval_i32(ptr %incoming) uwtable { ; CHECK-LABEL: call_byval_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 @@ -18,13 +18,13 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret - call void @byval_i32(i32* byval(i32) %incoming) + call void @byval_i32(ptr byval(i32) %incoming) ret void } -declare void @byval_a64i32([64 x i32]* byval([64 x i32]) %ptr) +declare void @byval_a64i32(ptr byval([64 x i32]) %ptr) -define void @call_byval_a64i32([64 x i32]* %incoming) uwtable { +define void @call_byval_a64i32(ptr %incoming) uwtable 
{ ; CHECK-LABEL: call_byval_a64i32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #288 @@ -78,6 +78,6 @@ ; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret - call void @byval_a64i32([64 x i32]* byval([64 x i32]) %incoming) + call void @byval_a64i32(ptr byval([64 x i32]) %incoming) ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-const-bitcast-func.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-const-bitcast-func.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-const-bitcast-func.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-const-bitcast-func.ll @@ -3,12 +3,12 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "aarch64-darwin-ios13.0" -declare i8* @objc_msgSend(i8*, i8*, ...) +declare ptr @objc_msgSend(ptr, ptr, ...) define void @call_bitcast_ptr_const() { ; CHECK-LABEL: @call_bitcast_ptr_const ; CHECK: bl _objc_msgSend ; CHECK-NOT: blr entry: - call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, [2 x i32], i32, float)*)(i8* undef, i8* undef, [2 x i32] zeroinitializer, i32 0, float 1.000000e+00) + call void @objc_msgSend(ptr undef, ptr undef, [2 x i32] zeroinitializer, i32 0, float 1.000000e+00) ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-cse.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-cse.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-cse.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-cse.ll @@ -13,14 +13,14 @@ ; CHECK: G_STORE [[LO]](s64), [[GEP2]](p0) :: (store (s64) into stack, align 1) ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[CST]](s64) ; CHECK: G_STORE [[HI]](s64), [[GEP3]](p0) :: (store (s64) into stack + 8, align 1) -define void @test_split_struct([2 x i64]* %ptr) { - %struct = load [2 x i64], [2 x i64]* %ptr - call void @take_split_struct([2 x i64]* null, i64 1, i64 2, i64 3, +define void @test_split_struct(ptr %ptr) { + %struct = load [2 x i64], ptr %ptr + call void @take_split_struct(ptr null, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, [2 x i64] %struct) ret void } -declare void @take_split_struct([2 x i64]* %ptr, i64, i64, i64, +declare void @take_split_struct(ptr %ptr, i64, i64, i64, i64, i64, i64, [2 x i64] %in) ; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll @@ -43,9 +43,9 @@ ; CHECK: $x1 = COPY ; CHECK: $x2 = COPY ; CHECK: BL @take_128bit_struct -define void @test_128bit_struct([2 x i64]* %ptr) { - %struct = load [2 x i64], [2 x i64]* %ptr - call void @take_128bit_struct([2 x i64]* null, [2 x i64] %struct) +define void @test_128bit_struct(ptr %ptr) { + %struct = load [2 x i64], ptr %ptr + call void @take_128bit_struct(ptr null, [2 x i64] %struct) ret void } @@ -53,8 +53,8 @@ ; CHECK: {{%.*}}:_(p0) = COPY $x0 ; CHECK: {{%.*}}:_(s64) = COPY $x1 ; CHECK: {{%.*}}:_(s64) = COPY $x2 -define void @take_128bit_struct([2 x i64]* %ptr, [2 x i64] %in) { - store [2 x i64] %in, [2 x i64]* %ptr +define void @take_128bit_struct(ptr %ptr, [2 x i64] %in) { + store [2 x i64] %in, ptr %ptr ret void } @@ -71,9 +71,9 @@ ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[CST]] ; CHECK: G_STORE [[LD2]](s64), [[ADDR]](p0) :: (store (s64) into stack + 8, align 1) -define void @test_split_struct([2 x i64]* %ptr) { - %struct = load [2 x i64], [2 x i64]* %ptr - 
call void @take_split_struct([2 x i64]* null, i64 1, i64 2, i64 3, +define void @test_split_struct(ptr %ptr) { + %struct = load [2 x i64], ptr %ptr + call void @take_split_struct(ptr null, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, [2 x i64] %struct) ret void @@ -89,9 +89,9 @@ ; CHECK: [[HIPTR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[HI_FRAME]] ; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD [[HIPTR]](p0) :: (invariant load (s64) from %fixed-stack.[[HI_FRAME]]) -define void @take_split_struct([2 x i64]* %ptr, i64, i64, i64, +define void @take_split_struct(ptr %ptr, i64, i64, i64, i64, i64, i64, [2 x i64] %in) { - store [2 x i64] %in, [2 x i64]* %ptr + store [2 x i64] %in, ptr %ptr ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll @@ -2,22 +2,22 @@ ; RUN: llc < %s -mtriple arm64-apple-darwin -global-isel -stop-after=irtranslator -verify-machineinstrs | FileCheck %s ; Check that we don't try to tail-call with a non-forwarded sret parameter. -declare void @test_explicit_sret(i64* sret(i64)) +declare void @test_explicit_sret(ptr sret(i64)) ; Forwarded explicit sret pointer => we can tail call. -define void @can_tail_call_forwarded_explicit_sret_ptr(i64* sret(i64) %arg) { +define void @can_tail_call_forwarded_explicit_sret_ptr(ptr sret(i64) %arg) { ; CHECK-LABEL: name: can_tail_call_forwarded_explicit_sret_ptr ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x8 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x8 ; CHECK: $x8 = COPY [[COPY]](p0) ; CHECK: TCRETURNdi @test_explicit_sret, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x8 - tail call void @test_explicit_sret(i64* %arg) + tail call void @test_explicit_sret(ptr %arg) ret void } ; Not marked as tail, so don't tail call. 
-define void @test_call_explicit_sret(i64* sret(i64) %arg) { +define void @test_call_explicit_sret(ptr sret(i64) %arg) { ; CHECK-LABEL: name: test_call_explicit_sret ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x8 @@ -27,7 +27,7 @@ ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; CHECK: RET_ReallyLR - call void @test_explicit_sret(i64* %arg) + call void @test_explicit_sret(ptr %arg) ret void } @@ -41,11 +41,11 @@ ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; CHECK: RET_ReallyLR %l = alloca i64, align 8 - tail call void @test_explicit_sret(i64* %l) + tail call void @test_explicit_sret(ptr %l) ret void } -define void @dont_tail_call_explicit_sret_alloca_dummyusers(i64* %ptr) { +define void @dont_tail_call_explicit_sret_alloca_dummyusers(ptr %ptr) { ; CHECK-LABEL: name: dont_tail_call_explicit_sret_alloca_dummyusers ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -59,13 +59,13 @@ ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; CHECK: RET_ReallyLR %l = alloca i64, align 8 - %r = load i64, i64* %ptr, align 8 - store i64 %r, i64* %l, align 8 - tail call void @test_explicit_sret(i64* %l) + %r = load i64, ptr %ptr, align 8 + store i64 %r, ptr %l, align 8 + tail call void @test_explicit_sret(ptr %l) ret void } -define void @dont_tail_call_tailcall_explicit_sret_gep(i64* %ptr) { +define void @dont_tail_call_tailcall_explicit_sret_gep(ptr %ptr) { ; CHECK-LABEL: name: dont_tail_call_tailcall_explicit_sret_gep ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -77,8 +77,8 @@ ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; CHECK: RET_ReallyLR - %ptr2 = getelementptr i64, i64* %ptr, i32 1 - tail call void @test_explicit_sret(i64* %ptr2) + %ptr2 = getelementptr i64, ptr %ptr, i32 1 + tail call void @test_explicit_sret(ptr %ptr2) ret void } @@ -94,7 +94,7 @@ ; CHECK: $x0 = COPY [[LOAD]](s64) ; CHECK: RET_ReallyLR implicit $x0 %l = alloca i64, align 8 - tail call void @test_explicit_sret(i64* %l) - %r = load i64, i64* %l, align 8 + tail call void @test_explicit_sret(ptr %l) + %r = load i64, ptr %l, align 8 ret i64 %r } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll @@ -16,7 +16,7 @@ ; We should get a TCRETURNri here. ; FIXME: We don't need the COPY. -define void @indirect_tail_call(void()* %func) { +define void @indirect_tail_call(ptr %func) { ; DARWIN-LABEL: name: indirect_tail_call ; DARWIN: bb.1 (%ir-block.0): ; DARWIN-NEXT: liveins: $x0 @@ -313,7 +313,7 @@ } ; Shouldn't tail call when the caller has byval arguments. -define void @test_byval(i8* byval(i8) %ptr) { +define void @test_byval(ptr byval(i8) %ptr) { ; DARWIN-LABEL: name: test_byval ; DARWIN: bb.1 (%ir-block.0): ; DARWIN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 @@ -335,7 +335,7 @@ } ; Shouldn't tail call when the caller has inreg arguments. 
-define void @test_inreg(i8* inreg %ptr) { +define void @test_inreg(ptr inreg %ptr) { ; DARWIN-LABEL: name: test_inreg ; DARWIN: bb.1 (%ir-block.0): ; DARWIN-NEXT: liveins: $x0 @@ -386,8 +386,8 @@ ret void } -declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) -declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) +declare void @llvm.lifetime.start.p0(i64, ptr nocapture) +declare void @llvm.lifetime.end.p0(i64, ptr nocapture) define void @test_lifetime() local_unnamed_addr { ; DARWIN-LABEL: name: test_lifetime ; DARWIN: bb.1.entry: @@ -401,18 +401,18 @@ ; WINDOWS-NEXT: TCRETURNdi @nonvoid_ret, 0, csr_aarch64_aapcs, implicit $sp entry: %t = alloca i8, align 1 - call void @llvm.lifetime.start.p0i8(i64 1, i8* %t) + call void @llvm.lifetime.start.p0(i64 1, ptr %t) %x = tail call i32 @nonvoid_ret() %y = icmp ne i32 %x, 0 - tail call void @llvm.lifetime.end.p0i8(i64 1, i8* %t) + tail call void @llvm.lifetime.end.p0(i64 1, ptr %t) ret void } ; We can tail call when the callee swiftself is the same as the caller one. ; It would be nice to move this to swiftself.ll, but it's important to verify ; that we get the COPY that makes this safe in the first place. -declare i8* @pluto() -define hidden swiftcc i64 @swiftself_indirect_tail(i64* swiftself %arg) { +declare ptr @pluto() +define hidden swiftcc i64 @swiftself_indirect_tail(ptr swiftself %arg) { ; DARWIN-LABEL: name: swiftself_indirect_tail ; DARWIN: bb.1 (%ir-block.0): ; DARWIN-NEXT: liveins: $x20 @@ -435,15 +435,14 @@ ; WINDOWS-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; WINDOWS-NEXT: $x20 = COPY [[COPY]](p0) ; WINDOWS-NEXT: TCRETURNri [[COPY1]](p0), 0, csr_aarch64_aapcs, implicit $sp, implicit $x20 - %tmp = call i8* @pluto() - %tmp1 = bitcast i8* %tmp to i64 (i64*)* - %tmp2 = tail call swiftcc i64 %tmp1(i64* swiftself %arg) + %tmp = call ptr @pluto() + %tmp2 = tail call swiftcc i64 %tmp(ptr swiftself %arg) ret i64 %tmp2 } ; Verify that we can tail call musttail callees. -declare void @must_callee(i8*) -define void @foo(i32*) { +declare void @must_callee(ptr) +define void @foo(ptr) { ; DARWIN-LABEL: name: foo ; DARWIN: bb.1 (%ir-block.1): ; DARWIN-NEXT: liveins: $x0 @@ -460,7 +459,7 @@ ; WINDOWS-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; WINDOWS-NEXT: $x0 = COPY [[C]](p0) ; WINDOWS-NEXT: TCRETURNdi @must_callee, 0, csr_aarch64_aapcs, implicit $sp, implicit $x0 - musttail call void @must_callee(i8* null) + musttail call void @must_callee(ptr null) ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll @@ -26,7 +26,7 @@ ; Test musttailing with a normal call in the block. Test that we spill and ; restore, as a normal call will clobber all argument registers. @asdf = internal constant [4 x i8] c"asdf" -declare void @puts(i8*) +declare void @puts(ptr) define i32 @test_musttail_variadic_spill(i32 %arg0, ...) { ; CHECK-LABEL: test_musttail_variadic_spill: ; CHECK: ; %bb.0: @@ -90,16 +90,16 @@ ; CHECK-NEXT: add sp, sp, #224 ; CHECK-NEXT: b _musttail_variadic_callee ; CHECK-NEXT: .loh AdrpAdd Lloh0, Lloh1 - call void @puts(i8* getelementptr ([4 x i8], [4 x i8]* @asdf, i32 0, i32 0)) + call void @puts(ptr @asdf) %r = musttail call i32 (i32, ...) @musttail_variadic_callee(i32 %arg0, ...) ret i32 %r } ; Test musttailing with a varargs call in the block. 
Test that we spill and ; reload all arguments in the variadic argument pack. -declare void @llvm.va_start(i8*) nounwind -declare void(i8*, ...)* @get_f(i8* %this) -define void @f_thunk(i8* %this, ...) { +declare void @llvm.va_start(ptr) nounwind +declare ptr @get_f(ptr %this) +define void @f_thunk(ptr %this, ...) { ; CHECK-LABEL: f_thunk: ; CHECK: ; %bb.0: ; CHECK-NEXT: sub sp, sp, #256 @@ -161,29 +161,27 @@ ; CHECK-NEXT: ldp x28, x27, [sp, #160] ; 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #256 ; CHECK-NEXT: br x9 - %ap = alloca [4 x i8*], align 16 - %ap_i8 = bitcast [4 x i8*]* %ap to i8* - call void @llvm.va_start(i8* %ap_i8) - %fptr = call void(i8*, ...)*(i8*) @get_f(i8* %this) - musttail call void (i8*, ...) %fptr(i8* %this, ...) + %ap = alloca [4 x ptr], align 16 + call void @llvm.va_start(ptr %ap) + %fptr = call ptr(ptr) @get_f(ptr %this) + musttail call void (ptr, ...) %fptr(ptr %this, ...) ret void } ; We don't need any spills and reloads here, but we should still emit the ; copies in call lowering. -define void @g_thunk(i8* %fptr_i8, ...) { +define void @g_thunk(ptr %fptr_i8, ...) { ; CHECK-LABEL: g_thunk: ; CHECK: ; %bb.0: ; CHECK-NEXT: br x0 - %fptr = bitcast i8* %fptr_i8 to void (i8*, ...)* - musttail call void (i8*, ...) %fptr(i8* %fptr_i8, ...) + musttail call void (ptr, ...) %fptr_i8(ptr %fptr_i8, ...) ret void } ; Test that this works with multiple exits and basic blocks. -%struct.Foo = type { i1, i8*, i8* } +%struct.Foo = type { i1, ptr, ptr } @g = external global i32 -define void @h_thunk(%struct.Foo* %this, ...) { +define void @h_thunk(ptr %this, ...) { ; CHECK-LABEL: h_thunk: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldrb w9, [x0] @@ -202,22 +200,19 @@ ; CHECK-NEXT: str w11, [x10] ; CHECK-NEXT: br x9 ; CHECK-NEXT: .loh AdrpLdrGotStr Lloh2, Lloh3, Lloh4 - %cond_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 0 - %cond = load i1, i1* %cond_p + %cond = load i1, ptr %this br i1 %cond, label %then, label %else then: - %a_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 1 - %a_i8 = load i8*, i8** %a_p - %a = bitcast i8* %a_i8 to void (%struct.Foo*, ...)* - musttail call void (%struct.Foo*, ...) %a(%struct.Foo* %this, ...) + %a_p = getelementptr %struct.Foo, ptr %this, i32 0, i32 1 + %a_i8 = load ptr, ptr %a_p + musttail call void (ptr, ...) %a_i8(ptr %this, ...) ret void else: - %b_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 2 - %b_i8 = load i8*, i8** %b_p - %b = bitcast i8* %b_i8 to void (%struct.Foo*, ...)* - store i32 42, i32* @g - musttail call void (%struct.Foo*, ...) %b(%struct.Foo* %this, ...) + %b_p = getelementptr %struct.Foo, ptr %this, i32 0, i32 2 + %b_i8 = load ptr, ptr %b_p + store i32 42, ptr @g + musttail call void (ptr, ...) %b_i8(ptr %this, ...) 
ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll @@ -39,7 +39,7 @@ ; CHECK: %[[FUNC]]:gpr64(p0) = COPY $x0 ; CHECK: BLR %[[FUNC]](p0), csr_aarch64_aapcs, implicit-def $lr, implicit $sp ; CHECK: RET_ReallyLR -define void @test_indirect_call(void()* %func) { +define void @test_indirect_call(ptr %func) { call void %func() ret void } @@ -73,8 +73,8 @@ ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST2]](s64) ; CHECK: G_STORE [[I8]](s8), [[GEP2]](p0) :: (store (s8) into %ir.addr + 16, align 8) ; CHECK: RET_ReallyLR -define void @test_struct_formal({double, i64, i8} %in, {double, i64, i8}* %addr) { - store {double, i64, i8} %in, {double, i64, i8}* %addr +define void @test_struct_formal({double, i64, i8} %in, ptr %addr) { + store {double, i64, i8} %in, ptr %addr ret void } @@ -94,8 +94,8 @@ ; CHECK: $x0 = COPY [[LD2]](s64) ; CHECK: $w1 = COPY [[LD3]](s32) ; CHECK: RET_ReallyLR implicit $d0, implicit $x0, implicit $w1 -define {double, i64, i32} @test_struct_return({double, i64, i32}* %addr) { - %val = load {double, i64, i32}, {double, i64, i32}* %addr +define {double, i64, i32} @test_struct_return(ptr %addr) { + %val = load {double, i64, i32}, ptr %addr ret {double, i64, i32} %val } @@ -123,8 +123,8 @@ ; CHECK: [[E3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK: $x0 = COPY [[E1]] declare [4 x i64] @arr_callee([4 x i64]) -define i64 @test_arr_call([4 x i64]* %addr) { - %arg = load [4 x i64], [4 x i64]* %addr +define i64 @test_arr_call(ptr %addr) { + %arg = load [4 x i64], ptr %addr %res = call [4 x i64] @arr_callee([4 x i64] %arg) %val = extractvalue [4 x i64] %res, 1 ret i64 %val @@ -143,8 +143,8 @@ ; CHECK: $w0 = COPY [[ZVAL]](s32) ; CHECK: BL @take_char, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0 declare void @take_char(i8) -define void @test_abi_exts_call(i8* %addr) { - %val = load i8, i8* %addr +define void @test_abi_exts_call(ptr %addr) { + %val = load i8, ptr %addr call void @take_char(i8 %val) call void @take_char(i8 signext %val) call void @take_char(i8 zeroext %val) @@ -163,8 +163,8 @@ ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; CHECK: RET_ReallyLR declare void @has_zext_param(i8 zeroext) -define void @test_zext_in_callee(i8* %addr) { - %val = load i8, i8* %addr +define void @test_zext_in_callee(ptr %addr) { + %val = load i8, ptr %addr call void @has_zext_param(i8 %val) ret void } @@ -181,8 +181,8 @@ ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; CHECK: RET_ReallyLR declare void @has_sext_param(i8 signext) -define void @test_sext_in_callee(i8* %addr) { - %val = load i8, i8* %addr +define void @test_sext_in_callee(ptr %addr) { + %val = load i8, ptr %addr call void @has_sext_param(i8 %val) ret void } @@ -192,8 +192,8 @@ ; CHECK: [[SVAL:%[0-9]+]]:_(s32) = G_SEXT [[VAL]](s8) ; CHECK: $w0 = COPY [[SVAL]](s32) ; CHECK: RET_ReallyLR implicit $w0 -define signext i8 @test_abi_sext_ret(i8* %addr) { - %val = load i8, i8* %addr +define signext i8 @test_abi_sext_ret(ptr %addr) { + %val = load i8, ptr %addr ret i8 %val } @@ -202,8 +202,8 @@ ; CHECK: [[SVAL:%[0-9]+]]:_(s32) = G_ZEXT [[VAL]](s8) ; CHECK: $w0 = COPY [[SVAL]](s32) ; CHECK: RET_ReallyLR implicit $w0 -define zeroext i8 @test_abi_zext_ret(i8* %addr) { - %val = load i8, i8* %addr +define zeroext i8 @test_abi_zext_ret(ptr %addr) { + %val = load i8, ptr %addr ret i8 %val } @@ 
-220,9 +220,9 @@ ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_LOAD [[ADDR_ADDR]](p0) :: (invariant load (p0) from %fixed-stack.[[STACK16]], align 16) ; CHECK: [[SUM:%[0-9]+]]:_(s64) = G_ADD [[LHS]], [[RHS]] ; CHECK: G_STORE [[SUM]](s64), [[ADDR]](p0) -define void @test_stack_slots([8 x i64], i64 %lhs, i64 %rhs, i64* %addr) { +define void @test_stack_slots([8 x i64], i64 %lhs, i64 %rhs, ptr %addr) { %sum = add i64 %lhs, %rhs - store i64 %sum, i64* %addr + store i64 %sum, ptr %addr ret void } @@ -244,7 +244,7 @@ ; CHECK: BL @test_stack_slots ; CHECK: ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp define void @test_call_stack() { - call void @test_stack_slots([8 x i64] undef, i64 42, i64 12, i64* null) + call void @test_stack_slots([8 x i64] undef, i64 42, i64 12, ptr null) ret void } @@ -267,9 +267,9 @@ ; CHECK: $x1 = COPY ; CHECK: $x2 = COPY ; CHECK: BL @take_128bit_struct -define void @test_128bit_struct([2 x i64]* %ptr) { - %struct = load [2 x i64], [2 x i64]* %ptr - call void @take_128bit_struct([2 x i64]* null, [2 x i64] %struct) +define void @test_128bit_struct(ptr %ptr) { + %struct = load [2 x i64], ptr %ptr + call void @take_128bit_struct(ptr null, [2 x i64] %struct) ret void } @@ -277,8 +277,8 @@ ; CHECK: {{%.*}}:_(p0) = COPY $x0 ; CHECK: {{%.*}}:_(s64) = COPY $x1 ; CHECK: {{%.*}}:_(s64) = COPY $x2 -define void @take_128bit_struct([2 x i64]* %ptr, [2 x i64] %in) { - store [2 x i64] %in, [2 x i64]* %ptr +define void @take_128bit_struct(ptr %ptr, [2 x i64] %in) { + store [2 x i64] %in, ptr %ptr ret void } @@ -295,9 +295,9 @@ ; CHECK: G_STORE [[LO]](s64), [[GEP2]](p0) :: (store (s64) into stack, align 1) ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[CST]](s64) ; CHECK: G_STORE [[HI]](s64), [[GEP3]](p0) :: (store (s64) into stack + 8, align 1) -define void @test_split_struct([2 x i64]* %ptr) { - %struct = load [2 x i64], [2 x i64]* %ptr - call void @take_split_struct([2 x i64]* null, i64 1, i64 2, i64 3, +define void @test_split_struct(ptr %ptr) { + %struct = load [2 x i64], ptr %ptr + call void @take_split_struct(ptr null, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, [2 x i64] %struct) ret void @@ -313,10 +313,10 @@ ; CHECK: [[HIPTR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[HI_FRAME]] ; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD [[HIPTR]](p0) :: (invariant load (s64) from %fixed-stack.[[HI_FRAME]]) -define void @take_split_struct([2 x i64]* %ptr, i64, i64, i64, +define void @take_split_struct(ptr %ptr, i64, i64, i64, i64, i64, i64, [2 x i64] %in) { - store [2 x i64] %in, [2 x i64]* %ptr + store [2 x i64] %in, ptr %ptr ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-of-shifted-dbg-value-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-of-shifted-dbg-value-fallback.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-of-shifted-dbg-value-fallback.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-of-shifted-dbg-value-fallback.ll @@ -5,7 +5,7 @@ ; Check we don't fall back due to hitting a DBG_VALUE with a deleted vreg. 
-%0 = type { %1, %3, %5, %8, i8, i32, i8, i64, [4096 x %9], i64, i64, [4096 x %11], i64, i64, %13, %21, i8*, %35, i64, [504 x i8] } +%0 = type { %1, %3, %5, %8, i8, i32, i8, i64, [4096 x %9], i64, i64, [4096 x %11], i64, i64, %13, %21, ptr, %35, i64, [504 x i8] } %1 = type { [32 x %2] } %2 = type { i32, i32 } %3 = type { [32 x %4] } @@ -14,14 +14,14 @@ %6 = type { %7, %7 } %7 = type { i8, [64 x i8] } %8 = type { [1024 x %7], %7 } -%9 = type { %10*, i64 } -%10 = type { i8*, i8*, i8, i8 } -%11 = type { %12*, %12* } +%9 = type { ptr, i64 } +%10 = type { ptr, ptr, i8, i8 } +%11 = type { ptr, ptr } %12 = type { i64, i64 } %13 = type { %14 } -%14 = type { %15*, %17, %19 } -%15 = type { %16* } -%16 = type <{ %15, %16*, %15*, i8, [7 x i8] }> +%14 = type { ptr, %17, %19 } +%15 = type { ptr } +%16 = type <{ %15, ptr, ptr, i8, [7 x i8] }> %17 = type { %18 } %18 = type { %15 } %19 = type { %20 } @@ -30,8 +30,8 @@ %22 = type <{ %23, %30, %32, %33, [4 x i8] }> %23 = type { %24 } %24 = type { %25, %27 } -%25 = type { %26** } -%26 = type { %26* } +%25 = type { ptr } +%26 = type { ptr } %27 = type { %28 } %28 = type { %29 } %29 = type { %20 } @@ -44,7 +44,7 @@ @global = external hidden global %0, align 512 -define void @baz(i8* %arg) !dbg !6 { +define void @baz(ptr %arg) !dbg !6 { ; CHECK-LABEL: baz: ; CHECK: .Lfunc_begin0: ; CHECK-NEXT: .file 1 "/" "tmp.ll" @@ -63,12 +63,12 @@ ; CHECK-NEXT: ret ; CHECK-NEXT: .Ltmp0: bb: - %tmp = ptrtoint i8* %arg to i64, !dbg !14 + %tmp = ptrtoint ptr %arg to i64, !dbg !14 %tmp1 = shl i64 %tmp, 1, !dbg !15 %tmp2 = and i64 %tmp1, 1022, !dbg !16 call void @llvm.dbg.value(metadata i64 %tmp2, metadata !12, metadata !DIExpression()), !dbg !16 - %tmp3 = getelementptr inbounds %0, %0* @global, i64 0, i32 17, i32 0, i64 %tmp2, !dbg !17 - store i64 0, i64* %tmp3, align 16, !dbg !18 + %tmp3 = getelementptr inbounds %0, ptr @global, i64 0, i32 17, i32 0, i64 %tmp2, !dbg !17 + store i64 0, ptr %tmp3, align 16, !dbg !18 ret void, !dbg !19 } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=aarch64-prelegalizer-combiner -force-legal-indexing %s -o - | FileCheck %s ; RUN: llc -debugify-and-strip-all-safe -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=aarch64-prelegalizer-combiner -force-legal-indexing %s -o - | FileCheck %s -define i8* @test_simple_load_pre(i8* %ptr) { +define ptr @test_simple_load_pre(ptr %ptr) { ; CHECK-LABEL: name: test_simple_load_pre ; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0 ; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42 @@ -9,47 +9,47 @@ ; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 1 ; CHECK: $x0 = COPY [[NEXT]](p0) - %next = getelementptr i8, i8* %ptr, i32 42 - load volatile i8, i8* %next - ret i8* %next + %next = getelementptr i8, ptr %ptr, i32 42 + load volatile i8, ptr %next + ret ptr %next } -define i8* @test_unused_load_pre(i8* %ptr) { +define ptr @test_unused_load_pre(ptr %ptr) { ; CHECK-LABEL: name: test_unused_load_pre ; CHECK-NOT: G_INDEXED_LOAD - %next = getelementptr i8, i8* %ptr, i32 42 - load volatile i8, i8* %next - ret i8* null + %next = getelementptr i8, ptr %ptr, i32 42 + load volatile i8, ptr 
%next + ret ptr null } -define void @test_load_multiple_dominated(i8* %ptr, i1 %tst, i1 %tst2) { +define void @test_load_multiple_dominated(ptr %ptr, i1 %tst, i1 %tst2) { ; CHECK-LABEL: name: test_load_multiple_dominated ; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0 ; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42 ; CHECK-NOT: G_PTR_ADD ; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 1 ; CHECK: $x0 = COPY [[NEXT]](p0) - %next = getelementptr i8, i8* %ptr, i32 42 + %next = getelementptr i8, ptr %ptr, i32 42 br i1 %tst, label %do_load, label %end do_load: - load volatile i8, i8* %next + load volatile i8, ptr %next br i1 %tst2, label %bb1, label %bb2 bb1: - store volatile i8* %next, i8** undef + store volatile ptr %next, ptr undef ret void bb2: - call void @bar(i8* %next) + call void @bar(ptr %next) ret void end: ret void } -define i8* @test_simple_store_pre(i8* %ptr) { +define ptr @test_simple_store_pre(ptr %ptr) { ; CHECK-LABEL: name: test_simple_store_pre ; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0 ; CHECK: [[VAL:%.*]]:_(s8) = G_CONSTANT i8 0 @@ -58,70 +58,69 @@ ; CHECK: [[NEXT:%.*]]:_(p0) = G_INDEXED_STORE [[VAL]](s8), [[BASE]], [[OFFSET]](s64), 1 ; CHECK: $x0 = COPY [[NEXT]](p0) - %next = getelementptr i8, i8* %ptr, i32 42 - store volatile i8 0, i8* %next - ret i8* %next + %next = getelementptr i8, ptr %ptr, i32 42 + store volatile i8 0, ptr %next + ret ptr %next } ; The potentially pre-indexed address is used as the value stored. Converting ; would produce the value too late but only by one instruction. -define i64** @test_store_pre_val_loop(i64** %ptr) { +define ptr @test_store_pre_val_loop(ptr %ptr) { ; CHECK-LABEL: name: test_store_pre_val_loop ; CHECK: G_PTR_ADD ; CHECK: G_STORE % - %next = getelementptr i64*, i64** %ptr, i32 42 - %next.p0 = bitcast i64** %next to i64* - store volatile i64* %next.p0, i64** %next - ret i64** %next + %next = getelementptr ptr, ptr %ptr, i32 42 + store volatile ptr %next, ptr %next + ret ptr %next } ; Potentially pre-indexed address is used between GEP computing it and load. -define i8* @test_load_pre_before(i8* %ptr) { +define ptr @test_load_pre_before(ptr %ptr) { ; CHECK-LABEL: name: test_load_pre_before ; CHECK: G_PTR_ADD ; CHECK: BL @bar ; CHECK: G_LOAD % - %next = getelementptr i8, i8* %ptr, i32 42 - call void @bar(i8* %next) - load volatile i8, i8* %next - ret i8* %next + %next = getelementptr i8, ptr %ptr, i32 42 + call void @bar(ptr %next) + load volatile i8, ptr %next + ret ptr %next } ; Materializing the base into a writable register (from sp/fp) would be just as ; bad as the original GEP. -define i8* @test_alloca_load_pre() { +define ptr @test_alloca_load_pre() { ; CHECK-LABEL: name: test_alloca_load_pre ; CHECK: G_PTR_ADD ; CHECK: G_LOAD % %ptr = alloca i8, i32 128 - %next = getelementptr i8, i8* %ptr, i32 42 - load volatile i8, i8* %next - ret i8* %next + %next = getelementptr i8, ptr %ptr, i32 42 + load volatile i8, ptr %next + ret ptr %next } ; Load does not dominate use of its address. No indexing. 
-define i8* @test_pre_nodom(i8* %in, i1 %tst) { +define ptr @test_pre_nodom(ptr %in, i1 %tst) { ; CHECK-LABEL: name: test_pre_nodom ; CHECK: G_PTR_ADD ; CHECK: G_LOAD % - %next = getelementptr i8, i8* %in, i32 16 + %next = getelementptr i8, ptr %in, i32 16 br i1 %tst, label %do_indexed, label %use_addr do_indexed: - %val = load i8, i8* %next - store i8 %val, i8* @var - store i8* %next, i8** @varp8 + %val = load i8, ptr %next + store i8 %val, ptr @var + store ptr %next, ptr @varp8 br label %use_addr use_addr: - ret i8* %next + ret ptr %next } -define i8* @test_simple_load_post(i8* %ptr) { +define ptr @test_simple_load_post(ptr %ptr) { ; CHECK-LABEL: name: test_simple_load_post ; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0 ; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42 @@ -129,12 +128,12 @@ ; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 0 ; CHECK: $x0 = COPY [[NEXT]](p0) - %next = getelementptr i8, i8* %ptr, i32 42 - load volatile i8, i8* %ptr - ret i8* %next + %next = getelementptr i8, ptr %ptr, i32 42 + load volatile i8, ptr %ptr + ret ptr %next } -define i8* @test_simple_load_post_gep_after(i8* %ptr) { +define ptr @test_simple_load_post_gep_after(ptr %ptr) { ; CHECK-LABEL: name: test_simple_load_post_gep_after ; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0 ; CHECK: BL @get_offset @@ -143,50 +142,50 @@ ; CHECK: $x0 = COPY [[ADDR]](p0) %offset = call i64 @get_offset() - load volatile i8, i8* %ptr - %next = getelementptr i8, i8* %ptr, i64 %offset - ret i8* %next + load volatile i8, ptr %ptr + %next = getelementptr i8, ptr %ptr, i64 %offset + ret ptr %next } -define i8* @test_load_post_keep_looking(i8* %ptr) { +define ptr @test_load_post_keep_looking(ptr %ptr) { ; CHECK: name: test_load_post_keep_looking ; CHECK: G_INDEXED_LOAD %offset = call i64 @get_offset() - load volatile i8, i8* %ptr - %intval = ptrtoint i8* %ptr to i8 - store i8 %intval, i8* @var + load volatile i8, ptr %ptr + %intval = ptrtoint ptr %ptr to i8 + store i8 %intval, ptr @var - %next = getelementptr i8, i8* %ptr, i64 %offset - ret i8* %next + %next = getelementptr i8, ptr %ptr, i64 %offset + ret ptr %next } ; Base is frame index. Using indexing would need copy anyway. -define i8* @test_load_post_alloca() { +define ptr @test_load_post_alloca() { ; CHECK-LABEL: name: test_load_post_alloca ; CHECK: G_PTR_ADD ; CHECK: G_LOAD % %ptr = alloca i8, i32 128 - %next = getelementptr i8, i8* %ptr, i32 42 - load volatile i8, i8* %ptr - ret i8* %next + %next = getelementptr i8, ptr %ptr, i32 42 + load volatile i8, ptr %ptr + ret ptr %next } ; Offset computation does not dominate the load we might be indexing. 
-define i8* @test_load_post_gep_offset_after(i8* %ptr) { +define ptr @test_load_post_gep_offset_after(ptr %ptr) { ; CHECK-LABEL: name: test_load_post_gep_offset_after ; CHECK: G_LOAD % ; CHECK: BL @get_offset ; CHECK: G_PTR_ADD - load volatile i8, i8* %ptr + load volatile i8, ptr %ptr %offset = call i64 @get_offset() - %next = getelementptr i8, i8* %ptr, i64 %offset - ret i8* %next + %next = getelementptr i8, ptr %ptr, i64 %offset + ret ptr %next } -declare void @bar(i8*) +declare void @bar(ptr) declare i64 @get_offset() @var = global i8 0 -@varp8 = global i8* null +@varp8 = global ptr null diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/constant-dbg-loc.ll b/llvm/test/CodeGen/AArch64/GlobalISel/constant-dbg-loc.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/constant-dbg-loc.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/constant-dbg-loc.ll @@ -30,13 +30,13 @@ ; CHECK: RET_ReallyLR implicit $w0, debug-location !24 entry: %retval = alloca i32, align 4 - store i32 0, i32* %retval, align 4 - %0 = load i32, i32* @var1, align 4, !dbg !17 + store i32 0, ptr %retval, align 4 + %0 = load i32, ptr @var1, align 4, !dbg !17 %cmp = icmp eq i32 %0, 1, !dbg !19 br i1 %cmp, label %if.then, label %if.end, !dbg !20 if.then: - store i32 2, i32* @var2, align 4, !dbg !21 + store i32 2, ptr @var2, align 4, !dbg !21 br label %if.end, !dbg !23 if.end: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll b/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll @@ -4,25 +4,25 @@ @t_val = thread_local global i32 0, align 4 @.str = private unnamed_addr constant [5 x i8] c"str1\00", align 1 -@str1 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), align 8 +@str1 = global ptr @.str, align 8 @.str.1 = private unnamed_addr constant [5 x i8] c"str2\00", align 1 -@str2 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i32 0, i32 0), align 8 +@str2 = global ptr @.str.1, align 8 @.str.2 = private unnamed_addr constant [5 x i8] c"str3\00", align 1 -@str3 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.2, i32 0, i32 0), align 8 +@str3 = global ptr @.str.2, align 8 @.str.3 = private unnamed_addr constant [5 x i8] c"str4\00", align 1 -@str4 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.3, i32 0, i32 0), align 8 +@str4 = global ptr @.str.3, align 8 @.str.4 = private unnamed_addr constant [5 x i8] c"str5\00", align 1 -@str5 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.4, i32 0, i32 0), align 8 +@str5 = global ptr @.str.4, align 8 @.str.5 = private unnamed_addr constant [5 x i8] c"str6\00", align 1 -@str6 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.5, i32 0, i32 0), align 8 +@str6 = global ptr @.str.5, align 8 @.str.6 = private unnamed_addr constant [5 x i8] c"str7\00", align 1 -@str7 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.6, i32 0, i32 0), align 8 +@str7 = global ptr @.str.6, align 8 @.str.7 = private unnamed_addr constant [5 x i8] c"str8\00", align 1 -@str8 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.7, i32 0, i32 0), align 8 +@str8 = global ptr @.str.7, align 8 @.str.8 = private unnamed_addr constant [5 x i8] c"str9\00", align 1 -@str9 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.8, i32 0, i32 0), align 8 +@str9 = global ptr @.str.8, align 8 @.str.9 = private unnamed_addr constant [6 x 
i8] c"str10\00", align 1 -@str10 = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.9, i32 0, i32 0), align 8 +@str10 = global ptr @.str.9, align 8 @.str.10 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1 @.str.11 = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 @.str.12 = private unnamed_addr constant [4 x i8] c"xyz\00", align 1 @@ -35,138 +35,138 @@ ; CHECK: ldr x[[FPTR:[0-9]+]], [x0] ; CHECK: blr x[[FPTR]] -define void @_Z4funcPKc(i8* %id) { +define void @_Z4funcPKc(ptr %id) { entry: - %id.addr = alloca i8*, align 8 - store i8* %id, i8** %id.addr, align 8 - %0 = load i8*, i8** %id.addr, align 8 - %1 = load i8*, i8** @str1, align 8 - %cmp = icmp eq i8* %0, %1 + %id.addr = alloca ptr, align 8 + store ptr %id, ptr %id.addr, align 8 + %0 = load ptr, ptr %id.addr, align 8 + %1 = load ptr, ptr @str1, align 8 + %cmp = icmp eq ptr %0, %1 br i1 %cmp, label %if.then, label %if.else if.then: ; preds = %entry - %2 = load i8*, i8** @str1, align 8 - %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %2) - %3 = load i8*, i8** @str2, align 8 - %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %3) - %4 = load i8*, i8** @str3, align 8 - %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %4) - %5 = load i8*, i8** @str4, align 8 - %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %5) - %6 = load i8*, i8** @str5, align 8 - %call4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %6) - %7 = load i8*, i8** @str6, align 8 - %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %7) - %8 = load i8*, i8** @str7, align 8 - %call6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %8) - %9 = load i8*, i8** @str8, align 8 - %call7 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %9) - %10 = load i8*, i8** @str9, align 8 - %call8 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %10) - %11 = load i8*, i8** @str10, align 8 - %call9 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %11) - %12 = load i32, i32* @t_val, align 4 - %call10 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.11, i64 0, i64 0), i32 %12) + %2 = load ptr, ptr @str1, align 8 + %call = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %2) + %3 = load ptr, ptr @str2, align 8 + %call1 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %3) + %4 = load ptr, ptr @str3, align 8 + %call2 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %4) + %5 = load ptr, ptr @str4, align 8 + %call3 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %5) + %6 = load ptr, ptr @str5, align 8 + %call4 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %6) + %7 = load ptr, ptr @str6, align 8 + %call5 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %7) + %8 = load ptr, ptr @str7, align 8 + %call6 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %8) + %9 = load ptr, ptr @str8, align 8 + %call7 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %9) + %10 = load ptr, ptr @str9, align 8 + %call8 = call i32 (ptr, ...) 
@printf(ptr @.str.10, ptr %10) + %11 = load ptr, ptr @str10, align 8 + %call9 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %11) + %12 = load i32, ptr @t_val, align 4 + %call10 = call i32 (ptr, ...) @printf(ptr @.str.11, i32 %12) br label %if.end56 if.else: ; preds = %entry - %13 = load i8*, i8** %id.addr, align 8 - %14 = load i8*, i8** @str2, align 8 - %cmp11 = icmp eq i8* %13, %14 + %13 = load ptr, ptr %id.addr, align 8 + %14 = load ptr, ptr @str2, align 8 + %cmp11 = icmp eq ptr %13, %14 br i1 %cmp11, label %if.then12, label %if.else24 if.then12: ; preds = %if.else - %15 = load i8*, i8** @str1, align 8 - %call13 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %15) - %16 = load i8*, i8** @str2, align 8 - %call14 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %16) - %17 = load i8*, i8** @str3, align 8 - %call15 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %17) - %18 = load i8*, i8** @str4, align 8 - %call16 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %18) - %19 = load i8*, i8** @str5, align 8 - %call17 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %19) - %20 = load i8*, i8** @str6, align 8 - %call18 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %20) - %21 = load i8*, i8** @str7, align 8 - %call19 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %21) - %22 = load i8*, i8** @str8, align 8 - %call20 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %22) - %23 = load i8*, i8** @str9, align 8 - %call21 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %23) - %24 = load i8*, i8** @str10, align 8 - %call22 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %24) - %25 = load i32, i32* @t_val, align 4 - %call23 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.11, i64 0, i64 0), i32 %25) + %15 = load ptr, ptr @str1, align 8 + %call13 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %15) + %16 = load ptr, ptr @str2, align 8 + %call14 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %16) + %17 = load ptr, ptr @str3, align 8 + %call15 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %17) + %18 = load ptr, ptr @str4, align 8 + %call16 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %18) + %19 = load ptr, ptr @str5, align 8 + %call17 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %19) + %20 = load ptr, ptr @str6, align 8 + %call18 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %20) + %21 = load ptr, ptr @str7, align 8 + %call19 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %21) + %22 = load ptr, ptr @str8, align 8 + %call20 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %22) + %23 = load ptr, ptr @str9, align 8 + %call21 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %23) + %24 = load ptr, ptr @str10, align 8 + %call22 = call i32 (ptr, ...) @printf(ptr @.str.10, ptr %24) + %25 = load i32, ptr @t_val, align 4 + %call23 = call i32 (ptr, ...) 
@printf(ptr @.str.11, i32 %25) br label %if.end55 if.else24: ; preds = %if.else - %26 = load i8*, i8** %id.addr, align 8 - %27 = load i8*, i8** @str3, align 8 - %cmp25 = icmp eq i8* %26, %27 + %26 = load ptr, ptr %id.addr, align 8 + %27 = load ptr, ptr @str3, align 8 + %cmp25 = icmp eq ptr %26, %27 br i1 %cmp25, label %if.then26, label %if.else27 if.then26: ; preds = %if.else24 br label %if.end54 if.else27: ; preds = %if.else24 - %28 = load i8*, i8** %id.addr, align 8 - %29 = load i8*, i8** @str4, align 8 - %cmp28 = icmp eq i8* %28, %29 + %28 = load ptr, ptr %id.addr, align 8 + %29 = load ptr, ptr @str4, align 8 + %cmp28 = icmp eq ptr %28, %29 br i1 %cmp28, label %if.then29, label %if.else30 if.then29: ; preds = %if.else27 br label %if.end53 if.else30: ; preds = %if.else27 - %30 = load i8*, i8** %id.addr, align 8 - %31 = load i8*, i8** @str5, align 8 - %cmp31 = icmp eq i8* %30, %31 + %30 = load ptr, ptr %id.addr, align 8 + %31 = load ptr, ptr @str5, align 8 + %cmp31 = icmp eq ptr %30, %31 br i1 %cmp31, label %if.then32, label %if.else33 if.then32: ; preds = %if.else30 br label %if.end52 if.else33: ; preds = %if.else30 - %32 = load i8*, i8** %id.addr, align 8 - %33 = load i8*, i8** @str6, align 8 - %cmp34 = icmp eq i8* %32, %33 + %32 = load ptr, ptr %id.addr, align 8 + %33 = load ptr, ptr @str6, align 8 + %cmp34 = icmp eq ptr %32, %33 br i1 %cmp34, label %if.then35, label %if.else36 if.then35: ; preds = %if.else33 br label %if.end51 if.else36: ; preds = %if.else33 - %34 = load i8*, i8** %id.addr, align 8 - %35 = load i8*, i8** @str7, align 8 - %cmp37 = icmp eq i8* %34, %35 + %34 = load ptr, ptr %id.addr, align 8 + %35 = load ptr, ptr @str7, align 8 + %cmp37 = icmp eq ptr %34, %35 br i1 %cmp37, label %if.then38, label %if.else39 if.then38: ; preds = %if.else36 br label %if.end50 if.else39: ; preds = %if.else36 - %36 = load i8*, i8** %id.addr, align 8 - %37 = load i8*, i8** @str8, align 8 - %cmp40 = icmp eq i8* %36, %37 + %36 = load ptr, ptr %id.addr, align 8 + %37 = load ptr, ptr @str8, align 8 + %cmp40 = icmp eq ptr %36, %37 br i1 %cmp40, label %if.then41, label %if.else42 if.then41: ; preds = %if.else39 br label %if.end49 if.else42: ; preds = %if.else39 - %38 = load i8*, i8** %id.addr, align 8 - %39 = load i8*, i8** @str9, align 8 - %cmp43 = icmp eq i8* %38, %39 + %38 = load ptr, ptr %id.addr, align 8 + %39 = load ptr, ptr @str9, align 8 + %cmp43 = icmp eq ptr %38, %39 br i1 %cmp43, label %if.then44, label %if.else45 if.then44: ; preds = %if.else42 br label %if.end48 if.else45: ; preds = %if.else42 - %40 = load i8*, i8** %id.addr, align 8 - %41 = load i8*, i8** @str10, align 8 - %cmp46 = icmp eq i8* %40, %41 + %40 = load ptr, ptr %id.addr, align 8 + %41 = load ptr, ptr @str10, align 8 + %cmp46 = icmp eq ptr %40, %41 br i1 %cmp46, label %if.then47, label %if.end if.then47: ; preds = %if.else45 @@ -202,5 +202,5 @@ if.end56: ; preds = %if.end55, %if.then ret void } -declare i32 @printf(i8*, ...) +declare i32 @printf(ptr, ...) 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/debug-cpp.ll b/llvm/test/CodeGen/AArch64/GlobalISel/debug-cpp.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/debug-cpp.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/debug-cpp.ll @@ -20,11 +20,10 @@ ; CHECK-LABEL: name: _Z3foo6NTCopy ; CHECK: DBG_VALUE %{{[0-9]+}}(p0), 0, !23, !DIExpression(), debug-location !24 ; Function Attrs: noinline nounwind optnone -define dso_local i32 @_Z3foo6NTCopy(%struct.NTCopy* %o) #0 !dbg !7 { +define dso_local i32 @_Z3foo6NTCopy(ptr %o) #0 !dbg !7 { entry: - call void @llvm.dbg.declare(metadata %struct.NTCopy* %o, metadata !23, metadata !DIExpression()), !dbg !24 - %x = getelementptr inbounds %struct.NTCopy, %struct.NTCopy* %o, i32 0, i32 0, !dbg !25 - %0 = load i32, i32* %x, align 4, !dbg !25 + call void @llvm.dbg.declare(metadata ptr %o, metadata !23, metadata !DIExpression()), !dbg !24 + %0 = load i32, ptr %o, align 4, !dbg !25 ret i32 %0, !dbg !26 } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/debug-insts.ll b/llvm/test/CodeGen/AArch64/GlobalISel/debug-insts.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/debug-insts.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/debug-insts.ll @@ -11,8 +11,8 @@ define void @debug_declare(i32 %in) #0 !dbg !7 { entry: %in.addr = alloca i32, align 4 - store i32 %in, i32* %in.addr, align 4 - call void @llvm.dbg.declare(metadata i32* %in.addr, metadata !11, metadata !DIExpression()), !dbg !12 + store i32 %in, ptr %in.addr, align 4 + call void @llvm.dbg.declare(metadata ptr %in.addr, metadata !11, metadata !DIExpression()), !dbg !12 ret void, !dbg !12 } @@ -21,7 +21,7 @@ define void @debug_declare_vla(i32 %in) #0 !dbg !13 { entry: %vla.addr = alloca i32, i32 %in - call void @llvm.dbg.declare(metadata i32* %vla.addr, metadata !14, metadata !DIExpression()), !dbg !15 + call void @llvm.dbg.declare(metadata ptr %vla.addr, metadata !14, metadata !DIExpression()), !dbg !15 ret void, !dbg !15 } @@ -33,19 +33,19 @@ %addr = alloca i32 ; CHECK: DBG_VALUE [[IN]](s32), $noreg, !17, !DIExpression(), debug-location !18 call void @llvm.dbg.value(metadata i32 %in, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 - store i32 %in, i32* %addr + store i32 %in, ptr %addr ; CHECK: DBG_VALUE %1(p0), $noreg, !17, !DIExpression(DW_OP_deref), debug-location !18 - call void @llvm.dbg.value(metadata i32* %addr, i64 0, metadata !17, metadata !DIExpression(DW_OP_deref)), !dbg !18 + call void @llvm.dbg.value(metadata ptr %addr, i64 0, metadata !17, metadata !DIExpression(DW_OP_deref)), !dbg !18 ; CHECK: DBG_VALUE 123, 0, !17, !DIExpression(), debug-location !18 call void @llvm.dbg.value(metadata i32 123, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 ; CHECK: DBG_VALUE float 1.000000e+00, 0, !17, !DIExpression(), debug-location !18 call void @llvm.dbg.value(metadata float 1.000000e+00, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 ; CHECK: DBG_VALUE 0, 0, !17, !DIExpression(), debug-location !18 - call void @llvm.dbg.value(metadata i32* null, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 + call void @llvm.dbg.value(metadata ptr null, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 ; CHECK: DBG_VALUE $noreg, 0, !17, !DIExpression(), debug-location !18 - call void @llvm.dbg.value(metadata i32* @gv, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 + call void @llvm.dbg.value(metadata ptr @gv, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 ; CHECK: DBG_VALUE 42, 0, !17, !DIExpression(), debug-location !18 - call void @llvm.dbg.value(metadata 
i32* inttoptr (i64 42 to i32*), i64 0, metadata !17, metadata !DIExpression()), !dbg !18 + call void @llvm.dbg.value(metadata ptr inttoptr (i64 42 to ptr), i64 0, metadata !17, metadata !DIExpression()), !dbg !18 ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca-lifetime.ll b/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca-lifetime.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca-lifetime.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca-lifetime.ll @@ -13,10 +13,10 @@ ; CHECK-NOT: remark{{.*}}foo ; Function Attrs: nounwind -declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0 +declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #0 ; Function Attrs: nounwind -declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0 +declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #0 ; Function Attrs: ssp define void @foo(i1 %cond1, i1 %cond2) #1 { @@ -32,12 +32,10 @@ ret void if.else130: ; preds = %bb1 - %tmp = getelementptr inbounds [8192 x i8], [8192 x i8]* %bitmapBuffer, i32 0, i32 0 - call void @llvm.lifetime.start.p0i8(i64 8192, i8* %tmp) #0 - call void @llvm.lifetime.end.p0i8(i64 8192, i8* %tmp) #0 - %tmp25 = getelementptr inbounds [8192 x i8], [8192 x i8]* %bitmapBuffer229, i32 0, i32 0 - call void @llvm.lifetime.start.p0i8(i64 8192, i8* %tmp25) #0 - call void @llvm.lifetime.end.p0i8(i64 8192, i8* %tmp25) #0 + call void @llvm.lifetime.start.p0(i64 8192, ptr %bitmapBuffer) #0 + call void @llvm.lifetime.end.p0(i64 8192, ptr %bitmapBuffer) #0 + call void @llvm.lifetime.start.p0(i64 8192, ptr %bitmapBuffer229) #0 + call void @llvm.lifetime.end.p0(i64 8192, ptr %bitmapBuffer229) #0 br label %end1 } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll b/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -mtriple=aarch64 -global-isel %s -o - -stop-after=irtranslator | FileCheck %s -define i8* @test_simple_alloca(i32 %numelts) { +define ptr @test_simple_alloca(i32 %numelts) { ; CHECK-LABEL: name: test_simple_alloca ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $w0 @@ -17,10 +17,10 @@ ; CHECK: $x0 = COPY [[DYN_STACKALLOC]](p0) ; CHECK: RET_ReallyLR implicit $x0 %addr = alloca i8, i32 %numelts - ret i8* %addr + ret ptr %addr } -define i8* @test_aligned_alloca(i32 %numelts) { +define ptr @test_aligned_alloca(i32 %numelts) { ; CHECK-LABEL: name: test_aligned_alloca ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $w0 @@ -36,10 +36,10 @@ ; CHECK: $x0 = COPY [[DYN_STACKALLOC]](p0) ; CHECK: RET_ReallyLR implicit $x0 %addr = alloca i8, i32 %numelts, align 32 - ret i8* %addr + ret ptr %addr } -define i128* @test_natural_alloca(i32 %numelts) { +define ptr @test_natural_alloca(i32 %numelts) { ; CHECK-LABEL: name: test_natural_alloca ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $w0 @@ -55,5 +55,5 @@ ; CHECK: $x0 = COPY [[DYN_STACKALLOC]](p0) ; CHECK: RET_ReallyLR implicit $x0 %addr = alloca i128, i32 %numelts - ret i128* %addr + ret ptr %addr } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/freeze.ll b/llvm/test/CodeGen/AArch64/GlobalISel/freeze.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/freeze.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/freeze.ll @@ -64,7 +64,7 @@ ret <2 x i32> %t1 } -define i8* @freeze_ptr() { +define ptr @freeze_ptr() { ; CHECK-LABEL: freeze_ptr: ; CHECK: // %bb.0: ; 
CHECK-NEXT: add x0, x8, #4 @@ -74,9 +74,9 @@ ; GISEL: // %bb.0: ; GISEL-NEXT: add x0, x8, #4 ; GISEL-NEXT: ret - %y1 = freeze i8* undef - %t1 = getelementptr i8, i8* %y1, i64 4 - ret i8* %t1 + %y1 = freeze ptr undef + %t1 = getelementptr i8, ptr %y1, i64 4 + ret ptr %t1 } define i32 @freeze_struct() { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-fail-intermediate-legalizer.ll b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-fail-intermediate-legalizer.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-fail-intermediate-legalizer.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-fail-intermediate-legalizer.ll @@ -1,8 +1,8 @@ ;RUN: llc -mtriple=aarch64-unknown-unknown -o - -global-isel -global-isel-abort=2 %s 2>&1 | FileCheck %s ; CHECK: fallback ; CHECK-LABEL: foo -define i16 @foo(fp128* %p) { - %tmp0 = load fp128, fp128* %p +define i16 @foo(ptr %p) { + %tmp0 = load fp128, ptr %p %tmp1 = fptoui fp128 %tmp0 to i16 ret i16 %tmp1 } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inttoptr_add.ll b/llvm/test/CodeGen/AArch64/GlobalISel/inttoptr_add.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/inttoptr_add.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inttoptr_add.ll @@ -10,6 +10,6 @@ ; CHECK-NEXT: str w9, [x8] ; CHECK-NEXT: ret entry: - store i32 1, i32* bitcast (i8* getelementptr inbounds (i8, i8* inttoptr (i32 -3076096 to i8*), i64 36) to i32*), align 4 + store i32 1, ptr getelementptr inbounds (i8, ptr inttoptr (i32 -3076096 to ptr), i64 36), align 4 ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-atomic-metadata.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-atomic-metadata.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-atomic-metadata.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-atomic-metadata.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -mtriple=aarch64-- -mcpu=falkor -mattr=+lse -O0 -aarch64-enable-atomic-cfg-tidy=0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s -define i32 @atomicrmw_volatile(i32* %ptr) { +define i32 @atomicrmw_volatile(ptr %ptr) { ; CHECK-LABEL: name: atomicrmw_volatile ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -10,11 +10,11 @@ ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: (volatile load store monotonic (s32) on %ir.ptr) ; CHECK: $w0 = COPY [[ATOMICRMW_ADD]](s32) ; CHECK: RET_ReallyLR implicit $w0 - %oldval = atomicrmw volatile add i32* %ptr, i32 1 monotonic + %oldval = atomicrmw volatile add ptr %ptr, i32 1 monotonic ret i32 %oldval } -define i32 @atomicrmw_falkor(i32* %ptr) { +define i32 @atomicrmw_falkor(ptr %ptr) { ; CHECK-LABEL: name: atomicrmw_falkor ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -23,11 +23,11 @@ ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: ("aarch64-strided-access" load store monotonic (s32) on %ir.ptr) ; CHECK: $w0 = COPY [[ATOMICRMW_ADD]](s32) ; CHECK: RET_ReallyLR implicit $w0 - %oldval = atomicrmw add i32* %ptr, i32 1 monotonic, !falkor.strided.access !0 + %oldval = atomicrmw add ptr %ptr, i32 1 monotonic, !falkor.strided.access !0 ret i32 %oldval } -define i32 @atomicrmw_volatile_falkor(i32* %ptr) { +define i32 @atomicrmw_volatile_falkor(ptr %ptr) { ; CHECK-LABEL: name: atomicrmw_volatile_falkor ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -36,11 +36,11 @@ ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: (volatile "aarch64-strided-access" 
load store monotonic (s32) on %ir.ptr) ; CHECK: $w0 = COPY [[ATOMICRMW_ADD]](s32) ; CHECK: RET_ReallyLR implicit $w0 - %oldval = atomicrmw volatile add i32* %ptr, i32 1 monotonic, !falkor.strided.access !0 + %oldval = atomicrmw volatile add ptr %ptr, i32 1 monotonic, !falkor.strided.access !0 ret i32 %oldval } -define i32 @cmpxchg_volatile(i32* %addr) { +define i32 @cmpxchg_volatile(ptr %addr) { ; CHECK-LABEL: name: cmpxchg_volatile ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -50,12 +50,12 @@ ; CHECK: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[C]], [[C1]] :: (volatile load store monotonic monotonic (s32) on %ir.addr) ; CHECK: $w0 = COPY [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32) ; CHECK: RET_ReallyLR implicit $w0 - %val_success = cmpxchg volatile i32* %addr, i32 0, i32 1 monotonic monotonic + %val_success = cmpxchg volatile ptr %addr, i32 0, i32 1 monotonic monotonic %value_loaded = extractvalue { i32, i1 } %val_success, 0 ret i32 %value_loaded } -define i32 @cmpxchg_falkor(i32* %addr) { +define i32 @cmpxchg_falkor(ptr %addr) { ; CHECK-LABEL: name: cmpxchg_falkor ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -65,12 +65,12 @@ ; CHECK: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[C]], [[C1]] :: ("aarch64-strided-access" load store monotonic monotonic (s32) on %ir.addr) ; CHECK: $w0 = COPY [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32) ; CHECK: RET_ReallyLR implicit $w0 - %val_success = cmpxchg i32* %addr, i32 0, i32 1 monotonic monotonic, !falkor.strided.access !0 + %val_success = cmpxchg ptr %addr, i32 0, i32 1 monotonic monotonic, !falkor.strided.access !0 %value_loaded = extractvalue { i32, i1 } %val_success, 0 ret i32 %value_loaded } -define i32 @cmpxchg_volatile_falkor(i32* %addr) { +define i32 @cmpxchg_volatile_falkor(ptr %addr) { ; CHECK-LABEL: name: cmpxchg_volatile_falkor ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -80,7 +80,7 @@ ; CHECK: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[C]], [[C1]] :: (volatile "aarch64-strided-access" load store monotonic monotonic (s32) on %ir.addr) ; CHECK: $w0 = COPY [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32) ; CHECK: RET_ReallyLR implicit $w0 - %val_success = cmpxchg volatile i32* %addr, i32 0, i32 1 monotonic monotonic, !falkor.strided.access !0 + %val_success = cmpxchg volatile ptr %addr, i32 0, i32 1 monotonic monotonic, !falkor.strided.access !0 %value_loaded = extractvalue { i32, i1 } %val_success, 0 ret i32 %value_loaded } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-block-order.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-block-order.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-block-order.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-block-order.ll @@ -9,7 +9,7 @@ br label %bb2 bb1: - store i8 %0, i8* undef, align 4 + store i8 %0, ptr undef, align 4 ret void bb2: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-delayed-stack-protector.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-delayed-stack-protector.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-delayed-stack-protector.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-delayed-stack-protector.ll @@ -32,9 +32,8 @@ ; CHECK-NEXT: RET_ReallyLR entry: %x = alloca i32, align 4 - %0 = bitcast i32* %x 
to i8* - call void @callee(i32* nonnull %x) + call void @callee(ptr nonnull %x) ret void } -declare void @callee(i32*) +declare void @callee(ptr) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-dilocation.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-dilocation.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-dilocation.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-dilocation.ll @@ -5,10 +5,10 @@ ; CHECK: Checking DILocation from %retval = alloca i32, align 4 was copied to G_FRAME_INDEX ; CHECK: Checking DILocation from %rv = alloca i32, align 4 was copied to G_FRAME_INDEX -; CHECK: Checking DILocation from store i32 0, i32* %retval, align 4 was copied to G_CONSTANT -; CHECK: Checking DILocation from store i32 0, i32* %retval, align 4 was copied to G_STORE -; CHECK: Checking DILocation from store i32 0, i32* %rv, align 4, !dbg !12 was copied to G_STORE debug-location !12; t.cpp:2:5 -; CHECK: Checking DILocation from %0 = load i32, i32* %rv, align 4, !dbg !13 was copied to G_LOAD debug-location !13; t.cpp:3:8 +; CHECK: Checking DILocation from store i32 0, ptr %retval, align 4 was copied to G_CONSTANT +; CHECK: Checking DILocation from store i32 0, ptr %retval, align 4 was copied to G_STORE +; CHECK: Checking DILocation from store i32 0, ptr %rv, align 4, !dbg !12 was copied to G_STORE debug-location !12; t.cpp:2:5 +; CHECK: Checking DILocation from %0 = load i32, ptr %rv, align 4, !dbg !13 was copied to G_LOAD debug-location !13; t.cpp:3:8 ; CHECK: Checking DILocation from ret i32 %0, !dbg !14 was copied to COPY debug-location !14; t.cpp:3:1 ; CHECK: Checking DILocation from ret i32 %0, !dbg !14 was copied to RET_ReallyLR implicit $w0, debug-location !14; t.cpp:3:1 @@ -21,10 +21,10 @@ entry: %retval = alloca i32, align 4 %rv = alloca i32, align 4 - store i32 0, i32* %retval, align 4 - call void @llvm.dbg.declare(metadata i32* %rv, metadata !11, metadata !DIExpression()), !dbg !12 - store i32 0, i32* %rv, align 4, !dbg !12 - %0 = load i32, i32* %rv, align 4, !dbg !13 + store i32 0, ptr %retval, align 4 + call void @llvm.dbg.declare(metadata ptr %rv, metadata !11, metadata !DIExpression()), !dbg !12 + store i32 0, ptr %rv, align 4, !dbg !12 + %0 = load i32, ptr %rv, align 4, !dbg !13 ret i32 %0, !dbg !14 } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -O0 -mtriple=aarch64-apple-ios -global-isel -stop-after=irtranslator %s -o - | FileCheck %s -@_ZTIi = external global i8* +@_ZTIi = external global ptr declare i32 @foo(i32) declare i32 @__gxx_personality_v0(...) 
-declare i32 @llvm.eh.typeid.for(i8*) +declare i32 @llvm.eh.typeid.for(ptr) -define { i8*, i32 } @bar() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define { ptr, i32 } @bar() personality ptr @__gxx_personality_v0 { ; CHECK-LABEL: name: bar ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) @@ -45,16 +45,16 @@ broken: - %ptr.sel = landingpad { i8*, i32 } catch i8* bitcast(i8** @_ZTIi to i8*) - ret { i8*, i32 } %ptr.sel + %ptr.sel = landingpad { ptr, i32 } catch ptr @_ZTIi + ret { ptr, i32 } %ptr.sel continue: - %sel.int = tail call i32 @llvm.eh.typeid.for(i8* bitcast(i8** @_ZTIi to i8*)) - %res.good = insertvalue { i8*, i32 } undef, i32 %sel.int, 1 - ret { i8*, i32 } %res.good + %sel.int = tail call i32 @llvm.eh.typeid.for(ptr @_ZTIi) + %res.good = insertvalue { ptr, i32 } undef, i32 %sel.int, 1 + ret { ptr, i32 } %res.good } -define void @test_invoke_indirect(void()* %callee) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define void @test_invoke_indirect(ptr %callee) personality ptr @__gxx_personality_v0 { ; CHECK-LABEL: name: test_invoke_indirect ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) @@ -83,15 +83,15 @@ invoke void %callee() to label %continue unwind label %broken broken: - landingpad { i8*, i32 } catch i8* bitcast(i8** @_ZTIi to i8*) + landingpad { ptr, i32 } catch ptr @_ZTIi ret void continue: ret void } -declare void @printf(i8*, ...) -define void @test_invoke_varargs() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +declare void @printf(ptr, ...) +define void @test_invoke_varargs() personality ptr @__gxx_personality_v0 { ; CHECK-LABEL: name: test_invoke_varargs ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) @@ -127,10 +127,10 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.continue: ; CHECK-NEXT: RET_ReallyLR - invoke void(i8*, ...) @printf(i8* null, i32 42, float 1.0) to label %continue unwind label %broken + invoke void(ptr, ...) 
@printf(ptr null, i32 42, float 1.0) to label %continue unwind label %broken broken: - landingpad { i8*, i32 } catch i8* bitcast(i8** @_ZTIi to i8*) + landingpad { ptr, i32 } catch ptr @_ZTIi ret void continue: @@ -140,7 +140,7 @@ @global_var = external global i32 declare void @may_throw() -define i32 @test_lpad_phi() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define i32 @test_lpad_phi() personality ptr @__gxx_personality_v0 { ; CHECK-LABEL: name: test_lpad_phi ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) @@ -175,15 +175,15 @@ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C2]](s32), %bb.1, [[C3]](s32), %bb.2 ; CHECK-NEXT: $w0 = COPY [[PHI1]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 - store i32 42, i32* @global_var + store i32 42, ptr @global_var invoke void @may_throw() to label %continue unwind label %lpad lpad: ; preds = %entry %p = phi i32 [ 11, %0 ] ; Trivial, but -O0 keeps it - %1 = landingpad { i8*, i32 } - catch i8* null - store i32 %p, i32* @global_var + %1 = landingpad { ptr, i32 } + catch ptr null + store i32 %p, ptr @global_var br label %continue continue: ; preds = %entry, %lpad diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-extract-used-by-dbg.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-extract-used-by-dbg.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-extract-used-by-dbg.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-extract-used-by-dbg.ll @@ -11,11 +11,11 @@ br i1 undef, label %bb4, label %bb5 bb4: ; preds = %bb3 - %i = extractvalue { i8*, i64 } undef, 0 + %i = extractvalue { ptr, i64 } undef, 0 ret void bb5: ; preds = %bb3 - call void @llvm.dbg.value(metadata i8* %i, metadata !370, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), !dbg !372 + call void @llvm.dbg.value(metadata ptr %i, metadata !370, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), !dbg !372 ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-indirect-br-repeated-block.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-indirect-br-repeated-block.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-indirect-br-repeated-block.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-indirect-br-repeated-block.ll @@ -16,7 +16,7 @@ ; CHECK: successors: ; CHECK: bb.4 (%ir-block.3): ; CHECK: RET_ReallyLR - indirectbr i8* undef, [label %1, label %3, label %2, label %3, label %3] + indirectbr ptr undef, [label %1, label %3, label %2, label %3, label %3] 1: unreachable 2: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll @@ -119,7 +119,7 @@ ret i32 %0 } -define zeroext i8 @test_register_output_trunc(i8* %src) nounwind { +define zeroext i8 @test_register_output_trunc(ptr %src) nounwind { ; ; CHECK-LABEL: name: test_register_output_trunc ; CHECK: bb.1.entry: @@ -184,7 +184,7 @@ ret void } -define zeroext i8 @test_input_register(i8* %src) nounwind { +define zeroext i8 @test_input_register(ptr %src) nounwind { ; CHECK-LABEL: name: test_input_register ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $x0 @@ -198,11 +198,11 @@ ; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 entry: - %0 = tail call i8 asm "ldtrb ${0:w}, [$1]", "=r,r"(i8* %src) nounwind + %0 = tail call i8 asm "ldtrb ${0:w}, [$1]", 
"=r,r"(ptr %src) nounwind ret i8 %0 } -define i32 @test_memory_constraint(i32* %a) nounwind { +define i32 @test_memory_constraint(ptr %a) nounwind { ; CHECK-LABEL: name: test_memory_constraint ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $x0 @@ -212,7 +212,7 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: $w0 = COPY [[COPY1]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 - %1 = tail call i32 asm "ldr $0, $1", "=r,*m"(i32* elementtype(i32) %a) + %1 = tail call i32 asm "ldr $0, $1", "=r,*m"(ptr elementtype(i32) %a) ret i32 %1 } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-invoke-probabilities.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-invoke-probabilities.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-invoke-probabilities.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-invoke-probabilities.ll @@ -2,13 +2,13 @@ %struct.foo = type { i64, i64, %struct.pluto, %struct.pluto } %struct.pluto = type { %struct.wombat } -%struct.wombat = type { i32*, i32*, %struct.barney } +%struct.wombat = type { ptr, ptr, %struct.barney } %struct.barney = type { %struct.widget } -%struct.widget = type { i32* } +%struct.widget = type { ptr } declare i32 @hoge(...) -define void @pluto() align 2 personality i8* bitcast (i32 (...)* @hoge to i8*) { +define void @pluto() align 2 personality ptr @hoge { ; CHECK-LABEL: @pluto ; CHECK: bb.1.bb ; CHECK: successors: %bb.2(0x00000000), %bb.3(0x80000000) @@ -23,10 +23,10 @@ unreachable bb2: ; preds = %bb - %tmp = landingpad { i8*, i32 } + %tmp = landingpad { ptr, i32 } cleanup - %tmp3 = getelementptr inbounds %struct.foo, %struct.foo* undef, i64 0, i32 3, i32 0, i32 0 - resume { i8*, i32 } %tmp + %tmp3 = getelementptr inbounds %struct.foo, ptr undef, i64 0, i32 3, i32 0, i32 0 + resume { ptr, i32 } %tmp } declare void @spam() diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -mtriple=aarch64-- -mcpu=falkor -O0 -aarch64-enable-atomic-cfg-tidy=0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s -define i32 @load_invariant(i32* %ptr) { +define i32 @load_invariant(ptr %ptr) { ; CHECK-LABEL: name: load_invariant ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -9,11 +9,11 @@ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (invariant load (s32) from %ir.ptr) ; CHECK: $w0 = COPY [[LOAD]](s32) ; CHECK: RET_ReallyLR implicit $w0 - %load = load i32, i32* %ptr, align 4, !invariant.load !0 + %load = load i32, ptr %ptr, align 4, !invariant.load !0 ret i32 %load } -define i32 @load_volatile_invariant(i32* %ptr) { +define i32 @load_volatile_invariant(ptr %ptr) { ; CHECK-LABEL: name: load_volatile_invariant ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -21,11 +21,11 @@ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (volatile invariant load (s32) from %ir.ptr) ; CHECK: $w0 = COPY [[LOAD]](s32) ; CHECK: RET_ReallyLR implicit $w0 - %load = load volatile i32, i32* %ptr, align 4, !invariant.load !0 + %load = load volatile i32, ptr %ptr, align 4, !invariant.load !0 ret i32 %load } -define i32 @load_dereferenceable(i32* dereferenceable(4) %ptr) { +define i32 @load_dereferenceable(ptr dereferenceable(4) %ptr) { ; 
CHECK-LABEL: name: load_dereferenceable ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -33,11 +33,11 @@ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (dereferenceable load (s32) from %ir.ptr) ; CHECK: $w0 = COPY [[LOAD]](s32) ; CHECK: RET_ReallyLR implicit $w0 - %load = load i32, i32* %ptr, align 4 + %load = load i32, ptr %ptr, align 4 ret i32 %load } -define i32 @load_dereferenceable_invariant(i32* dereferenceable(4) %ptr) { +define i32 @load_dereferenceable_invariant(ptr dereferenceable(4) %ptr) { ; CHECK-LABEL: name: load_dereferenceable_invariant ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -45,11 +45,11 @@ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (dereferenceable invariant load (s32) from %ir.ptr) ; CHECK: $w0 = COPY [[LOAD]](s32) ; CHECK: RET_ReallyLR implicit $w0 - %load = load i32, i32* %ptr, align 4, !invariant.load !0 + %load = load i32, ptr %ptr, align 4, !invariant.load !0 ret i32 %load } -define i32 @load_nontemporal(i32* %ptr) { +define i32 @load_nontemporal(ptr %ptr) { ; CHECK-LABEL: name: load_nontemporal ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -57,11 +57,11 @@ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (non-temporal load (s32) from %ir.ptr) ; CHECK: $w0 = COPY [[LOAD]](s32) ; CHECK: RET_ReallyLR implicit $w0 - %load = load i32, i32* %ptr, align 4, !nontemporal !0 + %load = load i32, ptr %ptr, align 4, !nontemporal !0 ret i32 %load } -define i32 @load_falkor_strided_access(i32* %ptr) { +define i32 @load_falkor_strided_access(ptr %ptr) { ; CHECK-LABEL: name: load_falkor_strided_access ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -69,7 +69,7 @@ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: ("aarch64-strided-access" load (s32) from %ir.ptr) ; CHECK: $w0 = COPY [[LOAD]](s32) ; CHECK: RET_ReallyLR implicit $w0 - %load = load i32, i32* %ptr, align 4, !falkor.strided.access !0 + %load = load i32, ptr %ptr, align 4, !falkor.strided.access !0 ret i32 %load } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-localescape.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-localescape.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-localescape.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-localescape.ll @@ -4,20 +4,20 @@ define void @local_escape() { ; CHECK-LABEL: name: local_escape ; CHECK: bb.1 (%ir-block.0): - ; CHECK: LOCAL_ESCAPE , %stack.1.b - ; CHECK: LOCAL_ESCAPE , %stack.0.a - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.b - ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store (s32) into %ir.a) - ; CHECK: G_STORE [[C1]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.b) - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: LOCAL_ESCAPE , %stack.1.b + ; CHECK-NEXT: LOCAL_ESCAPE , %stack.0.a + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.b + ; CHECK-NEXT: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store (s32) into %ir.a) + ; CHECK-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.b) + ; CHECK-NEXT: RET_ReallyLR %a = alloca i32 %b = alloca i32, i32 2 - call void (...) 
@llvm.localescape(i32* %a, i32* %b) - store i32 42, i32* %a - store i32 13, i32* %b + call void (...) @llvm.localescape(ptr %a, ptr %b) + store i32 42, ptr %a + store i32 13, ptr %b ret void } @@ -25,43 +25,42 @@ define void @local_escape_insert_point() { ; CHECK-LABEL: name: local_escape_insert_point ; CHECK: bb.1 (%ir-block.0): - ; CHECK: LOCAL_ESCAPE , %stack.1.b - ; CHECK: LOCAL_ESCAPE , %stack.0.a - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.b - ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store (s32) into %ir.a) - ; CHECK: G_STORE [[C1]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.b) - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: LOCAL_ESCAPE , %stack.1.b + ; CHECK-NEXT: LOCAL_ESCAPE , %stack.0.a + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.b + ; CHECK-NEXT: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store (s32) into %ir.a) + ; CHECK-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.b) + ; CHECK-NEXT: RET_ReallyLR %a = alloca i32 %b = alloca i32, i32 2 - store i32 42, i32* %a - store i32 13, i32* %b - call void (...) @llvm.localescape(i32* %a, i32* null, i32* %b) + store i32 42, ptr %a + store i32 13, ptr %b + call void (...) @llvm.localescape(ptr %a, ptr null, ptr %b) ret void } -declare void @foo([128 x i32]*) +declare void @foo(ptr) ; Check a cast of an alloca define void @local_escape_strip_ptr_cast() { ; CHECK-LABEL: name: local_escape_strip_ptr_cast ; CHECK: bb.1 (%ir-block.0): - ; CHECK: LOCAL_ESCAPE , %stack.0.a - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a - ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store (s32) into %ir.cast) - ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK: $x0 = COPY [[FRAME_INDEX]](p0) - ; CHECK: BL @foo, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0 - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: LOCAL_ESCAPE , %stack.0.a + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a + ; CHECK-NEXT: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store (s32) into %ir.a) + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: $x0 = COPY [[FRAME_INDEX]](p0) + ; CHECK-NEXT: BL @foo, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: RET_ReallyLR %a = alloca [128 x i32] - %cast = bitcast [128 x i32]* %a to i32* - store i32 42, i32* %cast - call void (...) @llvm.localescape(i32* %cast, i32* null) - call void @foo([128 x i32]* %a) + store i32 42, ptr %a + call void (...) 
@llvm.localescape(ptr %a, ptr null) + call void @foo(ptr %a) ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-max-address-space.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-max-address-space.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-max-address-space.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-max-address-space.ll @@ -3,8 +3,8 @@ ; CHECK-LABEL: name: store_max_address_space ; CHECK: %0:_(p16777215) = COPY $x0 ; CHECK: G_STORE %1(s32), %0(p16777215) :: (store (s32) into %ir.ptr, addrspace 16777215) -define void @store_max_address_space(i32 addrspace(16777215)* %ptr) { - store i32 0, i32 addrspace(16777215)* %ptr +define void @store_max_address_space(ptr addrspace(16777215) %ptr) { + store i32 0, ptr addrspace(16777215) %ptr ret void } @@ -12,15 +12,15 @@ ; CHECK: %0:_(<2 x p16777215>) = COPY $q0 ; CHECK: %1:_(p16777215) = G_EXTRACT_VECTOR_ELT %0(<2 x p16777215>), %2(s64) ; CHECK: %1(p16777215) :: (store (s32) into %ir.elt0, addrspace 16777215) -define void @store_max_address_space_vector(<2 x i32 addrspace(16777215)*> %vptr) { - %elt0 = extractelement <2 x i32 addrspace(16777215)*> %vptr, i32 0 - store i32 0, i32 addrspace(16777215)* %elt0 +define void @store_max_address_space_vector(<2 x ptr addrspace(16777215)> %vptr) { + %elt0 = extractelement <2 x ptr addrspace(16777215)> %vptr, i32 0 + store i32 0, ptr addrspace(16777215) %elt0 ret void } ; CHECK-LABEL: name: max_address_space_vector_max_num_elts -; CHECK: %0:_(<65535 x p16777215>) = G_LOAD %1(p0) :: (volatile load (<65535 x p16777215>) from `<65535 x i32 addrspace(16777215)*>* undef`, align 524288) +; CHECK: %0:_(<65535 x p16777215>) = G_LOAD %1(p0) :: (volatile load (<65535 x p16777215>) from `ptr undef`, align 524288) define void @max_address_space_vector_max_num_elts() { - %load = load volatile <65535 x i32 addrspace(16777215)*>, <65535 x i32 addrspace(16777215)*>* undef + %load = load volatile <65535 x ptr addrspace(16777215)>, ptr undef ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -mtriple=aarch64-unknown-unknown -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=irtranslator %s -o - | FileCheck %s -define void @copy(i8* %dst, i8* %src) { +define void @copy(ptr %dst, ptr %src) { ; CHECK-LABEL: name: copy ; CHECK: bb.1.entry: ; CHECK: liveins: $x0, $x1 @@ -12,11 +12,11 @@ ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst), (load (s8) from %ir.src) ; CHECK: RET_ReallyLR entry: - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr %dst, ptr %src, i32 4, i1 false) ret void } -define void @inline_copy(i8* %dst, i8* %src) { +define void @inline_copy(ptr %dst, ptr %src) { ; CHECK-LABEL: name: inline_copy ; CHECK: bb.1.entry: ; CHECK: liveins: $x0, $x1 @@ -27,11 +27,11 @@ ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (store (s8) into %ir.dst), (load (s8) from %ir.src) ; CHECK: RET_ReallyLR entry: - call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false) + call void @llvm.memcpy.inline.p0.p0.i32(ptr %dst, ptr %src, i32 4, i1 false) ret void } 
-define void @copy_volatile(i8* %dst, i8* %src) { +define void @copy_volatile(ptr %dst, ptr %src) { ; CHECK-LABEL: name: copy_volatile ; CHECK: bb.1.entry: ; CHECK: liveins: $x0, $x1 @@ -42,11 +42,11 @@ ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 0 :: (volatile store (s8) into %ir.dst), (volatile load (s8) from %ir.src) ; CHECK: RET_ReallyLR entry: - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true) + call void @llvm.memcpy.p0.p0.i32(ptr %dst, ptr %src, i32 4, i1 true) ret void } -define void @inline_copy_volatile(i8* %dst, i8* %src) { +define void @inline_copy_volatile(ptr %dst, ptr %src) { ; CHECK-LABEL: name: inline_copy_volatile ; CHECK: bb.1.entry: ; CHECK: liveins: $x0, $x1 @@ -57,11 +57,11 @@ ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (volatile store (s8) into %ir.dst), (volatile load (s8) from %ir.src) ; CHECK: RET_ReallyLR entry: - call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true) + call void @llvm.memcpy.inline.p0.p0.i32(ptr %dst, ptr %src, i32 4, i1 true) ret void } -define void @tail_copy(i8* %dst, i8* %src) { +define void @tail_copy(ptr %dst, ptr %src) { ; CHECK-LABEL: name: tail_copy ; CHECK: bb.1.entry: ; CHECK: liveins: $x0, $x1 @@ -72,11 +72,11 @@ ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 1 :: (store (s8) into %ir.dst), (load (s8) from %ir.src) ; CHECK: RET_ReallyLR entry: - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false) + tail call void @llvm.memcpy.p0.p0.i32(ptr %dst, ptr %src, i32 4, i1 false) ret void } -define void @tail_inline_copy(i8* %dst, i8* %src) { +define void @tail_inline_copy(ptr %dst, ptr %src) { ; CHECK-LABEL: name: tail_inline_copy ; CHECK: bb.1.entry: ; CHECK: liveins: $x0, $x1 @@ -87,11 +87,11 @@ ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (store (s8) into %ir.dst), (load (s8) from %ir.src) ; CHECK: RET_ReallyLR entry: - tail call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false) + tail call void @llvm.memcpy.inline.p0.p0.i32(ptr %dst, ptr %src, i32 4, i1 false) ret void } -define void @tail_copy_volatile(i8* %dst, i8* %src) { +define void @tail_copy_volatile(ptr %dst, ptr %src) { ; CHECK-LABEL: name: tail_copy_volatile ; CHECK: bb.1.entry: ; CHECK: liveins: $x0, $x1 @@ -102,11 +102,11 @@ ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 1 :: (volatile store (s8) into %ir.dst), (volatile load (s8) from %ir.src) ; CHECK: RET_ReallyLR entry: - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true) + tail call void @llvm.memcpy.p0.p0.i32(ptr %dst, ptr %src, i32 4, i1 true) ret void } -define void @tail_inline_copy_volatile(i8* %dst, i8* %src) { +define void @tail_inline_copy_volatile(ptr %dst, ptr %src) { ; CHECK-LABEL: name: tail_inline_copy_volatile ; CHECK: bb.1.entry: ; CHECK: liveins: $x0, $x1 @@ -117,9 +117,9 @@ ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (volatile store (s8) into %ir.dst), (volatile load (s8) from %ir.src) ; CHECK: RET_ReallyLR entry: - tail call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true) + tail call void @llvm.memcpy.inline.p0.p0.i32(ptr %dst, ptr %src, i32 4, i1 true) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) nounwind -declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) nounwind +declare void @llvm.memcpy.p0.p0.i32(ptr 
nocapture writeonly, ptr nocapture readonly, i32, i1) nounwind +declare void @llvm.memcpy.inline.p0.p0.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) nounwind diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memfunc-undef.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memfunc-undef.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memfunc-undef.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memfunc-undef.ll @@ -7,8 +7,7 @@ ; CHECK-NEXT: RET_ReallyLR entry: %buf = alloca [512 x i8], align 1 - %ptr = getelementptr inbounds [512 x i8], [512 x i8]* %buf, i32 0, i32 0 - call void @llvm.memset.p0i8.i32(i8* %ptr, i8 undef, i32 512, i1 false) + call void @llvm.memset.p0.i32(ptr %buf, i8 undef, i32 512, i1 false) ret void } @@ -19,8 +18,7 @@ ; CHECK-NEXT: RET_ReallyLR entry: %buf = alloca [512 x i8], align 1 - %ptr = getelementptr inbounds [512 x i8], [512 x i8]* %buf, i32 0, i32 0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* undef, i32 512, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr %buf, ptr undef, i32 512, i1 false) ret void } @@ -31,11 +29,10 @@ ; CHECK-NEXT: RET_ReallyLR entry: %buf = alloca [512 x i8], align 1 - %ptr = getelementptr inbounds [512 x i8], [512 x i8]* %buf, i32 0, i32 0 - call void @llvm.memmove.p0i8.p0i8.i32(i8* %ptr, i8* undef, i32 512, i1 false) + call void @llvm.memmove.p0.p0.i32(ptr %buf, ptr undef, i32 512, i1 false) ret void } -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) nounwind -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memset.p0.i32(ptr nocapture, i8, i32, i1) nounwind +declare void @llvm.memcpy.p0.p0.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) nounwind +declare void @llvm.memmove.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-no-op-intrinsics.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-no-op-intrinsics.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-no-op-intrinsics.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-no-op-intrinsics.ll @@ -13,7 +13,7 @@ ret i64 %expval } -define i8* @ptr_annotate(i8* %arg0, i8* %arg1, i8* %arg2, i32 %arg3) { +define ptr @ptr_annotate(ptr %arg0, ptr %arg1, ptr %arg2, i32 %arg3) { ; CHECK-LABEL: name: ptr_annotate ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $w3, $x0, $x1, $x2 @@ -24,8 +24,8 @@ ; CHECK: [[COPY4:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) ; CHECK: $x0 = COPY [[COPY4]](p0) ; CHECK: RET_ReallyLR implicit $x0 - %call = call i8* @llvm.ptr.annotation.p0i8(i8* %arg0, i8* %arg1, i8* %arg2, i32 %arg3, i8* null) - ret i8* %call + %call = call ptr @llvm.ptr.annotation.p0(ptr %arg0, ptr %arg1, ptr %arg2, i32 %arg3, ptr null) + ret ptr %call } @.str = private unnamed_addr constant [4 x i8] c"sth\00", section "llvm.metadata" @@ -39,11 +39,11 @@ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: $w0 = COPY [[COPY1]](s32) ; CHECK: RET_ReallyLR implicit $w0 - %call = call i32 @llvm.annotation.i32(i32 %a, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i32 0, i32 0), i32 2) + %call = call i32 @llvm.annotation.i32(i32 %a, ptr @.str, ptr @.str1, i32 2) ret i32 %call } -define i8* @launder_invariant_group(i8* %p) { +define ptr @launder_invariant_group(ptr %p) { ; CHECK-LABEL: 
name: launder_invariant_group ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -51,11 +51,11 @@ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) ; CHECK: $x0 = COPY [[COPY1]](p0) ; CHECK: RET_ReallyLR implicit $x0 - %q = call i8* @llvm.launder.invariant.group.p0i8(i8* %p) - ret i8* %q + %q = call ptr @llvm.launder.invariant.group.p0(ptr %p) + ret ptr %q } -define i8* @strip_invariant_group(i8* %p) { +define ptr @strip_invariant_group(ptr %p) { ; CHECK-LABEL: name: strip_invariant_group ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -63,15 +63,15 @@ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) ; CHECK: $x0 = COPY [[COPY1]](p0) ; CHECK: RET_ReallyLR implicit $x0 - %q = call i8* @llvm.strip.invariant.group.p0i8(i8* %p) - ret i8* %q + %q = call ptr @llvm.strip.invariant.group.p0(ptr %p) + ret ptr %q } declare i64 @llvm.expect.i64(i64, i64) #0 -declare i8* @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32, i8*) #1 -declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32) #1 -declare i8* @llvm.launder.invariant.group.p0i8(i8*) #2 -declare i8* @llvm.strip.invariant.group.p0i8(i8*) #3 +declare ptr @llvm.ptr.annotation.p0(ptr, ptr, ptr, i32, ptr) #1 +declare i32 @llvm.annotation.i32(i32, ptr, ptr, i32) #1 +declare ptr @llvm.launder.invariant.group.p0(ptr) #2 +declare ptr @llvm.strip.invariant.group.p0(ptr) #3 attributes #0 = { nounwind readnone willreturn } attributes #1 = { nounwind willreturn } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-no-unwind-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-no-unwind-inline-asm.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-no-unwind-inline-asm.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-no-unwind-inline-asm.ll @@ -10,7 +10,7 @@ unreachable } -define dso_local void @test() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define dso_local void @test() personality ptr @__gxx_personality_v0 { entry: ; CHECK-LABEL: name: test @@ -30,13 +30,13 @@ ; CHECK: bb.3.lpad ; CHECK: EH_LABEL - %0 = landingpad { i8*, i32 } + %0 = landingpad { ptr, i32 } cleanup - call void (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.2, i64 0, i64 0)) - resume { i8*, i32 } %0 + call void (ptr, ...) @printf(ptr @.str.2) + resume { ptr, i32 } %0 } declare dso_local i32 @__gxx_personality_v0(...) -declare dso_local void @printf(i8*, ...) +declare dso_local void @printf(ptr, ...) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-one-by-n-vector-ptr-add.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-one-by-n-vector-ptr-add.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-one-by-n-vector-ptr-add.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-one-by-n-vector-ptr-add.ll @@ -6,7 +6,7 @@ ; We should not create a splat vector for the non-vector index on this ; getelementptr. The entire getelementptr should be translated to a scalar ; G_PTR_ADD. 
-define <1 x i8*> @one_elt_vector_ptr_add_non_vector_idx(<1 x i8*> %vec) { +define <1 x ptr> @one_elt_vector_ptr_add_non_vector_idx(<1 x ptr> %vec) { ; CHECK-LABEL: name: one_elt_vector_ptr_add_non_vector_idx ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $d0 @@ -18,14 +18,14 @@ ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) ; CHECK: $d0 = COPY [[COPY2]](p0) ; CHECK: RET_ReallyLR implicit $d0 - %ptr_add = getelementptr i8, <1 x i8*> %vec, <1 x i32> - ret <1 x i8*> %ptr_add + %ptr_add = getelementptr i8, <1 x ptr> %vec, <1 x i32> + ret <1 x ptr> %ptr_add } ; We should not create a splat vector for the non-vector pointer on this ; getelementptr. The entire getelementptr should be translated to a scalar ; G_PTR_ADD. -define <1 x i8*> @one_elt_vector_ptr_add_non_vector_ptr(i8* %vec) { +define <1 x ptr> @one_elt_vector_ptr_add_non_vector_ptr(ptr %vec) { ; CHECK-LABEL: name: one_elt_vector_ptr_add_non_vector_ptr ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -37,6 +37,6 @@ ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) ; CHECK: $d0 = COPY [[COPY2]](p0) ; CHECK: RET_ReallyLR implicit $d0 - %ptr_add = getelementptr i8, i8* %vec, <1 x i32> - ret <1 x i8*> %ptr_add + %ptr_add = getelementptr i8, ptr %vec, <1 x i32> + ret <1 x ptr> %ptr_add } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-objects.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-objects.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-objects.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-objects.ll @@ -11,7 +11,7 @@ ; CHECK: - { id: 1, type: default, offset: 0, size: 8, alignment: 16, stack-id: default, ; CHECK-NEXT: isImmutable: true, isAliased: false, define void @stack_passed_i64(i64 %arg, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6, - i64 %arg7, i64 %arg8, i64* byval(i64) %arg9) { + i64 %arg7, i64 %arg8, ptr byval(i64) %arg9) { ; CHECK: bb.1 (%ir-block.0): ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16) @@ -21,8 +21,8 @@ ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD1]], [[LOAD]] ; CHECK: G_STORE [[ADD]](s64), [[COPY8]](p0) :: (volatile store (s64) into %ir.arg9) ; CHECK: RET_ReallyLR - %load = load i64, i64* %arg9 + %load = load i64, ptr %arg9 %add = add i64 %load, %arg8 - store volatile i64 %add, i64* %arg9 + store volatile i64 %add, ptr %arg9 ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-protector-windows.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-protector-windows.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-protector-windows.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-protector-windows.ll @@ -30,9 +30,8 @@ ; CHECK-NEXT: .seh_endproc entry: %x = alloca i32, align 4 - %0 = bitcast i32* %x to i8* - call void @callee(i32* nonnull %x) + call void @callee(ptr nonnull %x) ret void } -declare void @callee(i32*) +declare void @callee(ptr) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-store-metadata.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-store-metadata.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-store-metadata.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-store-metadata.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -mtriple=aarch64-- -mcpu=falkor -O0 
-aarch64-enable-atomic-cfg-tidy=0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s -define void @store_nontemporal(i32* dereferenceable(4) %ptr) { +define void @store_nontemporal(ptr dereferenceable(4) %ptr) { ; CHECK-LABEL: name: store_nontemporal ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -9,11 +9,11 @@ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (non-temporal store (s32) into %ir.ptr) ; CHECK: RET_ReallyLR - store i32 0, i32* %ptr, align 4, !nontemporal !0 + store i32 0, ptr %ptr, align 4, !nontemporal !0 ret void } -define void @store_dereferenceable(i32* dereferenceable(4) %ptr) { +define void @store_dereferenceable(ptr dereferenceable(4) %ptr) { ; CHECK-LABEL: name: store_dereferenceable ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -21,11 +21,11 @@ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.ptr) ; CHECK: RET_ReallyLR - store i32 0, i32* %ptr, align 4 + store i32 0, ptr %ptr, align 4 ret void } -define void @store_volatile_dereferenceable(i32* dereferenceable(4) %ptr) { +define void @store_volatile_dereferenceable(ptr dereferenceable(4) %ptr) { ; CHECK-LABEL: name: store_volatile_dereferenceable ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -33,11 +33,11 @@ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (volatile store (s32) into %ir.ptr) ; CHECK: RET_ReallyLR - store volatile i32 0, i32* %ptr, align 4 + store volatile i32 0, ptr %ptr, align 4 ret void } -define void @store_falkor_strided_access(i32* %ptr) { +define void @store_falkor_strided_access(ptr %ptr) { ; CHECK-LABEL: name: store_falkor_strided_access ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -45,7 +45,7 @@ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: ("aarch64-strided-access" store (s32) into %ir.ptr) ; CHECK: RET_ReallyLR - store i32 0, i32* %ptr, align 4, !falkor.strided.access !0 + store i32 0, ptr %ptr, align 4, !falkor.strided.access !0 ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-bittest.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-bittest.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-bittest.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-bittest.ll @@ -229,7 +229,7 @@ ; CHECK: bb.3.if.end: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[DEF1]](p0) :: (load (p0) from `i8** undef`) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[DEF1]](p0) :: (load (p0) from `ptr undef`) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4.return: ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.3, [[C1]](s1), %bb.5 @@ -253,7 +253,7 @@ unreachable if.end: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry - %0 = load i8*, i8** undef, align 8 + %0 = load ptr, ptr undef, align 8 br label %return return: ; preds = %if.end, %entry, %entry, %entry, %entry diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-tbaa.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-tbaa.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-tbaa.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-tbaa.ll @@ -4,11 +4,10 @@ define void @snork() { bb: - %tmp1 = getelementptr i16, i16* null, i64 0 - %tmp5 = getelementptr i16, i16* null, i64 2 - %tmp6 = load i16, i16* %tmp1, align 2, 
!tbaa !0 - store i16 %tmp6, i16* %tmp5, align 2, !tbaa !0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD %{{[0-9]+}}(p0) :: (load (s16) from %ir.tmp1, !tbaa !0) + %tmp5 = getelementptr i16, ptr null, i64 2 + %tmp6 = load i16, ptr null, align 2, !tbaa !0 + store i16 %tmp6, ptr %tmp5, align 2, !tbaa !0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD %{{[0-9]+}}(p0) :: (load (s16) from `ptr null`, !tbaa !0) ; CHECK: G_STORE [[LOAD]](s16), %{{[0-9]+}}(p0) :: (store (s16) into %ir.tmp5, !tbaa !0) ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll @@ -13,13 +13,12 @@ unreachable } -define dso_local void @test() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define dso_local void @test() personality ptr @__gxx_personality_v0 { ; CHECK-LABEL: name: test ; CHECK: bb.1.entry: ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @.str.2 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[GV]](p0) ; CHECK-NEXT: EH_LABEL ; CHECK-NEXT: INLINEASM &"bl trap", 1 /* sideeffect attdialect */ ; CHECK-NEXT: EH_LABEL @@ -33,15 +32,15 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: EH_LABEL ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x1 - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY2]](p0) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p0) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK-NEXT: $x0 = COPY [[COPY]](p0) + ; CHECK-NEXT: $x0 = COPY [[GV]](p0) ; CHECK-NEXT: BL @printf, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK-NEXT: $x0 = COPY [[COPY1]](p0) + ; CHECK-NEXT: $x0 = COPY [[COPY]](p0) ; CHECK-NEXT: BL @_Unwind_Resume, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp entry: @@ -55,14 +54,14 @@ lpad: - %0 = landingpad { i8*, i32 } + %0 = landingpad { ptr, i32 } cleanup - call void (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.2, i64 0, i64 0)) - resume { i8*, i32 } %0 + call void (ptr, ...) 
@printf(ptr @.str.2) + resume { ptr, i32 } %0 } -define void @test2() #0 personality i32 (...)* @__gcc_personality_v0 { +define void @test2() #0 personality ptr @__gcc_personality_v0 { ; CHECK-LABEL: name: test2 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) @@ -87,15 +86,15 @@ ; CHECK-NEXT: $x0 = COPY [[COPY1]](p0) ; CHECK-NEXT: BL @_Unwind_Resume, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - invoke void asm sideeffect "", "r"(i64* undef) to label %a unwind label %b + invoke void asm sideeffect "", "r"(ptr undef) to label %a unwind label %b a: ret void b: - %landing_pad = landingpad { i8*, i32 } cleanup - resume { i8*, i32 } %landing_pad + %landing_pad = landingpad { ptr, i32 } cleanup + resume { ptr, i32 } %landing_pad } declare i32 @__gcc_personality_v0(...) declare dso_local i32 @__gxx_personality_v0(...) -declare dso_local void @printf(i8*, ...) +declare dso_local void @printf(ptr, ...) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-volatile-load-pr36018.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-volatile-load-pr36018.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-volatile-load-pr36018.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-volatile-load-pr36018.ll @@ -7,7 +7,7 @@ define hidden void @foo() { ; CHECK-NOT: ldrh ; CHECK: ldrsh - %1 = load volatile i16, i16* @g, align 2 + %1 = load volatile i16, ptr @g, align 2 %2 = sext i16 %1 to i32 call void @bar(i32 %2) ret void diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-weird-alloca-size.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-weird-alloca-size.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-weird-alloca-size.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-weird-alloca-size.ll @@ -12,8 +12,8 @@ ; CHECK-NEXT: - { id: 0, name: stack_slot, type: default, offset: 0, size: 4, alignment: 4 define void @foo() { %stack_slot = alloca i19 - call void @bar(i19* %stack_slot) + call void @bar(ptr %stack_slot) ret void } -declare void @bar(i19* %a) +declare void @bar(ptr %a) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -O0 -mtriple=aarch64-apple-ios -verify-machineinstrs -global-isel -stop-after=legalizer %s -o - | FileCheck %s -@_ZTIi = external global i8* +@_ZTIi = external global ptr declare i32 @foo(i32) declare i32 @__gxx_personality_v0(...) 
-declare i32 @llvm.eh.typeid.for(i8*) -declare void @_Unwind_Resume(i8*) +declare i32 @llvm.eh.typeid.for(ptr) +declare void @_Unwind_Resume(ptr) -define void @bar() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define void @bar() personality ptr @__gxx_personality_v0 { ; CHECK-LABEL: name: bar ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) @@ -47,23 +47,23 @@ ; CHECK-NEXT: BL @_Unwind_Resume, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.trap) - %exn.slot = alloca i8* + %exn.slot = alloca ptr %ehselector.slot = alloca i32 %1 = invoke i32 @foo(i32 42) to label %continue unwind label %cleanup cleanup: - %2 = landingpad { i8*, i32 } cleanup - %3 = extractvalue { i8*, i32 } %2, 0 - store i8* %3, i8** %exn.slot, align 8 - %4 = extractvalue { i8*, i32 } %2, 1 - store i32 %4, i32* %ehselector.slot, align 4 + %2 = landingpad { ptr, i32 } cleanup + %3 = extractvalue { ptr, i32 } %2, 0 + store ptr %3, ptr %exn.slot, align 8 + %4 = extractvalue { ptr, i32 } %2, 1 + store i32 %4, ptr %ehselector.slot, align 4 br label %eh.resume continue: ret void eh.resume: - %exn = load i8*, i8** %exn.slot, align 8 - call void @_Unwind_Resume(i8* %exn) + %exn = load ptr, ptr %exn.slot, align 8 + call void @_Unwind_Resume(ptr %exn) unreachable } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll @@ -50,15 +50,15 @@ ; CHECK-NEXT: $w0 = COPY [[C6]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 entry: - %0 = load i32, i32* @var1, align 4 + %0 = load i32, ptr @var1, align 4 %cmp = icmp eq i32 %0, 1 br i1 %cmp, label %if.then, label %if.end if.then: - store i32 2, i32* @var2, align 4 - store i32 3, i32* @var1, align 4 - store i32 2, i32* @var3, align 4 - store i32 3, i32* @var1, align 4 + store i32 2, ptr @var2, align 4 + store i32 3, ptr @var1, align 4 + store i32 2, ptr @var3, align 4 + store i32 3, ptr @var1, align 4 br label %if.end if.end: @@ -101,13 +101,13 @@ ; CHECK-NEXT: $w0 = COPY [[C2]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 entry: - %0 = load i32, i32* @var1, align 4 + %0 = load i32, ptr @var1, align 4 %cmp = icmp eq i32 %0, 1 br i1 %cmp, label %if.then, label %if.end if.then: - %tls = load i32, i32* @tls_gv, align 4 - store i32 %tls, i32* @var2, align 4 + %tls = load i32, ptr @tls_gv, align 4 + store i32 %tls, ptr @var2, align 4 br label %if.end if.end: @@ -152,21 +152,21 @@ ; CHECK-NEXT: $w0 = COPY [[C3]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 entry: - %0 = load i32, i32* @var1, align 4 + %0 = load i32, ptr @var1, align 4 %cst1 = bitcast i32 -2228259 to i32 %cmp = icmp eq i32 %0, 1 br i1 %cmp, label %if.then, label %if.end if.then: - store i32 %cst1, i32* @var2 + store i32 %cst1, ptr @var2 br label %if.then2 if.then2: - store i32 %cst1, i32* @var1 + store i32 %cst1, ptr @var1 br label %if.end if.end: - store i32 %cst1, i32* @var3 + store i32 %cst1, ptr @var3 ret i32 0 } @@ -209,21 +209,21 @@ ; CHECK-NEXT: $x0 = COPY [[C4]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 entry: - %0 = load i64, i64* @var1_64, align 4 + %0 = load i64, ptr @var1_64, align 4 %cst1 = bitcast i64 -2228259 to i64 %cmp = icmp eq i64 %0, 1 br i1 %cmp, label %if.then, label %if.end if.then: - store i64 
%cst1, i64* @var2_64 + store i64 %cst1, ptr @var2_64 br label %if.then2 if.then2: - store i64 %cst1, i64* @var1_64 + store i64 %cst1, ptr @var1_64 br label %if.end if.end: - store i64 %cst1, i64* @var3_64 + store i64 %cst1, ptr @var3_64 ret i64 0 } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/memcpy_chk_no_tail.ll b/llvm/test/CodeGen/AArch64/GlobalISel/memcpy_chk_no_tail.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/memcpy_chk_no_tail.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/memcpy_chk_no_tail.ll @@ -9,20 +9,18 @@ ; CHECK-LABEL: @usqrt ; CHECK-NOT: b memcpy ; CHECK: bl _memcpy -define void @usqrt(i32 %x, %struct.int_sqrt* %q) local_unnamed_addr #0 { +define void @usqrt(i32 %x, ptr %q) local_unnamed_addr #0 { %a = alloca i32, align 4 - %bc = bitcast i32* %a to i8* - %bc2 = bitcast %struct.int_sqrt* %q to i8* - %obj = tail call i64 @llvm.objectsize.i64.p0i8(i8* %bc2, i1 false, i1 true, i1 false) - %call = call i8* @__memcpy_chk(i8* %bc2, i8* nonnull %bc, i64 1000, i64 %obj) #4 + %obj = tail call i64 @llvm.objectsize.i64.p0(ptr %q, i1 false, i1 true, i1 false) + %call = call ptr @__memcpy_chk(ptr %q, ptr nonnull %a, i64 1000, i64 %obj) #4 ret void } ; Function Attrs: nofree nounwind optsize -declare i8* @__memcpy_chk(i8*, i8*, i64, i64) local_unnamed_addr #2 +declare ptr @__memcpy_chk(ptr, ptr, i64, i64) local_unnamed_addr #2 ; Function Attrs: nounwind readnone speculatable willreturn -declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #3 +declare i64 @llvm.objectsize.i64.p0(ptr, i1 immarg, i1 immarg, i1 immarg) #3 attributes #0 = { optsize "disable-tail-calls"="false" "frame-pointer"="all" } attributes #2 = { nofree nounwind "disable-tail-calls"="false" "frame-pointer"="all" } attributes #3 = { nounwind readnone speculatable willreturn } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-apple-ios -global-isel -global-isel-abort=1 | FileCheck %s -define dso_local void @trunc_i16_to_i8(i16 %x, i8* %p) { +define dso_local void @trunc_i16_to_i8(i16 %x, ptr %p) { ; CHECK-LABEL: trunc_i16_to_i8: ; CHECK: ; %bb.0: ; CHECK-NEXT: strh w0, [x1] @@ -9,13 +9,13 @@ %t1 = trunc i16 %x to i8 %sh = lshr i16 %x, 8 %t2 = trunc i16 %sh to i8 - store i8 %t1, i8* %p, align 1 - %p1 = getelementptr inbounds i8, i8* %p, i64 1 - store i8 %t2, i8* %p1, align 1 + store i8 %t1, ptr %p, align 1 + %p1 = getelementptr inbounds i8, ptr %p, i64 1 + store i8 %t2, ptr %p1, align 1 ret void } -define dso_local void @trunc_i32_to_i8(i32 %x, i8* %p) { +define dso_local void @trunc_i32_to_i8(i32 %x, ptr %p) { ; CHECK-LABEL: trunc_i32_to_i8: ; CHECK: ; %bb.0: ; CHECK-NEXT: str w0, [x1] @@ -27,17 +27,17 @@ %t3 = trunc i32 %sh2 to i8 %sh3 = lshr i32 %x, 24 %t4 = trunc i32 %sh3 to i8 - store i8 %t1, i8* %p, align 1 - %p1 = getelementptr inbounds i8, i8* %p, i64 1 - store i8 %t2, i8* %p1, align 1 - %p2 = getelementptr inbounds i8, i8* %p, i64 2 - store i8 %t3, i8* %p2, align 1 - %p3 = getelementptr inbounds i8, i8* %p, i64 3 - store i8 %t4, i8* %p3, align 1 + store i8 %t1, ptr %p, align 1 + %p1 = getelementptr inbounds i8, ptr %p, i64 1 + store i8 %t2, ptr %p1, align 1 + %p2 = getelementptr inbounds i8, ptr %p, i64 2 + store i8 %t3, ptr %p2, align 
1 + %p3 = getelementptr inbounds i8, ptr %p, i64 3 + store i8 %t4, ptr %p3, align 1 ret void } -define dso_local void @trunc_i32_to_i16(i32 %x, i16* %p) { +define dso_local void @trunc_i32_to_i16(i32 %x, ptr %p) { ; CHECK-LABEL: trunc_i32_to_i16: ; CHECK: ; %bb.0: ; CHECK-NEXT: str w0, [x1] @@ -45,13 +45,13 @@ %t1 = trunc i32 %x to i16 %sh = lshr i32 %x, 16 %t2 = trunc i32 %sh to i16 - store i16 %t1, i16* %p, align 2 - %p1 = getelementptr inbounds i16, i16* %p, i64 1 - store i16 %t2, i16* %p1, align 2 + store i16 %t1, ptr %p, align 2 + %p1 = getelementptr inbounds i16, ptr %p, i64 1 + store i16 %t2, ptr %p1, align 2 ret void } -define dso_local void @be_i32_to_i16(i32 %x, i16* %p0) { +define dso_local void @be_i32_to_i16(i32 %x, ptr %p0) { ; CHECK-LABEL: be_i32_to_i16: ; CHECK: ; %bb.0: ; CHECK-NEXT: ror w8, w0, #16 @@ -60,13 +60,13 @@ %sh1 = lshr i32 %x, 16 %t0 = trunc i32 %x to i16 %t1 = trunc i32 %sh1 to i16 - %p1 = getelementptr inbounds i16, i16* %p0, i64 1 - store i16 %t0, i16* %p1, align 2 - store i16 %t1, i16* %p0, align 2 + %p1 = getelementptr inbounds i16, ptr %p0, i64 1 + store i16 %t0, ptr %p1, align 2 + store i16 %t1, ptr %p0, align 2 ret void } -define dso_local void @be_i32_to_i16_order(i32 %x, i16* %p0) { +define dso_local void @be_i32_to_i16_order(i32 %x, ptr %p0) { ; CHECK-LABEL: be_i32_to_i16_order: ; CHECK: ; %bb.0: ; CHECK-NEXT: ror w8, w0, #16 @@ -75,13 +75,13 @@ %sh1 = lshr i32 %x, 16 %t0 = trunc i32 %x to i16 %t1 = trunc i32 %sh1 to i16 - %p1 = getelementptr inbounds i16, i16* %p0, i64 1 - store i16 %t1, i16* %p0, align 2 - store i16 %t0, i16* %p1, align 2 + %p1 = getelementptr inbounds i16, ptr %p0, i64 1 + store i16 %t1, ptr %p0, align 2 + store i16 %t0, ptr %p1, align 2 ret void } -define dso_local void @trunc_i64_to_i8(i64 %x, i8* %p) { +define dso_local void @trunc_i64_to_i8(i64 %x, ptr %p) { ; CHECK-LABEL: trunc_i64_to_i8: ; CHECK: ; %bb.0: ; CHECK-NEXT: str x0, [x1] @@ -101,25 +101,25 @@ %t7 = trunc i64 %sh6 to i8 %sh7 = lshr i64 %x, 56 %t8 = trunc i64 %sh7 to i8 - store i8 %t1, i8* %p, align 1 - %p1 = getelementptr inbounds i8, i8* %p, i64 1 - store i8 %t2, i8* %p1, align 1 - %p2 = getelementptr inbounds i8, i8* %p, i64 2 - store i8 %t3, i8* %p2, align 1 - %p3 = getelementptr inbounds i8, i8* %p, i64 3 - store i8 %t4, i8* %p3, align 1 - %p4 = getelementptr inbounds i8, i8* %p, i64 4 - store i8 %t5, i8* %p4, align 1 - %p5 = getelementptr inbounds i8, i8* %p, i64 5 - store i8 %t6, i8* %p5, align 1 - %p6 = getelementptr inbounds i8, i8* %p, i64 6 - store i8 %t7, i8* %p6, align 1 - %p7 = getelementptr inbounds i8, i8* %p, i64 7 - store i8 %t8, i8* %p7, align 1 + store i8 %t1, ptr %p, align 1 + %p1 = getelementptr inbounds i8, ptr %p, i64 1 + store i8 %t2, ptr %p1, align 1 + %p2 = getelementptr inbounds i8, ptr %p, i64 2 + store i8 %t3, ptr %p2, align 1 + %p3 = getelementptr inbounds i8, ptr %p, i64 3 + store i8 %t4, ptr %p3, align 1 + %p4 = getelementptr inbounds i8, ptr %p, i64 4 + store i8 %t5, ptr %p4, align 1 + %p5 = getelementptr inbounds i8, ptr %p, i64 5 + store i8 %t6, ptr %p5, align 1 + %p6 = getelementptr inbounds i8, ptr %p, i64 6 + store i8 %t7, ptr %p6, align 1 + %p7 = getelementptr inbounds i8, ptr %p, i64 7 + store i8 %t8, ptr %p7, align 1 ret void } -define dso_local void @trunc_i64_to_i16(i64 %x, i16* %p) { +define dso_local void @trunc_i64_to_i16(i64 %x, ptr %p) { ; CHECK-LABEL: trunc_i64_to_i16: ; CHECK: ; %bb.0: ; CHECK-NEXT: str x0, [x1] @@ -131,17 +131,17 @@ %t3 = trunc i64 %sh2 to i16 %sh3 = lshr i64 %x, 48 %t4 = trunc i64 %sh3 to i16 - 
store i16 %t1, i16* %p, align 2 - %p1 = getelementptr inbounds i16, i16* %p, i64 1 - store i16 %t2, i16* %p1, align 2 - %p2 = getelementptr inbounds i16, i16* %p, i64 2 - store i16 %t3, i16* %p2, align 2 - %p3 = getelementptr inbounds i16, i16* %p, i64 3 - store i16 %t4, i16* %p3, align 2 + store i16 %t1, ptr %p, align 2 + %p1 = getelementptr inbounds i16, ptr %p, i64 1 + store i16 %t2, ptr %p1, align 2 + %p2 = getelementptr inbounds i16, ptr %p, i64 2 + store i16 %t3, ptr %p2, align 2 + %p3 = getelementptr inbounds i16, ptr %p, i64 3 + store i16 %t4, ptr %p3, align 2 ret void } -define dso_local void @trunc_i64_to_i32(i64 %x, i32* %p) { +define dso_local void @trunc_i64_to_i32(i64 %x, ptr %p) { ; CHECK-LABEL: trunc_i64_to_i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: str x0, [x1] @@ -149,12 +149,12 @@ %t1 = trunc i64 %x to i32 %sh = lshr i64 %x, 32 %t2 = trunc i64 %sh to i32 - store i32 %t1, i32* %p, align 4 - %p1 = getelementptr inbounds i32, i32* %p, i64 1 - store i32 %t2, i32* %p1, align 4 + store i32 %t1, ptr %p, align 4 + %p1 = getelementptr inbounds i32, ptr %p, i64 1 + store i32 %t2, ptr %p1, align 4 ret void } -define dso_local void @be_i64_to_i32(i64 %x, i32* %p0) { +define dso_local void @be_i64_to_i32(i64 %x, ptr %p0) { ; CHECK-LABEL: be_i64_to_i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: ror x8, x0, #32 @@ -163,13 +163,13 @@ %sh1 = lshr i64 %x, 32 %t0 = trunc i64 %x to i32 %t1 = trunc i64 %sh1 to i32 - %p1 = getelementptr inbounds i32, i32* %p0, i64 1 - store i32 %t0, i32* %p1, align 4 - store i32 %t1, i32* %p0, align 4 + %p1 = getelementptr inbounds i32, ptr %p0, i64 1 + store i32 %t0, ptr %p1, align 4 + store i32 %t1, ptr %p0, align 4 ret void } -define dso_local void @be_i64_to_i32_order(i64 %x, i32* %p0) { +define dso_local void @be_i64_to_i32_order(i64 %x, ptr %p0) { ; CHECK-LABEL: be_i64_to_i32_order: ; CHECK: ; %bb.0: ; CHECK-NEXT: ror x8, x0, #32 @@ -178,84 +178,80 @@ %sh1 = lshr i64 %x, 32 %t0 = trunc i64 %x to i32 %t1 = trunc i64 %sh1 to i32 - %p1 = getelementptr inbounds i32, i32* %p0, i64 1 - store i32 %t1, i32* %p0, align 4 - store i32 %t0, i32* %p1, align 4 + %p1 = getelementptr inbounds i32, ptr %p0, i64 1 + store i32 %t1, ptr %p0, align 4 + store i32 %t0, ptr %p1, align 4 ret void } ; Negative tests. 
-define void @merge_hole(i32 %x, i8* %p) { +define void @merge_hole(i32 %x, ptr %p) { ; CHECK-LABEL: merge_hole: ; CHECK: ; %bb.0: ; CHECK-NEXT: lsr w8, w0, #16 ; CHECK-NEXT: strb w0, [x1] ; CHECK-NEXT: strh w8, [x1, #2] ; CHECK-NEXT: ret - %pcast = bitcast i8* %p to i16* - %p2 = getelementptr inbounds i16, i16* %pcast, i64 1 + %p2 = getelementptr inbounds i16, ptr %p, i64 1 %x3 = trunc i32 %x to i8 - store i8 %x3, i8* %p, align 1 + store i8 %x3, ptr %p, align 1 %sh = lshr i32 %x, 16 %x01 = trunc i32 %sh to i16 - store i16 %x01, i16* %p2, align 1 + store i16 %x01, ptr %p2, align 1 ret void } -define void @merge_hole2(i32 %x, i8* %p) { +define void @merge_hole2(i32 %x, ptr %p) { ; CHECK-LABEL: merge_hole2: ; CHECK: ; %bb.0: ; CHECK-NEXT: lsr w8, w0, #16 ; CHECK-NEXT: strb w0, [x1] ; CHECK-NEXT: strh w8, [x1, #2] ; CHECK-NEXT: ret - %pcast = bitcast i8* %p to i16* - %p2 = getelementptr inbounds i16, i16* %pcast, i64 1 + %p2 = getelementptr inbounds i16, ptr %p, i64 1 %sh = lshr i32 %x, 16 %x01 = trunc i32 %sh to i16 - store i16 %x01, i16* %p2, align 1 + store i16 %x01, ptr %p2, align 1 %x3 = trunc i32 %x to i8 - store i8 %x3, i8* %p, align 1 + store i8 %x3, ptr %p, align 1 ret void } -define void @merge_hole3(i32 %x, i8* %p) { +define void @merge_hole3(i32 %x, ptr %p) { ; CHECK-LABEL: merge_hole3: ; CHECK: ; %bb.0: ; CHECK-NEXT: lsr w8, w0, #16 ; CHECK-NEXT: strb w0, [x1, #1] ; CHECK-NEXT: strh w8, [x1, #2] ; CHECK-NEXT: ret - %p1 = getelementptr inbounds i8, i8* %p, i64 1 - %pcast = bitcast i8* %p to i16* - %p2 = getelementptr inbounds i16, i16* %pcast, i64 1 + %p1 = getelementptr inbounds i8, ptr %p, i64 1 + %p2 = getelementptr inbounds i16, ptr %p, i64 1 %x3 = trunc i32 %x to i8 - store i8 %x3, i8* %p1, align 1 + store i8 %x3, ptr %p1, align 1 %sh = lshr i32 %x, 16 %x01 = trunc i32 %sh to i16 - store i16 %x01, i16* %p2, align 1 + store i16 %x01, ptr %p2, align 1 ret void } -define void @merge_hole4(i32 %x, i8* %p) { +define void @merge_hole4(i32 %x, ptr %p) { ; CHECK-LABEL: merge_hole4: ; CHECK: ; %bb.0: ; CHECK-NEXT: lsr w8, w0, #16 ; CHECK-NEXT: strb w0, [x1, #2] ; CHECK-NEXT: strh w8, [x1] ; CHECK-NEXT: ret - %pcast = bitcast i8* %p to i16* - %p2 = getelementptr inbounds i8, i8* %p, i64 2 + %p2 = getelementptr inbounds i8, ptr %p, i64 2 %x3 = trunc i32 %x to i8 - store i8 %x3, i8* %p2, align 1 + store i8 %x3, ptr %p2, align 1 %sh = lshr i32 %x, 16 %x01 = trunc i32 %sh to i16 - store i16 %x01, i16* %pcast, align 1 + store i16 %x01, ptr %p, align 1 ret void } -define dso_local i32 @load_between_stores(i32 %x, i16* %p, i32 *%ptr) { +define dso_local i32 @load_between_stores(i32 %x, ptr %p, ptr %ptr) { ; CHECK-LABEL: load_between_stores: ; CHECK: ; %bb.0: ; CHECK-NEXT: strh w0, [x1] @@ -267,14 +263,14 @@ %t1 = trunc i32 %x to i16 %sh = lshr i32 %x, 16 %t2 = trunc i32 %sh to i16 - store i16 %t1, i16* %p, align 2 - %ld = load i32, i32 *%ptr - %p1 = getelementptr inbounds i16, i16* %p, i64 1 - store i16 %t2, i16* %p1, align 2 + store i16 %t1, ptr %p, align 2 + %ld = load i32, ptr %ptr + %p1 = getelementptr inbounds i16, ptr %p, i64 1 + store i16 %t2, ptr %p1, align 2 ret i32 %ld } -define dso_local void @invalid_shift(i16 %x, i8* %p) { +define dso_local void @invalid_shift(i16 %x, ptr %p) { ; CHECK-LABEL: invalid_shift: ; CHECK: ; %bb.0: ; CHECK-NEXT: ubfx w8, w0, #4, #12 @@ -284,13 +280,13 @@ %t1 = trunc i16 %x to i8 %sh = lshr i16 %x, 4 %t2 = trunc i16 %sh to i8 - store i8 %t1, i8* %p, align 1 - %p1 = getelementptr inbounds i8, i8* %p, i64 1 - store i8 %t2, i8* %p1, align 1 + store i8 %t1, 
ptr %p, align 1 + %p1 = getelementptr inbounds i8, ptr %p, i64 1 + store i8 %t2, ptr %p1, align 1 ret void } -define dso_local void @missing_store(i32 %x, i8* %p) { +define dso_local void @missing_store(i32 %x, ptr %p) { ; CHECK-LABEL: missing_store: ; CHECK: ; %bb.0: ; CHECK-NEXT: lsr w8, w0, #8 @@ -304,15 +300,15 @@ %t2 = trunc i32 %sh1 to i8 %sh3 = lshr i32 %x, 24 %t4 = trunc i32 %sh3 to i8 - store i8 %t1, i8* %p, align 1 - %p1 = getelementptr inbounds i8, i8* %p, i64 1 - store i8 %t2, i8* %p1, align 1 - %p3 = getelementptr inbounds i8, i8* %p, i64 3 - store i8 %t4, i8* %p3, align 1 + store i8 %t1, ptr %p, align 1 + %p1 = getelementptr inbounds i8, ptr %p, i64 1 + store i8 %t2, ptr %p1, align 1 + %p3 = getelementptr inbounds i8, ptr %p, i64 3 + store i8 %t4, ptr %p3, align 1 ret void } -define dso_local void @different_base_reg(i16 %x, i8* %p, i8 *%p2) { +define dso_local void @different_base_reg(i16 %x, ptr %p, ptr %p2) { ; CHECK-LABEL: different_base_reg: ; CHECK: ; %bb.0: ; CHECK-NEXT: ubfx w8, w0, #8, #8 @@ -322,13 +318,13 @@ %t1 = trunc i16 %x to i8 %sh = lshr i16 %x, 8 %t2 = trunc i16 %sh to i8 - store i8 %t1, i8* %p, align 1 - %p1 = getelementptr inbounds i8, i8* %p2, i64 1 - store i8 %t2, i8* %p1, align 1 + store i8 %t1, ptr %p, align 1 + %p1 = getelementptr inbounds i8, ptr %p2, i64 1 + store i8 %t2, ptr %p1, align 1 ret void } -define dso_local void @second_store_is_volatile(i16 %x, i8* %p) { +define dso_local void @second_store_is_volatile(i16 %x, ptr %p) { ; CHECK-LABEL: second_store_is_volatile: ; CHECK: ; %bb.0: ; CHECK-NEXT: ubfx w8, w0, #8, #8 @@ -338,8 +334,8 @@ %t1 = trunc i16 %x to i8 %sh = lshr i16 %x, 8 %t2 = trunc i16 %sh to i8 - store volatile i8 %t1, i8* %p, align 1 - %p1 = getelementptr inbounds i8, i8* %p, i64 1 - store i8 %t2, i8* %p1, align 1 + store volatile i8 %t1, ptr %p, align 1 + %p1 = getelementptr inbounds i8, ptr %p, i64 1 + store i8 %t2, ptr %p1, align 1 ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/no-neon-no-fp.ll b/llvm/test/CodeGen/AArch64/GlobalISel/no-neon-no-fp.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/no-neon-no-fp.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/no-neon-no-fp.ll @@ -4,9 +4,9 @@ ; We should fall back in the translator if we don't have no-neon/no-fp support. 
; CHECK: Instruction selection used fallback path for foo -define void @foo(i128 *%ptr) #0 align 2 { +define void @foo(ptr %ptr) #0 align 2 { entry: - store i128 0, i128* %ptr, align 16 + store i128 0, ptr %ptr, align 16 ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/ret-1x-vec.ll b/llvm/test/CodeGen/AArch64/GlobalISel/ret-1x-vec.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/ret-1x-vec.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/ret-1x-vec.ll @@ -14,22 +14,22 @@ ret <1 x float> %v } -define <1 x i8*> @ret_v1p0(<1 x i8*> %v) { +define <1 x ptr> @ret_v1p0(<1 x ptr> %v) { ; CHECK-LABEL: name: ret_v1p0 ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $d0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $d0 ; CHECK: $d0 = COPY [[COPY]](p0) ; CHECK: RET_ReallyLR implicit $d0 - ret <1 x i8*> %v + ret <1 x ptr> %v } -define <1 x i8 addrspace(1)*> @ret_v1p1(<1 x i8 addrspace(1)*> %v) { +define <1 x ptr addrspace(1)> @ret_v1p1(<1 x ptr addrspace(1)> %v) { ; CHECK-LABEL: name: ret_v1p1 ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $d0 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $d0 ; CHECK: $d0 = COPY [[COPY]](p1) ; CHECK: RET_ReallyLR implicit $d0 - ret <1 x i8 addrspace(1)*> %v + ret <1 x ptr addrspace(1)> %v } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/ret-vec-promote.ll b/llvm/test/CodeGen/AArch64/GlobalISel/ret-vec-promote.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/ret-vec-promote.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/ret-vec-promote.ll @@ -2,7 +2,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -O0 -global-isel -stop-after=irtranslator -o - %s | FileCheck %s ; Tests vectors of i1 types can appropriately extended first before return handles it. -define <4 x i1> @ret_v4i1(<4 x i1> *%v) { +define <4 x i1> @ret_v4i1(ptr %v) { ; CHECK-LABEL: name: ret_v4i1 ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 @@ -11,6 +11,6 @@ ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[LOAD]](<4 x s1>) ; CHECK: $d0 = COPY [[ANYEXT]](<4 x s16>) ; CHECK: RET_ReallyLR implicit $d0 - %v2 = load <4 x i1>, <4 x i1> *%v + %v2 = load <4 x i1>, ptr %v ret <4 x i1> %v2 } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll b/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll @@ -109,7 +109,7 @@ ret i64 %out } -define i64 @extra_use1(i64 %in1, i64 %in2, i64* %p) { +define i64 @extra_use1(i64 %in1, i64 %in2, ptr %p) { ; GISEL-LABEL: extra_use1: ; GISEL: ; %bb.0: ; %bb ; GISEL-NEXT: lsl x8, x0, #1 @@ -129,11 +129,11 @@ %tmp3 = shl i64 %in1, 1 %tmp4 = and i64 %in2, 1 %out = or i64 %tmp3, %tmp4 - store i64 %tmp3, i64* %p + store i64 %tmp3, ptr %p ret i64 %out } -define i64 @extra_use2(i64 %in1, i64 %in2, i64* %p) { +define i64 @extra_use2(i64 %in1, i64 %in2, ptr %p) { ; GISEL-LABEL: extra_use2: ; GISEL: ; %bb.0: ; %bb ; GISEL-NEXT: and x8, x1, #0x1 @@ -152,6 +152,6 @@ %tmp3 = shl i64 %in1, 1 %tmp4 = and i64 %in2, 1 %out = or i64 %tmp3, %tmp4 - store i64 %tmp4, i64* %p + store i64 %tmp4, ptr %p ret i64 %out } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-frameaddr.ll b/llvm/test/CodeGen/AArch64/GlobalISel/select-frameaddr.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-frameaddr.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-frameaddr.ll @@ -1,20 +1,20 @@ ; RUN: llc -mtriple=arm64-apple-ios -global-isel -o - %s | FileCheck %s -define i8* @rt0(i32 %x) nounwind readnone { +define ptr @rt0(i32 %x) nounwind readnone { entry: 
; CHECK-LABEL: rt0: ; CHECK: mov x0, x29 - %0 = tail call i8* @llvm.frameaddress(i32 0) - ret i8* %0 + %0 = tail call ptr @llvm.frameaddress(i32 0) + ret ptr %0 } -define i8* @rt2() nounwind readnone { +define ptr @rt2() nounwind readnone { entry: ; CHECK-LABEL: rt2: ; CHECK: ldr x[[reg:[0-9]+]], [x29] ; CHECK: ldr x0, [x[[reg]]] - %0 = tail call i8* @llvm.frameaddress(i32 2) - ret i8* %0 + %0 = tail call ptr @llvm.frameaddress(i32 2) + ret ptr %0 } -declare i8* @llvm.frameaddress(i32) nounwind readnone +declare ptr @llvm.frameaddress(i32) nounwind readnone diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-returnaddr.ll b/llvm/test/CodeGen/AArch64/GlobalISel/select-returnaddr.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-returnaddr.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-returnaddr.ll @@ -1,15 +1,15 @@ ; RUN: llc -mtriple=arm64-apple-ios -global-isel -o - %s | FileCheck %s -define i8* @rt0(i32 %x) nounwind readnone { +define ptr @rt0(i32 %x) nounwind readnone { entry: ; CHECK-LABEL: rt0: ; CHECK: hint #7 ; CHECK-NEXT: mov x0, x30 - %0 = tail call i8* @llvm.returnaddress(i32 0) - ret i8* %0 + %0 = tail call ptr @llvm.returnaddress(i32 0) + ret ptr %0 } -define i8* @rt0_call_clobber(i32 %x) nounwind readnone { +define ptr @rt0_call_clobber(i32 %x) nounwind readnone { entry: ; CHECK-LABEL: rt0_call_clobber: ; CHECK: stp x20, x19, [sp, #-32]! @@ -22,11 +22,11 @@ ; CHECK-NOT: x0 ; CHECK: ret %ret = call i32 @foo() - %0 = tail call i8* @llvm.returnaddress(i32 0) - ret i8* %0 + %0 = tail call ptr @llvm.returnaddress(i32 0) + ret ptr %0 } -define i8* @rt2() nounwind readnone { +define ptr @rt2() nounwind readnone { entry: ; CHECK-LABEL: rt2: ; CHECK: ldr x[[reg:[0-9]+]], [x29] @@ -36,10 +36,10 @@ ; CHECK: mov x0, x30 ; CHECK-NOT: x0 ; CHECK: ret - %0 = tail call i8* @llvm.returnaddress(i32 2) - ret i8* %0 + %0 = tail call ptr @llvm.returnaddress(i32 2) + ret ptr %0 } declare i32 @foo() -declare i8* @llvm.returnaddress(i32) nounwind readnone +declare ptr @llvm.returnaddress(i32) nounwind readnone diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-apple-ios -global-isel -global-isel-abort=1 - < %s | FileCheck %s -define void @test_simple_2xs8(i8 *%ptr) { +define void @test_simple_2xs8(ptr %ptr) { ; CHECK-LABEL: test_simple_2xs8: ; CHECK: ; %bb.0: ; CHECK-NEXT: mov w8, #4 @@ -9,28 +9,26 @@ ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: strb w9, [x0, #1] ; CHECK-NEXT: ret - %addr1 = getelementptr i8, i8 *%ptr, i64 0 - store i8 4, i8 *%addr1 - %addr2 = getelementptr i8, i8 *%ptr, i64 1 - store i8 5, i8 *%addr2 + store i8 4, ptr %ptr + %addr2 = getelementptr i8, ptr %ptr, i64 1 + store i8 5, ptr %addr2 ret void } -define void @test_simple_2xs16(i16 *%ptr) { +define void @test_simple_2xs16(ptr %ptr) { ; CHECK-LABEL: test_simple_2xs16: ; CHECK: ; %bb.0: ; CHECK-NEXT: mov w8, #4 ; CHECK-NEXT: movk w8, #5, lsl #16 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: ret - %addr1 = getelementptr i16, i16 *%ptr, i64 0 - store i16 4, i16 *%addr1 - %addr2 = getelementptr i16, i16 *%ptr, i64 1 - store i16 5, i16 *%addr2 + store i16 4, ptr %ptr + %addr2 = getelementptr i16, ptr %ptr, i64 1 + store i16 5, ptr %addr2 ret void } -define void @test_simple_4xs16(i16 *%ptr) { +define void 
@test_simple_4xs16(ptr %ptr) { ; CHECK-LABEL: test_simple_4xs16: ; CHECK: ; %bb.0: ; CHECK-NEXT: mov x8, #4 @@ -39,47 +37,44 @@ ; CHECK-NEXT: movk x8, #14, lsl #48 ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: ret - %addr1 = getelementptr i16, i16 *%ptr, i64 0 - store i16 4, i16 *%addr1 - %addr2 = getelementptr i16, i16 *%ptr, i64 1 - store i16 5, i16 *%addr2 - %addr3 = getelementptr i16, i16 *%ptr, i64 2 - store i16 9, i16 *%addr3 - %addr4 = getelementptr i16, i16 *%ptr, i64 3 - store i16 14, i16 *%addr4 + store i16 4, ptr %ptr + %addr2 = getelementptr i16, ptr %ptr, i64 1 + store i16 5, ptr %addr2 + %addr3 = getelementptr i16, ptr %ptr, i64 2 + store i16 9, ptr %addr3 + %addr4 = getelementptr i16, ptr %ptr, i64 3 + store i16 14, ptr %addr4 ret void } -define void @test_simple_2xs32(i32 *%ptr) { +define void @test_simple_2xs32(ptr %ptr) { ; CHECK-LABEL: test_simple_2xs32: ; CHECK: ; %bb.0: ; CHECK-NEXT: mov x8, #4 ; CHECK-NEXT: movk x8, #5, lsl #32 ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: ret - %addr1 = getelementptr i32, i32 *%ptr, i64 0 - store i32 4, i32 *%addr1 - %addr2 = getelementptr i32, i32 *%ptr, i64 1 - store i32 5, i32 *%addr2 + store i32 4, ptr %ptr + %addr2 = getelementptr i32, ptr %ptr, i64 1 + store i32 5, ptr %addr2 ret void } -define void @test_simple_2xs64_illegal(i64 *%ptr) { +define void @test_simple_2xs64_illegal(ptr %ptr) { ; CHECK-LABEL: test_simple_2xs64_illegal: ; CHECK: ; %bb.0: ; CHECK-NEXT: mov w8, #4 ; CHECK-NEXT: mov w9, #5 ; CHECK-NEXT: stp x8, x9, [x0] ; CHECK-NEXT: ret - %addr1 = getelementptr i64, i64 *%ptr, i64 0 - store i64 4, i64 *%addr1 - %addr2 = getelementptr i64, i64 *%ptr, i64 1 - store i64 5, i64 *%addr2 + store i64 4, ptr %ptr + %addr2 = getelementptr i64, ptr %ptr, i64 1 + store i64 5, ptr %addr2 ret void } ; Don't merge vectors...yet. 
-define void @test_simple_vector(<2 x i16> *%ptr) { +define void @test_simple_vector(ptr %ptr) { ; CHECK-LABEL: test_simple_vector: ; CHECK: ; %bb.0: ; CHECK-NEXT: mov w8, #4 @@ -91,14 +86,13 @@ ; CHECK-NEXT: strh w10, [x0, #4] ; CHECK-NEXT: strh w11, [x0, #6] ; CHECK-NEXT: ret - %addr1 = getelementptr <2 x i16>, <2 x i16> *%ptr, i64 0 - store <2 x i16> , <2 x i16> *%addr1 - %addr2 = getelementptr <2 x i16>, <2 x i16> *%ptr, i64 1 - store <2 x i16> , <2 x i16> *%addr2 + store <2 x i16> , ptr %ptr + %addr2 = getelementptr <2 x i16>, ptr %ptr, i64 1 + store <2 x i16> , ptr %addr2 ret void } -define i32 @test_unknown_alias(i32 *%ptr, i32 *%aliasptr) { +define i32 @test_unknown_alias(ptr %ptr, ptr %aliasptr) { ; CHECK-LABEL: test_unknown_alias: ; CHECK: ; %bb.0: ; CHECK-NEXT: mov w9, #4 @@ -108,15 +102,14 @@ ; CHECK-NEXT: ldr w0, [x1] ; CHECK-NEXT: str w9, [x8, #4] ; CHECK-NEXT: ret - %addr1 = getelementptr i32, i32 *%ptr, i64 0 - store i32 4, i32 *%addr1 - %ld = load i32, i32 *%aliasptr - %addr2 = getelementptr i32, i32 *%ptr, i64 1 - store i32 5, i32 *%addr2 + store i32 4, ptr %ptr + %ld = load i32, ptr %aliasptr + %addr2 = getelementptr i32, ptr %ptr, i64 1 + store i32 5, ptr %addr2 ret i32 %ld } -define void @test_2x_2xs32(i32 *%ptr, i32 *%ptr2) { +define void @test_2x_2xs32(ptr %ptr, ptr %ptr2) { ; CHECK-LABEL: test_2x_2xs32: ; CHECK: ; %bb.0: ; CHECK-NEXT: mov x10, #9 @@ -126,60 +119,55 @@ ; CHECK-NEXT: stp w8, w9, [x0] ; CHECK-NEXT: str x10, [x1] ; CHECK-NEXT: ret - %addr1 = getelementptr i32, i32 *%ptr, i64 0 - store i32 4, i32 *%addr1 - %addr2 = getelementptr i32, i32 *%ptr, i64 1 - store i32 5, i32 *%addr2 + store i32 4, ptr %ptr + %addr2 = getelementptr i32, ptr %ptr, i64 1 + store i32 5, ptr %addr2 - %addr3 = getelementptr i32, i32 *%ptr2, i64 0 - store i32 9, i32 *%addr3 - %addr4 = getelementptr i32, i32 *%ptr2, i64 1 - store i32 17, i32 *%addr4 + store i32 9, ptr %ptr2 + %addr4 = getelementptr i32, ptr %ptr2, i64 1 + store i32 17, ptr %addr4 ret void } -define void @test_simple_var_2xs8(i8 *%ptr, i8 %v1, i8 %v2) { +define void @test_simple_var_2xs8(ptr %ptr, i8 %v1, i8 %v2) { ; CHECK-LABEL: test_simple_var_2xs8: ; CHECK: ; %bb.0: ; CHECK-NEXT: strb w1, [x0] ; CHECK-NEXT: strb w2, [x0, #1] ; CHECK-NEXT: ret - %addr1 = getelementptr i8, i8 *%ptr, i64 0 - store i8 %v1, i8 *%addr1 - %addr2 = getelementptr i8, i8 *%ptr, i64 1 - store i8 %v2, i8 *%addr2 + store i8 %v1, ptr %ptr + %addr2 = getelementptr i8, ptr %ptr, i64 1 + store i8 %v2, ptr %addr2 ret void } -define void @test_simple_var_2xs16(i16 *%ptr, i16 %v1, i16 %v2) { +define void @test_simple_var_2xs16(ptr %ptr, i16 %v1, i16 %v2) { ; CHECK-LABEL: test_simple_var_2xs16: ; CHECK: ; %bb.0: ; CHECK-NEXT: strh w1, [x0] ; CHECK-NEXT: strh w2, [x0, #2] ; CHECK-NEXT: ret - %addr1 = getelementptr i16, i16 *%ptr, i64 0 - store i16 %v1, i16 *%addr1 - %addr2 = getelementptr i16, i16 *%ptr, i64 1 - store i16 %v2, i16 *%addr2 + store i16 %v1, ptr %ptr + %addr2 = getelementptr i16, ptr %ptr, i64 1 + store i16 %v2, ptr %addr2 ret void } -define void @test_simple_var_2xs32(i32 *%ptr, i32 %v1, i32 %v2) { +define void @test_simple_var_2xs32(ptr %ptr, i32 %v1, i32 %v2) { ; CHECK-LABEL: test_simple_var_2xs32: ; CHECK: ; %bb.0: ; CHECK-NEXT: stp w1, w2, [x0] ; CHECK-NEXT: ret - %addr1 = getelementptr i32, i32 *%ptr, i64 0 - store i32 %v1, i32 *%addr1 - %addr2 = getelementptr i32, i32 *%ptr, i64 1 - store i32 %v2, i32 *%addr2 + store i32 %v1, ptr %ptr + %addr2 = getelementptr i32, ptr %ptr, i64 1 + store i32 %v2, ptr %addr2 ret void } ; The store to 
ptr2 prevents merging into a single store. ; We can still merge the stores into addr1 and addr2. -define void @test_alias_4xs16(i16 *%ptr, i16 *%ptr2) { +define void @test_alias_4xs16(ptr %ptr, ptr %ptr2) { ; CHECK-LABEL: test_alias_4xs16: ; CHECK: ; %bb.0: ; CHECK-NEXT: mov w8, #4 @@ -191,20 +179,19 @@ ; CHECK-NEXT: strh wzr, [x1] ; CHECK-NEXT: strh w10, [x0, #6] ; CHECK-NEXT: ret - %addr1 = getelementptr i16, i16 *%ptr, i64 0 - store i16 4, i16 *%addr1 - %addr2 = getelementptr i16, i16 *%ptr, i64 1 - store i16 5, i16 *%addr2 - %addr3 = getelementptr i16, i16 *%ptr, i64 2 - store i16 9, i16 *%addr3 - store i16 0, i16 *%ptr2 - %addr4 = getelementptr i16, i16 *%ptr, i64 3 - store i16 14, i16 *%addr4 + store i16 4, ptr %ptr + %addr2 = getelementptr i16, ptr %ptr, i64 1 + store i16 5, ptr %addr2 + %addr3 = getelementptr i16, ptr %ptr, i64 2 + store i16 9, ptr %addr3 + store i16 0, ptr %ptr2 + %addr4 = getelementptr i16, ptr %ptr, i64 3 + store i16 14, ptr %addr4 ret void } ; Here store of 5 and 9 can be merged, others have aliasing barriers. -define void @test_alias2_4xs16(i16 *%ptr, i16 *%ptr2, i16* %ptr3) { +define void @test_alias2_4xs16(ptr %ptr, ptr %ptr2, ptr %ptr3) { ; CHECK-LABEL: test_alias2_4xs16: ; CHECK: ; %bb.0: ; CHECK-NEXT: mov w8, #4 @@ -217,21 +204,20 @@ ; CHECK-NEXT: strh wzr, [x1] ; CHECK-NEXT: strh w8, [x0, #6] ; CHECK-NEXT: ret - %addr1 = getelementptr i16, i16 *%ptr, i64 0 - store i16 4, i16 *%addr1 - %addr2 = getelementptr i16, i16 *%ptr, i64 1 - store i16 0, i16 *%ptr3 - store i16 5, i16 *%addr2 - %addr3 = getelementptr i16, i16 *%ptr, i64 2 - store i16 9, i16 *%addr3 - store i16 0, i16 *%ptr2 - %addr4 = getelementptr i16, i16 *%ptr, i64 3 - store i16 14, i16 *%addr4 + store i16 4, ptr %ptr + %addr2 = getelementptr i16, ptr %ptr, i64 1 + store i16 0, ptr %ptr3 + store i16 5, ptr %addr2 + %addr3 = getelementptr i16, ptr %ptr, i64 2 + store i16 9, ptr %addr3 + store i16 0, ptr %ptr2 + %addr4 = getelementptr i16, ptr %ptr, i64 3 + store i16 14, ptr %addr4 ret void } ; No merging can be done here. -define void @test_alias3_4xs16(i16 *%ptr, i16 *%ptr2, i16 *%ptr3, i16 *%ptr4) { +define void @test_alias3_4xs16(ptr %ptr, ptr %ptr2, ptr %ptr3, ptr %ptr4) { ; CHECK-LABEL: test_alias3_4xs16: ; CHECK: ; %bb.0: ; CHECK-NEXT: mov w8, #4 @@ -246,22 +232,21 @@ ; CHECK-NEXT: strh wzr, [x1] ; CHECK-NEXT: strh w9, [x0, #6] ; CHECK-NEXT: ret - %addr1 = getelementptr i16, i16 *%ptr, i64 0 - store i16 4, i16 *%addr1 - %addr2 = getelementptr i16, i16 *%ptr, i64 1 - store i16 0, i16 *%ptr3 - store i16 5, i16 *%addr2 - store i16 0, i16 *%ptr4 - %addr3 = getelementptr i16, i16 *%ptr, i64 2 - store i16 9, i16 *%addr3 - store i16 0, i16 *%ptr2 - %addr4 = getelementptr i16, i16 *%ptr, i64 3 - store i16 14, i16 *%addr4 + store i16 4, ptr %ptr + %addr2 = getelementptr i16, ptr %ptr, i64 1 + store i16 0, ptr %ptr3 + store i16 5, ptr %addr2 + store i16 0, ptr %ptr4 + %addr3 = getelementptr i16, ptr %ptr, i64 2 + store i16 9, ptr %addr3 + store i16 0, ptr %ptr2 + %addr4 = getelementptr i16, ptr %ptr, i64 3 + store i16 14, ptr %addr4 ret void } ; Can merge because the load is from a different alloca and can't alias. 
-define i32 @test_alias_allocas_2xs32(i32 *%ptr) { +define i32 @test_alias_allocas_2xs32(ptr %ptr) { ; CHECK-LABEL: test_alias_allocas_2xs32: ; CHECK: ; %bb.0: ; CHECK-NEXT: sub sp, sp, #32 @@ -274,15 +259,14 @@ ; CHECK-NEXT: ret %a1 = alloca [6 x i32] %a2 = alloca i32, align 4 - %addr1 = getelementptr [6 x i32], [6 x i32] *%a1, i64 0, i32 0 - store i32 4, i32 *%addr1 - %ld = load i32, i32 *%a2 - %addr2 = getelementptr [6 x i32], [6 x i32] *%a1, i64 0, i32 1 - store i32 5, i32 *%addr2 + store i32 4, ptr %a1 + %ld = load i32, ptr %a2 + %addr2 = getelementptr [6 x i32], ptr %a1, i64 0, i32 1 + store i32 5, ptr %addr2 ret i32 %ld } -define void @test_volatile(i32 **%ptr) { +define void @test_volatile(ptr %ptr) { ; CHECK-LABEL: test_volatile: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: ldr x8, [x0] @@ -290,15 +274,14 @@ ; CHECK-NEXT: str wzr, [x8, #4] ; CHECK-NEXT: ret entry: - %0 = load i32*, i32** %ptr, align 8 - store volatile i32 0, i32* %0, align 4; - %1 = bitcast i32** %ptr to i8** - %add.ptr.i.i38 = getelementptr inbounds i32, i32* %0, i64 1 - store volatile i32 0, i32* %add.ptr.i.i38, align 4 + %0 = load ptr, ptr %ptr, align 8 + store volatile i32 0, ptr %0, align 4; + %add.ptr.i.i38 = getelementptr inbounds i32, ptr %0, i64 1 + store volatile i32 0, ptr %add.ptr.i.i38, align 4 ret void } -define void @test_atomic(i32 **%ptr) { +define void @test_atomic(ptr %ptr) { ; CHECK-LABEL: test_atomic: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: ldr x8, [x0] @@ -307,10 +290,9 @@ ; CHECK-NEXT: stlr wzr, [x9] ; CHECK-NEXT: ret entry: - %0 = load i32*, i32** %ptr, align 8 - store atomic i32 0, i32* %0 release, align 4; - %1 = bitcast i32** %ptr to i8** - %add.ptr.i.i38 = getelementptr inbounds i32, i32* %0, i64 1 - store atomic i32 0, i32* %add.ptr.i.i38 release, align 4 + %0 = load ptr, ptr %ptr, align 8 + store atomic i32 0, ptr %0 release, align 4; + %add.ptr.i.i38 = getelementptr inbounds i32, ptr %0, i64 1 + store atomic i32 0, ptr %add.ptr.i.i38 release, align 4 ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll @@ -1,12 +1,12 @@ ; RUN: llc -verify-machineinstrs -frame-pointer=all -global-isel < %s -mtriple=aarch64-apple-ios | FileCheck %s -declare i8* @malloc(i64) -declare void @free(i8*) +declare ptr @malloc(i64) +declare void @free(ptr) %swift_error = type {i64, i8} ; This tests the basic usage of a swifterror parameter. "foo" is the function ; that takes a swifterror parameter and "caller" is the caller of "foo". -define float @foo(%swift_error** swifterror %error_ptr_ref) { +define float @foo(ptr swifterror %error_ptr_ref) { ; CHECK-LABEL: foo: ; CHECK: mov w0, #16 ; CHECK: malloc @@ -16,16 +16,15 @@ ; CHECK-NOT: x21 entry: - %call = call i8* @malloc(i64 16) - %call.0 = bitcast i8* %call to %swift_error* - store %swift_error* %call.0, %swift_error** %error_ptr_ref - %tmp = getelementptr inbounds i8, i8* %call, i64 8 - store i8 1, i8* %tmp + %call = call ptr @malloc(i64 16) + store ptr %call, ptr %error_ptr_ref + %tmp = getelementptr inbounds i8, ptr %call, i64 8 + store i8 1, ptr %tmp ret float 1.0 } ; "caller" calls "foo" that takes a swifterror parameter. 
-define float @caller(i8* %error_ref) { +define float @caller(ptr %error_ref) { ; CHECK-LABEL: caller: ; CHECK: mov [[ID:x[0-9]+]], x0 ; CHECK: bl {{.*}}foo @@ -37,25 +36,24 @@ ; CHECK: bl {{.*}}free entry: - %error_ptr_ref = alloca swifterror %swift_error* - store %swift_error* null, %swift_error** %error_ptr_ref - %call = call float @foo(%swift_error** swifterror %error_ptr_ref) - %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref - %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null - %tmp = bitcast %swift_error* %error_from_foo to i8* + %error_ptr_ref = alloca swifterror ptr + store ptr null, ptr %error_ptr_ref + %call = call float @foo(ptr swifterror %error_ptr_ref) + %error_from_foo = load ptr, ptr %error_ptr_ref + %had_error_from_foo = icmp ne ptr %error_from_foo, null br i1 %had_error_from_foo, label %handler, label %cont cont: - %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1 - %t = load i8, i8* %v1 - store i8 %t, i8* %error_ref + %v1 = getelementptr inbounds %swift_error, ptr %error_from_foo, i64 0, i32 1 + %t = load i8, ptr %v1 + store i8 %t, ptr %error_ref br label %handler handler: - call void @free(i8* %tmp) + call void @free(ptr %error_from_foo) ret float 1.0 } ; "caller2" is the caller of "foo", it calls "foo" inside a loop. -define float @caller2(i8* %error_ref) { +define float @caller2(ptr %error_ref) { ; CHECK-LABEL: caller2: ; CHECK: mov [[ID:x[0-9]+]], x0 ; CHECK: fmov [[CMP:s[0-9]+]], #1.0 @@ -71,31 +69,30 @@ ; CHECK: bl {{.*}}free entry: - %error_ptr_ref = alloca swifterror %swift_error* + %error_ptr_ref = alloca swifterror ptr br label %bb_loop bb_loop: - store %swift_error* null, %swift_error** %error_ptr_ref - %call = call float @foo(%swift_error** swifterror %error_ptr_ref) - %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref - %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null - %tmp = bitcast %swift_error* %error_from_foo to i8* + store ptr null, ptr %error_ptr_ref + %call = call float @foo(ptr swifterror %error_ptr_ref) + %error_from_foo = load ptr, ptr %error_ptr_ref + %had_error_from_foo = icmp ne ptr %error_from_foo, null br i1 %had_error_from_foo, label %handler, label %cont cont: %cmp = fcmp ogt float %call, 1.000000e+00 br i1 %cmp, label %bb_end, label %bb_loop bb_end: - %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1 - %t = load i8, i8* %v1 - store i8 %t, i8* %error_ref + %v1 = getelementptr inbounds %swift_error, ptr %error_from_foo, i64 0, i32 1 + %t = load i8, ptr %v1 + store i8 %t, ptr %error_ref br label %handler handler: - call void @free(i8* %tmp) + call void @free(ptr %error_from_foo) ret float 1.0 } ; "foo_if" is a function that takes a swifterror parameter, it sets swifterror ; under a certain condition. 
-define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) { +define float @foo_if(ptr swifterror %error_ptr_ref, i32 %cc) { ; CHECK-LABEL: foo_if: ; CHECK: cbz w0 ; CHECK: mov w0, #16 @@ -111,11 +108,10 @@ br i1 %cond, label %gen_error, label %normal gen_error: - %call = call i8* @malloc(i64 16) - %call.0 = bitcast i8* %call to %swift_error* - store %swift_error* %call.0, %swift_error** %error_ptr_ref - %tmp = getelementptr inbounds i8, i8* %call, i64 8 - store i8 1, i8* %tmp + %call = call ptr @malloc(i64 16) + store ptr %call, ptr %error_ptr_ref + %tmp = getelementptr inbounds i8, ptr %call, i64 8 + store i8 1, ptr %tmp ret float 1.0 normal: @@ -124,7 +120,7 @@ ; "foo_loop" is a function that takes a swifterror parameter, it sets swifterror ; under a certain condition inside a loop. -define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float %cc2) { +define float @foo_loop(ptr swifterror %error_ptr_ref, i32 %cc, float %cc2) { ; CHECK-LABEL: foo_loop: ; CHECK: cbz ; CHECK: mov w0, #16 @@ -141,11 +137,10 @@ br i1 %cond, label %gen_error, label %bb_cont gen_error: - %call = call i8* @malloc(i64 16) - %call.0 = bitcast i8* %call to %swift_error* - store %swift_error* %call.0, %swift_error** %error_ptr_ref - %tmp = getelementptr inbounds i8, i8* %call, i64 8 - store i8 1, i8* %tmp + %call = call ptr @malloc(i64 16) + store ptr %call, ptr %error_ptr_ref + %tmp = getelementptr inbounds i8, ptr %call, i64 8 + store i8 1, ptr %tmp br label %bb_cont bb_cont: @@ -159,7 +154,7 @@ ; "foo_sret" is a function that takes a swifterror parameter, it also has a sret ; parameter. -define void @foo_sret(%struct.S* sret(%struct.S) %agg.result, i32 %val1, %swift_error** swifterror %error_ptr_ref) { +define void @foo_sret(ptr sret(%struct.S) %agg.result, i32 %val1, ptr swifterror %error_ptr_ref) { ; CHECK-LABEL: foo_sret: ; CHECK-DAG: mov [[SRET:x[0-9]+]], x8 ; CHECK-DAG: mov w0, #16 @@ -171,18 +166,17 @@ ; CHECK-NOT: x21 entry: - %call = call i8* @malloc(i64 16) - %call.0 = bitcast i8* %call to %swift_error* - store %swift_error* %call.0, %swift_error** %error_ptr_ref - %tmp = getelementptr inbounds i8, i8* %call, i64 8 - store i8 1, i8* %tmp - %v2 = getelementptr inbounds %struct.S, %struct.S* %agg.result, i32 0, i32 1 - store i32 %val1, i32* %v2 + %call = call ptr @malloc(i64 16) + store ptr %call, ptr %error_ptr_ref + %tmp = getelementptr inbounds i8, ptr %call, i64 8 + store i8 1, ptr %tmp + %v2 = getelementptr inbounds %struct.S, ptr %agg.result, i32 0, i32 1 + store i32 %val1, ptr %v2 ret void } ; "caller3" calls "foo_sret" that takes a swifterror parameter. 
-define float @caller3(i8* %error_ref) { +define float @caller3(ptr %error_ref) { ; CHECK-LABEL: caller3: ; CHECK: mov [[ID:x[0-9]+]], x0 ; CHECK: mov [[ZERO:x[0-9]+]], xzr @@ -196,27 +190,26 @@ entry: %s = alloca %struct.S, align 8 - %error_ptr_ref = alloca swifterror %swift_error* - store %swift_error* null, %swift_error** %error_ptr_ref - call void @foo_sret(%struct.S* sret(%struct.S) %s, i32 1, %swift_error** swifterror %error_ptr_ref) - %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref - %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null - %tmp = bitcast %swift_error* %error_from_foo to i8* + %error_ptr_ref = alloca swifterror ptr + store ptr null, ptr %error_ptr_ref + call void @foo_sret(ptr sret(%struct.S) %s, i32 1, ptr swifterror %error_ptr_ref) + %error_from_foo = load ptr, ptr %error_ptr_ref + %had_error_from_foo = icmp ne ptr %error_from_foo, null br i1 %had_error_from_foo, label %handler, label %cont cont: - %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1 - %t = load i8, i8* %v1 - store i8 %t, i8* %error_ref + %v1 = getelementptr inbounds %swift_error, ptr %error_from_foo, i64 0, i32 1 + %t = load i8, ptr %v1 + store i8 %t, ptr %error_ref br label %handler handler: - call void @free(i8* %tmp) + call void @free(ptr %error_from_foo) ret float 1.0 } ; "foo_vararg" is a function that takes a swifterror parameter, it also has ; variable number of arguments. -declare void @llvm.va_start(i8*) nounwind -define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) { +declare void @llvm.va_start(ptr) nounwind +define float @foo_vararg(ptr swifterror %error_ptr_ref, ...) { ; CHECK-LABEL: foo_vararg: ; CHECK: mov w0, #16 ; CHECK: malloc @@ -236,30 +229,28 @@ ; CHECK: ldr {{w[0-9]+}}, [x[[ARG1]]] ; CHECK-NOT: x21 entry: - %call = call i8* @malloc(i64 16) - %call.0 = bitcast i8* %call to %swift_error* - store %swift_error* %call.0, %swift_error** %error_ptr_ref - %tmp = getelementptr inbounds i8, i8* %call, i64 8 - store i8 1, i8* %tmp + %call = call ptr @malloc(i64 16) + store ptr %call, ptr %error_ptr_ref + %tmp = getelementptr inbounds i8, ptr %call, i64 8 + store i8 1, ptr %tmp - %args = alloca i8*, align 8 + %args = alloca ptr, align 8 %a10 = alloca i32, align 4 %a11 = alloca i32, align 4 %a12 = alloca i32, align 4 - %v10 = bitcast i8** %args to i8* - call void @llvm.va_start(i8* %v10) - %v11 = va_arg i8** %args, i32 - store i32 %v11, i32* %a10, align 4 - %v12 = va_arg i8** %args, i32 - store i32 %v12, i32* %a11, align 4 - %v13 = va_arg i8** %args, i32 - store i32 %v13, i32* %a12, align 4 + call void @llvm.va_start(ptr %args) + %v11 = va_arg ptr %args, i32 + store i32 %v11, ptr %a10, align 4 + %v12 = va_arg ptr %args, i32 + store i32 %v12, ptr %a11, align 4 + %v13 = va_arg ptr %args, i32 + store i32 %v13, ptr %a12, align 4 ret float 1.0 } ; "caller4" calls "foo_vararg" that takes a swifterror parameter. 
-define float @caller4(i8* %error_ref) { +define float @caller4(ptr %error_ref) { ; CHECK-LABEL: caller4: ; CHECK: mov x21, xzr @@ -275,44 +266,43 @@ ; CHECK: strb [[CODE]], [{{.*}}[[ID]]] ; CHECK: bl {{.*}}free entry: - %error_ptr_ref = alloca swifterror %swift_error* - store %swift_error* null, %swift_error** %error_ptr_ref + %error_ptr_ref = alloca swifterror ptr + store ptr null, ptr %error_ptr_ref %a10 = alloca i32, align 4 %a11 = alloca i32, align 4 %a12 = alloca i32, align 4 - store i32 10, i32* %a10, align 4 - store i32 11, i32* %a11, align 4 - store i32 12, i32* %a12, align 4 - %v10 = load i32, i32* %a10, align 4 - %v11 = load i32, i32* %a11, align 4 - %v12 = load i32, i32* %a12, align 4 - - %call = call float (%swift_error**, ...) @foo_vararg(%swift_error** swifterror %error_ptr_ref, i32 %v10, i32 %v11, i32 %v12) - %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref - %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null - %tmp = bitcast %swift_error* %error_from_foo to i8* + store i32 10, ptr %a10, align 4 + store i32 11, ptr %a11, align 4 + store i32 12, ptr %a12, align 4 + %v10 = load i32, ptr %a10, align 4 + %v11 = load i32, ptr %a11, align 4 + %v12 = load i32, ptr %a12, align 4 + + %call = call float (ptr, ...) @foo_vararg(ptr swifterror %error_ptr_ref, i32 %v10, i32 %v11, i32 %v12) + %error_from_foo = load ptr, ptr %error_ptr_ref + %had_error_from_foo = icmp ne ptr %error_from_foo, null br i1 %had_error_from_foo, label %handler, label %cont cont: - %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1 - %t = load i8, i8* %v1 - store i8 %t, i8* %error_ref + %v1 = getelementptr inbounds %swift_error, ptr %error_from_foo, i64 0, i32 1 + %t = load i8, ptr %v1 + store i8 %t, ptr %error_ref br label %handler handler: - call void @free(i8* %tmp) + call void @free(ptr %error_from_foo) ret float 1.0 } ; Check that we don't blow up on tail calling swifterror argument functions. 
-define float @tailcallswifterror(%swift_error** swifterror %error_ptr_ref) { +define float @tailcallswifterror(ptr swifterror %error_ptr_ref) { entry: - %0 = tail call float @tailcallswifterror(%swift_error** swifterror %error_ptr_ref) + %0 = tail call float @tailcallswifterror(ptr swifterror %error_ptr_ref) ret float %0 } -define swiftcc float @tailcallswifterror_swiftcc(%swift_error** swifterror %error_ptr_ref) { +define swiftcc float @tailcallswifterror_swiftcc(ptr swifterror %error_ptr_ref) { entry: - %0 = tail call swiftcc float @tailcallswifterror_swiftcc(%swift_error** swifterror %error_ptr_ref) + %0 = tail call swiftcc float @tailcallswifterror_swiftcc(ptr swifterror %error_ptr_ref) ret float %0 } @@ -372,14 +362,14 @@ ; CHECK: ldr x28, [sp ; CHECK-NOT: x21 ; CHECK: ret -define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8*, %swift_error** nocapture swifterror %err) { - %error_ptr_ref = alloca swifterror %swift_error*, align 8 - store %swift_error* null, %swift_error** %error_ptr_ref - call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i8* null, %swift_error** nocapture swifterror %error_ptr_ref) - call swiftcc void @params_in_reg2(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i8* %8, %swift_error** nocapture swifterror %err) +define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, ptr, ptr nocapture swifterror %err) { + %error_ptr_ref = alloca swifterror ptr, align 8 + store ptr null, ptr %error_ptr_ref + call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, ptr null, ptr nocapture swifterror %error_ptr_ref) + call swiftcc void @params_in_reg2(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, ptr %8, ptr nocapture swifterror %err) ret void } -declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* , %swift_error** nocapture swifterror %err) +declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, ptr , ptr nocapture swifterror %err) ; CHECK-LABEL: params_and_return_in_reg ; Store callee saved registers. 
@@ -462,18 +452,18 @@ ; CHECK: ldp x27, x26, [sp ; CHECK: ldr x28, [sp ; CHECK: ret -define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* , %swift_error** nocapture swifterror %err) { - %error_ptr_ref = alloca swifterror %swift_error*, align 8 - store %swift_error* null, %swift_error** %error_ptr_ref - call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i8* null, %swift_error** nocapture swifterror %error_ptr_ref) - %val = call swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg2(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i8* %8, %swift_error** nocapture swifterror %err) - call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i8* null, %swift_error** nocapture swifterror %error_ptr_ref) +define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, ptr , ptr nocapture swifterror %err) { + %error_ptr_ref = alloca swifterror ptr, align 8 + store ptr null, ptr %error_ptr_ref + call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, ptr null, ptr nocapture swifterror %error_ptr_ref) + %val = call swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg2(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, ptr %8, ptr nocapture swifterror %err) + call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, ptr null, ptr nocapture swifterror %error_ptr_ref) ret { i64, i64, i64, i64, i64, i64, i64, i64 } %val } -declare swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* , %swift_error** nocapture swifterror %err) +declare swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, ptr , ptr nocapture swifterror %err) -declare void @acallee(i8*) +declare void @acallee(ptr) ; Make sure we don't tail call if the caller returns a swifterror value. We ; would have to move into the swifterror register before the tail call. 
@@ -481,9 +471,9 @@ ; CHECK-NOT: b _acallee ; CHECK: bl _acallee -define swiftcc void @tailcall_from_swifterror(%swift_error** swifterror %error_ptr_ref) { +define swiftcc void @tailcall_from_swifterror(ptr swifterror %error_ptr_ref) { entry: - tail call void @acallee(i8* null) + tail call void @acallee(ptr null) ret void } @@ -491,32 +481,32 @@ ; CHECK-NOT: b _simple_fn ; CHECK: bl _simple_fn declare void @simple_fn() -define swiftcc void @tailcall_from_swifterror2(%swift_error** swifterror %error_ptr_ref) { +define swiftcc void @tailcall_from_swifterror2(ptr swifterror %error_ptr_ref) { tail call void @simple_fn() ret void } -declare swiftcc void @foo2(%swift_error** swifterror) +declare swiftcc void @foo2(ptr swifterror) ; CHECK-LABEL: testAssign ; CHECK: mov x21, xzr ; CHECK: bl _foo2 ; CHECK: mov x0, x21 -define swiftcc %swift_error* @testAssign(i8* %error_ref) { +define swiftcc ptr @testAssign(ptr %error_ref) { entry: - %error_ptr = alloca swifterror %swift_error* - store %swift_error* null, %swift_error** %error_ptr - call swiftcc void @foo2(%swift_error** swifterror %error_ptr) + %error_ptr = alloca swifterror ptr + store ptr null, ptr %error_ptr + call swiftcc void @foo2(ptr swifterror %error_ptr) br label %a a: - %error = load %swift_error*, %swift_error** %error_ptr - ret %swift_error* %error + %error = load ptr, ptr %error_ptr + ret ptr %error } ; foo takes a swifterror parameter. We should be able to see that even when ; it isn't explicitly on the call. -define float @swifterror_param_not_on_call(i8* %error_ref) { +define float @swifterror_param_not_on_call(ptr %error_ref) { ; CHECK-LABEL: swifterror_param_not_on_call: ; CHECK: mov [[ID:x[0-9]+]], x0 ; CHECK: bl {{.*}}foo @@ -528,26 +518,25 @@ ; CHECK: bl {{.*}}free entry: - %error_ptr_ref = alloca swifterror %swift_error* - store %swift_error* null, %swift_error** %error_ptr_ref - %call = call float @foo(%swift_error** %error_ptr_ref) - %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref - %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null - %tmp = bitcast %swift_error* %error_from_foo to i8* + %error_ptr_ref = alloca swifterror ptr + store ptr null, ptr %error_ptr_ref + %call = call float @foo(ptr %error_ptr_ref) + %error_from_foo = load ptr, ptr %error_ptr_ref + %had_error_from_foo = icmp ne ptr %error_from_foo, null br i1 %had_error_from_foo, label %handler, label %cont cont: - %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1 - %t = load i8, i8* %v1 - store i8 %t, i8* %error_ref + %v1 = getelementptr inbounds %swift_error, ptr %error_from_foo, i64 0, i32 1 + %t = load i8, ptr %v1 + store i8 %t, ptr %error_ref br label %handler handler: - call void @free(i8* %tmp) + call void @free(ptr %error_from_foo) ret float 1.0 } ; foo_sret takes an sret parameter and a swifterror parameter. We should be ; able to see that, even if it's not explicitly on the call. 
-define float @swifterror_param_not_on_call2(i8* %error_ref) { +define float @swifterror_param_not_on_call2(ptr %error_ref) { ; CHECK-LABEL: swifterror_param_not_on_call2: ; CHECK: mov [[ID:x[0-9]+]], x0 ; CHECK: mov [[ZERO:x[0-9]+]], xzr @@ -561,19 +550,18 @@ entry: %s = alloca %struct.S, align 8 - %error_ptr_ref = alloca swifterror %swift_error* - store %swift_error* null, %swift_error** %error_ptr_ref - call void @foo_sret(%struct.S* %s, i32 1, %swift_error** %error_ptr_ref) - %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref - %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null - %tmp = bitcast %swift_error* %error_from_foo to i8* + %error_ptr_ref = alloca swifterror ptr + store ptr null, ptr %error_ptr_ref + call void @foo_sret(ptr %s, i32 1, ptr %error_ptr_ref) + %error_from_foo = load ptr, ptr %error_ptr_ref + %had_error_from_foo = icmp ne ptr %error_from_foo, null br i1 %had_error_from_foo, label %handler, label %cont cont: - %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1 - %t = load i8, i8* %v1 - store i8 %t, i8* %error_ref + %v1 = getelementptr inbounds %swift_error, ptr %error_from_foo, i64 0, i32 1 + %t = load i8, ptr %v1 + store i8 %t, ptr %error_ref br label %handler handler: - call void @free(i8* %tmp) + call void @free(ptr %error_from_foo) ret float 1.0 } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/swiftself.ll b/llvm/test/CodeGen/AArch64/GlobalISel/swiftself.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/swiftself.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/swiftself.ll @@ -4,8 +4,8 @@ ; CHECK-LABEL: swiftself_param: ; CHECK: mov x0, x20 ; CHECK-NEXT: ret -define i8* @swiftself_param(i8* swiftself %addr0) { - ret i8 *%addr0 +define ptr @swiftself_param(ptr swiftself %addr0) { + ret ptr %addr0 } ; Check that x20 is used to pass a swiftself argument. @@ -13,9 +13,9 @@ ; CHECK: mov x20, x0 ; CHECK: bl {{_?}}swiftself_param ; CHECK: ret -define i8 *@call_swiftself(i8* %arg) { - %res = call i8 *@swiftself_param(i8* swiftself %arg) - ret i8 *%res +define ptr @call_swiftself(ptr %arg) { + %res = call ptr @swiftself_param(ptr swiftself %arg) + ret ptr %res } ; Demonstrate that we do not need any movs when calling multiple functions @@ -26,9 +26,9 @@ ; CHECK-NOT: mov{{.*}}x20 ; CHECK-NEXT: bl {{_?}}swiftself_param ; CHECK: ret -define void @swiftself_passthrough(i8* swiftself %addr0) { - call i8 *@swiftself_param(i8* swiftself %addr0) - call i8 *@swiftself_param(i8* swiftself %addr0) +define void @swiftself_passthrough(ptr swiftself %addr0) { + call ptr @swiftself_param(ptr swiftself %addr0) + call ptr @swiftself_param(ptr swiftself %addr0) ret void } @@ -38,26 +38,26 @@ ; CHECK: mov x20, x0 ; CHECK: bl {{_?}}swiftself_param ; CHECK: ret -define i8* @swiftself_notail(i8* swiftself %addr0, i8* %addr1) nounwind { - %res = tail call i8* @swiftself_param(i8* swiftself %addr1) - ret i8* %res +define ptr @swiftself_notail(ptr swiftself %addr0, ptr %addr1) nounwind { + %res = tail call ptr @swiftself_param(ptr swiftself %addr1) + ret ptr %res } ; We cannot pretend that 'x0' is alive across the thisreturn_attribute call as ; we normally would. We marked the first parameter with swiftself which means it ; will no longer be passed in x0. 
-declare swiftcc i8* @thisreturn_attribute(i8* returned swiftself) +declare swiftcc ptr @thisreturn_attribute(ptr returned swiftself) ; CHECK-LABEL: swiftself_nothisreturn: ; CHECK-DAG: ldr x20, [x20] ; CHECK-DAG: mov [[CSREG:x[1-9].*]], x8 ; CHECK: bl {{_?}}thisreturn_attribute ; CHECK: str x0, [[[CSREG]] ; CHECK: ret -define hidden swiftcc void @swiftself_nothisreturn(i8** noalias nocapture sret(i8*), i8** noalias nocapture readonly swiftself) { +define hidden swiftcc void @swiftself_nothisreturn(ptr noalias nocapture sret(ptr), ptr noalias nocapture readonly swiftself) { entry: - %2 = load i8*, i8** %1, align 8 - %3 = tail call swiftcc i8* @thisreturn_attribute(i8* swiftself %2) - store i8* %3, i8** %0, align 8 + %2 = load ptr, ptr %1, align 8 + %3 = tail call swiftcc ptr @thisreturn_attribute(ptr swiftself %2) + store ptr %3, ptr %0, align 8 ret void } @@ -67,7 +67,7 @@ ; CHECK: mov x20, x0 ; CHECK: bl {{_?}}swiftself_param ; CHECK: ret -define i8 *@swiftself_not_on_call_params(i8* %arg) { - %res = call i8 *@swiftself_param(i8* %arg) - ret i8 *%res +define ptr @swiftself_not_on_call_params(ptr %arg) { + %res = call ptr @swiftself_param(ptr %arg) + ret ptr %res } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll b/llvm/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll @@ -3,7 +3,7 @@ %dag = type { { { i8, { i8 } }, { { i8, { i8 } }, { i8 } } }, { { i8, { i8 } }, { i8 } } } -define void @test_const(%dag* %dst) { +define void @test_const(ptr %dst) { ; CHECK-LABEL: name: test_const ; CHECK: bb.1.entry: ; CHECK: liveins: $x0 @@ -63,7 +63,7 @@ }, 0, 1 - store %dag %updated, %dag* %dst + store %dag %updated, ptr %dst ; 10, 20, 10, 20, 50, 10, 20, 20 sequence is expected store @@ -91,7 +91,7 @@ { i8 } { i8 20 } } }, - %dag* %dst + ptr %dst ; 10, 20, 10, 20, 20, 10, 20, 20 sequence is expected ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/translate-gep.ll b/llvm/test/CodeGen/AArch64/GlobalISel/translate-gep.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/translate-gep.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/translate-gep.ll @@ -3,7 +3,7 @@ %type = type [4 x {i8, i32}] -define i8* @translate_element_size1(i64 %arg) { +define ptr @translate_element_size1(i64 %arg) { ; CHECK-LABEL: name: translate_element_size1 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $x0 @@ -14,11 +14,11 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) ; CHECK-NEXT: $x0 = COPY [[COPY1]](p0) ; CHECK-NEXT: RET_ReallyLR implicit $x0 - %tmp = getelementptr i8, i8* null, i64 %arg - ret i8* %tmp + %tmp = getelementptr i8, ptr null, i64 %arg + ret ptr %tmp } -define %type* @first_offset_const(%type* %addr) { +define ptr @first_offset_const(ptr %addr) { ; CHECK-LABEL: name: first_offset_const ; CHECK: bb.1 (%ir-block.0): @@ -29,11 +29,11 @@ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: $x0 = COPY [[PTR_ADD]](p0) ; CHECK-NEXT: RET_ReallyLR implicit $x0 - %res = getelementptr %type, %type* %addr, i32 1 - ret %type* %res + %res = getelementptr %type, ptr %addr, i32 1 + ret ptr %res } -define %type* @first_offset_trivial(%type* %addr) { +define ptr @first_offset_trivial(ptr %addr) { ; CHECK-LABEL: name: first_offset_trivial ; CHECK: bb.1 (%ir-block.0): @@ -43,11 +43,11 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) ; CHECK-NEXT: $x0 = COPY [[COPY1]](p0) ; CHECK-NEXT: 
RET_ReallyLR implicit $x0 - %res = getelementptr %type, %type* %addr, i32 0 - ret %type* %res + %res = getelementptr %type, ptr %addr, i32 0 + ret ptr %res } -define %type* @first_offset_variable(%type* %addr, i64 %idx) { +define ptr @first_offset_variable(ptr %addr, i64 %idx) { ; CHECK-LABEL: name: first_offset_variable ; CHECK: bb.1 (%ir-block.0): @@ -61,11 +61,11 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) ; CHECK-NEXT: $x0 = COPY [[COPY2]](p0) ; CHECK-NEXT: RET_ReallyLR implicit $x0 - %res = getelementptr %type, %type* %addr, i64 %idx - ret %type* %res + %res = getelementptr %type, ptr %addr, i64 %idx + ret ptr %res } -define %type* @first_offset_ext(%type* %addr, i32 %idx) { +define ptr @first_offset_ext(ptr %addr, i32 %idx) { ; CHECK-LABEL: name: first_offset_ext ; CHECK: bb.1 (%ir-block.0): @@ -80,12 +80,12 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) ; CHECK-NEXT: $x0 = COPY [[COPY2]](p0) ; CHECK-NEXT: RET_ReallyLR implicit $x0 - %res = getelementptr %type, %type* %addr, i32 %idx - ret %type* %res + %res = getelementptr %type, ptr %addr, i32 %idx + ret ptr %res } %type1 = type [4 x [4 x i32]] -define i32* @const_then_var(%type1* %addr, i64 %idx) { +define ptr @const_then_var(ptr %addr, i64 %idx) { ; CHECK-LABEL: name: const_then_var ; CHECK: bb.1 (%ir-block.0): @@ -101,11 +101,11 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD1]](p0) ; CHECK-NEXT: $x0 = COPY [[COPY2]](p0) ; CHECK-NEXT: RET_ReallyLR implicit $x0 - %res = getelementptr %type1, %type1* %addr, i32 4, i32 1, i64 %idx - ret i32* %res + %res = getelementptr %type1, ptr %addr, i32 4, i32 1, i64 %idx + ret ptr %res } -define i32* @var_then_const(%type1* %addr, i64 %idx) { +define ptr @var_then_const(ptr %addr, i64 %idx) { ; CHECK-LABEL: name: var_then_const ; CHECK: bb.1 (%ir-block.0): @@ -120,13 +120,13 @@ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64) ; CHECK-NEXT: $x0 = COPY [[PTR_ADD1]](p0) ; CHECK-NEXT: RET_ReallyLR implicit $x0 - %res = getelementptr %type1, %type1* %addr, i64 %idx, i32 2, i32 2 - ret i32* %res + %res = getelementptr %type1, ptr %addr, i64 %idx, i32 2, i32 2 + ret ptr %res } @arr = external global [8 x i32] -define <2 x i32*> @vec_gep_scalar_base(<2 x i64> %offs) { +define <2 x ptr> @vec_gep_scalar_base(<2 x i64> %offs) { ; CHECK-LABEL: name: vec_gep_scalar_base ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $q0 @@ -142,6 +142,6 @@ ; CHECK-NEXT: $q0 = COPY [[COPY1]](<2 x p0>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 entry: - %0 = getelementptr inbounds [8 x i32], [8 x i32]* @arr, i64 0, <2 x i64> %offs - ret <2 x i32*> %0 + %0 = getelementptr inbounds [8 x i32], ptr @arr, i64 0, <2 x i64> %offs + ret <2 x ptr> %0 } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/translate-ret.ll b/llvm/test/CodeGen/AArch64/GlobalISel/translate-ret.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/translate-ret.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/translate-ret.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -mtriple=arm64-apple-ios %s -o - -global-isel -global-isel-abort=1 -stop-after=irtranslator | FileCheck %s -define i128 @func_i128(i128* %ptr) { +define i128 @func_i128(ptr %ptr) { ; CHECK-LABEL: name: func_i128 ; CHECK: bb.1 (%ir-block.0): @@ -13,11 +13,11 @@ ; CHECK-NEXT: $x0 = COPY [[UV]](s64) ; CHECK-NEXT: $x1 = COPY [[UV1]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1 - %val = load i128, i128* %ptr + %val = load i128, ptr %ptr ret i128 %val } -define <8 x 
float> @func_v8f32(<8 x float>* %ptr) { +define <8 x float> @func_v8f32(ptr %ptr) { ; CHECK-LABEL: name: func_v8f32 ; CHECK: bb.1 (%ir-block.0): @@ -29,12 +29,12 @@ ; CHECK-NEXT: $q0 = COPY [[UV]](<4 x s32>) ; CHECK-NEXT: $q1 = COPY [[UV1]](<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1 - %val = load <8 x float>, <8 x float>* %ptr + %val = load <8 x float>, ptr %ptr ret <8 x float> %val } ; A bit weird, but s0-s5 is what SDAG does too. -define <6 x float> @func_v6f32(<6 x float>* %ptr) { +define <6 x float> @func_v6f32(ptr %ptr) { ; CHECK-LABEL: name: func_v6f32 ; CHECK: bb.1 (%ir-block.0): @@ -50,7 +50,7 @@ ; CHECK-NEXT: $s4 = COPY [[UV4]](s32) ; CHECK-NEXT: $s5 = COPY [[UV5]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $s0, implicit $s1, implicit $s2, implicit $s3, implicit $s4, implicit $s5 - %val = load <6 x float>, <6 x float>* %ptr + %val = load <6 x float>, ptr %ptr ret <6 x float> %val } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/unknown-intrinsic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/unknown-intrinsic.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/unknown-intrinsic.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/unknown-intrinsic.ll @@ -1,10 +1,10 @@ ; RUN: llc -O0 -mtriple=arm64 < %s -declare i8* @llvm.launder.invariant.group(i8*) +declare ptr @llvm.launder.invariant.group(ptr) -define i8* @barrier(i8* %p) { +define ptr @barrier(ptr %p) { ; CHECK: bl llvm.launder.invariant.group - %q = call i8* @llvm.launder.invariant.group(i8* %p) - ret i8* %q + %q = call ptr @llvm.launder.invariant.group(ptr %p) + ret ptr %q } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/unwind-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/unwind-inline-asm.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/unwind-inline-asm.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/unwind-inline-asm.ll @@ -10,7 +10,7 @@ unreachable } -define dso_local void @test() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define dso_local void @test() personality ptr @__gxx_personality_v0 { entry: ; CHECK-LABEL: test: @@ -25,17 +25,17 @@ ret void lpad: - %0 = landingpad { i8*, i32 } + %0 = landingpad { ptr, i32 } cleanup ; CHECK: bl printf - call void (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.2, i64 0, i64 0)) - resume { i8*, i32 } %0 + call void (ptr, ...) @printf(ptr @.str.2) + resume { ptr, i32 } %0 } declare dso_local i32 @__gxx_personality_v0(...) -declare dso_local void @printf(i8*, ...) +declare dso_local void @printf(ptr, ...) 
; Exception table generation around the inline assembly diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/v8.4-atomic-128.ll b/llvm/test/CodeGen/AArch64/GlobalISel/v8.4-atomic-128.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/v8.4-atomic-128.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/v8.4-atomic-128.ll @@ -1,212 +1,192 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+v8.4a %s -o - -global-isel=1 -global-isel-abort=1 | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+lse2 %s -o - -global-isel=1 -global-isel-abort=1 | FileCheck %s -define void @test_atomic_load(i128* %addr) { +define void @test_atomic_load(ptr %addr) { ; CHECK-LABEL: test_atomic_load: ; CHECK: ldp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0] ; CHECK: mov v[[Q:[0-9]+]].d[0], [[LO]] ; CHECK: mov v[[Q]].d[1], [[HI]] ; CHECK: str q[[Q]], [x0] - %res.0 = load atomic i128, i128* %addr monotonic, align 16 - store i128 %res.0, i128* %addr + %res.0 = load atomic i128, ptr %addr monotonic, align 16 + store i128 %res.0, ptr %addr ; CHECK: ldp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0] ; CHECK: mov v[[Q:[0-9]+]].d[0], [[LO]] ; CHECK: mov v[[Q]].d[1], [[HI]] ; CHECK: str q[[Q]], [x0] - %res.1 = load atomic i128, i128* %addr unordered, align 16 - store i128 %res.1, i128* %addr + %res.1 = load atomic i128, ptr %addr unordered, align 16 + store i128 %res.1, ptr %addr ; CHECK: ldp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0] ; CHECK: dmb ish ; CHECK: mov v[[Q:[0-9]+]].d[0], [[LO]] ; CHECK: mov v[[Q]].d[1], [[HI]] ; CHECK: str q[[Q]], [x0] - %res.2 = load atomic i128, i128* %addr acquire, align 16 - store i128 %res.2, i128* %addr + %res.2 = load atomic i128, ptr %addr acquire, align 16 + store i128 %res.2, ptr %addr ; CHECK: ldp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0] ; CHECK: dmb ish ; CHECK: mov v[[Q:[0-9]+]].d[0], [[LO]] ; CHECK: mov v[[Q]].d[1], [[HI]] ; CHECK: str q[[Q]], [x0] - %res.3 = load atomic i128, i128* %addr seq_cst, align 16 - store i128 %res.3, i128* %addr + %res.3 = load atomic i128, ptr %addr seq_cst, align 16 + store i128 %res.3, ptr %addr - %addr8 = bitcast i128* %addr to i8* ; CHECK: ldp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0, #8] ; CHECK: mov v[[Q:[0-9]+]].d[0], [[LO]] ; CHECK: mov v[[Q]].d[1], [[HI]] ; CHECK: str q[[Q]], [x0] - %addr8.1 = getelementptr i8, i8* %addr8, i32 8 - %addr128.1 = bitcast i8* %addr8.1 to i128* - %res.5 = load atomic i128, i128* %addr128.1 monotonic, align 16 - store i128 %res.5, i128* %addr + %addr8.1 = getelementptr i8, ptr %addr, i32 8 + %res.5 = load atomic i128, ptr %addr8.1 monotonic, align 16 + store i128 %res.5, ptr %addr ; CHECK: ldp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0, #504] ; CHECK: mov v[[Q:[0-9]+]].d[0], [[LO]] ; CHECK: mov v[[Q]].d[1], [[HI]] ; CHECK: str q[[Q]], [x0] - %addr8.2 = getelementptr i8, i8* %addr8, i32 504 - %addr128.2 = bitcast i8* %addr8.2 to i128* - %res.6 = load atomic i128, i128* %addr128.2 monotonic, align 16 - store i128 %res.6, i128* %addr + %addr8.2 = getelementptr i8, ptr %addr, i32 504 + %res.6 = load atomic i128, ptr %addr8.2 monotonic, align 16 + store i128 %res.6, ptr %addr ; CHECK: ldp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0, #-512] ; CHECK: mov v[[Q:[0-9]+]].d[0], [[LO]] ; CHECK: mov v[[Q]].d[1], [[HI]] ; CHECK: str q[[Q]], [x0] - %addr8.3 = getelementptr i8, i8* %addr8, i32 -512 - %addr128.3 = bitcast i8* %addr8.3 to i128* - %res.7 = load atomic i128, i128* %addr128.3 monotonic, align 16 - store i128 %res.7, i128* %addr + %addr8.3 = getelementptr i8, ptr %addr, i32 -512 + %res.7 = load atomic i128, ptr %addr8.3 monotonic, align 16 + store i128 %res.7, ptr %addr ret void } 
-define void @test_libcall_load(i128* %addr) { +define void @test_libcall_load(ptr %addr) { ; CHECK-LABEL: test_libcall_load: ; CHECK: bl __atomic_load - %res.8 = load atomic i128, i128* %addr unordered, align 8 - store i128 %res.8, i128* %addr + %res.8 = load atomic i128, ptr %addr unordered, align 8 + store i128 %res.8, ptr %addr ret void } -define void @test_nonfolded_load1(i128* %addr) { +define void @test_nonfolded_load1(ptr %addr) { ; CHECK-LABEL: test_nonfolded_load1: - %addr8 = bitcast i128* %addr to i8* ; CHECK: add x[[ADDR:[0-9]+]], x0, #4 ; CHECK: ldp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x[[ADDR]]] ; CHECK: mov v[[Q:[0-9]+]].d[0], [[LO]] ; CHECK: mov v[[Q]].d[1], [[HI]] ; CHECK: str q[[Q]], [x0] - %addr8.1 = getelementptr i8, i8* %addr8, i32 4 - %addr128.1 = bitcast i8* %addr8.1 to i128* - %res.1 = load atomic i128, i128* %addr128.1 monotonic, align 16 - store i128 %res.1, i128* %addr + %addr8.1 = getelementptr i8, ptr %addr, i32 4 + %res.1 = load atomic i128, ptr %addr8.1 monotonic, align 16 + store i128 %res.1, ptr %addr ret void } -define void @test_nonfolded_load2(i128* %addr) { +define void @test_nonfolded_load2(ptr %addr) { ; CHECK-LABEL: test_nonfolded_load2: - %addr8 = bitcast i128* %addr to i8* ; CHECK: add x[[ADDR:[0-9]+]], x0, #512 ; CHECK: ldp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x[[ADDR]]] ; CHECK: mov v[[Q:[0-9]+]].d[0], [[LO]] ; CHECK: mov v[[Q]].d[1], [[HI]] ; CHECK: str q[[Q]], [x0] - %addr8.1 = getelementptr i8, i8* %addr8, i32 512 - %addr128.1 = bitcast i8* %addr8.1 to i128* - %res.1 = load atomic i128, i128* %addr128.1 monotonic, align 16 - store i128 %res.1, i128* %addr + %addr8.1 = getelementptr i8, ptr %addr, i32 512 + %res.1 = load atomic i128, ptr %addr8.1 monotonic, align 16 + store i128 %res.1, ptr %addr ret void } -define void @test_nonfolded_load3(i128* %addr) { +define void @test_nonfolded_load3(ptr %addr) { ; CHECK-LABEL: test_nonfolded_load3: - %addr8 = bitcast i128* %addr to i8* ; CHECK: sub x[[ADDR:[0-9]+]], x0, #520 ; CHECK: ldp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x[[ADDR]]] ; CHECK: mov v[[Q:[0-9]+]].d[0], [[LO]] ; CHECK: mov v[[Q]].d[1], [[HI]] ; CHECK: str q[[Q]], [x0] - %addr8.1 = getelementptr i8, i8* %addr8, i32 -520 - %addr128.1 = bitcast i8* %addr8.1 to i128* - %res.1 = load atomic i128, i128* %addr128.1 monotonic, align 16 - store i128 %res.1, i128* %addr + %addr8.1 = getelementptr i8, ptr %addr, i32 -520 + %res.1 = load atomic i128, ptr %addr8.1 monotonic, align 16 + store i128 %res.1, ptr %addr ret void } -define void @test_atomic_store(i128* %addr, i128 %val) { +define void @test_atomic_store(ptr %addr, i128 %val) { ; CHECK-LABEL: test_atomic_store: ; CHECK: stp x2, x3, [x0] - store atomic i128 %val, i128* %addr monotonic, align 16 + store atomic i128 %val, ptr %addr monotonic, align 16 ; CHECK: stp x2, x3, [x0] - store atomic i128 %val, i128* %addr unordered, align 16 + store atomic i128 %val, ptr %addr unordered, align 16 ; CHECK: dmb ish ; CHECK: stp x2, x3, [x0] - store atomic i128 %val, i128* %addr release, align 16 + store atomic i128 %val, ptr %addr release, align 16 ; CHECK: dmb ish ; CHECK: stp x2, x3, [x0] ; CHECK: dmb ish - store atomic i128 %val, i128* %addr seq_cst, align 16 + store atomic i128 %val, ptr %addr seq_cst, align 16 - %addr8 = bitcast i128* %addr to i8* ; CHECK: stp x2, x3, [x0, #8] - %addr8.1 = getelementptr i8, i8* %addr8, i32 8 - %addr128.1 = bitcast i8* %addr8.1 to i128* - store atomic i128 %val, i128* %addr128.1 monotonic, align 16 + %addr8.1 = getelementptr i8, ptr %addr, i32 8 + store atomic i128 %val, ptr 
%addr8.1 monotonic, align 16 ; CHECK: stp x2, x3, [x0, #504] - %addr8.2 = getelementptr i8, i8* %addr8, i32 504 - %addr128.2 = bitcast i8* %addr8.2 to i128* - store atomic i128 %val, i128* %addr128.2 monotonic, align 16 + %addr8.2 = getelementptr i8, ptr %addr, i32 504 + store atomic i128 %val, ptr %addr8.2 monotonic, align 16 ; CHECK: stp x2, x3, [x0, #-512] - %addr8.3 = getelementptr i8, i8* %addr8, i32 -512 - %addr128.3 = bitcast i8* %addr8.3 to i128* - store atomic i128 %val, i128* %addr128.3 monotonic, align 16 + %addr8.3 = getelementptr i8, ptr %addr, i32 -512 + store atomic i128 %val, ptr %addr8.3 monotonic, align 16 ret void } -define void @test_libcall_store(i128* %addr, i128 %val) { +define void @test_libcall_store(ptr %addr, i128 %val) { ; CHECK-LABEL: test_libcall_store: ; CHECK: bl __atomic_store - store atomic i128 %val, i128* %addr unordered, align 8 + store atomic i128 %val, ptr %addr unordered, align 8 ret void } -define void @test_nonfolded_store1(i128* %addr, i128 %val) { +define void @test_nonfolded_store1(ptr %addr, i128 %val) { ; CHECK-LABEL: test_nonfolded_store1: - %addr8 = bitcast i128* %addr to i8* ; CHECK: add x[[ADDR:[0-9]+]], x0, #4 ; CHECK: stp x2, x3, [x[[ADDR]]] - %addr8.1 = getelementptr i8, i8* %addr8, i32 4 - %addr128.1 = bitcast i8* %addr8.1 to i128* - store atomic i128 %val, i128* %addr128.1 monotonic, align 16 + %addr8.1 = getelementptr i8, ptr %addr, i32 4 + store atomic i128 %val, ptr %addr8.1 monotonic, align 16 ret void } -define void @test_nonfolded_store2(i128* %addr, i128 %val) { +define void @test_nonfolded_store2(ptr %addr, i128 %val) { ; CHECK-LABEL: test_nonfolded_store2: - %addr8 = bitcast i128* %addr to i8* ; CHECK: add x[[ADDR:[0-9]+]], x0, #512 ; CHECK: stp x2, x3, [x[[ADDR]]] - %addr8.1 = getelementptr i8, i8* %addr8, i32 512 - %addr128.1 = bitcast i8* %addr8.1 to i128* - store atomic i128 %val, i128* %addr128.1 monotonic, align 16 + %addr8.1 = getelementptr i8, ptr %addr, i32 512 + store atomic i128 %val, ptr %addr8.1 monotonic, align 16 ret void } -define void @test_nonfolded_store3(i128* %addr, i128 %val) { +define void @test_nonfolded_store3(ptr %addr, i128 %val) { ; CHECK-LABEL: test_nonfolded_store3: - %addr8 = bitcast i128* %addr to i8* ; CHECK: sub x[[ADDR:[0-9]+]], x0, #520 ; CHECK: stp x2, x3, [x[[ADDR]]] - %addr8.1 = getelementptr i8, i8* %addr8, i32 -520 - %addr128.1 = bitcast i8* %addr8.1 to i128* - store atomic i128 %val, i128* %addr128.1 monotonic, align 16 + %addr8.1 = getelementptr i8, ptr %addr, i32 -520 + store atomic i128 %val, ptr %addr8.1 monotonic, align 16 ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=aarch64-apple-ios -stop-after=instruction-select -global-isel -verify-machineinstrs %s -o - | FileCheck %s -define void @test_varargs_sentinel(i8* %list, i64, i64, i64, i64, i64, i64, i64, +define void @test_varargs_sentinel(ptr %list, i64, i64, i64, i64, i64, i64, i64, i32, ...) 
{ ; CHECK-LABEL: name: test_varargs_sentinel ; CHECK: fixedStack: @@ -9,8 +9,8 @@ ; CHECK: [[LIST:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[VARARGS_AREA:%[0-9]+]]:gpr64common = ADDXri %fixed-stack.[[VARARGS_SLOT]], 0, 0 ; CHECK: STRXui [[VARARGS_AREA]], [[LIST]], 0 :: (store (s64) into %ir.list, align 1) - call void @llvm.va_start(i8* %list) + call void @llvm.va_start(ptr %list) ret void } -declare void @llvm.va_start(i8*) +declare void @llvm.va_start(ptr) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/vastart.ll b/llvm/test/CodeGen/AArch64/GlobalISel/vastart.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/vastart.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/vastart.ll @@ -2,12 +2,12 @@ ; RUN: llc -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LINUX %s -declare void @llvm.va_start(i8*) -define void @test_va_start(i8* %list) { +declare void @llvm.va_start(ptr) +define void @test_va_start(ptr %list) { ; CHECK-LABEL: name: test_va_start ; CHECK: [[LIST:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-IOS: G_VASTART [[LIST]](p0) :: (store (s64) into %ir.list, align 1) ; CHECK-LINUX: G_VASTART [[LIST]](p0) :: (store (s256) into %ir.list, align 1) - call void @llvm.va_start(i8* %list) + call void @llvm.va_start(ptr %list) ret void } diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -66,6 +66,10 @@ ; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: TLS Variable Hoist +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter +; CHECK-NEXT: Optimize selects ; CHECK-NEXT: Stack Safety Analysis ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction @@ -184,6 +188,7 @@ ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: Shrink Wrapping analysis ; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization +; CHECK-NEXT: Machine Late Instructions Cleanup Pass ; CHECK-NEXT: Control Flow Optimizer ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Tail Duplication diff --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll --- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s ; RUN: llc < %s -mtriple=aarch64-eabi -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s +; RUN: llc < %s -mtriple=aarch64-eabi -mattr +cssc -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-CSSC %s define i32 @cnt32_advsimd(i32 %x) nounwind readnone { ; CHECK-LABEL: cnt32_advsimd: @@ -27,6 +28,11 @@ ; CHECK-NONEON-NEXT: mul w8, w9, w8 ; CHECK-NONEON-NEXT: lsr w0, w8, #24 ; CHECK-NONEON-NEXT: ret +; +; CHECK-CSSC-LABEL: cnt32_advsimd: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: cnt w0, w0 +; CHECK-CSSC-NEXT: ret %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) ret i32 %cnt } @@ -57,6 +63,13 @@ ; CHECK-NONEON-NEXT: mul w8, w9, w8 ; CHECK-NONEON-NEXT: lsr w0, w8, #24 ; CHECK-NONEON-NEXT: ret +; +; CHECK-CSSC-LABEL: cnt32_advsimd_2: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-CSSC-NEXT: fmov w8, s0 +; 
CHECK-CSSC-NEXT: cnt w0, w8 +; CHECK-CSSC-NEXT: ret %1 = extractelement <2 x i32> %x, i64 0 %2 = tail call i32 @llvm.ctpop.i32(i32 %1) ret i32 %2 @@ -86,6 +99,11 @@ ; CHECK-NONEON-NEXT: mul x8, x9, x8 ; CHECK-NONEON-NEXT: lsr x0, x8, #56 ; CHECK-NONEON-NEXT: ret +; +; CHECK-CSSC-LABEL: cnt64_advsimd: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: cnt x0, x0 +; CHECK-CSSC-NEXT: ret %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) ret i64 %cnt } @@ -125,6 +143,11 @@ ; CHECK-NONEON-NEXT: mul w8, w9, w8 ; CHECK-NONEON-NEXT: lsr w0, w8, #24 ; CHECK-NONEON-NEXT: ret +; +; CHECK-CSSC-LABEL: cnt32: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: cnt w0, w0 +; CHECK-CSSC-NEXT: ret %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) ret i32 %cnt } @@ -161,6 +184,11 @@ ; CHECK-NONEON-NEXT: mul x8, x9, x8 ; CHECK-NONEON-NEXT: lsr x0, x8, #56 ; CHECK-NONEON-NEXT: ret +; +; CHECK-CSSC-LABEL: cnt64: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: cnt x0, x0 +; CHECK-CSSC-NEXT: ret %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) ret i64 %cnt } @@ -181,6 +209,13 @@ ; CHECK-NONEON-NEXT: ccmp x0, #0, #4, eq ; CHECK-NONEON-NEXT: cset w0, ne ; CHECK-NONEON-NEXT: ret +; +; CHECK-CSSC-LABEL: ctpop_eq_one: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: cnt x8, x0 +; CHECK-CSSC-NEXT: cmp x8, #1 +; CHECK-CSSC-NEXT: cset w0, eq +; CHECK-CSSC-NEXT: ret %count = tail call i64 @llvm.ctpop.i64(i64 %x) %cmp = icmp eq i64 %count, 1 %conv = zext i1 %cmp to i32 @@ -203,6 +238,13 @@ ; CHECK-NONEON-NEXT: ccmp x0, #0, #4, eq ; CHECK-NONEON-NEXT: cset w0, eq ; CHECK-NONEON-NEXT: ret +; +; CHECK-CSSC-LABEL: ctpop_ne_one: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: cnt x8, x0 +; CHECK-CSSC-NEXT: cmp x8, #1 +; CHECK-CSSC-NEXT: cset w0, ne +; CHECK-CSSC-NEXT: ret %count = tail call i64 @llvm.ctpop.i64(i64 %x) %cmp = icmp ne i64 %count, 1 %conv = zext i1 %cmp to i32 diff --git a/llvm/test/CodeGen/AArch64/ctpop-nonean.ll b/llvm/test/CodeGen/AArch64/ctpop-nonean.ll --- a/llvm/test/CodeGen/AArch64/ctpop-nonean.ll +++ b/llvm/test/CodeGen/AArch64/ctpop-nonean.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-neon < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-neon -mattr=+cssc < %s | FileCheck %s -check-prefix=CHECK-CSSC declare i128 @llvm.ctpop.i128(i128) @@ -31,6 +32,14 @@ ; CHECK-NEXT: lsr x9, x9, #56 ; CHECK-NEXT: add x0, x9, x8, lsr #56 ; CHECK-NEXT: ret +; +; CHECK-CSSC-LABEL: ctpop_i128: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: cnt x8, x1 +; CHECK-CSSC-NEXT: cnt x9, x0 +; CHECK-CSSC-NEXT: add x0, x9, x8 +; CHECK-CSSC-NEXT: mov x1, xzr +; CHECK-CSSC-NEXT: ret %c = call i128 @llvm.ctpop.i128(i128 %i) ret i128 %c } diff --git a/llvm/test/CodeGen/AArch64/dag-ReplaceAllUsesOfValuesWith.ll b/llvm/test/CodeGen/AArch64/dag-ReplaceAllUsesOfValuesWith.ll old mode 100755 new mode 100644 diff --git a/llvm/test/CodeGen/AArch64/parity.ll b/llvm/test/CodeGen/AArch64/parity.ll --- a/llvm/test/CodeGen/AArch64/parity.ll +++ b/llvm/test/CodeGen/AArch64/parity.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -mattr=+cssc | FileCheck %s -check-prefix=CHECK-CSSC define i4 @parity_4(i4 %x) { ; CHECK-LABEL: parity_4: @@ -9,6 +10,13 @@ ; CHECK-NEXT: eor w8, w8, w8, lsr #1 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; CHECK-CSSC-LABEL: parity_4: +; 
CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: and w8, w0, #0xf +; CHECK-CSSC-NEXT: cnt w8, w8 +; CHECK-CSSC-NEXT: and w0, w8, #0x1 +; CHECK-CSSC-NEXT: ret %1 = tail call i4 @llvm.ctpop.i4(i4 %x) %2 = and i4 %1, 1 ret i4 %2 @@ -23,6 +31,13 @@ ; CHECK-NEXT: eor w8, w8, w8, lsr #1 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; CHECK-CSSC-LABEL: parity_8: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: and w8, w0, #0xff +; CHECK-CSSC-NEXT: cnt w8, w8 +; CHECK-CSSC-NEXT: and w0, w8, #0x1 +; CHECK-CSSC-NEXT: ret %1 = tail call i8 @llvm.ctpop.i8(i8 %x) %2 = and i8 %1, 1 ret i8 %2 @@ -38,6 +53,13 @@ ; CHECK-NEXT: eor w8, w8, w8, lsr #1 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; CHECK-CSSC-LABEL: parity_16: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: and w8, w0, #0xffff +; CHECK-CSSC-NEXT: cnt w8, w8 +; CHECK-CSSC-NEXT: and w0, w8, #0x1 +; CHECK-CSSC-NEXT: ret %1 = tail call i16 @llvm.ctpop.i16(i16 %x) %2 = and i16 %1, 1 ret i16 %2 @@ -54,6 +76,13 @@ ; CHECK-NEXT: eor w8, w8, w8, lsr #1 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; CHECK-CSSC-LABEL: parity_17: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: and w8, w0, #0x1ffff +; CHECK-CSSC-NEXT: cnt w8, w8 +; CHECK-CSSC-NEXT: and w0, w8, #0x1 +; CHECK-CSSC-NEXT: ret %1 = tail call i17 @llvm.ctpop.i17(i17 %x) %2 = and i17 %1, 1 ret i17 %2 @@ -69,6 +98,12 @@ ; CHECK-NEXT: eor w8, w8, w8, lsr #1 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; CHECK-CSSC-LABEL: parity_32: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: cnt w8, w0 +; CHECK-CSSC-NEXT: and w0, w8, #0x1 +; CHECK-CSSC-NEXT: ret %1 = tail call i32 @llvm.ctpop.i32(i32 %x) %2 = and i32 %1, 1 ret i32 %2 @@ -83,6 +118,12 @@ ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; CHECK-CSSC-LABEL: parity_64: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: cnt x8, x0 +; CHECK-CSSC-NEXT: and x0, x8, #0x1 +; CHECK-CSSC-NEXT: ret %1 = tail call i64 @llvm.ctpop.i64(i64 %x) %2 = and i64 %1, 1 ret i64 %2 @@ -99,6 +140,14 @@ ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; CHECK-CSSC-LABEL: parity_128: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: eor x8, x0, x1 +; CHECK-CSSC-NEXT: mov x1, xzr +; CHECK-CSSC-NEXT: cnt x8, x8 +; CHECK-CSSC-NEXT: and x0, x8, #0x1 +; CHECK-CSSC-NEXT: ret %1 = tail call i128 @llvm.ctpop.i128(i128 %x) %2 = and i128 %1, 1 ret i128 %2 @@ -113,6 +162,12 @@ ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; CHECK-CSSC-LABEL: parity_64_trunc: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: cnt x8, x0 +; CHECK-CSSC-NEXT: and w0, w8, #0x1 +; CHECK-CSSC-NEXT: ret %1 = tail call i64 @llvm.ctpop.i64(i64 %x) %2 = trunc i64 %1 to i32 %3 = and i32 %2, 1 @@ -129,6 +184,12 @@ ; CHECK-NEXT: eor w8, w8, w8, lsr #1 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; CHECK-CSSC-LABEL: parity_32_trunc: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: cnt w8, w0 +; CHECK-CSSC-NEXT: and w0, w8, #0x1 +; CHECK-CSSC-NEXT: ret %1 = tail call i32 @llvm.ctpop.i32(i32 %x) %2 = trunc i32 %1 to i8 %3 = and i8 %2, 1 @@ -144,6 +205,13 @@ ; CHECK-NEXT: eor w8, w8, w8, lsr #1 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; CHECK-CSSC-LABEL: parity_8_zext: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: and w8, w0, #0xff +; CHECK-CSSC-NEXT: cnt w8, w8 +; CHECK-CSSC-NEXT: and w0, w8, #0x1 +; CHECK-CSSC-NEXT: ret %a = zext i8 %x to i32 %b = tail call i32 @llvm.ctpop.i32(i32 %a) %c = and i32 %b, 1 @@ -159,6 +227,13 @@ ; CHECK-NEXT: eor w8, w8, w8, lsr #1 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; 
CHECK-CSSC-LABEL: parity_8_mask: +; CHECK-CSSC: // %bb.0: +; CHECK-CSSC-NEXT: and w8, w0, #0xff +; CHECK-CSSC-NEXT: cnt w8, w8 +; CHECK-CSSC-NEXT: and w0, w8, #0x1 +; CHECK-CSSC-NEXT: ret %a = and i32 %x, 255 %b = tail call i32 @llvm.ctpop.i32(i32 %a) %c = and i32 %b, 1 diff --git a/llvm/test/CodeGen/AArch64/selectopt.ll b/llvm/test/CodeGen/AArch64/selectopt.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/selectopt.ll @@ -0,0 +1,263 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=generic -S < %s | FileCheck %s --check-prefix=CHECKOO +; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=cortex-a55 -S < %s | FileCheck %s --check-prefix=CHECKII +; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=cortex-a510 -S < %s | FileCheck %s --check-prefix=CHECKII +; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=cortex-a72 -S < %s | FileCheck %s --check-prefix=CHECKOO +; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=neoverse-n1 -S < %s | FileCheck %s --check-prefix=CHECKOO +; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=cortex-a710 -S < %s | FileCheck %s --check-prefix=CHECKOO +; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=neoverse-v2 -S < %s | FileCheck %s --check-prefix=CHECKOO + +%struct.st = type { i32, i64, ptr, ptr, i16, ptr, ptr, i64, i64 } + +; This test has a select at the end of if.then, which is better transformed to a branch on OoO cores. + +define void @replace(ptr nocapture noundef %newst, ptr noundef %t, ptr noundef %h, i64 noundef %c, i64 noundef %rc, i64 noundef %ma, i64 noundef %n) { +; CHECKOO-LABEL: @replace( +; CHECKOO-NEXT: entry: +; CHECKOO-NEXT: [[T1:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[NEWST:%.*]], i64 0, i32 2 +; CHECKOO-NEXT: store ptr [[T:%.*]], ptr [[T1]], align 8 +; CHECKOO-NEXT: [[H3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 0, i32 3 +; CHECKOO-NEXT: store ptr [[H:%.*]], ptr [[H3]], align 8 +; CHECKOO-NEXT: [[ORG_C:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 0, i32 8 +; CHECKOO-NEXT: store i64 [[C:%.*]], ptr [[ORG_C]], align 8 +; CHECKOO-NEXT: [[C6:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 0, i32 1 +; CHECKOO-NEXT: store i64 [[C]], ptr [[C6]], align 8 +; CHECKOO-NEXT: [[FLOW:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 0, i32 7 +; CHECKOO-NEXT: store i64 [[RC:%.*]], ptr [[FLOW]], align 8 +; CHECKOO-NEXT: [[CONV:%.*]] = trunc i64 [[N:%.*]] to i32 +; CHECKOO-NEXT: store i32 [[CONV]], ptr [[NEWST]], align 8 +; CHECKOO-NEXT: [[FLOW10:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 1, i32 7 +; CHECKOO-NEXT: [[TMP0:%.*]] = load i64, ptr [[FLOW10]], align 8 +; CHECKOO-NEXT: [[FLOW12:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 2, i32 7 +; CHECKOO-NEXT: [[TMP1:%.*]] = load i64, ptr [[FLOW12]], align 8 +; CHECKOO-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]] +; CHECKOO-NEXT: [[CONV15:%.*]] = select i1 [[CMP13]], i64 2, i64 3 +; CHECKOO-NEXT: [[CMP16_NOT149:%.*]] = icmp sgt i64 [[CONV15]], [[MA:%.*]] +; CHECKOO-NEXT: br i1 [[CMP16_NOT149]], label [[WHILE_END:%.*]], label [[LAND_RHS:%.*]] +; CHECKOO: land.rhs: +; CHECKOO-NEXT: [[CMP_0151:%.*]] = phi i64 [ [[CMP_1:%.*]], [[IF_END87:%.*]] ], [ [[CONV15]], [[ENTRY:%.*]] ] +; CHECKOO-NEXT: [[POS_0150:%.*]] = phi i64 [ [[CMP_0151]], [[IF_END87]] ], [ 1, [[ENTRY]] ] +; CHECKOO-NEXT: [[SUB:%.*]] = add nsw i64 
[[CMP_0151]], -1 +; CHECKOO-NEXT: [[FLOW19:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 7 +; CHECKOO-NEXT: [[TMP2:%.*]] = load i64, ptr [[FLOW19]], align 8 +; CHECKOO-NEXT: [[CMP20:%.*]] = icmp sgt i64 [[TMP2]], [[RC]] +; CHECKOO-NEXT: br i1 [[CMP20]], label [[WHILE_BODY:%.*]], label [[WHILE_END]] +; CHECKOO: while.body: +; CHECKOO-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]] +; CHECKOO-NEXT: [[T24:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 2 +; CHECKOO-NEXT: [[TMP3:%.*]] = load ptr, ptr [[T24]], align 8 +; CHECKOO-NEXT: [[SUB25:%.*]] = add nsw i64 [[POS_0150]], -1 +; CHECKOO-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]] +; CHECKOO-NEXT: [[T27:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 2 +; CHECKOO-NEXT: store ptr [[TMP3]], ptr [[T27]], align 8 +; CHECKOO-NEXT: [[H30:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 3 +; CHECKOO-NEXT: [[TMP4:%.*]] = load ptr, ptr [[H30]], align 8 +; CHECKOO-NEXT: [[H33:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 3 +; CHECKOO-NEXT: store ptr [[TMP4]], ptr [[H33]], align 8 +; CHECKOO-NEXT: [[C36:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 1 +; CHECKOO-NEXT: [[TMP5:%.*]] = load i64, ptr [[C36]], align 8 +; CHECKOO-NEXT: [[C39:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 1 +; CHECKOO-NEXT: store i64 [[TMP5]], ptr [[C39]], align 8 +; CHECKOO-NEXT: [[TMP6:%.*]] = load i64, ptr [[C36]], align 8 +; CHECKOO-NEXT: [[ORG_C45:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 8 +; CHECKOO-NEXT: store i64 [[TMP6]], ptr [[ORG_C45]], align 8 +; CHECKOO-NEXT: [[FLOW51:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 7 +; CHECKOO-NEXT: store i64 [[TMP2]], ptr [[FLOW51]], align 8 +; CHECKOO-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX18]], align 8 +; CHECKOO-NEXT: store i32 [[TMP7]], ptr [[ARRAYIDX26]], align 8 +; CHECKOO-NEXT: store ptr [[T]], ptr [[T24]], align 8 +; CHECKOO-NEXT: store ptr [[H]], ptr [[H30]], align 8 +; CHECKOO-NEXT: store i64 [[C]], ptr [[C36]], align 8 +; CHECKOO-NEXT: [[ORG_C69:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 8 +; CHECKOO-NEXT: store i64 [[C]], ptr [[ORG_C69]], align 8 +; CHECKOO-NEXT: store i64 [[RC]], ptr [[FLOW19]], align 8 +; CHECKOO-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX18]], align 8 +; CHECKOO-NEXT: [[MUL:%.*]] = shl nsw i64 [[CMP_0151]], 1 +; CHECKOO-NEXT: [[ADD:%.*]] = or i64 [[MUL]], 1 +; CHECKOO-NEXT: [[CMP77_NOT:%.*]] = icmp sgt i64 [[ADD]], [[MA]] +; CHECKOO-NEXT: br i1 [[CMP77_NOT]], label [[IF_END87]], label [[IF_THEN:%.*]] +; CHECKOO: if.then: +; CHECKOO-NEXT: [[SUB79:%.*]] = add nsw i64 [[MUL]], -1 +; CHECKOO-NEXT: [[FLOW81:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB79]], i32 7 +; CHECKOO-NEXT: [[TMP8:%.*]] = load i64, ptr [[FLOW81]], align 8 +; CHECKOO-NEXT: [[FLOW83:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[MUL]], i32 7 +; CHECKOO-NEXT: [[TMP9:%.*]] = load i64, ptr [[FLOW83]], align 8 +; CHECKOO-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP8]], [[TMP9]] +; CHECKOO-NEXT: [[SPEC_SELECT_FROZEN:%.*]] = freeze i1 [[CMP84]] +; CHECKOO-NEXT: br i1 [[SPEC_SELECT_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]] +; CHECKOO: select.false: +; 
CHECKOO-NEXT: br label [[SELECT_END]] +; CHECKOO: select.end: +; CHECKOO-NEXT: [[SPEC_SELECT:%.*]] = phi i64 [ [[ADD]], [[IF_THEN]] ], [ [[MUL]], [[SELECT_FALSE]] ] +; CHECKOO-NEXT: br label [[IF_END87]] +; CHECKOO: if.end87: +; CHECKOO-NEXT: [[CMP_1]] = phi i64 [ [[MUL]], [[WHILE_BODY]] ], [ [[SPEC_SELECT]], [[SELECT_END]] ] +; CHECKOO-NEXT: [[CMP16_NOT:%.*]] = icmp sgt i64 [[CMP_1]], [[MA]] +; CHECKOO-NEXT: br i1 [[CMP16_NOT]], label [[WHILE_END]], label [[LAND_RHS]] +; CHECKOO: while.end: +; CHECKOO-NEXT: ret void +; +; CHECKII-LABEL: @replace( +; CHECKII-NEXT: entry: +; CHECKII-NEXT: [[T1:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[NEWST:%.*]], i64 0, i32 2 +; CHECKII-NEXT: store ptr [[T:%.*]], ptr [[T1]], align 8 +; CHECKII-NEXT: [[H3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 0, i32 3 +; CHECKII-NEXT: store ptr [[H:%.*]], ptr [[H3]], align 8 +; CHECKII-NEXT: [[ORG_C:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 0, i32 8 +; CHECKII-NEXT: store i64 [[C:%.*]], ptr [[ORG_C]], align 8 +; CHECKII-NEXT: [[C6:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 0, i32 1 +; CHECKII-NEXT: store i64 [[C]], ptr [[C6]], align 8 +; CHECKII-NEXT: [[FLOW:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 0, i32 7 +; CHECKII-NEXT: store i64 [[RC:%.*]], ptr [[FLOW]], align 8 +; CHECKII-NEXT: [[CONV:%.*]] = trunc i64 [[N:%.*]] to i32 +; CHECKII-NEXT: store i32 [[CONV]], ptr [[NEWST]], align 8 +; CHECKII-NEXT: [[FLOW10:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 1, i32 7 +; CHECKII-NEXT: [[TMP0:%.*]] = load i64, ptr [[FLOW10]], align 8 +; CHECKII-NEXT: [[FLOW12:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 2, i32 7 +; CHECKII-NEXT: [[TMP1:%.*]] = load i64, ptr [[FLOW12]], align 8 +; CHECKII-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]] +; CHECKII-NEXT: [[CONV15:%.*]] = select i1 [[CMP13]], i64 2, i64 3 +; CHECKII-NEXT: [[CMP16_NOT149:%.*]] = icmp sgt i64 [[CONV15]], [[MA:%.*]] +; CHECKII-NEXT: br i1 [[CMP16_NOT149]], label [[WHILE_END:%.*]], label [[LAND_RHS:%.*]] +; CHECKII: land.rhs: +; CHECKII-NEXT: [[CMP_0151:%.*]] = phi i64 [ [[CMP_1:%.*]], [[IF_END87:%.*]] ], [ [[CONV15]], [[ENTRY:%.*]] ] +; CHECKII-NEXT: [[POS_0150:%.*]] = phi i64 [ [[CMP_0151]], [[IF_END87]] ], [ 1, [[ENTRY]] ] +; CHECKII-NEXT: [[SUB:%.*]] = add nsw i64 [[CMP_0151]], -1 +; CHECKII-NEXT: [[FLOW19:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 7 +; CHECKII-NEXT: [[TMP2:%.*]] = load i64, ptr [[FLOW19]], align 8 +; CHECKII-NEXT: [[CMP20:%.*]] = icmp sgt i64 [[TMP2]], [[RC]] +; CHECKII-NEXT: br i1 [[CMP20]], label [[WHILE_BODY:%.*]], label [[WHILE_END]] +; CHECKII: while.body: +; CHECKII-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]] +; CHECKII-NEXT: [[T24:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 2 +; CHECKII-NEXT: [[TMP3:%.*]] = load ptr, ptr [[T24]], align 8 +; CHECKII-NEXT: [[SUB25:%.*]] = add nsw i64 [[POS_0150]], -1 +; CHECKII-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]] +; CHECKII-NEXT: [[T27:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 2 +; CHECKII-NEXT: store ptr [[TMP3]], ptr [[T27]], align 8 +; CHECKII-NEXT: [[H30:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 3 +; CHECKII-NEXT: [[TMP4:%.*]] = load ptr, ptr [[H30]], align 8 +; CHECKII-NEXT: [[H33:%.*]] = getelementptr 
inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 3 +; CHECKII-NEXT: store ptr [[TMP4]], ptr [[H33]], align 8 +; CHECKII-NEXT: [[C36:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 1 +; CHECKII-NEXT: [[TMP5:%.*]] = load i64, ptr [[C36]], align 8 +; CHECKII-NEXT: [[C39:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 1 +; CHECKII-NEXT: store i64 [[TMP5]], ptr [[C39]], align 8 +; CHECKII-NEXT: [[TMP6:%.*]] = load i64, ptr [[C36]], align 8 +; CHECKII-NEXT: [[ORG_C45:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 8 +; CHECKII-NEXT: store i64 [[TMP6]], ptr [[ORG_C45]], align 8 +; CHECKII-NEXT: [[FLOW51:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 7 +; CHECKII-NEXT: store i64 [[TMP2]], ptr [[FLOW51]], align 8 +; CHECKII-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX18]], align 8 +; CHECKII-NEXT: store i32 [[TMP7]], ptr [[ARRAYIDX26]], align 8 +; CHECKII-NEXT: store ptr [[T]], ptr [[T24]], align 8 +; CHECKII-NEXT: store ptr [[H]], ptr [[H30]], align 8 +; CHECKII-NEXT: store i64 [[C]], ptr [[C36]], align 8 +; CHECKII-NEXT: [[ORG_C69:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 8 +; CHECKII-NEXT: store i64 [[C]], ptr [[ORG_C69]], align 8 +; CHECKII-NEXT: store i64 [[RC]], ptr [[FLOW19]], align 8 +; CHECKII-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX18]], align 8 +; CHECKII-NEXT: [[MUL:%.*]] = shl nsw i64 [[CMP_0151]], 1 +; CHECKII-NEXT: [[ADD:%.*]] = or i64 [[MUL]], 1 +; CHECKII-NEXT: [[CMP77_NOT:%.*]] = icmp sgt i64 [[ADD]], [[MA]] +; CHECKII-NEXT: br i1 [[CMP77_NOT]], label [[IF_END87]], label [[IF_THEN:%.*]] +; CHECKII: if.then: +; CHECKII-NEXT: [[SUB79:%.*]] = add nsw i64 [[MUL]], -1 +; CHECKII-NEXT: [[FLOW81:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB79]], i32 7 +; CHECKII-NEXT: [[TMP8:%.*]] = load i64, ptr [[FLOW81]], align 8 +; CHECKII-NEXT: [[FLOW83:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[MUL]], i32 7 +; CHECKII-NEXT: [[TMP9:%.*]] = load i64, ptr [[FLOW83]], align 8 +; CHECKII-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP8]], [[TMP9]] +; CHECKII-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP84]], i64 [[ADD]], i64 [[MUL]] +; CHECKII-NEXT: br label [[IF_END87]] +; CHECKII: if.end87: +; CHECKII-NEXT: [[CMP_1]] = phi i64 [ [[MUL]], [[WHILE_BODY]] ], [ [[SPEC_SELECT]], [[IF_THEN]] ] +; CHECKII-NEXT: [[CMP16_NOT:%.*]] = icmp sgt i64 [[CMP_1]], [[MA]] +; CHECKII-NEXT: br i1 [[CMP16_NOT]], label [[WHILE_END]], label [[LAND_RHS]] +; CHECKII: while.end: +; CHECKII-NEXT: ret void +; +entry: + %t1 = getelementptr inbounds %struct.st, ptr %newst, i64 0, i32 2 + store ptr %t, ptr %t1, align 8 + %h3 = getelementptr inbounds %struct.st, ptr %newst, i64 0, i32 3 + store ptr %h, ptr %h3, align 8 + %org_c = getelementptr inbounds %struct.st, ptr %newst, i64 0, i32 8 + store i64 %c, ptr %org_c, align 8 + %c6 = getelementptr inbounds %struct.st, ptr %newst, i64 0, i32 1 + store i64 %c, ptr %c6, align 8 + %flow = getelementptr inbounds %struct.st, ptr %newst, i64 0, i32 7 + store i64 %rc, ptr %flow, align 8 + %conv = trunc i64 %n to i32 + store i32 %conv, ptr %newst, align 8 + %flow10 = getelementptr inbounds %struct.st, ptr %newst, i64 1, i32 7 + %0 = load i64, ptr %flow10, align 8 + %flow12 = getelementptr inbounds %struct.st, ptr %newst, i64 2, i32 7 + %1 = load i64, ptr %flow12, align 8 + %cmp13 = icmp sgt i64 %0, %1 + %conv15 = select i1 %cmp13, i64 2, i64 3 + %cmp16.not149 = icmp sgt i64 %conv15, 
%ma + br i1 %cmp16.not149, label %while.end, label %land.rhs + +land.rhs: ; preds = %entry, %if.end87 + %cmp.0151 = phi i64 [ %cmp.1, %if.end87 ], [ %conv15, %entry ] + %pos.0150 = phi i64 [ %cmp.0151, %if.end87 ], [ 1, %entry ] + %sub = add nsw i64 %cmp.0151, -1 + %flow19 = getelementptr inbounds %struct.st, ptr %newst, i64 %sub, i32 7 + %2 = load i64, ptr %flow19, align 8 + %cmp20 = icmp sgt i64 %2, %rc + br i1 %cmp20, label %while.body, label %while.end + +while.body: ; preds = %land.rhs + %arrayidx18 = getelementptr inbounds %struct.st, ptr %newst, i64 %sub + %t24 = getelementptr inbounds %struct.st, ptr %newst, i64 %sub, i32 2 + %3 = load ptr, ptr %t24, align 8 + %sub25 = add nsw i64 %pos.0150, -1 + %arrayidx26 = getelementptr inbounds %struct.st, ptr %newst, i64 %sub25 + %t27 = getelementptr inbounds %struct.st, ptr %newst, i64 %sub25, i32 2 + store ptr %3, ptr %t27, align 8 + %h30 = getelementptr inbounds %struct.st, ptr %newst, i64 %sub, i32 3 + %4 = load ptr, ptr %h30, align 8 + %h33 = getelementptr inbounds %struct.st, ptr %newst, i64 %sub25, i32 3 + store ptr %4, ptr %h33, align 8 + %c36 = getelementptr inbounds %struct.st, ptr %newst, i64 %sub, i32 1 + %5 = load i64, ptr %c36, align 8 + %c39 = getelementptr inbounds %struct.st, ptr %newst, i64 %sub25, i32 1 + store i64 %5, ptr %c39, align 8 + %6 = load i64, ptr %c36, align 8 + %org_c45 = getelementptr inbounds %struct.st, ptr %newst, i64 %sub25, i32 8 + store i64 %6, ptr %org_c45, align 8 + %flow51 = getelementptr inbounds %struct.st, ptr %newst, i64 %sub25, i32 7 + store i64 %2, ptr %flow51, align 8 + %7 = load i32, ptr %arrayidx18, align 8 + store i32 %7, ptr %arrayidx26, align 8 + store ptr %t, ptr %t24, align 8 + store ptr %h, ptr %h30, align 8 + store i64 %c, ptr %c36, align 8 + %org_c69 = getelementptr inbounds %struct.st, ptr %newst, i64 %sub, i32 8 + store i64 %c, ptr %org_c69, align 8 + store i64 %rc, ptr %flow19, align 8 + store i32 %conv, ptr %arrayidx18, align 8 + %mul = shl nsw i64 %cmp.0151, 1 + %add = or i64 %mul, 1 + %cmp77.not = icmp sgt i64 %add, %ma + br i1 %cmp77.not, label %if.end87, label %if.then + +if.then: ; preds = %while.body + %sub79 = add nsw i64 %mul, -1 + %flow81 = getelementptr inbounds %struct.st, ptr %newst, i64 %sub79, i32 7 + %8 = load i64, ptr %flow81, align 8 + %flow83 = getelementptr inbounds %struct.st, ptr %newst, i64 %mul, i32 7 + %9 = load i64, ptr %flow83, align 8 + %cmp84 = icmp slt i64 %8, %9 + %spec.select = select i1 %cmp84, i64 %add, i64 %mul + br label %if.end87 + +if.end87: ; preds = %if.then, %while.body + %cmp.1 = phi i64 [ %mul, %while.body ], [ %spec.select, %if.then ] + %cmp16.not = icmp sgt i64 %cmp.1, %ma + br i1 %cmp16.not, label %while.end, label %land.rhs + +while.end: ; preds = %land.rhs, %if.end87, %entry + ret void +} diff --git a/llvm/test/CodeGen/AArch64/shiftregister-from-and.ll b/llvm/test/CodeGen/AArch64/shiftregister-from-and.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/shiftregister-from-and.ll @@ -0,0 +1,297 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s + +; logic shift reg pattern: and +; already optimized by another pattern + +define i64 @and_shiftedreg_from_and(i64 %a, i64 %b) { +; CHECK-LABEL: and_shiftedreg_from_and: +; CHECK: // %bb.0: +; CHECK-NEXT: and x8, x1, x0, asr #23 +; CHECK-NEXT: and x0, x8, #0xffffffffff000000 +; CHECK-NEXT: ret + %ashr = ashr i64 %a, 23 + %and = and i64 %ashr, -16777216 + %r = and i64 %b, %and + ret i64 %r +} + +; 
TODO: logic shift reg pattern: bic + +define i64 @bic_shiftedreg_from_and(i64 %a, i64 %b) { +; CHECK-LABEL: bic_shiftedreg_from_and: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #16777215 +; CHECK-NEXT: orn x8, x8, x0, asr #23 +; CHECK-NEXT: and x0, x1, x8 +; CHECK-NEXT: ret + %ashr = ashr i64 %a, 23 + %and = and i64 %ashr, -16777216 + %not = xor i64 %and, -1 + %r = and i64 %b, %not + ret i64 %r +} + +; logic shift reg pattern: eon + +define i64 @eon_shiftedreg_from_and(i64 %a, i64 %b) { +; CHECK-LABEL: eon_shiftedreg_from_and: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl x8, x0, #36 +; CHECK-NEXT: and x8, x8, #0xffe0000000000000 +; CHECK-NEXT: eon x0, x8, x1 +; CHECK-NEXT: ret + %shl = shl i64 %a, 36 + %and = and i64 %shl, -9007199254740992 + %xor = xor i64 %and, -1 + %r = xor i64 %b, %xor + ret i64 %r +} + +; logic shift reg pattern: eor + +define i64 @eor_shiftedreg_from_and(i64 %a, i64 %b) { +; CHECK-LABEL: eor_shiftedreg_from_and: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr x8, x0, #23 +; CHECK-NEXT: and x8, x8, #0x1ffff000000 +; CHECK-NEXT: eor x0, x8, x1 +; CHECK-NEXT: ret + %lshr = lshr i64 %a, 23 + %and = and i64 %lshr, 2199006478336 + %or = xor i64 %and, %b + ret i64 %or +} + +; logic shift reg pattern: mvn +; already optimized by another pattern + +define i64 @mvn_shiftedreg_from_and(i64 %a) { +; CHECK-LABEL: mvn_shiftedreg_from_and: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #9007199254740991 +; CHECK-NEXT: orn x0, x8, x0, lsl #36 +; CHECK-NEXT: ret + %shl = shl i64 %a, 36 + %and = and i64 %shl, -9007199254740992 + %xor = xor i64 %and, -1 + ret i64 %xor +} + +; logic shift reg pattern: orn +; already optimized by another pattern + +define i64 @orn_shiftedreg_from_and(i64 %a, i64 %b) { +; CHECK-LABEL: orn_shiftedreg_from_and: +; CHECK: // %bb.0: +; CHECK-NEXT: orn x8, x1, x0, lsr #23 +; CHECK-NEXT: orr x0, x8, #0xfffffe0000ffffff +; CHECK-NEXT: ret + %lshr = lshr i64 %a, 23 + %and = and i64 %lshr, 2199006478336 + %not = xor i64 %and, -1 + %or = or i64 %not, %b + ret i64 %or +} + +; logic shift reg pattern: orr +; srl constant bitwidth == (lowbits + masklen + shiftamt) + +define i64 @orr_shiftedreg_from_and(i64 %a, i64 %b) { +; CHECK-LABEL: orr_shiftedreg_from_and: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr x8, x0, #23 +; CHECK-NEXT: and x8, x8, #0x1ffff000000 +; CHECK-NEXT: orr x0, x8, x1 +; CHECK-NEXT: ret + %lshr = lshr i64 %a, 23 + %and = and i64 %lshr, 2199006478336 ; 0x1ffff000000 + %or = or i64 %and, %b + ret i64 %or +} + +; logic shift reg pattern: orr +; srl constant bitwidth < (lowbits + masklen + shiftamt) + +define i64 @orr_shiftedreg_from_and_mask2(i64 %a, i64 %b) { +; CHECK-LABEL: orr_shiftedreg_from_and_mask2: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr x8, x0, #23 +; CHECK-NEXT: and x8, x8, #0x1ffff000000 +; CHECK-NEXT: orr x0, x8, x1 +; CHECK-NEXT: ret + %lshr = lshr i64 %a, 23 + %and = and i64 %lshr, 4398029733888 ; 0x3ffff000000 + %or = or i64 %and, %b + ret i64 %or +} + + +; arithmetic shift reg pattern: add + +define i32 @add_shiftedreg_from_and(i32 %a, i32 %b) { +; CHECK-LABEL: add_shiftedreg_from_and: +; CHECK: // %bb.0: +; CHECK-NEXT: asr w8, w0, #3 +; CHECK-NEXT: and w8, w8, #0xff000000 +; CHECK-NEXT: add w0, w8, w1 +; CHECK-NEXT: ret + %ashr = ashr i32 %a, 3 + %and = and i32 %ashr, -16777216 + %add = add i32 %and, %b + ret i32 %add +} + +; arithmetic shift reg pattern: sub + +define i64 @sub_shiftedreg_from_and_shl(i64 %a, i64 %b) { +; CHECK-LABEL: sub_shiftedreg_from_and_shl: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl x8, x0, #36 +; CHECK-NEXT: and x8, x8, #0xffe0000000000000 +; 
CHECK-NEXT: sub x0, x1, x8 +; CHECK-NEXT: ret + %shl = shl i64 %a, 36 + %and = and i64 %shl, -9007199254740992 + %sub = sub i64 %b, %and + ret i64 %sub +} + +; negative test: type is not i32 or i64 + +define <2 x i32> @shiftedreg_from_and_negative_type(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: shiftedreg_from_and_negative_type: +; CHECK: // %bb.0: +; CHECK-NEXT: shl v0.2s, v0.2s, #2 +; CHECK-NEXT: bic v0.2s, #31 +; CHECK-NEXT: sub v0.2s, v1.2s, v0.2s +; CHECK-NEXT: ret + %shl = shl <2 x i32> %a, <i32 2, i32 2> + %and = and <2 x i32> %shl, <i32 -32, i32 -32> + %sub = sub <2 x i32> %b, %and + ret <2 x i32> %sub +} + +; negative test: shift one-use + +define i32 @shiftedreg_from_and_negative_oneuse1(i32 %a, i32 %b) { +; CHECK-LABEL: shiftedreg_from_and_negative_oneuse1: +; CHECK: // %bb.0: +; CHECK-NEXT: asr w8, w0, #23 +; CHECK-NEXT: and w9, w8, #0xff000000 +; CHECK-NEXT: add w9, w9, w1 +; CHECK-NEXT: mul w0, w8, w9 +; CHECK-NEXT: ret + %ashr = ashr i32 %a, 23 + %and = and i32 %ashr, -16777216 + %add = add i32 %and, %b + %r = mul i32 %ashr, %add + ret i32 %r +} + +; negative test: and one-use + +define i32 @shiftedreg_from_and_negative_oneuse2(i32 %a, i32 %b) { +; CHECK-LABEL: shiftedreg_from_and_negative_oneuse2: +; CHECK: // %bb.0: +; CHECK-NEXT: asr w8, w0, #23 +; CHECK-NEXT: and w8, w8, #0xff000000 +; CHECK-NEXT: add w9, w8, w1 +; CHECK-NEXT: mul w0, w8, w9 +; CHECK-NEXT: ret + %ashr = ashr i32 %a, 23 + %and = and i32 %ashr, -16777216 + %add = add i32 %and, %b + %r = mul i32 %and, %add + ret i32 %r +} + +; negative test: and c is not mask + +define i32 @shiftedreg_from_and_negative_andc1(i32 %a, i32 %b) { +; CHECK-LABEL: shiftedreg_from_and_negative_andc1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #26215 +; CHECK-NEXT: movk w8, #65510, lsl #16 +; CHECK-NEXT: and w8, w8, w0, asr #23 +; CHECK-NEXT: add w0, w8, w1 +; CHECK-NEXT: ret + %ashr = ashr i32 %a, 23 + %and = and i32 %ashr, -1677721 + %add = add i32 %and, %b + ret i32 %add +} + +; negative test: sra with and c is not legal mask + +define i32 @shiftedreg_from_and_negative_andc2(i32 %a, i32 %b) { +; CHECK-LABEL: shiftedreg_from_and_negative_andc2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-285212672 +; CHECK-NEXT: and w8, w8, w0, asr #23 +; CHECK-NEXT: add w0, w8, w1 +; CHECK-NEXT: ret + %ashr = ashr i32 %a, 23 + %and = and i32 %ashr, 4009754624 ; 0xef000000 + %add = add i32 %and, %b + ret i32 %add +} + +; negative test: shl with and c is not legal mask + +define i64 @shiftedreg_from_and_negative_andc3(i64 %a, i64 %b) { +; CHECK-LABEL: shiftedreg_from_and_negative_andc3: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x0, x1, x0, lsl #36 +; CHECK-NEXT: ret + %shl = shl i64 %a, 36 + %and = and i64 %shl, -4294967296 + %xor = xor i64 %and, %b + ret i64 %xor +} + +; negative test: shl with and c is not legal mask + +define i64 @shiftedreg_from_and_negative_andc4(i64 %a, i64 %b) { +; CHECK-LABEL: shiftedreg_from_and_negative_andc4: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl x8, x0, #36 +; CHECK-NEXT: and x8, x8, #0x7fe0000000000000 +; CHECK-NEXT: eor x0, x8, x1 +; CHECK-NEXT: ret + %shl = shl i64 %a, 36 + %and = and i64 %shl, 9214364837600034816 + %xor = xor i64 %and, %b + ret i64 %xor +} + +; negative test: sra with and c is not legal mask + +define i32 @shiftedreg_from_and_negative_andc5(i32 %a, i32 %b) { +; CHECK-LABEL: shiftedreg_from_and_negative_andc5: +; CHECK: // %bb.0: +; CHECK-NEXT: asr w8, w0, #23 +; CHECK-NEXT: and w8, w8, #0xff000000 +; CHECK-NEXT: add w0, w8, w1 +; CHECK-NEXT: ret + %ashr = ashr i32 %a, 23 + %and = and i32 %ashr, -16777216 + %add = add i32 %and, %b 
+ ret i32 %add +} + +; negative test: srl with and c is not legal mask +; srl constant bitwidth > (lowbits + masklen + shiftamt) + +define i64 @shiftedreg_from_and_negative_andc6(i64 %a, i64 %b) { +; CHECK-LABEL: shiftedreg_from_and_negative_andc6: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr x8, x0, #2 +; CHECK-NEXT: and x8, x8, #0x6 +; CHECK-NEXT: add x0, x8, x1 +; CHECK-NEXT: ret + %lshr = lshr i64 %a, 2 + %and = and i64 %lshr, 6 + %add = add i64 %and, %b + ret i64 %add +} diff --git a/llvm/test/CodeGen/AArch64/shrink-wrap.ll b/llvm/test/CodeGen/AArch64/shrink-wrap.ll old mode 100755 new mode 100644 diff --git a/llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll b/llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll --- a/llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll @@ -29,14 +29,8 @@ ; CHECK-NEXT: Lloh5: ; CHECK-NEXT: ldr x9, [x9] ; CHECK-NEXT: str x8, [sp] -; CHECK-NEXT: Lloh6: -; CHECK-NEXT: adrp x8, ___stack_chk_guard@GOTPAGE ; CHECK-NEXT: stur x9, [x29, #-8] -; CHECK-NEXT: Lloh7: -; CHECK-NEXT: ldr x8, [x8, ___stack_chk_guard@GOTPAGEOFF] ; CHECK-NEXT: ldur x9, [x29, #-8] -; CHECK-NEXT: Lloh8: -; CHECK-NEXT: ldr x8, [x8] ; CHECK-NEXT: cmp x8, x9 ; CHECK-NEXT: b.ne LBB0_2 ; CHECK-NEXT: ; %bb.1: ; %entry @@ -46,7 +40,6 @@ ; CHECK-NEXT: ret ; CHECK-NEXT: LBB0_2: ; %entry ; CHECK-NEXT: bl ___stack_chk_fail -; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh6, Lloh7, Lloh8 ; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh1, Lloh3, Lloh5 ; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh0, Lloh2, Lloh4 entry: diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -59,26 +59,23 @@ ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: add x9, sp, #16 +; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ld4d { z1.d - z4.d }, p0/z, [x0] ; CHECK-NEXT: ld4d { z16.d - z19.d }, p0/z, [x1] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: fmov s0, #1.00000000 ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: mov w1, #1 ; CHECK-NEXT: mov w2, #2 -; CHECK-NEXT: st1d { z16.d }, p0, [x9] -; CHECK-NEXT: add x9, sp, #16 ; CHECK-NEXT: mov w3, #3 ; CHECK-NEXT: mov w4, #4 ; CHECK-NEXT: mov w5, #5 ; CHECK-NEXT: mov w6, #6 -; CHECK-NEXT: st1d { z17.d }, p0, [x9, #1, mul vl] -; CHECK-NEXT: add x9, sp, #16 ; CHECK-NEXT: mov w7, #7 -; CHECK-NEXT: st1d { z18.d }, p0, [x9, #2, mul vl] ; CHECK-NEXT: add x9, sp, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: st1d { z16.d }, p0, [x9] +; CHECK-NEXT: st1d { z17.d }, p0, [x9, #1, mul vl] +; CHECK-NEXT: st1d { z18.d }, p0, [x9, #2, mul vl] ; CHECK-NEXT: st1d { z19.d }, p0, [x9, #3, mul vl] ; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: bl callee2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll @@ -157,8 +157,6 @@ ; FLATSCR-NEXT: v_mov_b32_e32 v1, 0 ; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 ; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_lo offset:8 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:16 ; FLATSCR-NEXT: s_mov_b32 s11, 0 ; FLATSCR-NEXT: s_mov_b32 s10, 0 ; FLATSCR-NEXT: s_mov_b32 s9, 0 @@ -171,9 
+169,8 @@ ; FLATSCR-NEXT: s_mov_b32 s4, 0 ; FLATSCR-NEXT: s_mov_b32 s3, 0 ; FLATSCR-NEXT: s_mov_b32 s2, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; FLATSCR-NEXT: s_mov_b32 s40, 0 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_lo offset:8 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:16 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s11 offset:24 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s10 offset:32 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s9 offset:40 @@ -188,6 +185,7 @@ ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s2 offset:112 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_lo offset:120 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:128 +; FLATSCR-NEXT: s_mov_b32 s40, 0 ; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s40 offset:8 ; FLATSCR-NEXT: s_mov_b32 s39, 0 ; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s39 offset:16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll @@ -16,6 +16,7 @@ ; GCN-NEXT: s_load_dwordx16 s[36:51], s[22:23], 0x0 ; GCN-NEXT: s_load_dwordx16 s[52:67], s[22:23], 0x40 ; GCN-NEXT: s_load_dwordx16 s[4:19], s[22:23], 0x80 +; GCN-NEXT: v_mov_b32_e32 v64, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, s36 ; GCN-NEXT: v_mov_b32_e32 v1, s37 @@ -157,23 +158,10 @@ ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:260 ; GCN-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:264 ; GCN-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:268 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:512 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:516 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:520 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:524 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:272 -; GCN-NEXT: s_nop 0 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:276 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:280 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:284 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:528 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:532 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:536 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:540 ; 4-byte Folded Spill +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:272 +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:276 +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], 0 offset:280 +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], 0 offset:284 ; GCN-NEXT: buffer_load_dword v8, off, s[0:3], 0 offset:288 ; GCN-NEXT: buffer_load_dword v9, off, s[0:3], 0 offset:292 ; GCN-NEXT: buffer_load_dword v10, off, s[0:3], 0 offset:296 @@ -230,40 +218,38 @@ ; GCN-NEXT: buffer_load_dword v61, off, s[0:3], 0 offset:500 ; GCN-NEXT: buffer_load_dword v62, off, s[0:3], 0 offset:504 ; GCN-NEXT: buffer_load_dword v63, off, s[0:3], 0 offset:508 -; GCN-NEXT: s_nop 0 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:512 ; 4-byte Folded 
Reload -; GCN-NEXT: s_nop 0 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:516 ; 4-byte Folded Reload -; GCN-NEXT: s_nop 0 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:520 ; 4-byte Folded Reload -; GCN-NEXT: s_nop 0 -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:524 ; 4-byte Folded Reload -; GCN-NEXT: v_mov_b32_e32 v0, 0 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[20:21] -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:528 ; 4-byte Folded Reload -; GCN-NEXT: s_nop 0 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:532 ; 4-byte Folded Reload -; GCN-NEXT: s_nop 0 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:536 ; 4-byte Folded Reload -; GCN-NEXT: s_nop 0 -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:540 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dwordx4 v0, v[1:4], s[20:21] offset:16 -; GCN-NEXT: global_store_dwordx4 v0, v[8:11], s[20:21] offset:32 -; GCN-NEXT: global_store_dwordx4 v0, v[12:15], s[20:21] offset:48 -; GCN-NEXT: global_store_dwordx4 v0, v[16:19], s[20:21] offset:64 -; GCN-NEXT: global_store_dwordx4 v0, v[20:23], s[20:21] offset:80 -; GCN-NEXT: global_store_dwordx4 v0, v[24:27], s[20:21] offset:96 -; GCN-NEXT: global_store_dwordx4 v0, v[28:31], s[20:21] offset:112 -; GCN-NEXT: global_store_dwordx4 v0, v[32:35], s[20:21] offset:128 -; GCN-NEXT: global_store_dwordx4 v0, v[36:39], s[20:21] offset:144 -; GCN-NEXT: global_store_dwordx4 v0, v[40:43], s[20:21] offset:160 -; GCN-NEXT: global_store_dwordx4 v0, v[44:47], s[20:21] offset:176 -; GCN-NEXT: global_store_dwordx4 v0, v[48:51], s[20:21] offset:192 -; GCN-NEXT: global_store_dwordx4 v0, v[52:55], s[20:21] offset:208 -; GCN-NEXT: global_store_dwordx4 v0, v[56:59], s[20:21] offset:224 -; GCN-NEXT: global_store_dwordx4 v0, v[60:63], s[20:21] offset:240 +; GCN-NEXT: s_waitcnt vmcnt(60) +; GCN-NEXT: global_store_dwordx4 v64, v[0:3], s[20:21] +; GCN-NEXT: s_waitcnt vmcnt(57) +; GCN-NEXT: global_store_dwordx4 v64, v[4:7], s[20:21] offset:16 +; GCN-NEXT: s_waitcnt vmcnt(54) +; GCN-NEXT: global_store_dwordx4 v64, v[8:11], s[20:21] offset:32 +; GCN-NEXT: s_waitcnt vmcnt(51) +; GCN-NEXT: global_store_dwordx4 v64, v[12:15], s[20:21] offset:48 +; GCN-NEXT: s_waitcnt vmcnt(48) +; GCN-NEXT: global_store_dwordx4 v64, v[16:19], s[20:21] offset:64 +; GCN-NEXT: s_waitcnt vmcnt(45) +; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[20:21] offset:80 +; GCN-NEXT: s_waitcnt vmcnt(42) +; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[20:21] offset:96 +; GCN-NEXT: s_waitcnt vmcnt(39) +; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[20:21] offset:112 +; GCN-NEXT: s_waitcnt vmcnt(36) +; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[20:21] offset:128 +; GCN-NEXT: s_waitcnt vmcnt(33) +; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[20:21] offset:144 +; GCN-NEXT: s_waitcnt vmcnt(30) +; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[20:21] offset:160 +; GCN-NEXT: s_waitcnt vmcnt(27) +; GCN-NEXT: global_store_dwordx4 v64, v[44:47], s[20:21] offset:176 +; GCN-NEXT: s_waitcnt vmcnt(24) +; GCN-NEXT: global_store_dwordx4 v64, v[48:51], s[20:21] offset:192 +; GCN-NEXT: s_waitcnt vmcnt(21) +; GCN-NEXT: global_store_dwordx4 v64, v[52:55], s[20:21] offset:208 +; GCN-NEXT: s_waitcnt vmcnt(18) +; GCN-NEXT: global_store_dwordx4 v64, v[56:59], s[20:21] offset:224 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[60:63], s[20:21] offset:240 ; GCN-NEXT: s_endpgm %vec = load <64 x i32>, ptr 
addrspace(1) %ptr %insert = insertelement <64 x i32> %vec, i32 %val, i32 %idx @@ -271,4 +257,4 @@ ret void } -attributes #0 = { "amdgpu-flat-workgroup-size"="1,256" "amdgpu-waves-per-eu"="1,10" } +attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,10" } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll @@ -1354,7 +1354,6 @@ ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: buffer_load_dwordx3 v[1:3], v[1:2], s[4:7], 0 addr64 -; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX7-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX7-NEXT: s_cbranch_execz .LBB13_2 diff --git a/llvm/test/CodeGen/AMDGPU/aa-points-to-constant-memory.ll b/llvm/test/CodeGen/AMDGPU/aa-points-to-constant-memory.ll --- a/llvm/test/CodeGen/AMDGPU/aa-points-to-constant-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/aa-points-to-constant-memory.ll @@ -5,89 +5,89 @@ ; turn out to be stores to constant memory, and will therefore be ; deleted as UB. -define void @test_constant_addrspace(i8 addrspace(4)* %p) { +define void @test_constant_addrspace(ptr addrspace(4) %p) { ; CHECK-LABEL: @test_constant_addrspace( ; CHECK-NEXT: ret void ; - store i8 0, i8 addrspace(4)* %p + store i8 0, ptr addrspace(4) %p ret void } -define void @test_constant32bit_addrspace(i8 addrspace(6)* %p) { +define void @test_constant32bit_addrspace(ptr addrspace(6) %p) { ; CHECK-LABEL: @test_constant32bit_addrspace( ; CHECK-NEXT: ret void ; - store i8 0, i8 addrspace(6)* %p + store i8 0, ptr addrspace(6) %p ret void } -define void @test_cast_generic_from_constant_addrspace(i8 addrspace(4)* %p) { +define void @test_cast_generic_from_constant_addrspace(ptr addrspace(4) %p) { ; CHECK-LABEL: @test_cast_generic_from_constant_addrspace( ; CHECK-NEXT: ret void ; - %cast = addrspacecast i8 addrspace(4)* %p to i8* - store i8 0, i8* %cast + %cast = addrspacecast ptr addrspace(4) %p to ptr + store i8 0, ptr %cast ret void } -define void @test_cast_generic_from_constant32bit_addrspace(i8 addrspace(6)* %p) { +define void @test_cast_generic_from_constant32bit_addrspace(ptr addrspace(6) %p) { ; CHECK-LABEL: @test_cast_generic_from_constant32bit_addrspace( ; CHECK-NEXT: ret void ; - %cast = addrspacecast i8 addrspace(6)* %p to i8* - store i8 0, i8* %cast + %cast = addrspacecast ptr addrspace(6) %p to ptr + store i8 0, ptr %cast ret void } -define void @test_cast_generic_to_constant_addrspace(i8* %p) { +define void @test_cast_generic_to_constant_addrspace(ptr %p) { ; CHECK-LABEL: @test_cast_generic_to_constant_addrspace( ; CHECK-NEXT: ret void ; - %cast = addrspacecast i8* %p to i8 addrspace(4)* - store i8 0, i8 addrspace(4)* %cast + %cast = addrspacecast ptr %p to ptr addrspace(4) + store i8 0, ptr addrspace(4) %cast ret void } -define void @test_cast_generic_to_constant32bit_addrspace(i8* %p) { +define void @test_cast_generic_to_constant32bit_addrspace(ptr %p) { ; CHECK-LABEL: @test_cast_generic_to_constant32bit_addrspace( ; CHECK-NEXT: ret void ; - %cast = addrspacecast i8* %p to i8 addrspace(6)* - store i8 0, i8 addrspace(6)* %cast + %cast = addrspacecast ptr %p to ptr addrspace(6) + store i8 0, ptr addrspace(6) %cast ret void } -define amdgpu_kernel void @noalias_readnone_global_kernarg(i32 addrspace(1)* noalias readnone %arg) { +define amdgpu_kernel void 
@noalias_readnone_global_kernarg(ptr addrspace(1) noalias readnone %arg) { ; CHECK-LABEL: @noalias_readnone_global_kernarg( ; CHECK-NEXT: ret void ; - store i32 0, i32 addrspace(1)* %arg + store i32 0, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @noalias_readonly_global_kernarg(i32 addrspace(1)* noalias readonly %arg) { +define amdgpu_kernel void @noalias_readonly_global_kernarg(ptr addrspace(1) noalias readonly %arg) { ; CHECK-LABEL: @noalias_readonly_global_kernarg( ; CHECK-NEXT: ret void ; - store i32 0, i32 addrspace(1)* %arg + store i32 0, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @readnone_global_kernarg(i32 addrspace(1)* readnone %arg) { +define amdgpu_kernel void @readnone_global_kernarg(ptr addrspace(1) readnone %arg) { ; CHECK-LABEL: @readnone_global_kernarg( -; CHECK-NEXT: store i32 0, i32 addrspace(1)* [[ARG:%.*]], align 4 +; CHECK-NEXT: store i32 0, ptr addrspace(1) [[ARG:%.*]], align 4 ; CHECK-NEXT: ret void ; - store i32 0, i32 addrspace(1)* %arg + store i32 0, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @readonly_global_kernarg(i32 addrspace(1)* readonly %arg) { +define amdgpu_kernel void @readonly_global_kernarg(ptr addrspace(1) readonly %arg) { ; CHECK-LABEL: @readonly_global_kernarg( -; CHECK-NEXT: store i32 0, i32 addrspace(1)* [[ARG:%.*]], align 4 +; CHECK-NEXT: store i32 0, ptr addrspace(1) [[ARG:%.*]], align 4 ; CHECK-NEXT: ret void ; - store i32 0, i32 addrspace(1)* %arg + store i32 0, ptr addrspace(1) %arg ret void } @@ -97,7 +97,7 @@ ; CHECK-LABEL: @constant_gv_global_as( ; CHECK-NEXT: ret void ; - store i32 0, i32 addrspace(1)* @global_as_constant + store i32 0, ptr addrspace(1) @global_as_constant ret void } @@ -107,6 +107,6 @@ ; CHECK-LABEL: @nonconst_gv_constant_as( ; CHECK-NEXT: ret void ; - store i32 0, i32 addrspace(4)* @global_nonconstant_constant_as + store i32 0, ptr addrspace(4) @global_nonconstant_constant_as ret void } diff --git a/llvm/test/CodeGen/AMDGPU/acc-ldst.ll b/llvm/test/CodeGen/AMDGPU/acc-ldst.ll --- a/llvm/test/CodeGen/AMDGPU/acc-ldst.ll +++ b/llvm/test/CodeGen/AMDGPU/acc-ldst.ll @@ -13,13 +13,13 @@ ; GCN-NEXT: s_nop 2 ; GCN-NOT: v_accvgpr_read ; GCN-COUNT-8: global_store_dwordx4 v{{[0-9:]+}}, a[{{[0-9:]+}}], s[{{[0-9:]+}}] -define amdgpu_kernel void @test_load_mfma_store16(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_load_mfma_store16(ptr addrspace(1) %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %arg, i32 %tid - %in.1 = load <32 x float>, <32 x float> addrspace(1)* %gep + %gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid + %in.1 = load <32 x float>, ptr addrspace(1) %gep %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 1, i32 2, i32 3) - store <32 x float> %mai.1, <32 x float> addrspace(1)* %gep + store <32 x float> %mai.1, ptr addrspace(1) %gep ret void } @@ -32,15 +32,15 @@ ; GCN-NEXT: s_nop 2 ; GCN-NOT: v_accvgpr_read ; GCN-NEXT: global_store_dword v{{[0-9:]+}}, a[[N]], s[{{[0-9:]+}}] -define amdgpu_kernel void @test_load1_mfma_store1(float addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_load1_mfma_store1(ptr addrspace(1) %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %tid - %in.1 = load float, float addrspace(1)* %gep + %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %tid + %in.1 = 
load float, ptr addrspace(1) %gep %init = insertelement <32 x float> zeroinitializer, float %in.1, i32 0 %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %init, i32 1, i32 2, i32 3) %elt = extractelement <32 x float> %mai.1, i32 0 - store float %elt, float addrspace(1)* %gep + store float %elt, ptr addrspace(1) %gep ret void } @@ -51,13 +51,13 @@ ; GCN-NEXT: s_nop 4 ; GCN-NOT: v_accvgpr_read ; GCN-NEXT: global_store_dwordx4 v{{[0-9:]+}}, [[A]], s[{{[0-9:]+}}] -define amdgpu_kernel void @test_load4_mfma_store4(<4 x i32> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_load4_mfma_store4(ptr addrspace(1) %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i32 %tid - %in.1 = load <4 x i32>, <4 x i32> addrspace(1)* %gep + %gep = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i32 %tid + %in.1 = load <4 x i32>, ptr addrspace(1) %gep %mai.1 = tail call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 1, i32 2, <4 x i32> %in.1, i32 0, i32 0, i32 0) - store <4 x i32> %mai.1, <4 x i32> addrspace(1)* %gep + store <4 x i32> %mai.1, ptr addrspace(1) %gep ret void } @@ -65,13 +65,13 @@ ; GCN-COUNT-8: global_load_dwordx4 v[{{[0-9:]+}}], v{{[0-9:]+}}, s[{{[0-9:]+}}] ; GCN-NOT: v_accvgpr ; GCN-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}] -define amdgpu_kernel void @test_load_store(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_load_store(ptr addrspace(1) %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.1 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %arg, i32 %tid - %gep.2 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %gep.1, i32 32 - %in.1 = load <32 x float>, <32 x float> addrspace(1)* %gep.1 - store <32 x float> %in.1, <32 x float> addrspace(1)* %gep.2 + %gep.1 = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid + %gep.2 = getelementptr inbounds <32 x float>, ptr addrspace(1) %gep.1, i32 32 + %in.1 = load <32 x float>, ptr addrspace(1) %gep.1 + store <32 x float> %in.1, ptr addrspace(1) %gep.2 ret void } @@ -84,14 +84,14 @@ ; GCN-NEXT: s_nop 2 ; GCN-NOT: v_accvgpr_read ; GCN-COUNT-8: global_store_dwordx4 v{{[0-9:]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_load_add_mfma_store(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_load_add_mfma_store(ptr addrspace(1) %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %arg, i32 %tid - %in.1 = load <32 x float>, <32 x float> addrspace(1)* %gep + %gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid + %in.1 = load <32 x float>, ptr addrspace(1) %gep %add.1 = fadd <32 x float> %in.1, %in.1 %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %add.1, i32 1, i32 2, i32 3) - store <32 x float> %mai.1, <32 x float> addrspace(1)* %gep + store <32 x float> %mai.1, ptr addrspace(1) %gep ret void } @@ -101,13 +101,13 @@ ; GCN-COUNT-16: v_pk_add_f32 ; GCN-NOT: v_accvgpr ; GCN-COUNT-8: global_store_dwordx4 v{{[0-9:]+}}, v[{{[0-9:]+}}] -define amdgpu_kernel void @test_load_add_store(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_load_add_store(ptr addrspace(1) %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %arg, i32 %tid - %in.1 = 
load <32 x float>, <32 x float> addrspace(1)* %gep + %gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid + %in.1 = load <32 x float>, ptr addrspace(1) %gep %add.1 = fadd <32 x float> %in.1, %in.1 - store <32 x float> %add.1, <32 x float> addrspace(1)* %gep + store <32 x float> %add.1, ptr addrspace(1) %gep ret void } @@ -118,14 +118,14 @@ ; GCN-COUNT-32: v_accvgpr_read ; GCN: v_pk_add_f32 ; GCN-COUNT-8: global_store_dwordx4 v{{[0-9:]+}}, v[{{[0-9:]+}}] -define amdgpu_kernel void @test_load_mfma_add_store(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_load_mfma_add_store(ptr addrspace(1) %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %arg, i32 %tid - %in.1 = load <32 x float>, <32 x float> addrspace(1)* %gep + %gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid + %in.1 = load <32 x float>, ptr addrspace(1) %gep %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 1, i32 2, i32 3) %add.1 = fadd <32 x float> %mai.1, %in.1 - store <32 x float> %add.1, <32 x float> addrspace(1)* %gep + store <32 x float> %add.1, ptr addrspace(1) %gep ret void } @@ -137,15 +137,15 @@ ; GCN-COUNT-32: v_accvgpr_read ; GCN: v_pk_mul_f32 ; GCN-COUNT-8: global_store_dwordx4 v{{[0-9:]+}}, v[{{[0-9:]+}}] -define amdgpu_kernel void @test_load_add_mfma_mul_store(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_load_add_mfma_mul_store(ptr addrspace(1) %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %arg, i32 %tid - %in.1 = load <32 x float>, <32 x float> addrspace(1)* %gep + %gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid + %in.1 = load <32 x float>, ptr addrspace(1) %gep %add.1 = fadd <32 x float> %in.1, %in.1 %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %add.1, i32 1, i32 2, i32 3) %mul.1 = fmul <32 x float> %mai.1, %mai.1 - store <32 x float> %mul.1, <32 x float> addrspace(1)* %gep + store <32 x float> %mul.1, ptr addrspace(1) %gep ret void } @@ -156,15 +156,15 @@ ; GCN-COUNT-32: v_accvgpr_read ; GCN: v_pk_mul_f32 ; GCN-COUNT-8: global_store_dwordx4 v{{[0-9:]+}}, v[{{[0-9:]+}}] -define amdgpu_kernel void @test_mixeduse_load_add_mfma_mul_store(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mixeduse_load_add_mfma_mul_store(ptr addrspace(1) %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %arg, i32 %tid - %in.1 = load <32 x float>, <32 x float> addrspace(1)* %gep + %gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid + %in.1 = load <32 x float>, ptr addrspace(1) %gep %add.1 = fadd <32 x float> %in.1, %in.1 %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %add.1, i32 1, i32 2, i32 3) %mul.1 = fmul <32 x float> %mai.1, %in.1 - store <32 x float> %mul.1, <32 x float> addrspace(1)* %gep + store <32 x float> %mul.1, ptr addrspace(1) %gep ret void } @@ -174,16 +174,16 @@ ; GCN: v_mfma_f32_32x32x1f32 ; GCN-NOT: v_accvgpr_read ; GCN-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}] -define amdgpu_kernel void @test_multiuse_load_mfma_mfma_store(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void 
@test_multiuse_load_mfma_mfma_store(ptr addrspace(1) %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.1 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %arg, i32 %tid - %gep.2 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %gep.1, i32 32 - %in.1 = load <32 x float>, <32 x float> addrspace(1)* %gep.1 + %gep.1 = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid + %gep.2 = getelementptr inbounds <32 x float>, ptr addrspace(1) %gep.1, i32 32 + %in.1 = load <32 x float>, ptr addrspace(1) %gep.1 %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 1, i32 2, i32 3) %mai.2 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0) - store <32 x float> %mai.1, <32 x float> addrspace(1)* %gep.1 - store <32 x float> %mai.2, <32 x float> addrspace(1)* %gep.2 + store <32 x float> %mai.1, ptr addrspace(1) %gep.1 + store <32 x float> %mai.2, ptr addrspace(1) %gep.2 ret void } @@ -198,19 +198,19 @@ ; GCN: v_accvgpr_read_b32 [[V:v[0-9]+]], a[[N]]{{$}} ; GCN: global_atomic_add v{{[0-9]+}}, v{{[0-9:]+}}, [[V]], s[{{[0-9:]+}}] glc ; GCN: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, -define amdgpu_kernel void @test_atomic_mfma_4xi32_atomic_store(i32 addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_atomic_mfma_4xi32_atomic_store(ptr addrspace(1) %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tid - %in.1 = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 1 seq_cst + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %tid + %in.1 = atomicrmw volatile sub ptr addrspace(1) %gep, i32 1 seq_cst %tmp0 = insertelement <4 x i32> undef, i32 %in.1, i32 0 %tmp1 = insertelement <4 x i32> %tmp0, i32 0, i32 1 %tmp2 = insertelement <4 x i32> %tmp1, i32 0, i32 2 %tmp3 = insertelement <4 x i32> %tmp2, i32 0, i32 3 %mai.1 = tail call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 1, i32 2, <4 x i32> %tmp3, i32 0, i32 0, i32 0) %elt = extractelement <4 x i32> %mai.1, i32 0 - %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %elt seq_cst - store i32 %val, i32 addrspace(1)* %arg + %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %elt seq_cst + store i32 %val, ptr addrspace(1) %arg ret void } @@ -221,11 +221,11 @@ ; GCN: v_accvgpr_read_b32 v{{[0-9]+}}, a{{[0-9]+}} ; GCN: v_accvgpr_read_b32 v{{[0-9]+}}, a{{[0-9]+}} ; GCN: global_atomic_add_x2 v[{{[0-9:]+}}], v{{[0-9:]+}}, v[{{[0-9:]+}}], s[{{[0-9:]+}}] glc -define amdgpu_kernel void @test_atomic_mfma_4xi32_atomic64_store(i64 addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_atomic_mfma_4xi32_atomic64_store(ptr addrspace(1) %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %tid - %in.1 = atomicrmw volatile sub i64 addrspace(1)* %gep, i64 1 seq_cst + %gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %tid + %in.1 = atomicrmw volatile sub ptr addrspace(1) %gep, i64 1 seq_cst %tmp0 = insertelement <2 x i64> undef, i64 %in.1, i32 0 %tmp1 = insertelement <2 x i64> %tmp0, i64 0, i32 1 %tmp2 = bitcast <2 x i64> %tmp0 to <4 x i32> @@ -235,8 +235,8 @@ %v2.1 = insertelement <2 x i32> undef, i32 %elt.1, i32 0 %v2.2 = insertelement <2 x i32> %v2.1, i32 %elt.2, i32 1 %v2 = bitcast <2 x i32> %v2.2 to i64 - %val = atomicrmw volatile add i64 addrspace(1)* %gep, i64 %v2 seq_cst - store i64 %val, i64 
addrspace(1)* %arg + %val = atomicrmw volatile add ptr addrspace(1) %gep, i64 %v2 seq_cst + store i64 %val, ptr addrspace(1) %arg ret void } @@ -248,17 +248,16 @@ ; GCN-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} ; GCN-NOT: v_accvgpr_read ; GCN: ds_write_b32 v{{[0-9]+}}, a[[N]] offset:128 -define amdgpu_kernel void @test_load_mfma_ds2_store(<4 x i32> addrspace(3)* %arg) #0 { +define amdgpu_kernel void @test_load_mfma_ds2_store(ptr addrspace(3) %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.1 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(3)* %arg, i32 %tid - %in.1 = load <4 x i32>, <4 x i32> addrspace(3)* %gep.1 + %gep.1 = getelementptr inbounds <4 x i32>, ptr addrspace(3) %arg, i32 %tid + %in.1 = load <4 x i32>, ptr addrspace(3) %gep.1 %mai.1 = tail call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 1, i32 2, <4 x i32> %in.1, i32 0, i32 0, i32 0) %elt = extractelement <4 x i32> %mai.1, i32 0 - %ptr = bitcast <4 x i32> addrspace(3)* %arg to i32 addrspace(3)* - %gep.2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 32 - store i32 1, i32 addrspace(3)* %ptr - store i32 %elt, i32 addrspace(3)* %gep.2 + %gep.2 = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 32 + store i32 1, ptr addrspace(3) %arg + store i32 %elt, ptr addrspace(3) %gep.2 ret void } @@ -268,11 +267,11 @@ ; GCN: v_mfma_i32_4x4x4i8 [[RES:a\[[0-9:]+\]]], v{{[0-9:]+}}, v{{[0-9:]+}}, [[IN]] ; GCN-NOT: v_accvgpr_read ; GCN: global_store_dwordx4 v[{{[0-9:]+}}], [[RES]], -define amdgpu_kernel void @test_mfma_loop_4xi32(<4 x i32> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_loop_4xi32(ptr addrspace(1) %arg) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i32 %tid - %in = load <4 x i32>, <4 x i32> addrspace(1)* %gep + %gep = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i32 %tid + %in = load <4 x i32>, ptr addrspace(1) %gep br label %for.cond.preheader for.cond.preheader: @@ -284,7 +283,7 @@ br i1 %cc, label %exit, label %for.cond.preheader exit: - store <4 x i32> %mai.1, <4 x i32> addrspace(1)* %gep + store <4 x i32> %mai.1, ptr addrspace(1) %gep ret void } @@ -295,11 +294,11 @@ ; GCN-NOT: v_accvgpr_read ; GCN-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}], ; GCN: s_endpgm -define amdgpu_kernel void @test_mfma_loop_32xfloat(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_loop_32xfloat(ptr addrspace(1) %arg) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %arg, i32 %tid - %in = load <32 x float>, <32 x float> addrspace(1)* %gep + %gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid + %in = load <32 x float>, ptr addrspace(1) %gep br label %for.cond.preheader for.cond.preheader: @@ -311,7 +310,7 @@ br i1 %cc, label %exit, label %for.cond.preheader exit: - store <32 x float> %mai.1, <32 x float> addrspace(1)* %gep + store <32 x float> %mai.1, ptr addrspace(1) %gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll --- a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll @@ -8,7 +8,7 @@ ; from a register. 
; GCN-LABEL: name: test_load_zext ; GCN: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer -; SDAG: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0 :: (invariant load (s128) from %ir.13, addrspace 4) +; SDAG: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0 :: (invariant load (s128) from %ir.12, addrspace 4) ; GISEL: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR %{{[0-9]+}}, %[[OFFSET]], 0 :: (invariant load (<4 x s32>) from {{.*}}, addrspace 4) define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %resNode0, i32 inreg %resNode1, <3 x i32> inreg %2, i32 inreg %3, <3 x i32> %4) local_unnamed_addr #2 { .entry: @@ -16,14 +16,13 @@ %6 = bitcast i64 %5 to <2 x i32> %7 = insertelement <2 x i32> %6, i32 %resNode0, i32 0 %8 = bitcast <2 x i32> %7 to i64 - %9 = inttoptr i64 %8 to [4294967295 x i8] addrspace(4)* + %9 = inttoptr i64 %8 to ptr addrspace(4) %10 = call i32 @llvm.amdgcn.reloc.constant(metadata !4) %11 = zext i32 %10 to i64 - %12 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %9, i64 0, i64 %11 - %13 = bitcast i8 addrspace(4)* %12 to <4 x i32> addrspace(4)*, !amdgpu.uniform !5 - %14 = load <4 x i32>, <4 x i32> addrspace(4)* %13, align 16, !invariant.load !5 - %15 = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %14, i32 0, i32 0) - call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> %15, <4 x i32> %14, i32 0, i32 0, i32 0) + %12 = getelementptr [4294967295 x i8], ptr addrspace(4) %9, i64 0, i64 %11 + %13 = load <4 x i32>, ptr addrspace(4) %12, align 16, !invariant.load !5 + %14 = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %13, i32 0, i32 0) + call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> %14, <4 x i32> %13, i32 0, i32 0, i32 0) ret void } @@ -34,13 +33,13 @@ ; GCN-DAG: %[[OFFSET:.*]]:sreg_32 = S_LSHL_B32 ; SDAG: S_LOAD_DWORD_SGPR killed %[[BASE]], killed %[[OFFSET]], ; GISEL: S_LOAD_DWORD_SGPR %[[BASE]], %[[OFFSET]], -define amdgpu_ps void @test_complex_reg_offset(float addrspace(1)* %out) { - %i = load i32, i32 addrspace(4)* @1 +define amdgpu_ps void @test_complex_reg_offset(ptr addrspace(1) %out) { + %i = load i32, ptr addrspace(4) @1 %i1 = and i32 %i, 3 %i2 = zext i32 %i1 to i64 - %i3 = getelementptr [4 x <2 x float>], [4 x <2 x float>] addrspace(4)* @0, i64 0, i64 %i2, i64 0 - %i4 = load float, float addrspace(4)* %i3, align 4 - store float %i4, float addrspace(1)* %out + %i3 = getelementptr [4 x <2 x float>], ptr addrspace(4) @0, i64 0, i64 %i2, i64 0 + %i4 = load float, ptr addrspace(4) %i3, align 4 + store float %i4, ptr addrspace(1) %out ret void } @@ -55,14 +54,13 @@ ; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr2 ; GISEL-DAG: %[[BASE:.*]]:sreg_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1 ; GISEL: S_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 16, -define amdgpu_ps void @test_sgpr_plus_imm_offset(i8 addrspace(4)* inreg %base, i32 inreg %offset, - i32 addrspace(1)* inreg %out) { - %v1 = getelementptr i8, i8 addrspace(4)* %base, i64 16 +define amdgpu_ps void @test_sgpr_plus_imm_offset(ptr addrspace(4) inreg %base, i32 inreg %offset, + ptr addrspace(1) inreg %out) { + %v1 = getelementptr i8, ptr addrspace(4) %base, i64 16 %v2 = zext i32 %offset to i64 - %v3 = getelementptr i8, i8 addrspace(4)* %v1, i64 %v2 - %v4 = bitcast i8 addrspace(4)* %v3 to i32 addrspace(4)* - %v5 = load i32, i32 addrspace(4)* %v4, align 4 - store i32 %v5, i32 addrspace(1)* %out, align 4 + %v3 = 
getelementptr i8, ptr addrspace(4) %v1, i64 %v2 + %v5 = load i32, ptr addrspace(4) %v3, align 4 + store i32 %v5, ptr addrspace(1) %out, align 4 ret void } @@ -77,14 +75,13 @@ ; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr2 ; GISEL-DAG: %[[BASE:.*]]:sreg_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1 ; GISEL: S_LOAD_DWORDX2_SGPR_IMM %[[BASE]], %[[OFFSET]], 16, -define amdgpu_ps void @test_sgpr_plus_imm_offset_x2(i8 addrspace(4)* inreg %base, i32 inreg %offset, - <2 x i32> addrspace(1)* inreg %out) { - %v1 = getelementptr i8, i8 addrspace(4)* %base, i64 16 +define amdgpu_ps void @test_sgpr_plus_imm_offset_x2(ptr addrspace(4) inreg %base, i32 inreg %offset, + ptr addrspace(1) inreg %out) { + %v1 = getelementptr i8, ptr addrspace(4) %base, i64 16 %v2 = zext i32 %offset to i64 - %v3 = getelementptr i8, i8 addrspace(4)* %v1, i64 %v2 - %v4 = bitcast i8 addrspace(4)* %v3 to <2 x i32> addrspace(4)* - %v5 = load <2 x i32>, <2 x i32> addrspace(4)* %v4, align 4 - store <2 x i32> %v5, <2 x i32> addrspace(1)* %out, align 4 + %v3 = getelementptr i8, ptr addrspace(4) %v1, i64 %v2 + %v5 = load <2 x i32>, ptr addrspace(4) %v3, align 4 + store <2 x i32> %v5, ptr addrspace(1) %out, align 4 ret void } @@ -103,10 +100,10 @@ ; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr4 ; GISEL-DAG: %[[BASE:.*]]:sgpr_128 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1, %[[BASE2]], %subreg.sub2, %[[BASE3]], %subreg.sub3 ; GISEL: S_BUFFER_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 77, -define amdgpu_cs void @test_buffer_load_sgpr_plus_imm_offset(<4 x i32> inreg %base, i32 inreg %i, i32 addrspace(1)* inreg %out) { +define amdgpu_cs void @test_buffer_load_sgpr_plus_imm_offset(<4 x i32> inreg %base, i32 inreg %i, ptr addrspace(1) inreg %out) { %off = add nuw nsw i32 %i, 77 %v = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %base, i32 %off, i32 0) - store i32 %v, i32 addrspace(1)* %out, align 4 + store i32 %v, ptr addrspace(1) %out, align 4 ret void } @@ -127,11 +124,11 @@ ; GISEL-DAG: %[[SHIFT:.*]]:sreg_32 = S_LSHL_B32 %[[INDEX]], ; GISEL-DAG: %[[BASE:.*]]:sgpr_128 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1, %[[BASE2]], %subreg.sub2, %[[BASE3]], %subreg.sub3 ; GISEL: S_BUFFER_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[SHIFT]], 5, -define amdgpu_cs void @test_buffer_load_sgpr_or_imm_offset(<4 x i32> inreg %base, i32 inreg %i, i32 addrspace(1)* inreg %out) { +define amdgpu_cs void @test_buffer_load_sgpr_or_imm_offset(<4 x i32> inreg %base, i32 inreg %i, ptr addrspace(1) inreg %out) { %shift = shl i32 %i, 7 %off = or i32 %shift, 5 %v = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %base, i32 %off, i32 0) - store i32 %v, i32 addrspace(1)* %out, align 4 + store i32 %v, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll --- a/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll @@ -23,17 +23,16 @@ ; GCN-PROMOTE-NEXT: v_addc_u32_e32 [[RESULT:v[0-9]+]], vcc, 0, v0, vcc ; GCN: buffer_store_dword [[RESULT]] -define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @work_item_info(ptr addrspace(1) %out, i32 %in) { entry: %0 = alloca [2 x i32], addrspace(5) - %1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0 - %2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 1 - store i32 0, i32 addrspace(5)* %1 - store i32 1, i32 addrspace(5)* %2 - %3 
= getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 %in - %4 = load i32, i32 addrspace(5)* %3 - %5 = call i32 @llvm.amdgcn.workitem.id.x() - %6 = add i32 %4, %5 - store i32 %6, i32 addrspace(1)* %out + %1 = getelementptr [2 x i32], ptr addrspace(5) %0, i32 0, i32 1 + store i32 0, ptr addrspace(5) %0 + store i32 1, ptr addrspace(5) %1 + %2 = getelementptr [2 x i32], ptr addrspace(5) %0, i32 0, i32 %in + %3 = load i32, ptr addrspace(5) %2 + %4 = call i32 @llvm.amdgcn.workitem.id.x() + %5 = add i32 %3, %4 + store i32 %5, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll @@ -2,7 +2,7 @@ ; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=GCN %s ; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx900 -early-live-intervals -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s -define weak_odr amdgpu_kernel void @test_mul24_knownbits_kernel(float addrspace(1)* %p) #4 { +define weak_odr amdgpu_kernel void @test_mul24_knownbits_kernel(ptr addrspace(1) %p) #4 { ; GCN-LABEL: test_mul24_knownbits_kernel: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: v_and_b32_e32 v0, 3, v0 @@ -24,8 +24,8 @@ %1 = mul nsw i32 %tid, -5 %v1 = and i32 %1, -32 %v2 = sext i32 %v1 to i64 - %v3 = getelementptr inbounds float, float addrspace(1)* %p, i64 %v2 - store float 0.000, float addrspace(1)* %v3, align 4 + %v3 = getelementptr inbounds float, ptr addrspace(1) %p, i64 %v2 + store float 0.000, ptr addrspace(1) %v3, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-unroll-threshold.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-unroll-threshold.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu-unroll-threshold.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-unroll-threshold.ll @@ -14,8 +14,8 @@ ; CHECK-NOT: br i1 %cmp ; CHECK: ret void -@in = internal unnamed_addr global i32* null, align 8 -@out = internal unnamed_addr global i32* null, align 8 +@in = internal unnamed_addr global ptr null, align 8 +@out = internal unnamed_addr global ptr null, align 8 define void @unroll_default() { entry: @@ -23,8 +23,8 @@ do.body: ; preds = %entry %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ] - %v1 = load i64, i64* bitcast (i32** @in to i64*), align 8 - store i64 %v1, i64* bitcast (i32** @out to i64*), align 8 + %v1 = load i64, ptr @in, align 8 + store i64 %v1, ptr @out, align 8 %inc = add nsw i32 %i.0, 1 %cmp = icmp slt i32 %inc, 100 br i1 %cmp, label %do.body, label %do.end @@ -39,8 +39,8 @@ do.body: ; preds = %entry %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ] - %v1 = load i64, i64* bitcast (i32** @in to i64*), align 8 - store i64 %v1, i64* bitcast (i32** @out to i64*), align 8 + %v1 = load i64, ptr @in, align 8 + store i64 %v1, ptr @out, align 8 %inc = add nsw i32 %i.0, 1 %cmp = icmp slt i32 %inc, 100 br i1 %cmp, label %do.body, label %do.end diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll @@ -49,12 +49,11 @@ ; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen ; encoding: [0x00,0x10,0x70,0xe0 -; HSAOPT: [[DISPATCH_PTR:%[0-9]+]] = call noalias nonnull dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -; HSAOPT: [[CAST_DISPATCH_PTR:%[0-9]+]] = bitcast i8 addrspace(4)* 
[[DISPATCH_PTR]] to i32 addrspace(4)* -; HSAOPT: [[GEP0:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(4)* [[CAST_DISPATCH_PTR]], i64 1 -; HSAOPT: [[LDXY:%[0-9]+]] = load i32, i32 addrspace(4)* [[GEP0]], align 4, !invariant.load !0 -; HSAOPT: [[GEP1:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(4)* [[CAST_DISPATCH_PTR]], i64 2 -; HSAOPT: [[LDZU:%[0-9]+]] = load i32, i32 addrspace(4)* [[GEP1]], align 4, !range !1, !invariant.load !0 +; HSAOPT: [[DISPATCH_PTR:%[0-9]+]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +; HSAOPT: [[GEP0:%[0-9]+]] = getelementptr inbounds i32, ptr addrspace(4) [[DISPATCH_PTR]], i64 1 +; HSAOPT: [[LDXY:%[0-9]+]] = load i32, ptr addrspace(4) [[GEP0]], align 4, !invariant.load !0 +; HSAOPT: [[GEP1:%[0-9]+]] = getelementptr inbounds i32, ptr addrspace(4) [[DISPATCH_PTR]], i64 2 +; HSAOPT: [[LDZU:%[0-9]+]] = load i32, ptr addrspace(4) [[GEP1]], align 4, !range !1, !invariant.load !0 ; HSAOPT: [[EXTRACTY:%[0-9]+]] = lshr i32 [[LDXY]], 16 ; HSAOPT: [[WORKITEM_ID_X:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.x(), !range !2 @@ -67,11 +66,10 @@ ; HSAOPT: [[ADD_YZ_X_X_YZ_SIZE:%[0-9]+]] = add i32 [[YZ_X_XID]], [[Y_X_Z_SIZE]] ; HSAOPT: [[ADD_ZID:%[0-9]+]] = add i32 [[ADD_YZ_X_X_YZ_SIZE]], [[WORKITEM_ID_Z]] -; HSAOPT: [[LOCAL_GEP:%[0-9]+]] = getelementptr inbounds [256 x [5 x i32]], [256 x [5 x i32]] addrspace(3)* @mova_same_clause.stack, i32 0, i32 [[ADD_ZID]] -; HSAOPT: %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(3)* [[LOCAL_GEP]], i32 0, i32 {{%[0-9]+}} -; HSAOPT: %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(3)* [[LOCAL_GEP]], i32 0, i32 {{%[0-9]+}} -; HSAOPT: %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(3)* [[LOCAL_GEP]], i32 0, i32 0 -; HSAOPT: %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(3)* [[LOCAL_GEP]], i32 0, i32 1 +; HSAOPT: [[LOCAL_GEP:%[0-9]+]] = getelementptr inbounds [256 x [5 x i32]], ptr addrspace(3) @mova_same_clause.stack, i32 0, i32 [[ADD_ZID]] +; HSAOPT: %arrayidx1 = getelementptr inbounds [5 x i32], ptr addrspace(3) [[LOCAL_GEP]], i32 0, i32 {{%[0-9]+}} +; HSAOPT: %arrayidx3 = getelementptr inbounds [5 x i32], ptr addrspace(3) [[LOCAL_GEP]], i32 0, i32 {{%[0-9]+}} +; HSAOPT: %arrayidx12 = getelementptr inbounds [5 x i32], ptr addrspace(3) [[LOCAL_GEP]], i32 0, i32 1 ; NOHSAOPT: call i32 @llvm.r600.read.local.size.y(), !range !0 @@ -79,45 +77,43 @@ ; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.x(), !range !1 ; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.y(), !range !1 ; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.z(), !range !1 -define amdgpu_kernel void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 { +define amdgpu_kernel void @mova_same_clause(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 { entry: %stack = alloca [5 x i32], align 4, addrspace(5) - %0 = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0 - store i32 4, i32 addrspace(5)* %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 - %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1 - store i32 5, i32 addrspace(5)* %arrayidx3, align 4 - %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0 - %2 = load i32, i32 addrspace(5)* 
%arrayidx10, align 4 - store i32 %2, i32 addrspace(1)* %out, align 4 - %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1 - %3 = load i32, i32 addrspace(5)* %arrayidx12 - %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 - store i32 %3, i32 addrspace(1)* %arrayidx13 + %0 = load i32, ptr addrspace(1) %in, align 4 + %arrayidx1 = getelementptr inbounds [5 x i32], ptr addrspace(5) %stack, i32 0, i32 %0 + store i32 4, ptr addrspace(5) %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %in, i32 1 + %1 = load i32, ptr addrspace(1) %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds [5 x i32], ptr addrspace(5) %stack, i32 0, i32 %1 + store i32 5, ptr addrspace(5) %arrayidx3, align 4 + %2 = load i32, ptr addrspace(5) %stack, align 4 + store i32 %2, ptr addrspace(1) %out, align 4 + %arrayidx12 = getelementptr inbounds [5 x i32], ptr addrspace(5) %stack, i32 0, i32 1 + %3 = load i32, ptr addrspace(5) %arrayidx12 + %arrayidx13 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 1 + store i32 %3, ptr addrspace(1) %arrayidx13 ret void } ; OPT-LABEL: @high_alignment( -; OPT: getelementptr inbounds [256 x [8 x i32]], [256 x [8 x i32]] addrspace(3)* @high_alignment.stack, i32 0, i32 %{{[0-9]+}} -define amdgpu_kernel void @high_alignment(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 { +; OPT: getelementptr inbounds [256 x [8 x i32]], ptr addrspace(3) @high_alignment.stack, i32 0, i32 %{{[0-9]+}} +define amdgpu_kernel void @high_alignment(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 { entry: %stack = alloca [8 x i32], align 16, addrspace(5) - %0 = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %stack, i32 0, i32 %0 - store i32 4, i32 addrspace(5)* %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 - %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %stack, i32 0, i32 %1 - store i32 5, i32 addrspace(5)* %arrayidx3, align 4 - %arrayidx10 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %stack, i32 0, i32 0 - %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4 - store i32 %2, i32 addrspace(1)* %out, align 4 - %arrayidx12 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %stack, i32 0, i32 1 - %3 = load i32, i32 addrspace(5)* %arrayidx12 - %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 - store i32 %3, i32 addrspace(1)* %arrayidx13 + %0 = load i32, ptr addrspace(1) %in, align 4 + %arrayidx1 = getelementptr inbounds [8 x i32], ptr addrspace(5) %stack, i32 0, i32 %0 + store i32 4, ptr addrspace(5) %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %in, i32 1 + %1 = load i32, ptr addrspace(1) %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds [8 x i32], ptr addrspace(5) %stack, i32 0, i32 %1 + store i32 5, ptr addrspace(5) %arrayidx3, align 4 + %2 = load i32, ptr addrspace(5) %stack, align 4 + store i32 %2, ptr addrspace(1) %out, align 4 + %arrayidx12 = getelementptr inbounds [8 x i32], ptr addrspace(5) %stack, i32 0, i32 1 + %3 = load i32, ptr addrspace(5) %arrayidx12 + %arrayidx13 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 1 + store i32 %3, ptr addrspace(1) %arrayidx13 ret void } @@ -126,23 +122,22 @@ ; OPT: alloca [5 x i32] ; SI-NOT: ds_write -define amdgpu_kernel void 
@no_replace_inbounds_gep(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 { +define amdgpu_kernel void @no_replace_inbounds_gep(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 { entry: %stack = alloca [5 x i32], align 4, addrspace(5) - %0 = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0 - store i32 4, i32 addrspace(5)* %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 - %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1 - store i32 5, i32 addrspace(5)* %arrayidx3, align 4 - %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0 - %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4 - store i32 %2, i32 addrspace(1)* %out, align 4 - %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1 - %3 = load i32, i32 addrspace(5)* %arrayidx12 - %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 - store i32 %3, i32 addrspace(1)* %arrayidx13 + %0 = load i32, ptr addrspace(1) %in, align 4 + %arrayidx1 = getelementptr [5 x i32], ptr addrspace(5) %stack, i32 0, i32 %0 + store i32 4, ptr addrspace(5) %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %in, i32 1 + %1 = load i32, ptr addrspace(1) %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds [5 x i32], ptr addrspace(5) %stack, i32 0, i32 %1 + store i32 5, ptr addrspace(5) %arrayidx3, align 4 + %2 = load i32, ptr addrspace(5) %stack, align 4 + store i32 %2, ptr addrspace(1) %out, align 4 + %arrayidx12 = getelementptr inbounds [5 x i32], ptr addrspace(5) %stack, i32 0, i32 1 + %3 = load i32, ptr addrspace(5) %arrayidx12 + %arrayidx13 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 1 + store i32 %3, ptr addrspace(1) %arrayidx13 ret void } @@ -161,24 +156,20 @@ ; SI-NOT: v_movrel %struct.point = type { i32, i32 } -define amdgpu_kernel void @multiple_structs(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @multiple_structs(ptr addrspace(1) %out) #0 { entry: %a = alloca %struct.point, addrspace(5) %b = alloca %struct.point, addrspace(5) - %a.x.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 0 - %a.y.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 1 - %b.x.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 0 - %b.y.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 1 - store i32 0, i32 addrspace(5)* %a.x.ptr - store i32 1, i32 addrspace(5)* %a.y.ptr - store i32 2, i32 addrspace(5)* %b.x.ptr - store i32 3, i32 addrspace(5)* %b.y.ptr - %a.indirect.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 0 - %b.indirect.ptr = getelementptr %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 0 - %a.indirect = load i32, i32 addrspace(5)* %a.indirect.ptr - %b.indirect = load i32, i32 addrspace(5)* %b.indirect.ptr + %a.y.ptr = getelementptr %struct.point, ptr addrspace(5) %a, i32 0, i32 1 + %b.y.ptr = getelementptr %struct.point, ptr addrspace(5) %b, i32 0, i32 1 + store i32 0, ptr addrspace(5) %a + store i32 1, ptr addrspace(5) %a.y.ptr + store i32 2, ptr addrspace(5) %b + store i32 3, ptr addrspace(5) %b.y.ptr + %a.indirect = load i32, ptr addrspace(5) %a + %b.indirect = load i32, ptr addrspace(5) %b %0 = add i32 
%a.indirect, %b.indirect - store i32 %0, i32 addrspace(1)* %out + store i32 %0, ptr addrspace(1) %out ret void } @@ -190,35 +181,31 @@ ; R600-NOT: MOVA_INT ; SI-NOT: v_movrel -define amdgpu_kernel void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @direct_loop(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { entry: %prv_array_const = alloca [2 x i32], addrspace(5) %prv_array = alloca [2 x i32], addrspace(5) - %a = load i32, i32 addrspace(1)* %in - %b_src_ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 - %b = load i32, i32 addrspace(1)* %b_src_ptr - %a_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 0 - store i32 %a, i32 addrspace(5)* %a_dst_ptr - %b_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 1 - store i32 %b, i32 addrspace(5)* %b_dst_ptr + %a = load i32, ptr addrspace(1) %in + %b_src_ptr = getelementptr inbounds i32, ptr addrspace(1) %in, i32 1 + %b = load i32, ptr addrspace(1) %b_src_ptr + store i32 %a, ptr addrspace(5) %prv_array_const + %b_dst_ptr = getelementptr inbounds [2 x i32], ptr addrspace(5) %prv_array_const, i32 0, i32 1 + store i32 %b, ptr addrspace(5) %b_dst_ptr br label %for.body for.body: %inc = phi i32 [0, %entry], [%count, %for.body] - %x_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 0 - %x = load i32, i32 addrspace(5)* %x_ptr - %y_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array, i32 0, i32 0 - %y = load i32, i32 addrspace(5)* %y_ptr + %x = load i32, ptr addrspace(5) %prv_array_const + %y = load i32, ptr addrspace(5) %prv_array %xy = add i32 %x, %y - store i32 %xy, i32 addrspace(5)* %y_ptr + store i32 %xy, ptr addrspace(5) %prv_array %count = add i32 %inc, 1 %done = icmp eq i32 %count, 4095 br i1 %done, label %for.end, label %for.body for.end: - %value_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array, i32 0, i32 0 - %value = load i32, i32 addrspace(5)* %value_ptr - store i32 %value, i32 addrspace(1)* %out + %value = load i32, ptr addrspace(5) %prv_array + store i32 %value, ptr addrspace(1) %out ret void } @@ -235,17 +222,16 @@ ; SI-PROMOTE-VECT: s_lshl_b32 [[SCALED_IDX:s[0-9]+]], [[IDX]], 4 ; SI-PROMOTE-VECT: s_lshr_b32 [[SREG:s[0-9]+]], 0x10000, [[SCALED_IDX]] ; SI-PROMOTE-VECT: s_and_b32 s{{[0-9]+}}, [[SREG]], 0xffff -define amdgpu_kernel void @short_array(i32 addrspace(1)* %out, i32 %index) #0 { +define amdgpu_kernel void @short_array(ptr addrspace(1) %out, i32 %index) #0 { entry: %0 = alloca [2 x i16], addrspace(5) - %1 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 0 - %2 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 1 - store i16 0, i16 addrspace(5)* %1 - store i16 1, i16 addrspace(5)* %2 - %3 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 %index - %4 = load i16, i16 addrspace(5)* %3 - %5 = sext i16 %4 to i32 - store i32 %5, i32 addrspace(1)* %out + %1 = getelementptr inbounds [2 x i16], ptr addrspace(5) %0, i32 0, i32 1 + store i16 0, ptr addrspace(5) %0 + store i16 1, ptr addrspace(5) %1 + %2 = getelementptr inbounds [2 x i16], ptr addrspace(5) %0, i32 0, i32 %index + %3 = load i16, ptr addrspace(5) %2 + %4 = sext i16 %3 to i32 + store i32 %4, ptr addrspace(1) %out ret void } @@ -258,17 +244,16 @@ ; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4 ; encoding: [0x04,0x00,0x60,0xe0 ; 
SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:5 ; encoding: [0x05,0x00,0x60,0xe0 -define amdgpu_kernel void @char_array(i32 addrspace(1)* %out, i32 %index) #0 { +define amdgpu_kernel void @char_array(ptr addrspace(1) %out, i32 %index) #0 { entry: %0 = alloca [2 x i8], addrspace(5) - %1 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 0 - %2 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 1 - store i8 0, i8 addrspace(5)* %1 - store i8 1, i8 addrspace(5)* %2 - %3 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 %index - %4 = load i8, i8 addrspace(5)* %3 - %5 = sext i8 %4 to i32 - store i32 %5, i32 addrspace(1)* %out + %1 = getelementptr inbounds [2 x i8], ptr addrspace(5) %0, i32 0, i32 1 + store i8 0, ptr addrspace(5) %0 + store i8 1, ptr addrspace(5) %1 + %2 = getelementptr inbounds [2 x i8], ptr addrspace(5) %0, i32 0, i32 %index + %3 = load i8, ptr addrspace(5) %2 + %4 = sext i8 %3 to i32 + store i32 %4, ptr addrspace(1) %out ret void } @@ -278,109 +263,103 @@ ; ; A total of 5 bytes should be allocated and used. ; SI: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4 ; -define amdgpu_kernel void @no_overlap(i32 addrspace(1)* %out, i32 %in) #0 { +define amdgpu_kernel void @no_overlap(ptr addrspace(1) %out, i32 %in) #0 { entry: %0 = alloca [3 x i8], align 1, addrspace(5) %1 = alloca [2 x i8], align 1, addrspace(5) - %2 = getelementptr [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 0 - %3 = getelementptr [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 1 - %4 = getelementptr [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 2 - %5 = getelementptr [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 0 - %6 = getelementptr [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 1 - store i8 0, i8 addrspace(5)* %2 - store i8 1, i8 addrspace(5)* %3 - store i8 2, i8 addrspace(5)* %4 - store i8 1, i8 addrspace(5)* %5 - store i8 0, i8 addrspace(5)* %6 - %7 = getelementptr [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 %in - %8 = getelementptr [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 %in - %9 = load i8, i8 addrspace(5)* %7 - %10 = load i8, i8 addrspace(5)* %8 - %11 = add i8 %9, %10 - %12 = sext i8 %11 to i32 - store i32 %12, i32 addrspace(1)* %out + %2 = getelementptr [3 x i8], ptr addrspace(5) %0, i32 0, i32 1 + %3 = getelementptr [3 x i8], ptr addrspace(5) %0, i32 0, i32 2 + %4 = getelementptr [2 x i8], ptr addrspace(5) %1, i32 0, i32 1 + store i8 0, ptr addrspace(5) %0 + store i8 1, ptr addrspace(5) %2 + store i8 2, ptr addrspace(5) %3 + store i8 1, ptr addrspace(5) %1 + store i8 0, ptr addrspace(5) %4 + %5 = getelementptr [3 x i8], ptr addrspace(5) %0, i32 0, i32 %in + %6 = getelementptr [2 x i8], ptr addrspace(5) %1, i32 0, i32 %in + %7 = load i8, ptr addrspace(5) %5 + %8 = load i8, ptr addrspace(5) %6 + %9 = add i8 %7, %8 + %10 = sext i8 %9 to i32 + store i32 %10, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @char_array_array(i32 addrspace(1)* %out, i32 %index) #0 { +define amdgpu_kernel void @char_array_array(ptr addrspace(1) %out, i32 %index) #0 { entry: %alloca = alloca [2 x [2 x i8]], addrspace(5) - %gep0 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 0 - %gep1 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 1 - store i8 0, i8 addrspace(5)* %gep0 - store i8 1, i8 addrspace(5)* %gep1 - %gep2 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, 
i32 %index - %load = load i8, i8 addrspace(5)* %gep2 + %gep1 = getelementptr [2 x [2 x i8]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1 + store i8 0, ptr addrspace(5) %alloca + store i8 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr [2 x [2 x i8]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 %index + %load = load i8, ptr addrspace(5) %gep2 %sext = sext i8 %load to i32 - store i32 %sext, i32 addrspace(1)* %out + store i32 %sext, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 { +define amdgpu_kernel void @i32_array_array(ptr addrspace(1) %out, i32 %index) #0 { entry: %alloca = alloca [2 x [2 x i32]], addrspace(5) - %gep0 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0 - %gep1 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1 - store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index - %load = load i32, i32 addrspace(5)* %gep2 - store i32 %load, i32 addrspace(1)* %out + %gep1 = getelementptr [2 x [2 x i32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1 + store i32 0, ptr addrspace(5) %alloca + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr [2 x [2 x i32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 %index + %load = load i32, ptr addrspace(5) %gep2 + store i32 %load, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @i64_array_array(i64 addrspace(1)* %out, i32 %index) #0 { +define amdgpu_kernel void @i64_array_array(ptr addrspace(1) %out, i32 %index) #0 { entry: %alloca = alloca [2 x [2 x i64]], addrspace(5) - %gep0 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 0 - %gep1 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 1 - store i64 0, i64 addrspace(5)* %gep0 - store i64 1, i64 addrspace(5)* %gep1 - %gep2 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index - %load = load i64, i64 addrspace(5)* %gep2 - store i64 %load, i64 addrspace(1)* %out + %gep1 = getelementptr [2 x [2 x i64]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1 + store i64 0, ptr addrspace(5) %alloca + store i64 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr [2 x [2 x i64]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 %index + %load = load i64, ptr addrspace(5) %gep2 + store i64 %load, ptr addrspace(1) %out ret void } %struct.pair32 = type { i32, i32 } -define amdgpu_kernel void @struct_array_array(i32 addrspace(1)* %out, i32 %index) #0 { +define amdgpu_kernel void @struct_array_array(ptr addrspace(1) %out, i32 %index) #0 { entry: %alloca = alloca [2 x [2 x %struct.pair32]], addrspace(5) - %gep0 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0, i32 1 - %gep1 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1, i32 1 - store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index, i32 0 - %load = load i32, i32 addrspace(5)* %gep2 - store i32 %load, i32 addrspace(1)* %out + %gep0 = getelementptr [2 x [2 x %struct.pair32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 0, i32 1 + %gep1 = getelementptr [2 x [2 x %struct.pair32]], ptr 
addrspace(5) %alloca, i32 0, i32 0, i32 1, i32 1 + store i32 0, ptr addrspace(5) %gep0 + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr [2 x [2 x %struct.pair32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 %index, i32 0 + %load = load i32, ptr addrspace(5) %gep2 + store i32 %load, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) #0 { +define amdgpu_kernel void @struct_pair32_array(ptr addrspace(1) %out, i32 %index) #0 { entry: %alloca = alloca [2 x %struct.pair32], addrspace(5) - %gep0 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 0, i32 1 - %gep1 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 1, i32 0 - store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 %index, i32 0 - %load = load i32, i32 addrspace(5)* %gep2 - store i32 %load, i32 addrspace(1)* %out + %gep0 = getelementptr [2 x %struct.pair32], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1 + %gep1 = getelementptr [2 x %struct.pair32], ptr addrspace(5) %alloca, i32 0, i32 1, i32 0 + store i32 0, ptr addrspace(5) %gep0 + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr [2 x %struct.pair32], ptr addrspace(5) %alloca, i32 0, i32 %index, i32 0 + %load = load i32, ptr addrspace(5) %gep2 + store i32 %load, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind { +define amdgpu_kernel void @select_private(ptr addrspace(1) %out, i32 %in) nounwind { entry: %tmp = alloca [2 x i32], addrspace(5) - %tmp1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0 - %tmp2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1 - store i32 0, i32 addrspace(5)* %tmp1 - store i32 1, i32 addrspace(5)* %tmp2 + %tmp2 = getelementptr [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 1 + store i32 0, ptr addrspace(5) %tmp + store i32 1, ptr addrspace(5) %tmp2 %cmp = icmp eq i32 %in, 0 - %sel = select i1 %cmp, i32 addrspace(5)* %tmp1, i32 addrspace(5)* %tmp2 - %load = load i32, i32 addrspace(5)* %sel - store i32 %load, i32 addrspace(1)* %out + %sel = select i1 %cmp, ptr addrspace(5) %tmp, ptr addrspace(5) %tmp2 + %load = load i32, ptr addrspace(5) %sel + store i32 %load, ptr addrspace(1) %out ret void } @@ -392,35 +371,34 @@ ; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen ; SI: v_add_{{[iu]}}32_e32 [[ADD_OFFSET:v[0-9]+]], vcc, 5, ; SI: buffer_load_dword v{{[0-9]+}}, [[ADD_OFFSET:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offen ; -define amdgpu_kernel void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @ptrtoint(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { %alloca = alloca [16 x i32], addrspace(5) - %tmp0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a - store i32 5, i32 addrspace(5)* %tmp0 - %tmp1 = ptrtoint [16 x i32] addrspace(5)* %alloca to i32 + %tmp0 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a + store i32 5, ptr addrspace(5) %tmp0 + %tmp1 = ptrtoint ptr addrspace(5) %alloca to i32 %tmp2 = add i32 %tmp1, 5 - %tmp3 = inttoptr i32 %tmp2 to i32 addrspace(5)* - %tmp4 = getelementptr i32, i32 addrspace(5)* %tmp3, i32 %b - %tmp5 = load i32, i32 addrspace(5)* %tmp4 - store i32 %tmp5, i32 addrspace(1)* %out + %tmp3 = inttoptr i32 %tmp2 to ptr addrspace(5) + %tmp4 = 
getelementptr i32, ptr addrspace(5) %tmp3, i32 %b + %tmp5 = load i32, ptr addrspace(5) %tmp4 + store i32 %tmp5, ptr addrspace(1) %out ret void } ; OPT-LABEL: @pointer_typed_alloca( -; OPT: getelementptr inbounds [256 x i32 addrspace(1)*], [256 x i32 addrspace(1)*] addrspace(3)* @pointer_typed_alloca.A.addr, i32 0, i32 %{{[0-9]+}} -; OPT: load i32 addrspace(1)*, i32 addrspace(1)* addrspace(3)* %{{[0-9]+}}, align 4 -define amdgpu_kernel void @pointer_typed_alloca(i32 addrspace(1)* %A) #1 { +; OPT: getelementptr inbounds [256 x ptr addrspace(1)], ptr addrspace(3) @pointer_typed_alloca.A.addr, i32 0, i32 %{{[0-9]+}} +; OPT: load ptr addrspace(1), ptr addrspace(3) %{{[0-9]+}}, align 4 +define amdgpu_kernel void @pointer_typed_alloca(ptr addrspace(1) %A) #1 { entry: - %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5) - store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4 - %ld0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4 - %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %ld0, i32 0 - store i32 1, i32 addrspace(1)* %arrayidx, align 4 - %ld1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4 - %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %ld1, i32 1 - store i32 2, i32 addrspace(1)* %arrayidx1, align 4 - %ld2 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %A.addr, align 4 - %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %ld2, i32 2 - store i32 3, i32 addrspace(1)* %arrayidx2, align 4 + %A.addr = alloca ptr addrspace(1), align 4, addrspace(5) + store ptr addrspace(1) %A, ptr addrspace(5) %A.addr, align 4 + %ld0 = load ptr addrspace(1), ptr addrspace(5) %A.addr, align 4 + store i32 1, ptr addrspace(1) %ld0, align 4 + %ld1 = load ptr addrspace(1), ptr addrspace(5) %A.addr, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr addrspace(1) %ld1, i32 1 + store i32 2, ptr addrspace(1) %arrayidx1, align 4 + %ld2 = load ptr addrspace(1), ptr addrspace(5) %A.addr, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %ld2, i32 2 + store i32 3, ptr addrspace(1) %arrayidx2, align 4 ret void } @@ -460,11 +438,11 @@ ; SI: buffer_load_dword ; SI: buffer_load_dword -define amdgpu_kernel void @v16i32_stack(<16 x i32> addrspace(1)* %out, i32 %a) { +define amdgpu_kernel void @v16i32_stack(ptr addrspace(1) %out, i32 %a) { %alloca = alloca [2 x <16 x i32>], addrspace(5) - %tmp0 = getelementptr [2 x <16 x i32>], [2 x <16 x i32>] addrspace(5)* %alloca, i32 0, i32 %a - %tmp5 = load <16 x i32>, <16 x i32> addrspace(5)* %tmp0 - store <16 x i32> %tmp5, <16 x i32> addrspace(1)* %out + %tmp0 = getelementptr [2 x <16 x i32>], ptr addrspace(5) %alloca, i32 0, i32 %a + %tmp5 = load <16 x i32>, ptr addrspace(5) %tmp0 + store <16 x i32> %tmp5, ptr addrspace(1) %out ret void } @@ -504,11 +482,11 @@ ; SI: buffer_load_dword ; SI: buffer_load_dword -define amdgpu_kernel void @v16float_stack(<16 x float> addrspace(1)* %out, i32 %a) { +define amdgpu_kernel void @v16float_stack(ptr addrspace(1) %out, i32 %a) { %alloca = alloca [2 x <16 x float>], addrspace(5) - %tmp0 = getelementptr [2 x <16 x float>], [2 x <16 x float>] addrspace(5)* %alloca, i32 0, i32 %a - %tmp5 = load <16 x float>, <16 x float> addrspace(5)* %tmp0 - store <16 x float> %tmp5, <16 x float> addrspace(1)* %out + %tmp0 = getelementptr [2 x <16 x float>], ptr addrspace(5) %alloca, i32 0, i32 %a + %tmp5 = load <16 x float>, ptr addrspace(5) %tmp0 + store <16 x float> %tmp5, ptr addrspace(1) %out ret void } @@ -520,35 +498,35 @@ 
; SI: buffer_load_dword ; SI: buffer_load_dword -define amdgpu_kernel void @v2float_stack(<2 x float> addrspace(1)* %out, i32 %a) { +define amdgpu_kernel void @v2float_stack(ptr addrspace(1) %out, i32 %a) { %alloca = alloca [16 x <2 x float>], addrspace(5) - %tmp0 = getelementptr [16 x <2 x float>], [16 x <2 x float>] addrspace(5)* %alloca, i32 0, i32 %a - %tmp5 = load <2 x float>, <2 x float> addrspace(5)* %tmp0 - store <2 x float> %tmp5, <2 x float> addrspace(1)* %out + %tmp0 = getelementptr [16 x <2 x float>], ptr addrspace(5) %alloca, i32 0, i32 %a + %tmp5 = load <2 x float>, ptr addrspace(5) %tmp0 + store <2 x float> %tmp5, ptr addrspace(1) %out ret void } ; OPT-LABEL: @direct_alloca_read_0xi32( -; OPT: store [0 x i32] undef, [0 x i32] addrspace(3)* -; OPT: load [0 x i32], [0 x i32] addrspace(3)* -define amdgpu_kernel void @direct_alloca_read_0xi32([0 x i32] addrspace(1)* %out, i32 %index) { +; OPT: store [0 x i32] undef, ptr addrspace(3) +; OPT: load [0 x i32], ptr addrspace(3) +define amdgpu_kernel void @direct_alloca_read_0xi32(ptr addrspace(1) %out, i32 %index) { entry: %tmp = alloca [0 x i32], addrspace(5) - store [0 x i32] [], [0 x i32] addrspace(5)* %tmp - %load = load [0 x i32], [0 x i32] addrspace(5)* %tmp - store [0 x i32] %load, [0 x i32] addrspace(1)* %out + store [0 x i32] [], ptr addrspace(5) %tmp + %load = load [0 x i32], ptr addrspace(5) %tmp + store [0 x i32] %load, ptr addrspace(1) %out ret void } ; OPT-LABEL: @direct_alloca_read_1xi32( -; OPT: store [1 x i32] zeroinitializer, [1 x i32] addrspace(3)* -; OPT: load [1 x i32], [1 x i32] addrspace(3)* -define amdgpu_kernel void @direct_alloca_read_1xi32([1 x i32] addrspace(1)* %out, i32 %index) { +; OPT: store [1 x i32] zeroinitializer, ptr addrspace(3) +; OPT: load [1 x i32], ptr addrspace(3) +define amdgpu_kernel void @direct_alloca_read_1xi32(ptr addrspace(1) %out, i32 %index) { entry: %tmp = alloca [1 x i32], addrspace(5) - store [1 x i32] [i32 0], [1 x i32] addrspace(5)* %tmp - %load = load [1 x i32], [1 x i32] addrspace(5)* %tmp - store [1 x i32] %load, [1 x i32] addrspace(1)* %out + store [1 x i32] [i32 0], ptr addrspace(5) %tmp + %load = load [1 x i32], ptr addrspace(5) %tmp + store [1 x i32] %load, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll @@ -16,10 +16,10 @@ ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; EG: MOV {{\*? *}}[[VAL]], KC0[0].X -define amdgpu_kernel void @ngroups_x (i32 addrspace(1)* %out) { +define amdgpu_kernel void @ngroups_x (ptr addrspace(1) %out) { entry: %0 = call i32 @llvm.r600.read.ngroups.x() #0 - store i32 %0, i32 addrspace(1)* %out + store i32 %0, ptr addrspace(1) %out ret void } @@ -31,10 +31,10 @@ ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y -define amdgpu_kernel void @ngroups_y (i32 addrspace(1)* %out) { +define amdgpu_kernel void @ngroups_y (ptr addrspace(1) %out) { entry: %0 = call i32 @llvm.r600.read.ngroups.y() #0 - store i32 %0, i32 addrspace(1)* %out + store i32 %0, ptr addrspace(1) %out ret void } @@ -46,10 +46,10 @@ ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; EG: MOV {{\*? 
*}}[[VAL]], KC0[0].Z -define amdgpu_kernel void @ngroups_z (i32 addrspace(1)* %out) { +define amdgpu_kernel void @ngroups_z (ptr addrspace(1) %out) { entry: %0 = call i32 @llvm.r600.read.ngroups.z() #0 - store i32 %0, i32 addrspace(1)* %out + store i32 %0, ptr addrspace(1) %out ret void } @@ -61,10 +61,10 @@ ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; EG: MOV {{\*? *}}[[VAL]], KC0[0].W -define amdgpu_kernel void @global_size_x (i32 addrspace(1)* %out) { +define amdgpu_kernel void @global_size_x (ptr addrspace(1) %out) { entry: %0 = call i32 @llvm.r600.read.global.size.x() #0 - store i32 %0, i32 addrspace(1)* %out + store i32 %0, ptr addrspace(1) %out ret void } @@ -76,10 +76,10 @@ ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; EG: MOV {{\*? *}}[[VAL]], KC0[1].X -define amdgpu_kernel void @global_size_y (i32 addrspace(1)* %out) { +define amdgpu_kernel void @global_size_y (ptr addrspace(1) %out) { entry: %0 = call i32 @llvm.r600.read.global.size.y() #0 - store i32 %0, i32 addrspace(1)* %out + store i32 %0, ptr addrspace(1) %out ret void } @@ -91,10 +91,10 @@ ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y -define amdgpu_kernel void @global_size_z (i32 addrspace(1)* %out) { +define amdgpu_kernel void @global_size_z (ptr addrspace(1) %out) { entry: %0 = call i32 @llvm.r600.read.global.size.z() #0 - store i32 %0, i32 addrspace(1)* %out + store i32 %0, ptr addrspace(1) %out ret void } @@ -106,10 +106,10 @@ ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; EG: MOV {{\*? *}}[[VAL]], KC0[1].Z -define amdgpu_kernel void @local_size_x (i32 addrspace(1)* %out) { +define amdgpu_kernel void @local_size_x (ptr addrspace(1) %out) { entry: %0 = call i32 @llvm.r600.read.local.size.x() #0 - store i32 %0, i32 addrspace(1)* %out + store i32 %0, ptr addrspace(1) %out ret void } @@ -121,10 +121,10 @@ ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; EG: MOV {{\*? *}}[[VAL]], KC0[1].W -define amdgpu_kernel void @local_size_y (i32 addrspace(1)* %out) { +define amdgpu_kernel void @local_size_y (ptr addrspace(1) %out) { entry: %0 = call i32 @llvm.r600.read.local.size.y() #0 - store i32 %0, i32 addrspace(1)* %out + store i32 %0, ptr addrspace(1) %out ret void } @@ -136,10 +136,10 @@ ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; EG: MOV {{\*? 
*}}[[VAL]], KC0[2].X -define amdgpu_kernel void @local_size_z (i32 addrspace(1)* %out) { +define amdgpu_kernel void @local_size_z (ptr addrspace(1) %out) { entry: %0 = call i32 @llvm.r600.read.local.size.z() #0 - store i32 %0, i32 addrspace(1)* %out + store i32 %0, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/anonymous-gv.ll b/llvm/test/CodeGen/AMDGPU/anonymous-gv.ll --- a/llvm/test/CodeGen/AMDGPU/anonymous-gv.ll +++ b/llvm/test/CodeGen/AMDGPU/anonymous-gv.ll @@ -7,7 +7,7 @@ ; CHECK: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, __unnamed_1 ; CHECK: s_endpgm define amdgpu_kernel void @test() { - store i32 1, i32 addrspace(1)* @0 + store i32 1, ptr addrspace(1) @0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/are-loads-from-same-base-ptr.ll b/llvm/test/CodeGen/AMDGPU/are-loads-from-same-base-ptr.ll --- a/llvm/test/CodeGen/AMDGPU/are-loads-from-same-base-ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/are-loads-from-same-base-ptr.ll @@ -7,10 +7,10 @@ ; GCN: global_load_dword ; GCN: ds_min_u32 ; GCN: ds_max_u32 -define amdgpu_kernel void @are_loads_from_same_base_ptr_ds_atomic(i32 addrspace(1)* %arg0, i32 addrspace(3)* noalias %ptr0) #0 { - %tmp1 = load volatile i32, i32 addrspace(1)* %arg0 - %tmp2 = atomicrmw umin i32 addrspace(3)* %ptr0, i32 %tmp1 seq_cst - %tmp3 = atomicrmw umax i32 addrspace(3)* %ptr0, i32 %tmp1 seq_cst +define amdgpu_kernel void @are_loads_from_same_base_ptr_ds_atomic(ptr addrspace(1) %arg0, ptr addrspace(3) noalias %ptr0) #0 { + %tmp1 = load volatile i32, ptr addrspace(1) %arg0 + %tmp2 = atomicrmw umin ptr addrspace(3) %ptr0, i32 %tmp1 seq_cst + %tmp3 = atomicrmw umax ptr addrspace(3) %ptr0, i32 %tmp1 seq_cst ret void } diff --git a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll --- a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll +++ b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll @@ -20,26 +20,26 @@ ; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this ; alloca to a vector. 
It currently fails because it does not know how ; to interpret: -; getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 1, i32 %b +; getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 1, i32 %b ; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 64 ; SI-PROMOTE: ds_write_b32 [[PTRREG]] -define amdgpu_kernel void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) #0 { +define amdgpu_kernel void @test_private_array_ptr_calc(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) #0 { %alloca = alloca [16 x i32], align 16, addrspace(5) %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0); %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo) - %a_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inA, i32 %tid - %b_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inB, i32 %tid - %a = load i32, i32 addrspace(1)* %a_ptr, !range !0 - %b = load i32, i32 addrspace(1)* %b_ptr, !range !0 + %a_ptr = getelementptr inbounds i32, ptr addrspace(1) %inA, i32 %tid + %b_ptr = getelementptr inbounds i32, ptr addrspace(1) %inB, i32 %tid + %a = load i32, ptr addrspace(1) %a_ptr, !range !0 + %b = load i32, ptr addrspace(1) %b_ptr, !range !0 %result = add i32 %a, %b - %alloca_ptr = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 1, i32 %b - store i32 %result, i32 addrspace(5)* %alloca_ptr, align 4 + %alloca_ptr = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 1, i32 %b + store i32 %result, ptr addrspace(5) %alloca_ptr, align 4 ; Dummy call call void @llvm.amdgcn.s.barrier() - %reload = load i32, i32 addrspace(5)* %alloca_ptr, align 4, !range !0 - %out_ptr = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid - store i32 %reload, i32 addrspace(1)* %out_ptr, align 4 + %reload = load i32, ptr addrspace(5) %alloca_ptr, align 4, !range !0 + %out_ptr = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %tid + store i32 %reload, ptr addrspace(1) %out_ptr, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll --- a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll +++ b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll @@ -7,15 +7,15 @@ ; SI-DAG: v_mul_u32_u24 ; SI-DAG: v_mul_hi_u32_u24 ; SI: s_endpgm -define amdgpu_kernel void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [1025 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) { +define amdgpu_kernel void @test_array_ptr_calc(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) { %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo) - %a_ptr = getelementptr [1025 x i32], [1025 x i32] addrspace(1)* %inA, i32 %tid, i32 0 - %b_ptr = getelementptr i32, i32 addrspace(1)* %inB, i32 %tid - %a = load i32, i32 addrspace(1)* %a_ptr - %b = load i32, i32 addrspace(1)* %b_ptr + %a_ptr = getelementptr [1025 x i32], ptr addrspace(1) %inA, i32 %tid, i32 0 + %b_ptr = getelementptr i32, ptr addrspace(1) %inB, i32 %tid + %a = load i32, ptr addrspace(1) %a_ptr + %b = load i32, ptr addrspace(1) %b_ptr %result = add i32 %a, %b - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll --- 
a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll +++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll @@ -121,69 +121,69 @@ ; CHECK: NumSGPRsForWavesPerEU: 12 ; CHECK: NumVGPRsForWavesPerEU: 24 define amdgpu_kernel void @exactly_10() #9 { - %val0 = load volatile float, float addrspace(1)* @var - %val1 = load volatile float, float addrspace(1)* @var - %val2 = load volatile float, float addrspace(1)* @var - %val3 = load volatile float, float addrspace(1)* @var - %val4 = load volatile float, float addrspace(1)* @var - %val5 = load volatile float, float addrspace(1)* @var - %val6 = load volatile float, float addrspace(1)* @var - %val7 = load volatile float, float addrspace(1)* @var - %val8 = load volatile float, float addrspace(1)* @var - %val9 = load volatile float, float addrspace(1)* @var - %val10 = load volatile float, float addrspace(1)* @var - %val11 = load volatile float, float addrspace(1)* @var - %val12 = load volatile float, float addrspace(1)* @var - %val13 = load volatile float, float addrspace(1)* @var - %val14 = load volatile float, float addrspace(1)* @var - %val15 = load volatile float, float addrspace(1)* @var - %val16 = load volatile float, float addrspace(1)* @var - %val17 = load volatile float, float addrspace(1)* @var - %val18 = load volatile float, float addrspace(1)* @var - %val19 = load volatile float, float addrspace(1)* @var - %val20 = load volatile float, float addrspace(1)* @var - %val21 = load volatile float, float addrspace(1)* @var - %val22 = load volatile float, float addrspace(1)* @var - %val23 = load volatile float, float addrspace(1)* @var - %val24 = load volatile float, float addrspace(1)* @var - %val25 = load volatile float, float addrspace(1)* @var - %val26 = load volatile float, float addrspace(1)* @var - %val27 = load volatile float, float addrspace(1)* @var - %val28 = load volatile float, float addrspace(1)* @var - %val29 = load volatile float, float addrspace(1)* @var - %val30 = load volatile float, float addrspace(1)* @var - - store volatile float %val0, float addrspace(1)* @var - store volatile float %val1, float addrspace(1)* @var - store volatile float %val2, float addrspace(1)* @var - store volatile float %val3, float addrspace(1)* @var - store volatile float %val4, float addrspace(1)* @var - store volatile float %val5, float addrspace(1)* @var - store volatile float %val6, float addrspace(1)* @var - store volatile float %val7, float addrspace(1)* @var - store volatile float %val8, float addrspace(1)* @var - store volatile float %val9, float addrspace(1)* @var - store volatile float %val10, float addrspace(1)* @var - store volatile float %val11, float addrspace(1)* @var - store volatile float %val12, float addrspace(1)* @var - store volatile float %val13, float addrspace(1)* @var - store volatile float %val14, float addrspace(1)* @var - store volatile float %val15, float addrspace(1)* @var - store volatile float %val16, float addrspace(1)* @var - store volatile float %val17, float addrspace(1)* @var - store volatile float %val18, float addrspace(1)* @var - store volatile float %val19, float addrspace(1)* @var - store volatile float %val20, float addrspace(1)* @var - store volatile float %val21, float addrspace(1)* @var - store volatile float %val22, float addrspace(1)* @var - store volatile float %val23, float addrspace(1)* @var - store volatile float %val24, float addrspace(1)* @var - store volatile float %val25, float addrspace(1)* @var - store volatile float %val26, float addrspace(1)* @var - store volatile float %val27, float 
addrspace(1)* @var - store volatile float %val28, float addrspace(1)* @var - store volatile float %val29, float addrspace(1)* @var - store volatile float %val30, float addrspace(1)* @var + %val0 = load volatile float, ptr addrspace(1) @var + %val1 = load volatile float, ptr addrspace(1) @var + %val2 = load volatile float, ptr addrspace(1) @var + %val3 = load volatile float, ptr addrspace(1) @var + %val4 = load volatile float, ptr addrspace(1) @var + %val5 = load volatile float, ptr addrspace(1) @var + %val6 = load volatile float, ptr addrspace(1) @var + %val7 = load volatile float, ptr addrspace(1) @var + %val8 = load volatile float, ptr addrspace(1) @var + %val9 = load volatile float, ptr addrspace(1) @var + %val10 = load volatile float, ptr addrspace(1) @var + %val11 = load volatile float, ptr addrspace(1) @var + %val12 = load volatile float, ptr addrspace(1) @var + %val13 = load volatile float, ptr addrspace(1) @var + %val14 = load volatile float, ptr addrspace(1) @var + %val15 = load volatile float, ptr addrspace(1) @var + %val16 = load volatile float, ptr addrspace(1) @var + %val17 = load volatile float, ptr addrspace(1) @var + %val18 = load volatile float, ptr addrspace(1) @var + %val19 = load volatile float, ptr addrspace(1) @var + %val20 = load volatile float, ptr addrspace(1) @var + %val21 = load volatile float, ptr addrspace(1) @var + %val22 = load volatile float, ptr addrspace(1) @var + %val23 = load volatile float, ptr addrspace(1) @var + %val24 = load volatile float, ptr addrspace(1) @var + %val25 = load volatile float, ptr addrspace(1) @var + %val26 = load volatile float, ptr addrspace(1) @var + %val27 = load volatile float, ptr addrspace(1) @var + %val28 = load volatile float, ptr addrspace(1) @var + %val29 = load volatile float, ptr addrspace(1) @var + %val30 = load volatile float, ptr addrspace(1) @var + + store volatile float %val0, ptr addrspace(1) @var + store volatile float %val1, ptr addrspace(1) @var + store volatile float %val2, ptr addrspace(1) @var + store volatile float %val3, ptr addrspace(1) @var + store volatile float %val4, ptr addrspace(1) @var + store volatile float %val5, ptr addrspace(1) @var + store volatile float %val6, ptr addrspace(1) @var + store volatile float %val7, ptr addrspace(1) @var + store volatile float %val8, ptr addrspace(1) @var + store volatile float %val9, ptr addrspace(1) @var + store volatile float %val10, ptr addrspace(1) @var + store volatile float %val11, ptr addrspace(1) @var + store volatile float %val12, ptr addrspace(1) @var + store volatile float %val13, ptr addrspace(1) @var + store volatile float %val14, ptr addrspace(1) @var + store volatile float %val15, ptr addrspace(1) @var + store volatile float %val16, ptr addrspace(1) @var + store volatile float %val17, ptr addrspace(1) @var + store volatile float %val18, ptr addrspace(1) @var + store volatile float %val19, ptr addrspace(1) @var + store volatile float %val20, ptr addrspace(1) @var + store volatile float %val21, ptr addrspace(1) @var + store volatile float %val22, ptr addrspace(1) @var + store volatile float %val23, ptr addrspace(1) @var + store volatile float %val24, ptr addrspace(1) @var + store volatile float %val25, ptr addrspace(1) @var + store volatile float %val26, ptr addrspace(1) @var + store volatile float %val27, ptr addrspace(1) @var + store volatile float %val28, ptr addrspace(1) @var + store volatile float %val29, ptr addrspace(1) @var + store volatile float %val30, ptr addrspace(1) @var ret void } diff --git 
a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll --- a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll +++ b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll @@ -8,7 +8,7 @@ ; Subtargets must wait for outstanding memory instructions before a barrier if ; they cannot back off of the barrier. -define void @back_off_barrier_no_fence(i32* %in, i32* %out) #0 { +define void @back_off_barrier_no_fence(ptr %in, ptr %out) #0 { ; GFX9-NO-BACKOFF-LABEL: back_off_barrier_no_fence: ; GFX9-NO-BACKOFF: ; %bb.0: ; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -40,13 +40,13 @@ ; GFX10-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-BACKOFF-NEXT: s_setpc_b64 s[30:31] - %load = load i32, i32* %in + %load = load i32, ptr %in call void @llvm.amdgcn.s.barrier() - store i32 %load, i32* %out + store i32 %load, ptr %out ret void } -define void @back_off_barrier_with_fence(i32* %in, i32* %out) #0 { +define void @back_off_barrier_with_fence(ptr %in, ptr %out) #0 { ; GFX9-NO-BACKOFF-LABEL: back_off_barrier_with_fence: ; GFX9-NO-BACKOFF: ; %bb.0: ; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -84,11 +84,11 @@ ; GFX10-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-BACKOFF-NEXT: s_setpc_b64 s[30:31] - %load = load i32, i32* %in + %load = load i32, ptr %in fence syncscope("workgroup") release call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - store i32 %load, i32* %out + store i32 %load, ptr %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/bfe-combine.ll b/llvm/test/CodeGen/AMDGPU/bfe-combine.ll --- a/llvm/test/CodeGen/AMDGPU/bfe-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/bfe-combine.ll @@ -3,7 +3,7 @@ ; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck %s --check-prefixes=VI-SDWA ; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck %s --check-prefixes=CI -define amdgpu_kernel void @bfe_combine8(i32 addrspace(1)* nocapture %arg, i32 %x) { +define amdgpu_kernel void @bfe_combine8(ptr addrspace(1) nocapture %arg, i32 %x) { ; VI-LABEL: bfe_combine8: ; VI: ; %bb.0: ; VI-NEXT: s_load_dword s2, s[0:1], 0x2c @@ -62,13 +62,13 @@ %idx = add i32 %x, %id %srl = lshr i32 %idx, 8 %and = and i32 %srl, 255 - %ptr = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %and - %val = load i32, i32 addrspace(1)* %ptr, align 4 - store i32 %val, i32 addrspace(1)* %arg, align 4 + %ptr = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %and + %val = load i32, ptr addrspace(1) %ptr, align 4 + store i32 %val, ptr addrspace(1) %arg, align 4 ret void } -define amdgpu_kernel void @bfe_combine16(i32 addrspace(1)* nocapture %arg, i32 %x) { +define amdgpu_kernel void @bfe_combine16(ptr addrspace(1) nocapture %arg, i32 %x) { ; VI-LABEL: bfe_combine16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dword s2, s[0:1], 0x2c @@ -132,9 +132,9 @@ %idx = add i32 %x, %id %srl = lshr i32 %idx, 1 %and = and i32 %srl, 2147450880 - %ptr = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %and - %val = load i32, i32 addrspace(1)* %ptr, align 4 - store i32 %val, i32 addrspace(1)* %arg, align 4 + %ptr = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %and + %val = load i32, ptr addrspace(1) %ptr, align 4 + store i32 %val, ptr addrspace(1) %arg, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll --- 
a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll @@ -23,20 +23,18 @@ ; GCN: [[BB1]] ; GCN: s_or_b64 exec, exec -define hidden void @void_func_byval_struct_use_outside_entry_block(%struct.ByValStruct addrspace(5)* byval(%struct.ByValStruct) noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval(%struct.ByValStruct) noalias nocapture align 4 %arg1, i1 %cond) #1 { +define hidden void @void_func_byval_struct_use_outside_entry_block(ptr addrspace(5) byval(%struct.ByValStruct) noalias nocapture align 4 %arg0, ptr addrspace(5) byval(%struct.ByValStruct) noalias nocapture align 4 %arg1, i1 %cond) #1 { entry: br i1 %cond, label %bb0, label %bb1 bb0: - %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0 - %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4 + %tmp = load volatile i32, ptr addrspace(5) %arg0, align 4 %add = add nsw i32 %tmp, 1 - store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0 - %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4 + store volatile i32 %add, ptr addrspace(5) %arg0, align 4 + %tmp1 = load volatile i32, ptr addrspace(5) %arg1, align 4 %add3 = add nsw i32 %tmp1, 2 - store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4 - store volatile i32 9, i32 addrspace(1)* null, align 4 + store volatile i32 %add3, ptr addrspace(5) %arg1, align 4 + store volatile i32 9, ptr addrspace(1) null, align 4 br label %bb1 bb1: @@ -44,8 +42,8 @@ } declare hidden void @external_void_func_void() #0 -declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #3 -declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #3 +declare void @llvm.lifetime.start.p5(i64, ptr addrspace(5) nocapture) #3 +declare void @llvm.lifetime.end.p5(i64, ptr addrspace(5) nocapture) #3 attributes #0 = { nounwind } attributes #1 = { noinline norecurse nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll --- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll +++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll @@ -3,12 +3,12 @@ ; GCN-LABEL: {{^}}store_fi_lifetime: ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}} ; GCN: buffer_store_dword [[FI]] -define amdgpu_kernel void @store_fi_lifetime(i32 addrspace(1)* %out, i32 %in) #0 { +define amdgpu_kernel void @store_fi_lifetime(ptr addrspace(1) %out, i32 %in) #0 { entry: %b = alloca i8, addrspace(5) - call void @llvm.lifetime.start.p5i8(i64 1, i8 addrspace(5)* %b) - store volatile i8 addrspace(5)* %b, i8 addrspace(5)* addrspace(1)* undef - call void @llvm.lifetime.end.p5i8(i64 1, i8 addrspace(5)* %b) + call void @llvm.lifetime.start.p5(i64 1, ptr addrspace(5) %b) + store volatile ptr addrspace(5) %b, ptr addrspace(1) undef + call void @llvm.lifetime.end.p5(i64 1, ptr addrspace(5) %b) ret void } @@ -18,10 +18,10 @@ ; GCN: buffer_store_dword v{{[0-9]+}}, off, ; GCN: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]] ; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO0]] -define amdgpu_kernel void @stored_fi_to_lds(float addrspace(5)* addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @stored_fi_to_lds(ptr addrspace(3) %ptr) #0 { %tmp = alloca float, addrspace(5) - store float 4.0, float addrspace(5)*%tmp - store float addrspace(5)* %tmp, float addrspace(5)* addrspace(3)* %ptr + store float 4.0, ptr 
addrspace(5) %tmp + store ptr addrspace(5) %tmp, ptr addrspace(3) %ptr ret void } @@ -38,13 +38,13 @@ ; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 8{{$}} ; GCN: ds_write_b32 [[VLDSPTR]], [[FI1]] -define amdgpu_kernel void @stored_fi_to_lds_2_small_objects(float addrspace(5)* addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @stored_fi_to_lds_2_small_objects(ptr addrspace(3) %ptr) #0 { %tmp0 = alloca float, addrspace(5) %tmp1 = alloca float, addrspace(5) - store float 4.0, float addrspace(5)* %tmp0 - store float 4.0, float addrspace(5)* %tmp1 - store volatile float addrspace(5)* %tmp0, float addrspace(5)* addrspace(3)* %ptr - store volatile float addrspace(5)* %tmp1, float addrspace(5)* addrspace(3)* %ptr + store float 4.0, ptr addrspace(5) %tmp0 + store float 4.0, ptr addrspace(5) %tmp1 + store volatile ptr addrspace(5) %tmp0, ptr addrspace(3) %ptr + store volatile ptr addrspace(5) %tmp1, ptr addrspace(3) %ptr ret void } @@ -55,12 +55,11 @@ ; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 4{{$}} ; GCN: buffer_store_dword [[ZERO]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}} define amdgpu_kernel void @stored_fi_to_self() #0 { - %tmp = alloca i32 addrspace(5)*, addrspace(5) + %tmp = alloca ptr addrspace(5), addrspace(5) ; Avoid optimizing everything out - store volatile i32 addrspace(5)* inttoptr (i32 1234 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp - %bitcast = bitcast i32 addrspace(5)* addrspace(5)* %tmp to i32 addrspace(5)* - store volatile i32 addrspace(5)* %bitcast, i32 addrspace(5)* addrspace(5)* %tmp + store volatile ptr addrspace(5) inttoptr (i32 1234 to ptr addrspace(5)), ptr addrspace(5) %tmp + store volatile ptr addrspace(5) %tmp, ptr addrspace(5) %tmp ret void } @@ -75,16 +74,14 @@ ; GCN: buffer_store_dword [[OFFSETK]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2052{{$}} define amdgpu_kernel void @stored_fi_to_self_offset() #0 { %tmp0 = alloca [512 x i32], addrspace(5) - %tmp1 = alloca i32 addrspace(5)*, addrspace(5) + %tmp1 = alloca ptr addrspace(5), addrspace(5) ; Avoid optimizing everything out - %tmp0.cast = bitcast [512 x i32] addrspace(5)* %tmp0 to i32 addrspace(5)* - store volatile i32 32, i32 addrspace(5)* %tmp0.cast + store volatile i32 32, ptr addrspace(5) %tmp0 - store volatile i32 addrspace(5)* inttoptr (i32 1234 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp1 + store volatile ptr addrspace(5) inttoptr (i32 1234 to ptr addrspace(5)), ptr addrspace(5) %tmp1 - %bitcast = bitcast i32 addrspace(5)* addrspace(5)* %tmp1 to i32 addrspace(5)* - store volatile i32 addrspace(5)* %bitcast, i32 addrspace(5)* addrspace(5)* %tmp1 + store volatile ptr addrspace(5) %tmp1, ptr addrspace(5) %tmp1 ret void } @@ -99,18 +96,16 @@ ; GCN: v_mov_b32_e32 [[FI2:v[0-9]+]], 12{{$}} ; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8{{$}} define amdgpu_kernel void @stored_fi_to_fi() #0 { - %tmp0 = alloca i32 addrspace(5)*, addrspace(5) - %tmp1 = alloca i32 addrspace(5)*, addrspace(5) - %tmp2 = alloca i32 addrspace(5)*, addrspace(5) - store volatile i32 addrspace(5)* inttoptr (i32 1234 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp0 - store volatile i32 addrspace(5)* inttoptr (i32 5678 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp1 - store volatile i32 addrspace(5)* inttoptr (i32 9999 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp2 - - %bitcast1 = bitcast i32 addrspace(5)* addrspace(5)* %tmp1 to i32 addrspace(5)* - %bitcast2 = bitcast i32 addrspace(5)* addrspace(5)* %tmp2 to i32 addrspace(5)* ; at offset 8 - - store 
volatile i32 addrspace(5)* %bitcast1, i32 addrspace(5)* addrspace(5)* %tmp2 ; store offset 4 at offset 8 - store volatile i32 addrspace(5)* %bitcast2, i32 addrspace(5)* addrspace(5)* %tmp1 ; store offset 8 at offset 4 + %tmp0 = alloca ptr addrspace(5), addrspace(5) + %tmp1 = alloca ptr addrspace(5), addrspace(5) + %tmp2 = alloca ptr addrspace(5), addrspace(5) + store volatile ptr addrspace(5) inttoptr (i32 1234 to ptr addrspace(5)), ptr addrspace(5) %tmp0 + store volatile ptr addrspace(5) inttoptr (i32 5678 to ptr addrspace(5)), ptr addrspace(5) %tmp1 + store volatile ptr addrspace(5) inttoptr (i32 9999 to ptr addrspace(5)), ptr addrspace(5) %tmp2 + + + store volatile ptr addrspace(5) %tmp1, ptr addrspace(5) %tmp2 ; store offset 4 at offset 8 + store volatile ptr addrspace(5) %tmp2, ptr addrspace(5) %tmp1 ; store offset 8 at offset 4 ret void } @@ -118,10 +113,10 @@ ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}} ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}} ; GCN: buffer_store_dword [[FI]] -define amdgpu_kernel void @stored_fi_to_global(float addrspace(5)* addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @stored_fi_to_global(ptr addrspace(1) %ptr) #0 { %tmp = alloca float, addrspace(5) - store float 0.0, float addrspace(5)*%tmp - store float addrspace(5)* %tmp, float addrspace(5)* addrspace(1)* %ptr + store float 0.0, ptr addrspace(5) %tmp + store ptr addrspace(5) %tmp, ptr addrspace(1) %ptr ret void } @@ -136,15 +131,15 @@ ; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 12{{$}} ; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -define amdgpu_kernel void @stored_fi_to_global_2_small_objects(float addrspace(5)* addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @stored_fi_to_global_2_small_objects(ptr addrspace(1) %ptr) #0 { %tmp0 = alloca float, addrspace(5) %tmp1 = alloca float, addrspace(5) %tmp2 = alloca float, addrspace(5) - store volatile float 0.0, float addrspace(5)*%tmp0 - store volatile float 0.0, float addrspace(5)*%tmp1 - store volatile float 0.0, float addrspace(5)*%tmp2 - store volatile float addrspace(5)* %tmp1, float addrspace(5)* addrspace(1)* %ptr - store volatile float addrspace(5)* %tmp2, float addrspace(5)* addrspace(1)* %ptr + store volatile float 0.0, ptr addrspace(5) %tmp0 + store volatile float 0.0, ptr addrspace(5) %tmp1 + store volatile float 0.0, ptr addrspace(5) %tmp2 + store volatile ptr addrspace(5) %tmp1, ptr addrspace(1) %ptr + store volatile ptr addrspace(5) %tmp2, ptr addrspace(1) %ptr ret void } @@ -163,19 +158,18 @@ ; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}} ; GCN: buffer_store_dword [[BASE_1_OFF_2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -define amdgpu_kernel void @stored_fi_to_global_huge_frame_offset(i32 addrspace(5)* addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @stored_fi_to_global_huge_frame_offset(ptr addrspace(1) %ptr) #0 { %tmp0 = alloca [4096 x i32], addrspace(5) %tmp1 = alloca [4096 x i32], addrspace(5) - %gep0.tmp0 = getelementptr [4096 x i32], [4096 x i32] addrspace(5)* %tmp0, i32 0, i32 0 - store volatile i32 0, i32 addrspace(5)* %gep0.tmp0 - %gep1.tmp0 = getelementptr [4096 x i32], [4096 x i32] addrspace(5)* %tmp0, i32 0, i32 4095 - store volatile i32 999, i32 addrspace(5)* %gep1.tmp0 - %gep0.tmp1 = getelementptr [4096 x i32], [4096 x i32] addrspace(5)* %tmp0, i32 0, i32 14 - store i32 addrspace(5)* %gep0.tmp1, i32 addrspace(5)* addrspace(1)* %ptr + store volatile i32 0, ptr addrspace(5) %tmp0 + %gep1.tmp0 = getelementptr [4096 x i32], 
ptr addrspace(5) %tmp0, i32 0, i32 4095 + store volatile i32 999, ptr addrspace(5) %gep1.tmp0 + %gep0.tmp1 = getelementptr [4096 x i32], ptr addrspace(5) %tmp0, i32 0, i32 14 + store ptr addrspace(5) %gep0.tmp1, ptr addrspace(1) %ptr ret void } -@g1 = external addrspace(1) global i32 addrspace(5)* +@g1 = external addrspace(1) global ptr addrspace(5) ; This was leaving a dead node around resulting in failing to select ; on the leftover AssertZext's ValueType operand. @@ -186,18 +180,18 @@ ; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC_HI]], g1@gotpcrel32@hi+12 ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}} ; GCN: buffer_store_dword [[FI]] -define amdgpu_kernel void @cannot_select_assertzext_valuetype(i32 addrspace(1)* %out, i32 %idx) #0 { +define amdgpu_kernel void @cannot_select_assertzext_valuetype(ptr addrspace(1) %out, i32 %idx) #0 { entry: %b = alloca i32, align 4, addrspace(5) - %tmp1 = load volatile i32 addrspace(5)*, i32 addrspace(5)* addrspace(1)* @g1, align 4 - %arrayidx = getelementptr inbounds i32, i32 addrspace(5)* %tmp1, i32 %idx - %tmp2 = load i32, i32 addrspace(5)* %arrayidx, align 4 - store volatile i32 addrspace(5)* %b, i32 addrspace(5)* addrspace(1)* undef + %tmp1 = load volatile ptr addrspace(5), ptr addrspace(1) @g1, align 4 + %arrayidx = getelementptr inbounds i32, ptr addrspace(5) %tmp1, i32 %idx + %tmp2 = load i32, ptr addrspace(5) %arrayidx, align 4 + store volatile ptr addrspace(5) %b, ptr addrspace(1) undef ret void } -declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #1 -declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #1 +declare void @llvm.lifetime.start.p5(i64, ptr addrspace(5) nocapture) #1 +declare void @llvm.lifetime.end.p5(i64, ptr addrspace(5) nocapture) #1 attributes #0 = { nounwind } attributes #1 = { argmemonly nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/cc-update.ll b/llvm/test/CodeGen/AMDGPU/cc-update.ll --- a/llvm/test/CodeGen/AMDGPU/cc-update.ll +++ b/llvm/test/CodeGen/AMDGPU/cc-update.ll @@ -61,7 +61,7 @@ ; GFX1100-NEXT: s_endpgm entry: %x = alloca i32, align 4, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %x, align 4 + store volatile i32 0, ptr addrspace(5) %x, align 4 ret void } @@ -243,7 +243,7 @@ entry: %x = alloca i32, align 4, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %x, align 4 + store volatile i32 0, ptr addrspace(5) %x, align 4 tail call void @ex() #0 ret void } @@ -314,7 +314,7 @@ ; GFX1100-NEXT: s_endpgm entry: %x = alloca i32, align 4, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %x, align 4 + store volatile i32 0, ptr addrspace(5) %x, align 4 ret void } @@ -521,7 +521,7 @@ ; GFX1100-NEXT: s_endpgm entry: %x = alloca i32, align 4, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %x, align 4 + store volatile i32 0, ptr addrspace(5) %x, align 4 tail call void @ex() #2 ret void } @@ -537,7 +537,6 @@ ; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill ; GFX803-NEXT: ;;#ASMSTART ; GFX803-NEXT: ;;#ASMEND -; GFX803-NEXT: s_mov_b32 s4, 0x40000 ; GFX803-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload ; GFX803-NEXT: s_waitcnt vmcnt(0) ; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 @@ -554,7 +553,6 @@ ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s4, 0x40000 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_store_dword v0, off, 
s[0:3], 0 offset:8 @@ -569,8 +567,6 @@ ; GFX1010-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8 glc dlc ; GFX1010-NEXT: s_waitcnt vmcnt(0) ; GFX1010-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill -; GFX1010-NEXT: s_waitcnt_depctr 0xffe3 -; GFX1010-NEXT: s_mov_b32 s4, 0x20000 ; GFX1010-NEXT: ;;#ASMSTART ; GFX1010-NEXT: ;;#ASMEND ; GFX1010-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload @@ -585,7 +581,6 @@ ; GFX1100-NEXT: s_waitcnt vmcnt(0) ; GFX1100-NEXT: s_movk_i32 s0, 0x1000 ; GFX1100-NEXT: scratch_store_b32 off, v0, s0 ; 4-byte Folded Spill -; GFX1100-NEXT: s_movk_i32 s0, 0x1000 ; GFX1100-NEXT: ;;#ASMSTART ; GFX1100-NEXT: ;;#ASMEND ; GFX1100-NEXT: scratch_load_b32 v0, off, s0 ; 4-byte Folded Reload @@ -598,19 +593,18 @@ ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not ; fit in the instruction, and has to live in the SGPR offset. %alloca = alloca i8, i32 4092, align 4, addrspace(5) - %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)* - %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1 + %aptr = getelementptr i32, ptr addrspace(5) %alloca, i32 1 ; 0x40000 / 64 = 4096 (for wave64) ; CHECK: s_add_u32 s6, s7, 0x40000 ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill - %a = load volatile i32, i32 addrspace(5)* %aptr + %a = load volatile i32, ptr addrspace(5) %aptr ; Force %a to spill call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" () - %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1 - store volatile i32 %a, i32 addrspace(5)* %outptr + %outptr = getelementptr i32, ptr addrspace(5) %alloca, i32 1 + store volatile i32 %a, ptr addrspace(5) %outptr ret void } diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll @@ -5,20 +5,20 @@ ; Make sure we match the addressing mode offset of csub intrinsics across blocks. 
-define amdgpu_kernel void @test_sink_small_offset_global_atomic_csub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +define amdgpu_kernel void @test_sink_small_offset_global_atomic_csub_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; OPT-LABEL: @test_sink_small_offset_global_atomic_csub_i32( ; OPT-NEXT: entry: ; OPT-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]] ; OPT-NEXT: [[CMP:%.*]] = icmp eq i32 [[TID]], 0 ; OPT-NEXT: br i1 [[CMP]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT: if: -; OPT-NEXT: [[IN_GEP:%.*]] = getelementptr i32, i32 addrspace(1)* [[IN:%.*]], i32 7 -; OPT-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* [[IN_GEP]], i32 2) +; OPT-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[IN:%.*]], i32 7 +; OPT-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) [[IN_GEP]], i32 2) ; OPT-NEXT: br label [[ENDIF]] ; OPT: endif: ; OPT-NEXT: [[X:%.*]] = phi i32 [ [[VAL]], [[IF]] ], [ 0, [[ENTRY:%.*]] ] -; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, i32 addrspace(1)* [[OUT:%.*]], i32 999999 -; OPT-NEXT: store i32 [[X]], i32 addrspace(1)* [[OUT_GEP]], align 4 +; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT:%.*]], i32 999999 +; OPT-NEXT: store i32 [[X]], ptr addrspace(1) [[OUT_GEP]], align 4 ; OPT-NEXT: br label [[DONE:%.*]] ; OPT: done: ; OPT-NEXT: ret void @@ -48,21 +48,21 @@ br i1 %cmp, label %endif, label %if if: - %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 7 - %val = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %in.gep, i32 2) + %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 7 + %val = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %in.gep, i32 2) br label %endif endif: %x = phi i32 [ %val, %if ], [ 0, %entry ] - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 999999 - store i32 %x, i32 addrspace(1)* %out.gep + %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 999999 + store i32 %x, ptr addrspace(1) %out.gep br label %done done: ret void } -declare i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* nocapture, i32) #0 +declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #0 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1 attributes #0 = { argmemonly nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll @@ -4,21 +4,21 @@ ; Make sure we match the addressing mode offset of globla.atomic.fadd intrinsics across blocks. 
-define amdgpu_kernel void @test_sink_small_offset_global_atomic_fadd_f32(float addrspace(1)* %out, float addrspace(1)* %in) { +define amdgpu_kernel void @test_sink_small_offset_global_atomic_fadd_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; OPT-LABEL: @test_sink_small_offset_global_atomic_fadd_f32( ; OPT-NEXT: entry: ; OPT-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]] ; OPT-NEXT: [[CMP:%.*]] = icmp eq i32 [[TID]], 0 ; OPT-NEXT: br i1 [[CMP]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT: if: -; OPT-NEXT: [[IN_GEP:%.*]] = getelementptr float, float addrspace(1)* [[IN:%.*]], i32 7 -; OPT-NEXT: [[FADD2:%.*]] = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* [[IN_GEP]], float 2.000000e+00) -; OPT-NEXT: [[VAL:%.*]] = load volatile float, float addrspace(1)* undef, align 4 +; OPT-NEXT: [[IN_GEP:%.*]] = getelementptr float, ptr addrspace(1) [[IN:%.*]], i32 7 +; OPT-NEXT: [[FADD2:%.*]] = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) [[IN_GEP]], float 2.000000e+00) +; OPT-NEXT: [[VAL:%.*]] = load volatile float, ptr addrspace(1) undef, align 4 ; OPT-NEXT: br label [[ENDIF]] ; OPT: endif: ; OPT-NEXT: [[X:%.*]] = phi float [ [[VAL]], [[IF]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr float, float addrspace(1)* [[OUT:%.*]], i32 999999 -; OPT-NEXT: store float [[X]], float addrspace(1)* [[OUT_GEP]], align 4 +; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr float, ptr addrspace(1) [[OUT:%.*]], i32 999999 +; OPT-NEXT: store float [[X]], ptr addrspace(1) [[OUT_GEP]], align 4 ; OPT-NEXT: br label [[DONE:%.*]] ; OPT: done: ; OPT-NEXT: ret void @@ -50,15 +50,15 @@ br i1 %cmp, label %endif, label %if if: - %in.gep = getelementptr float, float addrspace(1)* %in, i32 7 - %fadd2 = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %in.gep, float 2.0) - %val = load volatile float, float addrspace(1)* undef + %in.gep = getelementptr float, ptr addrspace(1) %in, i32 7 + %fadd2 = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %in.gep, float 2.0) + %val = load volatile float, ptr addrspace(1) undef br label %endif endif: %x = phi float [ %val, %if ], [ 0.0, %entry ] - %out.gep = getelementptr float, float addrspace(1)* %out, i32 999999 - store float %x, float addrspace(1)* %out.gep + %out.gep = getelementptr float, ptr addrspace(1) %out, i32 999999 + store float %x, ptr addrspace(1) %out.gep br label %done done: @@ -66,7 +66,7 @@ } declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1 -declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* nocapture, float) #2 +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) #2 attributes #0 = { argmemonly nounwind } attributes #1 = { nounwind readnone willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll @@ -10,27 +10,27 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" ; OPT-LABEL: @test_sink_global_small_offset_i32( -; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in -; OPT-VI: getelementptr i32, i32 addrspace(1)* %in +; OPT-CI-NOT: getelementptr i32, ptr addrspace(1) %in +; OPT-VI: getelementptr i32, ptr addrspace(1) 
%in ; OPT: br i1 ; OPT-CI: getelementptr i8, ; GCN-LABEL: {{^}}test_sink_global_small_offset_i32: -define amdgpu_kernel void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +define amdgpu_kernel void @test_sink_global_small_offset_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7 + %out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999 + %in.gep = getelementptr i32, ptr addrspace(1) %in, i64 7 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i32, i32 addrspace(1)* %in.gep + %tmp1 = load i32, ptr addrspace(1) %in.gep br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(1)* %out.gep + store i32 %x, ptr addrspace(1) %out.gep br label %done done: @@ -38,7 +38,7 @@ } ; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset( -; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535 +; OPT: %in.gep = getelementptr i8, ptr addrspace(1) %in, i64 65535 ; OPT: br i1 ; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset: @@ -49,22 +49,22 @@ ; GFX9: global_load_sbyte {{v[0-9]+}}, [[VOFFSET]], {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}} ; GCN: {{^}}.LBB1_2: ; GCN: s_or_b64 exec -define amdgpu_kernel void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { +define amdgpu_kernel void @test_sink_global_small_max_i32_ds_offset(ptr addrspace(1) %out, ptr addrspace(1) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999 - %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535 + %out.gep = getelementptr i32, ptr addrspace(1) %out, i64 99999 + %in.gep = getelementptr i8, ptr addrspace(1) %in, i64 65535 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i8, i8 addrspace(1)* %in.gep + %tmp1 = load i8, ptr addrspace(1) %in.gep %tmp2 = sext i8 %tmp1 to i32 br label %endif endif: %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(1)* %out.gep + store i32 %x, ptr addrspace(1) %out.gep br label %done done: @@ -78,22 +78,22 @@ ; GFX9: global_load_sbyte {{v[0-9]+}}, [[ZERO]], {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}} ; GCN: {{^}}.LBB2_2: ; GCN: s_or_b64 exec -define amdgpu_kernel void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { +define amdgpu_kernel void @test_sink_global_small_max_mubuf_offset(ptr addrspace(1) %out, ptr addrspace(1) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024 - %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095 + %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 1024 + %in.gep = getelementptr i8, ptr addrspace(1) %in, i64 4095 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i8, i8 addrspace(1)* %in.gep + %tmp1 = load i8, ptr addrspace(1) %in.gep %tmp2 = sext i8 %tmp1 to i32 br label %endif endif: %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(1)* %out.gep + store i32 %x, ptr addrspace(1) %out.gep br label %done done: @@ -107,22 +107,22 @@ ; GFX9: global_load_sbyte {{v[0-9]+}}, [[VOFFSET]], {{s\[[0-9]+:[0-9]+\]$}} ; GCN: {{^}}.LBB3_2: ; GCN: s_or_b64 exec -define amdgpu_kernel void 
@test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { +define amdgpu_kernel void @test_sink_global_small_max_plus_1_mubuf_offset(ptr addrspace(1) %out, ptr addrspace(1) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999 - %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096 + %out.gep = getelementptr i32, ptr addrspace(1) %out, i64 99999 + %in.gep = getelementptr i8, ptr addrspace(1) %in, i64 4096 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i8, i8 addrspace(1)* %in.gep + %tmp1 = load i8, ptr addrspace(1) %in.gep %tmp2 = sext i8 %tmp1 to i32 br label %endif endif: %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(1)* %out.gep + store i32 %x, ptr addrspace(1) %out.gep br label %done done: @@ -139,27 +139,27 @@ ; GCN: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4092{{$}} ; GCN: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4092 glc{{$}} ; GCN: {{^}}.LBB4_2: -define amdgpu_kernel void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) { +define amdgpu_kernel void @test_sink_scratch_small_offset_i32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %arg) { entry: %alloca = alloca [512 x i32], align 4, addrspace(5) - %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998 - %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %out.gep.0 = getelementptr i32, ptr addrspace(1) %out, i64 999998 + %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i64 999999 %add.arg = add i32 %arg, 8 - %alloca.gep = getelementptr [512 x i32], [512 x i32] addrspace(5)* %alloca, i32 0, i32 1022 + %alloca.gep = getelementptr [512 x i32], ptr addrspace(5) %alloca, i32 0, i32 1022 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - store volatile i32 123, i32 addrspace(5)* %alloca.gep - %tmp1 = load volatile i32, i32 addrspace(5)* %alloca.gep + store volatile i32 123, ptr addrspace(5) %alloca.gep + %tmp1 = load volatile i32, ptr addrspace(5) %alloca.gep br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(1)* %out.gep.0 - %load = load volatile i32, i32 addrspace(5)* %alloca.gep - store i32 %load, i32 addrspace(1)* %out.gep.1 + store i32 %x, ptr addrspace(1) %out.gep.0 + %load = load volatile i32, ptr addrspace(5) %alloca.gep + store i32 %load, ptr addrspace(1) %out.gep.1 br label %done done: @@ -180,27 +180,27 @@ ; GCN: buffer_load_dword {{v[0-9]+}}, [[BASE_FI1]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen offset:4092 glc{{$}} ; GCN: {{^.LBB[0-9]+}}_2: -define amdgpu_kernel void @test_sink_scratch_small_offset_i32_reserved(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) { +define amdgpu_kernel void @test_sink_scratch_small_offset_i32_reserved(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %arg) { entry: %alloca = alloca [512 x i32], align 4, addrspace(5) - %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998 - %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %out.gep.0 = getelementptr i32, ptr addrspace(1) %out, i64 999998 + %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i64 999999 %add.arg = add i32 %arg, 8 - %alloca.gep = getelementptr [512 x i32], [512 x i32] addrspace(5)* %alloca, i32 0, i32 1023 + %alloca.gep = getelementptr [512 x i32], ptr 
addrspace(5) %alloca, i32 0, i32 1023 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - store volatile i32 123, i32 addrspace(5)* %alloca.gep - %tmp1 = load volatile i32, i32 addrspace(5)* %alloca.gep + store volatile i32 123, ptr addrspace(5) %alloca.gep + %tmp1 = load volatile i32, ptr addrspace(5) %alloca.gep br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(1)* %out.gep.0 - %load = load volatile i32, i32 addrspace(5)* %alloca.gep - store i32 %load, i32 addrspace(1)* %out.gep.1 + store i32 %x, ptr addrspace(1) %out.gep.0 + %load = load volatile i32, ptr addrspace(5) %alloca.gep + store i32 %load, ptr addrspace(1) %out.gep.1 br label %done done: @@ -208,7 +208,7 @@ } ; OPT-LABEL: @test_no_sink_scratch_large_offset_i32( -; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32] addrspace(5)* %alloca, i32 0, i32 1024 +; OPT: %alloca.gep = getelementptr [512 x i32], ptr addrspace(5) %alloca, i32 0, i32 1024 ; OPT: br i1 ; OPT-NOT: ptrtoint @@ -217,27 +217,27 @@ ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen{{$}} ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen glc{{$}} ; GCN: {{^.LBB[0-9]+}}_2: -define amdgpu_kernel void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) { +define amdgpu_kernel void @test_no_sink_scratch_large_offset_i32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %arg) { entry: %alloca = alloca [512 x i32], align 4, addrspace(5) - %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998 - %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %out.gep.0 = getelementptr i32, ptr addrspace(1) %out, i64 999998 + %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i64 999999 %add.arg = add i32 %arg, 8 - %alloca.gep = getelementptr [512 x i32], [512 x i32] addrspace(5)* %alloca, i32 0, i32 1024 + %alloca.gep = getelementptr [512 x i32], ptr addrspace(5) %alloca, i32 0, i32 1024 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - store volatile i32 123, i32 addrspace(5)* %alloca.gep - %tmp1 = load volatile i32, i32 addrspace(5)* %alloca.gep + store volatile i32 123, ptr addrspace(5) %alloca.gep + %tmp1 = load volatile i32, ptr addrspace(5) %alloca.gep br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(1)* %out.gep.0 - %load = load volatile i32, i32 addrspace(5)* %alloca.gep - store i32 %load, i32 addrspace(1)* %out.gep.1 + store i32 %x, ptr addrspace(1) %out.gep.0 + %load = load volatile i32, ptr addrspace(5) %alloca.gep + store i32 %load, ptr addrspace(1) %out.gep.1 br label %done done: @@ -249,22 +249,22 @@ ; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] ; GCN: {{^.LBB[0-9]+}}_2: -define amdgpu_kernel void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) { +define amdgpu_kernel void @test_sink_global_vreg_sreg_i32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %offset) { entry: %offset.ext = zext i32 %offset to i64 - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext + %out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999 + %in.gep = getelementptr 
i32, ptr addrspace(1) %in, i64 %offset.ext %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i32, i32 addrspace(1)* %in.gep + %tmp1 = load i32, ptr addrspace(1) %in.gep br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(1)* %out.gep + store i32 %x, ptr addrspace(1) %out.gep br label %done done: @@ -272,28 +272,28 @@ } ; OPT-LABEL: @test_sink_constant_small_offset_i32 -; OPT-NOT: getelementptr i32, i32 addrspace(4)* +; OPT-NOT: getelementptr i32, ptr addrspace(4) ; OPT: br i1 ; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32: ; GCN: s_and_saveexec_b64 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}} ; GCN: s_or_b64 exec, exec -define amdgpu_kernel void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) { +define amdgpu_kernel void @test_sink_constant_small_offset_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7 + %out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999 + %in.gep = getelementptr i32, ptr addrspace(4) %in, i64 7 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i32, i32 addrspace(4)* %in.gep + %tmp1 = load i32, ptr addrspace(4) %in.gep br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(1)* %out.gep + store i32 %x, ptr addrspace(1) %out.gep br label %done done: @@ -301,28 +301,28 @@ } ; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32 -; OPT-NOT: getelementptr i32, i32 addrspace(4)* +; OPT-NOT: getelementptr i32, ptr addrspace(4) ; OPT: br i1 ; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32: ; GCN: s_and_saveexec_b64 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}} ; GCN: s_or_b64 exec, exec -define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) { +define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 255 + %out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999 + %in.gep = getelementptr i32, ptr addrspace(4) %in, i64 255 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i32, i32 addrspace(4)* %in.gep + %tmp1 = load i32, ptr addrspace(4) %in.gep br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(1)* %out.gep + store i32 %x, ptr addrspace(1) %out.gep br label %done done: @@ -330,9 +330,9 @@ } ; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32 -; OPT-SI: getelementptr i32, i32 addrspace(4)* -; OPT-CI-NOT: getelementptr i32, i32 addrspace(4)* -; OPT-VI-NOT: getelementptr i32, i32 addrspace(4)* +; OPT-SI: getelementptr i32, ptr addrspace(4) +; OPT-CI-NOT: getelementptr i32, ptr addrspace(4) +; OPT-VI-NOT: getelementptr i32, ptr addrspace(4) ; OPT: br i1 ; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32: @@ -341,21 +341,21 @@ ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} ; GCN: s_or_b64 exec, exec -define amdgpu_kernel void 
@test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) { +define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_p1_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 256 + %out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999 + %in.gep = getelementptr i32, ptr addrspace(4) %in, i64 256 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i32, i32 addrspace(4)* %in.gep + %tmp1 = load i32, ptr addrspace(4) %in.gep br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(1)* %out.gep + store i32 %x, ptr addrspace(1) %out.gep br label %done done: @@ -363,8 +363,8 @@ } ; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32 -; OPT-SI: getelementptr i32, i32 addrspace(4)* -; OPT-CI-NOT: getelementptr i32, i32 addrspace(4)* +; OPT-SI: getelementptr i32, ptr addrspace(4) +; OPT-CI-NOT: getelementptr i32, ptr addrspace(4) ; OPT: br i1 ; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32: @@ -380,21 +380,21 @@ ; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffffff{{$}} ; GCN: s_or_b64 exec, exec -define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) { +define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 4294967295 + %out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999 + %in.gep = getelementptr i32, ptr addrspace(4) %in, i64 4294967295 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i32, i32 addrspace(4)* %in.gep + %tmp1 = load i32, ptr addrspace(4) %in.gep br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(1)* %out.gep + store i32 %x, ptr addrspace(1) %out.gep br label %done done: @@ -402,7 +402,7 @@ } ; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32 -; OPT: getelementptr i32, i32 addrspace(4)* +; OPT: getelementptr i32, ptr addrspace(4) ; OPT: br i1 ; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32: @@ -411,21 +411,21 @@ ; GCN: s_addc_u32 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} ; GCN: s_or_b64 exec, exec -define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) { +define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_p1_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 17179869181 + %out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999 + %in.gep = getelementptr i32, ptr addrspace(4) %in, i64 17179869181 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i32, i32 addrspace(4)* %in.gep + %tmp1 = load i32, ptr addrspace(4) %in.gep br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(1)* %out.gep + store i32 %x, ptr addrspace(1) %out.gep br label %done done: @@ -441,21 +441,21 @@ ; VI: s_load_dword 
s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}} ; GCN: s_or_b64 exec, exec -define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) { +define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 262143 + %out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999 + %in.gep = getelementptr i32, ptr addrspace(4) %in, i64 262143 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i32, i32 addrspace(4)* %in.gep + %tmp1 = load i32, ptr addrspace(4) %in.gep br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(1)* %out.gep + store i32 %x, ptr addrspace(1) %out.gep br label %done done: @@ -463,9 +463,9 @@ } ; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32 -; OPT-SI: getelementptr i32, i32 addrspace(4)* -; OPT-CI-NOT: getelementptr i32, i32 addrspace(4)* -; OPT-VI: getelementptr i32, i32 addrspace(4)* +; OPT-SI: getelementptr i32, ptr addrspace(4) +; OPT-CI-NOT: getelementptr i32, ptr addrspace(4) +; OPT-VI: getelementptr i32, ptr addrspace(4) ; OPT: br i1 ; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32: @@ -479,21 +479,21 @@ ; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} ; GCN: s_or_b64 exec, exec -define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) { +define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_p1_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 262144 + %out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999 + %in.gep = getelementptr i32, ptr addrspace(4) %in, i64 262144 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i32, i32 addrspace(4)* %in.gep + %tmp1 = load i32, ptr addrspace(4) %in.gep br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(1)* %out.gep + store i32 %x, ptr addrspace(1) %out.gep br label %done done: @@ -509,15 +509,15 @@ ; GCN: s_load_dword [[SREG1:s[0-9]+]], ; GCN: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]] ; GCN-DAG: ds_read2_b32 v[{{[0-9+:[0-9]+}}], [[VREG1]] offset0:3 offset1:5 -define amdgpu_kernel void @sink_ds_address(%struct.foo addrspace(3)* nocapture %ptr) nounwind { +define amdgpu_kernel void @sink_ds_address(ptr addrspace(3) nocapture %ptr) nounwind { entry: - %x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0 - %y = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2 + %x = getelementptr inbounds %struct.foo, ptr addrspace(3) %ptr, i32 0, i32 1, i32 0 + %y = getelementptr inbounds %struct.foo, ptr addrspace(3) %ptr, i32 0, i32 1, i32 2 br label %bb32 bb32: - %a = load float, float addrspace(3)* %x, align 4 - %b = load float, float addrspace(3)* %y, align 4 + %a = load float, ptr addrspace(3) %x, align 4 + %b = load float, ptr addrspace(3) %y, align 4 %cmp = fcmp one float %a, %b br i1 %cmp, label %bb34, label %bb33 @@ -535,22 +535,21 @@ ; OPT: br i1 %tmp0, ; OPT: if: ; OPT: 
getelementptr i8, {{.*}} 4095 -define amdgpu_kernel void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(4)* %in) { +define amdgpu_kernel void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(ptr addrspace(1) %out, ptr addrspace(4) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024 - %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095 + %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 1024 + %in.gep = getelementptr i8, ptr addrspace(4) %in, i64 4095 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %bitcast = bitcast i8 addrspace(4)* %in.gep to i32 addrspace(4)* - %tmp1 = load i32, i32 addrspace(4)* %bitcast, align 1 + %tmp1 = load i32, ptr addrspace(4) %in.gep, align 1 br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(1)* %out.gep + store i32 %x, ptr addrspace(1) %out.gep br label %done done: @@ -558,25 +557,23 @@ } ; OPT-LABEL: @test_sink_local_small_offset_atomicrmw_i32( -; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)* -; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28 -; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)* -; OPT: %tmp1 = atomicrmw add i32 addrspace(3)* %1, i32 2 seq_cst -define amdgpu_kernel void @test_sink_local_small_offset_atomicrmw_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) { +; OPT: %sunkaddr = getelementptr i8, ptr addrspace(3) %in, i32 28 +; OPT: %tmp1 = atomicrmw add ptr addrspace(3) %sunkaddr, i32 2 seq_cst +define amdgpu_kernel void @test_sink_local_small_offset_atomicrmw_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999 - %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 + %out.gep = getelementptr i32, ptr addrspace(3) %out, i32 999999 + %in.gep = getelementptr i32, ptr addrspace(3) %in, i32 7 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = atomicrmw add i32 addrspace(3)* %in.gep, i32 2 seq_cst + %tmp1 = atomicrmw add ptr addrspace(3) %in.gep, i32 2 seq_cst br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(3)* %out.gep + store i32 %x, ptr addrspace(3) %out.gep br label %done done: @@ -584,26 +581,24 @@ } ; OPT-LABEL: @test_sink_local_small_offset_cmpxchg_i32( -; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)* -; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28 -; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)* -; OPT: %tmp1.struct = cmpxchg i32 addrspace(3)* %1, i32 undef, i32 2 seq_cst monotonic -define amdgpu_kernel void @test_sink_local_small_offset_cmpxchg_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) { +; OPT: %sunkaddr = getelementptr i8, ptr addrspace(3) %in, i32 28 +; OPT: %tmp1.struct = cmpxchg ptr addrspace(3) %sunkaddr, i32 undef, i32 2 seq_cst monotonic +define amdgpu_kernel void @test_sink_local_small_offset_cmpxchg_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999 - %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 + %out.gep = getelementptr i32, ptr addrspace(3) %out, i32 999999 + %in.gep = getelementptr i32, ptr addrspace(3) %in, i32 7 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 
%tmp0, label %endif, label %if if: - %tmp1.struct = cmpxchg i32 addrspace(3)* %in.gep, i32 undef, i32 2 seq_cst monotonic + %tmp1.struct = cmpxchg ptr addrspace(3) %in.gep, i32 undef, i32 2 seq_cst monotonic %tmp1 = extractvalue { i32, i1 } %tmp1.struct, 0 br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(3)* %out.gep + store i32 %x, ptr addrspace(3) %out.gep br label %done done: @@ -611,25 +606,25 @@ } ; OPT-LABEL: @test_wrong_operand_local_small_offset_cmpxchg_i32( -; OPT: %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 +; OPT: %in.gep = getelementptr i32, ptr addrspace(3) %in, i32 7 ; OPT: br i1 -; OPT: cmpxchg i32 addrspace(3)* addrspace(3)* undef, i32 addrspace(3)* %in.gep, i32 addrspace(3)* undef seq_cst monotonic -define amdgpu_kernel void @test_wrong_operand_local_small_offset_cmpxchg_i32(i32 addrspace(3)* addrspace(3)* %out, i32 addrspace(3)* %in) { +; OPT: cmpxchg ptr addrspace(3) undef, ptr addrspace(3) %in.gep, ptr addrspace(3) undef seq_cst monotonic +define amdgpu_kernel void @test_wrong_operand_local_small_offset_cmpxchg_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) { entry: - %out.gep = getelementptr i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* %out, i32 999999 - %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 + %out.gep = getelementptr ptr addrspace(3), ptr addrspace(3) %out, i32 999999 + %in.gep = getelementptr i32, ptr addrspace(3) %in, i32 7 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1.struct = cmpxchg i32 addrspace(3)* addrspace(3)* undef, i32 addrspace(3)* %in.gep, i32 addrspace(3)* undef seq_cst monotonic - %tmp1 = extractvalue { i32 addrspace(3)*, i1 } %tmp1.struct, 0 + %tmp1.struct = cmpxchg ptr addrspace(3) undef, ptr addrspace(3) %in.gep, ptr addrspace(3) undef seq_cst monotonic + %tmp1 = extractvalue { ptr addrspace(3), i1 } %tmp1.struct, 0 br label %endif endif: - %x = phi i32 addrspace(3)* [ %tmp1, %if ], [ null, %entry ] - store i32 addrspace(3)* %x, i32 addrspace(3)* addrspace(3)* %out.gep + %x = phi ptr addrspace(3) [ %tmp1, %if ], [ null, %entry ] + store ptr addrspace(3) %x, ptr addrspace(3) %out.gep br label %done done: @@ -637,25 +632,23 @@ } ; OPT-LABEL: @test_sink_local_small_offset_atomic_inc_i32( -; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)* -; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28 -; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)* -; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %1, i32 2, i32 0, i32 0, i1 false) -define amdgpu_kernel void @test_sink_local_small_offset_atomic_inc_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) { +; OPT: %sunkaddr = getelementptr i8, ptr addrspace(3) %in, i32 28 +; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %sunkaddr, i32 2, i32 0, i32 0, i1 false) +define amdgpu_kernel void @test_sink_local_small_offset_atomic_inc_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999 - %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 + %out.gep = getelementptr i32, ptr addrspace(3) %out, i32 999999 + %in.gep = getelementptr i32, ptr addrspace(3) %in, i32 7 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2, i32 0, i32 0, 
i1 false) + %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %in.gep, i32 2, i32 0, i32 0, i1 false) br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(3)* %out.gep + store i32 %x, ptr addrspace(3) %out.gep br label %done done: @@ -663,25 +656,23 @@ } ; OPT-LABEL: @test_sink_local_small_offset_atomic_dec_i32( -; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)* -; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28 -; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)* -; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %1, i32 2, i32 0, i32 0, i1 false) -define amdgpu_kernel void @test_sink_local_small_offset_atomic_dec_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) { +; OPT: %sunkaddr = getelementptr i8, ptr addrspace(3) %in, i32 28 +; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %sunkaddr, i32 2, i32 0, i32 0, i1 false) +define amdgpu_kernel void @test_sink_local_small_offset_atomic_dec_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999 - %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 + %out.gep = getelementptr i32, ptr addrspace(3) %out, i32 999999 + %in.gep = getelementptr i32, ptr addrspace(3) %in, i32 7 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2, i32 0, i32 0, i1 false) + %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %in.gep, i32 2, i32 0, i32 0, i1 false) br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(3)* %out.gep + store i32 %x, ptr addrspace(3) %out.gep br label %done done: @@ -689,33 +680,33 @@ } ; OPT-LABEL: @test_sink_global_small_min_scratch_global_offset( -; OPT-SICIVI: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 -4096 +; OPT-SICIVI: %in.gep = getelementptr i8, ptr addrspace(1) %in, i64 -4096 ; OPT-SICIV: br -; OPT-SICIVI: %tmp1 = load i8, i8 addrspace(1)* %in.gep +; OPT-SICIVI: %tmp1 = load i8, ptr addrspace(1) %in.gep ; OPT-GFX9: br -; OPT-GFX9: %sunkaddr = getelementptr i8, i8 addrspace(1)* %in, i64 -4096 -; OPT-GFX9: load i8, i8 addrspace(1)* %sunkaddr +; OPT-GFX9: %sunkaddr = getelementptr i8, ptr addrspace(1) %in, i64 -4096 +; OPT-GFX9: load i8, ptr addrspace(1) %sunkaddr ; GCN-LABEL: {{^}}test_sink_global_small_min_scratch_global_offset: ; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; GFX9: global_load_sbyte v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}} offset:-4096{{$}} -define amdgpu_kernel void @test_sink_global_small_min_scratch_global_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { +define amdgpu_kernel void @test_sink_global_small_min_scratch_global_offset(ptr addrspace(1) %out, ptr addrspace(1) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024 - %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 -4096 + %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 1024 + %in.gep = getelementptr i8, ptr addrspace(1) %in, i64 -4096 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i8, i8 addrspace(1)* %in.gep + %tmp1 = load i8, ptr addrspace(1) %in.gep %tmp2 = sext i8 %tmp1 to i32 br label %endif endif: %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] - store i32 %x, i32 
addrspace(1)* %out.gep + store i32 %x, ptr addrspace(1) %out.gep br label %done done: @@ -723,27 +714,27 @@ } ; OPT-LABEL: @test_sink_global_small_min_scratch_global_neg1_offset( -; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 -4097 +; OPT: %in.gep = getelementptr i8, ptr addrspace(1) %in, i64 -4097 ; OPT: br -; OPT: load i8, i8 addrspace(1)* %in.gep +; OPT: load i8, ptr addrspace(1) %in.gep ; GCN-LABEL: {{^}}test_sink_global_small_min_scratch_global_neg1_offset: -define amdgpu_kernel void @test_sink_global_small_min_scratch_global_neg1_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { +define amdgpu_kernel void @test_sink_global_small_min_scratch_global_neg1_offset(ptr addrspace(1) %out, ptr addrspace(1) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999 - %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 -4097 + %out.gep = getelementptr i32, ptr addrspace(1) %out, i64 99999 + %in.gep = getelementptr i8, ptr addrspace(1) %in, i64 -4097 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i8, i8 addrspace(1)* %in.gep + %tmp1 = load i8, ptr addrspace(1) %in.gep %tmp2 = sext i8 %tmp1 to i32 br label %endif endif: %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(1)* %out.gep + store i32 %x, ptr addrspace(1) %out.gep br label %done done: @@ -751,25 +742,23 @@ } ; OPT-LABEL: @test_sink_small_offset_ds_append( -; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)* -; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28 -; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)* -; OPT: %tmp1 = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %1, i1 false) -define amdgpu_kernel void @test_sink_small_offset_ds_append(i32 addrspace(3)* %out, i32 addrspace(3)* %in) { +; OPT: %sunkaddr = getelementptr i8, ptr addrspace(3) %in, i32 28 +; OPT: %tmp1 = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %sunkaddr, i1 false) +define amdgpu_kernel void @test_sink_small_offset_ds_append(ptr addrspace(3) %out, ptr addrspace(3) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999 - %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 + %out.gep = getelementptr i32, ptr addrspace(3) %out, i32 999999 + %in.gep = getelementptr i32, ptr addrspace(3) %in, i32 7 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %in.gep, i1 false) + %tmp1 = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %in.gep, i1 false) br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(3)* %out.gep + store i32 %x, ptr addrspace(3) %out.gep br label %done done: @@ -777,25 +766,23 @@ } ; OPT-LABEL: @test_sink_small_offset_ds_consume( -; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)* -; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28 -; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)* -; OPT: %tmp1 = call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %1, i1 false) -define amdgpu_kernel void @test_sink_small_offset_ds_consume(i32 addrspace(3)* %out, i32 addrspace(3)* %in) { +; OPT: %sunkaddr = getelementptr i8, ptr addrspace(3) %in, i32 28 +; OPT: %tmp1 = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %sunkaddr, i1 false) +define amdgpu_kernel void 
@test_sink_small_offset_ds_consume(ptr addrspace(3) %out, ptr addrspace(3) %in) { entry: - %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999 - %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 + %out.gep = getelementptr i32, ptr addrspace(3) %out, i32 999999 + %in.gep = getelementptr i32, ptr addrspace(3) %in, i32 7 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %in.gep, i1 false) + %tmp1 = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %in.gep, i1 false) br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(3)* %out.gep + store i32 %x, ptr addrspace(3) %out.gep br label %done done: @@ -803,10 +790,10 @@ } declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 -declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2 -declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2 -declare i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* nocapture, i1 immarg) #3 -declare i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* nocapture, i1 immarg) #3 +declare i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) nocapture, i1 immarg) #3 +declare i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) nocapture, i1 immarg) #3 attributes #0 = { nounwind readnone } attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll --- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll @@ -62,10 +62,9 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - %gep_lo = getelementptr inbounds half, half addrspace(5)* null, i64 1 - %load_lo = load half, half addrspace(5)* %gep_lo - %gep_hi = getelementptr inbounds half, half addrspace(5)* null, i64 0 - %load_hi = load half, half addrspace(5)* %gep_hi + %gep_lo = getelementptr inbounds half, ptr addrspace(5) null, i64 1 + %load_lo = load half, ptr addrspace(5) %gep_lo + %load_hi = load half, ptr addrspace(5) null %temp = insertelement <2 x half> undef, half %load_lo, i32 0 %result = insertelement <2 x half> %temp, half %load_hi, i32 1 @@ -73,7 +72,7 @@ ret <2 x half> %result } -define <2 x half> @chain_hi_to_lo_private_different_bases(half addrspace(5)* %base_lo, half addrspace(5)* %base_hi) { +define <2 x half> @chain_hi_to_lo_private_different_bases(ptr addrspace(5) %base_lo, ptr addrspace(5) %base_hi) { ; GFX900-LABEL: chain_hi_to_lo_private_different_bases: ; GFX900: ; %bb.0: ; %bb ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -122,8 +121,8 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - %load_lo = load half, half addrspace(5)* %base_lo - %load_hi = load half, half addrspace(5)* %base_hi + %load_lo = load half, ptr addrspace(5) %base_lo + %load_hi = load half, ptr addrspace(5) %base_hi %temp = insertelement <2 x half> undef, half %load_lo, i32 0 %result = insertelement <2 x half> %temp, half %load_hi, i32 1 @@ -131,7 +130,7 @@ ret <2 x half> %result } -define <2 x half> @chain_hi_to_lo_arithmatic(half addrspace(5)* %base, half %in) { +define <2 x half> @chain_hi_to_lo_arithmatic(ptr addrspace(5) %base, half %in) { ; GFX900-LABEL: 
chain_hi_to_lo_arithmatic: ; GFX900: ; %bb.0: ; %bb ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -181,7 +180,7 @@ ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: %arith_lo = fadd half %in, 1.0 - %load_hi = load half, half addrspace(5)* %base + %load_hi = load half, ptr addrspace(5) %base %temp = insertelement <2 x half> undef, half %arith_lo, i32 0 %result = insertelement <2 x half> %temp, half %load_hi, i32 1 @@ -222,10 +221,9 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - %gep_lo = getelementptr inbounds half, half addrspace(3)* null, i64 1 - %load_lo = load half, half addrspace(3)* %gep_lo - %gep_hi = getelementptr inbounds half, half addrspace(3)* null, i64 0 - %load_hi = load half, half addrspace(3)* %gep_hi + %gep_lo = getelementptr inbounds half, ptr addrspace(3) null, i64 1 + %load_lo = load half, ptr addrspace(3) %gep_lo + %load_hi = load half, ptr addrspace(3) null %temp = insertelement <2 x half> undef, half %load_lo, i32 0 %result = insertelement <2 x half> %temp, half %load_hi, i32 1 @@ -233,7 +231,7 @@ ret <2 x half> %result } -define <2 x half> @chain_hi_to_lo_group_different_bases(half addrspace(3)* %base_lo, half addrspace(3)* %base_hi) { +define <2 x half> @chain_hi_to_lo_group_different_bases(ptr addrspace(3) %base_lo, ptr addrspace(3) %base_hi) { ; GCN-LABEL: chain_hi_to_lo_group_different_bases: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -263,8 +261,8 @@ ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - %load_lo = load half, half addrspace(3)* %base_lo - %load_hi = load half, half addrspace(3)* %base_hi + %load_lo = load half, ptr addrspace(3) %base_lo + %load_hi = load half, ptr addrspace(3) %base_hi %temp = insertelement <2 x half> undef, half %load_lo, i32 0 %result = insertelement <2 x half> %temp, half %load_hi, i32 1 @@ -314,10 +312,9 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - %gep_lo = getelementptr inbounds half, half addrspace(1)* null, i64 1 - %load_lo = load half, half addrspace(1)* %gep_lo - %gep_hi = getelementptr inbounds half, half addrspace(1)* null, i64 0 - %load_hi = load half, half addrspace(1)* %gep_hi + %gep_lo = getelementptr inbounds half, ptr addrspace(1) null, i64 1 + %load_lo = load half, ptr addrspace(1) %gep_lo + %load_hi = load half, ptr addrspace(1) null %temp = insertelement <2 x half> undef, half %load_lo, i32 0 %result = insertelement <2 x half> %temp, half %load_hi, i32 1 @@ -325,7 +322,7 @@ ret <2 x half> %result } -define <2 x half> @chain_hi_to_lo_global_different_bases(half addrspace(1)* %base_lo, half addrspace(1)* %base_hi) { +define <2 x half> @chain_hi_to_lo_global_different_bases(ptr addrspace(1) %base_lo, ptr addrspace(1) %base_hi) { ; GCN-LABEL: chain_hi_to_lo_global_different_bases: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -355,8 +352,8 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - %load_lo = load half, half addrspace(1)* %base_lo - %load_hi = load half, half addrspace(1)* %base_hi + %load_lo = load half, ptr addrspace(1) %base_lo + %load_hi = load half, ptr addrspace(1) %base_hi %temp = insertelement <2 x half> undef, half %load_lo, i32 0 %result = insertelement <2 x half> %temp, half %load_hi, i32 1 @@ -406,10 +403,9 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - %gep_lo = getelementptr inbounds half, half* null, i64 1 - %load_lo = load half, half* %gep_lo - %gep_hi = getelementptr 
inbounds half, half* null, i64 0 - %load_hi = load half, half* %gep_hi + %gep_lo = getelementptr inbounds half, ptr null, i64 1 + %load_lo = load half, ptr %gep_lo + %load_hi = load half, ptr null %temp = insertelement <2 x half> undef, half %load_lo, i32 0 %result = insertelement <2 x half> %temp, half %load_hi, i32 1 @@ -417,7 +413,7 @@ ret <2 x half> %result } -define <2 x half> @chain_hi_to_lo_flat_different_bases(half* %base_lo, half* %base_hi) { +define <2 x half> @chain_hi_to_lo_flat_different_bases(ptr %base_lo, ptr %base_hi) { ; GCN-LABEL: chain_hi_to_lo_flat_different_bases: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -447,8 +443,8 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - %load_lo = load half, half* %base_lo - %load_hi = load half, half* %base_hi + %load_lo = load half, ptr %base_lo + %load_hi = load half, ptr %base_hi %temp = insertelement <2 x half> undef, half %load_lo, i32 0 %result = insertelement <2 x half> %temp, half %load_hi, i32 1 @@ -457,7 +453,7 @@ } ; Make sure we don't lose any of the private stores. -define amdgpu_kernel void @vload2_private(i16 addrspace(1)* nocapture readonly %in, <2 x i16> addrspace(1)* nocapture %out) #0 { +define amdgpu_kernel void @vload2_private(ptr addrspace(1) nocapture readonly %in, ptr addrspace(1) nocapture %out) #0 { ; GFX900-LABEL: vload2_private: ; GFX900: ; %bb.0: ; %entry ; GFX900-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 @@ -610,33 +606,28 @@ ; GFX11-NEXT: s_endpgm entry: %loc = alloca [3 x i16], align 2, addrspace(5) - %loc.0.sroa_cast1 = bitcast [3 x i16] addrspace(5)* %loc to i8 addrspace(5)* - %tmp = load i16, i16 addrspace(1)* %in, align 2 - %loc.0.sroa_idx = getelementptr inbounds [3 x i16], [3 x i16] addrspace(5)* %loc, i32 0, i32 0 - store volatile i16 %tmp, i16 addrspace(5)* %loc.0.sroa_idx - %arrayidx.1 = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 1 - %tmp1 = load i16, i16 addrspace(1)* %arrayidx.1, align 2 - %loc.2.sroa_idx3 = getelementptr inbounds [3 x i16], [3 x i16] addrspace(5)* %loc, i32 0, i32 1 - store volatile i16 %tmp1, i16 addrspace(5)* %loc.2.sroa_idx3 - %arrayidx.2 = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 2 - %tmp2 = load i16, i16 addrspace(1)* %arrayidx.2, align 2 - %loc.4.sroa_idx = getelementptr inbounds [3 x i16], [3 x i16] addrspace(5)* %loc, i32 0, i32 2 - store volatile i16 %tmp2, i16 addrspace(5)* %loc.4.sroa_idx - %loc.0.sroa_cast = bitcast [3 x i16] addrspace(5)* %loc to <2 x i16> addrspace(5)* - %loc.0. = load <2 x i16>, <2 x i16> addrspace(5)* %loc.0.sroa_cast, align 2 - store <2 x i16> %loc.0., <2 x i16> addrspace(1)* %out, align 4 - %loc.2.sroa_idx = getelementptr inbounds [3 x i16], [3 x i16] addrspace(5)* %loc, i32 0, i32 1 - %loc.2.sroa_cast = bitcast i16 addrspace(5)* %loc.2.sroa_idx to <2 x i16> addrspace(5)* - %loc.2. 
= load <2 x i16>, <2 x i16> addrspace(5)* %loc.2.sroa_cast, align 2 - %arrayidx6 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 1 - store <2 x i16> %loc.2., <2 x i16> addrspace(1)* %arrayidx6, align 4 - %loc.0.sroa_cast2 = bitcast [3 x i16] addrspace(5)* %loc to i8 addrspace(5)* + %tmp = load i16, ptr addrspace(1) %in, align 2 + store volatile i16 %tmp, ptr addrspace(5) %loc + %arrayidx.1 = getelementptr inbounds i16, ptr addrspace(1) %in, i64 1 + %tmp1 = load i16, ptr addrspace(1) %arrayidx.1, align 2 + %loc.2.sroa_idx3 = getelementptr inbounds [3 x i16], ptr addrspace(5) %loc, i32 0, i32 1 + store volatile i16 %tmp1, ptr addrspace(5) %loc.2.sroa_idx3 + %arrayidx.2 = getelementptr inbounds i16, ptr addrspace(1) %in, i64 2 + %tmp2 = load i16, ptr addrspace(1) %arrayidx.2, align 2 + %loc.4.sroa_idx = getelementptr inbounds [3 x i16], ptr addrspace(5) %loc, i32 0, i32 2 + store volatile i16 %tmp2, ptr addrspace(5) %loc.4.sroa_idx + %loc.0. = load <2 x i16>, ptr addrspace(5) %loc, align 2 + store <2 x i16> %loc.0., ptr addrspace(1) %out, align 4 + %loc.2.sroa_idx = getelementptr inbounds [3 x i16], ptr addrspace(5) %loc, i32 0, i32 1 + %loc.2. = load <2 x i16>, ptr addrspace(5) %loc.2.sroa_idx, align 2 + %arrayidx6 = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 1 + store <2 x i16> %loc.2., ptr addrspace(1) %arrayidx6, align 4 ret void } ; There is another instruction between the misordered instruction and ; the value dependent load, so a simple operand check is insufficient. -define <2 x i16> @chain_hi_to_lo_group_other_dep(i16 addrspace(3)* %ptr) { +define <2 x i16> @chain_hi_to_lo_group_other_dep(ptr addrspace(3) %ptr) { ; GCN-LABEL: chain_hi_to_lo_group_other_dep: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -672,10 +663,9 @@ ; GFX11-NEXT: v_mov_b32_e32 v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - %gep_lo = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i64 1 - %load_lo = load i16, i16 addrspace(3)* %gep_lo - %gep_hi = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i64 0 - %load_hi = load i16, i16 addrspace(3)* %gep_hi + %gep_lo = getelementptr inbounds i16, ptr addrspace(3) %ptr, i64 1 + %load_lo = load i16, ptr addrspace(3) %gep_lo + %load_hi = load i16, ptr addrspace(3) %ptr %to.hi = insertelement <2 x i16> undef, i16 %load_hi, i32 1 %op.hi = add <2 x i16> %to.hi, %result = insertelement <2 x i16> %op.hi, i16 %load_lo, i32 0 @@ -683,7 +673,7 @@ } ; The volatile operations aren't put on the same chain -define <2 x i16> @chain_hi_to_lo_group_other_dep_multi_chain(i16 addrspace(3)* %ptr) { +define <2 x i16> @chain_hi_to_lo_group_other_dep_multi_chain(ptr addrspace(3) %ptr) { ; GFX900-LABEL: chain_hi_to_lo_group_other_dep_multi_chain: ; GFX900: ; %bb.0: ; %bb ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -729,17 +719,16 @@ ; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - %gep_lo = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i64 1 - %load_lo = load volatile i16, i16 addrspace(3)* %gep_lo - %gep_hi = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i64 0 - %load_hi = load volatile i16, i16 addrspace(3)* %gep_hi + %gep_lo = getelementptr inbounds i16, ptr addrspace(3) %ptr, i64 1 + %load_lo = load volatile i16, ptr addrspace(3) %gep_lo + %load_hi = load volatile i16, ptr addrspace(3) %ptr %to.hi = insertelement <2 x i16> undef, i16 %load_hi, i32 1 %op.hi = add <2 x i16> %to.hi, %result = insertelement <2 x i16> %op.hi, i16 %load_lo, i32 0 ret <2 x i16> 
%result } -define <2 x i16> @chain_hi_to_lo_private_other_dep(i16 addrspace(5)* %ptr) { +define <2 x i16> @chain_hi_to_lo_private_other_dep(ptr addrspace(5) %ptr) { ; GFX900-LABEL: chain_hi_to_lo_private_other_dep: ; GFX900: ; %bb.0: ; %bb ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -798,17 +787,16 @@ ; GFX11-NEXT: v_mov_b32_e32 v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - %gep_lo = getelementptr inbounds i16, i16 addrspace(5)* %ptr, i64 1 - %load_lo = load i16, i16 addrspace(5)* %gep_lo - %gep_hi = getelementptr inbounds i16, i16 addrspace(5)* %ptr, i64 0 - %load_hi = load i16, i16 addrspace(5)* %gep_hi + %gep_lo = getelementptr inbounds i16, ptr addrspace(5) %ptr, i64 1 + %load_lo = load i16, ptr addrspace(5) %gep_lo + %load_hi = load i16, ptr addrspace(5) %ptr %to.hi = insertelement <2 x i16> undef, i16 %load_hi, i32 1 %op.hi = add <2 x i16> %to.hi, %result = insertelement <2 x i16> %op.hi, i16 %load_lo, i32 0 ret <2 x i16> %result } -define <2 x i16> @chain_hi_to_lo_global_other_dep(i16 addrspace(1)* %ptr) { +define <2 x i16> @chain_hi_to_lo_global_other_dep(ptr addrspace(1) %ptr) { ; GFX900-LABEL: chain_hi_to_lo_global_other_dep: ; GFX900: ; %bb.0: ; %bb ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -858,17 +846,16 @@ ; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v2, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - %gep_lo = getelementptr inbounds i16, i16 addrspace(1)* %ptr, i64 1 - %load_lo = load volatile i16, i16 addrspace(1)* %gep_lo - %gep_hi = getelementptr inbounds i16, i16 addrspace(1)* %ptr, i64 0 - %load_hi = load volatile i16, i16 addrspace(1)* %gep_hi + %gep_lo = getelementptr inbounds i16, ptr addrspace(1) %ptr, i64 1 + %load_lo = load volatile i16, ptr addrspace(1) %gep_lo + %load_hi = load volatile i16, ptr addrspace(1) %ptr %to.hi = insertelement <2 x i16> undef, i16 %load_hi, i32 1 %op.hi = add <2 x i16> %to.hi, %result = insertelement <2 x i16> %op.hi, i16 %load_lo, i32 0 ret <2 x i16> %result } -define <2 x i16> @chain_hi_to_lo_flat_other_dep(i16 addrspace(0)* %ptr) { +define <2 x i16> @chain_hi_to_lo_flat_other_dep(ptr addrspace(0) %ptr) { ; GFX900-LABEL: chain_hi_to_lo_flat_other_dep: ; GFX900: ; %bb.0: ; %bb ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -922,17 +909,16 @@ ; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v2, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - %gep_lo = getelementptr inbounds i16, i16 addrspace(0)* %ptr, i64 1 - %load_lo = load volatile i16, i16 addrspace(0)* %gep_lo - %gep_hi = getelementptr inbounds i16, i16 addrspace(0)* %ptr, i64 0 - %load_hi = load volatile i16, i16 addrspace(0)* %gep_hi + %gep_lo = getelementptr inbounds i16, ptr addrspace(0) %ptr, i64 1 + %load_lo = load volatile i16, ptr addrspace(0) %gep_lo + %load_hi = load volatile i16, ptr addrspace(0) %ptr %to.hi = insertelement <2 x i16> undef, i16 %load_hi, i32 1 %op.hi = add <2 x i16> %to.hi, %result = insertelement <2 x i16> %op.hi, i16 %load_lo, i32 0 ret <2 x i16> %result } -define <2 x i16> @chain_hi_to_lo_group_may_alias_store(i16 addrspace(3)* %ptr, i16 addrspace(3)* %may.alias) { +define <2 x i16> @chain_hi_to_lo_group_may_alias_store(ptr addrspace(3) %ptr, ptr addrspace(3) %may.alias) { ; GFX900-LABEL: chain_hi_to_lo_group_may_alias_store: ; GFX900: ; %bb.0: ; %bb ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -981,11 +967,10 @@ ; GFX11-NEXT: v_perm_b32 v0, v3, v0, 0x5040100 ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - %gep_lo = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i64 1 - %gep_hi = getelementptr inbounds i16, i16 addrspace(3)* 
%ptr, i64 0 - %load_hi = load i16, i16 addrspace(3)* %gep_hi - store i16 123, i16 addrspace(3)* %may.alias - %load_lo = load i16, i16 addrspace(3)* %gep_lo + %gep_lo = getelementptr inbounds i16, ptr addrspace(3) %ptr, i64 1 + %load_hi = load i16, ptr addrspace(3) %ptr + store i16 123, ptr addrspace(3) %may.alias + %load_lo = load i16, ptr addrspace(3) %gep_lo %to.hi = insertelement <2 x i16> undef, i16 %load_hi, i32 1 %result = insertelement <2 x i16> %to.hi, i16 %load_lo, i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll --- a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll +++ b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll @@ -27,7 +27,7 @@ ; DBG-NOT: Cluster ld/st -define amdgpu_kernel void @cluster_load_cluster_store(i32* noalias %lb, i32* noalias %sb) { +define amdgpu_kernel void @cluster_load_cluster_store(ptr noalias %lb, ptr noalias %sb) { ; GFX9-LABEL: cluster_load_cluster_store: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -116,23 +116,21 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: - %la0 = getelementptr inbounds i32, i32* %lb, i32 0 - %ld0 = load i32, i32* %la0 - %la1 = getelementptr inbounds i32, i32* %lb, i32 2 - %ld1 = load i32, i32* %la1 - %la2 = getelementptr inbounds i32, i32* %lb, i32 4 - %ld2 = load i32, i32* %la2 - %la3 = getelementptr inbounds i32, i32* %lb, i32 6 - %ld3 = load i32, i32* %la3 + %ld0 = load i32, ptr %lb + %la1 = getelementptr inbounds i32, ptr %lb, i32 2 + %ld1 = load i32, ptr %la1 + %la2 = getelementptr inbounds i32, ptr %lb, i32 4 + %ld2 = load i32, ptr %la2 + %la3 = getelementptr inbounds i32, ptr %lb, i32 6 + %ld3 = load i32, ptr %la3 - %sa0 = getelementptr inbounds i32, i32* %sb, i32 0 - store i32 %ld0, i32* %sa0 - %sa1 = getelementptr inbounds i32, i32* %sb, i32 2 - store i32 %ld1, i32* %sa1 - %sa2 = getelementptr inbounds i32, i32* %sb, i32 4 - store i32 %ld2, i32* %sa2 - %sa3 = getelementptr inbounds i32, i32* %sb, i32 6 - store i32 %ld3, i32* %sa3 + store i32 %ld0, ptr %sb + %sa1 = getelementptr inbounds i32, ptr %sb, i32 2 + store i32 %ld1, ptr %sa1 + %sa2 = getelementptr inbounds i32, ptr %sb, i32 4 + store i32 %ld2, ptr %sa2 + %sa3 = getelementptr inbounds i32, ptr %sb, i32 6 + store i32 %ld3, ptr %sa3 ret void } @@ -155,7 +153,7 @@ ; DBG-NOT: Cluster ld/st -define amdgpu_kernel void @cluster_load_valu_cluster_store(i32* noalias %lb, i32* noalias %sb) { +define amdgpu_kernel void @cluster_load_valu_cluster_store(ptr noalias %lb, ptr noalias %sb) { ; GFX9-LABEL: cluster_load_valu_cluster_store: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -248,24 +246,22 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm bb: - %la0 = getelementptr inbounds i32, i32* %lb, i32 0 - %ld0 = load i32, i32* %la0 - %la1 = getelementptr inbounds i32, i32* %lb, i32 2 - %ld1 = load i32, i32* %la1 - %la2 = getelementptr inbounds i32, i32* %lb, i32 4 - %ld2 = load i32, i32* %la2 - %la3 = getelementptr inbounds i32, i32* %lb, i32 6 - %ld3 = load i32, i32* %la3 + %ld0 = load i32, ptr %lb + %la1 = getelementptr inbounds i32, ptr %lb, i32 2 + %ld1 = load i32, ptr %la1 + %la2 = getelementptr inbounds i32, ptr %lb, i32 4 + %ld2 = load i32, ptr %la2 + %la3 = getelementptr inbounds i32, ptr %lb, i32 6 + %ld3 = load i32, ptr %la3 - %sa0 = getelementptr inbounds i32, i32* %sb, i32 0 - store i32 %ld0, i32* %sa0 - %sa1 = getelementptr inbounds i32, i32* %sb, i32 2 + store i32 %ld0, ptr %sb + %sa1 = getelementptr 
inbounds i32, ptr %sb, i32 2 %add = add i32 %ld1, 1 - store i32 %add, i32* %sa1 - %sa2 = getelementptr inbounds i32, i32* %sb, i32 4 - store i32 %ld2, i32* %sa2 - %sa3 = getelementptr inbounds i32, i32* %sb, i32 6 - store i32 %ld3, i32* %sa3 + store i32 %add, ptr %sa1 + %sa2 = getelementptr inbounds i32, ptr %sb, i32 4 + store i32 %ld2, ptr %sa2 + %sa3 = getelementptr inbounds i32, ptr %sb, i32 6 + store i32 %ld3, ptr %sa3 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll b/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll --- a/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll +++ b/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll @@ -21,7 +21,7 @@ br i1 %tmp9, label %bb1, label %bb2 bb1: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef br label %bb2 bb2: @@ -46,7 +46,7 @@ br i1 %tmp9, label %bb1, label %bb2 bb1: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef br label %bb2 bb2: diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll --- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll +++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s -define amdgpu_kernel void @add1(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @add1(ptr addrspace(1) nocapture %arg) { ; GCN-LABEL: add1: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -32,16 +32,16 @@ bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp ugt i32 %x, %y %ext = zext i1 %cmp to i32 %add = add i32 %v, %ext - store i32 %add, i32 addrspace(1)* %gep, align 4 + store i32 %add, ptr addrspace(1) %gep, align 4 ret void } -define i16 @add1_i16(i32 addrspace(1)* nocapture %arg, i16 addrspace(1)* nocapture %dst) { +define i16 @add1_i16(ptr addrspace(1) nocapture %arg, ptr addrspace(1) nocapture %dst) { ; GCN-LABEL: add1_i16: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -76,8 +76,8 @@ bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp ugt i32 %x, %y %ext = zext i1 %cmp to i32 %add = add i32 %v, %ext @@ -85,7 +85,7 @@ ret i16 %trunc } -define amdgpu_kernel void @sub1(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @sub1(ptr addrspace(1) nocapture %arg) { ; GCN-LABEL: sub1: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -115,16 +115,16 @@ bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = 
icmp ugt i32 %x, %y %ext = sext i1 %cmp to i32 %add = add i32 %v, %ext - store i32 %add, i32 addrspace(1)* %gep, align 4 + store i32 %add, ptr addrspace(1) %gep, align 4 ret void } -define amdgpu_kernel void @add_adde(i32 addrspace(1)* nocapture %arg, i32 %a) { +define amdgpu_kernel void @add_adde(ptr addrspace(1) nocapture %arg, i32 %a) { ; GCN-LABEL: add_adde: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -158,17 +158,17 @@ bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp ugt i32 %x, %y %ext = zext i1 %cmp to i32 %adde = add i32 %v, %ext %add2 = add i32 %adde, %a - store i32 %add2, i32 addrspace(1)* %gep, align 4 + store i32 %add2, ptr addrspace(1) %gep, align 4 ret void } -define amdgpu_kernel void @adde_add(i32 addrspace(1)* nocapture %arg, i32 %a) { +define amdgpu_kernel void @adde_add(ptr addrspace(1) nocapture %arg, i32 %a) { ; GCN-LABEL: adde_add: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -202,17 +202,17 @@ bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp ugt i32 %x, %y %ext = zext i1 %cmp to i32 %add = add i32 %v, %a %adde = add i32 %add, %ext - store i32 %adde, i32 addrspace(1)* %gep, align 4 + store i32 %adde, ptr addrspace(1) %gep, align 4 ret void } -define amdgpu_kernel void @sub_sube(i32 addrspace(1)* nocapture %arg, i32 %a) { +define amdgpu_kernel void @sub_sube(ptr addrspace(1) nocapture %arg, i32 %a) { ; GCN-LABEL: sub_sube: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -246,17 +246,17 @@ bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp ugt i32 %x, %y %ext = sext i1 %cmp to i32 %adde = add i32 %v, %ext %sub = sub i32 %adde, %a - store i32 %sub, i32 addrspace(1)* %gep, align 4 + store i32 %sub, ptr addrspace(1) %gep, align 4 ret void } -define amdgpu_kernel void @sub_sube_commuted(i32 addrspace(1)* nocapture %arg, i32 %a) { +define amdgpu_kernel void @sub_sube_commuted(ptr addrspace(1) nocapture %arg, i32 %a) { ; GCN-LABEL: sub_sube_commuted: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -294,18 +294,18 @@ bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp ugt i32 %x, %y %ext = sext i1 %cmp to i32 %adde = add i32 %v, %ext %sub = sub i32 %adde, %a %sub2 = sub i32 100, %sub - store i32 %sub2, i32 addrspace(1)* %gep, align 4 + store i32 %sub2, ptr addrspace(1) %gep, align 4 ret void } -define amdgpu_kernel void 
@sube_sub(i32 addrspace(1)* nocapture %arg, i32 %a) { +define amdgpu_kernel void @sube_sub(ptr addrspace(1) nocapture %arg, i32 %a) { ; GCN-LABEL: sube_sub: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -339,17 +339,17 @@ bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp ugt i32 %x, %y %ext = sext i1 %cmp to i32 %sub = sub i32 %v, %a %adde = add i32 %sub, %ext - store i32 %adde, i32 addrspace(1)* %gep, align 4 + store i32 %adde, ptr addrspace(1) %gep, align 4 ret void } -define amdgpu_kernel void @zext_flclass(i32 addrspace(1)* nocapture %arg, float %x) { +define amdgpu_kernel void @zext_flclass(ptr addrspace(1) nocapture %arg, float %x) { ; GCN-LABEL: zext_flclass: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -382,16 +382,16 @@ ; GFX9-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %id - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %id + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = tail call zeroext i1 @llvm.amdgcn.class.f32(float %x, i32 608) %ext = zext i1 %cmp to i32 %add = add i32 %v, %ext - store i32 %add, i32 addrspace(1)* %gep, align 4 + store i32 %add, ptr addrspace(1) %gep, align 4 ret void } -define amdgpu_kernel void @sext_flclass(i32 addrspace(1)* nocapture %arg, float %x) { +define amdgpu_kernel void @sext_flclass(ptr addrspace(1) nocapture %arg, float %x) { ; GCN-LABEL: sext_flclass: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -424,16 +424,16 @@ ; GFX9-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %id - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %id + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = tail call zeroext i1 @llvm.amdgcn.class.f32(float %x, i32 608) %ext = sext i1 %cmp to i32 %add = add i32 %v, %ext - store i32 %add, i32 addrspace(1)* %gep, align 4 + store i32 %add, ptr addrspace(1) %gep, align 4 ret void } -define amdgpu_kernel void @add_and(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @add_and(ptr addrspace(1) nocapture %arg) { ; GCN-LABEL: add_and: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -467,19 +467,19 @@ bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp1 = icmp ugt i32 %x, %y %cmp2 = icmp ugt i32 %x, 1 %cmp = and i1 %cmp1, %cmp2 %ext = zext i1 %cmp to i32 %add = add i32 %v, %ext - store i32 %add, i32 addrspace(1)* %gep, align 4 + store i32 %add, ptr addrspace(1) %gep, align 4 ret void } ; sub x, sext (setcc) => addcarry x, 0, setcc -define amdgpu_kernel void @cmp_sub_sext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @cmp_sub_sext(ptr addrspace(1) nocapture %arg) { ; GCN-LABEL: cmp_sub_sext: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -509,17 +509,17 @@ bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp ugt i32 %x, %y %ext = sext i1 %cmp to i32 %add = sub i32 %v, %ext - store i32 %add, i32 addrspace(1)* %gep, align 4 + store i32 %add, ptr addrspace(1) %gep, align 4 ret void } ; sub x, zext (setcc) => subcarry x, 0, setcc -define amdgpu_kernel void @cmp_sub_zext(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @cmp_sub_zext(ptr addrspace(1) nocapture %arg) { ; GCN-LABEL: cmp_sub_zext: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -549,16 +549,16 @@ bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp ugt i32 %x, %y %ext = zext i1 %cmp to i32 %add = sub i32 %v, %ext - store i32 %add, i32 addrspace(1)* %gep, align 4 + store i32 %add, ptr addrspace(1) %gep, align 4 ret void } -define amdgpu_kernel void @sub_addcarry(i32 addrspace(1)* nocapture %arg, i32 %a) { +define amdgpu_kernel void @sub_addcarry(ptr addrspace(1) nocapture %arg, i32 %a) { ; GCN-LABEL: sub_addcarry: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -592,17 +592,17 @@ bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp ugt i32 %x, %y %ext = zext i1 %cmp to i32 %adde = add i32 %v, %ext %add2 = sub i32 %adde, %a - store i32 %add2, i32 addrspace(1)* %gep, align 4 + store i32 %add2, ptr addrspace(1) %gep, align 4 ret void } -define amdgpu_kernel void @sub_subcarry(i32 addrspace(1)* nocapture %arg, i32 %a) { +define amdgpu_kernel void @sub_subcarry(ptr addrspace(1) nocapture %arg, i32 %a) { ; GCN-LABEL: sub_subcarry: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 @@ -636,18 +636,18 @@ bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp ugt i32 %x, %y %ext = zext i1 %cmp to i32 %adde = sub i32 %v, %ext %add2 = sub i32 %adde, %a - store i32 %add2, i32 addrspace(1)* %gep, align 4 + store i32 %add2, ptr addrspace(1) %gep, align 4 ret void } ; Check case where sub is commuted with zext -define amdgpu_kernel void @sub_zext_setcc_commute(i32 addrspace(1)* nocapture %arg, i32 %a, i32%b) { +define amdgpu_kernel void @sub_zext_setcc_commute(ptr addrspace(1) nocapture %arg, i32 %a, i32%b) { ; GCN-LABEL: sub_zext_setcc_commute: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -684,19 +684,19 @@ bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 
@llvm.amdgcn.workitem.id.y() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp ugt i32 %x, %y %ext = zext i1 %cmp to i32 %adde = sub i32 %v, %ext %sub = sub i32 %a, %adde %sub2 = sub i32 %sub, %b - store i32 %sub2, i32 addrspace(1)* %gep, align 4 + store i32 %sub2, ptr addrspace(1) %gep, align 4 ret void } ; Check case where sub is commuted with sext -define amdgpu_kernel void @sub_sext_setcc_commute(i32 addrspace(1)* nocapture %arg, i32 %a, i32%b) { +define amdgpu_kernel void @sub_sext_setcc_commute(ptr addrspace(1) nocapture %arg, i32 %a, i32%b) { ; GCN-LABEL: sub_sext_setcc_commute: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -733,14 +733,14 @@ bb: %x = tail call i32 @llvm.amdgcn.workitem.id.x() %y = tail call i32 @llvm.amdgcn.workitem.id.y() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x - %v = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %x + %v = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp ugt i32 %x, %y %ext = sext i1 %cmp to i32 %adde = sub i32 %v, %ext %sub = sub i32 %a, %adde %sub2 = sub i32 %sub, %b - store i32 %sub2, i32 addrspace(1)* %gep, align 4 + store i32 %sub2, ptr addrspace(1) %gep, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/combine-reg-or-const.ll b/llvm/test/CodeGen/AMDGPU/combine-reg-or-const.ll --- a/llvm/test/CodeGen/AMDGPU/combine-reg-or-const.ll +++ b/llvm/test/CodeGen/AMDGPU/combine-reg-or-const.ll @@ -9,7 +9,7 @@ ; CHECK: s_add_i32 [[S2:s[0-9]+]], {{s[0-9]+}}, [[S1]] ; CHECK: s_or_b32 {{s[0-9]+}}, [[S2]], 0xc0 -define protected amdgpu_kernel void @_Z11test_kernelPii(i32 addrspace(1)* nocapture %Ad.coerce, i32 %s) local_unnamed_addr #5 { +define protected amdgpu_kernel void @_Z11test_kernelPii(ptr addrspace(1) nocapture %Ad.coerce, i32 %s) local_unnamed_addr #5 { entry: %cmp = icmp eq i32 %s, 3 br i1 %cmp, label %if.then, label %if.end @@ -19,11 +19,11 @@ %rem4 = urem i16 %rem.lhs.trunc, 12 %rem.zext = zext i16 %rem4 to i32 %idxprom = zext i32 %s to i64 - %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %Ad.coerce, i64 %idxprom + %arrayidx3 = getelementptr inbounds i32, ptr addrspace(1) %Ad.coerce, i64 %idxprom %div = lshr i32 %rem.zext, 3 %or = or i32 %rem.zext, 192 %add = add nuw nsw i32 %or, %div - store i32 %add, i32 addrspace(1)* %arrayidx3, align 4 + store i32 %add, ptr addrspace(1) %arrayidx3, align 4 br label %if.end if.end: ; preds = %if.then, %entry diff --git a/llvm/test/CodeGen/AMDGPU/combine-vload-extract.ll b/llvm/test/CodeGen/AMDGPU/combine-vload-extract.ll --- a/llvm/test/CodeGen/AMDGPU/combine-vload-extract.ll +++ b/llvm/test/CodeGen/AMDGPU/combine-vload-extract.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -define amdgpu_kernel void @vectorLoadCombine(<4 x i8>* %in, i32* %out) { +define amdgpu_kernel void @vectorLoadCombine(ptr %in, ptr %out) { ; GCN-LABEL: vectorLoadCombine: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -15,7 +15,7 @@ ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm entry: - %0 = load <4 x i8>, <4 x i8>* %in, align 4 + %0 = load <4 x i8>, ptr %in, align 4 %1 = extractelement <4 x i8> %0, i32 0 %2 = 
extractelement <4 x i8> %0, i32 1 %3 = extractelement <4 x i8> %0, i32 2 @@ -30,11 +30,11 @@ %zext3 = zext i8 %4 to i32 %shift3 = shl nuw i32 %zext3, 24 %insert3 = or i32 %insert2, %shift3 - store i32 %insert3, i32* %out + store i32 %insert3, ptr %out ret void } -define amdgpu_kernel void @vectorLoadShuffle(<4 x i8>* %in, i32* %out) { +define amdgpu_kernel void @vectorLoadShuffle(ptr %in, ptr %out) { ; GCN-LABEL: vectorLoadShuffle: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -55,7 +55,7 @@ ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm entry: - %0 = load <4 x i8>, <4 x i8>* %in, align 4 + %0 = load <4 x i8>, ptr %in, align 4 %1 = extractelement <4 x i8> %0, i32 0 %2 = extractelement <4 x i8> %0, i32 1 %3 = extractelement <4 x i8> %0, i32 2 @@ -70,19 +70,19 @@ %zext3 = zext i8 %4 to i32 %shift3 = shl nuw i32 %zext3, 24 %insert3 = or i32 %insert2, %shift3 - store i32 %insert3, i32* %out + store i32 %insert3, ptr %out ret void } -define i32 @load_2xi16_combine(i16 addrspace(1)* %p) #0 { +define i32 @load_2xi16_combine(ptr addrspace(1) %p) #0 { ; GCN-LABEL: load_2xi16_combine: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: global_load_dword v0, v[0:1], off ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %gep.p = getelementptr i16, i16 addrspace(1)* %p, i32 1 - %p.0 = load i16, i16 addrspace(1)* %p, align 4 - %p.1 = load i16, i16 addrspace(1)* %gep.p, align 4 + %gep.p = getelementptr i16, ptr addrspace(1) %p, i32 1 + %p.0 = load i16, ptr addrspace(1) %p, align 4 + %p.1 = load i16, ptr addrspace(1) %gep.p, align 4 %zext.0 = zext i16 %p.0 to i32 %zext.1 = zext i16 %p.1 to i32 %shl.1 = shl i32 %zext.1, 16 @@ -90,7 +90,7 @@ ret i32 %or } -define i32 @load_2xi16_noncombine(i16 addrspace(1)* %p) #0 { +define i32 @load_2xi16_noncombine(ptr addrspace(1) %p) #0 { ; GCN-LABEL: load_2xi16_noncombine: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -99,9 +99,9 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_lshl_or_b32 v0, v3, 16, v2 ; GCN-NEXT: s_setpc_b64 s[30:31] - %gep.p = getelementptr i16, i16 addrspace(1)* %p, i32 2 - %p.0 = load i16, i16 addrspace(1)* %p, align 4 - %p.1 = load i16, i16 addrspace(1)* %gep.p, align 4 + %gep.p = getelementptr i16, ptr addrspace(1) %p, i32 2 + %p.0 = load i16, ptr addrspace(1) %p, align 4 + %p.1 = load i16, ptr addrspace(1) %gep.p, align 4 %zext.0 = zext i16 %p.0 to i32 %zext.1 = zext i16 %p.1 to i32 %shl.1 = shl i32 %zext.1, 16 @@ -109,16 +109,16 @@ ret i32 %or } -define i64 @load_2xi32_combine(i32 addrspace(1)* %p) #0 { +define i64 @load_2xi32_combine(ptr addrspace(1) %p) #0 { ; GCN-LABEL: load_2xi32_combine: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %gep.p = getelementptr i32, i32 addrspace(1)* %p, i32 1 - %p.0 = load i32, i32 addrspace(1)* %p, align 4 - %p.1 = load i32, i32 addrspace(1)* %gep.p, align 4 + %gep.p = getelementptr i32, ptr addrspace(1) %p, i32 1 + %p.0 = load i32, ptr addrspace(1) %p, align 4 + %p.1 = load i32, ptr addrspace(1) %gep.p, align 4 %zext.0 = zext i32 %p.0 to i64 %zext.1 = zext i32 %p.1 to i64 %shl.1 = shl i64 %zext.1, 32 @@ -126,7 +126,7 @@ ret i64 %or } -define i64 @load_2xi32_noncombine(i32 addrspace(1)* %p) #0 { +define i64 @load_2xi32_noncombine(ptr addrspace(1) %p) #0 { ; GCN-LABEL: load_2xi32_noncombine: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ 
-137,9 +137,9 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, v3 ; GCN-NEXT: s_setpc_b64 s[30:31] - %gep.p = getelementptr i32, i32 addrspace(1)* %p, i32 2 - %p.0 = load i32, i32 addrspace(1)* %p, align 4 - %p.1 = load i32, i32 addrspace(1)* %gep.p, align 4 + %gep.p = getelementptr i32, ptr addrspace(1) %p, i32 2 + %p.0 = load i32, ptr addrspace(1) %p, align 4 + %p.1 = load i32, ptr addrspace(1) %gep.p, align 4 %zext.0 = zext i32 %p.0 to i64 %zext.1 = zext i32 %p.1 to i64 %shl.1 = shl i64 %zext.1, 32 @@ -147,20 +147,20 @@ ret i64 %or } -define i64 @load_4xi16_combine(i16 addrspace(1)* %p) #0 { +define i64 @load_4xi16_combine(ptr addrspace(1) %p) #0 { ; GCN-LABEL: load_4xi16_combine: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %gep.p = getelementptr i16, i16 addrspace(1)* %p, i32 1 - %gep.2p = getelementptr i16, i16 addrspace(1)* %p, i32 2 - %gep.3p = getelementptr i16, i16 addrspace(1)* %p, i32 3 - %p.0 = load i16, i16 addrspace(1)* %p, align 4 - %p.1 = load i16, i16 addrspace(1)* %gep.p, align 4 - %p.2 = load i16, i16 addrspace(1)* %gep.2p, align 4 - %p.3 = load i16, i16 addrspace(1)* %gep.3p, align 4 + %gep.p = getelementptr i16, ptr addrspace(1) %p, i32 1 + %gep.2p = getelementptr i16, ptr addrspace(1) %p, i32 2 + %gep.3p = getelementptr i16, ptr addrspace(1) %p, i32 3 + %p.0 = load i16, ptr addrspace(1) %p, align 4 + %p.1 = load i16, ptr addrspace(1) %gep.p, align 4 + %p.2 = load i16, ptr addrspace(1) %gep.2p, align 4 + %p.3 = load i16, ptr addrspace(1) %gep.3p, align 4 %zext.0 = zext i16 %p.0 to i64 %zext.1 = zext i16 %p.1 to i64 %zext.2 = zext i16 %p.2 to i64 @@ -175,7 +175,7 @@ } -define i64 @load_4xi16_noncombine(i16 addrspace(1)* %p) #0 { +define i64 @load_4xi16_noncombine(ptr addrspace(1) %p) #0 { ; GCN-LABEL: load_4xi16_noncombine: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -186,13 +186,13 @@ ; GCN-NEXT: v_bfi_b32 v0, s4, v2, v3 ; GCN-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GCN-NEXT: s_setpc_b64 s[30:31] - %gep.p = getelementptr i16, i16 addrspace(1)* %p, i32 3 - %gep.2p = getelementptr i16, i16 addrspace(1)* %p, i32 2 - %gep.3p = getelementptr i16, i16 addrspace(1)* %p, i32 1 - %p.0 = load i16, i16 addrspace(1)* %p, align 4 - %p.1 = load i16, i16 addrspace(1)* %gep.p, align 4 - %p.2 = load i16, i16 addrspace(1)* %gep.2p, align 4 - %p.3 = load i16, i16 addrspace(1)* %gep.3p, align 4 + %gep.p = getelementptr i16, ptr addrspace(1) %p, i32 3 + %gep.2p = getelementptr i16, ptr addrspace(1) %p, i32 2 + %gep.3p = getelementptr i16, ptr addrspace(1) %p, i32 1 + %p.0 = load i16, ptr addrspace(1) %p, align 4 + %p.1 = load i16, ptr addrspace(1) %gep.p, align 4 + %p.2 = load i16, ptr addrspace(1) %gep.2p, align 4 + %p.3 = load i16, ptr addrspace(1) %gep.3p, align 4 %zext.0 = zext i16 %p.0 to i64 %zext.1 = zext i16 %p.1 to i64 %zext.2 = zext i16 %p.2 to i64 @@ -206,7 +206,7 @@ ret i64 %or.3 } -define i64 @load_3xi16_combine(i16 addrspace(1)* %p) #0 { +define i64 @load_3xi16_combine(ptr addrspace(1) %p) #0 { ; GCN-LABEL: load_3xi16_combine: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -217,11 +217,11 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, v3 ; GCN-NEXT: s_setpc_b64 s[30:31] - %gep.p = getelementptr i16, i16 addrspace(1)* %p, i32 1 - %gep.2p = getelementptr i16, i16 addrspace(1)* %p, i32 2 - %p.0 = 
load i16, i16 addrspace(1)* %p, align 4 - %p.1 = load i16, i16 addrspace(1)* %gep.p, align 4 - %p.2 = load i16, i16 addrspace(1)* %gep.2p, align 4 + %gep.p = getelementptr i16, ptr addrspace(1) %p, i32 1 + %gep.2p = getelementptr i16, ptr addrspace(1) %p, i32 2 + %p.0 = load i16, ptr addrspace(1) %p, align 4 + %p.1 = load i16, ptr addrspace(1) %gep.p, align 4 + %p.2 = load i16, ptr addrspace(1) %gep.2p, align 4 %zext.0 = zext i16 %p.0 to i64 %zext.1 = zext i16 %p.1 to i64 %zext.2 = zext i16 %p.2 to i64 @@ -232,7 +232,7 @@ ret i64 %or.2 } -define i64 @load_3xi16_noncombine(i16 addrspace(1)* %p) #0 { +define i64 @load_3xi16_noncombine(ptr addrspace(1) %p) #0 { ; GCN-LABEL: load_3xi16_noncombine: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -243,11 +243,11 @@ ; GCN-NEXT: v_and_or_b32 v0, v3, s4, v2 ; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v3 ; GCN-NEXT: s_setpc_b64 s[30:31] - %gep.p = getelementptr i16, i16 addrspace(1)* %p, i32 3 - %gep.2p = getelementptr i16, i16 addrspace(1)* %p, i32 2 - %p.0 = load i16, i16 addrspace(1)* %p, align 4 - %p.1 = load i16, i16 addrspace(1)* %gep.p, align 4 - %p.2 = load i16, i16 addrspace(1)* %gep.2p, align 4 + %gep.p = getelementptr i16, ptr addrspace(1) %p, i32 3 + %gep.2p = getelementptr i16, ptr addrspace(1) %p, i32 2 + %p.0 = load i16, ptr addrspace(1) %p, align 4 + %p.1 = load i16, ptr addrspace(1) %gep.p, align 4 + %p.2 = load i16, ptr addrspace(1) %gep.2p, align 4 %zext.0 = zext i16 %p.0 to i64 %zext.1 = zext i16 %p.1 to i64 %zext.2 = zext i16 %p.2 to i64 diff --git a/llvm/test/CodeGen/AMDGPU/combine_vloads.ll b/llvm/test/CodeGen/AMDGPU/combine_vloads.ll --- a/llvm/test/CodeGen/AMDGPU/combine_vloads.ll +++ b/llvm/test/CodeGen/AMDGPU/combine_vloads.ll @@ -12,7 +12,7 @@ ; EG-LABEL: {{^}}combine_vloads: ; EG: VTX_READ_128 ; EG: VTX_READ_128 -define amdgpu_kernel void @combine_vloads(<8 x i8> addrspace(1)* nocapture %src, <8 x i8> addrspace(1)* nocapture %result) nounwind { +define amdgpu_kernel void @combine_vloads(ptr addrspace(1) nocapture %src, ptr addrspace(1) nocapture %result) nounwind { entry: br label %for.body @@ -21,21 +21,18 @@ for.body: ; preds = %for.body, %entry %i.01 = phi i32 [ 0, %entry ], [ %tmp19, %for.body ] - %arrayidx_v4 = bitcast <8 x i8> addrspace(1)* %src to <32 x i8> addrspace(1)* - %0 = bitcast <32 x i8> addrspace(1)* %arrayidx_v4 to <8 x i32> addrspace(1)* - %vecload2 = load <8 x i32>, <8 x i32> addrspace(1)* %0, align 32 - %1 = bitcast <8 x i32> %vecload2 to <32 x i8> - %tmp5 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> - %tmp8 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> + %vecload2 = load <8 x i32>, ptr addrspace(1) %src, align 32 + %0 = bitcast <8 x i32> %vecload2 to <32 x i8> + %tmp5 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32> + %tmp8 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32> %tmp9 = add nsw <8 x i8> %tmp5, %tmp8 - %tmp12 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> + %tmp12 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32> %tmp13 = add nsw <8 x i8> %tmp9, %tmp12 - %tmp16 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> + %tmp16 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32> %tmp17 = add nsw <8 x i8> %tmp13, %tmp16 - %scevgep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %result, i32 %i.01 - %2 = bitcast <8 x i8> %tmp17 to <2 x i32> - %3 = bitcast <8 x i8> addrspace(1)* %scevgep to <2 x i32> addrspace(1)* - store <2 x i32> %2, <2 x i32> addrspace(1)* %3, align 8 + %scevgep = getelementptr <8 x i8>, ptr 
addrspace(1) %result, i32 %i.01 + %1 = bitcast <8 x i8> %tmp17 to <2 x i32> + store <2 x i32> %1, ptr addrspace(1) %scevgep, align 8 %tmp19 = add nsw i32 %i.01, 1 %exitcond = icmp eq i32 %tmp19, 1024 br i1 %exitcond, label %for.exit, label %for.body diff --git a/llvm/test/CodeGen/AMDGPU/commute_modifiers.ll b/llvm/test/CodeGen/AMDGPU/commute_modifiers.ll --- a/llvm/test/CodeGen/AMDGPU/commute_modifiers.ll +++ b/llvm/test/CodeGen/AMDGPU/commute_modifiers.ll @@ -8,13 +8,13 @@ ; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, 2.0 ; SI: buffer_store_dword [[REG]] -define amdgpu_kernel void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @commute_add_imm_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %x = load float, float addrspace(1)* %gep.0 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %x = load float, ptr addrspace(1) %gep.0 %x.fabs = call float @llvm.fabs.f32(float %x) #1 %z = fadd float 2.0, %x.fabs - store float %z, float addrspace(1)* %out + store float %z, ptr addrspace(1) %out ret void } @@ -22,14 +22,14 @@ ; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -4.0 ; SI: buffer_store_dword [[REG]] -define amdgpu_kernel void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @commute_mul_imm_fneg_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %x = load float, float addrspace(1)* %gep.0 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %x = load float, ptr addrspace(1) %gep.0 %x.fabs = call float @llvm.fabs.f32(float %x) #1 %x.fneg.fabs = fsub float -0.000000e+00, %x.fabs %z = fmul float 4.0, %x.fneg.fabs - store float %z, float addrspace(1)* %out + store float %z, ptr addrspace(1) %out ret void } @@ -37,13 +37,13 @@ ; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: v_mul_f32_e32 [[REG:v[0-9]+]], -4.0, [[X]] ; SI: buffer_store_dword [[REG]] -define amdgpu_kernel void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @commute_mul_imm_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %x = load float, float addrspace(1)* %gep.0 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %x = load float, ptr addrspace(1) %gep.0 %x.fneg = fsub float -0.000000e+00, %x %z = fmul float 4.0, %x.fneg - store float %z, float addrspace(1)* %out + store float %z, ptr addrspace(1) %out ret void } @@ -53,13 +53,13 @@ ; SI: s_mov_b32 [[K:s[0-9]+]], 0x44800000 ; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, [[K]] ; SI: buffer_store_dword [[REG]] -define amdgpu_kernel void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @commute_add_lit_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %x = 
load float, float addrspace(1)* %gep.0 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %x = load float, ptr addrspace(1) %gep.0 %x.fabs = call float @llvm.fabs.f32(float %x) #1 %z = fadd float 1024.0, %x.fabs - store float %z, float addrspace(1)* %out + store float %z, ptr addrspace(1) %out ret void } @@ -68,15 +68,15 @@ ; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[X]], |[[Y]]| ; SI: buffer_store_dword [[REG]] -define amdgpu_kernel void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @commute_add_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %x = load volatile float, float addrspace(1)* %gep.0 - %y = load volatile float, float addrspace(1)* %gep.1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %x = load volatile float, ptr addrspace(1) %gep.0 + %y = load volatile float, ptr addrspace(1) %gep.1 %y.fabs = call float @llvm.fabs.f32(float %y) #1 %z = fadd float %x, %y.fabs - store float %z, float addrspace(1)* %out + store float %z, ptr addrspace(1) %out ret void } @@ -85,15 +85,15 @@ ; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -[[Y]] ; SI: buffer_store_dword [[REG]] -define amdgpu_kernel void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @commute_mul_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %x = load volatile float, float addrspace(1)* %gep.0 - %y = load volatile float, float addrspace(1)* %gep.1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %x = load volatile float, ptr addrspace(1) %gep.0 + %y = load volatile float, ptr addrspace(1) %gep.1 %y.fneg = fsub float -0.000000e+00, %y %z = fmul float %x, %y.fneg - store float %z, float addrspace(1)* %out + store float %z, ptr addrspace(1) %out ret void } @@ -102,16 +102,16 @@ ; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -|[[Y]]| ; SI: buffer_store_dword [[REG]] -define amdgpu_kernel void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @commute_mul_fabs_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %x = load volatile float, float addrspace(1)* %gep.0 - %y = load volatile float, float addrspace(1)* %gep.1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %x = load volatile float, ptr addrspace(1) %gep.0 + %y = load volatile float, ptr addrspace(1) %gep.1 %y.fabs = call float @llvm.fabs.f32(float %y) #1 %y.fabs.fneg = fsub float 
-0.000000e+00, %y.fabs %z = fmul float %x, %y.fabs.fneg - store float %z, float addrspace(1)* %out + store float %z, ptr addrspace(1) %out ret void } @@ -121,16 +121,16 @@ ; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, |[[Y]]| ; SI: buffer_store_dword [[REG]] -define amdgpu_kernel void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @commute_mul_fabs_x_fabs_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %x = load volatile float, float addrspace(1)* %gep.0 - %y = load volatile float, float addrspace(1)* %gep.1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %x = load volatile float, ptr addrspace(1) %gep.0 + %y = load volatile float, ptr addrspace(1) %gep.1 %x.fabs = call float @llvm.fabs.f32(float %x) #1 %y.fabs = call float @llvm.fabs.f32(float %y) #1 %z = fmul float %x.fabs, %y.fabs - store float %z, float addrspace(1)* %out + store float %z, ptr addrspace(1) %out ret void } @@ -139,17 +139,17 @@ ; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -|[[Y]]| ; SI: buffer_store_dword [[REG]] -define amdgpu_kernel void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @commute_mul_fabs_x_fneg_fabs_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %x = load volatile float, float addrspace(1)* %gep.0 - %y = load volatile float, float addrspace(1)* %gep.1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %x = load volatile float, ptr addrspace(1) %gep.0 + %y = load volatile float, ptr addrspace(1) %gep.1 %x.fabs = call float @llvm.fabs.f32(float %x) #1 %y.fabs = call float @llvm.fabs.f32(float %y) #1 %y.fabs.fneg = fsub float -0.000000e+00, %y.fabs %z = fmul float %x.fabs, %y.fabs.fneg - store float %z, float addrspace(1)* %out + store float %z, ptr addrspace(1) %out ret void } @@ -161,19 +161,19 @@ ; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, |[[R2]]| ; SI: buffer_store_dword [[RESULT]] -define amdgpu_kernel void @fma_a_2.0_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { +define amdgpu_kernel void @fma_a_2.0_neg_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid + %gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid - %r1 = load volatile float, float addrspace(1)* %gep.0 - %r2 = load volatile float, 
float addrspace(1)* %gep.1 + %r1 = load volatile float, ptr addrspace(1) %gep.0 + %r2 = load volatile float, ptr addrspace(1) %gep.1 %r2.fabs = call float @llvm.fabs.f32(float %r2) %r3 = tail call float @llvm.fma.f32(float %r1, float 2.0, float %r2.fabs) - store float %r3, float addrspace(1)* %gep.out + store float %r3, ptr addrspace(1) %gep.out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/ctpop.ll b/llvm/test/CodeGen/AMDGPU/ctpop.ll --- a/llvm/test/CodeGen/AMDGPU/ctpop.ll +++ b/llvm/test/CodeGen/AMDGPU/ctpop.ll @@ -18,9 +18,9 @@ ; GCN: s_endpgm ; EG: BCNT_INT -define amdgpu_kernel void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { +define amdgpu_kernel void @s_ctpop_i32(ptr addrspace(1) noalias %out, i32 %val) nounwind { %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone - store i32 %ctpop, i32 addrspace(1)* %out, align 4 + store i32 %ctpop, ptr addrspace(1) %out, align 4 ret void } @@ -32,12 +32,12 @@ ; GCN: s_endpgm ; EG: BCNT_INT -define amdgpu_kernel void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %in.gep, align 4 + %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %in.gep, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone - store i32 %ctpop, i32 addrspace(1)* %out, align 4 + store i32 %ctpop, ptr addrspace(1) %out, align 4 ret void } @@ -54,16 +54,16 @@ ; EG: BCNT_INT ; EG: BCNT_INT -define amdgpu_kernel void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind { +define amdgpu_kernel void @v_ctpop_add_chain_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in0, ptr addrspace(1) noalias %in1) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %tid - %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %tid - %val0 = load volatile i32, i32 addrspace(1)* %in0.gep, align 4 - %val1 = load volatile i32, i32 addrspace(1)* %in1.gep, align 4 + %in0.gep = getelementptr i32, ptr addrspace(1) %in0, i32 %tid + %in1.gep = getelementptr i32, ptr addrspace(1) %in1, i32 %tid + %val0 = load volatile i32, ptr addrspace(1) %in0.gep, align 4 + %val1 = load volatile i32, ptr addrspace(1) %in1.gep, align 4 %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone %ctpop1 = call i32 @llvm.ctpop.i32(i32 %val1) nounwind readnone %add = add i32 %ctpop0, %ctpop1 - store i32 %add, i32 addrspace(1)* %out, align 4 + store i32 %add, ptr addrspace(1) %out, align 4 ret void } @@ -73,13 +73,13 @@ ; GCN-NEXT: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}} ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm -define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %sval) nounwind { +define amdgpu_kernel void @v_ctpop_add_sgpr_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i32 %sval) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %in.gep, align 4 + %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %in.gep, align 4 
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone %add = add i32 %ctpop, %sval - store i32 %add, i32 addrspace(1)* %out, align 4 + store i32 %add, ptr addrspace(1) %out, align 4 ret void } @@ -90,12 +90,12 @@ ; EG: BCNT_INT ; EG: BCNT_INT -define amdgpu_kernel void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_v2i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 %tid - %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8 + %in.gep = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 %tid + %val = load <2 x i32>, ptr addrspace(1) %in.gep, align 8 %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone - store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8 + store <2 x i32> %ctpop, ptr addrspace(1) %out, align 8 ret void } @@ -110,12 +110,12 @@ ; EG: BCNT_INT ; EG: BCNT_INT ; EG: BCNT_INT -define amdgpu_kernel void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_v4i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid - %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16 + %in.gep = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 %tid + %val = load <4 x i32>, ptr addrspace(1) %in.gep, align 16 %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone - store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %ctpop, ptr addrspace(1) %out, align 16 ret void } @@ -138,12 +138,12 @@ ; EG: BCNT_INT ; EG: BCNT_INT ; EG: BCNT_INT -define amdgpu_kernel void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_v8i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <8 x i32>, <8 x i32> addrspace(1)* %in, i32 %tid - %val = load <8 x i32>, <8 x i32> addrspace(1)* %in.gep, align 32 + %in.gep = getelementptr <8 x i32>, ptr addrspace(1) %in, i32 %tid + %val = load <8 x i32>, ptr addrspace(1) %in.gep, align 32 %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone - store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %ctpop, ptr addrspace(1) %out, align 32 ret void } @@ -182,12 +182,12 @@ ; EG: BCNT_INT ; EG: BCNT_INT ; EG: BCNT_INT -define amdgpu_kernel void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_v16i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <16 x i32>, <16 x i32> addrspace(1)* %in, i32 %tid - %val = load <16 x i32>, <16 x i32> addrspace(1)* %in.gep, align 32 + %in.gep = getelementptr <16 x i32>, ptr addrspace(1) %in, i32 %tid + %val = load <16 x i32>, ptr addrspace(1) %in.gep, align 32 %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone - store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32 + store <16 x i32> %ctpop, ptr addrspace(1) %out, align 32 ret void } @@ -198,13 +198,13 @@ 
; GCN: s_endpgm ; EG: BCNT_INT -define amdgpu_kernel void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_i32_add_inline_constant(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %in.gep, align 4 + %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %in.gep, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone %add = add i32 %ctpop, 4 - store i32 %add, i32 addrspace(1)* %out, align 4 + store i32 %add, ptr addrspace(1) %out, align 4 ret void } @@ -215,13 +215,13 @@ ; GCN: s_endpgm ; EG: BCNT_INT -define amdgpu_kernel void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_i32_add_inline_constant_inv(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %in.gep, align 4 + %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %in.gep, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone %add = add i32 4, %ctpop - store i32 %add, i32 addrspace(1)* %out, align 4 + store i32 %add, ptr addrspace(1) %out, align 4 ret void } @@ -232,13 +232,13 @@ ; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm -define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_i32_add_literal(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %in.gep, align 4 + %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %in.gep, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone %add = add i32 %ctpop, 99999 - store i32 %add, i32 addrspace(1)* %out, align 4 + store i32 %add, ptr addrspace(1) %out, align 4 ret void } @@ -250,13 +250,13 @@ ; GCN: s_endpgm ; EG: BCNT_INT -define amdgpu_kernel void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind { +define amdgpu_kernel void @v_ctpop_i32_add_var(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i32 %const) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %in.gep, align 4 + %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %in.gep, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone %add = add i32 %ctpop, %const - store i32 %add, i32 addrspace(1)* %out, align 4 + store i32 %add, ptr addrspace(1) %out, align 4 ret void } @@ -268,13 +268,13 @@ ; GCN: s_endpgm ; EG: BCNT_INT -define amdgpu_kernel void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind { +define amdgpu_kernel void @v_ctpop_i32_add_var_inv(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i32 %const) 
nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %in.gep, align 4 + %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %in.gep, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone %add = add i32 %const, %ctpop - store i32 %add, i32 addrspace(1)* %out, align 4 + store i32 %add, ptr addrspace(1) %out, align 4 ret void } @@ -289,15 +289,15 @@ ; GCN: s_endpgm ; EG: BCNT_INT -define amdgpu_kernel void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind { +define amdgpu_kernel void @v_ctpop_i32_add_vvar_inv(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, ptr addrspace(1) noalias %constptr) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %in.gep, align 4 + %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %in.gep, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone - %gep = getelementptr i32, i32 addrspace(1)* %constptr, i32 %tid - %const = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr i32, ptr addrspace(1) %constptr, i32 %tid + %const = load i32, ptr addrspace(1) %gep, align 4 %add = add i32 %const, %ctpop - store i32 %add, i32 addrspace(1)* %out, align 4 + store i32 %add, ptr addrspace(1) %out, align 4 ret void } @@ -309,7 +309,7 @@ ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm ; EG: BCNT_INT -define amdgpu_kernel void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, [8 x i32], i32 %cond) { +define amdgpu_kernel void @ctpop_i32_in_br(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %ctpop_arg, [8 x i32], i32 %cond) { entry: %tmp0 = icmp eq i32 %cond, 0 br i1 %tmp0, label %if, label %else @@ -319,12 +319,12 @@ br label %endif else: - %tmp3 = getelementptr i32, i32 addrspace(1)* %in, i32 1 - %tmp4 = load i32, i32 addrspace(1)* %tmp3 + %tmp3 = getelementptr i32, ptr addrspace(1) %in, i32 1 + %tmp4 = load i32, ptr addrspace(1) %tmp3 br label %endif endif: %tmp5 = phi i32 [%tmp2, %if], [%tmp4, %else] - store i32 %tmp5, i32 addrspace(1)* %out + store i32 %tmp5, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/ctpop16.ll b/llvm/test/CodeGen/AMDGPU/ctpop16.ll --- a/llvm/test/CodeGen/AMDGPU/ctpop16.ll +++ b/llvm/test/CodeGen/AMDGPU/ctpop16.ll @@ -11,7 +11,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone -define amdgpu_kernel void @s_ctpop_i16(i16 addrspace(1)* noalias %out, i16 %val) nounwind { +define amdgpu_kernel void @s_ctpop_i16(ptr addrspace(1) noalias %out, i16 %val) nounwind { ; SI-LABEL: s_ctpop_i16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -64,12 +64,12 @@ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone - store i16 %ctpop, i16 addrspace(1)* %out, align 4 + store i16 %ctpop, ptr addrspace(1) %out, align 4 ret void } ; XXX - Why 0 in register? 
-define amdgpu_kernel void @v_ctpop_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_i16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: v_ctpop_i16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -132,14 +132,14 @@ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid - %val = load i16, i16 addrspace(1)* %in.gep, align 4 + %in.gep = getelementptr i16, ptr addrspace(1) %in, i32 %tid + %val = load i16, ptr addrspace(1) %in.gep, align 4 %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone - store i16 %ctpop, i16 addrspace(1)* %out, align 4 + store i16 %ctpop, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v_ctpop_add_chain_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in0, i16 addrspace(1)* noalias %in1) nounwind { +define amdgpu_kernel void @v_ctpop_add_chain_i16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in0, ptr addrspace(1) noalias %in1) nounwind { ; SI-LABEL: v_ctpop_add_chain_i16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -225,18 +225,18 @@ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in0.gep = getelementptr i16, i16 addrspace(1)* %in0, i32 %tid - %in1.gep = getelementptr i16, i16 addrspace(1)* %in1, i32 %tid - %val0 = load volatile i16, i16 addrspace(1)* %in0.gep, align 4 - %val1 = load volatile i16, i16 addrspace(1)* %in1.gep, align 4 + %in0.gep = getelementptr i16, ptr addrspace(1) %in0, i32 %tid + %in1.gep = getelementptr i16, ptr addrspace(1) %in1, i32 %tid + %val0 = load volatile i16, ptr addrspace(1) %in0.gep, align 4 + %val1 = load volatile i16, ptr addrspace(1) %in1.gep, align 4 %ctpop0 = call i16 @llvm.ctpop.i16(i16 %val0) nounwind readnone %ctpop1 = call i16 @llvm.ctpop.i16(i16 %val1) nounwind readnone %add = add i16 %ctpop0, %ctpop1 - store i16 %add, i16 addrspace(1)* %out, align 4 + store i16 %add, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v_ctpop_add_sgpr_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %sval) nounwind { +define amdgpu_kernel void @v_ctpop_add_sgpr_i16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i16 %sval) nounwind { ; SI-LABEL: v_ctpop_add_sgpr_i16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -309,15 +309,15 @@ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid - %val = load i16, i16 addrspace(1)* %in.gep, align 4 + %in.gep = getelementptr i16, ptr addrspace(1) %in, i32 %tid + %val = load i16, ptr addrspace(1) %in.gep, align 4 %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone %add = add i16 %ctpop, %sval - store i16 %add, i16 addrspace(1)* %out, align 4 + store i16 %add, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v_ctpop_v2i16(<2 x i16> addrspace(1)* noalias %out, <2 x i16> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_v2i16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: v_ctpop_v2i16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -390,14 +390,14 @@ ; EG-NEXT: LSHR * T6.X, 
KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid - %val = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep, align 8 + %in.gep = getelementptr <2 x i16>, ptr addrspace(1) %in, i32 %tid + %val = load <2 x i16>, ptr addrspace(1) %in.gep, align 8 %ctpop = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %val) nounwind readnone - store <2 x i16> %ctpop, <2 x i16> addrspace(1)* %out, align 8 + store <2 x i16> %ctpop, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @v_ctpop_v4i16(<4 x i16> addrspace(1)* noalias %out, <4 x i16> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_v4i16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: v_ctpop_v4i16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -514,14 +514,14 @@ ; EG-NEXT: MOV T5.X, PV.Y, ; EG-NEXT: MOV * T8.X, T4.X, %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid - %val = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep, align 16 + %in.gep = getelementptr <4 x i16>, ptr addrspace(1) %in, i32 %tid + %val = load <4 x i16>, ptr addrspace(1) %in.gep, align 16 %ctpop = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %val) nounwind readnone - store <4 x i16> %ctpop, <4 x i16> addrspace(1)* %out, align 16 + store <4 x i16> %ctpop, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @v_ctpop_v8i16(<8 x i16> addrspace(1)* noalias %out, <8 x i16> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_v8i16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: v_ctpop_v8i16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -694,14 +694,14 @@ ; EG-NEXT: MOV * T0.X, T4.X, ; EG-NEXT: MOV * T0.Z, T8.X, %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <8 x i16>, <8 x i16> addrspace(1)* %in, i32 %tid - %val = load <8 x i16>, <8 x i16> addrspace(1)* %in.gep, align 32 + %in.gep = getelementptr <8 x i16>, ptr addrspace(1) %in, i32 %tid + %val = load <8 x i16>, ptr addrspace(1) %in.gep, align 32 %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val) nounwind readnone - store <8 x i16> %ctpop, <8 x i16> addrspace(1)* %out, align 32 + store <8 x i16> %ctpop, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_kernel void @v_ctpop_v16i16(<16 x i16> addrspace(1)* noalias %out, <16 x i16> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_v16i16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: v_ctpop_v16i16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -1010,14 +1010,14 @@ ; EG-NEXT: MOV T20.X, T12.X, ; EG-NEXT: MOV * T20.Z, T16.X, BS:VEC_120/SCL_212 %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <16 x i16>, <16 x i16> addrspace(1)* %in, i32 %tid - %val = load <16 x i16>, <16 x i16> addrspace(1)* %in.gep, align 32 + %in.gep = getelementptr <16 x i16>, ptr addrspace(1) %in, i32 %tid + %val = load <16 x i16>, ptr addrspace(1) %in.gep, align 32 %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %val) nounwind readnone - store <16 x i16> %ctpop, <16 x i16> addrspace(1)* %out, align 32 + store <16 x i16> %ctpop, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_kernel void @v_ctpop_i16_add_inline_constant(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) 
nounwind { +define amdgpu_kernel void @v_ctpop_i16_add_inline_constant(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: v_ctpop_i16_add_inline_constant: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1081,15 +1081,15 @@ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid - %val = load i16, i16 addrspace(1)* %in.gep, align 4 + %in.gep = getelementptr i16, ptr addrspace(1) %in, i32 %tid + %val = load i16, ptr addrspace(1) %in.gep, align 4 %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone %add = add i16 %ctpop, 4 - store i16 %add, i16 addrspace(1)* %out, align 4 + store i16 %add, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v_ctpop_i16_add_inline_constant_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_i16_add_inline_constant_inv(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: v_ctpop_i16_add_inline_constant_inv: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1153,15 +1153,15 @@ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid - %val = load i16, i16 addrspace(1)* %in.gep, align 4 + %in.gep = getelementptr i16, ptr addrspace(1) %in, i32 %tid + %val = load i16, ptr addrspace(1) %in.gep, align 4 %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone %add = add i16 4, %ctpop - store i16 %add, i16 addrspace(1)* %out, align 4 + store i16 %add, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v_ctpop_i16_add_literal(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_i16_add_literal(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: v_ctpop_i16_add_literal: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1227,15 +1227,15 @@ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid - %val = load i16, i16 addrspace(1)* %in.gep, align 4 + %in.gep = getelementptr i16, ptr addrspace(1) %in, i32 %tid + %val = load i16, ptr addrspace(1) %in.gep, align 4 %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone %add = add i16 %ctpop, 999 - store i16 %add, i16 addrspace(1)* %out, align 4 + store i16 %add, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v_ctpop_i16_add_var(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %const) nounwind { +define amdgpu_kernel void @v_ctpop_i16_add_var(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i16 %const) nounwind { ; SI-LABEL: v_ctpop_i16_add_var: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -1308,15 +1308,15 @@ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid - %val = load i16, i16 addrspace(1)* %in.gep, align 4 + %in.gep = getelementptr i16, ptr addrspace(1) %in, i32 %tid + %val = load i16, ptr addrspace(1) %in.gep, align 4 %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind 
readnone %add = add i16 %ctpop, %const - store i16 %add, i16 addrspace(1)* %out, align 4 + store i16 %add, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v_ctpop_i16_add_var_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %const) nounwind { +define amdgpu_kernel void @v_ctpop_i16_add_var_inv(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i16 %const) nounwind { ; SI-LABEL: v_ctpop_i16_add_var_inv: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -1389,15 +1389,15 @@ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid - %val = load i16, i16 addrspace(1)* %in.gep, align 4 + %in.gep = getelementptr i16, ptr addrspace(1) %in, i32 %tid + %val = load i16, ptr addrspace(1) %in.gep, align 4 %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone %add = add i16 %const, %ctpop - store i16 %add, i16 addrspace(1)* %out, align 4 + store i16 %add, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v_ctpop_i16_add_vvar_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 addrspace(1)* noalias %constptr) nounwind { +define amdgpu_kernel void @v_ctpop_i16_add_vvar_inv(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, ptr addrspace(1) noalias %constptr) nounwind { ; SI-LABEL: v_ctpop_i16_add_vvar_inv: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -1476,19 +1476,19 @@ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid - %val = load i16, i16 addrspace(1)* %in.gep, align 4 + %in.gep = getelementptr i16, ptr addrspace(1) %in, i32 %tid + %val = load i16, ptr addrspace(1) %in.gep, align 4 %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone - %gep = getelementptr i16, i16 addrspace(1)* %constptr, i32 %tid - %const = load i16, i16 addrspace(1)* %gep, align 4 + %gep = getelementptr i16, ptr addrspace(1) %constptr, i32 %tid + %const = load i16, ptr addrspace(1) %gep, align 4 %add = add i16 %const, %ctpop - store i16 %add, i16 addrspace(1)* %out, align 4 + store i16 %add, ptr addrspace(1) %out, align 4 ret void } ; FIXME: We currently disallow SALU instructions in all branches, ; but there are some cases when the should be allowed. 
-define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %ctpop_arg, i16 %cond) { +define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(1) %in, i16 %ctpop_arg, i16 %cond) { ; SI-LABEL: ctpop_i16_in_br: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dword s4, s[0:1], 0xd @@ -1619,12 +1619,12 @@ br label %endif else: - %tmp3 = getelementptr i16, i16 addrspace(1)* %in, i16 1 - %tmp4 = load i16, i16 addrspace(1)* %tmp3 + %tmp3 = getelementptr i16, ptr addrspace(1) %in, i16 1 + %tmp4 = load i16, ptr addrspace(1) %tmp3 br label %endif endif: %tmp5 = phi i16 [%tmp2, %if], [%tmp4, %else] - store i16 %tmp5, i16 addrspace(1)* %out + store i16 %tmp5, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/ctpop64.ll b/llvm/test/CodeGen/AMDGPU/ctpop64.ll --- a/llvm/test/CodeGen/AMDGPU/ctpop64.ll +++ b/llvm/test/CodeGen/AMDGPU/ctpop64.ll @@ -19,10 +19,10 @@ ; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] ; GCN: buffer_store_dword [[VRESULT]], ; GCN: s_endpgm -define amdgpu_kernel void @s_ctpop_i64(i32 addrspace(1)* noalias %out, [8 x i32], i64 %val) nounwind { +define amdgpu_kernel void @s_ctpop_i64(ptr addrspace(1) noalias %out, [8 x i32], i64 %val) nounwind { %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone %truncctpop = trunc i64 %ctpop to i32 - store i32 %truncctpop, i32 addrspace(1)* %out, align 4 + store i32 %truncctpop, ptr addrspace(1) %out, align 4 ret void } @@ -33,13 +33,13 @@ ; VI-NEXT: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm -define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid - %val = load i64, i64 addrspace(1)* %in.gep, align 8 + %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid + %val = load i64, ptr addrspace(1) %in.gep, align 8 %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone %truncctpop = trunc i64 %ctpop to i32 - store i32 %truncctpop, i32 addrspace(1)* %out, align 4 + store i32 %truncctpop, ptr addrspace(1) %out, align 4 ret void } @@ -52,13 +52,13 @@ ; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}} ; GCN: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]] ; GCN: s_endpgm -define amdgpu_kernel void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind { +define amdgpu_kernel void @v_ctpop_i64_user(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i64 %s.val) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid - %val = load i64, i64 addrspace(1)* %in.gep, align 8 + %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid + %val = load i64, ptr addrspace(1) %in.gep, align 8 %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone %or = or i64 %ctpop, %s.val - store i64 %or, i64 addrspace(1)* %out + store i64 %or, ptr addrspace(1) %out ret void } @@ -66,10 +66,10 @@ ; GCN: s_bcnt1_i32_b64 ; GCN: s_bcnt1_i32_b64 ; GCN: s_endpgm -define amdgpu_kernel void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) nounwind { +define amdgpu_kernel void @s_ctpop_v2i64(ptr addrspace(1) noalias %out, <2 x i64> %val) nounwind { %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x 
i64> %val) nounwind readnone %truncctpop = trunc <2 x i64> %ctpop to <2 x i32> - store <2 x i32> %truncctpop, <2 x i32> addrspace(1)* %out, align 8 + store <2 x i32> %truncctpop, ptr addrspace(1) %out, align 8 ret void } @@ -79,10 +79,10 @@ ; GCN: s_bcnt1_i32_b64 ; GCN: s_bcnt1_i32_b64 ; GCN: s_endpgm -define amdgpu_kernel void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) nounwind { +define amdgpu_kernel void @s_ctpop_v4i64(ptr addrspace(1) noalias %out, <4 x i64> %val) nounwind { %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone %truncctpop = trunc <4 x i64> %ctpop to <4 x i32> - store <4 x i32> %truncctpop, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %truncctpop, ptr addrspace(1) %out, align 16 ret void } @@ -92,13 +92,13 @@ ; GCN: v_bcnt_u32_b32 ; GCN: v_bcnt_u32_b32 ; GCN: s_endpgm -define amdgpu_kernel void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_v2i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i32 %tid - %val = load <2 x i64>, <2 x i64> addrspace(1)* %in.gep, align 16 + %in.gep = getelementptr <2 x i64>, ptr addrspace(1) %in, i32 %tid + %val = load <2 x i64>, ptr addrspace(1) %in.gep, align 16 %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone %truncctpop = trunc <2 x i64> %ctpop to <2 x i32> - store <2 x i32> %truncctpop, <2 x i32> addrspace(1)* %out, align 8 + store <2 x i32> %truncctpop, ptr addrspace(1) %out, align 8 ret void } @@ -112,13 +112,13 @@ ; GCN: v_bcnt_u32_b32 ; GCN: v_bcnt_u32_b32 ; GCN: s_endpgm -define amdgpu_kernel void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_v4i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid - %val = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep, align 32 + %in.gep = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tid + %val = load <4 x i64>, ptr addrspace(1) %in.gep, align 32 %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone %truncctpop = trunc <4 x i64> %ctpop to <4 x i32> - store <4 x i32> %truncctpop, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %truncctpop, ptr addrspace(1) %out, align 16 ret void } @@ -131,7 +131,7 @@ ; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[ZERO]] ; GCN: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]] ; GCN: s_endpgm -define amdgpu_kernel void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) { +define amdgpu_kernel void @ctpop_i64_in_br(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %ctpop_arg, i32 %cond) { entry: %tmp0 = icmp eq i32 %cond, 0 br i1 %tmp0, label %if, label %else @@ -141,13 +141,13 @@ br label %endif else: - %tmp3 = getelementptr i64, i64 addrspace(1)* %in, i32 1 - %tmp4 = load i64, i64 addrspace(1)* %tmp3 + %tmp3 = getelementptr i64, ptr addrspace(1) %in, i32 1 + %tmp4 = load i64, ptr addrspace(1) %tmp3 br label %endif endif: %tmp5 = phi i64 [%tmp2, %if], [%tmp4, %else] - store i64 %tmp5, i64 addrspace(1)* %out + store i64 %tmp5, ptr addrspace(1) %out ret void } @@ -156,10 +156,10 @@ ; GCN: s_bcnt1_i32_b64 [[SRESULT1:s[0-9]+]], ; GCN: s_add_i32 s{{[0-9]+}}, 
[[SRESULT1]], [[SRESULT0]] ; GCN: s_endpgm -define amdgpu_kernel void @s_ctpop_i128(i32 addrspace(1)* noalias %out, i128 %val) nounwind { +define amdgpu_kernel void @s_ctpop_i128(ptr addrspace(1) noalias %out, i128 %val) nounwind { %ctpop = call i128 @llvm.ctpop.i128(i128 %val) nounwind readnone %truncctpop = trunc i128 %ctpop to i32 - store i32 %truncctpop, i32 addrspace(1)* %out, align 4 + store i32 %truncctpop, ptr addrspace(1) %out, align 4 ret void } @@ -169,10 +169,10 @@ ; GCN: s_bcnt1_i32_b64 [[REG1:s[0-9]+]], ; GCN: s_add_i32 {{s[0-9]+}}, [[REG0]], [[REG1]] ; GCN: s_endpgm -define amdgpu_kernel void @s_ctpop_i65(i32 addrspace(1)* noalias %out, i65 %val) nounwind { +define amdgpu_kernel void @s_ctpop_i65(ptr addrspace(1) noalias %out, i65 %val) nounwind { %ctpop = call i65 @llvm.ctpop.i65(i65 %val) nounwind readnone %truncctpop = trunc i65 %ctpop to i32 - store i32 %truncctpop, i32 addrspace(1)* %out, align 4 + store i32 %truncctpop, ptr addrspace(1) %out, align 4 ret void } @@ -192,12 +192,12 @@ ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm -define amdgpu_kernel void @v_ctpop_i128(i32 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v_ctpop_i128(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %tid - %val = load i128, i128 addrspace(1)* %in.gep, align 8 + %in.gep = getelementptr i128, ptr addrspace(1) %in, i32 %tid + %val = load i128, ptr addrspace(1) %in.gep, align 8 %ctpop = call i128 @llvm.ctpop.i128(i128 %val) nounwind readnone %truncctpop = trunc i128 %ctpop to i32 - store i32 %truncctpop, i32 addrspace(1)* %out, align 4 + store i32 %truncctpop, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -191,7 +191,7 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %lshr.8 = lshr i32 %arg0, 8 - store i32 %lshr.8, i32 addrspace(1)* undef + store i32 %lshr.8, ptr addrspace(1) undef %masked = and i32 %lshr.8, 255 %cvt = uitofp i32 %masked to float ret float %cvt @@ -945,7 +945,7 @@ ret double %cvt } -define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_i8_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_i8_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1013,14 +1013,14 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid - %load = load i8, i8 addrspace(1)* %gep, align 1 + %gep = getelementptr i8, ptr addrspace(1) %in, i32 %tid + %load = load i8, ptr addrspace(1) %gep, align 1 %cvt = uitofp i8 %load to float - store float %cvt, float addrspace(1)* %out, align 4 + store float %cvt, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v2i8_to_v2f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v2i8_to_v2f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1098,14 +1098,14 @@ ; GFX11-NEXT: 
s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <2 x i8>, <2 x i8> addrspace(1)* %in, i32 %tid - %load = load <2 x i8>, <2 x i8> addrspace(1)* %gep, align 2 + %gep = getelementptr <2 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <2 x i8>, ptr addrspace(1) %gep, align 2 %cvt = uitofp <2 x i8> %load to <2 x float> - store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16 + store <2 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v3i8_to_v3f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v3i8_to_v3f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1188,14 +1188,14 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %in, i32 %tid - %load = load <3 x i8>, <3 x i8> addrspace(1)* %gep, align 4 + %gep = getelementptr <3 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <3 x i8>, ptr addrspace(1) %gep, align 4 %cvt = uitofp <3 x i8> %load to <3 x float> - store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16 + store <3 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v4i8_to_v4f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v4i8_to_v4f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1283,10 +1283,10 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid - %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 4 + %gep = getelementptr <4 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <4 x i8>, ptr addrspace(1) %gep, align 4 %cvt = uitofp <4 x i8> %load to <4 x float> - store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } @@ -1294,7 +1294,7 @@ ; position in the word for the component. ; FIXME: Packing bytes -define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v4i8_to_v4f32_unaligned: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1419,16 +1419,16 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid - %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1 + %gep = getelementptr <4 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <4 x i8>, ptr addrspace(1) %gep, align 1 %cvt = uitofp <4 x i8> %load to <4 x float> - store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } ; FIXME: Need to handle non-uniform case for function below (load without gep). ; Instructions still emitted to repack bytes for add use. 
-define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %out2, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v4i8_to_v4f32_2_uses: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd @@ -1612,17 +1612,17 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid.x = call i32 @llvm.amdgcn.workitem.id.x() - %in.ptr = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x - %load = load <4 x i8>, <4 x i8> addrspace(1)* %in.ptr, align 4 + %in.ptr = getelementptr <4 x i8>, ptr addrspace(1) %in, i32 %tid.x + %load = load <4 x i8>, ptr addrspace(1) %in.ptr, align 4 %cvt = uitofp <4 x i8> %load to <4 x float> - store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %cvt, ptr addrspace(1) %out, align 16 %add = add <4 x i8> %load, ; Second use of %load - store <4 x i8> %add, <4 x i8> addrspace(1)* %out2, align 4 + store <4 x i8> %add, ptr addrspace(1) %out2, align 4 ret void } ; Make sure this doesn't crash. -define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v7i8_to_v7f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v7i8_to_v7f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1798,14 +1798,14 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <7 x i8>, <7 x i8> addrspace(1)* %in, i32 %tid - %load = load <7 x i8>, <7 x i8> addrspace(1)* %gep, align 1 + %gep = getelementptr <7 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <7 x i8>, ptr addrspace(1) %gep, align 1 %cvt = uitofp <7 x i8> %load to <7 x float> - store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16 + store <7 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v8i8_to_v8f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v8i8_to_v8f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1919,14 +1919,14 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %in, i32 %tid - %load = load <8 x i8>, <8 x i8> addrspace(1)* %gep, align 8 + %gep = getelementptr <8 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <8 x i8>, ptr addrspace(1) %gep, align 8 %cvt = uitofp <8 x i8> %load to <8 x float> - store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16 + store <8 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: i8_zext_inreg_i32_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2004,16 +2004,16 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 
@llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %load = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %load = load i32, ptr addrspace(1) %gep, align 4 %add = add i32 %load, 2 %inreg = and i32 %add, 255 %cvt = uitofp i32 %inreg to float - store float %cvt, float addrspace(1)* %out, align 4 + store float %cvt, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: i8_zext_inreg_hi1_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2085,18 +2085,18 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %load = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %load = load i32, ptr addrspace(1) %gep, align 4 %inreg = and i32 %load, 65280 %shr = lshr i32 %inreg, 8 %cvt = uitofp i32 %shr to float - store float %cvt, float addrspace(1)* %out, align 4 + store float %cvt, ptr addrspace(1) %out, align 4 ret void } ; We don't get these ones because of the zext, but instcombine removes ; them so it shouldn't really matter. -define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @i8_zext_i32_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: i8_zext_i32_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2164,15 +2164,15 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid - %load = load i8, i8 addrspace(1)* %gep, align 1 + %gep = getelementptr i8, ptr addrspace(1) %in, i32 %tid + %load = load i8, ptr addrspace(1) %gep, align 1 %ext = zext i8 %load to i32 %cvt = uitofp i32 %ext to float - store float %cvt, float addrspace(1)* %out, align 4 + store float %cvt, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: v4i8_zext_v4i32_to_v4f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2297,15 +2297,15 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid - %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1 + %gep = getelementptr <4 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <4 x i8>, ptr addrspace(1) %gep, align 1 %ext = zext <4 x i8> %load to <4 x i32> %cvt = uitofp <4 x i32> %ext to <4 x float> - store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @extract_byte0_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) 
noalias %in) nounwind { ; SI-LABEL: extract_byte0_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2377,15 +2377,15 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %gep %and = and i32 %val, 255 %cvt = uitofp i32 %and to float - store float %cvt, float addrspace(1)* %out + store float %cvt, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @extract_byte1_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: extract_byte1_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2457,16 +2457,16 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %gep %srl = lshr i32 %val, 8 %and = and i32 %srl, 255 %cvt = uitofp i32 %and to float - store float %cvt, float addrspace(1)* %out + store float %cvt, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @extract_byte2_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: extract_byte2_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2538,16 +2538,16 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %gep %srl = lshr i32 %val, 16 %and = and i32 %srl, 255 %cvt = uitofp i32 %and to float - store float %cvt, float addrspace(1)* %out + store float %cvt, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @extract_byte3_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @extract_byte3_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: extract_byte3_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2619,16 +2619,16 @@ ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %gep %srl = lshr i32 %val, 24 %and = and i32 %srl, 255 %cvt = uitofp i32 %and to float - store float %cvt, float addrspace(1)* %out + store float %cvt, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @cvt_ubyte0_or_multiuse(i32 addrspace(1)* %in, float addrspace(1)* %out) { +define amdgpu_kernel void @cvt_ubyte0_or_multiuse(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; SI-LABEL: cvt_ubyte0_or_multiuse: ; SI: ; %bb.0: ; %bb ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2714,14 +2714,14 @@ ; 
GFX11-NEXT: s_endpgm bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %lid - %load = load i32, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %lid + %load = load i32, ptr addrspace(1) %gep %or = or i32 %load, -2147483647 %and = and i32 %or, 255 %uitofp = uitofp i32 %and to float %cast = bitcast i32 %or to float %add = fadd float %cast, %uitofp - store float %add, float addrspace(1)* %out + store float %add, ptr addrspace(1) %out ret void } @@ -2857,15 +2857,14 @@ br label %for.body.i for.body.i: ; preds = %for.body.i, %entry - %retval.sroa.0.0.copyload = load %Vec*, %Vec* addrspace(1)* undef, align 8 - %add.ptr = getelementptr inbounds %Vec, %Vec* %retval.sroa.0.0.copyload, i64 undef - %retval.sroa.0.0..sroa_cast = bitcast %Vec* %add.ptr to i32* - %retval.sroa.0.0..sroa_cast_adr = addrspacecast i32* %retval.sroa.0.0..sroa_cast to i32 addrspace(1)* - %retval.sroa.0.0.copyload.i = load i32, i32 addrspace(1)* %retval.sroa.0.0..sroa_cast_adr, align 1 + %retval.sroa.0.0.copyload = load ptr, ptr addrspace(1) undef, align 8 + %add.ptr = getelementptr inbounds %Vec, ptr %retval.sroa.0.0.copyload, i64 undef + %retval.sroa.0.0..sroa_cast_adr = addrspacecast ptr %add.ptr to ptr addrspace(1) + %retval.sroa.0.0.copyload.i = load i32, ptr addrspace(1) %retval.sroa.0.0..sroa_cast_adr, align 1 %p1.sroa.6.0.extract.shift = lshr i32 %retval.sroa.0.0.copyload.i, 24 %p1.sroa.6.0.extract.trunc = trunc i32 %p1.sroa.6.0.extract.shift to i8 %conv12 = uitofp i8 %p1.sroa.6.0.extract.trunc to float - %0 = load float, float addrspace(1)* undef, align 8 + %0 = load float, ptr addrspace(1) undef, align 8 %mul = fmul contract float %0, %conv12 %add = fadd contract float %mul, 5.000000e-01 %conv13 = fptoui float %add to i8 @@ -2877,6 +2876,6 @@ %retval.sroa.2.0.insert.insert = or i32 %retval.sroa.3.0.insert.insert, %retval.sroa.2.0.insert.ext %retval.sroa.0.0.insert.ext = and i32 %retval.sroa.0.0.copyload.i, 255 %retval.sroa.0.0.insert.insert = or i32 %retval.sroa.2.0.insert.insert, %retval.sroa.0.0.insert.ext - store i32 %retval.sroa.0.0.insert.insert, i32 addrspace(1)* undef, align 1 + store i32 %retval.sroa.0.0.insert.insert, ptr addrspace(1) undef, align 1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll b/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll --- a/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll @@ -10,10 +10,10 @@ ; SI-NOT: add ; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}} ; SI: s_endpgm -define amdgpu_kernel void @cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @cvt_flr_i32_f32_0(ptr addrspace(1) %out, float %x) #0 { %floor = call float @llvm.floor.f32(float %x) #1 %cvt = fptosi float %floor to i32 - store i32 %cvt, i32 addrspace(1)* %out + store i32 %cvt, ptr addrspace(1) %out ret void } @@ -22,11 +22,11 @@ ; SI-SAFE-NOT: v_cvt_flr_i32_f32 ; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, [[TMP]] ; SI: s_endpgm -define amdgpu_kernel void @cvt_flr_i32_f32_1(i32 addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @cvt_flr_i32_f32_1(ptr addrspace(1) %out, float %x) #0 { %fadd = fadd float %x, 1.0 %floor = call float @llvm.floor.f32(float %fadd) #1 %cvt = fptosi float %floor to i32 - store i32 %cvt, i32 addrspace(1)* %out + store i32 %cvt, ptr addrspace(1) %out ret void } @@ -35,11 +35,11 @@ ; SI-SAFE-NOT: v_cvt_flr_i32_f32 ; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}| ; 
SI: s_endpgm -define amdgpu_kernel void @cvt_flr_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @cvt_flr_i32_f32_fabs(ptr addrspace(1) %out, float %x) #0 { %x.fabs = call float @llvm.fabs.f32(float %x) #1 %floor = call float @llvm.floor.f32(float %x.fabs) #1 %cvt = fptosi float %floor to i32 - store i32 %cvt, i32 addrspace(1)* %out + store i32 %cvt, ptr addrspace(1) %out ret void } @@ -48,11 +48,11 @@ ; SI-SAFE-NOT: v_cvt_flr_i32_f32 ; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}} ; SI: s_endpgm -define amdgpu_kernel void @cvt_flr_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @cvt_flr_i32_f32_fneg(ptr addrspace(1) %out, float %x) #0 { %x.fneg = fsub float -0.000000e+00, %x %floor = call float @llvm.floor.f32(float %x.fneg) #1 %cvt = fptosi float %floor to i32 - store i32 %cvt, i32 addrspace(1)* %out + store i32 %cvt, ptr addrspace(1) %out ret void } @@ -61,12 +61,12 @@ ; SI-SAFE-NOT: v_cvt_flr_i32_f32 ; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}| ; SI: s_endpgm -define amdgpu_kernel void @cvt_flr_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @cvt_flr_i32_f32_fabs_fneg(ptr addrspace(1) %out, float %x) #0 { %x.fabs = call float @llvm.fabs.f32(float %x) #1 %x.fabs.fneg = fsub float -0.000000e+00, %x.fabs %floor = call float @llvm.floor.f32(float %x.fabs.fneg) #1 %cvt = fptosi float %floor to i32 - store i32 %cvt, i32 addrspace(1)* %out + store i32 %cvt, ptr addrspace(1) %out ret void } @@ -75,10 +75,10 @@ ; SI: v_floor_f32 ; SI: v_cvt_u32_f32_e32 ; SI: s_endpgm -define amdgpu_kernel void @no_cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @no_cvt_flr_i32_f32_0(ptr addrspace(1) %out, float %x) #0 { %floor = call float @llvm.floor.f32(float %x) #1 %cvt = fptoui float %floor to i32 - store i32 %cvt, i32 addrspace(1)* %out + store i32 %cvt, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll b/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll --- a/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll @@ -9,11 +9,11 @@ ; SI-SAFE-NOT: v_cvt_rpi_i32_f32 ; SI-NONAN: v_cvt_rpi_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}} ; SI: s_endpgm -define amdgpu_kernel void @cvt_rpi_i32_f32(i32 addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @cvt_rpi_i32_f32(ptr addrspace(1) %out, float %x) #0 { %fadd = fadd float %x, 0.5 %floor = call float @llvm.floor.f32(float %fadd) #1 %cvt = fptosi float %floor to i32 - store i32 %cvt, i32 addrspace(1)* %out + store i32 %cvt, ptr addrspace(1) %out ret void } @@ -21,12 +21,12 @@ ; SI-SAFE-NOT: v_cvt_rpi_i32_f32 ; SI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}} ; SI: s_endpgm -define amdgpu_kernel void @cvt_rpi_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @cvt_rpi_i32_f32_fabs(ptr addrspace(1) %out, float %x) #0 { %x.fabs = call float @llvm.fabs.f32(float %x) #1 %fadd = fadd float %x.fabs, 0.5 %floor = call float @llvm.floor.f32(float %fadd) #1 %cvt = fptosi float %floor to i32 - store i32 %cvt, i32 addrspace(1)* %out + store i32 %cvt, ptr addrspace(1) %out ret void } @@ -37,12 +37,12 @@ ; SI-SAFE-NOT: v_cvt_flr_i32_f32 ; SI-NONAN: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]] ; SI: s_endpgm -define amdgpu_kernel void @cvt_rpi_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @cvt_rpi_i32_f32_fneg(ptr addrspace(1) %out, float %x) #0 { %x.fneg = fsub float 
-0.000000e+00, %x %fadd = fadd float %x.fneg, 0.5 %floor = call float @llvm.floor.f32(float %fadd) #1 %cvt = fptosi float %floor to i32 - store i32 %cvt, i32 addrspace(1)* %out + store i32 %cvt, ptr addrspace(1) %out ret void } @@ -55,13 +55,13 @@ ; SI-SAFE-NOT: v_cvt_flr_i32_f32 ; SI-NONAN: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]] ; SI: s_endpgm -define amdgpu_kernel void @cvt_rpi_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @cvt_rpi_i32_f32_fabs_fneg(ptr addrspace(1) %out, float %x) #0 { %x.fabs = call float @llvm.fabs.f32(float %x) #1 %x.fabs.fneg = fsub float -0.000000e+00, %x.fabs %fadd = fadd float %x.fabs.fneg, 0.5 %floor = call float @llvm.floor.f32(float %fadd) #1 %cvt = fptosi float %floor to i32 - store i32 %cvt, i32 addrspace(1)* %out + store i32 %cvt, ptr addrspace(1) %out ret void } @@ -71,11 +71,11 @@ ; SI: v_floor_f32 ; SI: v_cvt_u32_f32 ; SI: s_endpgm -define amdgpu_kernel void @no_cvt_rpi_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @no_cvt_rpi_i32_f32_0(ptr addrspace(1) %out, float %x) #0 { %fadd = fadd float %x, 0.5 %floor = call float @llvm.floor.f32(float %fadd) #1 %cvt = fptoui float %floor to i32 - store i32 %cvt, i32 addrspace(1)* %out + store i32 %cvt, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll --- a/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll @@ -39,15 +39,15 @@ ret i32 %x } -define amdgpu_kernel void @uniform_opt_lshr_and_cmp(i1 addrspace(1)* %out, i32 %x) { +define amdgpu_kernel void @uniform_opt_lshr_and_cmp(ptr addrspace(1) %out, i32 %x) { ; GCN-LABEL: name: uniform_opt_lshr_and_cmp ; GCN: bb.0.entry: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset.cast, align 4, addrspace 4) - ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.x.kernarg.offset.cast, addrspace 4) + ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4) + ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.x.kernarg.offset, addrspace 4) ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_]], implicit-def dead $scc @@ -89,10 +89,10 @@ out.true: %2 = xor i1 %1, -1 - store i1 %2, i1 addrspace(1)* %out + store i1 %2, ptr addrspace(1) %out ret void out.else: - store i1 %1, i1 addrspace(1)* %out + store i1 %1, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll --- a/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll @@ -9,22 +9,22 @@ ; CHECK: buffer_store_dword v{{[0-9]+}}, [[VADDR]], [[SADDR]] ; CHECK: 
buffer_store_dword v{{[0-9]+}}, [[VADDR]], [[SADDR]] -define amdgpu_kernel void @store_same_base_ptr(i32 addrspace(1)* %out) { +define amdgpu_kernel void @store_same_base_ptr(ptr addrspace(1) %out) { entry: %id = call i32 @llvm.amdgcn.workitem.id.x() #0 %offset = sext i32 %id to i64 %offset0 = add i64 %offset, 1027 - %ptr0 = getelementptr i32, i32 addrspace(1)* %out, i64 %offset0 - store volatile i32 3, i32 addrspace(1)* %ptr0 + %ptr0 = getelementptr i32, ptr addrspace(1) %out, i64 %offset0 + store volatile i32 3, ptr addrspace(1) %ptr0 %offset1 = add i64 %offset, 1026 - %ptr1 = getelementptr i32, i32 addrspace(1)* %out, i64 %offset1 - store volatile i32 2, i32 addrspace(1)* %ptr1 + %ptr1 = getelementptr i32, ptr addrspace(1) %out, i64 %offset1 + store volatile i32 2, ptr addrspace(1) %ptr1 %offset2 = add i64 %offset, 1025 - %ptr2 = getelementptr i32, i32 addrspace(1)* %out, i64 %offset2 - store volatile i32 1, i32 addrspace(1)* %ptr2 + %ptr2 = getelementptr i32, ptr addrspace(1) %out, i64 %offset2 + store volatile i32 1, ptr addrspace(1) %ptr2 %offset3 = add i64 %offset, 1024 - %ptr3 = getelementptr i32, i32 addrspace(1)* %out, i64 %offset3 - store volatile i32 0, i32 addrspace(1)* %ptr3 + %ptr3 = getelementptr i32, ptr addrspace(1) %out, i64 %offset3 + store volatile i32 0, ptr addrspace(1) %ptr3 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-v1i8-extractvecelt-crash.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-v1i8-extractvecelt-crash.ll --- a/llvm/test/CodeGen/AMDGPU/dagcombine-v1i8-extractvecelt-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-v1i8-extractvecelt-crash.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=gfx908 < %s | FileCheck %s -define void @wombat(i1 %cond, <1 x i8> addrspace(5)* %addr) { +define void @wombat(i1 %cond, ptr addrspace(5) %addr) { ; CHECK-LABEL: wombat: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -20,7 +20,7 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: - %load = load <1 x i8>, <1 x i8> addrspace(5)* %addr, align 1 + %load = load <1 x i8>, ptr addrspace(5) %addr, align 1 br i1 %cond, label %then, label %end then: @@ -28,6 +28,6 @@ end: %phi_value = phi <1 x i8> [%load, %entry], [zeroinitializer, %then] - store <1 x i8> %phi_value, <1 x i8> addrspace(5)* %addr, align 1 + store <1 x i8> %phi_value, ptr addrspace(5) %addr, align 1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/llvm/test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll --- a/llvm/test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll @@ -10,27 +10,27 @@ ; CHECK: {{^}}sint: ; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) { +define amdgpu_kernel void @sint(ptr addrspace(1) %out, ptr addrspace(1) %in) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 - %sint = load i32, i32 addrspace(1) * %in + %ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 + %sint = load i32, ptr addrspace(1) %in %conv = sitofp i32 %sint to float %0 = insertelement <4 x float> undef, float %conv, i32 0 %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer - store <4 x float> %splat, <4 x float> addrspace(1)* %out + store <4 x float> %splat, ptr addrspace(1) %out ret void } ;CHECK: 
{{^}}uint: ;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) { +define amdgpu_kernel void @uint(ptr addrspace(1) %out, ptr addrspace(1) %in) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 - %uint = load i32, i32 addrspace(1) * %in + %ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 + %uint = load i32, ptr addrspace(1) %in %conv = uitofp i32 %uint to float %0 = insertelement <4 x float> undef, float %conv, i32 0 %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer - store <4 x float> %splat, <4 x float> addrspace(1)* %out + store <4 x float> %splat, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll b/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll --- a/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll +++ b/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll @@ -12,7 +12,7 @@ ] sw.bb4: - %x = load i64, i64 addrspace(1)* undef, align 8 + %x = load i64, ptr addrspace(1) undef, align 8 %c = sitofp i64 %x to float %v = insertelement <2 x float> , float %c, i32 0 br label %foo.exit @@ -23,6 +23,6 @@ foo.exit: %agg = phi <2 x float> [ %v, %sw.bb4 ], [ zeroinitializer, %entry ] %s = extractelement <2 x float> %agg, i32 1 - store float %s, float addrspace(1)* undef, align 4 + store float %s, ptr addrspace(1) undef, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/disable_form_clauses.ll b/llvm/test/CodeGen/AMDGPU/disable_form_clauses.ll --- a/llvm/test/CodeGen/AMDGPU/disable_form_clauses.ll +++ b/llvm/test/CodeGen/AMDGPU/disable_form_clauses.ll @@ -7,58 +7,58 @@ ; GCN: GLOBAL_LOAD_DWORDX4_SADDR ; GCN: GLOBAL_LOAD_DWORDX4_SADDR ; GCN-NEXT: KILL -define amdgpu_kernel void @vector_clause(<4 x i32> addrspace(1)* noalias nocapture readonly %arg, <4 x i32> addrspace(1)* noalias nocapture %arg1) { +define amdgpu_kernel void @vector_clause(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture %arg1) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = zext i32 %tmp to i64 - %tmp3 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 %tmp2 - %tmp4 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp3, align 16 - %tmp5 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 %tmp2 + %tmp3 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i64 %tmp2 + %tmp4 = load <4 x i32>, ptr addrspace(1) %tmp3, align 16 + %tmp5 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg1, i64 %tmp2 %tmp6 = add nuw nsw i64 %tmp2, 1 - %tmp7 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 %tmp6 - %tmp8 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp7, align 16 - %tmp9 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 %tmp6 + %tmp7 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i64 %tmp6 + %tmp8 = load <4 x i32>, ptr addrspace(1) %tmp7, align 16 + %tmp9 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg1, i64 %tmp6 %tmp10 = add nuw nsw i64 %tmp2, 2 - %tmp11 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 %tmp10 - %tmp12 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp11, align 16 - %tmp13 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 %tmp10 + %tmp11 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i64 %tmp10 + %tmp12 = load <4 x i32>, ptr addrspace(1) %tmp11, align 16 + %tmp13 = getelementptr 
inbounds <4 x i32>, ptr addrspace(1) %arg1, i64 %tmp10 %tmp14 = add nuw nsw i64 %tmp2, 3 - %tmp15 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 %tmp14 - %tmp16 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp15, align 16 - %tmp17 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 %tmp14 - store <4 x i32> %tmp8, <4 x i32> addrspace(1)* %tmp9, align 16 - store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %tmp5, align 16 - store <4 x i32> %tmp12, <4 x i32> addrspace(1)* %tmp13, align 16 - store <4 x i32> %tmp16, <4 x i32> addrspace(1)* %tmp17, align 16 + %tmp15 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i64 %tmp14 + %tmp16 = load <4 x i32>, ptr addrspace(1) %tmp15, align 16 + %tmp17 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg1, i64 %tmp14 + store <4 x i32> %tmp8, ptr addrspace(1) %tmp9, align 16 + store <4 x i32> %tmp4, ptr addrspace(1) %tmp5, align 16 + store <4 x i32> %tmp12, ptr addrspace(1) %tmp13, align 16 + store <4 x i32> %tmp16, ptr addrspace(1) %tmp17, align 16 ret void } ; GCN-LABEL: {{^}}name:{{[ ]*}}no_vector_clause ; GCN-NOT: BUNDLE ; GCN-NOT: KILL -define amdgpu_kernel void @no_vector_clause(<4 x i32> addrspace(1)* noalias nocapture readonly %arg, <4 x i32> addrspace(1)* noalias nocapture %arg1) #0 { +define amdgpu_kernel void @no_vector_clause(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture %arg1) #0 { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = zext i32 %tmp to i64 - %tmp3 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 %tmp2 - %tmp4 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp3, align 16 - %tmp5 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 %tmp2 + %tmp3 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i64 %tmp2 + %tmp4 = load <4 x i32>, ptr addrspace(1) %tmp3, align 16 + %tmp5 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg1, i64 %tmp2 %tmp6 = add nuw nsw i64 %tmp2, 1 - %tmp7 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 %tmp6 - %tmp8 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp7, align 16 - %tmp9 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 %tmp6 + %tmp7 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i64 %tmp6 + %tmp8 = load <4 x i32>, ptr addrspace(1) %tmp7, align 16 + %tmp9 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg1, i64 %tmp6 %tmp10 = add nuw nsw i64 %tmp2, 2 - %tmp11 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 %tmp10 - %tmp12 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp11, align 16 - %tmp13 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 %tmp10 + %tmp11 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i64 %tmp10 + %tmp12 = load <4 x i32>, ptr addrspace(1) %tmp11, align 16 + %tmp13 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg1, i64 %tmp10 %tmp14 = add nuw nsw i64 %tmp2, 3 - %tmp15 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 %tmp14 - %tmp16 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp15, align 16 - %tmp17 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 %tmp14 - store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %tmp5, align 16 - store <4 x i32> %tmp8, <4 x i32> addrspace(1)* %tmp9, align 16 - store <4 x i32> %tmp12, <4 x i32> addrspace(1)* %tmp13, align 16 - store <4 x i32> %tmp16, <4 x i32> addrspace(1)* %tmp17, align 16 + %tmp15 = getelementptr inbounds <4 x i32>, ptr addrspace(1) 
%arg, i64 %tmp14 + %tmp16 = load <4 x i32>, ptr addrspace(1) %tmp15, align 16 + %tmp17 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg1, i64 %tmp14 + store <4 x i32> %tmp4, ptr addrspace(1) %tmp5, align 16 + store <4 x i32> %tmp8, ptr addrspace(1) %tmp9, align 16 + store <4 x i32> %tmp12, ptr addrspace(1) %tmp13, align 16 + store <4 x i32> %tmp16, ptr addrspace(1) %tmp17, align 16 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/disconnected-predset-break-bug.ll b/llvm/test/CodeGen/AMDGPU/disconnected-predset-break-bug.ll --- a/llvm/test/CodeGen/AMDGPU/disconnected-predset-break-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/disconnected-predset-break-bug.ll @@ -9,7 +9,7 @@ ; CHECK: ALU_PUSH_BEFORE ; CHECK-NEXT: JUMP ; CHECK-NEXT: LOOP_BREAK -define amdgpu_kernel void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) nounwind { +define amdgpu_kernel void @loop_ge(ptr addrspace(1) nocapture %out, i32 %iterations) nounwind { entry: %cmp5 = icmp sgt i32 %iterations, 0 br i1 %cmp5, label %for.body, label %for.end @@ -18,8 +18,8 @@ %i.07.in = phi i32 [ %i.07, %for.body ], [ %iterations, %entry ] %ai.06 = phi i32 [ %add, %for.body ], [ 0, %entry ] %i.07 = add nsw i32 %i.07.in, -1 - %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %ai.06 - store i32 %i.07, i32 addrspace(1)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %ai.06 + store i32 %i.07, ptr addrspace(1) %arrayidx, align 4 %add = add nsw i32 %ai.06, 1 %exitcond = icmp eq i32 %add, %iterations br i1 %exitcond, label %for.end, label %for.body diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll b/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll --- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll +++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll @@ -7,15 +7,15 @@ ; GCN: global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]], ; DPP64: v_ceil_f64_dpp [[V]], [[V]] row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} ; DPP32-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} -define amdgpu_kernel void @dpp64_ceil(i64 addrspace(1)* %arg, i64 %in1) { +define amdgpu_kernel void @dpp64_ceil(ptr addrspace(1) %arg, i64 %in1) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %id - %load = load i64, i64 addrspace(1)* %gep + %gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id + %load = load i64, ptr addrspace(1) %gep %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 337, i32 15, i32 15, i1 1) #0 %tmp1 = bitcast i64 %tmp0 to double %round = tail call double @llvm.ceil.f64(double %tmp1) %tmp2 = bitcast double %round to i64 - store i64 %tmp2, i64 addrspace(1)* %gep + store i64 %tmp2, ptr addrspace(1) %gep ret void } @@ -23,30 +23,30 @@ ; GCN: global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]], ; DPP64: v_rcp_f64_dpp [[V]], [[V]] row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} ; DPP32-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} -define amdgpu_kernel void @dpp64_rcp(i64 addrspace(1)* %arg, i64 %in1) { +define amdgpu_kernel void @dpp64_rcp(ptr addrspace(1) %arg, i64 %in1) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %id - %load = load i64, i64 addrspace(1)* %gep + %gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id + %load = load i64, ptr addrspace(1) %gep %tmp0 = call i64 
@llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 337, i32 15, i32 15, i1 1) #0 %tmp1 = bitcast i64 %tmp0 to double %rcp = call double @llvm.amdgcn.rcp.f64(double %tmp1) %tmp2 = bitcast double %rcp to i64 - store i64 %tmp2, i64 addrspace(1)* %gep + store i64 %tmp2, ptr addrspace(1) %gep ret void } ; GCN-LABEL: {{^}}dpp64_rcp_unsupported_ctl: ; GCN-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} ; GCN: v_rcp_f64_e32 -define amdgpu_kernel void @dpp64_rcp_unsupported_ctl(i64 addrspace(1)* %arg, i64 %in1) { +define amdgpu_kernel void @dpp64_rcp_unsupported_ctl(ptr addrspace(1) %arg, i64 %in1) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %id - %load = load i64, i64 addrspace(1)* %gep + %gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id + %load = load i64, ptr addrspace(1) %gep %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 1, i32 15, i32 15, i1 1) #0 %tmp1 = bitcast i64 %tmp0 to double %rcp = fdiv fast double 1.0, %tmp1 %tmp2 = bitcast double %rcp to i64 - store i64 %tmp2, i64 addrspace(1)* %gep + store i64 %tmp2, ptr addrspace(1) %gep ret void } @@ -57,15 +57,15 @@ ; GFX10PLUS-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} ; GCN: v_div_scale_f64 ; GCN: v_rcp_f64_e32 -define amdgpu_kernel void @dpp64_div(i64 addrspace(1)* %arg, i64 %in1) { +define amdgpu_kernel void @dpp64_div(ptr addrspace(1) %arg, i64 %in1) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %id - %load = load i64, i64 addrspace(1)* %gep + %gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id + %load = load i64, ptr addrspace(1) %gep %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 337, i32 15, i32 15, i1 1) #0 %tmp1 = bitcast i64 %tmp0 to double %rcp = fdiv double 15.0, %tmp1 %tmp2 = bitcast double %rcp to i64 - store i64 %tmp2, i64 addrspace(1)* %gep + store i64 %tmp2, ptr addrspace(1) %gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.ll b/llvm/test/CodeGen/AMDGPU/dpp_combine.ll --- a/llvm/test/CodeGen/AMDGPU/dpp_combine.ll +++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.ll @@ -5,44 +5,44 @@ ; GCN-LABEL: {{^}}dpp_add: ; GCN: global_load_{{dword|b32}} [[V:v[0-9]+]], ; GCN: v_add_{{(nc_)?}}u32_dpp [[V]], [[V]], [[V]] quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} -define amdgpu_kernel void @dpp_add(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @dpp_add(ptr addrspace(1) %arg) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %id - %load = load i32, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %id + %load = load i32, ptr addrspace(1) %gep %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %load, i32 %load, i32 1, i32 15, i32 15, i1 1) #0 %add = add i32 %tmp0, %load - store i32 %add, i32 addrspace(1)* %gep + store i32 %add, ptr addrspace(1) %gep ret void } ; GCN-LABEL: {{^}}dpp_ceil: ; GCN: global_load_{{dword|b32}} [[V:v[0-9]+]], ; GCN: v_ceil_f32_dpp [[V]], [[V]] quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} -define amdgpu_kernel void @dpp_ceil(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @dpp_ceil(ptr addrspace(1) %arg) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 
%id - %load = load i32, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %id + %load = load i32, ptr addrspace(1) %gep %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %load, i32 %load, i32 1, i32 15, i32 15, i1 1) #0 %tmp1 = bitcast i32 %tmp0 to float %round = tail call float @llvm.ceil.f32(float %tmp1) %tmp2 = bitcast float %round to i32 - store i32 %tmp2, i32 addrspace(1)* %gep + store i32 %tmp2, ptr addrspace(1) %gep ret void } ; GCN-LABEL: {{^}}dpp_fadd: ; GCN: global_load_{{dword|b32}} [[V:v[0-9]+]], ; GCN: v_add_f32_dpp [[V]], [[V]], [[V]] quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} -define amdgpu_kernel void @dpp_fadd(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @dpp_fadd(ptr addrspace(1) %arg) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %id - %load = load i32, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %id + %load = load i32, ptr addrspace(1) %gep %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %load, i32 %load, i32 1, i32 15, i32 15, i1 1) #0 %tmp1 = bitcast i32 %tmp0 to float %t = bitcast i32 %load to float %add = fadd float %tmp1, %t %tmp2 = bitcast float %add to i32 - store i32 %tmp2, i32 addrspace(1)* %gep + store i32 %tmp2, ptr addrspace(1) %gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll b/llvm/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll --- a/llvm/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll @@ -9,29 +9,29 @@ ; GCN: buffer_load_dword ; GCN: ds_write2_b32 ; GCN: s_endpgm -define amdgpu_kernel void @reschedule_global_load_lds_store(i32 addrspace(1)* noalias %gptr0, i32 addrspace(1)* noalias %gptr1, i32 addrspace(3)* noalias %lptr, i32 %c) #0 { +define amdgpu_kernel void @reschedule_global_load_lds_store(ptr addrspace(1) noalias %gptr0, ptr addrspace(1) noalias %gptr1, ptr addrspace(3) noalias %lptr, i32 %c) #0 { entry: %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx = shl i32 %tid, 2 - %gep0 = getelementptr i32, i32 addrspace(1)* %gptr0, i32 %idx - %gep1 = getelementptr i32, i32 addrspace(1)* %gptr1, i32 %idx - %gep2 = getelementptr i32, i32 addrspace(3)* %lptr, i32 %tid + %gep0 = getelementptr i32, ptr addrspace(1) %gptr0, i32 %idx + %gep1 = getelementptr i32, ptr addrspace(1) %gptr1, i32 %idx + %gep2 = getelementptr i32, ptr addrspace(3) %lptr, i32 %tid %cmp0 = icmp eq i32 %c, 0 br i1 %cmp0, label %for.body, label %exit for.body: ; preds = %for.body, %entry %i = phi i32 [ 0, %entry ], [ %i.inc, %for.body ] - %gptr0.phi = phi i32 addrspace(1)* [ %gep0, %entry ], [ %gep0.inc, %for.body ] - %gptr1.phi = phi i32 addrspace(1)* [ %gep1, %entry ], [ %gep1.inc, %for.body ] - %lptr0.phi = phi i32 addrspace(3)* [ %gep2, %entry ], [ %gep2.inc, %for.body ] - %lptr1 = getelementptr i32, i32 addrspace(3)* %lptr0.phi, i32 2 - %val0 = load i32, i32 addrspace(1)* %gep0 - store i32 %val0, i32 addrspace(3)* %lptr0.phi - %val1 = load i32, i32 addrspace(1)* %gep1 - store i32 %val1, i32 addrspace(3)* %lptr1 - %gep0.inc = getelementptr i32, i32 addrspace(1)* %gptr0.phi, i32 4 - %gep1.inc = getelementptr i32, i32 addrspace(1)* %gptr1.phi, i32 4 - %gep2.inc = getelementptr i32, i32 addrspace(3)* %lptr0.phi, i32 4 + %gptr0.phi = phi ptr addrspace(1) [ %gep0, %entry ], [ %gep0.inc, %for.body ] + %gptr1.phi = phi ptr addrspace(1) [ %gep1, %entry ], [ %gep1.inc, %for.body ] + %lptr0.phi = phi ptr 
addrspace(3) [ %gep2, %entry ], [ %gep2.inc, %for.body ] + %lptr1 = getelementptr i32, ptr addrspace(3) %lptr0.phi, i32 2 + %val0 = load i32, ptr addrspace(1) %gep0 + store i32 %val0, ptr addrspace(3) %lptr0.phi + %val1 = load i32, ptr addrspace(1) %gep1 + store i32 %val1, ptr addrspace(3) %lptr1 + %gep0.inc = getelementptr i32, ptr addrspace(1) %gptr0.phi, i32 4 + %gep1.inc = getelementptr i32, ptr addrspace(1) %gptr1.phi, i32 4 + %gep2.inc = getelementptr i32, ptr addrspace(3) %lptr0.phi, i32 4 %i.inc = add nsw i32 %i, 1 %cmp1 = icmp ne i32 %i, 256 br i1 %cmp1, label %for.body, label %exit diff --git a/llvm/test/CodeGen/AMDGPU/ds-alignment.ll b/llvm/test/CodeGen/AMDGPU/ds-alignment.ll --- a/llvm/test/CodeGen/AMDGPU/ds-alignment.ll +++ b/llvm/test/CodeGen/AMDGPU/ds-alignment.ll @@ -4,7 +4,7 @@ ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck %s -check-prefixes=GCN,UNALIGNED,UNALIGNED-SDAG ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck %s -check-prefixes=GCN,UNALIGNED,UNALIGNED-GISEL -define amdgpu_kernel void @ds1align1(i8 addrspace(3)* %in, i8 addrspace(3)* %out) { +define amdgpu_kernel void @ds1align1(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; GCN-LABEL: ds1align1: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -15,12 +15,12 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: ds_write_b8 v1, v0 ; GCN-NEXT: s_endpgm - %val = load i8, i8 addrspace(3)* %in, align 1 - store i8 %val, i8 addrspace(3)* %out, align 1 + %val = load i8, ptr addrspace(3) %in, align 1 + store i8 %val, ptr addrspace(3) %out, align 1 ret void } -define amdgpu_kernel void @ds2align1(i16 addrspace(3)* %in, i16 addrspace(3)* %out) { +define amdgpu_kernel void @ds2align1(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; ALIGNED-SDAG-LABEL: ds2align1: ; ALIGNED-SDAG: ; %bb.0: ; ALIGNED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -60,12 +60,12 @@ ; UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; UNALIGNED-NEXT: ds_write_b16 v1, v0 ; UNALIGNED-NEXT: s_endpgm - %val = load i16, i16 addrspace(3)* %in, align 1 - store i16 %val, i16 addrspace(3)* %out, align 1 + %val = load i16, ptr addrspace(3) %in, align 1 + store i16 %val, ptr addrspace(3) %out, align 1 ret void } -define amdgpu_kernel void @ds2align2(i16 addrspace(3)* %in, i16 addrspace(3)* %out) { +define amdgpu_kernel void @ds2align2(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; GCN-LABEL: ds2align2: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -76,12 +76,12 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: ds_write_b16 v1, v0 ; GCN-NEXT: s_endpgm - %val = load i16, i16 addrspace(3)* %in, align 2 - store i16 %val, i16 addrspace(3)* %out, align 2 + %val = load i16, ptr addrspace(3) %in, align 2 + store i16 %val, ptr addrspace(3) %out, align 2 ret void } -define amdgpu_kernel void @ds4align1(i32 addrspace(3)* %in, i32 addrspace(3)* %out) { +define amdgpu_kernel void @ds4align1(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; ALIGNED-SDAG-LABEL: ds4align1: ; ALIGNED-SDAG: ; %bb.0: ; ALIGNED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -138,12 +138,12 @@ ; UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; UNALIGNED-NEXT: ds_write_b32 v1, v0 ; UNALIGNED-NEXT: s_endpgm - %val = load i32, i32 addrspace(3)* %in, align 1 - store i32 %val, i32 addrspace(3)* %out, align 1 + %val = load i32, ptr addrspace(3) %in, align 1 + store i32 %val, ptr addrspace(3) %out, align 1 ret void } -define amdgpu_kernel void @ds4align2(i32 addrspace(3)* %in, i32 
addrspace(3)* %out) { +define amdgpu_kernel void @ds4align2(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; ALIGNED-SDAG-LABEL: ds4align2: ; ALIGNED-SDAG: ; %bb.0: ; ALIGNED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -182,12 +182,12 @@ ; UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; UNALIGNED-NEXT: ds_write_b32 v1, v0 ; UNALIGNED-NEXT: s_endpgm - %val = load i32, i32 addrspace(3)* %in, align 2 - store i32 %val, i32 addrspace(3)* %out, align 2 + %val = load i32, ptr addrspace(3) %in, align 2 + store i32 %val, ptr addrspace(3) %out, align 2 ret void } -define amdgpu_kernel void @ds4align4(i32 addrspace(3)* %in, i32 addrspace(3)* %out) { +define amdgpu_kernel void @ds4align4(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; GCN-LABEL: ds4align4: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -198,12 +198,12 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: ds_write_b32 v1, v0 ; GCN-NEXT: s_endpgm - %val = load i32, i32 addrspace(3)* %in, align 4 - store i32 %val, i32 addrspace(3)* %out, align 4 + %val = load i32, ptr addrspace(3) %in, align 4 + store i32 %val, ptr addrspace(3) %out, align 4 ret void } -define amdgpu_kernel void @ds8align1(<2 x i32> addrspace(3)* %in, <2 x i32> addrspace(3)* %out) { +define amdgpu_kernel void @ds8align1(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; ALIGNED-SDAG-LABEL: ds8align1: ; ALIGNED-SDAG: ; %bb.0: ; ALIGNED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -283,12 +283,12 @@ ; UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; UNALIGNED-NEXT: ds_write_b64 v2, v[0:1] ; UNALIGNED-NEXT: s_endpgm - %val = load <2 x i32>, <2 x i32> addrspace(3)* %in, align 1 - store <2 x i32> %val, <2 x i32> addrspace(3)* %out, align 1 + %val = load <2 x i32>, ptr addrspace(3) %in, align 1 + store <2 x i32> %val, ptr addrspace(3) %out, align 1 ret void } -define amdgpu_kernel void @ds8align2(<2 x i32> addrspace(3)* %in, <2 x i32> addrspace(3)* %out) { +define amdgpu_kernel void @ds8align2(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; ALIGNED-SDAG-LABEL: ds8align2: ; ALIGNED-SDAG: ; %bb.0: ; ALIGNED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -339,12 +339,12 @@ ; UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; UNALIGNED-NEXT: ds_write_b64 v2, v[0:1] ; UNALIGNED-NEXT: s_endpgm - %val = load <2 x i32>, <2 x i32> addrspace(3)* %in, align 2 - store <2 x i32> %val, <2 x i32> addrspace(3)* %out, align 2 + %val = load <2 x i32>, ptr addrspace(3) %in, align 2 + store <2 x i32> %val, ptr addrspace(3) %out, align 2 ret void } -define amdgpu_kernel void @ds8align4(<2 x i32> addrspace(3)* %in, <2 x i32> addrspace(3)* %out) { +define amdgpu_kernel void @ds8align4(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; GCN-LABEL: ds8align4: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -355,12 +355,12 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: ds_write2_b32 v2, v0, v1 offset1:1 ; GCN-NEXT: s_endpgm - %val = load <2 x i32>, <2 x i32> addrspace(3)* %in, align 4 - store <2 x i32> %val, <2 x i32> addrspace(3)* %out, align 4 + %val = load <2 x i32>, ptr addrspace(3) %in, align 4 + store <2 x i32> %val, ptr addrspace(3) %out, align 4 ret void } -define amdgpu_kernel void @ds8align8(<2 x i32> addrspace(3)* %in, <2 x i32> addrspace(3)* %out) { +define amdgpu_kernel void @ds8align8(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; GCN-LABEL: ds8align8: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -371,12 +371,12 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: ds_write_b64 v2, v[0:1] ; GCN-NEXT: s_endpgm - %val = load <2 x i32>, <2 x i32> addrspace(3)* 
%in, align 8 - store <2 x i32> %val, <2 x i32> addrspace(3)* %out, align 8 + %val = load <2 x i32>, ptr addrspace(3) %in, align 8 + store <2 x i32> %val, ptr addrspace(3) %out, align 8 ret void } -define amdgpu_kernel void @ds12align1(<3 x i32> addrspace(3)* %in, <3 x i32> addrspace(3)* %out) { +define amdgpu_kernel void @ds12align1(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; ALIGNED-SDAG-LABEL: ds12align1: ; ALIGNED-SDAG: ; %bb.0: ; ALIGNED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -483,12 +483,12 @@ ; UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; UNALIGNED-NEXT: ds_write_b96 v3, v[0:2] ; UNALIGNED-NEXT: s_endpgm - %val = load <3 x i32>, <3 x i32> addrspace(3)* %in, align 1 - store <3 x i32> %val, <3 x i32> addrspace(3)* %out, align 1 + %val = load <3 x i32>, ptr addrspace(3) %in, align 1 + store <3 x i32> %val, ptr addrspace(3) %out, align 1 ret void } -define amdgpu_kernel void @ds12align2(<3 x i32> addrspace(3)* %in, <3 x i32> addrspace(3)* %out) { +define amdgpu_kernel void @ds12align2(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; ALIGNED-SDAG-LABEL: ds12align2: ; ALIGNED-SDAG: ; %bb.0: ; ALIGNED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -549,12 +549,12 @@ ; UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; UNALIGNED-NEXT: ds_write_b96 v3, v[0:2] ; UNALIGNED-NEXT: s_endpgm - %val = load <3 x i32>, <3 x i32> addrspace(3)* %in, align 2 - store <3 x i32> %val, <3 x i32> addrspace(3)* %out, align 2 + %val = load <3 x i32>, ptr addrspace(3) %in, align 2 + store <3 x i32> %val, ptr addrspace(3) %out, align 2 ret void } -define amdgpu_kernel void @ds12align4(<3 x i32> addrspace(3)* %in, <3 x i32> addrspace(3)* %out) { +define amdgpu_kernel void @ds12align4(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; ALIGNED-LABEL: ds12align4: ; ALIGNED: ; %bb.0: ; ALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -593,12 +593,12 @@ ; UNALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; UNALIGNED-GISEL-NEXT: ds_write_b96 v3, v[0:2] ; UNALIGNED-GISEL-NEXT: s_endpgm - %val = load <3 x i32>, <3 x i32> addrspace(3)* %in, align 4 - store <3 x i32> %val, <3 x i32> addrspace(3)* %out, align 4 + %val = load <3 x i32>, ptr addrspace(3) %in, align 4 + store <3 x i32> %val, ptr addrspace(3) %out, align 4 ret void } -define amdgpu_kernel void @ds12align8(<3 x i32> addrspace(3)* %in, <3 x i32> addrspace(3)* %out) { +define amdgpu_kernel void @ds12align8(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; ALIGNED-SDAG-LABEL: ds12align8: ; ALIGNED-SDAG: ; %bb.0: ; ALIGNED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -651,12 +651,12 @@ ; UNALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; UNALIGNED-GISEL-NEXT: ds_write_b96 v3, v[0:2] ; UNALIGNED-GISEL-NEXT: s_endpgm - %val = load <3 x i32>, <3 x i32> addrspace(3)* %in, align 8 - store <3 x i32> %val, <3 x i32> addrspace(3)* %out, align 8 + %val = load <3 x i32>, ptr addrspace(3) %in, align 8 + store <3 x i32> %val, ptr addrspace(3) %out, align 8 ret void } -define amdgpu_kernel void @ds12align16(<3 x i32> addrspace(3)* %in, <3 x i32> addrspace(3)* %out) { +define amdgpu_kernel void @ds12align16(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; GCN-LABEL: ds12align16: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -667,12 +667,12 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: ds_write_b96 v3, v[0:2] ; GCN-NEXT: s_endpgm - %val = load <3 x i32>, <3 x i32> addrspace(3)* %in, align 16 - store <3 x i32> %val, <3 x i32> addrspace(3)* %out, align 16 + %val = load <3 x i32>, ptr addrspace(3) %in, align 16 + store <3 x i32> %val, ptr addrspace(3) %out, align 16 
ret void } -define amdgpu_kernel void @ds16align1(<4 x i32> addrspace(3)* %in, <4 x i32> addrspace(3)* %out) { +define amdgpu_kernel void @ds16align1(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; ALIGNED-SDAG-LABEL: ds16align1: ; ALIGNED-SDAG: ; %bb.0: ; ALIGNED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -805,12 +805,12 @@ ; UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; UNALIGNED-NEXT: ds_write_b128 v4, v[0:3] ; UNALIGNED-NEXT: s_endpgm - %val = load <4 x i32>, <4 x i32> addrspace(3)* %in, align 1 - store <4 x i32> %val, <4 x i32> addrspace(3)* %out, align 1 + %val = load <4 x i32>, ptr addrspace(3) %in, align 1 + store <4 x i32> %val, ptr addrspace(3) %out, align 1 ret void } -define amdgpu_kernel void @ds16align2(<4 x i32> addrspace(3)* %in, <4 x i32> addrspace(3)* %out) { +define amdgpu_kernel void @ds16align2(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; ALIGNED-SDAG-LABEL: ds16align2: ; ALIGNED-SDAG: ; %bb.0: ; ALIGNED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -881,12 +881,12 @@ ; UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; UNALIGNED-NEXT: ds_write_b128 v4, v[0:3] ; UNALIGNED-NEXT: s_endpgm - %val = load <4 x i32>, <4 x i32> addrspace(3)* %in, align 2 - store <4 x i32> %val, <4 x i32> addrspace(3)* %out, align 2 + %val = load <4 x i32>, ptr addrspace(3) %in, align 2 + store <4 x i32> %val, ptr addrspace(3) %out, align 2 ret void } -define amdgpu_kernel void @ds16align4(<4 x i32> addrspace(3)* %in, <4 x i32> addrspace(3)* %out) { +define amdgpu_kernel void @ds16align4(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; ALIGNED-LABEL: ds16align4: ; ALIGNED: ; %bb.0: ; ALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -925,12 +925,12 @@ ; UNALIGNED-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; UNALIGNED-GISEL-NEXT: ds_write2_b64 v4, v[0:1], v[2:3] offset1:1 ; UNALIGNED-GISEL-NEXT: s_endpgm - %val = load <4 x i32>, <4 x i32> addrspace(3)* %in, align 4 - store <4 x i32> %val, <4 x i32> addrspace(3)* %out, align 4 + %val = load <4 x i32>, ptr addrspace(3) %in, align 4 + store <4 x i32> %val, ptr addrspace(3) %out, align 4 ret void } -define amdgpu_kernel void @ds16align8(<4 x i32> addrspace(3)* %in, <4 x i32> addrspace(3)* %out) { +define amdgpu_kernel void @ds16align8(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; GCN-LABEL: ds16align8: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -941,12 +941,12 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: ds_write2_b64 v4, v[0:1], v[2:3] offset1:1 ; GCN-NEXT: s_endpgm - %val = load <4 x i32>, <4 x i32> addrspace(3)* %in, align 8 - store <4 x i32> %val, <4 x i32> addrspace(3)* %out, align 8 + %val = load <4 x i32>, ptr addrspace(3) %in, align 8 + store <4 x i32> %val, ptr addrspace(3) %out, align 8 ret void } -define amdgpu_kernel void @ds16align16(<4 x i32> addrspace(3)* %in, <4 x i32> addrspace(3)* %out) { +define amdgpu_kernel void @ds16align16(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; GCN-LABEL: ds16align16: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -957,7 +957,7 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: ds_write_b128 v4, v[0:3] ; GCN-NEXT: s_endpgm - %val = load <4 x i32>, <4 x i32> addrspace(3)* %in, align 16 - store <4 x i32> %val, <4 x i32> addrspace(3)* %out, align 16 + %val = load <4 x i32>, ptr addrspace(3) %in, align 16 + store <4 x i32> %val, ptr addrspace(3) %out, align 16 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll b/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll --- a/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll +++ 
b/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll @@ -17,32 +17,32 @@ ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset0:72 offset1:172 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset0:144 offset1:244 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset0:88 offset1:188 -define amdgpu_kernel void @ds_read32_combine_stride_400(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) { +define amdgpu_kernel void @ds_read32_combine_stride_400(ptr addrspace(3) nocapture readonly %arg, ptr nocapture %arg1) { bb: - %tmp = load float, float addrspace(3)* %arg, align 4 + %tmp = load float, ptr addrspace(3) %arg, align 4 %tmp2 = fadd float %tmp, 0.000000e+00 - %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100 - %tmp4 = load float, float addrspace(3)* %tmp3, align 4 + %tmp3 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 100 + %tmp4 = load float, ptr addrspace(3) %tmp3, align 4 %tmp5 = fadd float %tmp2, %tmp4 - %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200 - %tmp7 = load float, float addrspace(3)* %tmp6, align 4 + %tmp6 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 200 + %tmp7 = load float, ptr addrspace(3) %tmp6, align 4 %tmp8 = fadd float %tmp5, %tmp7 - %tmp9 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300 - %tmp10 = load float, float addrspace(3)* %tmp9, align 4 + %tmp9 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 300 + %tmp10 = load float, ptr addrspace(3) %tmp9, align 4 %tmp11 = fadd float %tmp8, %tmp10 - %tmp12 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400 - %tmp13 = load float, float addrspace(3)* %tmp12, align 4 + %tmp12 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 400 + %tmp13 = load float, ptr addrspace(3) %tmp12, align 4 %tmp14 = fadd float %tmp11, %tmp13 - %tmp15 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500 - %tmp16 = load float, float addrspace(3)* %tmp15, align 4 + %tmp15 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 500 + %tmp16 = load float, ptr addrspace(3) %tmp15, align 4 %tmp17 = fadd float %tmp14, %tmp16 - %tmp18 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600 - %tmp19 = load float, float addrspace(3)* %tmp18, align 4 + %tmp18 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 600 + %tmp19 = load float, ptr addrspace(3) %tmp18, align 4 %tmp20 = fadd float %tmp17, %tmp19 - %tmp21 = getelementptr inbounds float, float addrspace(3)* %arg, i32 700 - %tmp22 = load float, float addrspace(3)* %tmp21, align 4 + %tmp21 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 700 + %tmp22 = load float, ptr addrspace(3) %tmp21, align 4 %tmp23 = fadd float %tmp20, %tmp22 - store float %tmp23, float *%arg1, align 4 + store float %tmp23, ptr %arg1, align 4 ret void } @@ -60,33 +60,33 @@ ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset0:184 offset1:204 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset0:224 offset1:244 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset0:8 offset1:28 -define amdgpu_kernel void @ds_read32_combine_stride_20(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) { +define amdgpu_kernel void @ds_read32_combine_stride_20(ptr addrspace(3) nocapture readonly %arg, ptr nocapture %arg1) { bb: - %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 400 - %tmp1 = load float, float addrspace(3)* %tmp, align 4 + %tmp = getelementptr inbounds float, ptr addrspace(3) %arg, i32 400 + 
%tmp1 = load float, ptr addrspace(3) %tmp, align 4 %tmp2 = fadd float %tmp1, 0.000000e+00 - %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 420 - %tmp4 = load float, float addrspace(3)* %tmp3, align 4 + %tmp3 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 420 + %tmp4 = load float, ptr addrspace(3) %tmp3, align 4 %tmp5 = fadd float %tmp2, %tmp4 - %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 440 - %tmp7 = load float, float addrspace(3)* %tmp6, align 4 + %tmp6 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 440 + %tmp7 = load float, ptr addrspace(3) %tmp6, align 4 %tmp8 = fadd float %tmp5, %tmp7 - %tmp9 = getelementptr inbounds float, float addrspace(3)* %arg, i32 460 - %tmp10 = load float, float addrspace(3)* %tmp9, align 4 + %tmp9 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 460 + %tmp10 = load float, ptr addrspace(3) %tmp9, align 4 %tmp11 = fadd float %tmp8, %tmp10 - %tmp12 = getelementptr inbounds float, float addrspace(3)* %arg, i32 480 - %tmp13 = load float, float addrspace(3)* %tmp12, align 4 + %tmp12 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 480 + %tmp13 = load float, ptr addrspace(3) %tmp12, align 4 %tmp14 = fadd float %tmp11, %tmp13 - %tmp15 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500 - %tmp16 = load float, float addrspace(3)* %tmp15, align 4 + %tmp15 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 500 + %tmp16 = load float, ptr addrspace(3) %tmp15, align 4 %tmp17 = fadd float %tmp14, %tmp16 - %tmp18 = getelementptr inbounds float, float addrspace(3)* %arg, i32 520 - %tmp19 = load float, float addrspace(3)* %tmp18, align 4 + %tmp18 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 520 + %tmp19 = load float, ptr addrspace(3) %tmp18, align 4 %tmp20 = fadd float %tmp17, %tmp19 - %tmp21 = getelementptr inbounds float, float addrspace(3)* %arg, i32 540 - %tmp22 = load float, float addrspace(3)* %tmp21, align 4 + %tmp21 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 540 + %tmp22 = load float, ptr addrspace(3) %tmp21, align 4 %tmp23 = fadd float %tmp20, %tmp22 - store float %tmp23, float *%arg1, align 4 + store float %tmp23, ptr %arg1, align 4 ret void } @@ -106,32 +106,32 @@ ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset0:88 offset1:188 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset0:144 offset1:244 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset0:72 offset1:172 -define amdgpu_kernel void @ds_read32_combine_stride_400_back(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) { +define amdgpu_kernel void @ds_read32_combine_stride_400_back(ptr addrspace(3) nocapture readonly %arg, ptr nocapture %arg1) { bb: - %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 700 - %tmp2 = load float, float addrspace(3)* %tmp, align 4 + %tmp = getelementptr inbounds float, ptr addrspace(3) %arg, i32 700 + %tmp2 = load float, ptr addrspace(3) %tmp, align 4 %tmp3 = fadd float %tmp2, 0.000000e+00 - %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600 - %tmp5 = load float, float addrspace(3)* %tmp4, align 4 + %tmp4 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 600 + %tmp5 = load float, ptr addrspace(3) %tmp4, align 4 %tmp6 = fadd float %tmp3, %tmp5 - %tmp7 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500 - %tmp8 = load float, float addrspace(3)* %tmp7, align 4 + %tmp7 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 500 + %tmp8 = load 
float, ptr addrspace(3) %tmp7, align 4 %tmp9 = fadd float %tmp6, %tmp8 - %tmp10 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400 - %tmp11 = load float, float addrspace(3)* %tmp10, align 4 + %tmp10 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 400 + %tmp11 = load float, ptr addrspace(3) %tmp10, align 4 %tmp12 = fadd float %tmp9, %tmp11 - %tmp13 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300 - %tmp14 = load float, float addrspace(3)* %tmp13, align 4 + %tmp13 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 300 + %tmp14 = load float, ptr addrspace(3) %tmp13, align 4 %tmp15 = fadd float %tmp12, %tmp14 - %tmp16 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200 - %tmp17 = load float, float addrspace(3)* %tmp16, align 4 + %tmp16 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 200 + %tmp17 = load float, ptr addrspace(3) %tmp16, align 4 %tmp18 = fadd float %tmp15, %tmp17 - %tmp19 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100 - %tmp20 = load float, float addrspace(3)* %tmp19, align 4 + %tmp19 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 100 + %tmp20 = load float, ptr addrspace(3) %tmp19, align 4 %tmp21 = fadd float %tmp18, %tmp20 - %tmp22 = load float, float addrspace(3)* %arg, align 4 + %tmp22 = load float, ptr addrspace(3) %arg, align 4 %tmp23 = fadd float %tmp21, %tmp22 - store float %tmp23, float *%arg1, align 4 + store float %tmp23, ptr %arg1, align 4 ret void } @@ -142,32 +142,32 @@ ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:64 offset1:96 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:128 offset1:160 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:192 offset1:224 -define amdgpu_kernel void @ds_read32_combine_stride_8192(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) { +define amdgpu_kernel void @ds_read32_combine_stride_8192(ptr addrspace(3) nocapture readonly %arg, ptr nocapture %arg1) { bb: - %tmp = load float, float addrspace(3)* %arg, align 4 + %tmp = load float, ptr addrspace(3) %arg, align 4 %tmp2 = fadd float %tmp, 0.000000e+00 - %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2048 - %tmp4 = load float, float addrspace(3)* %tmp3, align 4 + %tmp3 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 2048 + %tmp4 = load float, ptr addrspace(3) %tmp3, align 4 %tmp5 = fadd float %tmp2, %tmp4 - %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4096 - %tmp7 = load float, float addrspace(3)* %tmp6, align 4 + %tmp6 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 4096 + %tmp7 = load float, ptr addrspace(3) %tmp6, align 4 %tmp8 = fadd float %tmp5, %tmp7 - %tmp9 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6144 - %tmp10 = load float, float addrspace(3)* %tmp9, align 4 + %tmp9 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 6144 + %tmp10 = load float, ptr addrspace(3) %tmp9, align 4 %tmp11 = fadd float %tmp8, %tmp10 - %tmp12 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8192 - %tmp13 = load float, float addrspace(3)* %tmp12, align 4 + %tmp12 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 8192 + %tmp13 = load float, ptr addrspace(3) %tmp12, align 4 %tmp14 = fadd float %tmp11, %tmp13 - %tmp15 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10240 - %tmp16 = load float, float addrspace(3)* %tmp15, align 4 + %tmp15 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 10240 + 
%tmp16 = load float, ptr addrspace(3) %tmp15, align 4 %tmp17 = fadd float %tmp14, %tmp16 - %tmp18 = getelementptr inbounds float, float addrspace(3)* %arg, i32 12288 - %tmp19 = load float, float addrspace(3)* %tmp18, align 4 + %tmp18 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 12288 + %tmp19 = load float, ptr addrspace(3) %tmp18, align 4 %tmp20 = fadd float %tmp17, %tmp19 - %tmp21 = getelementptr inbounds float, float addrspace(3)* %arg, i32 14336 - %tmp22 = load float, float addrspace(3)* %tmp21, align 4 + %tmp21 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 14336 + %tmp22 = load float, ptr addrspace(3) %tmp21, align 4 %tmp23 = fadd float %tmp20, %tmp22 - store float %tmp23, float *%arg1, align 4 + store float %tmp23, ptr %arg1, align 4 ret void } @@ -181,27 +181,27 @@ ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:32 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset0:64 offset1:96 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset0:128 offset1:160 -define amdgpu_kernel void @ds_read32_combine_stride_8192_shifted(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) { +define amdgpu_kernel void @ds_read32_combine_stride_8192_shifted(ptr addrspace(3) nocapture readonly %arg, ptr nocapture %arg1) { bb: - %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 2 - %tmp2 = load float, float addrspace(3)* %tmp, align 4 + %tmp = getelementptr inbounds float, ptr addrspace(3) %arg, i32 2 + %tmp2 = load float, ptr addrspace(3) %tmp, align 4 %tmp3 = fadd float %tmp2, 0.000000e+00 - %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2050 - %tmp5 = load float, float addrspace(3)* %tmp4, align 4 + %tmp4 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 2050 + %tmp5 = load float, ptr addrspace(3) %tmp4, align 4 %tmp6 = fadd float %tmp3, %tmp5 - %tmp7 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4098 - %tmp8 = load float, float addrspace(3)* %tmp7, align 4 + %tmp7 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 4098 + %tmp8 = load float, ptr addrspace(3) %tmp7, align 4 %tmp9 = fadd float %tmp6, %tmp8 - %tmp10 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6146 - %tmp11 = load float, float addrspace(3)* %tmp10, align 4 + %tmp10 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 6146 + %tmp11 = load float, ptr addrspace(3) %tmp10, align 4 %tmp12 = fadd float %tmp9, %tmp11 - %tmp13 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8194 - %tmp14 = load float, float addrspace(3)* %tmp13, align 4 + %tmp13 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 8194 + %tmp14 = load float, ptr addrspace(3) %tmp13, align 4 %tmp15 = fadd float %tmp12, %tmp14 - %tmp16 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10242 - %tmp17 = load float, float addrspace(3)* %tmp16, align 4 + %tmp16 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 10242 + %tmp17 = load float, ptr addrspace(3) %tmp16, align 4 %tmp18 = fadd float %tmp15, %tmp17 - store float %tmp18, float *%arg1, align 4 + store float %tmp18, ptr %arg1, align 4 ret void } @@ -216,32 +216,32 @@ ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:100 offset1:150 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:200 offset1:250 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset0:44 offset1:94 -define amdgpu_kernel void @ds_read64_combine_stride_400(double addrspace(3)* nocapture readonly %arg, double *nocapture %arg1) { 
+define amdgpu_kernel void @ds_read64_combine_stride_400(ptr addrspace(3) nocapture readonly %arg, ptr nocapture %arg1) { bb: - %tmp = load double, double addrspace(3)* %arg, align 8 + %tmp = load double, ptr addrspace(3) %arg, align 8 %tmp2 = fadd double %tmp, 0.000000e+00 - %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 50 - %tmp4 = load double, double addrspace(3)* %tmp3, align 8 + %tmp3 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 50 + %tmp4 = load double, ptr addrspace(3) %tmp3, align 8 %tmp5 = fadd double %tmp2, %tmp4 - %tmp6 = getelementptr inbounds double, double addrspace(3)* %arg, i32 100 - %tmp7 = load double, double addrspace(3)* %tmp6, align 8 + %tmp6 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 100 + %tmp7 = load double, ptr addrspace(3) %tmp6, align 8 %tmp8 = fadd double %tmp5, %tmp7 - %tmp9 = getelementptr inbounds double, double addrspace(3)* %arg, i32 150 - %tmp10 = load double, double addrspace(3)* %tmp9, align 8 + %tmp9 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 150 + %tmp10 = load double, ptr addrspace(3) %tmp9, align 8 %tmp11 = fadd double %tmp8, %tmp10 - %tmp12 = getelementptr inbounds double, double addrspace(3)* %arg, i32 200 - %tmp13 = load double, double addrspace(3)* %tmp12, align 8 + %tmp12 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 200 + %tmp13 = load double, ptr addrspace(3) %tmp12, align 8 %tmp14 = fadd double %tmp11, %tmp13 - %tmp15 = getelementptr inbounds double, double addrspace(3)* %arg, i32 250 - %tmp16 = load double, double addrspace(3)* %tmp15, align 8 + %tmp15 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 250 + %tmp16 = load double, ptr addrspace(3) %tmp15, align 8 %tmp17 = fadd double %tmp14, %tmp16 - %tmp18 = getelementptr inbounds double, double addrspace(3)* %arg, i32 300 - %tmp19 = load double, double addrspace(3)* %tmp18, align 8 + %tmp18 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 300 + %tmp19 = load double, ptr addrspace(3) %tmp18, align 8 %tmp20 = fadd double %tmp17, %tmp19 - %tmp21 = getelementptr inbounds double, double addrspace(3)* %arg, i32 350 - %tmp22 = load double, double addrspace(3)* %tmp21, align 8 + %tmp21 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 350 + %tmp22 = load double, ptr addrspace(3) %tmp21, align 8 %tmp23 = fadd double %tmp20, %tmp22 - store double %tmp23, double *%arg1, align 8 + store double %tmp23, ptr %arg1, align 8 ret void } @@ -255,27 +255,27 @@ ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:16 ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset0:32 offset1:48 ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset0:64 offset1:80 -define amdgpu_kernel void @ds_read64_combine_stride_8192_shifted(double addrspace(3)* nocapture readonly %arg, double *nocapture %arg1) { +define amdgpu_kernel void @ds_read64_combine_stride_8192_shifted(ptr addrspace(3) nocapture readonly %arg, ptr nocapture %arg1) { bb: - %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 1 - %tmp2 = load double, double addrspace(3)* %tmp, align 8 + %tmp = getelementptr inbounds double, ptr addrspace(3) %arg, i32 1 + %tmp2 = load double, ptr addrspace(3) %tmp, align 8 %tmp3 = fadd double %tmp2, 0.000000e+00 - %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 1025 - %tmp5 = load double, double addrspace(3)* %tmp4, align 8 + %tmp4 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 1025 + %tmp5 = load double, ptr addrspace(3) %tmp4, align 
8 %tmp6 = fadd double %tmp3, %tmp5 - %tmp7 = getelementptr inbounds double, double addrspace(3)* %arg, i32 2049 - %tmp8 = load double, double addrspace(3)* %tmp7, align 8 + %tmp7 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 2049 + %tmp8 = load double, ptr addrspace(3) %tmp7, align 8 %tmp9 = fadd double %tmp6, %tmp8 - %tmp10 = getelementptr inbounds double, double addrspace(3)* %arg, i32 3073 - %tmp11 = load double, double addrspace(3)* %tmp10, align 8 + %tmp10 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 3073 + %tmp11 = load double, ptr addrspace(3) %tmp10, align 8 %tmp12 = fadd double %tmp9, %tmp11 - %tmp13 = getelementptr inbounds double, double addrspace(3)* %arg, i32 4097 - %tmp14 = load double, double addrspace(3)* %tmp13, align 8 + %tmp13 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 4097 + %tmp14 = load double, ptr addrspace(3) %tmp13, align 8 %tmp15 = fadd double %tmp12, %tmp14 - %tmp16 = getelementptr inbounds double, double addrspace(3)* %arg, i32 5121 - %tmp17 = load double, double addrspace(3)* %tmp16, align 8 + %tmp16 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 5121 + %tmp17 = load double, ptr addrspace(3) %tmp16, align 8 %tmp18 = fadd double %tmp15, %tmp17 - store double %tmp18, double *%arg1, align 8 + store double %tmp18, ptr %arg1, align 8 ret void } @@ -295,23 +295,23 @@ ; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:72 offset1:172 ; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset0:144 offset1:244 ; GCN-DAG: ds_write2_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset0:88 offset1:188 -define amdgpu_kernel void @ds_write32_combine_stride_400(float addrspace(3)* nocapture %arg) { +define amdgpu_kernel void @ds_write32_combine_stride_400(ptr addrspace(3) nocapture %arg) { bb: - store float 1.000000e+00, float addrspace(3)* %arg, align 4 - %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 100 - store float 1.000000e+00, float addrspace(3)* %tmp, align 4 - %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200 - store float 1.000000e+00, float addrspace(3)* %tmp1, align 4 - %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300 - store float 1.000000e+00, float addrspace(3)* %tmp2, align 4 - %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400 - store float 1.000000e+00, float addrspace(3)* %tmp3, align 4 - %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500 - store float 1.000000e+00, float addrspace(3)* %tmp4, align 4 - %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600 - store float 1.000000e+00, float addrspace(3)* %tmp5, align 4 - %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 700 - store float 1.000000e+00, float addrspace(3)* %tmp6, align 4 + store float 1.000000e+00, ptr addrspace(3) %arg, align 4 + %tmp = getelementptr inbounds float, ptr addrspace(3) %arg, i32 100 + store float 1.000000e+00, ptr addrspace(3) %tmp, align 4 + %tmp1 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 200 + store float 1.000000e+00, ptr addrspace(3) %tmp1, align 4 + %tmp2 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 300 + store float 1.000000e+00, ptr addrspace(3) %tmp2, align 4 + %tmp3 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 400 + store float 1.000000e+00, ptr addrspace(3) %tmp3, align 4 + %tmp4 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 500 + store float 1.000000e+00, ptr addrspace(3) %tmp4, align 4 + %tmp5 = 
getelementptr inbounds float, ptr addrspace(3) %arg, i32 600 + store float 1.000000e+00, ptr addrspace(3) %tmp5, align 4 + %tmp6 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 700 + store float 1.000000e+00, ptr addrspace(3) %tmp6, align 4 ret void } @@ -331,23 +331,23 @@ ; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset0:144 offset1:244 ; GCN-DAG: ds_write2_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset0:72 offset1:172 ; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 -define amdgpu_kernel void @ds_write32_combine_stride_400_back(float addrspace(3)* nocapture %arg) { +define amdgpu_kernel void @ds_write32_combine_stride_400_back(ptr addrspace(3) nocapture %arg) { bb: - %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 700 - store float 1.000000e+00, float addrspace(3)* %tmp, align 4 - %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600 - store float 1.000000e+00, float addrspace(3)* %tmp1, align 4 - %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500 - store float 1.000000e+00, float addrspace(3)* %tmp2, align 4 - %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400 - store float 1.000000e+00, float addrspace(3)* %tmp3, align 4 - %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300 - store float 1.000000e+00, float addrspace(3)* %tmp4, align 4 - %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200 - store float 1.000000e+00, float addrspace(3)* %tmp5, align 4 - %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100 - store float 1.000000e+00, float addrspace(3)* %tmp6, align 4 - store float 1.000000e+00, float addrspace(3)* %arg, align 4 + %tmp = getelementptr inbounds float, ptr addrspace(3) %arg, i32 700 + store float 1.000000e+00, ptr addrspace(3) %tmp, align 4 + %tmp1 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 600 + store float 1.000000e+00, ptr addrspace(3) %tmp1, align 4 + %tmp2 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 500 + store float 1.000000e+00, ptr addrspace(3) %tmp2, align 4 + %tmp3 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 400 + store float 1.000000e+00, ptr addrspace(3) %tmp3, align 4 + %tmp4 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 300 + store float 1.000000e+00, ptr addrspace(3) %tmp4, align 4 + %tmp5 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 200 + store float 1.000000e+00, ptr addrspace(3) %tmp5, align 4 + %tmp6 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 100 + store float 1.000000e+00, ptr addrspace(3) %tmp6, align 4 + store float 1.000000e+00, ptr addrspace(3) %arg, align 4 ret void } @@ -358,23 +358,23 @@ ; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:64 offset1:96 ; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:128 offset1:160 ; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:192 offset1:224 -define amdgpu_kernel void @ds_write32_combine_stride_8192(float addrspace(3)* nocapture %arg) { +define amdgpu_kernel void @ds_write32_combine_stride_8192(ptr addrspace(3) nocapture %arg) { bb: - store float 1.000000e+00, float addrspace(3)* %arg, align 4 - %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 2048 - store float 1.000000e+00, float addrspace(3)* %tmp, align 4 - %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4096 - store float 1.000000e+00, float addrspace(3)* %tmp1, align 4 - %tmp2 = 
getelementptr inbounds float, float addrspace(3)* %arg, i32 6144 - store float 1.000000e+00, float addrspace(3)* %tmp2, align 4 - %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8192 - store float 1.000000e+00, float addrspace(3)* %tmp3, align 4 - %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10240 - store float 1.000000e+00, float addrspace(3)* %tmp4, align 4 - %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 12288 - store float 1.000000e+00, float addrspace(3)* %tmp5, align 4 - %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 14336 - store float 1.000000e+00, float addrspace(3)* %tmp6, align 4 + store float 1.000000e+00, ptr addrspace(3) %arg, align 4 + %tmp = getelementptr inbounds float, ptr addrspace(3) %arg, i32 2048 + store float 1.000000e+00, ptr addrspace(3) %tmp, align 4 + %tmp1 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 4096 + store float 1.000000e+00, ptr addrspace(3) %tmp1, align 4 + %tmp2 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 6144 + store float 1.000000e+00, ptr addrspace(3) %tmp2, align 4 + %tmp3 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 8192 + store float 1.000000e+00, ptr addrspace(3) %tmp3, align 4 + %tmp4 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 10240 + store float 1.000000e+00, ptr addrspace(3) %tmp4, align 4 + %tmp5 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 12288 + store float 1.000000e+00, ptr addrspace(3) %tmp5, align 4 + %tmp6 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 14336 + store float 1.000000e+00, ptr addrspace(3) %tmp6, align 4 ret void } @@ -388,20 +388,20 @@ ; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32 ; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:64 offset1:96 ; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:128 offset1:160 -define amdgpu_kernel void @ds_write32_combine_stride_8192_shifted(float addrspace(3)* nocapture %arg) { +define amdgpu_kernel void @ds_write32_combine_stride_8192_shifted(ptr addrspace(3) nocapture %arg) { bb: - %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 1 - store float 1.000000e+00, float addrspace(3)* %tmp, align 4 - %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2049 - store float 1.000000e+00, float addrspace(3)* %tmp1, align 4 - %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4097 - store float 1.000000e+00, float addrspace(3)* %tmp2, align 4 - %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6145 - store float 1.000000e+00, float addrspace(3)* %tmp3, align 4 - %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8193 - store float 1.000000e+00, float addrspace(3)* %tmp4, align 4 - %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10241 - store float 1.000000e+00, float addrspace(3)* %tmp5, align 4 + %tmp = getelementptr inbounds float, ptr addrspace(3) %arg, i32 1 + store float 1.000000e+00, ptr addrspace(3) %tmp, align 4 + %tmp1 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 2049 + store float 1.000000e+00, ptr addrspace(3) %tmp1, align 4 + %tmp2 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 4097 + store float 1.000000e+00, ptr addrspace(3) %tmp2, align 4 + %tmp3 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 6145 + store float 1.000000e+00, ptr addrspace(3) %tmp3, align 4 + %tmp4 = getelementptr inbounds float, ptr 
addrspace(3) %arg, i32 8193 + store float 1.000000e+00, ptr addrspace(3) %tmp4, align 4 + %tmp5 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 10241 + store float 1.000000e+00, ptr addrspace(3) %tmp5, align 4 ret void } @@ -416,23 +416,23 @@ ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:100 offset1:150 ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:200 offset1:250 ; GCN-DAG: ds_write2_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:44 offset1:94 -define amdgpu_kernel void @ds_write64_combine_stride_400(double addrspace(3)* nocapture %arg) { +define amdgpu_kernel void @ds_write64_combine_stride_400(ptr addrspace(3) nocapture %arg) { bb: - store double 1.000000e+00, double addrspace(3)* %arg, align 8 - %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 50 - store double 1.000000e+00, double addrspace(3)* %tmp, align 8 - %tmp1 = getelementptr inbounds double, double addrspace(3)* %arg, i32 100 - store double 1.000000e+00, double addrspace(3)* %tmp1, align 8 - %tmp2 = getelementptr inbounds double, double addrspace(3)* %arg, i32 150 - store double 1.000000e+00, double addrspace(3)* %tmp2, align 8 - %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 200 - store double 1.000000e+00, double addrspace(3)* %tmp3, align 8 - %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 250 - store double 1.000000e+00, double addrspace(3)* %tmp4, align 8 - %tmp5 = getelementptr inbounds double, double addrspace(3)* %arg, i32 300 - store double 1.000000e+00, double addrspace(3)* %tmp5, align 8 - %tmp6 = getelementptr inbounds double, double addrspace(3)* %arg, i32 350 - store double 1.000000e+00, double addrspace(3)* %tmp6, align 8 + store double 1.000000e+00, ptr addrspace(3) %arg, align 8 + %tmp = getelementptr inbounds double, ptr addrspace(3) %arg, i32 50 + store double 1.000000e+00, ptr addrspace(3) %tmp, align 8 + %tmp1 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 100 + store double 1.000000e+00, ptr addrspace(3) %tmp1, align 8 + %tmp2 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 150 + store double 1.000000e+00, ptr addrspace(3) %tmp2, align 8 + %tmp3 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 200 + store double 1.000000e+00, ptr addrspace(3) %tmp3, align 8 + %tmp4 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 250 + store double 1.000000e+00, ptr addrspace(3) %tmp4, align 8 + %tmp5 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 300 + store double 1.000000e+00, ptr addrspace(3) %tmp5, align 8 + %tmp6 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 350 + store double 1.000000e+00, ptr addrspace(3) %tmp6, align 8 ret void } @@ -446,19 +446,19 @@ ; GCN-DAG: ds_write2st64_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16 ; GCN-DAG: ds_write2st64_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:32 offset1:48 ; GCN-DAG: ds_write2st64_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:64 offset1:80 -define amdgpu_kernel void @ds_write64_combine_stride_8192_shifted(double addrspace(3)* nocapture %arg) { +define amdgpu_kernel void @ds_write64_combine_stride_8192_shifted(ptr addrspace(3) nocapture %arg) { bb: - %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 1 - store double 1.000000e+00, double addrspace(3)* %tmp, align 8 - %tmp1 = getelementptr inbounds double, double addrspace(3)* %arg, i32 1025 - store 
double 1.000000e+00, double addrspace(3)* %tmp1, align 8 - %tmp2 = getelementptr inbounds double, double addrspace(3)* %arg, i32 2049 - store double 1.000000e+00, double addrspace(3)* %tmp2, align 8 - %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 3073 - store double 1.000000e+00, double addrspace(3)* %tmp3, align 8 - %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 4097 - store double 1.000000e+00, double addrspace(3)* %tmp4, align 8 - %tmp5 = getelementptr inbounds double, double addrspace(3)* %arg, i32 5121 - store double 1.000000e+00, double addrspace(3)* %tmp5, align 8 + %tmp = getelementptr inbounds double, ptr addrspace(3) %arg, i32 1 + store double 1.000000e+00, ptr addrspace(3) %tmp, align 8 + %tmp1 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 1025 + store double 1.000000e+00, ptr addrspace(3) %tmp1, align 8 + %tmp2 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 2049 + store double 1.000000e+00, ptr addrspace(3) %tmp2, align 8 + %tmp3 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 3073 + store double 1.000000e+00, ptr addrspace(3) %tmp3, align 8 + %tmp4 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 4097 + store double 1.000000e+00, ptr addrspace(3) %tmp4, align 8 + %tmp5 = getelementptr inbounds double, ptr addrspace(3) %arg, i32 5121 + store double 1.000000e+00, ptr addrspace(3) %tmp5, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/ds-combine-with-dependence.ll b/llvm/test/CodeGen/AMDGPU/ds-combine-with-dependence.ll --- a/llvm/test/CodeGen/AMDGPU/ds-combine-with-dependence.ll +++ b/llvm/test/CodeGen/AMDGPU/ds-combine-with-dependence.ll @@ -9,27 +9,23 @@ ; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:26 offset1:27 ; GCN-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:7 offset1:8 ; GCN: s_waitcnt lgkmcnt({{[0-9]+}}) -define amdgpu_kernel void @ds_combine_nodep(float addrspace(1)* %out, float addrspace(3)* %inptr) { +define amdgpu_kernel void @ds_combine_nodep(ptr addrspace(1) %out, ptr addrspace(3) %inptr) { - %base = bitcast float addrspace(3)* %inptr to i8 addrspace(3)* - %addr0 = getelementptr i8, i8 addrspace(3)* %base, i32 24 - %tmp0 = bitcast i8 addrspace(3)* %addr0 to float addrspace(3)* - %vaddr0 = bitcast float addrspace(3)* %tmp0 to <3 x float> addrspace(3)* - %load0 = load <3 x float>, <3 x float> addrspace(3)* %vaddr0, align 4 + %addr0 = getelementptr i8, ptr addrspace(3) %inptr, i32 24 + %load0 = load <3 x float>, ptr addrspace(3) %addr0, align 4 %v0 = extractelement <3 x float> %load0, i32 2 %tmp1 = insertelement <2 x float> undef, float 1.0, i32 0 %data = insertelement <2 x float> %tmp1, float 2.0, i32 1 - %tmp2 = getelementptr float, float addrspace(3)* %inptr, i32 26 - %vaddrs = bitcast float addrspace(3)* %tmp2 to <2 x float> addrspace(3)* - store <2 x float> %data, <2 x float> addrspace(3)* %vaddrs, align 4 + %tmp2 = getelementptr float, ptr addrspace(3) %inptr, i32 26 + store <2 x float> %data, ptr addrspace(3) %tmp2, align 4 - %vaddr1 = getelementptr float, float addrspace(3)* %inptr, i32 7 - %v1 = load float, float addrspace(3)* %vaddr1, align 4 + %vaddr1 = getelementptr float, ptr addrspace(3) %inptr, i32 7 + %v1 = load float, ptr addrspace(3) %vaddr1, align 4 %sum = fadd float %v0, %v1 - store float %sum, float addrspace(1)* %out, align 4 + store float %sum, ptr addrspace(1) %out, align 4 ret void } @@ -41,27 +37,23 @@ ; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:7 offset1:27 ; GCN-NEXT: 
ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:26 offset1:27 -define amdgpu_kernel void @ds_combine_WAR(float addrspace(1)* %out, float addrspace(3)* %inptr) { +define amdgpu_kernel void @ds_combine_WAR(ptr addrspace(1) %out, ptr addrspace(3) %inptr) { - %base = bitcast float addrspace(3)* %inptr to i8 addrspace(3)* - %addr0 = getelementptr i8, i8 addrspace(3)* %base, i32 100 - %tmp0 = bitcast i8 addrspace(3)* %addr0 to float addrspace(3)* - %vaddr0 = bitcast float addrspace(3)* %tmp0 to <3 x float> addrspace(3)* - %load0 = load <3 x float>, <3 x float> addrspace(3)* %vaddr0, align 4 + %addr0 = getelementptr i8, ptr addrspace(3) %inptr, i32 100 + %load0 = load <3 x float>, ptr addrspace(3) %addr0, align 4 %v0 = extractelement <3 x float> %load0, i32 2 %tmp1 = insertelement <2 x float> undef, float 1.0, i32 0 %data = insertelement <2 x float> %tmp1, float 2.0, i32 1 - %tmp2 = getelementptr float, float addrspace(3)* %inptr, i32 26 - %vaddrs = bitcast float addrspace(3)* %tmp2 to <2 x float> addrspace(3)* - store <2 x float> %data, <2 x float> addrspace(3)* %vaddrs, align 4 + %tmp2 = getelementptr float, ptr addrspace(3) %inptr, i32 26 + store <2 x float> %data, ptr addrspace(3) %tmp2, align 4 - %vaddr1 = getelementptr float, float addrspace(3)* %inptr, i32 7 - %v1 = load float, float addrspace(3)* %vaddr1, align 4 + %vaddr1 = getelementptr float, ptr addrspace(3) %inptr, i32 7 + %v1 = load float, ptr addrspace(3) %vaddr1, align 4 %sum = fadd float %v0, %v1 - store float %sum, float addrspace(1)* %out, align 4 + store float %sum, ptr addrspace(1) %out, align 4 ret void } @@ -75,27 +67,23 @@ ; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:26 offset1:27 ; GCN-NEXT: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32 ; GCN-NEXT: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:104 -define amdgpu_kernel void @ds_combine_RAW(float addrspace(1)* %out, float addrspace(3)* %inptr) { +define amdgpu_kernel void @ds_combine_RAW(ptr addrspace(1) %out, ptr addrspace(3) %inptr) { - %base = bitcast float addrspace(3)* %inptr to i8 addrspace(3)* - %addr0 = getelementptr i8, i8 addrspace(3)* %base, i32 24 - %tmp0 = bitcast i8 addrspace(3)* %addr0 to float addrspace(3)* - %vaddr0 = bitcast float addrspace(3)* %tmp0 to <3 x float> addrspace(3)* - %load0 = load <3 x float>, <3 x float> addrspace(3)* %vaddr0, align 4 + %addr0 = getelementptr i8, ptr addrspace(3) %inptr, i32 24 + %load0 = load <3 x float>, ptr addrspace(3) %addr0, align 4 %v0 = extractelement <3 x float> %load0, i32 2 %tmp1 = insertelement <2 x float> undef, float 1.0, i32 0 %data = insertelement <2 x float> %tmp1, float 2.0, i32 1 - %tmp2 = getelementptr float, float addrspace(3)* %inptr, i32 26 - %vaddrs = bitcast float addrspace(3)* %tmp2 to <2 x float> addrspace(3)* - store <2 x float> %data, <2 x float> addrspace(3)* %vaddrs, align 4 + %tmp2 = getelementptr float, ptr addrspace(3) %inptr, i32 26 + store <2 x float> %data, ptr addrspace(3) %tmp2, align 4 - %vaddr1 = getelementptr float, float addrspace(3)* %inptr, i32 26 - %v1 = load float, float addrspace(3)* %vaddr1, align 4 + %vaddr1 = getelementptr float, ptr addrspace(3) %inptr, i32 26 + %v1 = load float, ptr addrspace(3) %vaddr1, align 4 %sum = fadd float %v0, %v1 - store float %sum, float addrspace(1)* %out, align 4 + store float %sum, ptr addrspace(1) %out, align 4 ret void } @@ -108,26 +96,22 @@ ; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:108 ; GCN-NEXT: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:26 offset1:27 ; GCN-NEXT: ds_read_b32 
v{{[0-9]+}}, v{{[0-9]+}} offset:104 -define amdgpu_kernel void @ds_combine_WAR_RAW(float addrspace(1)* %out, float addrspace(3)* %inptr) { +define amdgpu_kernel void @ds_combine_WAR_RAW(ptr addrspace(1) %out, ptr addrspace(3) %inptr) { - %base = bitcast float addrspace(3)* %inptr to i8 addrspace(3)* - %addr0 = getelementptr i8, i8 addrspace(3)* %base, i32 100 - %tmp0 = bitcast i8 addrspace(3)* %addr0 to float addrspace(3)* - %vaddr0 = bitcast float addrspace(3)* %tmp0 to <3 x float> addrspace(3)* - %load0 = load <3 x float>, <3 x float> addrspace(3)* %vaddr0, align 4 + %addr0 = getelementptr i8, ptr addrspace(3) %inptr, i32 100 + %load0 = load <3 x float>, ptr addrspace(3) %addr0, align 4 %v0 = extractelement <3 x float> %load0, i32 2 %tmp1 = insertelement <2 x float> undef, float 1.0, i32 0 %data = insertelement <2 x float> %tmp1, float 2.0, i32 1 - %tmp2 = getelementptr float, float addrspace(3)* %inptr, i32 26 - %vaddrs = bitcast float addrspace(3)* %tmp2 to <2 x float> addrspace(3)* - store <2 x float> %data, <2 x float> addrspace(3)* %vaddrs, align 4 + %tmp2 = getelementptr float, ptr addrspace(3) %inptr, i32 26 + store <2 x float> %data, ptr addrspace(3) %tmp2, align 4 - %vaddr1 = getelementptr float, float addrspace(3)* %inptr, i32 26 - %v1 = load float, float addrspace(3)* %vaddr1, align 4 + %vaddr1 = getelementptr float, ptr addrspace(3) %inptr, i32 26 + %v1 = load float, ptr addrspace(3) %vaddr1, align 4 %sum = fadd float %v0, %v1 - store float %sum, float addrspace(1)* %out, align 4 + store float %sum, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll --- a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll @@ -5,8 +5,8 @@ ; CHECK: in function test_dynamic_stackalloc{{.*}}: unsupported dynamic alloca -define amdgpu_kernel void @test_dynamic_stackalloc(i32 addrspace(1)* %out, i32 %n) { +define amdgpu_kernel void @test_dynamic_stackalloc(ptr addrspace(1) %out, i32 %n) { %alloca = alloca i32, i32 %n, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %alloca + store volatile i32 0, ptr addrspace(5) %alloca ret void } diff --git a/llvm/test/CodeGen/AMDGPU/early-if-convert-cost.ll b/llvm/test/CodeGen/AMDGPU/early-if-convert-cost.ll --- a/llvm/test/CodeGen/AMDGPU/early-if-convert-cost.ll +++ b/llvm/test/CodeGen/AMDGPU/early-if-convert-cost.ll @@ -11,9 +11,9 @@ ; GCN-DAG: v_cndmask_b32_e32 v[[RESULT_LO:[0-9]+]], v[[ADD_LO]], v[[VAL_LO]], vcc ; GCN-DAG: v_cndmask_b32_e32 v[[RESULT_HI:[0-9]+]], v[[ADD_HI]], v[[VAL_HI]], vcc ; GCN: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]] -define amdgpu_kernel void @test_vccnz_ifcvt_triangle64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { +define amdgpu_kernel void @test_vccnz_ifcvt_triangle64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { entry: - %v = load double, double addrspace(1)* %in + %v = load double, ptr addrspace(1) %in %cc = fcmp oeq double %v, 1.000000e+00 br i1 %cc, label %if, label %endif @@ -23,7 +23,7 @@ endif: %r = phi double [ %v, %entry ], [ %u, %if ] - store double %r, double addrspace(1)* %out + store double %r, ptr addrspace(1) %out ret void } @@ -33,9 +33,9 @@ ; GCN: v_add_f64 ; GCN: v_cndmask_b32_e32 ; GCN: v_cndmask_b32_e32 -define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle64(double addrspace(1)* %out, double addrspace(4)* %in) #0 { +define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { 
entry: - %v = load double, double addrspace(4)* %in + %v = load double, ptr addrspace(4) %in %cc = fcmp oeq double %v, 1.000000e+00 br i1 %cc, label %if, label %endif @@ -45,7 +45,7 @@ endif: %r = phi double [ %v, %entry ], [ %u, %if ] - store double %r, double addrspace(1)* %out + store double %r, ptr addrspace(1) %out ret void } @@ -64,9 +64,9 @@ ; SI-DAG: buffer_store_dwordx2 ; SI-DAG: buffer_store_dword v ; GCNX3: buffer_store_dwordx3 -define amdgpu_kernel void @test_vccnz_ifcvt_triangle96(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %in, float %cnd) #0 { +define amdgpu_kernel void @test_vccnz_ifcvt_triangle96(ptr addrspace(1) %out, ptr addrspace(1) %in, float %cnd) #0 { entry: - %v = load <3 x i32>, <3 x i32> addrspace(1)* %in + %v = load <3 x i32>, ptr addrspace(1) %in %cc = fcmp oeq float %cnd, 1.000000e+00 br i1 %cc, label %if, label %endif @@ -76,7 +76,7 @@ endif: %r = phi <3 x i32> [ %v, %entry ], [ %u, %if ] - store <3 x i32> %r, <3 x i32> addrspace(1)* %out + store <3 x i32> %r, ptr addrspace(1) %out ret void } @@ -95,9 +95,9 @@ ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, vcc ; GCN: buffer_store_dwordx4 -define amdgpu_kernel void @test_vccnz_ifcvt_triangle128(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in, float %cnd) #0 { +define amdgpu_kernel void @test_vccnz_ifcvt_triangle128(ptr addrspace(1) %out, ptr addrspace(1) %in, float %cnd) #0 { entry: - %v = load <4 x i32>, <4 x i32> addrspace(1)* %in + %v = load <4 x i32>, ptr addrspace(1) %in %cc = fcmp oeq float %cnd, 1.000000e+00 br i1 %cc, label %if, label %endif @@ -107,6 +107,6 @@ endif: %r = phi <4 x i32> [ %v, %entry ], [ %u, %if ] - store <4 x i32> %r, <4 x i32> addrspace(1)* %out + store <4 x i32> %r, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/early-if-convert.ll b/llvm/test/CodeGen/AMDGPU/early-if-convert.ll --- a/llvm/test/CodeGen/AMDGPU/early-if-convert.ll +++ b/llvm/test/CodeGen/AMDGPU/early-if-convert.ll @@ -9,9 +9,9 @@ ; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], [[VAL]], [[VAL]] ; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], [[ADD]], [[VAL]], vcc ; GCN: buffer_store_dword [[RESULT]] -define amdgpu_kernel void @test_vccnz_ifcvt_triangle(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @test_vccnz_ifcvt_triangle(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { entry: - %v = load float, float addrspace(1)* %in + %v = load float, ptr addrspace(1) %in %cc = fcmp oeq float %v, 1.000000e+00 br i1 %cc, label %if, label %endif @@ -21,7 +21,7 @@ endif: %r = phi float [ %v, %entry ], [ %u, %if ] - store float %r, float addrspace(1)* %out + store float %r, ptr addrspace(1) %out ret void } @@ -31,9 +31,9 @@ ; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[VAL]], [[VAL]] ; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[VAL]], [[VAL]] ; GCN: buffer_store_dword [[RESULT]] -define amdgpu_kernel void @test_vccnz_ifcvt_diamond(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @test_vccnz_ifcvt_diamond(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { entry: - %v = load float, float addrspace(1)* %in + %v = load float, ptr addrspace(1) %in %cc = fcmp oeq float %v, 1.000000e+00 br i1 %cc, label %if, label %else @@ -47,7 +47,7 @@ endif: %r = phi float [ %u0, %if ], [ %u1, %else ] - store float %r, float addrspace(1)* %out + store float %r, ptr addrspace(1) %out ret void } @@ -57,9 +57,9 @@ ; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc ; GCN: s_mov_b64 vcc, [[CMP]] ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 
vcc -define amdgpu_kernel void @test_vccnz_ifcvt_triangle_vcc_clobber(i32 addrspace(1)* %out, i32 addrspace(1)* %in, float %k) #0 { +define amdgpu_kernel void @test_vccnz_ifcvt_triangle_vcc_clobber(ptr addrspace(1) %out, ptr addrspace(1) %in, float %k) #0 { entry: - %v = load i32, i32 addrspace(1)* %in + %v = load i32, ptr addrspace(1) %in %cc = fcmp oeq float %k, 1.000000e+00 br i1 %cc, label %if, label %endif @@ -70,7 +70,7 @@ endif: %r = phi i32 [ %v, %entry ], [ %u, %if ] - store i32 %r, i32 addrspace(1)* %out + store i32 %r, ptr addrspace(1) %out ret void } @@ -86,9 +86,9 @@ ; GCN: v_mul_f32 ; GCN: v_mul_f32 ; GCN: v_cndmask_b32_e32 -define amdgpu_kernel void @test_vccnz_ifcvt_triangle_max_cheap(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @test_vccnz_ifcvt_triangle_max_cheap(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { entry: - %v = load float, float addrspace(1)* %in + %v = load float, ptr addrspace(1) %in %cc = fcmp oeq float %v, 1.000000e+00 br i1 %cc, label %if, label %endif @@ -106,7 +106,7 @@ endif: %r = phi float [ %v, %entry ], [ %u.8, %if ] - store float %r, float addrspace(1)* %out + store float %r, ptr addrspace(1) %out ret void } @@ -127,9 +127,9 @@ ; GCN: [[ENDIF]]: ; GCN: buffer_store_dword -define amdgpu_kernel void @test_vccnz_ifcvt_triangle_min_expensive(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @test_vccnz_ifcvt_triangle_min_expensive(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { entry: - %v = load float, float addrspace(1)* %in + %v = load float, ptr addrspace(1) %in %cc = fcmp oeq float %v, 1.000000e+00 br i1 %cc, label %if, label %endif @@ -148,7 +148,7 @@ endif: %r = phi float [ %v, %entry ], [ %u.9, %if ] - store float %r, float addrspace(1)* %out + store float %r, ptr addrspace(1) %out ret void } @@ -161,9 +161,9 @@ ; GCN: [[ENDIF]]: ; GCN: buffer_store_dword -define amdgpu_kernel void @test_vccnz_ifcvt_triangle_expensive(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @test_vccnz_ifcvt_triangle_expensive(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { entry: - %v = load float, float addrspace(1)* %in + %v = load float, ptr addrspace(1) %in %cc = fcmp oeq float %v, 1.000000e+00 br i1 %cc, label %if, label %endif @@ -173,7 +173,7 @@ endif: %r = phi float [ %v, %entry ], [ %u, %if ] - store float %r, float addrspace(1)* %out + store float %r, ptr addrspace(1) %out ret void } @@ -186,9 +186,9 @@ ; GCN: [[ENDIF]]: ; GCN: buffer_store_dword -define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle(i32 addrspace(1)* %out, i32 addrspace(4)* %in, float %cnd) #0 { +define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle(ptr addrspace(1) %out, ptr addrspace(4) %in, float %cnd) #0 { entry: - %v = load i32, i32 addrspace(4)* %in + %v = load i32, ptr addrspace(4) %in %cc = fcmp oeq float %cnd, 1.000000e+00 br i1 %cc, label %if, label %endif @@ -198,16 +198,16 @@ endif: %r = phi i32 [ %v, %entry ], [ %u, %if ] - store i32 %r, i32 addrspace(1)* %out + store i32 %r, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_constant_load: ; GCN: v_cndmask_b32 -define amdgpu_kernel void @test_vccnz_ifcvt_triangle_constant_load(float addrspace(1)* %out, float addrspace(4)* %in) #0 { +define amdgpu_kernel void @test_vccnz_ifcvt_triangle_constant_load(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { entry: - %v = load float, float addrspace(4)* %in + %v = load float, ptr addrspace(4) %in %cc = fcmp oeq float %v, 1.000000e+00 br i1 %cc, 
label %if, label %endif @@ -217,7 +217,7 @@ endif: %r = phi float [ %v, %entry ], [ %u, %if ] - store float %r, float addrspace(1)* %out + store float %r, ptr addrspace(1) %out ret void } @@ -226,7 +226,7 @@ ; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_argload: ; GCN: v_cndmask_b32 -define amdgpu_kernel void @test_vccnz_ifcvt_triangle_argload(float addrspace(1)* %out, float %v) #0 { +define amdgpu_kernel void @test_vccnz_ifcvt_triangle_argload(ptr addrspace(1) %out, float %v) #0 { entry: %cc = fcmp oeq float %v, 1.000000e+00 br i1 %cc, label %if, label %endif @@ -237,7 +237,7 @@ endif: %r = phi float [ %v, %entry ], [ %u, %if ] - store float %r, float addrspace(1)* %out + store float %r, ptr addrspace(1) %out ret void } @@ -247,9 +247,9 @@ ; GCN: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], [[VAL]] ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1 ; GCN-NEXT: s_cselect_b32 [[SELECT:s[0-9]+]], [[VAL]], [[ADD]] -define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle(i32 addrspace(4)* %in, i32 %cond) #0 { +define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle(ptr addrspace(4) %in, i32 %cond) #0 { entry: - %v = load i32, i32 addrspace(4)* %in + %v = load i32, ptr addrspace(4) %in %cc = icmp eq i32 %cond, 1 br i1 %cc, label %if, label %endif @@ -273,9 +273,9 @@ ; GCN: [[ENDIF]]: ; GCN: buffer_store_dword -define amdgpu_kernel void @test_scc1_vgpr_ifcvt_triangle(float addrspace(1)* %out, float addrspace(1)* %in, i32 %cond) #0 { +define amdgpu_kernel void @test_scc1_vgpr_ifcvt_triangle(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %cond) #0 { entry: - %v = load float, float addrspace(1)* %in + %v = load float, ptr addrspace(1) %in %cc = icmp eq i32 %cond, 1 br i1 %cc, label %if, label %endif @@ -285,7 +285,7 @@ endif: %r = phi float [ %v, %entry ], [ %u, %if ] - store float %r, float addrspace(1)* %out + store float %r, ptr addrspace(1) %out ret void } @@ -294,9 +294,9 @@ ; GCN: s_addc_u32 ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1 ; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} -define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle64(i64 addrspace(4)* %in, i32 %cond) #0 { +define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle64(ptr addrspace(4) %in, i32 %cond) #0 { entry: - %v = load i64, i64 addrspace(4)* %in + %v = load i64, ptr addrspace(4) %in %cc = icmp eq i32 %cond, 1 br i1 %cc, label %if, label %endif @@ -319,9 +319,9 @@ ; GCN-NEXT: s_cselect_b32 s ; GCN-NEXT: s_cselect_b32 s ; GCN-NEXT: s_cselect_b32 s -define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle96(<3 x i32> addrspace(4)* %in, i32 %cond) #0 { +define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle96(ptr addrspace(4) %in, i32 %cond) #0 { entry: - %v = load <3 x i32>, <3 x i32> addrspace(4)* %in + %v = load <3 x i32>, ptr addrspace(4) %in %cc = icmp eq i32 %cond, 1 br i1 %cc, label %if, label %endif @@ -344,9 +344,9 @@ ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1 ; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} ; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} -define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle128(<4 x i32> addrspace(4)* %in, i32 %cond) #0 { +define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle128(ptr addrspace(4) %in, i32 %cond) #0 { entry: - %v = load <4 x i32>, <4 x i32> addrspace(4)* %in + %v = load <4 x i32>, ptr addrspace(4) %in %cc = icmp eq i32 %cond, 1 br i1 %cc, label %if, label %endif @@ -363,7 +363,7 @@ ; GCN-LABEL: {{^}}uniform_if_swap_br_targets_scc_constant_select: ; 
GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0 ; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 1{{$}} -define amdgpu_kernel void @uniform_if_swap_br_targets_scc_constant_select(i32 %cond, i32 addrspace(1)* %out) { +define amdgpu_kernel void @uniform_if_swap_br_targets_scc_constant_select(i32 %cond, ptr addrspace(1) %out) { entry: %cmp0 = icmp eq i32 %cond, 0 br i1 %cmp0, label %else, label %if @@ -376,7 +376,7 @@ done: %value = phi i32 [0, %if], [1, %else] - store i32 %value, i32 addrspace(1)* %out + store i32 %value, ptr addrspace(1) %out ret void } @@ -384,7 +384,7 @@ ; GCN: {{^}}; %bb.0: ; GCN-NEXT: s_load_dwordx2 ; GCN-NEXT: s_cselect_b32 s{{[0-9]+}}, 0, 1{{$}} -define amdgpu_kernel void @ifcvt_undef_scc(i32 %cond, i32 addrspace(1)* %out) { +define amdgpu_kernel void @ifcvt_undef_scc(i32 %cond, ptr addrspace(1) %out) { entry: br i1 undef, label %else, label %if @@ -396,7 +396,7 @@ done: %value = phi i32 [0, %if], [1, %else] - store i32 %value, i32 addrspace(1)* %out + store i32 %value, ptr addrspace(1) %out ret void } @@ -409,9 +409,9 @@ ; GCN: [[ENDIF]]: ; GCN: buffer_store_dword -define amdgpu_kernel void @test_vccnz_ifcvt_triangle256(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in, float %cnd) #0 { +define amdgpu_kernel void @test_vccnz_ifcvt_triangle256(ptr addrspace(1) %out, ptr addrspace(1) %in, float %cnd) #0 { entry: - %v = load <8 x i32>, <8 x i32> addrspace(1)* %in + %v = load <8 x i32>, ptr addrspace(1) %in %cc = fcmp oeq float %cnd, 1.000000e+00 br i1 %cc, label %if, label %endif @@ -421,7 +421,7 @@ endif: %r = phi <8 x i32> [ %v, %entry ], [ %u, %if ] - store <8 x i32> %r, <8 x i32> addrspace(1)* %out + store <8 x i32> %r, ptr addrspace(1) %out ret void } @@ -434,9 +434,9 @@ ; GCN: [[ENDIF]]: ; GCN: buffer_store_dword -define amdgpu_kernel void @test_vccnz_ifcvt_triangle512(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in, float %cnd) #0 { +define amdgpu_kernel void @test_vccnz_ifcvt_triangle512(ptr addrspace(1) %out, ptr addrspace(1) %in, float %cnd) #0 { entry: - %v = load <16 x i32>, <16 x i32> addrspace(1)* %in + %v = load <16 x i32>, ptr addrspace(1) %in %cc = fcmp oeq float %cnd, 1.000000e+00 br i1 %cc, label %if, label %endif @@ -446,7 +446,7 @@ endif: %r = phi <16 x i32> [ %v, %entry ], [ %u, %if ] - store <16 x i32> %r, <16 x i32> addrspace(1)* %out + store <16 x i32> %r, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/early-inline-alias.ll b/llvm/test/CodeGen/AMDGPU/early-inline-alias.ll --- a/llvm/test/CodeGen/AMDGPU/early-inline-alias.ll +++ b/llvm/test/CodeGen/AMDGPU/early-inline-alias.ll @@ -1,10 +1,10 @@ ; RUN: opt -mtriple=amdgcn-- -O1 -S -inline-threshold=1 %s | FileCheck %s -; CHECK: @add1alias = alias i32 (i32), i32 (i32)* @add1 -; CHECK: @add1alias2 = alias i32 (i32), i32 (i32)* @add1 +; CHECK: @add1alias = alias i32 (i32), ptr @add1 +; CHECK: @add1alias2 = alias i32 (i32), ptr @add1 -@add1alias = alias i32 (i32), i32 (i32)* @add1 -@add1alias2 = alias i32 (i32), i32 (i32)* @add1 +@add1alias = alias i32 (i32), ptr @add1 +@add1alias2 = alias i32 (i32), ptr @add1 define i32 @add1(i32) { %2 = add nsw i32 %0, 1 diff --git a/llvm/test/CodeGen/AMDGPU/early-inline.ll b/llvm/test/CodeGen/AMDGPU/early-inline.ll --- a/llvm/test/CodeGen/AMDGPU/early-inline.ll +++ b/llvm/test/CodeGen/AMDGPU/early-inline.ll @@ -1,6 +1,6 @@ ; RUN: opt -mtriple=amdgcn-- -O1 -S -inline-threshold=1 -amdgpu-early-inline-all %s | FileCheck %s -@c_alias = dso_local alias i32 (i32), i32 (i32)* @callee +@c_alias = dso_local alias i32 (i32), ptr @callee define dso_local i32 
@callee(i32 %x) { entry: @@ -19,7 +19,7 @@ define amdgpu_kernel void @caller(i32 %x) { entry: %res = call i32 @callee(i32 %x) - store volatile i32 %res, i32 addrspace(1)* undef + store volatile i32 %res, ptr addrspace(1) undef ret void } @@ -28,6 +28,6 @@ define amdgpu_kernel void @alias_caller(i32 %x) { entry: %res = call i32 @c_alias(i32 %x) - store volatile i32 %res, i32 addrspace(1)* undef + store volatile i32 %res, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll b/llvm/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll --- a/llvm/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll +++ b/llvm/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll @@ -7,32 +7,32 @@ ; GCN-LABEL: {{^}}fneg_fsub_f32_fmf: ; GCN: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} ; GCN-FMF-NOT: xor -define amdgpu_kernel void @fneg_fsub_f32_fmf(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @fneg_fsub_f32_fmf(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %add = add i32 %tid, 1 - %gep = getelementptr float, float addrspace(1)* %in, i32 %tid - %b_ptr = getelementptr float, float addrspace(1)* %in, i32 %add - %a = load float, float addrspace(1)* %gep, align 4 - %b = load float, float addrspace(1)* %b_ptr, align 4 + %gep = getelementptr float, ptr addrspace(1) %in, i32 %tid + %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 %add + %a = load float, ptr addrspace(1) %gep, align 4 + %b = load float, ptr addrspace(1) %b_ptr, align 4 %result = fsub fast float %a, %b %neg.result = fsub fast float -0.0, %result - store float %neg.result, float addrspace(1)* %out, align 4 + store float %neg.result, ptr addrspace(1) %out, align 4 ret void } ; GCN-LABEL: {{^}}fneg_fsub_f32_safe: ; GCN: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]] -define amdgpu_kernel void @fneg_fsub_f32_safe(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @fneg_fsub_f32_safe(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %add = add i32 %tid, 1 - %gep = getelementptr float, float addrspace(1)* %in, i32 %tid - %b_ptr = getelementptr float, float addrspace(1)* %in, i32 %add - %a = load float, float addrspace(1)* %gep, align 4 - %b = load float, float addrspace(1)* %b_ptr, align 4 + %gep = getelementptr float, ptr addrspace(1) %in, i32 %tid + %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 %add + %a = load float, ptr addrspace(1) %gep, align 4 + %b = load float, ptr addrspace(1) %b_ptr, align 4 %result = fsub float %a, %b %neg.result = fsub float -0.0, %result - store float %neg.result, float addrspace(1)* %out, align 4 + store float %neg.result, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll b/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll --- a/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll +++ b/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll @@ -76,12 +76,10 @@ ; CHECK-NEXT: ; %bb.10: ; %bb16 ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: s_mov_b64 s[16:17], 0 -; CHECK-NEXT: s_mov_b64 s[20:21], -1 ; CHECK-NEXT: s_mov_b64 s[22:23], s[10:11] ; CHECK-NEXT: s_mov_b64 s[18:19], s[16:17] ; CHECK-NEXT: s_branch .LBB0_2 ; CHECK-NEXT: .LBB0_11: ; 
in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: s_mov_b64 s[22:23], -1 ; CHECK-NEXT: s_mov_b64 s[20:21], 0 ; CHECK-NEXT: ; implicit-def: $sgpr16_sgpr17 ; CHECK-NEXT: s_mov_b64 s[18:19], s[16:17] diff --git a/llvm/test/CodeGen/AMDGPU/extra-sroa-after-unroll.ll b/llvm/test/CodeGen/AMDGPU/extra-sroa-after-unroll.ll --- a/llvm/test/CodeGen/AMDGPU/extra-sroa-after-unroll.ll +++ b/llvm/test/CodeGen/AMDGPU/extra-sroa-after-unroll.ll @@ -9,82 +9,73 @@ ; O3-NOT: alloca ; GCN-COUNT-27: = load ; GCN-COUNT-26: = add -define protected amdgpu_kernel void @t0(i32 addrspace(1)* %p.coerce) #0 { +define protected amdgpu_kernel void @t0(ptr addrspace(1) %p.coerce) #0 { entry: - %p = alloca i32*, align 8, addrspace(5) - %p.ascast = addrspacecast i32* addrspace(5)* %p to i32** - %p.addr = alloca i32*, align 8, addrspace(5) - %p.addr.ascast = addrspacecast i32* addrspace(5)* %p.addr to i32** + %p = alloca ptr, align 8, addrspace(5) + %p.ascast = addrspacecast ptr addrspace(5) %p to ptr + %p.addr = alloca ptr, align 8, addrspace(5) + %p.addr.ascast = addrspacecast ptr addrspace(5) %p.addr to ptr %t = alloca [27 x i32], align 16, addrspace(5) - %t.ascast = addrspacecast [27 x i32] addrspace(5)* %t to [27 x i32]* + %t.ascast = addrspacecast ptr addrspace(5) %t to ptr %sum = alloca i32, align 4, addrspace(5) - %sum.ascast = addrspacecast i32 addrspace(5)* %sum to i32* + %sum.ascast = addrspacecast ptr addrspace(5) %sum to ptr %i = alloca i32, align 4, addrspace(5) - %i.ascast = addrspacecast i32 addrspace(5)* %i to i32* + %i.ascast = addrspacecast ptr addrspace(5) %i to ptr %cleanup.dest.slot = alloca i32, align 4, addrspace(5) - %0 = addrspacecast i32 addrspace(1)* %p.coerce to i32* - store i32* %0, i32** %p.ascast, align 8 - %p1 = load i32*, i32** %p.ascast, align 8 - store i32* %p1, i32** %p.addr.ascast, align 8 - %1 = bitcast [27 x i32] addrspace(5)* %t to i8 addrspace(5)* - call void @llvm.lifetime.start.p5i8(i64 48, i8 addrspace(5)* %1) - %arraydecay = getelementptr inbounds [27 x i32], [27 x i32]* %t.ascast, i64 0, i64 0 - %2 = load i32*, i32** %p.addr.ascast, align 8 - call void @copy(i32* %arraydecay, i32* %2, i32 27) - %3 = bitcast i32 addrspace(5)* %sum to i8 addrspace(5)* - call void @llvm.lifetime.start.p5i8(i64 4, i8 addrspace(5)* %3) - store i32 0, i32* %sum.ascast, align 4 - %4 = bitcast i32 addrspace(5)* %i to i8 addrspace(5)* - call void @llvm.lifetime.start.p5i8(i64 4, i8 addrspace(5)* %4) - store i32 0, i32* %i.ascast, align 4 + %0 = addrspacecast ptr addrspace(1) %p.coerce to ptr + store ptr %0, ptr %p.ascast, align 8 + %p1 = load ptr, ptr %p.ascast, align 8 + store ptr %p1, ptr %p.addr.ascast, align 8 + call void @llvm.lifetime.start.p5(i64 48, ptr addrspace(5) %t) + %1 = load ptr, ptr %p.addr.ascast, align 8 + call void @copy(ptr %t.ascast, ptr %1, i32 27) + call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %sum) + store i32 0, ptr %sum.ascast, align 4 + call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %i) + store i32 0, ptr %i.ascast, align 4 br label %for.cond for.cond: ; preds = %for.inc, %entry - %5 = load i32, i32* %i.ascast, align 4 - %cmp = icmp slt i32 %5, 27 + %2 = load i32, ptr %i.ascast, align 4 + %cmp = icmp slt i32 %2, 27 br i1 %cmp, label %for.body, label %for.cond.cleanup for.cond.cleanup: ; preds = %for.cond - %6 = bitcast i32 addrspace(5)* %i to i8 addrspace(5)* - call void @llvm.lifetime.end.p5i8(i64 4, i8 addrspace(5)* %6) + call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %i) br label %for.end for.body: ; preds = %for.cond - %7 = load i32, i32* 
%i.ascast, align 4 - %idxprom = sext i32 %7 to i64 - %arrayidx = getelementptr inbounds [27 x i32], [27 x i32]* %t.ascast, i64 0, i64 %idxprom - %8 = load i32, i32* %arrayidx, align 4 - %9 = load i32, i32* %sum.ascast, align 4 - %add = add nsw i32 %9, %8 - store i32 %add, i32* %sum.ascast, align 4 + %3 = load i32, ptr %i.ascast, align 4 + %idxprom = sext i32 %3 to i64 + %arrayidx = getelementptr inbounds [27 x i32], ptr %t.ascast, i64 0, i64 %idxprom + %4 = load i32, ptr %arrayidx, align 4 + %5 = load i32, ptr %sum.ascast, align 4 + %add = add nsw i32 %5, %4 + store i32 %add, ptr %sum.ascast, align 4 br label %for.inc for.inc: ; preds = %for.body - %10 = load i32, i32* %i.ascast, align 4 - %inc = add nsw i32 %10, 1 - store i32 %inc, i32* %i.ascast, align 4 + %6 = load i32, ptr %i.ascast, align 4 + %inc = add nsw i32 %6, 1 + store i32 %inc, ptr %i.ascast, align 4 br label %for.cond for.end: ; preds = %for.cond.cleanup - %11 = load i32, i32* %sum.ascast, align 4 - %12 = load i32*, i32** %p.addr.ascast, align 8 - store i32 %11, i32* %12, align 4 - %13 = bitcast i32 addrspace(5)* %sum to i8 addrspace(5)* - call void @llvm.lifetime.end.p5i8(i64 4, i8 addrspace(5)* %13) - %14 = bitcast [27 x i32] addrspace(5)* %t to i8 addrspace(5)* - call void @llvm.lifetime.end.p5i8(i64 48, i8 addrspace(5)* %14) + %7 = load i32, ptr %sum.ascast, align 4 + %8 = load ptr, ptr %p.addr.ascast, align 8 + store i32 %7, ptr %8, align 4 + call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %sum) + call void @llvm.lifetime.end.p5(i64 48, ptr addrspace(5) %t) ret void } -define internal void @copy(i32* %d, i32* %s, i32 %N) { +define internal void @copy(ptr %d, ptr %s, i32 %N) { entry: - %d8 = bitcast i32* %d to i8* - %s8 = bitcast i32* %s to i8* %N8 = mul i32 %N, 4 - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d8, i8* %s8, i32 %N8, i1 false) + tail call void @llvm.memcpy.p0.p0.i32(ptr %d, ptr %s, i32 %N8, i1 false) ret void } -declare void @llvm.lifetime.start.p5i8(i64 immarg, i8 addrspace(5)* nocapture) -declare void @llvm.lifetime.end.p5i8(i64 immarg, i8 addrspace(5)* nocapture) -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) +declare void @llvm.lifetime.start.p5(i64 immarg, ptr addrspace(5) nocapture) +declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture) +declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) diff --git a/llvm/test/CodeGen/AMDGPU/extract-vector-elt-build-vector-combine.ll b/llvm/test/CodeGen/AMDGPU/extract-vector-elt-build-vector-combine.ll --- a/llvm/test/CodeGen/AMDGPU/extract-vector-elt-build-vector-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/extract-vector-elt-build-vector-combine.ll @@ -13,32 +13,32 @@ ; GCN: buffer_store_dword ; GCN: buffer_store_dword ; GCN: buffer_store_dword -define amdgpu_kernel void @store_build_vector_multiple_uses_v4i32(<4 x i32> addrspace(1)* noalias %out0, - <4 x i32> addrspace(1)* noalias %out1, - i32 addrspace(1)* noalias %out2, - i32 addrspace(1)* %in) { - %elt0 = load volatile i32, i32 addrspace(1)* %in - %elt1 = load volatile i32, i32 addrspace(1)* %in - %elt2 = load volatile i32, i32 addrspace(1)* %in - %elt3 = load volatile i32, i32 addrspace(1)* %in +define amdgpu_kernel void @store_build_vector_multiple_uses_v4i32(ptr addrspace(1) noalias %out0, + ptr addrspace(1) noalias %out1, + ptr addrspace(1) noalias %out2, + ptr addrspace(1) %in) { + %elt0 = load volatile i32, ptr addrspace(1) %in + %elt1 = load volatile i32, ptr addrspace(1) %in + %elt2 = load volatile i32, ptr addrspace(1) 
%in + %elt3 = load volatile i32, ptr addrspace(1) %in %vec0 = insertelement <4 x i32> undef, i32 %elt0, i32 0 %vec1 = insertelement <4 x i32> %vec0, i32 %elt1, i32 1 %vec2 = insertelement <4 x i32> %vec1, i32 %elt2, i32 2 %vec3 = insertelement <4 x i32> %vec2, i32 %elt3, i32 3 - store <4 x i32> %vec3, <4 x i32> addrspace(1)* %out0 - store <4 x i32> %vec3, <4 x i32> addrspace(1)* %out1 + store <4 x i32> %vec3, ptr addrspace(1) %out0 + store <4 x i32> %vec3, ptr addrspace(1) %out1 %extract0 = extractelement <4 x i32> %vec3, i32 0 %extract1 = extractelement <4 x i32> %vec3, i32 1 %extract2 = extractelement <4 x i32> %vec3, i32 2 %extract3 = extractelement <4 x i32> %vec3, i32 3 - store volatile i32 %extract0, i32 addrspace(1)* %out2 - store volatile i32 %extract1, i32 addrspace(1)* %out2 - store volatile i32 %extract2, i32 addrspace(1)* %out2 - store volatile i32 %extract3, i32 addrspace(1)* %out2 + store volatile i32 %extract0, ptr addrspace(1) %out2 + store volatile i32 %extract1, ptr addrspace(1) %out2 + store volatile i32 %extract2, ptr addrspace(1) %out2 + store volatile i32 %extract3, ptr addrspace(1) %out2 ret void } @@ -55,14 +55,14 @@ ; GCN: buffer_store_dword ; GCN: buffer_store_dword ; GCN: buffer_store_dword -define amdgpu_kernel void @store_build_vector_multiple_extract_uses_v4i32(<4 x i32> addrspace(1)* noalias %out0, - <4 x i32> addrspace(1)* noalias %out1, - i32 addrspace(1)* noalias %out2, - i32 addrspace(1)* %in) { - %elt0 = load volatile i32, i32 addrspace(1)* %in - %elt1 = load volatile i32, i32 addrspace(1)* %in - %elt2 = load volatile i32, i32 addrspace(1)* %in - %elt3 = load volatile i32, i32 addrspace(1)* %in +define amdgpu_kernel void @store_build_vector_multiple_extract_uses_v4i32(ptr addrspace(1) noalias %out0, + ptr addrspace(1) noalias %out1, + ptr addrspace(1) noalias %out2, + ptr addrspace(1) %in) { + %elt0 = load volatile i32, ptr addrspace(1) %in + %elt1 = load volatile i32, ptr addrspace(1) %in + %elt2 = load volatile i32, ptr addrspace(1) %in + %elt3 = load volatile i32, ptr addrspace(1) %in %vec0 = insertelement <4 x i32> undef, i32 %elt0, i32 0 %vec1 = insertelement <4 x i32> %vec0, i32 %elt1, i32 1 @@ -79,12 +79,12 @@ %op2 = xor i32 %extract2, 1231412 %op3 = and i32 %extract3, 258233412312 - store <4 x i32> %vec3, <4 x i32> addrspace(1)* %out0 + store <4 x i32> %vec3, ptr addrspace(1) %out0 - store volatile i32 %op0, i32 addrspace(1)* %out2 - store volatile i32 %op1, i32 addrspace(1)* %out2 - store volatile i32 %op2, i32 addrspace(1)* %out2 - store volatile i32 %op3, i32 addrspace(1)* %out2 + store volatile i32 %op0, ptr addrspace(1) %out2 + store volatile i32 %op1, ptr addrspace(1) %out2 + store volatile i32 %op2, ptr addrspace(1) %out2 + store volatile i32 %op3, ptr addrspace(1) %out2 ret void } @@ -99,14 +99,14 @@ ; GCN: buffer_store_dwordx2 ; GCN: buffer_store_dwordx2 -define amdgpu_kernel void @store_build_vector_multiple_uses_v4i32_bitcast_to_v2i64(<2 x i64> addrspace(1)* noalias %out0, - <4 x i32> addrspace(1)* noalias %out1, - i64 addrspace(1)* noalias %out2, - i32 addrspace(1)* %in) { - %elt0 = load volatile i32, i32 addrspace(1)* %in - %elt1 = load volatile i32, i32 addrspace(1)* %in - %elt2 = load volatile i32, i32 addrspace(1)* %in - %elt3 = load volatile i32, i32 addrspace(1)* %in +define amdgpu_kernel void @store_build_vector_multiple_uses_v4i32_bitcast_to_v2i64(ptr addrspace(1) noalias %out0, + ptr addrspace(1) noalias %out1, + ptr addrspace(1) noalias %out2, + ptr addrspace(1) %in) { + %elt0 = load volatile i32, ptr addrspace(1) %in + 
%elt1 = load volatile i32, ptr addrspace(1) %in + %elt2 = load volatile i32, ptr addrspace(1) %in + %elt3 = load volatile i32, ptr addrspace(1) %in %vec0 = insertelement <4 x i32> undef, i32 %elt0, i32 0 %vec1 = insertelement <4 x i32> %vec0, i32 %elt1, i32 1 @@ -114,13 +114,13 @@ %vec3 = insertelement <4 x i32> %vec2, i32 %elt3, i32 3 %bc.vec3 = bitcast <4 x i32> %vec3 to <2 x i64> - store <2 x i64> %bc.vec3, <2 x i64> addrspace(1)* %out0 + store <2 x i64> %bc.vec3, ptr addrspace(1) %out0 %extract0 = extractelement <2 x i64> %bc.vec3, i32 0 %extract1 = extractelement <2 x i64> %bc.vec3, i32 1 - store volatile i64 %extract0, i64 addrspace(1)* %out2 - store volatile i64 %extract1, i64 addrspace(1)* %out2 + store volatile i64 %extract0, ptr addrspace(1) %out2 + store volatile i64 %extract1, ptr addrspace(1) %out2 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fceil.ll b/llvm/test/CodeGen/AMDGPU/fceil.ll --- a/llvm/test/CodeGen/AMDGPU/fceil.ll +++ b/llvm/test/CodeGen/AMDGPU/fceil.ll @@ -13,9 +13,9 @@ ; SI: v_ceil_f32_e32 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]] ; EG: CEIL {{\*? *}}[[RESULT]] -define amdgpu_kernel void @fceil_f32(float addrspace(1)* %out, float %x) { +define amdgpu_kernel void @fceil_f32(ptr addrspace(1) %out, float %x) { %y = call float @llvm.ceil.f32(float %x) nounwind readnone - store float %y, float addrspace(1)* %out + store float %y, ptr addrspace(1) %out ret void } @@ -25,9 +25,9 @@ ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}} ; EG: CEIL {{\*? *}}[[RESULT]] ; EG: CEIL {{\*? *}}[[RESULT]] -define amdgpu_kernel void @fceil_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) { +define amdgpu_kernel void @fceil_v2f32(ptr addrspace(1) %out, <2 x float> %x) { %y = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x) nounwind readnone - store <2 x float> %y, <2 x float> addrspace(1)* %out + store <2 x float> %y, ptr addrspace(1) %out ret void } @@ -41,9 +41,9 @@ ; EG-DAG: CEIL {{\*? *}}[[RESULT1]] ; EG-DAG: CEIL {{\*? *}}[[RESULT2]] ; EG-DAG: CEIL {{\*? *}}[[RESULT2]] -define amdgpu_kernel void @fceil_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) { +define amdgpu_kernel void @fceil_v3f32(ptr addrspace(1) %out, <3 x float> %x) { %y = call <3 x float> @llvm.ceil.v3f32(<3 x float> %x) nounwind readnone - store <3 x float> %y, <3 x float> addrspace(1)* %out + store <3 x float> %y, ptr addrspace(1) %out ret void } @@ -57,9 +57,9 @@ ; EG: CEIL {{\*? *}}[[RESULT]] ; EG: CEIL {{\*? *}}[[RESULT]] ; EG: CEIL {{\*? *}}[[RESULT]] -define amdgpu_kernel void @fceil_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) { +define amdgpu_kernel void @fceil_v4f32(ptr addrspace(1) %out, <4 x float> %x) { %y = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone - store <4 x float> %y, <4 x float> addrspace(1)* %out + store <4 x float> %y, ptr addrspace(1) %out ret void } @@ -82,9 +82,9 @@ ; EG-DAG: CEIL {{\*? *}}[[RESULT2]] ; EG-DAG: CEIL {{\*? *}}[[RESULT2]] ; EG-DAG: CEIL {{\*? *}}[[RESULT2]] -define amdgpu_kernel void @fceil_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) { +define amdgpu_kernel void @fceil_v8f32(ptr addrspace(1) %out, <8 x float> %x) { %y = call <8 x float> @llvm.ceil.v8f32(<8 x float> %x) nounwind readnone - store <8 x float> %y, <8 x float> addrspace(1)* %out + store <8 x float> %y, ptr addrspace(1) %out ret void } @@ -125,8 +125,8 @@ ; EG-DAG: CEIL {{\*? *}}[[RESULT4]] ; EG-DAG: CEIL {{\*? *}}[[RESULT4]] ; EG-DAG: CEIL {{\*? 
*}}[[RESULT4]] -define amdgpu_kernel void @fceil_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) { +define amdgpu_kernel void @fceil_v16f32(ptr addrspace(1) %out, <16 x float> %x) { %y = call <16 x float> @llvm.ceil.v16f32(<16 x float> %x) nounwind readnone - store <16 x float> %y, <16 x float> addrspace(1)* %out + store <16 x float> %y, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fceil64.ll b/llvm/test/CodeGen/AMDGPU/fceil64.ll --- a/llvm/test/CodeGen/AMDGPU/fceil64.ll +++ b/llvm/test/CodeGen/AMDGPU/fceil64.ll @@ -29,18 +29,18 @@ ; SI-DAG: s_cselect_b32 s{{[0-9]+}}, 0x3ff00000, 0 ; SI: v_add_f64 ; SI: s_endpgm -define amdgpu_kernel void @fceil_f64(double addrspace(1)* %out, double %x) { +define amdgpu_kernel void @fceil_f64(ptr addrspace(1) %out, double %x) { %y = call double @llvm.ceil.f64(double %x) nounwind readnone - store double %y, double addrspace(1)* %out + store double %y, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}fceil_v2f64: ; CI: v_ceil_f64_e32 ; CI: v_ceil_f64_e32 -define amdgpu_kernel void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { +define amdgpu_kernel void @fceil_v2f64(ptr addrspace(1) %out, <2 x double> %x) { %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone - store <2 x double> %y, <2 x double> addrspace(1)* %out + store <2 x double> %y, ptr addrspace(1) %out ret void } @@ -48,9 +48,9 @@ ; FIXME-CI: v_ceil_f64_e32 ; FIXME-CI: v_ceil_f64_e32 ; FIXME-CI: v_ceil_f64_e32 -; define amdgpu_kernel void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { +; define amdgpu_kernel void @fceil_v3f64(ptr addrspace(1) %out, <3 x double> %x) { ; %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone -; store <3 x double> %y, <3 x double> addrspace(1)* %out +; store <3 x double> %y, ptr addrspace(1) %out ; ret void ; } @@ -59,9 +59,9 @@ ; CI: v_ceil_f64_e32 ; CI: v_ceil_f64_e32 ; CI: v_ceil_f64_e32 -define amdgpu_kernel void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { +define amdgpu_kernel void @fceil_v4f64(ptr addrspace(1) %out, <4 x double> %x) { %y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone - store <4 x double> %y, <4 x double> addrspace(1)* %out + store <4 x double> %y, ptr addrspace(1) %out ret void } @@ -74,9 +74,9 @@ ; CI: v_ceil_f64_e32 ; CI: v_ceil_f64_e32 ; CI: v_ceil_f64_e32 -define amdgpu_kernel void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { +define amdgpu_kernel void @fceil_v8f64(ptr addrspace(1) %out, <8 x double> %x) { %y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone - store <8 x double> %y, <8 x double> addrspace(1)* %out + store <8 x double> %y, ptr addrspace(1) %out ret void } @@ -97,8 +97,8 @@ ; CI: v_ceil_f64_e32 ; CI: v_ceil_f64_e32 ; CI: v_ceil_f64_e32 -define amdgpu_kernel void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { +define amdgpu_kernel void @fceil_v16f64(ptr addrspace(1) %out, <16 x double> %x) { %y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone - store <16 x double> %y, <16 x double> addrspace(1)* %out + store <16 x double> %y, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll --- a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll @@ -1,8 +1,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s ; RUN: 
llvm-as -data-layout=A5 < %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=GCN %s -declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() +declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() declare i32 @llvm.amdgcn.workitem.id.x() declare i32 @llvm.amdgcn.workgroup.id.x() declare void @llvm.amdgcn.s.barrier() @@ -17,41 +17,39 @@ ; GCN: s_waitcnt lgkmcnt(0){{$}} ; GCN-NEXT: s_barrier ; GCN: flat_store_dword -define amdgpu_kernel void @test_local(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @test_local(ptr addrspace(1) %arg) { bb: - %i = alloca i32 addrspace(1)*, align 4, addrspace(5) - store i32 addrspace(1)* %arg, i32 addrspace(1)* addrspace(5)* %i, align 4 + %i = alloca ptr addrspace(1), align 4, addrspace(5) + store ptr addrspace(1) %arg, ptr addrspace(5) %i, align 4 %i1 = call i32 @llvm.amdgcn.workitem.id.x() %i2 = zext i32 %i1 to i64 %i3 = icmp eq i64 %i2, 0 br i1 %i3, label %bb4, label %bb5 bb4: ; preds = %bb - store i32 1911, i32 addrspace(3)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(3)* @test_local.temp, i64 0, i64 0), align 4 + store i32 1911, ptr addrspace(3) @test_local.temp, align 4 br label %bb5 bb5: ; preds = %bb4, %bb fence syncscope("workgroup") release call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %i6 = load i32, i32 addrspace(3)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(3)* @test_local.temp, i64 0, i64 0), align 4 - %i7 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %i, align 4 - %i8 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() + %i6 = load i32, ptr addrspace(3) @test_local.temp, align 4 + %i7 = load ptr addrspace(1), ptr addrspace(5) %i, align 4 + %i8 = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() %i9 = call i32 @llvm.amdgcn.workitem.id.x() %i10 = call i32 @llvm.amdgcn.workgroup.id.x() - %i11 = getelementptr inbounds i8, i8 addrspace(4)* %i8, i64 4 - %i12 = bitcast i8 addrspace(4)* %i11 to i16 addrspace(4)* - %i13 = load i16, i16 addrspace(4)* %i12, align 4 + %i11 = getelementptr inbounds i8, ptr addrspace(4) %i8, i64 4 + %i13 = load i16, ptr addrspace(4) %i11, align 4 %i14 = zext i16 %i13 to i32 %i15 = mul i32 %i10, %i14 %i16 = add i32 %i15, %i9 - %i17 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %i17 = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %i18 = zext i32 %i16 to i64 - %i19 = bitcast i8 addrspace(4)* %i17 to i64 addrspace(4)* - %i20 = load i64, i64 addrspace(4)* %i19, align 8 + %i20 = load i64, ptr addrspace(4) %i17, align 8 %i21 = add i64 %i20, %i18 - %i22 = getelementptr inbounds i32, i32 addrspace(1)* %i7, i64 %i21 - store i32 %i6, i32 addrspace(1)* %i22, align 4 + %i22 = getelementptr inbounds i32, ptr addrspace(1) %i7, i64 %i21 + store i32 %i6, ptr addrspace(1) %i22, align 4 ret void } @@ -60,80 +58,74 @@ ; GCN: flat_store_dword ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} ; GCN-NEXT: s_barrier -define amdgpu_kernel void @test_global(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @test_global(ptr addrspace(1) %arg) { bb: - %i = alloca i32 addrspace(1)*, align 4, addrspace(5) + %i = alloca ptr addrspace(1), align 4, addrspace(5) %i1 = alloca i32, align 4, addrspace(5) - store i32 addrspace(1)* %arg, i32 addrspace(1)* addrspace(5)* %i, align 4 - store i32 0, i32 addrspace(5)* %i1, align 4 + store ptr addrspace(1) %arg, ptr addrspace(5) %i, align 4 + store i32 0, ptr addrspace(5) %i1, align 4 
br label %bb2 bb2: ; preds = %bb56, %bb - %i3 = load i32, i32 addrspace(5)* %i1, align 4 + %i3 = load i32, ptr addrspace(5) %i1, align 4 %i4 = sext i32 %i3 to i64 - %i5 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() + %i5 = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() %i6 = call i32 @llvm.amdgcn.workitem.id.x() %i7 = call i32 @llvm.amdgcn.workgroup.id.x() - %i8 = getelementptr inbounds i8, i8 addrspace(4)* %i5, i64 4 - %i9 = bitcast i8 addrspace(4)* %i8 to i16 addrspace(4)* - %i10 = load i16, i16 addrspace(4)* %i9, align 4 + %i8 = getelementptr inbounds i8, ptr addrspace(4) %i5, i64 4 + %i10 = load i16, ptr addrspace(4) %i8, align 4 %i11 = zext i16 %i10 to i32 %i12 = mul i32 %i7, %i11 %i13 = add i32 %i12, %i6 - %i14 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %i14 = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %i15 = zext i32 %i13 to i64 - %i16 = bitcast i8 addrspace(4)* %i14 to i64 addrspace(4)* - %i17 = load i64, i64 addrspace(4)* %i16, align 8 + %i17 = load i64, ptr addrspace(4) %i14, align 8 %i18 = add i64 %i17, %i15 %i19 = icmp ult i64 %i4, %i18 br i1 %i19, label %bb20, label %bb59 bb20: ; preds = %bb2 - %i21 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() + %i21 = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() %i22 = call i32 @llvm.amdgcn.workitem.id.x() %i23 = call i32 @llvm.amdgcn.workgroup.id.x() - %i24 = getelementptr inbounds i8, i8 addrspace(4)* %i21, i64 4 - %i25 = bitcast i8 addrspace(4)* %i24 to i16 addrspace(4)* - %i26 = load i16, i16 addrspace(4)* %i25, align 4 + %i24 = getelementptr inbounds i8, ptr addrspace(4) %i21, i64 4 + %i26 = load i16, ptr addrspace(4) %i24, align 4 %i27 = zext i16 %i26 to i32 %i28 = mul i32 %i23, %i27 %i29 = add i32 %i28, %i22 - %i30 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %i30 = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %i31 = zext i32 %i29 to i64 - %i32 = bitcast i8 addrspace(4)* %i30 to i64 addrspace(4)* - %i33 = load i64, i64 addrspace(4)* %i32, align 8 + %i33 = load i64, ptr addrspace(4) %i30, align 8 %i34 = add i64 %i33, %i31 %i35 = add i64 %i34, 2184 %i36 = trunc i64 %i35 to i32 - %i37 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %i, align 4 - %i38 = load i32, i32 addrspace(5)* %i1, align 4 + %i37 = load ptr addrspace(1), ptr addrspace(5) %i, align 4 + %i38 = load i32, ptr addrspace(5) %i1, align 4 %i39 = sext i32 %i38 to i64 - %i40 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() + %i40 = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() %i41 = call i32 @llvm.amdgcn.workitem.id.x() %i42 = call i32 @llvm.amdgcn.workgroup.id.x() - %i43 = getelementptr inbounds i8, i8 addrspace(4)* %i40, i64 4 - %i44 = bitcast i8 addrspace(4)* %i43 to i16 addrspace(4)* - %i45 = load i16, i16 addrspace(4)* %i44, align 4 + %i43 = getelementptr inbounds i8, ptr addrspace(4) %i40, i64 4 + %i45 = load i16, ptr addrspace(4) %i43, align 4 %i46 = zext i16 %i45 to i32 %i47 = mul i32 %i42, %i46 %i48 = add i32 %i47, %i41 - %i49 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %i49 = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %i50 = zext i32 %i48 to i64 - %i51 = bitcast i8 addrspace(4)* %i49 to i64 addrspace(4)* - %i52 = load i64, i64 addrspace(4)* %i51, align 8 + %i52 = load i64, ptr addrspace(4) %i49, align 8 %i53 = add i64 %i52, %i50 %i54 = add i64 %i39, %i53 - %i55 = getelementptr inbounds i32, i32 addrspace(1)* %i37, i64 %i54 - store i32 %i36, i32 addrspace(1)* %i55, align 4 + %i55 = getelementptr inbounds i32, ptr addrspace(1) %i37, i64 %i54 + store i32 %i36, ptr 
addrspace(1) %i55, align 4 fence syncscope("workgroup") release call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire br label %bb56 bb56: ; preds = %bb20 - %i57 = load i32, i32 addrspace(5)* %i1, align 4 + %i57 = load i32, ptr addrspace(5) %i1, align 4 %i58 = add nsw i32 %i57, 1 - store i32 %i58, i32 addrspace(5)* %i1, align 4 + store i32 %i58, ptr addrspace(5) %i1, align 4 br label %bb2 bb59: ; preds = %bb2 @@ -146,57 +138,53 @@ ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} ; GCN-NEXT: s_barrier ; GCN: flat_store_dword -define amdgpu_kernel void @test_global_local(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @test_global_local(ptr addrspace(1) %arg) { bb: - %i = alloca i32 addrspace(1)*, align 4, addrspace(5) - store i32 addrspace(1)* %arg, i32 addrspace(1)* addrspace(5)* %i, align 4 - %i1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %i, align 4 - %i2 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() + %i = alloca ptr addrspace(1), align 4, addrspace(5) + store ptr addrspace(1) %arg, ptr addrspace(5) %i, align 4 + %i1 = load ptr addrspace(1), ptr addrspace(5) %i, align 4 + %i2 = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() %i3 = call i32 @llvm.amdgcn.workitem.id.x() %i4 = call i32 @llvm.amdgcn.workgroup.id.x() - %i5 = getelementptr inbounds i8, i8 addrspace(4)* %i2, i64 4 - %i6 = bitcast i8 addrspace(4)* %i5 to i16 addrspace(4)* - %i7 = load i16, i16 addrspace(4)* %i6, align 4 + %i5 = getelementptr inbounds i8, ptr addrspace(4) %i2, i64 4 + %i7 = load i16, ptr addrspace(4) %i5, align 4 %i8 = zext i16 %i7 to i32 %i9 = mul i32 %i4, %i8 %i10 = add i32 %i9, %i3 - %i11 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %i11 = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %i12 = zext i32 %i10 to i64 - %i13 = bitcast i8 addrspace(4)* %i11 to i64 addrspace(4)* - %i14 = load i64, i64 addrspace(4)* %i13, align 8 + %i14 = load i64, ptr addrspace(4) %i11, align 8 %i15 = add i64 %i14, %i12 - %i16 = getelementptr inbounds i32, i32 addrspace(1)* %i1, i64 %i15 - store i32 1, i32 addrspace(1)* %i16, align 4 + %i16 = getelementptr inbounds i32, ptr addrspace(1) %i1, i64 %i15 + store i32 1, ptr addrspace(1) %i16, align 4 %i17 = call i32 @llvm.amdgcn.workitem.id.x() %i18 = zext i32 %i17 to i64 %i19 = icmp eq i64 %i18, 0 br i1 %i19, label %bb20, label %bb21 bb20: ; preds = %bb - store i32 2457, i32 addrspace(3)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(3)* @test_global_local.temp, i64 0, i64 0), align 4 + store i32 2457, ptr addrspace(3) @test_global_local.temp, align 4 br label %bb21 bb21: ; preds = %bb20, %bb fence syncscope("workgroup") release call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %i22 = load i32, i32 addrspace(3)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(3)* @test_global_local.temp, i64 0, i64 0), align 4 - %i23 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %i, align 4 - %i24 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() + %i22 = load i32, ptr addrspace(3) @test_global_local.temp, align 4 + %i23 = load ptr addrspace(1), ptr addrspace(5) %i, align 4 + %i24 = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() %i25 = call i32 @llvm.amdgcn.workitem.id.x() %i26 = call i32 @llvm.amdgcn.workgroup.id.x() - %i27 = getelementptr inbounds i8, i8 addrspace(4)* %i24, i64 4 - %i28 = bitcast i8 addrspace(4)* %i27 to i16 addrspace(4)* - %i29 = load i16, i16 addrspace(4)* %i28, align 4 + %i27 = getelementptr inbounds i8, ptr addrspace(4) %i24, i64 4 + %i29 = load i16, ptr addrspace(4) 
%i27, align 4 %i30 = zext i16 %i29 to i32 %i31 = mul i32 %i26, %i30 %i32 = add i32 %i31, %i25 - %i33 = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %i33 = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %i34 = zext i32 %i32 to i64 - %i35 = bitcast i8 addrspace(4)* %i33 to i64 addrspace(4)* - %i36 = load i64, i64 addrspace(4)* %i35, align 8 + %i36 = load i64, ptr addrspace(4) %i33, align 8 %i37 = add i64 %i36, %i34 - %i38 = getelementptr inbounds i32, i32 addrspace(1)* %i23, i64 %i37 - store i32 %i22, i32 addrspace(1)* %i38, align 4 + %i38 = getelementptr inbounds i32, ptr addrspace(1) %i23, i64 %i37 + store i32 %i22, ptr addrspace(1) %i38, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll b/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll --- a/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll +++ b/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll @@ -32,33 +32,33 @@ ; GCN-NEXT: s_endpgm bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0 - %tmp1 = getelementptr inbounds [576 x double], [576 x double] addrspace(3)* @lds, i32 0, i32 %tmp - store double 4.000000e+00, double addrspace(3)* %tmp1, align 8 - %tmp2 = getelementptr inbounds double, double addrspace(3)* %tmp1, i32 66 - store double 4.000000e+00, double addrspace(3)* %tmp2, align 8 - %tmp3 = getelementptr inbounds double, double addrspace(3)* %tmp1, i32 132 - store double 4.000000e+00, double addrspace(3)* %tmp3, align 8 - %tmp4 = getelementptr inbounds double, double addrspace(3)* %tmp1, i32 198 - store double 4.000000e+00, double addrspace(3)* %tmp4, align 8 - %tmp5 = getelementptr inbounds double, double addrspace(3)* %tmp1, i32 264 - store double 4.000000e+00, double addrspace(3)* %tmp5, align 8 - %tmp6 = getelementptr inbounds double, double addrspace(3)* %tmp1, i32 330 - store double 4.000000e+00, double addrspace(3)* %tmp6, align 8 - %tmp7 = getelementptr inbounds double, double addrspace(3)* %tmp1, i32 396 - store double 4.000000e+00, double addrspace(3)* %tmp7, align 8 - %tmp8 = getelementptr inbounds double, double addrspace(3)* %tmp1, i32 462 - store double 4.000000e+00, double addrspace(3)* %tmp8, align 8 + %tmp1 = getelementptr inbounds [576 x double], ptr addrspace(3) @lds, i32 0, i32 %tmp + store double 4.000000e+00, ptr addrspace(3) %tmp1, align 8 + %tmp2 = getelementptr inbounds double, ptr addrspace(3) %tmp1, i32 66 + store double 4.000000e+00, ptr addrspace(3) %tmp2, align 8 + %tmp3 = getelementptr inbounds double, ptr addrspace(3) %tmp1, i32 132 + store double 4.000000e+00, ptr addrspace(3) %tmp3, align 8 + %tmp4 = getelementptr inbounds double, ptr addrspace(3) %tmp1, i32 198 + store double 4.000000e+00, ptr addrspace(3) %tmp4, align 8 + %tmp5 = getelementptr inbounds double, ptr addrspace(3) %tmp1, i32 264 + store double 4.000000e+00, ptr addrspace(3) %tmp5, align 8 + %tmp6 = getelementptr inbounds double, ptr addrspace(3) %tmp1, i32 330 + store double 4.000000e+00, ptr addrspace(3) %tmp6, align 8 + %tmp7 = getelementptr inbounds double, ptr addrspace(3) %tmp1, i32 396 + store double 4.000000e+00, ptr addrspace(3) %tmp7, align 8 + %tmp8 = getelementptr inbounds double, ptr addrspace(3) %tmp1, i32 462 + store double 4.000000e+00, ptr addrspace(3) %tmp8, align 8 fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - store double 1.000000e+00, double addrspace(3)* %tmp1, align 8 - store double 1.000000e+00, double addrspace(3)* %tmp2, align 8 - store double 1.000000e+00, double 
addrspace(3)* %tmp3, align 8 - store double 1.000000e+00, double addrspace(3)* %tmp4, align 8 - store double 1.000000e+00, double addrspace(3)* %tmp5, align 8 - store double 1.000000e+00, double addrspace(3)* %tmp6, align 8 - store double 1.000000e+00, double addrspace(3)* %tmp7, align 8 - store double 1.000000e+00, double addrspace(3)* %tmp8, align 8 + store double 1.000000e+00, ptr addrspace(3) %tmp1, align 8 + store double 1.000000e+00, ptr addrspace(3) %tmp2, align 8 + store double 1.000000e+00, ptr addrspace(3) %tmp3, align 8 + store double 1.000000e+00, ptr addrspace(3) %tmp4, align 8 + store double 1.000000e+00, ptr addrspace(3) %tmp5, align 8 + store double 1.000000e+00, ptr addrspace(3) %tmp6, align 8 + store double 1.000000e+00, ptr addrspace(3) %tmp7, align 8 + store double 1.000000e+00, ptr addrspace(3) %tmp8, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll --- a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll @@ -130,6 +130,7 @@ define amdgpu_kernel void @flat_scratch_unaligned_load() { %scratch = alloca i32, addrspace(5) %fptr = addrspacecast i32 addrspace(5)* %scratch to i32* + store volatile i32* %fptr, i32* addrspace(3)* null %ld = load volatile i32, i32* %fptr, align 1 ret void } @@ -142,6 +143,7 @@ define amdgpu_kernel void @flat_scratch_unaligned_store() { %scratch = alloca i32, addrspace(5) %fptr = addrspacecast i32 addrspace(5)* %scratch to i32* + store volatile i32* %fptr, i32* addrspace(3)* null store volatile i32 0, i32* %fptr, align 1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -22,18 +22,12 @@ ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 ; GFX9-NEXT: s_mov_b32 s1, 0 -; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: s_mov_b32 vcc_lo, 0 ; GFX9-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:52 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:36 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:20 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: zero_init_kernel: @@ -43,7 +37,6 @@ ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 ; GFX10-NEXT: s_mov_b32 s0, 0 -; GFX10-NEXT: v_mov_b32_e32 v4, 4 ; GFX10-NEXT: s_mov_b32 s1, s0 ; GFX10-NEXT: s_mov_b32 s2, s0 ; GFX10-NEXT: s_mov_b32 s3, s0 @@ -55,15 +48,12 @@ ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:36 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:20 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:4 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v4 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: zero_init_kernel: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: v_mov_b32_e32 v4, 4 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 s1, s0 ; GFX11-NEXT: s_mov_b32 s2, s0 ; GFX11-NEXT: s_mov_b32 s3, s0 @@ -74,9 +64,6 @@ ; GFX11-NEXT: scratch_store_b128 off, v[0:3], off offset:36 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], off offset:20 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], off offset:4 -; GFX11-NEXT: ;;#ASMSTART -; 
GFX11-NEXT: ; use v4 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; @@ -100,16 +87,10 @@ ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3 ; GFX9-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-PAL-NEXT: s_mov_b32 s0, 0 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:52 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:36 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:20 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:4 -; GFX9-PAL-NEXT: s_nop 0 -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_endpgm ; ; GFX940-LABEL: zero_init_kernel: @@ -124,11 +105,6 @@ ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:36 ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:20 ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, 4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: zero_init_kernel: @@ -153,15 +129,10 @@ ; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3 ; GFX1010-PAL-NEXT: s_mov_b32 s2, 0 ; GFX1010-PAL-NEXT: s_mov_b32 s1, 0 -; GFX1010-PAL-NEXT: s_mov_b32 s0, 0 -; GFX1010-PAL-NEXT: v_mov_b32_e32 v4, 4 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:52 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:36 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:20 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:4 -; GFX1010-PAL-NEXT: ;;#ASMSTART -; GFX1010-PAL-NEXT: ; use v4 -; GFX1010-PAL-NEXT: ;;#ASMEND ; GFX1010-PAL-NEXT: s_endpgm ; ; GFX1030-PAL-LABEL: zero_init_kernel: @@ -176,7 +147,6 @@ ; GFX1030-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 ; GFX1030-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX1030-PAL-NEXT: s_mov_b32 s0, 0 -; GFX1030-PAL-NEXT: v_mov_b32_e32 v4, 4 ; GFX1030-PAL-NEXT: s_mov_b32 s1, s0 ; GFX1030-PAL-NEXT: s_mov_b32 s2, s0 ; GFX1030-PAL-NEXT: s_mov_b32 s3, s0 @@ -188,15 +158,12 @@ ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:36 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:20 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:4 -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v4 -; GFX1030-PAL-NEXT: ;;#ASMEND ; GFX1030-PAL-NEXT: s_endpgm ; ; GFX11-PAL-LABEL: zero_init_kernel: ; GFX11-PAL: ; %bb.0: ; GFX11-PAL-NEXT: s_mov_b32 s0, 0 -; GFX11-PAL-NEXT: v_mov_b32_e32 v4, 4 +; GFX11-PAL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-PAL-NEXT: s_mov_b32 s1, s0 ; GFX11-PAL-NEXT: s_mov_b32 s2, s0 ; GFX11-PAL-NEXT: s_mov_b32 s3, s0 @@ -207,15 +174,11 @@ ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], off offset:36 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], off offset:20 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], off offset:4 -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v4 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-PAL-NEXT: s_endpgm %alloca = alloca [32 x i16], align 2, addrspace(5) %cast = bitcast [32 x i16] addrspace(5)* %alloca to i8 addrspace(5)* call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 2 dereferenceable(64) %cast, i8 0, i64 64, i1 false) - call void asm sideeffect "; use $0", "s"([32 x i16] addrspace(5)* %alloca) #0 ret void } @@ -235,11 +198,6 @@ ; GFX9-NEXT: 
scratch_store_dwordx4 off, v[0:3], s32 offset:32 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s32 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_mov_b32_e32 v0, s32 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -248,7 +206,6 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s0, 0 -; GFX10-NEXT: v_mov_b32_e32 v4, s32 ; GFX10-NEXT: s_mov_b32 s1, s0 ; GFX10-NEXT: s_mov_b32 s2, s0 ; GFX10-NEXT: s_mov_b32 s3, s0 @@ -260,9 +217,6 @@ ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:32 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v4 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -271,7 +225,7 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: v_mov_b32_e32 v4, s32 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 s1, s0 ; GFX11-NEXT: s_mov_b32 s2, s0 ; GFX11-NEXT: s_mov_b32 s3, s0 @@ -282,9 +236,6 @@ ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:32 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:16 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v4 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -303,11 +254,6 @@ ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:32 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 -; GFX9-PAL-NEXT: s_nop 0 -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, s32 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; @@ -324,11 +270,6 @@ ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:32 ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16 ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, s32 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] ; @@ -337,7 +278,6 @@ ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: s_mov_b32 s0, 0 -; GFX10-PAL-NEXT: v_mov_b32_e32 v4, s32 ; GFX10-PAL-NEXT: s_mov_b32 s1, s0 ; GFX10-PAL-NEXT: s_mov_b32 s2, s0 ; GFX10-PAL-NEXT: s_mov_b32 s3, s0 @@ -349,9 +289,6 @@ ; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:32 ; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16 ; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 -; GFX10-PAL-NEXT: ;;#ASMSTART -; GFX10-PAL-NEXT: ; use v4 -; GFX10-PAL-NEXT: ;;#ASMEND ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: s_setpc_b64 s[30:31] ; @@ -360,7 +297,7 @@ ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: s_mov_b32 s0, 0 -; GFX11-PAL-NEXT: v_mov_b32_e32 v4, s32 +; GFX11-PAL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-PAL-NEXT: s_mov_b32 s1, s0 ; GFX11-PAL-NEXT: s_mov_b32 s2, s0 ; GFX11-PAL-NEXT: s_mov_b32 s3, s0 @@ 
-371,15 +308,26 @@ ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:32 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:16 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v4 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: zero_init_foo: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s0, 0 +; GCN-NEXT: s_mov_b32 s1, s0 +; GCN-NEXT: s_mov_b32 s2, s0 +; GCN-NEXT: s_mov_b32 s3, s0 +; GCN-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GCN-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:48 +; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:32 +; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16 +; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] %alloca = alloca [32 x i16], align 2, addrspace(5) %cast = bitcast [32 x i16] addrspace(5)* %alloca to i8 addrspace(5)* call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 2 dereferenceable(64) %cast, i8 0, i64 64, i1 false) - call void asm sideeffect "; use $0", "s"([32 x i16] addrspace(5)* %alloca) #0 ret void } @@ -400,10 +348,6 @@ ; GFX9-NEXT: s_add_i32 s0, s0, 4 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: store_load_sindex_kernel: @@ -424,10 +368,6 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, 4 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v0 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_sindex_kernel: @@ -444,10 +384,6 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v0 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_endpgm ; ; GFX9-PAL-LABEL: store_load_sindex_kernel: @@ -470,10 +406,6 @@ ; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4 ; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_endpgm ; ; GFX940-LABEL: store_load_sindex_kernel: @@ -490,10 +422,6 @@ ; GFX940-NEXT: s_add_i32 s0, s0, 4 ; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_endpgm ; ; GFX10-PAL-LABEL: store_load_sindex_kernel: @@ -519,10 +447,6 @@ ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX10-PAL-NEXT: ;;#ASMSTART -; GFX10-PAL-NEXT: ; use v0 -; GFX10-PAL-NEXT: ;;#ASMEND ; GFX10-PAL-NEXT: s_endpgm ; ; GFX11-PAL-LABEL: store_load_sindex_kernel: @@ -539,11 +463,22 @@ ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s1 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v0 -; 
GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_endpgm +; GCN-LABEL: store_load_sindex_kernel: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dword s0, s[0:1], 0x24 +; GCN-NEXT: v_mov_b32_e32 v0, 15 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshl_b32 s1, s0, 2 +; GCN-NEXT: s_and_b32 s0, s0, 15 +; GCN-NEXT: s_lshl_b32 s0, s0, 2 +; GCN-NEXT: s_add_u32 s1, 4, s1 +; GCN-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_add_u32 s0, 4, s0 +; GCN-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_endpgm bb: %i = alloca [32 x float], align 4, addrspace(5) %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* @@ -554,7 +489,6 @@ %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 - call void asm sideeffect "; use $0", "s"([32 x float] addrspace(5)* %i) #0 ret void } @@ -573,10 +507,6 @@ ; GFX9-NEXT: s_add_i32 s0, s0, 4 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: store_load_sindex_foo: @@ -595,10 +525,6 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, 4 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v0 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_sindex_foo: @@ -613,10 +539,6 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v0 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_endpgm ; ; GFX9-PAL-LABEL: store_load_sindex_foo: @@ -638,10 +560,6 @@ ; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4 ; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_endpgm ; ; GFX940-LABEL: store_load_sindex_foo: @@ -656,10 +574,6 @@ ; GFX940-NEXT: s_add_i32 s0, s0, 4 ; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_endpgm ; ; GFX10-PAL-LABEL: store_load_sindex_foo: @@ -683,10 +597,6 @@ ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX10-PAL-NEXT: ;;#ASMSTART -; GFX10-PAL-NEXT: ; use v0 -; GFX10-PAL-NEXT: ;;#ASMEND ; GFX10-PAL-NEXT: s_endpgm ; ; GFX11-PAL-LABEL: store_load_sindex_foo: @@ -701,11 +611,20 @@ ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s1 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v0 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_endpgm +; GCN-LABEL: store_load_sindex_foo: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_lshl_b32 s1, s0, 2 +; GCN-NEXT: s_and_b32 s0, s0, 15 +; GCN-NEXT: s_lshl_b32 s0, s0, 2 +; GCN-NEXT: s_add_u32 s1, 4, s1 +; GCN-NEXT: v_mov_b32_e32 v0, 15 +; GCN-NEXT: 
scratch_store_dword off, v0, s1 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_add_u32 s0, 4, s0 +; GCN-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_endpgm bb: %i = alloca [32 x float], align 4, addrspace(5) %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* @@ -716,7 +635,6 @@ %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 - call void asm sideeffect "; use $0", "s"([32 x float] addrspace(5)* %i) #0 ret void } @@ -733,10 +651,6 @@ ; GFX9-NEXT: v_sub_u32_e32 v0, 4, v0 ; GFX9-NEXT: scratch_load_dword v0, v0, off offset:124 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: store_load_vindex_kernel: @@ -753,10 +667,6 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, v0, off offset:124 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, 4 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v0 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_vindex_kernel: @@ -768,10 +678,6 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, v2, off offset:124 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v0 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_endpgm ; ; GFX9-PAL-LABEL: store_load_vindex_kernel: @@ -791,10 +697,6 @@ ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: scratch_load_dword v0, v0, off offset:124 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_endpgm ; ; GFX940-LABEL: store_load_vindex_kernel: @@ -806,10 +708,6 @@ ; GFX940-NEXT: v_sub_u32_e32 v0, 4, v0 ; GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_endpgm ; ; GFX10-PAL-LABEL: store_load_vindex_kernel: @@ -831,10 +729,6 @@ ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_load_dword v0, v0, off offset:124 glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX10-PAL-NEXT: ;;#ASMSTART -; GFX10-PAL-NEXT: ; use v0 -; GFX10-PAL-NEXT: ;;#ASMEND ; GFX10-PAL-NEXT: s_endpgm ; ; GFX11-PAL-LABEL: store_load_vindex_kernel: @@ -846,11 +740,17 @@ ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, v2, off offset:124 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v0 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_endpgm +; GCN-LABEL: store_load_vindex_kernel: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_mov_b32_e32 v1, 15 +; GCN-NEXT: scratch_store_dword v0, v1, off offset:4 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_sub_u32_e32 v0, 4, v0 +; GCN-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_endpgm bb: %i = alloca [32 x float], align 4, addrspace(5) %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* @@ -863,7 +763,6 @@ %i10 = 
getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 - call void asm sideeffect "; use $0", "s"([32 x float] addrspace(5)* %i) #0 ret void } @@ -880,9 +779,6 @@ ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1 ; GFX9-NEXT: scratch_load_dword v0, v0, off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v1 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: store_load_vindex_foo: @@ -897,10 +793,6 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, s32 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v0 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: store_load_vindex_foo: @@ -915,10 +807,6 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, v1, s32 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v0, s32 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v0 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-PAL-LABEL: store_load_vindex_foo: @@ -933,9 +821,6 @@ ; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1 ; GFX9-PAL-NEXT: scratch_load_dword v0, v0, off glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v1 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: store_load_vindex_foo: @@ -949,10 +834,6 @@ ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX940-NEXT: scratch_load_dword v0, v0, s32 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, s32 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-PAL-LABEL: store_load_vindex_foo: @@ -967,10 +848,6 @@ ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_load_dword v0, v1, off glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-PAL-NEXT: v_mov_b32_e32 v0, s32 -; GFX10-PAL-NEXT: ;;#ASMSTART -; GFX10-PAL-NEXT: ; use v0 -; GFX10-PAL-NEXT: ;;#ASMEND ; GFX10-PAL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-PAL-LABEL: store_load_vindex_foo: @@ -985,11 +862,19 @@ ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, v1, s32 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_mov_b32_e32 v0, s32 -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v0 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: store_load_vindex_foo: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v2, 15 +; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GCN-NEXT: v_and_b32_e32 v0, v0, v2 +; GCN-NEXT: scratch_store_dword v1, v2, s32 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: scratch_load_dword v0, v0, s32 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] bb: %i = alloca [32 x float], align 4, addrspace(5) %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* @@ -1000,7 +885,6 @@ %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 - call void asm sideeffect "; use $0", "s"([32 x float] addrspace(5)* %i) #0 ret void } @@ -1064,6 +948,13 @@ ; 
GFX11-PAL-NEXT: scratch_store_b32 v0, v1, off offset:4 ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: private_ptr_foo: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, 0x41200000 +; GCN-NEXT: scratch_store_dword v0, v1, off offset:4 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] %gep = getelementptr inbounds float, float addrspace(5)* %arg, i32 1 store float 1.000000e+01, float addrspace(5)* %gep, align 4 ret void @@ -1086,22 +977,12 @@ ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 ; GFX9-NEXT: s_mov_b32 s1, 0 -; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: s_mov_b32 vcc_lo, 0 ; GFX9-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:260 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:276 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:292 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:308 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_mov_b32_e32 v0, 0x104 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: zero_init_small_offset_kernel: @@ -1113,7 +994,6 @@ ; GFX10-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_mov_b32 s0, 0 -; GFX10-NEXT: v_mov_b32_e32 v4, 4 ; GFX10-NEXT: s_mov_b32 s1, s0 ; GFX10-NEXT: s_mov_b32 s2, s0 ; GFX10-NEXT: s_mov_b32 s3, s0 @@ -1121,17 +1001,10 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-NEXT: v_mov_b32_e32 v5, 0x104 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:260 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:276 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:292 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:308 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v4 -; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v5 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: zero_init_small_offset_kernel: @@ -1139,7 +1012,7 @@ ; GFX11-NEXT: scratch_load_b32 v0, off, off offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: v_dual_mov_b32 v4, 4 :: v_dual_mov_b32 v5, 0x104 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 s1, s0 ; GFX11-NEXT: s_mov_b32 s2, s0 ; GFX11-NEXT: s_mov_b32 s3, s0 @@ -1150,12 +1023,6 @@ ; GFX11-NEXT: scratch_store_b128 off, v[0:3], off offset:276 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], off offset:292 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], off offset:308 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v4 -; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v5 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; @@ -1182,20 +1049,10 @@ ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3 ; GFX9-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-PAL-NEXT: s_mov_b32 s0, 0 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:260 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:276 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:292 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:308 -; GFX9-PAL-NEXT: s_nop 0 -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-PAL-NEXT: 
;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 0x104 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_endpgm ; ; GFX940-LABEL: zero_init_small_offset_kernel: @@ -1212,15 +1069,6 @@ ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:276 ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:292 ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:308 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, 4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0x104 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: zero_init_small_offset_kernel: @@ -1247,20 +1095,11 @@ ; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3 ; GFX1010-PAL-NEXT: s_mov_b32 s2, 0 ; GFX1010-PAL-NEXT: s_mov_b32 s1, 0 -; GFX1010-PAL-NEXT: s_mov_b32 s0, 0 ; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:260 -; GFX1010-PAL-NEXT: v_mov_b32_e32 v4, 4 -; GFX1010-PAL-NEXT: v_mov_b32_e32 v5, 0x104 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:276 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:292 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:308 -; GFX1010-PAL-NEXT: ;;#ASMSTART -; GFX1010-PAL-NEXT: ; use v4 -; GFX1010-PAL-NEXT: ;;#ASMEND -; GFX1010-PAL-NEXT: ;;#ASMSTART -; GFX1010-PAL-NEXT: ; use v5 -; GFX1010-PAL-NEXT: ;;#ASMEND ; GFX1010-PAL-NEXT: s_endpgm ; ; GFX1030-PAL-LABEL: zero_init_small_offset_kernel: @@ -1277,7 +1116,6 @@ ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1030-PAL-NEXT: s_mov_b32 s0, 0 -; GFX1030-PAL-NEXT: v_mov_b32_e32 v4, 4 ; GFX1030-PAL-NEXT: s_mov_b32 s1, s0 ; GFX1030-PAL-NEXT: s_mov_b32 s2, s0 ; GFX1030-PAL-NEXT: s_mov_b32 s3, s0 @@ -1285,17 +1123,10 @@ ; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1030-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX1030-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX1030-PAL-NEXT: v_mov_b32_e32 v5, 0x104 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:260 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:276 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:292 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:308 -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v4 -; GFX1030-PAL-NEXT: ;;#ASMEND -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v5 -; GFX1030-PAL-NEXT: ;;#ASMEND ; GFX1030-PAL-NEXT: s_endpgm ; ; GFX11-PAL-LABEL: zero_init_small_offset_kernel: @@ -1303,7 +1134,7 @@ ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, off offset:4 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX11-PAL-NEXT: s_mov_b32 s0, 0 -; GFX11-PAL-NEXT: v_dual_mov_b32 v4, 4 :: v_dual_mov_b32 v5, 0x104 +; GFX11-PAL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-PAL-NEXT: s_mov_b32 s1, s0 ; GFX11-PAL-NEXT: s_mov_b32 s2, s0 ; GFX11-PAL-NEXT: s_mov_b32 s3, s0 @@ -1314,12 +1145,6 @@ ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], off offset:276 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], off offset:292 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], off offset:308 -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v4 -; GFX11-PAL-NEXT: ;;#ASMEND -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v5 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-PAL-NEXT: s_endpgm %padding = alloca [64 x i32], align 4, addrspace(5) @@ -1328,8 +1153,6 @@ %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 %cast = bitcast [32 x i16] addrspace(5)* %alloca to i8 addrspace(5)* call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 2 dereferenceable(64) %cast, i8 0, i64 64, i1 false) - call void asm sideeffect "; use $0", "s"([64 x i32] addrspace(5)* %padding) #0 - call void asm sideeffect "; use $0", "s"([32 x i16] addrspace(5)* %alloca) #0 ret void } @@ -1351,15 +1174,6 @@ ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:272 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:288 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:304 -; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x100 -; GFX9-NEXT: v_mov_b32_e32 v0, s32 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_mov_b32_e32 v0, vcc_hi -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1370,7 +1184,6 @@ ; GFX10-NEXT: scratch_load_dword v0, off, s32 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_mov_b32 s0, 0 -; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100 ; GFX10-NEXT: s_mov_b32 s1, s0 ; GFX10-NEXT: s_mov_b32 s2, s0 ; GFX10-NEXT: s_mov_b32 s3, s0 @@ -1378,18 +1191,10 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-NEXT: v_mov_b32_e32 v4, s32 -; GFX10-NEXT: v_mov_b32_e32 v5, vcc_lo ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:256 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:272 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:288 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:304 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v4 -; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v5 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -1400,24 +1205,17 @@ ; GFX11-NEXT: scratch_load_b32 v0, off, s32 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x100 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 s1, s0 ; GFX11-NEXT: s_mov_b32 s2, s0 ; GFX11-NEXT: s_mov_b32 s3, s0 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-NEXT: v_dual_mov_b32 v4, s32 :: v_dual_mov_b32 v5, vcc_lo ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:256 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:272 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:288 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 offset:304 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v4 -; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v5 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -1438,15 +1236,6 @@ ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:272 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:288 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:304 -; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x100 -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, s32 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, vcc_hi -; GFX9-PAL-NEXT: ;;#ASMSTART -; 
GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; @@ -1465,16 +1254,6 @@ ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:272 ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:288 ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:304 -; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, s32 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, vcc_hi -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] ; @@ -1485,7 +1264,6 @@ ; GFX10-PAL-NEXT: scratch_load_dword v0, off, s32 glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX10-PAL-NEXT: s_mov_b32 s0, 0 -; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x100 ; GFX10-PAL-NEXT: s_mov_b32 s1, s0 ; GFX10-PAL-NEXT: s_mov_b32 s2, s0 ; GFX10-PAL-NEXT: s_mov_b32 s3, s0 @@ -1493,18 +1271,10 @@ ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-PAL-NEXT: v_mov_b32_e32 v4, s32 -; GFX10-PAL-NEXT: v_mov_b32_e32 v5, vcc_lo ; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:256 ; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:272 ; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:288 ; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:304 -; GFX10-PAL-NEXT: ;;#ASMSTART -; GFX10-PAL-NEXT: ; use v4 -; GFX10-PAL-NEXT: ;;#ASMEND -; GFX10-PAL-NEXT: ;;#ASMSTART -; GFX10-PAL-NEXT: ; use v5 -; GFX10-PAL-NEXT: ;;#ASMEND ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: s_setpc_b64 s[30:31] ; @@ -1515,34 +1285,42 @@ ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s32 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX11-PAL-NEXT: s_mov_b32 s0, 0 -; GFX11-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x100 +; GFX11-PAL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-PAL-NEXT: s_mov_b32 s1, s0 ; GFX11-PAL-NEXT: s_mov_b32 s2, s0 ; GFX11-PAL-NEXT: s_mov_b32 s3, s0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-PAL-NEXT: v_dual_mov_b32 v4, s32 :: v_dual_mov_b32 v5, vcc_lo ; GFX11-PAL-NEXT: s_clause 0x3 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:256 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:272 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:288 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:304 -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v4 -; GFX11-PAL-NEXT: ;;#ASMEND -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v5 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: zero_init_small_offset_foo: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: scratch_load_dword v0, off, s32 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_mov_b32 s0, 0 +; GCN-NEXT: s_mov_b32 s1, s0 +; GCN-NEXT: s_mov_b32 s2, s0 +; GCN-NEXT: s_mov_b32 s3, s0 +; GCN-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GCN-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:256 +; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:272 +; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:288 +; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:304 +; 
GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] %padding = alloca [64 x i32], align 4, addrspace(5) %alloca = alloca [32 x i16], align 2, addrspace(5) %pad_gep = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %padding, i32 0, i32 undef %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 %cast = bitcast [32 x i16] addrspace(5)* %alloca to i8 addrspace(5)* call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 2 dereferenceable(64) %cast, i8 0, i64 64, i1 false) - call void asm sideeffect "; use $0", "s"([64 x i32] addrspace(5)* %padding) #0 - call void asm sideeffect "; use $0", "s"([32 x i16] addrspace(5)* %alloca) #0 ret void } @@ -1565,14 +1343,6 @@ ; GFX9-NEXT: s_addk_i32 s0, 0x104 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_mov_b32_e32 v0, 0x104 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: store_load_sindex_small_offset_kernel: @@ -1585,7 +1355,6 @@ ; GFX10-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, 15 -; GFX10-NEXT: v_mov_b32_e32 v1, 0x104 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_and_b32 s1, s0, 15 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2 @@ -1596,13 +1365,6 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, 4 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v0 -; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v1 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_sindex_small_offset_kernel: @@ -1610,7 +1372,7 @@ ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x24 ; GFX11-NEXT: scratch_load_b32 v0, off, off offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, 0x104 +; GFX11-NEXT: v_mov_b32_e32 v0, 15 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_and_b32 s1, s0, 15 ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 @@ -1621,13 +1383,6 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v0 -; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v1 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_endpgm ; ; GFX9-PAL-LABEL: store_load_sindex_small_offset_kernel: @@ -1653,14 +1408,6 @@ ; GFX9-PAL-NEXT: s_addk_i32 s0, 0x104 ; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 0x104 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_endpgm ; ; GFX940-LABEL: store_load_sindex_small_offset_kernel: @@ -1679,14 +1426,6 @@ ; GFX940-NEXT: s_addk_i32 s0, 0x104 ; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0x104 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: store_load_sindex_small_offset_kernel: 
@@ -1702,7 +1441,6 @@ ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 ; GFX1010-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, 0x104 ; GFX1010-PAL-NEXT: scratch_load_dword v0, off, vcc_lo offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 15 @@ -1716,13 +1454,6 @@ ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX1010-PAL-NEXT: ;;#ASMSTART -; GFX1010-PAL-NEXT: ; use v0 -; GFX1010-PAL-NEXT: ;;#ASMEND -; GFX1010-PAL-NEXT: ;;#ASMSTART -; GFX1010-PAL-NEXT: ; use v1 -; GFX1010-PAL-NEXT: ;;#ASMEND ; GFX1010-PAL-NEXT: s_endpgm ; ; GFX1030-PAL-LABEL: store_load_sindex_small_offset_kernel: @@ -1740,7 +1471,6 @@ ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 15 -; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, 0x104 ; GFX1030-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1030-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX1030-PAL-NEXT: s_lshl_b32 s0, s0, 2 @@ -1751,13 +1481,6 @@ ; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v0 -; GFX1030-PAL-NEXT: ;;#ASMEND -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v1 -; GFX1030-PAL-NEXT: ;;#ASMEND ; GFX1030-PAL-NEXT: s_endpgm ; ; GFX11-PAL-LABEL: store_load_sindex_small_offset_kernel: @@ -1765,7 +1488,7 @@ ; GFX11-PAL-NEXT: s_load_b32 s0, s[0:1], 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, off offset:4 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, 0x104 +; GFX11-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX11-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 2 @@ -1776,13 +1499,6 @@ ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s1 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v0 -; GFX11-PAL-NEXT: ;;#ASMEND -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v1 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_endpgm bb: %padding = alloca [64 x i32], align 4, addrspace(5) @@ -1797,8 +1513,6 @@ %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 - call void asm sideeffect "; use $0", "s"([64 x i32] addrspace(5)* %padding) #0 - call void asm sideeffect "; use $0", "s"([32 x float] addrspace(5)* %i) #0 ret void } @@ -1820,14 +1534,6 @@ ; GFX9-NEXT: s_addk_i32 s0, 0x104 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_mov_b32_e32 v0, 0x104 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: store_load_sindex_small_offset_foo: @@ -1848,21 +1554,13 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, 4 -; 
GFX10-NEXT: v_mov_b32_e32 v1, 0x104 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v0 -; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v1 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_sindex_small_offset_foo: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: scratch_load_b32 v0, off, off offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, 0x104 +; GFX11-NEXT: v_mov_b32_e32 v0, 15 ; GFX11-NEXT: s_and_b32 s1, s0, 15 ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-NEXT: s_lshl_b32 s1, s1, 2 @@ -1872,13 +1570,6 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v0 -; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v1 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_endpgm ; ; GFX9-PAL-LABEL: store_load_sindex_small_offset_foo: @@ -1903,14 +1594,6 @@ ; GFX9-PAL-NEXT: s_addk_i32 s0, 0x104 ; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 0x104 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_endpgm ; ; GFX940-LABEL: store_load_sindex_small_offset_foo: @@ -1927,14 +1610,6 @@ ; GFX940-NEXT: s_addk_i32 s0, 0x104 ; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0x104 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: store_load_sindex_small_offset_foo: @@ -1961,14 +1636,6 @@ ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, 0x104 -; GFX1010-PAL-NEXT: ;;#ASMSTART -; GFX1010-PAL-NEXT: ; use v0 -; GFX1010-PAL-NEXT: ;;#ASMEND -; GFX1010-PAL-NEXT: ;;#ASMSTART -; GFX1010-PAL-NEXT: ; use v1 -; GFX1010-PAL-NEXT: ;;#ASMEND ; GFX1010-PAL-NEXT: s_endpgm ; ; GFX1030-PAL-LABEL: store_load_sindex_small_offset_foo: @@ -1994,21 +1661,13 @@ ; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, 0x104 -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v0 -; GFX1030-PAL-NEXT: ;;#ASMEND -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v1 -; GFX1030-PAL-NEXT: ;;#ASMEND ; GFX1030-PAL-NEXT: s_endpgm ; ; GFX11-PAL-LABEL: store_load_sindex_small_offset_foo: ; GFX11-PAL: ; %bb.0: ; %bb ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, off offset:4 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, 0x104 +; GFX11-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX11-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-PAL-NEXT: s_lshl_b32 s1, s1, 2 @@ -2018,13 +1677,6 @@ ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s1 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_mov_b32_e32 v0, 4 
-; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v0 -; GFX11-PAL-NEXT: ;;#ASMEND -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v1 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_endpgm bb: %padding = alloca [64 x i32], align 4, addrspace(5) @@ -2039,8 +1691,6 @@ %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 - call void asm sideeffect "; use $0", "s"([64 x i32] addrspace(5)* %padding) #0 - call void asm sideeffect "; use $0", "s"([32 x float] addrspace(5)* %i) #0 ret void } @@ -2060,14 +1710,6 @@ ; GFX9-NEXT: v_sub_u32_e32 v0, 0x104, v0 ; GFX9-NEXT: scratch_load_dword v0, v0, off offset:124 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, 0x104 -; GFX9-NEXT: v_mov_b32_e32 v1, 4 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v1 -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: store_load_vindex_small_offset_kernel: @@ -2086,14 +1728,6 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, v0, off offset:124 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, 4 -; GFX10-NEXT: v_mov_b32_e32 v1, 0x104 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v0 -; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v1 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_vindex_small_offset_kernel: @@ -2104,16 +1738,8 @@ ; GFX11-NEXT: v_sub_nc_u32_e32 v2, 0x104, v0 ; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:260 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_mov_b32_e32 v1, 0x104 ; GFX11-NEXT: scratch_load_b32 v0, v2, off offset:124 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v0 -; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v1 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_endpgm ; ; GFX9-PAL-LABEL: store_load_vindex_small_offset_kernel: @@ -2136,14 +1762,6 @@ ; GFX9-PAL-NEXT: v_sub_u32_e32 v0, 0x104, v0 ; GFX9-PAL-NEXT: scratch_load_dword v0, v0, off offset:124 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 0x104 -; GFX9-PAL-NEXT: v_mov_b32_e32 v1, 4 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v1 -; GFX9-PAL-NEXT: ;;#ASMEND -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_endpgm ; ; GFX940-LABEL: store_load_vindex_small_offset_kernel: @@ -2157,14 +1775,6 @@ ; GFX940-NEXT: v_sub_u32_e32 v0, 0x104, v0 ; GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0x104 -; GFX940-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: store_load_vindex_small_offset_kernel: @@ -2189,14 +1799,6 @@ ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: scratch_load_dword v0, v0, off offset:124 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, 0x104 -; GFX1010-PAL-NEXT: ;;#ASMSTART -; GFX1010-PAL-NEXT: ; use v0 -; GFX1010-PAL-NEXT: ;;#ASMEND -; GFX1010-PAL-NEXT: ;;#ASMSTART -; GFX1010-PAL-NEXT: ; use v1 -; 
GFX1010-PAL-NEXT: ;;#ASMEND ; GFX1010-PAL-NEXT: s_endpgm ; ; GFX1030-PAL-LABEL: store_load_vindex_small_offset_kernel: @@ -2220,14 +1822,6 @@ ; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: scratch_load_dword v0, v0, off offset:124 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, 0x104 -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v0 -; GFX1030-PAL-NEXT: ;;#ASMEND -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v1 -; GFX1030-PAL-NEXT: ;;#ASMEND ; GFX1030-PAL-NEXT: s_endpgm ; ; GFX11-PAL-LABEL: store_load_vindex_small_offset_kernel: @@ -2238,16 +1832,8 @@ ; GFX11-PAL-NEXT: v_sub_nc_u32_e32 v2, 0x104, v0 ; GFX11-PAL-NEXT: scratch_store_b32 v0, v1, off offset:260 dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-PAL-NEXT: v_mov_b32_e32 v1, 0x104 ; GFX11-PAL-NEXT: scratch_load_b32 v0, v2, off offset:124 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v0 -; GFX11-PAL-NEXT: ;;#ASMEND -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v1 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_endpgm bb: %padding = alloca [64 x i32], align 4, addrspace(5) @@ -2264,8 +1850,6 @@ %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 - call void asm sideeffect "; use $0", "s"([64 x i32] addrspace(5)* %padding) #0 - call void asm sideeffect "; use $0", "s"([32 x float] addrspace(5)* %i) #0 ret void } @@ -2285,13 +1869,6 @@ ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1 ; GFX9-NEXT: scratch_load_dword v0, v0, off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s32 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v1 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: store_load_vindex_small_offset_foo: @@ -2299,26 +1876,17 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 -; GFX10-NEXT: s_add_i32 s1, s32, 0x100 ; GFX10-NEXT: s_add_i32 s0, s32, 0x100 -; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, s1 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100 +; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, s0 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-NEXT: v_lshl_add_u32 v1, v1, 2, s0 +; GFX10-NEXT: v_lshl_add_u32 v1, v1, 2, vcc_lo ; GFX10-NEXT: scratch_load_dword v3, off, s32 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100 ; GFX10-NEXT: scratch_store_dword v0, v2, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, s32 -; GFX10-NEXT: v_mov_b32_e32 v1, vcc_lo -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v0 -; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v1 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: store_load_vindex_small_offset_foo: @@ -2326,7 +1894,6 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 -; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x100 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-NEXT: scratch_load_b32 v3, off, s32 glc dlc ; GFX11-NEXT: s_waitcnt 
vmcnt(0) @@ -2335,13 +1902,6 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, v1, s32 offset:256 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v0, s32 :: v_dual_mov_b32 v1, vcc_lo -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v0 -; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v1 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-PAL-LABEL: store_load_vindex_small_offset_foo: @@ -2359,13 +1919,6 @@ ; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1 ; GFX9-PAL-NEXT: scratch_load_dword v0, v0, off glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, s32 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v1 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: store_load_vindex_small_offset_foo: @@ -2381,15 +1934,6 @@ ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX940-NEXT: scratch_load_dword v0, v0, s32 offset:256 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, s32 -; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, vcc_hi -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-PAL-LABEL: store_load_vindex_small_offset_foo: @@ -2397,26 +1941,17 @@ ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_and_b32_e32 v1, 15, v0 -; GFX10-PAL-NEXT: s_add_i32 s1, s32, 0x100 ; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x100 -; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s1 +; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x100 +; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v1, 2, s0 +; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v1, 2, vcc_lo ; GFX10-PAL-NEXT: scratch_load_dword v3, off, s32 glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x100 ; GFX10-PAL-NEXT: scratch_store_dword v0, v2, off ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_load_dword v0, v1, off glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-PAL-NEXT: v_mov_b32_e32 v0, s32 -; GFX10-PAL-NEXT: v_mov_b32_e32 v1, vcc_lo -; GFX10-PAL-NEXT: ;;#ASMSTART -; GFX10-PAL-NEXT: ; use v0 -; GFX10-PAL-NEXT: ;;#ASMEND -; GFX10-PAL-NEXT: ;;#ASMSTART -; GFX10-PAL-NEXT: ; use v1 -; GFX10-PAL-NEXT: ;;#ASMEND ; GFX10-PAL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-PAL-LABEL: store_load_vindex_small_offset_foo: @@ -2424,7 +1959,6 @@ ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 -; GFX11-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x100 ; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-PAL-NEXT: scratch_load_b32 v3, off, s32 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) @@ -2433,14 +1967,21 @@ ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, v1, s32 offset:256 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_dual_mov_b32 v0, s32 :: v_dual_mov_b32 v1, vcc_lo -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v0 -; GFX11-PAL-NEXT: ;;#ASMEND -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v1 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: 
store_load_vindex_small_offset_foo: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: scratch_load_dword v1, off, s32 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v2, 15 +; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GCN-NEXT: v_and_b32_e32 v0, v0, v2 +; GCN-NEXT: scratch_store_dword v1, v2, s32 offset:256 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: scratch_load_dword v0, v0, s32 offset:256 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] bb: %padding = alloca [64 x i32], align 4, addrspace(5) %i = alloca [32 x float], align 4, addrspace(5) @@ -2454,8 +1995,6 @@ %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 - call void asm sideeffect "; use $0", "s"([64 x i32] addrspace(5)* %padding) #0 - call void asm sideeffect "; use $0", "s"([32 x float] addrspace(5)* %i) #0 ret void } @@ -2483,15 +2022,6 @@ ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_mov_b32_e32 v0, 0x4004 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: zero_init_large_offset_kernel: @@ -2514,18 +2044,10 @@ ; GFX10-NEXT: s_movk_i32 s2, 0x4004 ; GFX10-NEXT: s_movk_i32 s1, 0x4004 ; GFX10-NEXT: s_movk_i32 s0, 0x4004 -; GFX10-NEXT: v_mov_b32_e32 v4, 4 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s2 -; GFX10-NEXT: v_mov_b32_e32 v5, 0x4004 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:16 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v4 -; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v5 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: zero_init_large_offset_kernel: @@ -2542,18 +2064,11 @@ ; GFX11-NEXT: s_movk_i32 s2, 0x4004 ; GFX11-NEXT: s_movk_i32 s1, 0x4004 ; GFX11-NEXT: s_movk_i32 s0, 0x4004 -; GFX11-NEXT: v_dual_mov_b32 v4, 4 :: v_dual_mov_b32 v5, 0x4004 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 offset:16 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:48 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v4 -; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v5 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; @@ -2585,15 +2100,6 @@ ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 -; GFX9-PAL-NEXT: s_nop 0 -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 0x4004 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_endpgm ; ; GFX940-LABEL: zero_init_large_offset_kernel: @@ -2614,15 
+2120,6 @@ ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, 4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0x4004 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: zero_init_large_offset_kernel: @@ -2652,17 +2149,9 @@ ; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x4004 ; GFX1010-PAL-NEXT: s_movk_i32 vcc_lo, 0x4004 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 -; GFX1010-PAL-NEXT: v_mov_b32_e32 v4, 4 -; GFX1010-PAL-NEXT: v_mov_b32_e32 v5, 0x4004 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:16 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 -; GFX1010-PAL-NEXT: ;;#ASMSTART -; GFX1010-PAL-NEXT: ; use v4 -; GFX1010-PAL-NEXT: ;;#ASMEND -; GFX1010-PAL-NEXT: ;;#ASMSTART -; GFX1010-PAL-NEXT: ; use v5 -; GFX1010-PAL-NEXT: ;;#ASMEND ; GFX1010-PAL-NEXT: s_endpgm ; ; GFX1030-PAL-LABEL: zero_init_large_offset_kernel: @@ -2690,18 +2179,10 @@ ; GFX1030-PAL-NEXT: s_movk_i32 s2, 0x4004 ; GFX1030-PAL-NEXT: s_movk_i32 s1, 0x4004 ; GFX1030-PAL-NEXT: s_movk_i32 s0, 0x4004 -; GFX1030-PAL-NEXT: v_mov_b32_e32 v4, 4 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 -; GFX1030-PAL-NEXT: v_mov_b32_e32 v5, 0x4004 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:16 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v4 -; GFX1030-PAL-NEXT: ;;#ASMEND -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v5 -; GFX1030-PAL-NEXT: ;;#ASMEND ; GFX1030-PAL-NEXT: s_endpgm ; ; GFX11-PAL-LABEL: zero_init_large_offset_kernel: @@ -2718,18 +2199,11 @@ ; GFX11-PAL-NEXT: s_movk_i32 s2, 0x4004 ; GFX11-PAL-NEXT: s_movk_i32 s1, 0x4004 ; GFX11-PAL-NEXT: s_movk_i32 s0, 0x4004 -; GFX11-PAL-NEXT: v_dual_mov_b32 v4, 4 :: v_dual_mov_b32 v5, 0x4004 ; GFX11-PAL-NEXT: s_clause 0x3 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s2 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s1 offset:16 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:48 -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v4 -; GFX11-PAL-NEXT: ;;#ASMEND -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v5 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-PAL-NEXT: s_endpgm %padding = alloca [4096 x i32], align 4, addrspace(5) @@ -2738,8 +2212,6 @@ %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 %cast = bitcast [32 x i16] addrspace(5)* %alloca to i8 addrspace(5)* call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 2 dereferenceable(64) %cast, i8 0, i64 64, i1 false) - call void asm sideeffect "; use $0", "s"([4096 x i32] addrspace(5)* %padding) #0 - call void asm sideeffect "; use $0", "s"([32 x i16] addrspace(5)* %alloca) #0 ret void } @@ -2757,24 +2229,14 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_add_i32 s3, s32, 0x4004 -; GFX9-NEXT: s_add_i32 s2, s32, 0x4004 ; GFX9-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX9-NEXT: 
s_add_i32 s0, s32, 0x4004 -; GFX9-NEXT: s_add_i32 vcc_lo, s32, 4 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s3 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 +; GFX9-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4004 -; GFX9-NEXT: v_mov_b32_e32 v0, vcc_lo -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_mov_b32_e32 v0, vcc_hi -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -2785,7 +2247,6 @@ ; GFX10-NEXT: scratch_load_dword v0, off, s32 offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_mov_b32 s0, 0 -; GFX10-NEXT: s_add_i32 s4, s32, 0x4004 ; GFX10-NEXT: s_mov_b32 s1, s0 ; GFX10-NEXT: s_mov_b32 s2, s0 ; GFX10-NEXT: s_mov_b32 s3, s0 @@ -2793,23 +2254,14 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-NEXT: s_add_i32 s3, s32, 4 ; GFX10-NEXT: s_add_i32 s2, s32, 0x4004 ; GFX10-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s4 -; GFX10-NEXT: v_mov_b32_e32 v4, s3 -; GFX10-NEXT: v_mov_b32_e32 v5, s2 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s2 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:16 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v4 -; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v5 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -2820,29 +2272,21 @@ ; GFX11-NEXT: scratch_load_b32 v0, off, s32 offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_add_i32 s4, s32, 4 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 s1, s0 ; GFX11-NEXT: s_mov_b32 s2, s0 ; GFX11-NEXT: s_mov_b32 s3, s0 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-NEXT: s_add_i32 s3, s32, 0x4004 ; GFX11-NEXT: s_add_i32 s2, s32, 0x4004 ; GFX11-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s3 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 offset:16 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:48 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v4 -; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v5 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -2859,24 +2303,14 @@ ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-PAL-NEXT: s_add_i32 s3, s32, 0x4004 -; GFX9-PAL-NEXT: s_add_i32 s2, 
s32, 0x4004 ; GFX9-PAL-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX9-PAL-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX9-PAL-NEXT: s_add_i32 vcc_lo, s32, 4 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 +; GFX9-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4004 -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, vcc_lo -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, vcc_hi -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; @@ -2891,25 +2325,14 @@ ; GFX940-NEXT: s_mov_b32 s3, s0 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NEXT: s_add_i32 s3, s32, 0x4004 -; GFX940-NEXT: s_add_i32 s2, s32, 0x4004 ; GFX940-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX940-NEXT: s_add_i32 vcc_lo, s32, 4 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s3 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:16 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:32 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 +; GFX940-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4004 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, vcc_lo -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, vcc_hi -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s1 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] ; @@ -2920,7 +2343,6 @@ ; GFX10-PAL-NEXT: scratch_load_dword v0, off, s32 offset:4 glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX10-PAL-NEXT: s_mov_b32 s0, 0 -; GFX10-PAL-NEXT: s_add_i32 s4, s32, 0x4004 ; GFX10-PAL-NEXT: s_mov_b32 s1, s0 ; GFX10-PAL-NEXT: s_mov_b32 s2, s0 ; GFX10-PAL-NEXT: s_mov_b32 s3, s0 @@ -2928,23 +2350,14 @@ ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-PAL-NEXT: s_add_i32 s3, s32, 4 ; GFX10-PAL-NEXT: s_add_i32 s2, s32, 0x4004 ; GFX10-PAL-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s4 -; GFX10-PAL-NEXT: v_mov_b32_e32 v4, s3 -; GFX10-PAL-NEXT: v_mov_b32_e32 v5, s2 +; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 ; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:16 ; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 ; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 -; GFX10-PAL-NEXT: ;;#ASMSTART -; GFX10-PAL-NEXT: ; use v4 -; GFX10-PAL-NEXT: ;;#ASMEND -; GFX10-PAL-NEXT: ;;#ASMSTART -; 
GFX10-PAL-NEXT: ; use v5 -; GFX10-PAL-NEXT: ;;#ASMEND ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: s_setpc_b64 s[30:31] ; @@ -2955,29 +2368,21 @@ ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s32 offset:4 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX11-PAL-NEXT: s_mov_b32 s0, 0 -; GFX11-PAL-NEXT: s_add_i32 s4, s32, 4 +; GFX11-PAL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-PAL-NEXT: s_mov_b32 s1, s0 ; GFX11-PAL-NEXT: s_mov_b32 s2, s0 ; GFX11-PAL-NEXT: s_mov_b32 s3, s0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-PAL-NEXT: s_add_i32 s3, s32, 0x4004 ; GFX11-PAL-NEXT: s_add_i32 s2, s32, 0x4004 ; GFX11-PAL-NEXT: s_add_i32 s1, s32, 0x4004 ; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX11-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX11-PAL-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s3 ; GFX11-PAL-NEXT: s_clause 0x3 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s2 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s1 offset:16 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:48 -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v4 -; GFX11-PAL-NEXT: ;;#ASMEND -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v5 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] %padding = alloca [4096 x i32], align 4, addrspace(5) @@ -2986,8 +2391,6 @@ %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 %cast = bitcast [32 x i16] addrspace(5)* %alloca to i8 addrspace(5)* call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 2 dereferenceable(64) %cast, i8 0, i64 64, i1 false) - call void asm sideeffect "; use $0", "s"([4096 x i32] addrspace(5)* %padding) #0 - call void asm sideeffect "; use $0", "s"([32 x i16] addrspace(5)* %alloca) #0 ret void } @@ -3010,14 +2413,6 @@ ; GFX9-NEXT: s_addk_i32 s0, 0x4004 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_mov_b32_e32 v0, 0x4004 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: store_load_sindex_large_offset_kernel: @@ -3030,7 +2425,6 @@ ; GFX10-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, 15 -; GFX10-NEXT: v_mov_b32_e32 v1, 0x4004 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_and_b32 s1, s0, 15 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2 @@ -3041,13 +2435,6 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, 4 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v0 -; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v1 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_sindex_large_offset_kernel: @@ -3055,7 +2442,7 @@ ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x24 ; GFX11-NEXT: scratch_load_b32 v0, off, off offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, 0x4004 +; GFX11-NEXT: v_mov_b32_e32 v0, 15 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_and_b32 s1, s0, 15 ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 @@ -3066,13 +2453,6 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 
v0, off, s1 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v0 -; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v1 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_endpgm ; ; GFX9-PAL-LABEL: store_load_sindex_large_offset_kernel: @@ -3098,14 +2478,6 @@ ; GFX9-PAL-NEXT: s_addk_i32 s0, 0x4004 ; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 0x4004 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_endpgm ; ; GFX940-LABEL: store_load_sindex_large_offset_kernel: @@ -3124,14 +2496,6 @@ ; GFX940-NEXT: s_addk_i32 s0, 0x4004 ; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0x4004 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: store_load_sindex_large_offset_kernel: @@ -3147,7 +2511,6 @@ ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 ; GFX1010-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, 0x4004 ; GFX1010-PAL-NEXT: scratch_load_dword v0, off, vcc_lo offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 15 @@ -3161,13 +2524,6 @@ ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX1010-PAL-NEXT: ;;#ASMSTART -; GFX1010-PAL-NEXT: ; use v0 -; GFX1010-PAL-NEXT: ;;#ASMEND -; GFX1010-PAL-NEXT: ;;#ASMSTART -; GFX1010-PAL-NEXT: ; use v1 -; GFX1010-PAL-NEXT: ;;#ASMEND ; GFX1010-PAL-NEXT: s_endpgm ; ; GFX1030-PAL-LABEL: store_load_sindex_large_offset_kernel: @@ -3185,7 +2541,6 @@ ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 15 -; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, 0x4004 ; GFX1030-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1030-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX1030-PAL-NEXT: s_lshl_b32 s0, s0, 2 @@ -3196,13 +2551,6 @@ ; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v0 -; GFX1030-PAL-NEXT: ;;#ASMEND -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v1 -; GFX1030-PAL-NEXT: ;;#ASMEND ; GFX1030-PAL-NEXT: s_endpgm ; ; GFX11-PAL-LABEL: store_load_sindex_large_offset_kernel: @@ -3210,7 +2558,7 @@ ; GFX11-PAL-NEXT: s_load_b32 s0, s[0:1], 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, off offset:4 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, 0x4004 +; GFX11-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX11-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 2 @@ -3221,13 +2569,6 @@ ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s1 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_mov_b32_e32 v0, 4 
-; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v0 -; GFX11-PAL-NEXT: ;;#ASMEND -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v1 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_endpgm bb: %padding = alloca [4096 x i32], align 4, addrspace(5) @@ -3242,8 +2583,6 @@ %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 - call void asm sideeffect "; use $0", "s"([4096 x i32] addrspace(5)* %padding) #0 - call void asm sideeffect "; use $0", "s"([32 x float] addrspace(5)* %i) #0 ret void } @@ -3265,14 +2604,6 @@ ; GFX9-NEXT: s_addk_i32 s0, 0x4004 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_mov_b32_e32 v0, 0x4004 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: store_load_sindex_large_offset_foo: @@ -3293,21 +2624,13 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, 4 -; GFX10-NEXT: v_mov_b32_e32 v1, 0x4004 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v0 -; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v1 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_sindex_large_offset_foo: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: scratch_load_b32 v0, off, off offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, 0x4004 +; GFX11-NEXT: v_mov_b32_e32 v0, 15 ; GFX11-NEXT: s_and_b32 s1, s0, 15 ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-NEXT: s_lshl_b32 s1, s1, 2 @@ -3317,13 +2640,6 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v0 -; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v1 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_endpgm ; ; GFX9-PAL-LABEL: store_load_sindex_large_offset_foo: @@ -3348,14 +2664,6 @@ ; GFX9-PAL-NEXT: s_addk_i32 s0, 0x4004 ; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 0x4004 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_endpgm ; ; GFX940-LABEL: store_load_sindex_large_offset_foo: @@ -3372,14 +2680,6 @@ ; GFX940-NEXT: s_addk_i32 s0, 0x4004 ; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0x4004 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: store_load_sindex_large_offset_foo: @@ -3406,14 +2706,6 @@ ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, 0x4004 -; GFX1010-PAL-NEXT: ;;#ASMSTART -; GFX1010-PAL-NEXT: ; use v0 -; 
GFX1010-PAL-NEXT: ;;#ASMEND -; GFX1010-PAL-NEXT: ;;#ASMSTART -; GFX1010-PAL-NEXT: ; use v1 -; GFX1010-PAL-NEXT: ;;#ASMEND ; GFX1010-PAL-NEXT: s_endpgm ; ; GFX1030-PAL-LABEL: store_load_sindex_large_offset_foo: @@ -3439,21 +2731,13 @@ ; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, 0x4004 -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v0 -; GFX1030-PAL-NEXT: ;;#ASMEND -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v1 -; GFX1030-PAL-NEXT: ;;#ASMEND ; GFX1030-PAL-NEXT: s_endpgm ; ; GFX11-PAL-LABEL: store_load_sindex_large_offset_foo: ; GFX11-PAL: ; %bb.0: ; %bb ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, off offset:4 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, 0x4004 +; GFX11-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX11-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-PAL-NEXT: s_lshl_b32 s1, s1, 2 @@ -3463,13 +2747,6 @@ ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s1 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v0 -; GFX11-PAL-NEXT: ;;#ASMEND -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v1 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_endpgm bb: %padding = alloca [4096 x i32], align 4, addrspace(5) @@ -3484,8 +2761,6 @@ %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 - call void asm sideeffect "; use $0", "s"([4096 x i32] addrspace(5)* %padding) #0 - call void asm sideeffect "; use $0", "s"([32 x float] addrspace(5)* %i) #0 ret void } @@ -3505,14 +2780,6 @@ ; GFX9-NEXT: v_sub_u32_e32 v0, 0x4004, v0 ; GFX9-NEXT: scratch_load_dword v0, v0, off offset:124 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, 0x4004 -; GFX9-NEXT: v_mov_b32_e32 v1, 4 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v1 -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: store_load_vindex_large_offset_kernel: @@ -3531,14 +2798,6 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, v0, off offset:124 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, 4 -; GFX10-NEXT: v_mov_b32_e32 v1, 0x4004 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v0 -; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v1 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_vindex_large_offset_kernel: @@ -3550,16 +2809,8 @@ ; GFX11-NEXT: v_sub_nc_u32_e32 v2, 0x4004, v0 ; GFX11-NEXT: scratch_store_b32 v0, v1, vcc_lo dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_mov_b32_e32 v1, 0x4004 ; GFX11-NEXT: scratch_load_b32 v0, v2, off offset:124 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v0 -; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v1 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_endpgm ; ; GFX9-PAL-LABEL: store_load_vindex_large_offset_kernel: @@ -3582,14 +2833,6 @@ ; GFX9-PAL-NEXT: v_sub_u32_e32 v0, 0x4004, v0 ; GFX9-PAL-NEXT: scratch_load_dword v0, v0, 
off offset:124 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 0x4004 -; GFX9-PAL-NEXT: v_mov_b32_e32 v1, 4 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v1 -; GFX9-PAL-NEXT: ;;#ASMEND -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_endpgm ; ; GFX940-LABEL: store_load_vindex_large_offset_kernel: @@ -3604,14 +2847,6 @@ ; GFX940-NEXT: v_sub_u32_e32 v0, 0x4004, v0 ; GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0x4004 -; GFX940-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: store_load_vindex_large_offset_kernel: @@ -3636,14 +2871,6 @@ ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: scratch_load_dword v0, v0, off offset:124 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, 0x4004 -; GFX1010-PAL-NEXT: ;;#ASMSTART -; GFX1010-PAL-NEXT: ; use v0 -; GFX1010-PAL-NEXT: ;;#ASMEND -; GFX1010-PAL-NEXT: ;;#ASMSTART -; GFX1010-PAL-NEXT: ; use v1 -; GFX1010-PAL-NEXT: ;;#ASMEND ; GFX1010-PAL-NEXT: s_endpgm ; ; GFX1030-PAL-LABEL: store_load_vindex_large_offset_kernel: @@ -3667,14 +2894,6 @@ ; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: scratch_load_dword v0, v0, off offset:124 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, 0x4004 -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v0 -; GFX1030-PAL-NEXT: ;;#ASMEND -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v1 -; GFX1030-PAL-NEXT: ;;#ASMEND ; GFX1030-PAL-NEXT: s_endpgm ; ; GFX11-PAL-LABEL: store_load_vindex_large_offset_kernel: @@ -3686,16 +2905,8 @@ ; GFX11-PAL-NEXT: v_sub_nc_u32_e32 v2, 0x4004, v0 ; GFX11-PAL-NEXT: scratch_store_b32 v0, v1, vcc_lo dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-PAL-NEXT: v_mov_b32_e32 v1, 0x4004 ; GFX11-PAL-NEXT: scratch_load_b32 v0, v2, off offset:124 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v0 -; GFX11-PAL-NEXT: ;;#ASMEND -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v1 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_endpgm bb: %padding = alloca [4096 x i32], align 4, addrspace(5) @@ -3712,8 +2923,6 @@ %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 - call void asm sideeffect "; use $0", "s"([4096 x i32] addrspace(5)* %padding) #0 - call void asm sideeffect "; use $0", "s"([32 x float] addrspace(5)* %i) #0 ret void } @@ -3723,8 +2932,8 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: scratch_load_dword v1, off, s32 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX9-NEXT: v_mov_b32_e32 v1, vcc_lo +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4004 +; GFX9-NEXT: v_mov_b32_e32 v1, vcc_hi ; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1 ; GFX9-NEXT: v_mov_b32_e32 v3, 15 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 @@ -3733,14 +2942,6 @@ ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1 ; GFX9-NEXT: scratch_load_dword v0, v0, off glc ; GFX9-NEXT: 
s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_i32 vcc_hi, s32, 4 -; GFX9-NEXT: v_mov_b32_e32 v0, vcc_hi -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v1 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: store_load_vindex_large_offset_foo: @@ -3748,27 +2949,17 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 -; GFX10-NEXT: s_add_i32 s2, s32, 0x4004 -; GFX10-NEXT: s_add_i32 s1, s32, 0x4004 -; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, s2 +; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, s0 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-NEXT: v_lshl_add_u32 v1, v1, 2, s1 +; GFX10-NEXT: v_lshl_add_u32 v1, v1, 2, vcc_lo ; GFX10-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_add_i32 s0, s32, 4 -; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX10-NEXT: scratch_store_dword v0, v2, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-NEXT: v_mov_b32_e32 v1, vcc_lo -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v0 -; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v1 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: store_load_vindex_large_offset_foo: @@ -3776,25 +2967,17 @@ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 -; GFX11-NEXT: s_add_i32 s2, s32, 0x4004 -; GFX11-NEXT: s_add_i32 s1, s32, 0x4004 -; GFX11-NEXT: s_add_i32 s0, s32, 4 -; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v1 ; GFX11-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b32 v0, v2, s2 dlc +; GFX11-NEXT: scratch_store_b32 v0, v2, s0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: scratch_load_b32 v0, v1, s1 glc dlc +; GFX11-NEXT: scratch_load_b32 v0, v1, vcc_lo glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, vcc_lo -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v0 -; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v1 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-PAL-LABEL: store_load_vindex_large_offset_foo: @@ -3802,8 +2985,8 @@ ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-PAL-NEXT: scratch_load_dword v1, off, s32 offset:4 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX9-PAL-NEXT: v_mov_b32_e32 v1, vcc_lo +; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4004 +; GFX9-PAL-NEXT: v_mov_b32_e32 v1, vcc_hi ; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15 ; GFX9-PAL-NEXT: v_and_b32_e32 v0, 15, v0 @@ -3812,14 +2995,6 @@ ; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1 ; GFX9-PAL-NEXT: scratch_load_dword v0, v0, off glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 4 -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, vcc_hi -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; 
use v0 -; GFX9-PAL-NEXT: ;;#ASMEND -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v1 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: store_load_vindex_large_offset_foo: @@ -3829,24 +3004,14 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX940-NEXT: v_mov_b32_e32 v2, 15 -; GFX940-NEXT: s_add_i32 s1, s32, 0x4004 +; GFX940-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX940-NEXT: scratch_store_dword v1, v2, s1 sc0 sc1 +; GFX940-NEXT: scratch_store_dword v1, v2, vcc_lo sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX940-NEXT: scratch_load_dword v0, v0, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_add_i32 vcc_lo, s32, 4 -; GFX940-NEXT: v_mov_b32_e32 v0, vcc_lo ; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4004 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, vcc_hi -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND +; GFX940-NEXT: scratch_load_dword v0, v0, vcc_hi sc0 sc1 +; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-PAL-LABEL: store_load_vindex_large_offset_foo: @@ -3854,27 +3019,17 @@ ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_and_b32_e32 v1, 15, v0 -; GFX10-PAL-NEXT: s_add_i32 s2, s32, 0x4004 -; GFX10-PAL-NEXT: s_add_i32 s1, s32, 0x4004 -; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s2 +; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v1, 2, s1 +; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v1, 2, vcc_lo ; GFX10-PAL-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-PAL-NEXT: s_add_i32 s0, s32, 4 -; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX10-PAL-NEXT: scratch_store_dword v0, v2, off ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_load_dword v0, v1, off glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-PAL-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-PAL-NEXT: v_mov_b32_e32 v1, vcc_lo -; GFX10-PAL-NEXT: ;;#ASMSTART -; GFX10-PAL-NEXT: ; use v0 -; GFX10-PAL-NEXT: ;;#ASMEND -; GFX10-PAL-NEXT: ;;#ASMSTART -; GFX10-PAL-NEXT: ; use v1 -; GFX10-PAL-NEXT: ;;#ASMEND ; GFX10-PAL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-PAL-LABEL: store_load_vindex_large_offset_foo: @@ -3882,26 +3037,34 @@ ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 -; GFX11-PAL-NEXT: s_add_i32 s2, s32, 0x4004 -; GFX11-PAL-NEXT: s_add_i32 s1, s32, 0x4004 -; GFX11-PAL-NEXT: s_add_i32 s0, s32, 4 -; GFX11-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX11-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX11-PAL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v1, 2, v1 ; GFX11-PAL-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: scratch_store_b32 v0, v2, s2 dlc +; GFX11-PAL-NEXT: scratch_store_b32 v0, v2, s0 dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-PAL-NEXT: scratch_load_b32 v0, v1, s1 glc dlc +; 
GFX11-PAL-NEXT: scratch_load_b32 v0, v1, vcc_lo glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, vcc_lo -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v0 -; GFX11-PAL-NEXT: ;;#ASMEND -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v1 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: store_load_vindex_large_offset_foo: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: scratch_load_dword v1, off, s32 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v2, 15 +; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GCN-NEXT: v_and_b32_e32 v0, v0, v2 +; GCN-NEXT: s_add_u32 vcc_hi, s32, 0x4000 +; GCN-NEXT: scratch_store_dword v1, v2, vcc_hi sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: s_add_u32 vcc_hi, s32, 0x4000 +; GCN-NEXT: scratch_load_dword v0, v0, vcc_hi sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] bb: %padding = alloca [4096 x i32], align 4, addrspace(5) %i = alloca [32 x float], align 4, addrspace(5) @@ -3915,8 +3078,6 @@ %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 - call void asm sideeffect "; use $0", "s"([4096 x i32] addrspace(5)* %padding) #0 - call void asm sideeffect "; use $0", "s"([32 x float] addrspace(5)* %i) #0 ret void } @@ -3936,10 +3097,6 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: scratch_load_dword v0, off, s0 offset:3712 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: store_load_large_imm_offset_kernel: @@ -3958,10 +3115,6 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s0 offset:1664 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, 4 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v0 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_large_imm_offset_kernel: @@ -3974,10 +3127,6 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, v1, off offset:3716 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v0 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_endpgm ; ; GFX9-PAL-LABEL: store_load_large_imm_offset_kernel: @@ -4000,10 +3149,6 @@ ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 offset:3712 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_endpgm ; ; GFX940-LABEL: store_load_large_imm_offset_kernel: @@ -4017,10 +3162,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: scratch_load_dword v0, v0, off offset:3716 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: store_load_large_imm_offset_kernel: @@ -4045,10 +3186,6 @@ ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s0 offset:1664 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX1010-PAL-NEXT: ;;#ASMSTART -; 
GFX1010-PAL-NEXT: ; use v0 -; GFX1010-PAL-NEXT: ;;#ASMEND ; GFX1010-PAL-NEXT: s_endpgm ; ; GFX1030-PAL-LABEL: store_load_large_imm_offset_kernel: @@ -4072,10 +3209,6 @@ ; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s0 offset:1664 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX1030-PAL-NEXT: ;;#ASMSTART -; GFX1030-PAL-NEXT: ; use v0 -; GFX1030-PAL-NEXT: ;;#ASMEND ; GFX1030-PAL-NEXT: s_endpgm ; ; GFX11-PAL-LABEL: store_load_large_imm_offset_kernel: @@ -4088,10 +3221,6 @@ ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, v1, off offset:3716 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v0 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_endpgm bb: %i = alloca [4096 x i32], align 4, addrspace(5) @@ -4101,7 +3230,6 @@ store volatile i32 15, i32 addrspace(5)* %i7, align 4 %i10 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 4000 %i12 = load volatile i32, i32 addrspace(5)* %i10, align 4 - call void asm sideeffect "; use $0", "s"([4096 x i32] addrspace(5)* %i) #0 ret void } @@ -4111,20 +3239,15 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: s_movk_i32 s0, 0x3000 -; GFX9-NEXT: s_add_i32 vcc_lo, s32, 4 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 4 ; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_i32 s0, s0, vcc_lo +; GFX9-NEXT: s_add_i32 s0, s0, vcc_hi ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: scratch_load_dword v0, off, s0 offset:3712 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_i32 vcc_hi, s32, 4 -; GFX9-NEXT: v_mov_b32_e32 v0, vcc_hi -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v0 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: store_load_large_imm_offset_foo: @@ -4134,19 +3257,14 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: s_movk_i32 s0, 0x3800 -; GFX10-NEXT: s_add_i32 s1, s32, 4 -; GFX10-NEXT: s_add_i32 s0, s0, s1 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 4 +; GFX10-NEXT: s_add_i32 s0, s0, vcc_lo ; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s0 offset:1664 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_add_i32 vcc_lo, s32, 4 -; GFX10-NEXT: v_mov_b32_e32 v0, vcc_lo -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v0 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: store_load_large_imm_offset_foo: @@ -4155,17 +3273,12 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 0x3000 ; GFX11-NEXT: v_mov_b32_e32 v2, 15 -; GFX11-NEXT: s_add_i32 vcc_lo, s32, 4 ; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_store_b32 v1, v2, s32 offset:3716 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, v1, s32 offset:3716 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v0, vcc_lo -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v0 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX9-PAL-LABEL: store_load_large_imm_offset_foo: @@ -4173,20 +3286,15 @@ ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000 -; GFX9-PAL-NEXT: s_add_i32 vcc_lo, s32, 4 +; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 4 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_i32 s0, s0, vcc_lo +; GFX9-PAL-NEXT: s_add_i32 s0, s0, vcc_hi ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 offset:3712 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 4 -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, vcc_hi -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v0 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: store_load_large_imm_offset_foo: @@ -4201,11 +3309,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: scratch_load_dword v0, v0, s32 offset:3716 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_add_i32 vcc_hi, s32, 4 -; GFX940-NEXT: v_mov_b32_e32 v0, vcc_hi -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-PAL-LABEL: store_load_large_imm_offset_foo: @@ -4215,19 +3318,14 @@ ; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-PAL-NEXT: s_movk_i32 s0, 0x3800 -; GFX10-PAL-NEXT: s_add_i32 s1, s32, 4 -; GFX10-PAL-NEXT: s_add_i32 s0, s0, s1 +; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 4 +; GFX10-PAL-NEXT: s_add_i32 s0, s0, vcc_lo ; GFX10-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664 ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_load_dword v0, off, s0 offset:1664 glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 4 -; GFX10-PAL-NEXT: v_mov_b32_e32 v0, vcc_lo -; GFX10-PAL-NEXT: ;;#ASMSTART -; GFX10-PAL-NEXT: ; use v0 -; GFX10-PAL-NEXT: ;;#ASMEND ; GFX10-PAL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-PAL-LABEL: store_load_large_imm_offset_foo: @@ -4236,18 +3334,26 @@ ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 0x3000 ; GFX11-PAL-NEXT: v_mov_b32_e32 v2, 15 -; GFX11-PAL-NEXT: s_add_i32 vcc_lo, s32, 4 ; GFX11-PAL-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_store_b32 v1, v2, s32 offset:3716 dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, v1, s32 offset:3716 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_mov_b32_e32 v0, vcc_lo -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v0 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: store_load_large_imm_offset_foo: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, 13 +; GCN-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, 0x3000 +; GCN-NEXT: v_mov_b32_e32 v1, 15 +; GCN-NEXT: scratch_store_dword v0, v1, s32 offset:3712 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: scratch_load_dword v0, v0, s32 offset:3712 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] bb: %i = alloca [4096 x i32], 
align 4, addrspace(5) %i1 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 undef @@ -4256,7 +3362,6 @@ store volatile i32 15, i32 addrspace(5)* %i7, align 4 %i10 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 4000 %i12 = load volatile i32, i32 addrspace(5)* %i10, align 4 - call void asm sideeffect "; use $0", "s"([4096 x i32] addrspace(5)* %i) #0 ret void } @@ -4267,17 +3372,14 @@ ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, 15 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1 -; GFX9-NEXT: scratch_store_dword v0, v2, off offset:1024 +; GFX9-NEXT: v_mov_b32_e32 v1, 15 +; GFX9-NEXT: scratch_store_dword v0, v1, off offset:1024 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: scratch_load_dword v0, v0, off offset:1024 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use v1 -; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: store_load_vidx_sidx_offset: @@ -4295,10 +3397,6 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, v0, off offset:1024 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, 4 -; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use v0 -; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_vidx_sidx_offset: @@ -4311,10 +3409,6 @@ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, v0, off offset:1028 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use v0 -; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_endpgm ; ; GFX9-PAL-LABEL: store_load_vidx_sidx_offset: @@ -4324,20 +3418,17 @@ ; GFX9-PAL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX9-PAL-NEXT: v_mov_b32_e32 v2, 15 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s5, s5, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s4, s3 ; GFX9-PAL-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s5, 0 ; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1 -; GFX9-PAL-NEXT: scratch_store_dword v0, v2, off offset:1024 +; GFX9-PAL-NEXT: v_mov_b32_e32 v1, 15 +; GFX9-PAL-NEXT: scratch_store_dword v0, v1, off offset:1024 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: scratch_load_dword v0, v0, off offset:1024 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: ;;#ASMSTART -; GFX9-PAL-NEXT: ; use v1 -; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_endpgm ; ; GFX940-LABEL: store_load_vidx_sidx_offset: @@ -4350,10 +3441,6 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: scratch_load_dword v0, v0, off offset:1028 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use v0 -; GFX940-NEXT: ;;#ASMEND ; GFX940-NEXT: s_endpgm ; ; GFX10-PAL-LABEL: store_load_vidx_sidx_offset: @@ -4376,10 +3463,6 @@ ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_load_dword v0, v0, off offset:1024 glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX10-PAL-NEXT: ;;#ASMSTART -; GFX10-PAL-NEXT: ; use v0 -; GFX10-PAL-NEXT: ;;#ASMEND ; GFX10-PAL-NEXT: s_endpgm ; ; GFX11-PAL-LABEL: store_load_vidx_sidx_offset: @@ -4392,11 +3475,18 @@ ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; 
GFX11-PAL-NEXT: scratch_load_b32 v0, v0, off offset:1028 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_mov_b32_e32 v0, 4 -; GFX11-PAL-NEXT: ;;#ASMSTART -; GFX11-PAL-NEXT: ; use v0 -; GFX11-PAL-NEXT: ;;#ASMEND ; GFX11-PAL-NEXT: s_endpgm +; GCN-LABEL: store_load_vidx_sidx_offset: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dword s0, s[0:1], 0x24 +; GCN-NEXT: v_mov_b32_e32 v1, 15 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_add_lshl_u32 v0, s0, v0, 2 +; GCN-NEXT: scratch_store_dword v0, v1, off offset:1028 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: scratch_load_dword v0, v0, off offset:1028 sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_endpgm bb: %alloca = alloca [32 x i32], align 4, addrspace(5) %vidx = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -4405,7 +3495,6 @@ %gep = getelementptr inbounds [32 x i32], [32 x i32] addrspace(5)* %alloca, i32 0, i32 %add2 store volatile i32 15, i32 addrspace(5)* %gep, align 4 %load = load volatile i32, i32 addrspace(5)* %gep, align 4 - call void asm sideeffect "; use $0", "s"([32 x i32] addrspace(5)* %alloca) #0 ret void } @@ -4488,6 +3577,16 @@ ; GFX11-PAL-NEXT: scratch_load_b64 v[0:1], v0, off glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: store_load_i64_aligned: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v2, 15 +; GCN-NEXT: v_mov_b32_e32 v3, 0 +; GCN-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] bb: store volatile i64 15, i64 addrspace(5)* %arg, align 8 %load = load volatile i64, i64 addrspace(5)* %arg, align 8 @@ -4573,6 +3672,16 @@ ; GFX11-PAL-NEXT: scratch_load_b64 v[0:1], v0, off glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: store_load_i64_unaligned: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v2, 15 +; GCN-NEXT: v_mov_b32_e32 v3, 0 +; GCN-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] bb: store volatile i64 15, i64 addrspace(5)* %arg, align 1 %load = load volatile i64, i64 addrspace(5)* %arg, align 1 @@ -4665,6 +3774,17 @@ ; GFX11-PAL-NEXT: scratch_load_b96 v[0:2], v0, off glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: store_load_v3i32_unaligned: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v2, 1 +; GCN-NEXT: v_mov_b32_e32 v3, 2 +; GCN-NEXT: v_mov_b32_e32 v4, 3 +; GCN-NEXT: scratch_store_dwordx3 v0, v[2:4], off sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: scratch_load_dwordx3 v[0:2], v0, off sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] bb: store volatile <3 x i32> , <3 x i32> addrspace(5)* %arg, align 1 %load = load volatile <3 x i32>, <3 x i32> addrspace(5)* %arg, align 1 @@ -4762,6 +3882,18 @@ ; GFX11-PAL-NEXT: scratch_load_b128 v[0:3], v0, off glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: store_load_v4i32_unaligned: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v2, 1 +; GCN-NEXT: v_mov_b32_e32 v3, 2 +; GCN-NEXT: 
v_mov_b32_e32 v4, 3 +; GCN-NEXT: v_mov_b32_e32 v5, 4 +; GCN-NEXT: scratch_store_dwordx4 v0, v[2:5], off sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: scratch_load_dwordx4 v[0:3], v0, off sc0 sc1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] bb: store volatile <4 x i32> , <4 x i32> addrspace(5)* %arg, align 1 %load = load volatile <4 x i32>, <4 x i32> addrspace(5)* %arg, align 1 diff --git a/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address-codegen.ll b/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address-codegen.ll --- a/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address-codegen.ll +++ b/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address-codegen.ll @@ -7,15 +7,15 @@ ; GCN-NOT: load_lds_simple define internal i32 @load_lds_simple() { - %load = load i32, i32 addrspace(3)* @lds0, align 4 + %load = load i32, ptr addrspace(3) @lds0, align 4 ret i32 %load } ; GCN-LABEL: {{^}}kernel: ; GCN: v_mov_b32_e32 [[ADDR:v[0-9]+]], 0 ; GCN: ds_read_b32 v{{[0-9]+}}, [[ADDR]] -define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) { +define amdgpu_kernel void @kernel(ptr addrspace(1) %out) { %call = call i32 @load_lds_simple() - store i32 %call, i32 addrspace(1)* %out + store i32 %call, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll b/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll --- a/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll +++ b/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll @@ -7,59 +7,59 @@ @lds0 = addrspace(3) global i32 undef, align 4 @lds1 = addrspace(3) global [512 x i32] undef, align 4 -@nested.lds.address = addrspace(1) global i32 addrspace(3)* @lds0, align 4 +@nested.lds.address = addrspace(1) global ptr addrspace(3) @lds0, align 4 @gds0 = addrspace(2) global i32 undef, align 4 -@alias.lds0 = alias i32, i32 addrspace(3)* @lds0 -@lds.cycle = addrspace(3) global i32 ptrtoint (i32 addrspace(3)* @lds.cycle to i32), align 4 +@alias.lds0 = alias i32, ptr addrspace(3) @lds0 +@lds.cycle = addrspace(3) global i32 ptrtoint (ptr addrspace(3) @lds.cycle to i32), align 4 ; ALL-LABEL: define i32 @load_lds_simple() #0 { define i32 @load_lds_simple() { - %load = load i32, i32 addrspace(3)* @lds0, align 4 + %load = load i32, ptr addrspace(3) @lds0, align 4 ret i32 %load } ; ALL-LABEL: define i32 @load_gds_simple() #0 { define i32 @load_gds_simple() { - %load = load i32, i32 addrspace(2)* @gds0, align 4 + %load = load i32, ptr addrspace(2) @gds0, align 4 ret i32 %load } ; ALL-LABEL: define i32 @load_lds_const_gep() #0 { define i32 @load_lds_const_gep() { - %load = load i32, i32 addrspace(3)* getelementptr inbounds ([512 x i32], [512 x i32] addrspace(3)* @lds1, i64 0, i64 4), align 4 + %load = load i32, ptr addrspace(3) getelementptr inbounds ([512 x i32], ptr addrspace(3) @lds1, i64 0, i64 4), align 4 ret i32 %load } ; ALL-LABEL: define i32 @load_lds_var_gep(i32 %idx) #0 { define i32 @load_lds_var_gep(i32 %idx) { - %gep = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds1, i32 0, i32 %idx - %load = load i32, i32 addrspace(3)* %gep, align 4 + %gep = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds1, i32 0, i32 %idx + %load = load i32, ptr addrspace(3) %gep, align 4 ret i32 %load } -; ALL-LABEL: define i32 addrspace(3)* @load_nested_address(i32 %idx) #0 { -define i32 addrspace(3)* @load_nested_address(i32 %idx) { - %load = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(1)* 
@nested.lds.address, align 4 - ret i32 addrspace(3)* %load +; ALL-LABEL: define ptr addrspace(3) @load_nested_address(i32 %idx) #0 { +define ptr addrspace(3) @load_nested_address(i32 %idx) { + %load = load ptr addrspace(3), ptr addrspace(1) @nested.lds.address, align 4 + ret ptr addrspace(3) %load } ; ALL-LABEL: define i32 @load_lds_alias() #0 { define i32 @load_lds_alias() { - %load = load i32, i32 addrspace(3)* @alias.lds0, align 4 + %load = load i32, ptr addrspace(3) @alias.lds0, align 4 ret i32 %load } ; ALL-LABEL: define i32 @load_lds_cycle() #0 { define i32 @load_lds_cycle() { - %load = load i32, i32 addrspace(3)* @lds.cycle, align 4 + %load = load i32, ptr addrspace(3) @lds.cycle, align 4 ret i32 %load } ; ALL-LABEL: define i1 @icmp_lds_address() #0 { define i1 @icmp_lds_address() { - ret i1 icmp eq (i32 addrspace(3)* @lds0, i32 addrspace(3)* null) + ret i1 icmp eq (ptr addrspace(3) @lds0, ptr addrspace(3) null) } ; ALL-LABEL: define i32 @transitive_call() #0 { @@ -70,7 +70,7 @@ ; ALL-LABEL: define i32 @recursive_call_lds(i32 %arg0) #0 { define i32 @recursive_call_lds(i32 %arg0) { - %load = load i32, i32 addrspace(3)* @lds0, align 4 + %load = load i32, ptr addrspace(3) @lds0, align 4 %add = add i32 %arg0, %load %call = call i32 @recursive_call_lds(i32 %add) ret i32 %call @@ -81,13 +81,13 @@ ; ALL-LABEL: define i32 @load_lds_simple_noinline() #0 { define i32 @load_lds_simple_noinline() noinline { - %load = load i32, i32 addrspace(3)* @lds0, align 4 + %load = load i32, ptr addrspace(3) @lds0, align 4 ret i32 %load } ; ALL-LABEL: define i32 @recursive_call_lds_noinline(i32 %arg0) #0 { define i32 @recursive_call_lds_noinline(i32 %arg0) noinline { - %load = load i32, i32 addrspace(3)* @lds0, align 4 + %load = load i32, ptr addrspace(3) @lds0, align 4 %add = add i32 %arg0, %load %call = call i32 @recursive_call_lds(i32 %add) ret i32 %call diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll --- a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll +++ b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -march=amdgcn -mcpu=gfx940 -verify-machineinstrs | FileCheck %s -check-prefix=GFX940 -declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0f32.f32(float* %ptr, float %data) -declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0v2f16.v2f16(<2 x half>* %ptr, <2 x half> %data) +declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data) +declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %ptr, <2 x half> %data) ; bf16 atomics use v2i16 argument since there is no bf16 data type in the llvm. 
-declare <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0v2i16(<2 x i16>* %ptr, <2 x i16> %data) -declare <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1v2i16(<2 x i16> addrspace(1)* %ptr, <2 x i16> %data) -declare <2 x half> @llvm.amdgcn.ds.fadd.v2f16(<2 x half> addrspace(3) * %ptr, <2 x half> %data, i32, i32, i1) -declare <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(<2 x i16> addrspace(3) * %ptr, <2 x i16> %data) +declare <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr %ptr, <2 x i16> %data) +declare <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1(ptr addrspace(1) %ptr, <2 x i16> %data) +declare <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %data, i32, i32, i1) +declare <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data) -define amdgpu_kernel void @flat_atomic_fadd_f32_noret(float* %ptr, float %data) { +define amdgpu_kernel void @flat_atomic_fadd_f32_noret(ptr %ptr, float %data) { ; GFX940-LABEL: flat_atomic_fadd_f32_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -20,11 +20,11 @@ ; GFX940-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 ; GFX940-NEXT: s_endpgm - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0f32.f32(float* %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data) ret void } -define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(float* %ptr) { +define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) { ; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -49,11 +49,11 @@ ; GFX940-NEXT: s_cbranch_execnz .LBB1_1 ; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX940-NEXT: s_endpgm - %ret = atomicrmw fadd float* %ptr, float 4.0 seq_cst + %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst ret void } -define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(float* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 { ; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat_ieee: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -78,22 +78,22 @@ ; GFX940-NEXT: s_cbranch_execnz .LBB2_1 ; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX940-NEXT: s_endpgm - %ret = atomicrmw fadd float* %ptr, float 4.0 seq_cst + %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst ret void } -define float @flat_atomic_fadd_f32_rtn(float* %ptr, float %data) { +define float @flat_atomic_fadd_f32_rtn(ptr %ptr, float %data) { ; GFX940-LABEL: flat_atomic_fadd_f32_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0f32.f32(float* %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data) ret float %ret } -define float @flat_atomic_fadd_f32_rtn_pat(float* %ptr, float %data) { +define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) { ; GFX940-LABEL: flat_atomic_fadd_f32_rtn_pat: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -117,11 +117,11 @@ ; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX940-NEXT: v_mov_b32_e32 v0, v2 ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = atomicrmw fadd float* %ptr, float 4.0 seq_cst + %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst ret float %ret } -define amdgpu_kernel void 
@flat_atomic_fadd_v2f16_noret(<2 x half>* %ptr, <2 x half> %data) { +define amdgpu_kernel void @flat_atomic_fadd_v2f16_noret(ptr %ptr, <2 x half> %data) { ; GFX940-LABEL: flat_atomic_fadd_v2f16_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -131,22 +131,22 @@ ; GFX940-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 ; GFX940-NEXT: s_endpgm - %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0v2f16.v2f16(<2 x half>* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %ptr, <2 x half> %data) ret void } -define <2 x half> @flat_atomic_fadd_v2f16_rtn(<2 x half>* %ptr, <2 x half> %data) { +define <2 x half> @flat_atomic_fadd_v2f16_rtn(ptr %ptr, <2 x half> %data) { ; GFX940-LABEL: flat_atomic_fadd_v2f16_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0v2f16.v2f16(<2 x half>* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %ptr, <2 x half> %data) ret <2 x half> %ret } -define amdgpu_kernel void @flat_atomic_fadd_v2bf16_noret(<2 x i16>* %ptr, <2 x i16> %data) { +define amdgpu_kernel void @flat_atomic_fadd_v2bf16_noret(ptr %ptr, <2 x i16> %data) { ; GFX940-LABEL: flat_atomic_fadd_v2bf16_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -156,22 +156,22 @@ ; GFX940-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 ; GFX940-NEXT: s_endpgm - %ret = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0v2i16(<2 x i16>* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr %ptr, <2 x i16> %data) ret void } -define <2 x i16> @flat_atomic_fadd_v2bf16_rtn(<2 x i16>* %ptr, <2 x i16> %data) { +define <2 x i16> @flat_atomic_fadd_v2bf16_rtn(ptr %ptr, <2 x i16> %data) { ; GFX940-LABEL: flat_atomic_fadd_v2bf16_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0v2i16(<2 x i16>* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr %ptr, <2 x i16> %data) ret <2 x i16> %ret } -define amdgpu_kernel void @global_atomic_fadd_v2bf16_noret(<2 x i16> addrspace(1)* %ptr, <2 x i16> %data) { +define amdgpu_kernel void @global_atomic_fadd_v2bf16_noret(ptr addrspace(1) %ptr, <2 x i16> %data) { ; GFX940-LABEL: global_atomic_fadd_v2bf16_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dword s4, s[0:1], 0x2c @@ -181,22 +181,22 @@ ; GFX940-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NEXT: global_atomic_pk_add_bf16 v0, v1, s[2:3] ; GFX940-NEXT: s_endpgm - %ret = call <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1v2i16(<2 x i16> addrspace(1)* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1(ptr addrspace(1) %ptr, <2 x i16> %data) ret void } -define <2 x i16> @global_atomic_fadd_v2bf16_rtn(<2 x i16> addrspace(1)* %ptr, <2 x i16> %data) { +define <2 x i16> @global_atomic_fadd_v2bf16_rtn(ptr addrspace(1) %ptr, <2 x i16> %data) { ; GFX940-LABEL: global_atomic_fadd_v2bf16_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; 
GFX940-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1v2i16(<2 x i16> addrspace(1)* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1(ptr addrspace(1) %ptr, <2 x i16> %data) ret <2 x i16> %ret } -define amdgpu_kernel void @local_atomic_fadd_v2f16_noret(<2 x half> addrspace(3)* %ptr, <2 x half> %data) { +define amdgpu_kernel void @local_atomic_fadd_v2f16_noret(ptr addrspace(3) %ptr, <2 x half> %data) { ; GFX940-LABEL: local_atomic_fadd_v2f16_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -205,22 +205,22 @@ ; GFX940-NEXT: v_mov_b32_e32 v1, s1 ; GFX940-NEXT: ds_pk_add_f16 v0, v1 ; GFX940-NEXT: s_endpgm - %ret = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(<2 x half> addrspace(3)* %ptr, <2 x half> %data, i32 0, i32 0, i1 0) + %ret = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %data, i32 0, i32 0, i1 0) ret void } -define <2 x half> @local_atomic_fadd_v2f16_rtn(<2 x half> addrspace(3)* %ptr, <2 x half> %data) { +define <2 x half> @local_atomic_fadd_v2f16_rtn(ptr addrspace(3) %ptr, <2 x half> %data) { ; GFX940-LABEL: local_atomic_fadd_v2f16_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: ds_pk_add_rtn_f16 v0, v0, v1 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(<2 x half> addrspace(3)* %ptr, <2 x half> %data, i32 0, i32 0, i1 0) + %ret = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %data, i32 0, i32 0, i1 0) ret <2 x half> %ret } -define amdgpu_kernel void @local_atomic_fadd_v2bf16_noret(<2 x i16> addrspace(3)* %ptr, <2 x i16> %data) { +define amdgpu_kernel void @local_atomic_fadd_v2bf16_noret(ptr addrspace(3) %ptr, <2 x i16> %data) { ; GFX940-LABEL: local_atomic_fadd_v2bf16_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -233,11 +233,11 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 ; GFX940-NEXT: s_endpgm - %ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(<2 x i16> addrspace(3)* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data) ret void } -define <2 x i16> @local_atomic_fadd_v2bf16_rtn(<2 x i16> addrspace(3)* %ptr, <2 x i16> %data) { +define <2 x i16> @local_atomic_fadd_v2bf16_rtn(ptr addrspace(3) %ptr, <2 x i16> %data) { ; GFX940-LABEL: local_atomic_fadd_v2bf16_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -247,7 +247,7 @@ ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(<2 x i16> addrspace(3)* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data) ret <2 x i16> %ret } diff --git a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll --- a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll @@ -8,14 +8,14 @@ declare double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32 immarg) declare double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32, i32 immarg) declare double 
@llvm.amdgcn.raw.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32 immarg) -declare double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) -declare double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) -declare double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fadd.f64.p3f64.f64(double addrspace(3)* %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data) -declare double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* nocapture, double, i32, i32, i1) +declare double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) +declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) +declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) +declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data) +declare double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %ptr, double %data) +declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) +declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) +declare double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) nocapture, double, i32, i32, i1) define amdgpu_kernel void @buffer_atomic_add_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) { ; GFX90A-LABEL: buffer_atomic_add_noret_f64: @@ -42,11 +42,11 @@ ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: buffer_atomic_add_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -63,7 +63,7 @@ ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } @@ -92,11 +92,11 @@ ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -113,7 +113,7 @@ ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) - store double %ret, double addrspace(1)* 
%out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } @@ -142,11 +142,11 @@ ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -163,7 +163,7 @@ ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } @@ -192,11 +192,11 @@ ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -213,7 +213,7 @@ ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } @@ -242,11 +242,11 @@ ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -263,7 +263,7 @@ ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } @@ -292,11 +292,11 @@ ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 
%vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -313,7 +313,7 @@ ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } @@ -342,11 +342,11 @@ ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -363,11 +363,11 @@ ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @global_atomic_fadd_f64_noret(double addrspace(1)* %ptr, double %data) { +define amdgpu_kernel void @global_atomic_fadd_f64_noret(ptr addrspace(1) %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fadd_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -378,11 +378,11 @@ ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void } -define amdgpu_kernel void @global_atomic_fmin_f64_noret(double addrspace(1)* %ptr, double %data) { +define amdgpu_kernel void @global_atomic_fmin_f64_noret(ptr addrspace(1) %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fmin_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -393,11 +393,11 @@ ; GFX90A-NEXT: global_atomic_min_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void } -define amdgpu_kernel void @global_atomic_fmax_f64_noret(double addrspace(1)* %ptr, double %data) { +define amdgpu_kernel void @global_atomic_fmax_f64_noret(ptr addrspace(1) %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fmax_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -408,11 +408,11 @@ ; GFX90A-NEXT: global_atomic_max_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void } -define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)* %ptr) #1 { +define amdgpu_kernel 
void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %ptr) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -439,11 +439,11 @@ ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst ret void } -define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(double addrspace(1)* %ptr) #1 { +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(1) %ptr) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -456,11 +456,11 @@ ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst ret void } -define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrspace(1)* %ptr) #1 { +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace(1) %ptr) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_system: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -487,11 +487,11 @@ ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("one-as") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst ret void } -define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(double addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(1) %ptr) #0 { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_flush: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -504,11 +504,11 @@ ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst ret void } -define double @global_atomic_fadd_f64_rtn(double addrspace(1)* %ptr, double %data) { +define double @global_atomic_fadd_f64_rtn(ptr addrspace(1) %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fadd_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -516,11 +516,11 @@ ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret } -define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double %data) #1 { +define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %data) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -547,11 +547,11 @@ ; GFX90A-NEXT: v_mov_b32_e32 v1, v3 ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst ret double %ret } -define double @global_atomic_fadd_f64_rtn_pat_agent(double 
addrspace(1)* %ptr, double %data) #1 { +define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, double %data) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_agent: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -563,11 +563,11 @@ ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst ret double %ret } -define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr, double %data) #1 { +define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, double %data) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_system: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -594,11 +594,11 @@ ; GFX90A-NEXT: v_mov_b32_e32 v1, v3 ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("one-as") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst ret double %ret } -define double @global_atomic_fmax_f64_rtn(double addrspace(1)* %ptr, double %data) { +define double @global_atomic_fmax_f64_rtn(ptr addrspace(1) %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fmax_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -606,11 +606,11 @@ ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret } -define double @global_atomic_fmin_f64_rtn(double addrspace(1)* %ptr, double %data) { +define double @global_atomic_fmin_f64_rtn(ptr addrspace(1) %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fmin_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -618,11 +618,11 @@ ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret } -define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(double addrspace(1)* %ptr) { +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrspace(1) %ptr) { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -647,11 +647,11 @@ ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst ret void } -define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 { +define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -678,11 +678,11 @@ ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd 
ptr %ptr, double 4.0 seq_cst ret void } -define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(double* %ptr) #1 { +define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -696,11 +696,11 @@ ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst ret void } -define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) #1 { +define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_system: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -728,11 +728,11 @@ ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("one-as") seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst ret void } -define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 { +define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -759,11 +759,11 @@ ; GFX90A-NEXT: v_mov_b32_e32 v1, v3 ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst ret double %ret } -define double @flat_atomic_fadd_f64_rtn_pat_agent(double* %ptr) #1 { +define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_agent: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -775,11 +775,11 @@ ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst ret double %ret } -define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 { +define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_system: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -807,11 +807,11 @@ ; GFX90A-NEXT: v_mov_b32_e32 v1, v3 ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("one-as") seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst ret double %ret } -define amdgpu_kernel void @flat_atomic_fadd_f64_noret(double* %ptr, double %data) { +define amdgpu_kernel void @flat_atomic_fadd_f64_noret(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fadd_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -823,11 +823,11 @@ ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data) ret void } -define double @flat_atomic_fadd_f64_rtn(double* %ptr, double %data) { +define double @flat_atomic_fadd_f64_rtn(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fadd_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -835,11 +835,11 @@ ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data) ret double %ret } -define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(double* %ptr) { +define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) { ; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -864,11 +864,11 @@ ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst ret void } -define amdgpu_kernel void @flat_atomic_fmin_f64_noret(double* %ptr, double %data) { +define amdgpu_kernel void @flat_atomic_fmin_f64_noret(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fmin_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -880,11 +880,11 @@ ; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) ret void } -define double @flat_atomic_fmin_f64_rtn(double* %ptr, double %data) { +define double @flat_atomic_fmin_f64_rtn(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fmin_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -892,11 +892,11 @@ ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) ret double %ret } -define amdgpu_kernel void @flat_atomic_fmax_f64_noret(double* %ptr, double %data) { +define amdgpu_kernel void @flat_atomic_fmax_f64_noret(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fmax_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -908,11 +908,11 @@ ; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) ret void } -define double @flat_atomic_fmax_f64_rtn(double* %ptr, double %data) { +define double @flat_atomic_fmax_f64_rtn(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fmax_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -920,11 +920,11 @@ ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) ret double %ret } -define amdgpu_kernel void @local_atomic_fadd_f64_noret(double addrspace(3)* %ptr, double %data) { +define amdgpu_kernel void @local_atomic_fadd_f64_noret(ptr addrspace(3) %ptr, double %data) { ; GFX90A-LABEL: local_atomic_fadd_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: 
s_load_dword s4, s[0:1], 0x24 @@ -935,11 +935,11 @@ ; GFX90A-NEXT: ds_add_f64 v2, v[0:1] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0) + %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret void } -define double @local_atomic_fadd_f64_rtn(double addrspace(3)* %ptr, double %data) { +define double @local_atomic_fadd_f64_rtn(ptr addrspace(3) %ptr, double %data) { ; GFX90A-LABEL: local_atomic_fadd_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -949,11 +949,11 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0) + %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret double %ret } -define amdgpu_kernel void @local_atomic_fadd_f64_noret_from_flat_intrinsic(double addrspace(3)* %ptr, double %data) { +define amdgpu_kernel void @local_atomic_fadd_f64_noret_from_flat_intrinsic(ptr addrspace(3) %ptr, double %data) { ; GFX90A-LABEL: local_atomic_fadd_f64_noret_from_flat_intrinsic: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dword s4, s[0:1], 0x24 @@ -964,11 +964,11 @@ ; GFX90A-NEXT: ds_add_f64 v2, v[0:1] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p3f64.f64(double addrspace(3)* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %ptr, double %data) ret void } -define double @local_atomic_fadd_f64_rtn_from_flat_intrinsic(double addrspace(3)* %ptr, double %data) { +define double @local_atomic_fadd_f64_rtn_from_flat_intrinsic(ptr addrspace(3) %ptr, double %data) { ; GFX90A-LABEL: local_atomic_fadd_f64_rtn_from_flat_intrinsic: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -978,11 +978,11 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p3f64.f64(double addrspace(3)* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %ptr, double %data) ret double %ret } -define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(double addrspace(3)* %ptr) #1 { +define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr) #1 { ; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24 @@ -995,11 +995,11 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst ret void } -define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(double addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3) %ptr) #0 { ; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24 @@ -1012,11 +1012,11 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst ret void } -define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double addrspace(3)* %ptr) 
#4 { +define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrspace(3) %ptr) #4 { ; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dword s2, s[0:1], 0x24 @@ -1040,11 +1040,11 @@ ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst ret void } -define double @local_atomic_fadd_f64_rtn_pat(double addrspace(3)* %ptr, double %data) #1 { +define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data) #1 { ; GFX90A-LABEL: local_atomic_fadd_f64_rtn_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1055,11 +1055,11 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst ret double %ret } -define double @local_atomic_fadd_f64_rtn_ieee_unsafe(double addrspace(3)* %ptr, double %data) #2 { +define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, double %data) #2 { ; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1069,11 +1069,11 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0) + %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret double %ret } -define double @local_atomic_fadd_f64_rtn_ieee_safe(double addrspace(3)* %ptr, double %data) #3 { +define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double %data) #3 { ; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_safe: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1083,7 +1083,7 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0) + %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret double %ret } diff --git a/llvm/test/CodeGen/AMDGPU/function-args.ll b/llvm/test/CodeGen/AMDGPU/function-args.ll --- a/llvm/test/CodeGen/AMDGPU/function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/function-args.ll @@ -6,7 +6,7 @@ ; GCN: v_and_b32_e32 v0, 1, v0 ; GCN: buffer_store_byte v0, off define void @void_func_i1(i1 %arg0) #0 { - store i1 %arg0, i1 addrspace(1)* undef + store i1 %arg0, ptr addrspace(1) undef ret void } @@ -18,7 +18,7 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { %ext = zext i1 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } @@ -30,7 +30,7 @@ define void @void_func_i1_signext(i1 signext %arg0) #0 { %ext = sext i1 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } @@ -43,7 +43,7 @@ br i1 %arg, label %bb2, label %bb1 bb1: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef br label %bb2 bb2: @@ -54,7 +54,7 @@ ; GCN-NOT: v0 ; GCN: buffer_store_byte v0, off define void @void_func_i8(i8 %arg0) #0 { - store i8 %arg0, i8 
addrspace(1)* undef + store i8 %arg0, ptr addrspace(1) undef ret void } @@ -64,7 +64,7 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { %ext = zext i8 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } @@ -74,14 +74,14 @@ define void @void_func_i8_signext(i8 signext %arg0) #0 { %ext = sext i8 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } ; GCN-LABEL: {{^}}void_func_i16: ; GCN: buffer_store_short v0, off define void @void_func_i16(i16 %arg0) #0 { - store i16 %arg0, i16 addrspace(1)* undef + store i16 %arg0, ptr addrspace(1) undef ret void } @@ -91,7 +91,7 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { %ext = zext i16 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } @@ -101,7 +101,7 @@ define void @void_func_i16_signext(i16 signext %arg0) #0 { %ext = sext i16 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } @@ -109,7 +109,7 @@ ; GCN-NOT: v0 ; GCN: buffer_store_dword v0, off define void @void_func_i32(i32 %arg0) #0 { - store i32 %arg0, i32 addrspace(1)* undef + store i32 %arg0, ptr addrspace(1) undef ret void } @@ -119,7 +119,7 @@ ; GCN-NOT: v1 ; GCN: buffer_store_dwordx2 v[0:1], off define void @void_func_i64(i64 %arg0) #0 { - store i64 %arg0, i64 addrspace(1)* undef + store i64 %arg0, ptr addrspace(1) undef ret void } @@ -128,7 +128,7 @@ ; CI: v_cvt_f16_f32_e32 v0, v0 ; GCN: buffer_store_short v0, off define void @void_func_f16(half %arg0) #0 { - store half %arg0, half addrspace(1)* undef + store half %arg0, ptr addrspace(1) undef ret void } @@ -136,7 +136,7 @@ ; GCN-NOT: v0 ; GCN: buffer_store_dword v0, off define void @void_func_f32(float %arg0) #0 { - store float %arg0, float addrspace(1)* undef + store float %arg0, ptr addrspace(1) undef ret void } @@ -146,7 +146,7 @@ ; GCN-NOT: v1 ; GCN: buffer_store_dwordx2 v[0:1], off define void @void_func_f64(double %arg0) #0 { - store double %arg0, double addrspace(1)* undef + store double %arg0, ptr addrspace(1) undef ret void } @@ -156,21 +156,21 @@ ; GCN-NOT: v1 ; GCN: buffer_store_dwordx2 v[0:1], off define void @void_func_v2i32(<2 x i32> %arg0) #0 { - store <2 x i32> %arg0, <2 x i32> addrspace(1)* undef + store <2 x i32> %arg0, ptr addrspace(1) undef ret void } ; GCN-LABEL: {{^}}void_func_v3i32: ; GCN-DAG: buffer_store_dwordx3 v[0:2], off define void @void_func_v3i32(<3 x i32> %arg0) #0 { - store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef + store <3 x i32> %arg0, ptr addrspace(1) undef ret void } ; GCN-LABEL: {{^}}void_func_v4i32: ; GCN: buffer_store_dwordx4 v[0:3], off define void @void_func_v4i32(<4 x i32> %arg0) #0 { - store <4 x i32> %arg0, <4 x i32> addrspace(1)* undef + store <4 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -178,7 +178,7 @@ ; GCN-DAG: buffer_store_dwordx4 v[0:3], off ; GCN-DAG: buffer_store_dword v4, off define void @void_func_v5i32(<5 x i32> %arg0) #0 { - store <5 x i32> %arg0, <5 x i32> addrspace(1)* undef + store <5 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -186,7 +186,7 @@ ; GCN-DAG: buffer_store_dwordx4 v[0:3], off ; GCN-DAG: buffer_store_dwordx4 v[4:7], off define void @void_func_v8i32(<8 x i32> %arg0) #0 { - store <8 x i32> %arg0, <8 x i32> addrspace(1)* undef + store <8 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -196,7 +196,7 @@ ; 
GCN-DAG: buffer_store_dwordx4 v[8:11], off ; GCN-DAG: buffer_store_dwordx4 v[12:15], off define void @void_func_v16i32(<16 x i32> %arg0) #0 { - store <16 x i32> %arg0, <16 x i32> addrspace(1)* undef + store <16 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -210,7 +210,7 @@ ; GCN-DAG: buffer_store_dwordx4 v[24:27], off ; GCN-DAG: buffer_store_dwordx4 v[28:31], off define void @void_func_v32i32(<32 x i32> %arg0) #0 { - store <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store <32 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -228,14 +228,14 @@ ; GCN-DAG: buffer_store_dwordx4 v[28:31], off ; GCN: buffer_store_dword [[STACKLOAD]], off define void @void_func_v33i32(<33 x i32> %arg0) #0 { - store <33 x i32> %arg0, <33 x i32> addrspace(1)* undef + store <33 x i32> %arg0, ptr addrspace(1) undef ret void } ; GCN-LABEL: {{^}}void_func_v2i64: ; GCN: buffer_store_dwordx4 v[0:3], off define void @void_func_v2i64(<2 x i64> %arg0) #0 { - store <2 x i64> %arg0, <2 x i64> addrspace(1)* undef + store <2 x i64> %arg0, ptr addrspace(1) undef ret void } @@ -243,7 +243,7 @@ ; GCN-DAG: buffer_store_dwordx4 v[0:3], off ; GCN-DAG: buffer_store_dwordx2 v[4:5], off define void @void_func_v3i64(<3 x i64> %arg0) #0 { - store <3 x i64> %arg0, <3 x i64> addrspace(1)* undef + store <3 x i64> %arg0, ptr addrspace(1) undef ret void } @@ -251,7 +251,7 @@ ; GCN-DAG: buffer_store_dwordx4 v[0:3], off ; GCN-DAG: buffer_store_dwordx4 v[4:7], off define void @void_func_v4i64(<4 x i64> %arg0) #0 { - store <4 x i64> %arg0, <4 x i64> addrspace(1)* undef + store <4 x i64> %arg0, ptr addrspace(1) undef ret void } @@ -260,7 +260,7 @@ ; GCN-DAG: buffer_store_dwordx4 v[4:7], off ; GCN-DAG: buffer_store_dwordx2 v[8:9], off define void @void_func_v5i64(<5 x i64> %arg0) #0 { - store <5 x i64> %arg0, <5 x i64> addrspace(1)* undef + store <5 x i64> %arg0, ptr addrspace(1) undef ret void } @@ -270,7 +270,7 @@ ; GCN-DAG: buffer_store_dwordx4 v[8:11], off ; GCN-DAG: buffer_store_dwordx4 v[12:15], off define void @void_func_v8i64(<8 x i64> %arg0) #0 { - store <8 x i64> %arg0, <8 x i64> addrspace(1)* undef + store <8 x i64> %arg0, ptr addrspace(1) undef ret void } @@ -284,7 +284,7 @@ ; GCN-DAG: buffer_store_dwordx4 v[24:27], off ; GCN-DAG: buffer_store_dwordx4 v[28:31], off define void @void_func_v16i64(<16 x i64> %arg0) #0 { - store <16 x i64> %arg0, <16 x i64> addrspace(1)* undef + store <16 x i64> %arg0, ptr addrspace(1) undef ret void } @@ -292,7 +292,7 @@ ; GFX9-NOT: v0 ; GFX9: buffer_store_dword v0, off define void @void_func_v2i16(<2 x i16> %arg0) #0 { - store <2 x i16> %arg0, <2 x i16> addrspace(1)* undef + store <2 x i16> %arg0, ptr addrspace(1) undef ret void } @@ -300,7 +300,7 @@ ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off ; GCN-DAG: buffer_store_short v{{[0-9]+}}, off define void @void_func_v3i16(<3 x i16> %arg0) #0 { - store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef + store <3 x i16> %arg0, ptr addrspace(1) undef ret void } @@ -309,7 +309,7 @@ ; GFX9-NOT: v1 ; GFX9: buffer_store_dwordx2 v[0:1], off define void @void_func_v4i16(<4 x i16> %arg0) #0 { - store <4 x i16> %arg0, <4 x i16> addrspace(1)* undef + store <4 x i16> %arg0, ptr addrspace(1) undef ret void } @@ -326,14 +326,14 @@ ; GFX89-DAG: buffer_store_dwordx2 v[0:1], off define void @void_func_v5i16(<5 x i16> %arg0) #0 { - store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef + store <5 x i16> %arg0, ptr addrspace(1) undef ret void } ; GCN-LABEL: {{^}}void_func_v8i16: ; GFX9-DAG: buffer_store_dwordx4 v[0:3], off define void @void_func_v8i16(<8 x i16> %arg0) 
#0 { - store <8 x i16> %arg0, <8 x i16> addrspace(1)* undef + store <8 x i16> %arg0, ptr addrspace(1) undef ret void } @@ -341,7 +341,7 @@ ; GFX9-DAG: buffer_store_dwordx4 v[0:3], off ; GFX9-DAG: buffer_store_dwordx4 v[4:7], off define void @void_func_v16i16(<16 x i16> %arg0) #0 { - store <16 x i16> %arg0, <16 x i16> addrspace(1)* undef + store <16 x i16> %arg0, ptr addrspace(1) undef ret void } @@ -351,7 +351,7 @@ %elt0 = extractelement <2 x i24> %arg0, i32 0 %elt1 = extractelement <2 x i24> %arg0, i32 1 %add = add i24 %elt0, %elt1 - store i24 %add, i24 addrspace(1)* undef + store i24 %add, ptr addrspace(1) undef ret void } @@ -361,21 +361,21 @@ ; GCN-NOT: v1 ; GCN: buffer_store_dwordx2 v[0:1], off define void @void_func_v2f32(<2 x float> %arg0) #0 { - store <2 x float> %arg0, <2 x float> addrspace(1)* undef + store <2 x float> %arg0, ptr addrspace(1) undef ret void } ; GCN-LABEL: {{^}}void_func_v3f32: ; GCN-DAG: buffer_store_dwordx3 v[0:2], off define void @void_func_v3f32(<3 x float> %arg0) #0 { - store <3 x float> %arg0, <3 x float> addrspace(1)* undef + store <3 x float> %arg0, ptr addrspace(1) undef ret void } ; GCN-LABEL: {{^}}void_func_v4f32: ; GCN: buffer_store_dwordx4 v[0:3], off define void @void_func_v4f32(<4 x float> %arg0) #0 { - store <4 x float> %arg0, <4 x float> addrspace(1)* undef + store <4 x float> %arg0, ptr addrspace(1) undef ret void } @@ -383,7 +383,7 @@ ; GCN-DAG: buffer_store_dwordx4 v[0:3], off ; GCN-DAG: buffer_store_dwordx4 v[4:7], off define void @void_func_v8f32(<8 x float> %arg0) #0 { - store <8 x float> %arg0, <8 x float> addrspace(1)* undef + store <8 x float> %arg0, ptr addrspace(1) undef ret void } @@ -393,14 +393,14 @@ ; GCN-DAG: buffer_store_dwordx4 v[8:11], off ; GCN-DAG: buffer_store_dwordx4 v[12:15], off define void @void_func_v16f32(<16 x float> %arg0) #0 { - store <16 x float> %arg0, <16 x float> addrspace(1)* undef + store <16 x float> %arg0, ptr addrspace(1) undef ret void } ; GCN-LABEL: {{^}}void_func_v2f64: ; GCN: buffer_store_dwordx4 v[0:3], off define void @void_func_v2f64(<2 x double> %arg0) #0 { - store <2 x double> %arg0, <2 x double> addrspace(1)* undef + store <2 x double> %arg0, ptr addrspace(1) undef ret void } @@ -408,7 +408,7 @@ ; GCN-DAG: buffer_store_dwordx4 v[0:3], off ; GCN-DAG: buffer_store_dwordx2 v[4:5], off define void @void_func_v3f64(<3 x double> %arg0) #0 { - store <3 x double> %arg0, <3 x double> addrspace(1)* undef + store <3 x double> %arg0, ptr addrspace(1) undef ret void } @@ -416,7 +416,7 @@ ; GCN-DAG: buffer_store_dwordx4 v[0:3], off ; GCN-DAG: buffer_store_dwordx4 v[4:7], off define void @void_func_v4f64(<4 x double> %arg0) #0 { - store <4 x double> %arg0, <4 x double> addrspace(1)* undef + store <4 x double> %arg0, ptr addrspace(1) undef ret void } @@ -426,7 +426,7 @@ ; GCN-DAG: buffer_store_dwordx4 v[8:11], off ; GCN-DAG: buffer_store_dwordx4 v[12:15], off define void @void_func_v8f64(<8 x double> %arg0) #0 { - store <8 x double> %arg0, <8 x double> addrspace(1)* undef + store <8 x double> %arg0, ptr addrspace(1) undef ret void } @@ -440,7 +440,7 @@ ; GCN-DAG: buffer_store_dwordx4 v[24:27], off ; GCN-DAG: buffer_store_dwordx4 v[28:31], off define void @void_func_v16f64(<16 x double> %arg0) #0 { - store <16 x double> %arg0, <16 x double> addrspace(1)* undef + store <16 x double> %arg0, ptr addrspace(1) undef ret void } @@ -448,7 +448,7 @@ ; GFX9-NOT: v0 ; GFX9: buffer_store_dword v0, off define void @void_func_v2f16(<2 x half> %arg0) #0 { - store <2 x half> %arg0, <2 x half> addrspace(1)* undef + store <2 x 
half> %arg0, ptr addrspace(1) undef ret void } @@ -464,7 +464,7 @@ ; GCN-DAG: buffer_store_short ; GCN-DAG: buffer_store_dword define void @void_func_v3f16(<3 x half> %arg0) #0 { - store <3 x half> %arg0, <3 x half> addrspace(1)* undef + store <3 x half> %arg0, ptr addrspace(1) undef ret void } @@ -474,7 +474,7 @@ ; GFX9-NOT: v[0:1] ; GFX9: buffer_store_dwordx2 v[0:1], off define void @void_func_v4f16(<4 x half> %arg0) #0 { - store <4 x half> %arg0, <4 x half> addrspace(1)* undef + store <4 x half> %arg0, ptr addrspace(1) undef ret void } @@ -483,7 +483,7 @@ ; GFX9-NOT: v1 ; GFX9: buffer_store_dwordx4 v[0:3], off define void @void_func_v8f16(<8 x half> %arg0) #0 { - store <8 x half> %arg0, <8 x half> addrspace(1)* undef + store <8 x half> %arg0, ptr addrspace(1) undef ret void } @@ -493,7 +493,7 @@ ; GFX9-DAG: buffer_store_dwordx4 v[0:3], off ; GFX9-DAG: buffer_store_dwordx4 v[4:7], off define void @void_func_v16f16(<16 x half> %arg0) #0 { - store <16 x half> %arg0, <16 x half> addrspace(1)* undef + store <16 x half> %arg0, ptr addrspace(1) undef ret void } @@ -504,9 +504,9 @@ ; GCN: buffer_store_dwordx2 v[1:2] ; GCN: buffer_store_dword v3 define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { - store volatile i32 %arg0, i32 addrspace(1)* undef - store volatile i64 %arg1, i64 addrspace(1)* undef - store volatile i32 %arg2, i32 addrspace(1)* undef + store volatile i32 %arg0, ptr addrspace(1) undef + store volatile i64 %arg1, ptr addrspace(1) undef + store volatile i32 %arg2, ptr addrspace(1) undef ret void } @@ -514,7 +514,7 @@ ; GCN-NOT: v0 ; GCN: buffer_store_dword v0, off define void @void_func_struct_i32({ i32 } %arg0) #0 { - store { i32 } %arg0, { i32 } addrspace(1)* undef + store { i32 } %arg0, ptr addrspace(1) undef ret void } @@ -522,7 +522,7 @@ ; GCN-DAG: buffer_store_byte v0, off ; GCN-DAG: buffer_store_dword v1, off define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { - store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef + store { i8, i32 } %arg0, ptr addrspace(1) undef ret void } @@ -531,9 +531,9 @@ ; GCN-DAG: buffer_load_dword v[[ELT1:[0-9]+]], off, s[0:3], s32 offset:4{{$}} ; GCN-DAG: buffer_store_dword v[[ELT1]] ; GCN-DAG: buffer_store_byte v[[ELT0]] -define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0) #0 { - %arg0.load = load { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 - store { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef +define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 { + %arg0.load = load { i8, i32 }, ptr addrspace(5) %arg0 + store { i8, i32 } %arg0.load, ptr addrspace(1) undef ret void } @@ -545,12 +545,12 @@ ; GCN: ds_write_b32 v0, v0 ; GCN: s_setpc_b64 -define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0, { i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg1, i32 %arg2) #0 { - %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 - %arg1.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg1 - store volatile { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef - store volatile { i8, i32 } %arg1.load, { i8, i32 } addrspace(1)* undef - store volatile i32 %arg2, i32 addrspace(3)* undef +define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }) %arg0, ptr addrspace(5) byval({ i8, i32 }) %arg1, i32 %arg2) #0 { + %arg0.load = load volatile { i8, i32 }, ptr addrspace(5) %arg0 + %arg1.load = load volatile { i8, i32 }, ptr addrspace(5) %arg1 + store volatile 
{ i8, i32 } %arg0.load, ptr addrspace(1) undef + store volatile { i8, i32 } %arg1.load, ptr addrspace(1) undef + store volatile i32 %arg2, ptr addrspace(3) undef ret void } @@ -560,11 +560,11 @@ ; GCN-DAG: buffer_load_dword v[[ARG1_LOAD1:[0-9]+]], off, s[0:3], s32 offset:12{{$}} ; GCN-DAG: buffer_store_dword v[[ARG0_LOAD]], off ; GCN-DAG: buffer_store_dwordx2 v[[[ARG1_LOAD0]]:[[ARG1_LOAD1]]], off -define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i64 addrspace(5)* byval(i64) %arg1) #0 { - %arg0.load = load i32, i32 addrspace(5)* %arg0 - %arg1.load = load i64, i64 addrspace(5)* %arg1 - store i32 %arg0.load, i32 addrspace(1)* undef - store i64 %arg1.load, i64 addrspace(1)* undef +define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, ptr addrspace(5) byval(i64) %arg1) #0 { + %arg0.load = load i32, ptr addrspace(5) %arg0 + %arg1.load = load i64, ptr addrspace(5) %arg1 + store i32 %arg0.load, ptr addrspace(1) undef + store i64 %arg1.load, ptr addrspace(1) undef ret void } @@ -585,9 +585,9 @@ ; GCN: buffer_store_dword v[[LOAD_ARG1]] ; GCN: buffer_store_dwordx2 v[[[LOAD_ARG2_0]]:[[LOAD_ARG2_1]]], off define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 { - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile i32 %arg1, i32 addrspace(1)* undef - store volatile i64 %arg2, i64 addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile i32 %arg1, ptr addrspace(1) undef + store volatile i64 %arg2, ptr addrspace(1) undef ret void } @@ -612,11 +612,11 @@ ; CI: buffer_store_short [[CVT_ARG4]], off define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4) #0 { - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile i1 %arg1, i1 addrspace(1)* undef - store volatile i8 %arg2, i8 addrspace(1)* undef - store volatile i16 %arg3, i16 addrspace(1)* undef - store volatile half %arg4, half addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile i1 %arg1, ptr addrspace(1) undef + store volatile i8 %arg2, ptr addrspace(1) undef + store volatile i16 %arg3, ptr addrspace(1) undef + store volatile half %arg4, ptr addrspace(1) undef ret void } @@ -629,9 +629,9 @@ ; GCN: buffer_store_dwordx2 v[[[LOAD_ARG1_0]]:[[LOAD_ARG1_1]]], off ; GCN: buffer_store_dwordx2 v[[[LOAD_ARG2_0]]:[[LOAD_ARG2_1]]], off define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 { - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <2 x i32> %arg1, <2 x i32> addrspace(1)* undef - store volatile <2 x float> %arg2, <2 x float> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <2 x i32> %arg1, ptr addrspace(1) undef + store volatile <2 x float> %arg2, ptr addrspace(1) undef ret void } @@ -641,9 +641,9 @@ ; GFX9: buffer_store_dword [[LOAD_ARG1]], off ; GFX9: buffer_store_short [[LOAD_ARG2]], off define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2) #0 { - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <2 x i16> %arg1, <2 x i16> addrspace(1)* undef - store volatile <2 x half> %arg2, <2 x half> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <2 x i16> %arg1, ptr addrspace(1) undef + store volatile <2 x half> %arg2, ptr addrspace(1) undef ret void } @@ -661,9 +661,9 @@ 
; GCN: buffer_store_dwordx4 v[[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]], off ; GCN: buffer_store_dwordx4 v[[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]], off define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 { - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <2 x i64> %arg1, <2 x i64> addrspace(1)* undef - store volatile <2 x double> %arg2, <2 x double> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <2 x i64> %arg1, ptr addrspace(1) undef + store volatile <2 x double> %arg2, ptr addrspace(1) undef ret void } @@ -681,9 +681,9 @@ ; GCN: buffer_store_dwordx4 v[[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]], off ; GCN: buffer_store_dwordx4 v[[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]], off define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 { - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <4 x i32> %arg1, <4 x i32> addrspace(1)* undef - store volatile <4 x float> %arg2, <4 x float> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <4 x i32> %arg1, ptr addrspace(1) undef + store volatile <4 x float> %arg2, ptr addrspace(1) undef ret void } @@ -711,9 +711,9 @@ ; GCN: buffer_store_dwordx4 v[[[LOAD_ARG2_4]]:[[LOAD_ARG2_7]]], off ; GCN: buffer_store_dwordx4 v[[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]], off define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 { - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <8 x i32> %arg1, <8 x i32> addrspace(1)* undef - store volatile <8 x float> %arg2, <8 x float> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <8 x i32> %arg1, ptr addrspace(1) undef + store volatile <8 x float> %arg2, ptr addrspace(1) undef ret void } @@ -752,9 +752,9 @@ ; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_14:[0-9]+]], off, s[0:3], s32 offset:120{{$}} ; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_15:[0-9]+]], off, s[0:3], s32 offset:124{{$}} define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 { - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <16 x i32> %arg1, <16 x i32> addrspace(1)* undef - store volatile <16 x float> %arg2, <16 x float> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <16 x i32> %arg1, ptr addrspace(1) undef + store volatile <16 x float> %arg2, ptr addrspace(1) undef ret void } @@ -771,10 +771,10 @@ %arg0.0 = extractelement <3 x float> %arg0, i32 0 %arg0.1 = extractelement <3 x float> %arg0, i32 1 %arg0.2 = extractelement <3 x float> %arg0, i32 2 - store volatile float %arg0.0, float addrspace(3)* undef - store volatile float %arg0.1, float addrspace(3)* undef - store volatile float %arg0.2, float addrspace(3)* undef - store volatile i32 %arg1, i32 addrspace(3)* undef + store volatile float %arg0.0, ptr addrspace(3) undef + store volatile float %arg0.1, ptr addrspace(3) undef + store volatile float %arg0.2, ptr addrspace(3) undef + store volatile i32 %arg1, ptr addrspace(3) undef ret void } @@ -790,25 +790,25 @@ %arg0.0 = extractelement <3 x i32> %arg0, i32 0 %arg0.1 = extractelement <3 x i32> %arg0, i32 1 %arg0.2 = extractelement <3 x i32> %arg0, i32 2 - store volatile i32 %arg0.0, i32 addrspace(3)* undef - store volatile i32 %arg0.1, i32 addrspace(3)* undef - store volatile i32 %arg0.2, i32 addrspace(3)* undef - store 
volatile i32 %arg1, i32 addrspace(3)* undef + store volatile i32 %arg0.0, ptr addrspace(3) undef + store volatile i32 %arg0.1, ptr addrspace(3) undef + store volatile i32 %arg0.2, ptr addrspace(3) undef + store volatile i32 %arg1, ptr addrspace(3) undef ret void } ; Check there is no crash. ; GCN-LABEL: {{^}}void_func_v16i8: define void @void_func_v16i8(<16 x i8> %arg0) #0 { - store volatile <16 x i8> %arg0, <16 x i8> addrspace(1)* undef + store volatile <16 x i8> %arg0, ptr addrspace(1) undef ret void } ; Check there is no crash. ; GCN-LABEL: {{^}}void_func_v32i32_v16i8: define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <16 x i8> %arg1, <16 x i8> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <16 x i8> %arg1, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/function-call-relocs.ll b/llvm/test/CodeGen/AMDGPU/function-call-relocs.ll --- a/llvm/test/CodeGen/AMDGPU/function-call-relocs.ll +++ b/llvm/test/CodeGen/AMDGPU/function-call-relocs.ll @@ -1,10 +1,10 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji < %s | FileCheck %s -declare void @func(i32 addrspace(1)* %out) +declare void @func(ptr addrspace(1) %out) -declare protected void @protected_func(i32 addrspace(1)* %out) +declare protected void @protected_func(ptr addrspace(1) %out) -declare hidden void @hidden_func(i32 addrspace(1)* %out) +declare hidden void @hidden_func(ptr addrspace(1) %out) ; CHECK-LABEL: call_func: ; CHECK: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]] @@ -12,8 +12,8 @@ ; CHECK: s_addc_u32 s[[GOT_ADDR_HI:[0-9]+]], s[[PC_HI]], func@gotpcrel32@hi+12 ; CHECK: s_load_dwordx2 s[[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]], s[[[GOT_ADDR_LO]]:[[GOT_ADDR_HI]]], 0x0 ; CHECK: s_swappc_b64 s[{{[0-9]+:[0-9]+}}], s[[[ADDR_LO]]:[[ADDR_HI]]] -define amdgpu_kernel void @call_func(i32 addrspace(1)* %out) { - call void @func(i32 addrspace(1)* %out) +define amdgpu_kernel void @call_func(ptr addrspace(1) %out) { + call void @func(ptr addrspace(1) %out) ret void } @@ -22,8 +22,8 @@ ; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], protected_func@rel32@lo+4 ; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], protected_func@rel32@hi+12 ; CHECK: s_swappc_b64 s[{{[0-9]+:[0-9]+}}], s[[[ADDR_LO]]:[[ADDR_HI]]] -define amdgpu_kernel void @call_protected_func(i32 addrspace(1)* %out) { - call void @protected_func(i32 addrspace(1)* %out) +define amdgpu_kernel void @call_protected_func(ptr addrspace(1) %out) { + call void @protected_func(ptr addrspace(1) %out) ret void } @@ -32,8 +32,8 @@ ; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], hidden_func@rel32@lo+4 ; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], hidden_func@rel32@hi+12 ; CHECK: s_swappc_b64 s[{{[0-9]+:[0-9]+}}], s[[[ADDR_LO]]:[[ADDR_HI]]] -define amdgpu_kernel void @call_hidden_func(i32 addrspace(1)* %out) { - call void @hidden_func(i32 addrspace(1)* %out) +define amdgpu_kernel void @call_hidden_func(ptr addrspace(1) %out) { + call void @hidden_func(ptr addrspace(1) %out) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/function-returns.ll b/llvm/test/CodeGen/AMDGPU/function-returns.ll --- a/llvm/test/CodeGen/AMDGPU/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/function-returns.ll @@ -7,7 +7,7 @@ ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 define i1 @i1_func_void() #0 { - %val = load i1, i1 addrspace(1)* undef + %val = load i1, ptr addrspace(1) undef ret i1 %val } @@ -17,7 +17,7 @@ ; GCN-NEXT: 
s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define zeroext i1 @i1_zeroext_func_void() #0 { - %val = load i1, i1 addrspace(1)* undef + %val = load i1, ptr addrspace(1) undef ret i1 %val } @@ -27,7 +27,7 @@ ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1{{$}} ; GCN-NEXT: s_setpc_b64 define signext i1 @i1_signext_func_void() #0 { - %val = load i1, i1 addrspace(1)* undef + %val = load i1, ptr addrspace(1) undef ret i1 %val } @@ -36,7 +36,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define i8 @i8_func_void() #0 { - %val = load i8, i8 addrspace(1)* undef + %val = load i8, ptr addrspace(1) undef ret i8 %val } @@ -45,7 +45,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define zeroext i8 @i8_zeroext_func_void() #0 { - %val = load i8, i8 addrspace(1)* undef + %val = load i8, ptr addrspace(1) undef ret i8 %val } @@ -54,7 +54,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define signext i8 @i8_signext_func_void() #0 { - %val = load i8, i8 addrspace(1)* undef + %val = load i8, ptr addrspace(1) undef ret i8 %val } @@ -63,7 +63,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define i16 @i16_func_void() #0 { - %val = load i16, i16 addrspace(1)* undef + %val = load i16, ptr addrspace(1) undef ret i16 %val } @@ -72,7 +72,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define zeroext i16 @i16_zeroext_func_void() #0 { - %val = load i16, i16 addrspace(1)* undef + %val = load i16, ptr addrspace(1) undef ret i16 %val } @@ -81,7 +81,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define signext i16 @i16_signext_func_void() #0 { - %val = load i16, i16 addrspace(1)* undef + %val = load i16, ptr addrspace(1) undef ret i16 %val } @@ -90,7 +90,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define i32 @i32_func_void() #0 { - %val = load i32, i32 addrspace(1)* undef + %val = load i32, ptr addrspace(1) undef ret i32 %val } @@ -100,7 +100,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define i48 @i48_func_void() #0 { - %val = load i48, i48 addrspace(1)* undef, align 8 + %val = load i48, ptr addrspace(1) undef, align 8 ret i48 %val } @@ -110,7 +110,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define zeroext i48 @i48_zeroext_func_void() #0 { - %val = load i48, i48 addrspace(1)* undef, align 8 + %val = load i48, ptr addrspace(1) undef, align 8 ret i48 %val } @@ -120,7 +120,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define signext i48 @i48_signext_func_void() #0 { - %val = load i48, i48 addrspace(1)* undef, align 8 + %val = load i48, ptr addrspace(1) undef, align 8 ret i48 %val } @@ -157,7 +157,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define i64 @i64_func_void() #0 { - %val = load i64, i64 addrspace(1)* undef + %val = load i64, ptr addrspace(1) undef ret i64 %val } @@ -167,7 +167,7 @@ ; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define i65 @i65_func_void() #0 { - %val = load i65, i65 addrspace(1)* undef + %val = load i65, ptr addrspace(1) undef ret i65 %val } @@ -176,7 +176,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define float @f32_func_void() #0 { - %val = load float, float addrspace(1)* undef + %val = load float, ptr addrspace(1) undef ret float %val } @@ -185,7 +185,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define double @f64_func_void() #0 { - %val = load double, double addrspace(1)* undef + %val = load double, ptr addrspace(1) undef ret double %val } @@ -194,7 +194,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define <2 x 
double> @v2f64_func_void() #0 { - %val = load <2 x double>, <2 x double> addrspace(1)* undef + %val = load <2 x double>, ptr addrspace(1) undef ret <2 x double> %val } @@ -203,7 +203,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define <2 x i32> @v2i32_func_void() #0 { - %val = load <2 x i32>, <2 x i32> addrspace(1)* undef + %val = load <2 x i32>, ptr addrspace(1) undef ret <2 x i32> %val } @@ -212,7 +212,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define <3 x i32> @v3i32_func_void() #0 { - %val = load <3 x i32>, <3 x i32> addrspace(1)* undef + %val = load <3 x i32>, ptr addrspace(1) undef ret <3 x i32> %val } @@ -221,7 +221,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define <4 x i32> @v4i32_func_void() #0 { - %val = load <4 x i32>, <4 x i32> addrspace(1)* undef + %val = load <4 x i32>, ptr addrspace(1) undef ret <4 x i32> %val } @@ -231,7 +231,7 @@ ; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define <5 x i32> @v5i32_func_void() #0 { - %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef + %val = load volatile <5 x i32>, ptr addrspace(1) undef ret <5 x i32> %val } @@ -241,8 +241,8 @@ ; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define <8 x i32> @v8i32_func_void() #0 { - %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef - %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <8 x i32>, ptr addrspace(1) %ptr ret <8 x i32> %val } @@ -254,8 +254,8 @@ ; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define <16 x i32> @v16i32_func_void() #0 { - %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef - %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i32>, ptr addrspace(1) %ptr ret <16 x i32> %val } @@ -271,8 +271,8 @@ ; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define <32 x i32> @v32i32_func_void() #0 { - %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef - %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <32 x i32>, ptr addrspace(1) %ptr ret <32 x i32> %val } @@ -281,7 +281,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define <2 x i64> @v2i64_func_void() #0 { - %val = load <2 x i64>, <2 x i64> addrspace(1)* undef + %val = load <2 x i64>, ptr addrspace(1) undef ret <2 x i64> %val } @@ -291,8 +291,8 @@ ; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define <3 x i64> @v3i64_func_void() #0 { - %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef - %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <3 x i64>, ptr addrspace(1) %ptr ret <3 x i64> %val } @@ -302,8 +302,8 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define <4 x i64> @v4i64_func_void() #0 { - %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef - %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <4 x i64>, ptr addrspace(1) %ptr ret <4 x i64> %val } @@ -314,8 +314,8 @@ ; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define <5 x i64> @v5i64_func_void() #0 { - %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef - 
%val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <5 x i64>, ptr addrspace(1) %ptr ret <5 x i64> %val } @@ -327,8 +327,8 @@ ; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define <8 x i64> @v8i64_func_void() #0 { - %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef - %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <8 x i64>, ptr addrspace(1) %ptr ret <8 x i64> %val } @@ -344,8 +344,8 @@ ; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define <16 x i64> @v16i64_func_void() #0 { - %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef - %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i64>, ptr addrspace(1) %ptr ret <16 x i64> %val } @@ -354,7 +354,7 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 define <2 x i16> @v2i16_func_void() #0 { - %val = load <2 x i16>, <2 x i16> addrspace(1)* undef + %val = load <2 x i16>, ptr addrspace(1) undef ret <2 x i16> %val } @@ -363,7 +363,7 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 define <3 x i16> @v3i16_func_void() #0 { - %val = load <3 x i16>, <3 x i16> addrspace(1)* undef + %val = load <3 x i16>, ptr addrspace(1) undef ret <3 x i16> %val } @@ -372,7 +372,7 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 define <4 x i16> @v4i16_func_void() #0 { - %val = load <4 x i16>, <4 x i16> addrspace(1)* undef + %val = load <4 x i16>, ptr addrspace(1) undef ret <4 x i16> %val } @@ -381,7 +381,7 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 define <4 x half> @v4f16_func_void() #0 { - %val = load <4 x half>, <4 x half> addrspace(1)* undef + %val = load <4 x half>, ptr addrspace(1) undef ret <4 x half> %val } @@ -392,8 +392,8 @@ ; GFX9-NEXT: s_waitcnt ; GFX9-NEXT: s_setpc_b64 define <5 x i16> @v5i16_func_void() #0 { - %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef - %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <5 x i16>, ptr addrspace(1) %ptr ret <5 x i16> %val } @@ -402,8 +402,8 @@ ; GFX9: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 define <8 x i16> @v8i16_func_void() #0 { - %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef - %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <8 x i16>, ptr addrspace(1) %ptr ret <8 x i16> %val } @@ -413,8 +413,8 @@ ; GFX9: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 define <16 x i16> @v16i16_func_void() #0 { - %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef - %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i16>, ptr addrspace(1) %ptr ret <16 x i16> %val } @@ -425,8 +425,8 @@ ; GCN-DAG: v14 ; GCN-DAG: v15 define <16 x i8> @v16i8_func_void() #0 { - %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef - %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i8>, ptr addrspace(1) %ptr ret <16 x i8> %val } @@ -438,8 +438,8 @@ ; GCN-DAG: v_lshrrev_b32_e32 v3, 24, v0 ; 
GCN: s_setpc_b64 define <4 x i8> @v4i8_func_void() #0 { - %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef - %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <4 x i8>, ptr addrspace(1) %ptr ret <4 x i8> %val } @@ -449,7 +449,7 @@ ; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define {i8, i32} @struct_i8_i32_func_void() #0 { - %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef + %val = load { i8, i32 }, ptr addrspace(1) undef ret { i8, i32 } %val } @@ -458,13 +458,13 @@ ; GCN: buffer_load_dword [[VAL1:v[0-9]+]] ; GCN: buffer_store_byte [[VAL0]], v0, s[0:3], 0 offen{{$}} ; GCN: buffer_store_dword [[VAL1]], v0, s[0:3], 0 offen offset:4{{$}} -define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %arg0) #0 { - %val0 = load volatile i8, i8 addrspace(1)* undef - %val1 = load volatile i32, i32 addrspace(1)* undef - %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 - %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1 - store i8 %val0, i8 addrspace(5)* %gep0 - store i32 %val1, i32 addrspace(5)* %gep1 +define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %arg0) #0 { + %val0 = load volatile i8, ptr addrspace(1) undef + %val1 = load volatile i32, ptr addrspace(1) undef + %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 0 + %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 1 + store i8 %val0, ptr addrspace(5) %gep0 + store i32 %val1, ptr addrspace(5) %gep1 ret void } @@ -509,8 +509,8 @@ ; GFX9: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 define <33 x i32> @v33i32_func_void() #0 { - %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef - %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <33 x i32>, ptr addrspace(1) %ptr ret <33 x i32> %val } @@ -551,8 +551,8 @@ ; GFX9: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { - %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef - %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load { <32 x i32>, i32 }, ptr addrspace(1) %ptr ret { <32 x i32>, i32 }%val } @@ -593,8 +593,8 @@ ; GFX9: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { - %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef - %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load { i32, <32 x i32> }, ptr addrspace(1) %ptr ret { i32, <32 x i32> }%val } @@ -605,10 +605,10 @@ ; GCN: ds_read_b32 v2, ; GCN: ds_read_b32 v3, define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { - %load0 = load volatile i32, i32 addrspace(3)* undef - %load1 = load volatile i32, i32 addrspace(3)* undef - %load2 = load volatile i32, i32 addrspace(3)* undef - %load3 = load volatile i32, i32 addrspace(3)* undef + %load0 = load volatile i32, ptr addrspace(3) undef + %load1 = load volatile i32, ptr addrspace(3) undef + %load2 = load volatile i32, ptr addrspace(3) undef + 
%load3 = load volatile i32, ptr addrspace(3) undef %insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0 %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1 @@ -624,10 +624,10 @@ ; GCN: ds_read_b32 v2, ; GCN: ds_read_b32 v3, define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { - %load0 = load volatile float, float addrspace(3)* undef - %load1 = load volatile float, float addrspace(3)* undef - %load2 = load volatile float, float addrspace(3)* undef - %load3 = load volatile i32, i32 addrspace(3)* undef + %load0 = load volatile float, ptr addrspace(3) undef + %load1 = load volatile float, ptr addrspace(3) undef + %load2 = load volatile float, ptr addrspace(3) undef + %load3 = load volatile i32, ptr addrspace(3) undef %insert.0 = insertelement <3 x float> undef, float %load0, i32 0 %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1 @@ -644,16 +644,16 @@ ; GCN: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0 ; GCN: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]] ; GCN-NEXT: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]] -define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) #0 { - %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32 +define void @void_func_sret_max_known_zero_bits(ptr addrspace(5) sret(i8) %arg0) #0 { + %arg0.int = ptrtoint ptr addrspace(5) %arg0 to i32 %lshr0 = lshr i32 %arg0.int, 16 %lshr1 = lshr i32 %arg0.int, 17 %lshr2 = lshr i32 %arg0.int, 18 - store volatile i32 %lshr0, i32 addrspace(3)* undef - store volatile i32 %lshr1, i32 addrspace(3)* undef - store volatile i32 %lshr2, i32 addrspace(3)* undef + store volatile i32 %lshr0, ptr addrspace(3) undef + store volatile i32 %lshr1, ptr addrspace(3) undef + store volatile i32 %lshr2, ptr addrspace(3) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll --- a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll +++ b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll @@ -7,7 +7,7 @@ ; These two objects should be allocated at the same constant offsets ; from the base. -define amdgpu_kernel void @alloc_lds_gds(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @alloc_lds_gds(ptr addrspace(1) %out) #1 { ; GCN-LABEL: alloc_lds_gds: ; GCN: ; %bb.0: ; GCN-NEXT: v_mov_b32_e32 v0, 5 @@ -21,15 +21,15 @@ ; GCN-NEXT: ds_add_u32 v1, v0 offset:12 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_endpgm - %gep.gds = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds0, i32 0, i32 3 - %val0 = atomicrmw add i32 addrspace(2)* %gep.gds, i32 5 acq_rel - %gep.lds = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds0, i32 0, i32 3 - %val1 = atomicrmw add i32 addrspace(3)* %gep.lds, i32 5 acq_rel + %gep.gds = getelementptr [4 x i32], ptr addrspace(2) @gds0, i32 0, i32 3 + %val0 = atomicrmw add ptr addrspace(2) %gep.gds, i32 5 acq_rel + %gep.lds = getelementptr [4 x i32], ptr addrspace(3) @lds0, i32 0, i32 3 + %val1 = atomicrmw add ptr addrspace(3) %gep.lds, i32 5 acq_rel ret void } ; The LDS alignment shouldn't change offset of GDS. 
-define amdgpu_kernel void @alloc_lds_gds_align(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @alloc_lds_gds_align(ptr addrspace(1) %out) #1 { ; GCN-LABEL: alloc_lds_gds_align: ; GCN: ; %bb.0: ; GCN-NEXT: v_mov_b32_e32 v0, 5 @@ -45,21 +45,21 @@ ; GCN-NEXT: ds_add_u32 v1, v0 offset:12 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_endpgm - %gep.gds = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds0, i32 0, i32 3 - %val0 = atomicrmw add i32 addrspace(2)* %gep.gds, i32 5 acq_rel + %gep.gds = getelementptr [4 x i32], ptr addrspace(2) @gds0, i32 0, i32 3 + %val0 = atomicrmw add ptr addrspace(2) %gep.gds, i32 5 acq_rel - %gep.lds0 = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds0, i32 0, i32 3 - %val1 = atomicrmw add i32 addrspace(3)* %gep.lds0, i32 5 acq_rel + %gep.lds0 = getelementptr [4 x i32], ptr addrspace(3) @lds0, i32 0, i32 3 + %val1 = atomicrmw add ptr addrspace(3) %gep.lds0, i32 5 acq_rel - %gep.lds1 = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds1, i32 0, i32 3 - %val2 = atomicrmw add i32 addrspace(3)* %gep.lds1, i32 5 acq_rel + %gep.lds1 = getelementptr [4 x i32], ptr addrspace(3) @lds1, i32 0, i32 3 + %val2 = atomicrmw add ptr addrspace(3) %gep.lds1, i32 5 acq_rel ret void } @gds_align8 = internal addrspace(2) global [4 x i32] undef, align 8 @gds_align32 = internal addrspace(2) global [4 x i32] undef, align 32 -define amdgpu_kernel void @gds_global_align(i32 addrspace(1)* %out) { +define amdgpu_kernel void @gds_global_align(ptr addrspace(1) %out) { ; GCN-LABEL: gds_global_align: ; GCN: ; %bb.0: ; GCN-NEXT: v_mov_b32_e32 v0, 5 @@ -74,14 +74,14 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: buffer_wbinvl1 ; GCN-NEXT: s_endpgm - %gep.gds0 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align8, i32 0, i32 3 - %val0 = atomicrmw add i32 addrspace(2)* %gep.gds0, i32 5 acq_rel - %gep.gds1 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align32, i32 0, i32 3 - %val1 = atomicrmw add i32 addrspace(2)* %gep.gds1, i32 5 acq_rel + %gep.gds0 = getelementptr [4 x i32], ptr addrspace(2) @gds_align8, i32 0, i32 3 + %val0 = atomicrmw add ptr addrspace(2) %gep.gds0, i32 5 acq_rel + %gep.gds1 = getelementptr [4 x i32], ptr addrspace(2) @gds_align32, i32 0, i32 3 + %val1 = atomicrmw add ptr addrspace(2) %gep.gds1, i32 5 acq_rel ret void } -define amdgpu_kernel void @gds_global_align_plus_attr(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @gds_global_align_plus_attr(ptr addrspace(1) %out) #0 { ; GCN-LABEL: gds_global_align_plus_attr: ; GCN: ; %bb.0: ; GCN-NEXT: v_mov_b32_e32 v0, 5 @@ -96,17 +96,17 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: buffer_wbinvl1 ; GCN-NEXT: s_endpgm - %gep.gds0 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align8, i32 0, i32 3 - %val0 = atomicrmw add i32 addrspace(2)* %gep.gds0, i32 5 acq_rel - %gep.gds1 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align32, i32 0, i32 3 - %val1 = atomicrmw add i32 addrspace(2)* %gep.gds1, i32 5 acq_rel + %gep.gds0 = getelementptr [4 x i32], ptr addrspace(2) @gds_align8, i32 0, i32 3 + %val0 = atomicrmw add ptr addrspace(2) %gep.gds0, i32 5 acq_rel + %gep.gds1 = getelementptr [4 x i32], ptr addrspace(2) @gds_align32, i32 0, i32 3 + %val1 = atomicrmw add ptr addrspace(2) %gep.gds1, i32 5 acq_rel ret void } @small.gds = internal addrspace(2) global i8 undef, align 1 @gds.external = external unnamed_addr addrspace(3) global [0 x i32], align 4 -define amdgpu_kernel void @gds_extern_align(i32 addrspace(1)* %out, [4 x i32] addrspace(2)* %gds.arg) #0 { +define 
amdgpu_kernel void @gds_extern_align(ptr addrspace(1) %out, ptr addrspace(2) %gds.arg) #0 { ; GCN-LABEL: gds_extern_align: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s0, s[0:1], 0x8 @@ -123,9 +123,9 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: buffer_wbinvl1 ; GCN-NEXT: s_endpgm - call void asm sideeffect "; use $0","s"(i8 addrspace(2)* @small.gds) - %gep.gds0 = getelementptr [4 x i32], [4 x i32] addrspace(2)* %gds.arg, i32 0, i32 3 - %val0 = atomicrmw add i32 addrspace(2)* %gep.gds0, i32 5 acq_rel + call void asm sideeffect "; use $0","s"(ptr addrspace(2) @small.gds) + %gep.gds0 = getelementptr [4 x i32], ptr addrspace(2) %gds.arg, i32 0, i32 3 + %val0 = atomicrmw add ptr addrspace(2) %gep.gds0, i32 5 acq_rel ret void } diff --git a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll --- a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll +++ b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll @@ -7,19 +7,19 @@ ; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s ; GCN-DAG: s_movk_i32 m0, 0x1000 ; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds -define amdgpu_kernel void @atomic_add_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 { - %val = atomicrmw volatile add i32 addrspace(2)* %gds, i32 5 acq_rel - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @atomic_add_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 { + %val = atomicrmw volatile add ptr addrspace(2) %gds, i32 5 acq_rel + store i32 %val, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}atomic_add_ret_gds_const_offset: ; GCN: s_movk_i32 m0, 0x80 ; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20 gds -define amdgpu_kernel void @atomic_add_ret_gds_const_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #0 { - %gep = getelementptr i32, i32 addrspace(2)* %gds, i32 5 - %val = atomicrmw volatile add i32 addrspace(2)* %gep, i32 5 acq_rel - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @atomic_add_ret_gds_const_offset(ptr addrspace(1) %out, ptr addrspace(2) %gds) #0 { + %gep = getelementptr i32, ptr addrspace(2) %gds, i32 5 + %val = atomicrmw volatile add ptr addrspace(2) %gep, i32 5 acq_rel + store i32 %val, ptr addrspace(1) %out ret void } @@ -27,9 +27,9 @@ ; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s ; GCN-DAG: s_movk_i32 m0, 0x1000 ; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds -define amdgpu_kernel void @atomic_sub_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 { - %val = atomicrmw sub i32 addrspace(2)* %gds, i32 5 acq_rel - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @atomic_sub_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 { + %val = atomicrmw sub ptr addrspace(2) %gds, i32 5 acq_rel + store i32 %val, ptr addrspace(1) %out ret void } @@ -37,9 +37,9 @@ ; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s ; GCN-DAG: s_movk_i32 m0, 0x1000 ; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds -define amdgpu_kernel void @atomic_and_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 { - %val = atomicrmw and i32 addrspace(2)* %gds, i32 5 acq_rel - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @atomic_and_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 { + %val = atomicrmw and ptr addrspace(2) %gds, i32 5 acq_rel + store i32 %val, ptr addrspace(1) %out ret void } @@ -47,9 +47,9 @@ ; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s ; GCN-DAG: s_movk_i32 m0, 0x1000 ; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds -define amdgpu_kernel void 
@atomic_or_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 { - %val = atomicrmw or i32 addrspace(2)* %gds, i32 5 acq_rel - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @atomic_or_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 { + %val = atomicrmw or ptr addrspace(2) %gds, i32 5 acq_rel + store i32 %val, ptr addrspace(1) %out ret void } @@ -57,9 +57,9 @@ ; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s ; GCN-DAG: s_movk_i32 m0, 0x1000 ; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds -define amdgpu_kernel void @atomic_xor_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 { - %val = atomicrmw xor i32 addrspace(2)* %gds, i32 5 acq_rel - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @atomic_xor_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 { + %val = atomicrmw xor ptr addrspace(2) %gds, i32 5 acq_rel + store i32 %val, ptr addrspace(1) %out ret void } @@ -67,9 +67,9 @@ ; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s ; GCN-DAG: s_movk_i32 m0, 0x1000 ; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds -define amdgpu_kernel void @atomic_umin_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 { - %val = atomicrmw umin i32 addrspace(2)* %gds, i32 5 acq_rel - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @atomic_umin_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 { + %val = atomicrmw umin ptr addrspace(2) %gds, i32 5 acq_rel + store i32 %val, ptr addrspace(1) %out ret void } @@ -77,9 +77,9 @@ ; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s ; GCN-DAG: s_movk_i32 m0, 0x1000 ; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds -define amdgpu_kernel void @atomic_umax_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 { - %val = atomicrmw umax i32 addrspace(2)* %gds, i32 5 acq_rel - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @atomic_umax_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 { + %val = atomicrmw umax ptr addrspace(2) %gds, i32 5 acq_rel + store i32 %val, ptr addrspace(1) %out ret void } @@ -87,9 +87,9 @@ ; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s ; GCN-DAG: s_movk_i32 m0, 0x1000 ; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds -define amdgpu_kernel void @atomic_imin_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 { - %val = atomicrmw min i32 addrspace(2)* %gds, i32 5 acq_rel - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @atomic_imin_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 { + %val = atomicrmw min ptr addrspace(2) %gds, i32 5 acq_rel + store i32 %val, ptr addrspace(1) %out ret void } @@ -97,9 +97,9 @@ ; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s ; GCN-DAG: s_movk_i32 m0, 0x1000 ; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds -define amdgpu_kernel void @atomic_imax_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 { - %val = atomicrmw max i32 addrspace(2)* %gds, i32 5 acq_rel - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @atomic_imax_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 { + %val = atomicrmw max ptr addrspace(2) %gds, i32 5 acq_rel + store i32 %val, ptr addrspace(1) %out ret void } @@ -107,9 +107,9 @@ ; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s ; GCN-DAG: s_movk_i32 m0, 0x1000 ; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds -define amdgpu_kernel void @atomic_xchg_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 { - %val = atomicrmw xchg i32 addrspace(2)* %gds, i32 5 acq_rel - 
store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @atomic_xchg_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 { + %val = atomicrmw xchg ptr addrspace(2) %gds, i32 5 acq_rel + store i32 %val, ptr addrspace(1) %out ret void } @@ -117,10 +117,10 @@ ; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s ; GCN-DAG: s_movk_i32 m0, 0x1000 ; GCN: ds_cmpst_rtn_b32 v{{[0-9]+}}, v[[OFF:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} gds -define amdgpu_kernel void @atomic_cmpxchg_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 { - %val = cmpxchg i32 addrspace(2)* %gds, i32 0, i32 1 acquire acquire +define amdgpu_kernel void @atomic_cmpxchg_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 { + %val = cmpxchg ptr addrspace(2) %gds, i32 0, i32 1 acquire acquire %x = extractvalue { i32, i1 } %val, 0 - store i32 %x, i32 addrspace(1)* %out + store i32 %x, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/gep-address-space.ll b/llvm/test/CodeGen/AMDGPU/gep-address-space.ll --- a/llvm/test/CodeGen/AMDGPU/gep-address-space.ll +++ b/llvm/test/CodeGen/AMDGPU/gep-address-space.ll @@ -2,12 +2,12 @@ ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s -define amdgpu_kernel void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind { +define amdgpu_kernel void @use_gep_address_space(ptr addrspace(3) %array) nounwind { ; CHECK-LABEL: {{^}}use_gep_address_space: ; CHECK: v_mov_b32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}} ; CHECK: ds_write_b32 [[PTR]], v{{[0-9]+}} offset:64 - %p = getelementptr [1024 x i32], [1024 x i32] addrspace(3)* %array, i16 0, i16 16 - store i32 99, i32 addrspace(3)* %p + %p = getelementptr [1024 x i32], ptr addrspace(3) %array, i16 0, i16 16 + store i32 99, ptr addrspace(3) %p ret void } @@ -17,9 +17,9 @@ ; SI: s_bitset1_b32 ; CI: s_add_i32 ; CHECK: ds_write_b32 -define amdgpu_kernel void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %array) nounwind { - %p = getelementptr [1024 x i32], [1024 x i32] addrspace(3)* %array, i16 0, i16 16384 - store i32 99, i32 addrspace(3)* %p +define amdgpu_kernel void @use_gep_address_space_large_offset(ptr addrspace(3) %array) nounwind { + %p = getelementptr [1024 x i32], ptr addrspace(3) %array, i16 0, i16 16384 + store i32 99, ptr addrspace(3) %p ret void } @@ -39,16 +39,16 @@ ; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64 ; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64 ; CHECK: s_endpgm -define amdgpu_kernel void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind { - %p = getelementptr [1024 x i32], <4 x [1024 x i32] addrspace(3)*> %array, <4 x i16> zeroinitializer, <4 x i16> - %p0 = extractelement <4 x i32 addrspace(3)*> %p, i32 0 - %p1 = extractelement <4 x i32 addrspace(3)*> %p, i32 1 - %p2 = extractelement <4 x i32 addrspace(3)*> %p, i32 2 - %p3 = extractelement <4 x i32 addrspace(3)*> %p, i32 3 - store i32 99, i32 addrspace(3)* %p0 - store i32 99, i32 addrspace(3)* %p1 - store i32 99, i32 addrspace(3)* %p2 - store i32 99, i32 addrspace(3)* %p3 +define amdgpu_kernel void @gep_as_vector_v4(<4 x ptr addrspace(3)> %array) nounwind { + %p = getelementptr [1024 x i32], <4 x ptr addrspace(3)> %array, <4 x i16> zeroinitializer, <4 x i16> + %p0 = extractelement <4 x ptr addrspace(3)> %p, i32 0 + %p1 = extractelement <4 x ptr addrspace(3)> %p, i32 1 + %p2 = extractelement <4 x ptr 
addrspace(3)> %p, i32 2 + %p3 = extractelement <4 x ptr addrspace(3)> %p, i32 3 + store i32 99, ptr addrspace(3) %p0 + store i32 99, ptr addrspace(3) %p1 + store i32 99, ptr addrspace(3) %p2 + store i32 99, ptr addrspace(3) %p3 ret void } @@ -60,12 +60,12 @@ ; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64 ; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64 ; CHECK: s_endpgm -define amdgpu_kernel void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind { - %p = getelementptr [1024 x i32], <2 x [1024 x i32] addrspace(3)*> %array, <2 x i16> zeroinitializer, <2 x i16> - %p0 = extractelement <2 x i32 addrspace(3)*> %p, i32 0 - %p1 = extractelement <2 x i32 addrspace(3)*> %p, i32 1 - store i32 99, i32 addrspace(3)* %p0 - store i32 99, i32 addrspace(3)* %p1 +define amdgpu_kernel void @gep_as_vector_v2(<2 x ptr addrspace(3)> %array) nounwind { + %p = getelementptr [1024 x i32], <2 x ptr addrspace(3)> %array, <2 x i16> zeroinitializer, <2 x i16> + %p0 = extractelement <2 x ptr addrspace(3)> %p, i32 0 + %p1 = extractelement <2 x ptr addrspace(3)> %p, i32 1 + store i32 99, ptr addrspace(3) %p0 + store i32 99, ptr addrspace(3) %p1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/gep-const-address-space.ll b/llvm/test/CodeGen/AMDGPU/gep-const-address-space.ll --- a/llvm/test/CodeGen/AMDGPU/gep-const-address-space.ll +++ b/llvm/test/CodeGen/AMDGPU/gep-const-address-space.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=gfx90a < %s | FileCheck %s -declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* nocapture, double) #8 +declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr nocapture, double) #8 -define protected amdgpu_kernel void @IllegalGEPConst(i32 %a, double addrspace(1)* %b, double %c) { +define protected amdgpu_kernel void @IllegalGEPConst(i32 %a, ptr addrspace(1) %b, double %c) { ; CHECK-LABEL: IllegalGEPConst: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_load_dword s2, s[0:1], 0x24 @@ -21,9 +21,9 @@ entry: %i = add nsw i32 %a, -1 %i.2 = sext i32 %i to i64 - %i.3 = getelementptr inbounds double, double addrspace(1)* %b, i64 %i.2 - %i.4 = addrspacecast double addrspace(1)* %i.3 to double* - %i.5 = tail call contract double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %i.4, double %c) #8 + %i.3 = getelementptr inbounds double, ptr addrspace(1) %b, i64 %i.2 + %i.4 = addrspacecast ptr addrspace(1) %i.3 to ptr + %i.5 = tail call contract double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %i.4, double %c) #8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll @@ -88,8 +88,8 @@ ; Structs declare hidden amdgpu_gfx void @external_void_func_struct_i8_i32({ i8, i32 }) #0 -declare hidden amdgpu_gfx void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 })) #0 -declare hidden amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }), { i8, i32 } addrspace(5)* byval({ i8, i32 })) #0 +declare hidden amdgpu_gfx void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 })) #0 +declare hidden amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0 declare hidden 
amdgpu_gfx void @external_void_func_v16i8(<16 x i8>) #0 @@ -336,7 +336,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %var = load volatile i1, i1 addrspace(1)* undef + %var = load volatile i1, ptr addrspace(1) undef call amdgpu_gfx void @external_void_func_i1_signext(i1 signext%var) ret void } @@ -464,7 +464,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %var = load volatile i1, i1 addrspace(1)* undef + %var = load volatile i1, ptr addrspace(1) undef call amdgpu_gfx void @external_void_func_i1_zeroext(i1 zeroext %var) ret void } @@ -701,7 +701,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %var = load volatile i8, i8 addrspace(1)* undef + %var = load volatile i8, ptr addrspace(1) undef call amdgpu_gfx void @external_void_func_i8_signext(i8 signext %var) ret void } @@ -822,7 +822,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %var = load volatile i8, i8 addrspace(1)* undef + %var = load volatile i8, ptr addrspace(1) undef call amdgpu_gfx void @external_void_func_i8_zeroext(i8 zeroext %var) ret void } @@ -1059,7 +1059,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %var = load volatile i16, i16 addrspace(1)* undef + %var = load volatile i16, ptr addrspace(1) undef call amdgpu_gfx void @external_void_func_i16_signext(i16 signext %var) ret void } @@ -1180,7 +1180,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %var = load volatile i16, i16 addrspace(1)* undef + %var = load volatile i16, ptr addrspace(1) undef call amdgpu_gfx void @external_void_func_i16_zeroext(i16 zeroext %var) ret void } @@ -1540,7 +1540,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %val = load <2 x i64>, <2 x i64> addrspace(1)* null + %val = load <2 x i64>, ptr addrspace(1) null call amdgpu_gfx void @external_void_func_v2i64(<2 x i64> %val) ret void } @@ -1797,7 +1797,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %load = load <2 x i64>, <2 x i64> addrspace(1)* null + %load = load <2 x i64>, ptr addrspace(1) null %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32> call amdgpu_gfx void @external_void_func_v3i64(<3 x i64> %val) @@ -1937,7 +1937,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %load = load <2 x i64>, <2 x i64> addrspace(1)* null + %load = load <2 x i64>, ptr addrspace(1) null %val = shufflevector <2 x i64> %load, <2 x i64> , <4 x i32> call amdgpu_gfx void @external_void_func_v4i64(<4 x i64> %val) ret void @@ -3037,7 +3037,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %val = load <2 x i16>, <2 x i16> addrspace(1)* undef + %val = load <2 x i16>, ptr addrspace(1) undef call amdgpu_gfx void @external_void_func_v2i16(<2 x i16> %val) ret void } @@ -3154,7 +3154,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt 
vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %val = load <3 x i16>, <3 x i16> addrspace(1)* undef + %val = load <3 x i16>, ptr addrspace(1) undef call amdgpu_gfx void @external_void_func_v3i16(<3 x i16> %val) ret void } @@ -3271,7 +3271,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %val = load <3 x half>, <3 x half> addrspace(1)* undef + %val = load <3 x half>, ptr addrspace(1) undef call amdgpu_gfx void @external_void_func_v3f16(<3 x half> %val) ret void } @@ -3627,7 +3627,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %val = load <4 x i16>, <4 x i16> addrspace(1)* undef + %val = load <4 x i16>, ptr addrspace(1) undef call amdgpu_gfx void @external_void_func_v4i16(<4 x i16> %val) ret void } @@ -3864,7 +3864,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %val = load <2 x half>, <2 x half> addrspace(1)* undef + %val = load <2 x half>, ptr addrspace(1) undef call amdgpu_gfx void @external_void_func_v2f16(<2 x half> %val) ret void } @@ -3981,7 +3981,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %val = load <2 x i32>, <2 x i32> addrspace(1)* undef + %val = load <2 x i32>, ptr addrspace(1) undef call amdgpu_gfx void @external_void_func_v2i32(<2 x i32> %val) ret void } @@ -4466,7 +4466,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %val = load <4 x i32>, <4 x i32> addrspace(1)* undef + %val = load <4 x i32>, ptr addrspace(1) undef call amdgpu_gfx void @external_void_func_v4i32(<4 x i32> %val) ret void } @@ -4858,8 +4858,8 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef - %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <8 x i32>, ptr addrspace(1) %ptr call amdgpu_gfx void @external_void_func_v8i32(<8 x i32> %val) ret void } @@ -5143,8 +5143,8 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef - %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i32>, ptr addrspace(1) %ptr call amdgpu_gfx void @external_void_func_v16i32(<16 x i32> %val) ret void } @@ -5305,8 +5305,8 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef - %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <32 x i32>, ptr addrspace(1) %ptr call amdgpu_gfx void @external_void_func_v32i32(<32 x i32> %val) ret void } @@ -5478,14 +5478,14 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef - %val0 = load <32 x i32>, <32 x i32> 
addrspace(1)* %ptr0 - %val1 = load i32, i32 addrspace(1)* undef + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val0 = load <32 x i32>, ptr addrspace(1) %ptr0 + %val1 = load i32, ptr addrspace(1) undef call amdgpu_gfx void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1) ret void } -define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 { +define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 { ; GFX9-LABEL: test_call_external_i32_func_i32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -5632,7 +5632,7 @@ ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = call amdgpu_gfx i32 @external_i32_func_i32(i32 42) - store volatile i32 %val, i32 addrspace(1)* %out + store volatile i32 %val, ptr addrspace(1) %out ret void } @@ -5767,8 +5767,8 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef - %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val = load { i8, i32 }, ptr addrspace(1) %ptr0 call amdgpu_gfx void @external_void_func_struct_i8_i32({ i8, i32 } %val) ret void } @@ -5902,11 +5902,11 @@ ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = alloca { i8, i32 }, align 4, addrspace(5) - %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 0 - %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 1 - store i8 3, i8 addrspace(5)* %gep0 - store i32 8, i32 addrspace(5)* %gep1 - call amdgpu_gfx void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %val) + %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 0 + %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 1 + store i8 3, ptr addrspace(5) %gep0 + store i32 8, ptr addrspace(5) %gep1 + call amdgpu_gfx void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %val) ret void } @@ -6077,18 +6077,18 @@ ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %in.val = alloca { i8, i32 }, align 4, addrspace(5) %out.val = alloca { i8, i32 }, align 4, addrspace(5) - %in.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 0 - %in.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 1 - store i8 3, i8 addrspace(5)* %in.gep0 - store i32 8, i32 addrspace(5)* %in.gep1 - call amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %out.val, { i8, i32 } addrspace(5)* byval({ i8, i32 }) %in.val) - %out.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 0 - %out.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 1 - %out.val0 = load i8, i8 addrspace(5)* %out.gep0 - %out.val1 = load i32, i32 addrspace(5)* %out.gep1 + %in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0 + %in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 1 + store i8 3, ptr addrspace(5) %in.gep0 + store i32 8, ptr addrspace(5) %in.gep1 + call amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, 
i32 }) %out.val, ptr addrspace(5) byval({ i8, i32 }) %in.val) + %out.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 0 + %out.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 1 + %out.val0 = load i8, ptr addrspace(5) %out.gep0 + %out.val1 = load i32, ptr addrspace(5) %out.gep1 - store volatile i8 %out.val0, i8 addrspace(1)* undef - store volatile i32 %out.val1, i32 addrspace(1)* undef + store volatile i8 %out.val0, ptr addrspace(1) undef + store volatile i32 %out.val1, ptr addrspace(1) undef ret void } @@ -6291,8 +6291,8 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef - %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i8>, ptr addrspace(1) %ptr call amdgpu_gfx void @external_void_func_v16i8(<16 x i8> %val) ret void } @@ -6671,7 +6671,7 @@ ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] entry: %alloca = alloca double, align 8, addrspace(5) - tail call amdgpu_gfx void @byval_align16_f64_arg(<32 x i32> %val, double addrspace(5)* byval(double) align 16 %alloca) + tail call amdgpu_gfx void @byval_align16_f64_arg(<32 x i32> %val, ptr addrspace(5) byval(double) align 16 %alloca) ret void } @@ -7452,7 +7452,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %val = load <2 x i64>, <2 x i64> addrspace(4)* null + %val = load <2 x i64>, ptr addrspace(4) null call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg %val) ret void } @@ -7789,7 +7789,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %load = load <2 x i64>, <2 x i64> addrspace(4)* null + %load = load <2 x i64>, ptr addrspace(4) null %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32> call amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg %val) @@ -7992,7 +7992,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %load = load <2 x i64>, <2 x i64> addrspace(4)* null + %load = load <2 x i64>, ptr addrspace(4) null %val = shufflevector <2 x i64> %load, <2 x i64> , <4 x i32> call amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> inreg %val) ret void @@ -9302,7 +9302,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %val = load <2 x i16>, <2 x i16> addrspace(4)* undef + %val = load <2 x i16>, ptr addrspace(4) undef call amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> inreg %val) ret void } @@ -9435,7 +9435,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %val = load <3 x i16>, <3 x i16> addrspace(4)* undef + %val = load <3 x i16>, ptr addrspace(4) undef call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg %val) ret void } @@ -9568,7 +9568,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %val = load <3 x half>, <3 x half> addrspace(4)* undef + %val = load <3 x half>, ptr addrspace(4) undef call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg %val) ret void } @@ 
-9973,7 +9973,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %val = load <4 x i16>, <4 x i16> addrspace(4)* undef + %val = load <4 x i16>, ptr addrspace(4) undef call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg %val) ret void } @@ -10234,7 +10234,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %val = load <2 x half>, <2 x half> addrspace(4)* undef + %val = load <2 x half>, ptr addrspace(4) undef call amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> inreg %val) ret void } @@ -10367,7 +10367,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %val = load <2 x i32>, <2 x i32> addrspace(4)* undef + %val = load <2 x i32>, ptr addrspace(4) undef call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg %val) ret void } @@ -10960,7 +10960,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %val = load <4 x i32>, <4 x i32> addrspace(4)* undef + %val = load <4 x i32>, ptr addrspace(4) undef call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg %val) ret void } @@ -11481,8 +11481,8 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %ptr = load <8 x i32> addrspace(4)*, <8 x i32> addrspace(4)* addrspace(4)* undef - %val = load <8 x i32>, <8 x i32> addrspace(4)* %ptr + %ptr = load ptr addrspace(4), ptr addrspace(4) undef + %val = load <8 x i32>, ptr addrspace(4) %ptr call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg %val) ret void } @@ -11943,8 +11943,8 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %ptr = load <16 x i32> addrspace(4)*, <16 x i32> addrspace(4)* addrspace(4)* undef - %val = load <16 x i32>, <16 x i32> addrspace(4)* %ptr + %ptr = load ptr addrspace(4), ptr addrspace(4) undef + %val = load <16 x i32>, ptr addrspace(4) %ptr call amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> inreg %val) ret void } @@ -12366,8 +12366,8 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %ptr = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef - %val = load <32 x i32>, <32 x i32> addrspace(4)* %ptr + %ptr = load ptr addrspace(4), ptr addrspace(4) undef + %val = load <32 x i32>, ptr addrspace(4) %ptr call amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> inreg %val) ret void } @@ -12807,9 +12807,9 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] - %ptr0 = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef - %val0 = load <32 x i32>, <32 x i32> addrspace(4)* %ptr0 - %val1 = load i32, i32 addrspace(4)* undef + %ptr0 = load ptr addrspace(4), ptr addrspace(4) undef + %val0 = load <32 x i32>, ptr addrspace(4) %ptr0 + %val1 = load i32, ptr addrspace(4) undef call amdgpu_gfx void @external_void_func_v32i32_i32_inreg(<32 x i32> inreg %val0, i32 inreg %val1) ret void } @@ -13776,7 +13776,7 @@ ret void } -declare hidden amdgpu_gfx void @byval_align16_f64_arg(<32 x i32>, double addrspace(5)* byval(double) align 16) 
#0 +declare hidden amdgpu_gfx void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) #0 declare hidden amdgpu_gfx void @stack_passed_f64_arg(<32 x i32>, double) #0 declare hidden amdgpu_gfx void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0 diff --git a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll --- a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll @@ -46,7 +46,7 @@ ; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 0 define amdgpu_kernel void @minimal_kernel_inputs() { %id = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %id, i32 addrspace(1)* undef + store volatile i32 %id, ptr addrspace(1) undef ret void } @@ -75,8 +75,8 @@ define amdgpu_kernel void @minimal_kernel_inputs_with_stack() { %alloca = alloca i32, addrspace(5) %id = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %id, i32 addrspace(1)* undef - store volatile i32 0, i32 addrspace(5)* %alloca + store volatile i32 %id, ptr addrspace(1) undef + store volatile i32 0, ptr addrspace(5) %alloca ret void } @@ -105,10 +105,10 @@ ; WORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 15 ; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 2 define amdgpu_kernel void @queue_ptr() { - %queue.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 - %load = load volatile i8, i8 addrspace(4)* %queue.ptr + %queue.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0 + %load = load volatile i8, ptr addrspace(4) %queue.ptr %id = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %id, i32 addrspace(1)* undef + store volatile i32 %id, ptr addrspace(1) undef ret void } @@ -152,28 +152,28 @@ ; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 8 define amdgpu_kernel void @all_inputs() { %alloca = alloca i32, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %alloca + store volatile i32 0, ptr addrspace(5) %alloca - %dispatch.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %load.dispatch = load volatile i8, i8 addrspace(4)* %dispatch.ptr + %dispatch.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %load.dispatch = load volatile i8, ptr addrspace(4) %dispatch.ptr - %queue.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() - %load.queue = load volatile i8, i8 addrspace(4)* %queue.ptr + %queue.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() + %load.queue = load volatile i8, ptr addrspace(4) %queue.ptr - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - %load.implicitarg = load volatile i8, i8 addrspace(4)* %implicitarg.ptr + %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %load.implicitarg = load volatile i8, ptr addrspace(4) %implicitarg.ptr %id.x = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %id.x, i32 addrspace(1)* undef + store volatile i32 %id.x, ptr addrspace(1) undef %id.y = call i32 @llvm.amdgcn.workgroup.id.y() - store volatile i32 %id.y, i32 addrspace(1)* undef + store volatile i32 %id.y, ptr addrspace(1) undef %id.z = call i32 @llvm.amdgcn.workgroup.id.z() - store volatile i32 %id.z, i32 addrspace(1)* undef + store volatile i32 %id.z, ptr addrspace(1) undef %dispatch.id = call i64 @llvm.amdgcn.dispatch.id() - store volatile i64 %dispatch.id, i64 addrspace(1)* undef + store volatile i64 
%dispatch.id, ptr addrspace(1) undef ret void } @@ -181,10 +181,10 @@ declare i32 @llvm.amdgcn.workgroup.id.x() #0 declare i32 @llvm.amdgcn.workgroup.id.y() #0 declare i32 @llvm.amdgcn.workgroup.id.z() #0 -declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0 -declare align 4 i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 -declare align 4 i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 -declare align 4 i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0 +declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0 +declare align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0 +declare align 4 ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0 +declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0 declare i64 @llvm.amdgcn.dispatch.id() #0 attributes #0 = { nounwind readnone speculatable willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll b/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll --- a/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll @@ -1,8 +1,8 @@ ; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 --amdhsa-code-object-version=2 -mattr=-xnack < %s | FileCheck %s ; CHECK: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU" -define amdgpu_kernel void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind { - store float 0.0, float addrspace(1)* %out0 +define amdgpu_kernel void @test_kernel(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind { + store float 0.0, ptr addrspace(1) %out0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/gfx90a-enc.ll b/llvm/test/CodeGen/AMDGPU/gfx90a-enc.ll --- a/llvm/test/CodeGen/AMDGPU/gfx90a-enc.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx90a-enc.ll @@ -8,11 +8,11 @@ define amdgpu_kernel void @test(<4 x i32> %x) #0 { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %r1 = tail call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %x, i32 %id, i32 0, i32 0, i32 0) - store volatile <4 x float> %r1, <4 x float>* undef + store volatile <4 x float> %r1, ptr undef %r2 = tail call <4 x half> @llvm.amdgcn.struct.buffer.load.format.v4f16(<4 x i32> %x, i32 %id, i32 0, i32 0, i32 0) - store volatile <4 x half> %r2, <4 x half>* undef + store volatile <4 x half> %r2, ptr undef %r3 = tail call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 1, i32 2, <4 x i32> %x, i32 0, i32 0, i32 0) - store <4 x i32> %r3, <4 x i32>* undef + store <4 x i32> %r3, ptr undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx1030.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx1030.ll --- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx1030.ll +++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx1030.ll @@ -5,60 +5,56 @@ ; amdgcn atomic csub ; -------------------------------------------------------------------------------- -define amdgpu_ps float @global_csub_saddr_i32_rtn(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) { +define amdgpu_ps float @global_csub_saddr_i32_rtn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GCN-LABEL: global_csub_saddr_i32_rtn: ; GCN: ; %bb.0: ; GCN-NEXT: global_atomic_csub v0, v0, v1, s[2:3] glc ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 - %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset - %cast.gep0 = bitcast i8 addrspace(1)* %gep0 to i32 addrspace(1)* - %rtn = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %cast.gep0, i32 %data) + %gep0 = 
getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset + %rtn = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %gep0, i32 %data) %cast.rtn = bitcast i32 %rtn to float ret float %cast.rtn } -define amdgpu_ps float @global_csub_saddr_i32_rtn_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) { +define amdgpu_ps float @global_csub_saddr_i32_rtn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GCN-LABEL: global_csub_saddr_i32_rtn_neg128: ; GCN: ; %bb.0: ; GCN-NEXT: global_atomic_csub v0, v0, v1, s[2:3] offset:-128 glc ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 - %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset - %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128 - %cast.gep1 = bitcast i8 addrspace(1)* %gep1 to i32 addrspace(1)* - %rtn = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %cast.gep1, i32 %data) + %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 + %rtn = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %gep1, i32 %data) %cast.rtn = bitcast i32 %rtn to float ret float %cast.rtn } -define amdgpu_ps void @global_csub_saddr_i32_nortn(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) { +define amdgpu_ps void @global_csub_saddr_i32_nortn(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GCN-LABEL: global_csub_saddr_i32_nortn: ; GCN: ; %bb.0: ; GCN-NEXT: global_atomic_csub v0, v0, v1, s[2:3] glc ; GCN-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 - %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset - %cast.gep0 = bitcast i8 addrspace(1)* %gep0 to i32 addrspace(1)* - %unused = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %cast.gep0, i32 %data) + %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset + %unused = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %gep0, i32 %data) ret void } -define amdgpu_ps void @global_csub_saddr_i32_nortn_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) { +define amdgpu_ps void @global_csub_saddr_i32_nortn_neg128(ptr addrspace(1) inreg %sbase, i32 %voffset, i32 %data) { ; GCN-LABEL: global_csub_saddr_i32_nortn_neg128: ; GCN: ; %bb.0: ; GCN-NEXT: global_atomic_csub v0, v0, v1, s[2:3] offset:-128 glc ; GCN-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 - %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset - %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128 - %cast.gep1 = bitcast i8 addrspace(1)* %gep1 to i32 addrspace(1)* - %unused = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %cast.gep1, i32 %data) + %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -128 + %unused = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %gep1, i32 %data) ret void } -declare i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* nocapture, i32) #0 +declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #0 attributes #0 = { argmemonly nounwind willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/global-smrd-unknown.ll b/llvm/test/CodeGen/AMDGPU/global-smrd-unknown.ll --- a/llvm/test/CodeGen/AMDGPU/global-smrd-unknown.ll +++ 
b/llvm/test/CodeGen/AMDGPU/global-smrd-unknown.ll @@ -4,13 +4,13 @@ ; GCN: flat_load_dword ; GCN: flat_load_dword ; GCN: flat_store_dword -define void @unknown_memdep_analysis(float addrspace(1)* nocapture readonly %arg, float %arg1) #0 { +define void @unknown_memdep_analysis(ptr addrspace(1) nocapture readonly %arg, float %arg1) #0 { bb: - %tmp53 = load float, float addrspace(1)* undef, align 4 - %tmp54 = getelementptr inbounds float, float addrspace(1)* %arg, i32 31 - %tmp55 = load float, float addrspace(1)* %tmp54, align 4 + %tmp53 = load float, ptr addrspace(1) undef, align 4 + %tmp54 = getelementptr inbounds float, ptr addrspace(1) %arg, i32 31 + %tmp55 = load float, ptr addrspace(1) %tmp54, align 4 %tmp56 = tail call float @llvm.fmuladd.f32(float %arg1, float %tmp53, float %tmp55) - store float %tmp56, float addrspace(1)* undef, align 4 + store float %tmp56, ptr addrspace(1) undef, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/gv-const-addrspace.ll b/llvm/test/CodeGen/AMDGPU/gv-const-addrspace.ll --- a/llvm/test/CodeGen/AMDGPU/gv-const-addrspace.ll +++ b/llvm/test/CodeGen/AMDGPU/gv-const-addrspace.ll @@ -15,11 +15,11 @@ ; EG: @float_gv ; EG-NOT: MOVA_INT ; EG-NOT: MOV -define amdgpu_kernel void @float(float addrspace(1)* %out, i32 %index) { +define amdgpu_kernel void @float(ptr addrspace(1) %out, i32 %index) { entry: - %0 = getelementptr inbounds [5 x float], [5 x float] addrspace(4)* @float_gv, i32 0, i32 %index - %1 = load float, float addrspace(4)* %0 - store float %1, float addrspace(1)* %out + %0 = getelementptr inbounds [5 x float], ptr addrspace(4) @float_gv, i32 0, i32 %index + %1 = load float, ptr addrspace(4) %0 + store float %1, ptr addrspace(1) %out ret void } @@ -33,11 +33,11 @@ ; EG: @i32_gv ; EG-NOT: MOVA_INT ; EG-NOT: MOV -define amdgpu_kernel void @i32(i32 addrspace(1)* %out, i32 %index) { +define amdgpu_kernel void @i32(ptr addrspace(1) %out, i32 %index) { entry: - %0 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(4)* @i32_gv, i32 0, i32 %index - %1 = load i32, i32 addrspace(4)* %0 - store i32 %1, i32 addrspace(1)* %out + %0 = getelementptr inbounds [5 x i32], ptr addrspace(4) @i32_gv, i32 0, i32 %index + %1 = load i32, ptr addrspace(4) %0 + store i32 %1, ptr addrspace(1) %out ret void } @@ -53,10 +53,10 @@ ; EG: @struct_foo_gv ; EG-NOT: MOVA_INT ; EG-NOT: MOV -define amdgpu_kernel void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) { - %gep = getelementptr inbounds [1 x %struct.foo], [1 x %struct.foo] addrspace(4)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index - %load = load i32, i32 addrspace(4)* %gep, align 4 - store i32 %load, i32 addrspace(1)* %out, align 4 +define amdgpu_kernel void @struct_foo_gv_load(ptr addrspace(1) %out, i32 %index) { + %gep = getelementptr inbounds [1 x %struct.foo], ptr addrspace(4) @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index + %load = load i32, ptr addrspace(4) %gep, align 4 + store i32 %load, ptr addrspace(1) %out, align 4 ret void } @@ -72,10 +72,10 @@ ; EG: @array_v1_gv ; EG-NOT: MOVA_INT ; EG-NOT: MOV -define amdgpu_kernel void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) { - %gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(4)* @array_v1_gv, i32 0, i32 %index - %load = load <1 x i32>, <1 x i32> addrspace(4)* %gep, align 4 - store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4 +define amdgpu_kernel void @array_v1_gv_load(ptr addrspace(1) %out, i32 %index) { + %gep = getelementptr inbounds [4 x <1 x i32>], ptr addrspace(4) @array_v1_gv, i32 0, i32 %index 
+ %load = load <1 x i32>, ptr addrspace(4) %gep, align 4 + store <1 x i32> %load, ptr addrspace(1) %out, align 4 ret void } @@ -84,19 +84,19 @@ ; EG: VTX_READ_32 ; EG: @float_gv ; EG-NOT: MOVA_INT -define amdgpu_kernel void @gv_addressing_in_branch(float addrspace(1)* %out, i32 %index, i32 %a) { +define amdgpu_kernel void @gv_addressing_in_branch(ptr addrspace(1) %out, i32 %index, i32 %a) { entry: %0 = icmp eq i32 0, %a br i1 %0, label %if, label %else if: - %1 = getelementptr inbounds [5 x float], [5 x float] addrspace(4)* @float_gv, i32 0, i32 %index - %2 = load float, float addrspace(4)* %1 - store float %2, float addrspace(1)* %out + %1 = getelementptr inbounds [5 x float], ptr addrspace(4) @float_gv, i32 0, i32 %index + %2 = load float, ptr addrspace(4) %1 + store float %2, ptr addrspace(1) %out br label %endif else: - store float 1.0, float addrspace(1)* %out + store float 1.0, ptr addrspace(1) %out br label %endif endif: diff --git a/llvm/test/CodeGen/AMDGPU/hip.extern.shared.array.ll b/llvm/test/CodeGen/AMDGPU/hip.extern.shared.array.ll --- a/llvm/test/CodeGen/AMDGPU/hip.extern.shared.array.ll +++ b/llvm/test/CodeGen/AMDGPU/hip.extern.shared.array.ll @@ -12,19 +12,19 @@ ; CHECK-LABEL: {{^}}dynamic_shared_array_0: ; CHECK: v_add_u32_e32 v{{[0-9]+}}, 0x800, v{{[0-9]+}} -define amdgpu_kernel void @dynamic_shared_array_0(float addrspace(1)* %out) { +define amdgpu_kernel void @dynamic_shared_array_0(ptr addrspace(1) %out) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() - %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %tid.x - %val0 = load float, float addrspace(3)* %arrayidx0, align 4 - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val0, float addrspace(3)* %arrayidx1, align 4 + %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %tid.x + %val0 = load float, ptr addrspace(3) %arrayidx0, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val0, ptr addrspace(3) %arrayidx1, align 4 ret void } ; CHECK-LABEL: {{^}}dynamic_shared_array_1: ; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0xc00 ; CHECK: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]] -define amdgpu_kernel void @dynamic_shared_array_1(float addrspace(1)* %out, i32 %cond) { +define amdgpu_kernel void @dynamic_shared_array_1(ptr addrspace(1) %out, i32 %cond) { entry: %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %idx.0 = add nsw i32 %tid.x, 64 @@ -32,19 +32,19 @@ br i1 %tmp, label %if, label %else if: ; preds = %entry - %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0 - %val0 = load float, float addrspace(3)* %arrayidx0, align 4 + %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0 + %val0 = load float, ptr addrspace(3) %arrayidx0, align 4 br label %endif else: ; preds = %entry - %arrayidx1 = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0 - %val1 = load float, float addrspace(3)* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds [256 x float], ptr addrspace(3) @lds1, i32 0, i32 %idx.0 + %val1 = load float, ptr addrspace(3) %arrayidx1, align 4 br label %endif endif: ; preds = %else, %if %val = phi float [ %val0, %if ], [ %val1, %else ] - %arrayidx = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 
%tid.x - store float %val, float addrspace(3)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val, ptr addrspace(3) %arrayidx, align 4 ret void } @@ -54,10 +54,10 @@ define amdgpu_kernel void @dynamic_shared_array_2(i32 %idx) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %vidx = add i32 %tid.x, %idx - %arrayidx0 = getelementptr inbounds [4096 x float], [4096 x float] addrspace(3)* @lds2, i32 0, i32 %vidx - %val0 = load float, float addrspace(3)* %arrayidx0, align 4 - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val0, float addrspace(3)* %arrayidx1, align 4 + %arrayidx0 = getelementptr inbounds [4096 x float], ptr addrspace(3) @lds2, i32 0, i32 %vidx + %val0 = load float, ptr addrspace(3) %arrayidx0, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val0, ptr addrspace(3) %arrayidx1, align 4 ret void } @@ -69,11 +69,11 @@ define amdgpu_kernel void @dynamic_shared_array_3(i32 %idx) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %vidx = add i32 %tid.x, %idx - %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx - %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4 + %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx + %val0 = load i8, ptr addrspace(3) %arrayidx0, align 4 %val1 = uitofp i8 %val0 to float - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val1, float addrspace(3)* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val1, ptr addrspace(3) %arrayidx1, align 4 ret void } @@ -86,14 +86,14 @@ define amdgpu_kernel void @dynamic_shared_array_4(i32 %idx) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %vidx = add i32 %tid.x, %idx - %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx - %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4 + %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx + %val0 = load i8, ptr addrspace(3) %arrayidx0, align 4 %val1 = uitofp i8 %val0 to float %val2 = uitofp i8 %val0 to double - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val1, float addrspace(3)* %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared1, i32 0, i32 %tid.x - store double %val2, double addrspace(3)* %arrayidx2, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val1, ptr addrspace(3) %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_shared1, i32 0, i32 %tid.x + store double %val2, ptr addrspace(3) %arrayidx2, align 4 ret void } @@ -105,14 +105,14 @@ define amdgpu_kernel void @dynamic_shared_array_5(i32 %idx) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %vidx = add i32 %tid.x, %idx - %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx - %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4 + %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx + %val0 = load 
i8, ptr addrspace(3) %arrayidx0, align 4 %val1 = uitofp i8 %val0 to float %val2 = uitofp i8 %val0 to double - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val1, float addrspace(3)* %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared2, i32 0, i32 %tid.x - store double %val2, double addrspace(3)* %arrayidx2, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val1, ptr addrspace(3) %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_shared2, i32 0, i32 %tid.x + store double %val2, ptr addrspace(3) %arrayidx2, align 4 ret void } @@ -124,24 +124,24 @@ define amdgpu_kernel void @dynamic_shared_array_6(i32 %idx) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %vidx = add i32 %tid.x, %idx - %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx - %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4 + %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx + %val0 = load i8, ptr addrspace(3) %arrayidx0, align 4 %val1 = uitofp i8 %val0 to float %val2 = uitofp i8 %val0 to double - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val1, float addrspace(3)* %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared3, i32 0, i32 %tid.x - store double %val2, double addrspace(3)* %arrayidx2, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val1, ptr addrspace(3) %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_shared3, i32 0, i32 %tid.x + store double %val2, ptr addrspace(3) %arrayidx2, align 4 ret void } ; CHECK-LABEL: dynamic_shared_array_with_call: ; CHECK-NOT: s_swappc_b64 -define amdgpu_kernel void @dynamic_shared_array_with_call(float addrspace(1)* nocapture readnone %out) local_unnamed_addr { +define amdgpu_kernel void @dynamic_shared_array_with_call(ptr addrspace(1) nocapture readnone %out) local_unnamed_addr { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %1 = sext i32 %tid.x to i64 - %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i64 0, i64 %1 - %val0 = load float, float addrspace(3)* %arrayidx0, align 4 + %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i64 0, i64 %1 + %val0 = load float, ptr addrspace(3) %arrayidx0, align 4 tail call void @store_value(float %val0) ret void } @@ -151,8 +151,8 @@ entry: %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %0 = sext i32 %tid.x to i64 - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i64 0, i64 %0 - store float %val1, float addrspace(3)* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i64 0, i64 %0 + store float %val1, ptr addrspace(3) %arrayidx1, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/hoist-cond.ll b/llvm/test/CodeGen/AMDGPU/hoist-cond.ll --- a/llvm/test/CodeGen/AMDGPU/hoist-cond.ll +++ b/llvm/test/CodeGen/AMDGPU/hoist-cond.ll @@ -11,7 +11,7 @@ ; CHECK: s_and_saveexec_b64 s[{{[0-9]+:[0-9]+}}], [[COND]] ; CHECK: ; %bb.2: -define amdgpu_kernel void @hoist_cond(float 
addrspace(1)* nocapture %arg, float addrspace(1)* noalias nocapture readonly %arg1, i32 %arg3, i32 %arg4) { +define amdgpu_kernel void @hoist_cond(ptr addrspace(1) nocapture %arg, ptr addrspace(1) noalias nocapture readonly %arg1, i32 %arg3, i32 %arg4) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tmp5 = icmp ult i32 %tmp, %arg3 @@ -24,8 +24,8 @@ bb2: ; preds = %bb1 %tmp10 = zext i32 %tmp7 to i64 - %tmp11 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 %tmp10 - %tmp12 = load float, float addrspace(1)* %tmp11, align 4 + %tmp11 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 %tmp10 + %tmp12 = load float, ptr addrspace(1) %tmp11, align 4 br label %bb3 bb3: ; preds = %bb2, %bb1 @@ -36,7 +36,7 @@ br i1 %tmp17, label %bb4, label %bb1 bb4: ; preds = %bb3 - store float %tmp15, float addrspace(1)* %arg, align 4 + store float %tmp15, ptr addrspace(1) %arg, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/hsa-default-device.ll b/llvm/test/CodeGen/AMDGPU/hsa-default-device.ll --- a/llvm/test/CodeGen/AMDGPU/hsa-default-device.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-default-device.ll @@ -4,8 +4,8 @@ ; unsupported device. ; CHECK: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" -define amdgpu_kernel void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind { - store float 0.0, float addrspace(1)* %out0 +define amdgpu_kernel void @test_kernel(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind { + store float 0.0, ptr addrspace(1) %out0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll --- a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll @@ -4,9 +4,9 @@ ; GCN: float_mode = 240 ; GCN: enable_dx10_clamp = 1 ; GCN: enable_ieee_mode = 1 -define amdgpu_kernel void @test_default_ci(float addrspace(1)* %out0, double addrspace(1)* %out1) #0 { - store float 0.0, float addrspace(1)* %out0 - store double 0.0, double addrspace(1)* %out1 +define amdgpu_kernel void @test_default_ci(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #0 { + store float 0.0, ptr addrspace(1) %out0 + store double 0.0, ptr addrspace(1) %out1 ret void } @@ -14,9 +14,9 @@ ; GCN: float_mode = 240 ; GCN: enable_dx10_clamp = 1 ; GCN: enable_ieee_mode = 1 -define amdgpu_kernel void @test_default_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #1 { - store float 0.0, float addrspace(1)* %out0 - store double 0.0, double addrspace(1)* %out1 +define amdgpu_kernel void @test_default_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #1 { + store float 0.0, ptr addrspace(1) %out0 + store double 0.0, ptr addrspace(1) %out1 ret void } @@ -24,9 +24,9 @@ ; GCN: float_mode = 192 ; GCN: enable_dx10_clamp = 1 ; GCN: enable_ieee_mode = 1 -define amdgpu_kernel void @test_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #2 { - store float 0.0, float addrspace(1)* %out0 - store double 0.0, double addrspace(1)* %out1 +define amdgpu_kernel void @test_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #2 { + store float 0.0, ptr addrspace(1) %out0 + store double 0.0, ptr addrspace(1) %out1 ret void } @@ -34,9 +34,9 @@ ; GCN: float_mode = 48 ; GCN: enable_dx10_clamp = 1 ; GCN: enable_ieee_mode = 1 -define amdgpu_kernel void @test_f32_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #3 { - store float 0.0, float addrspace(1)* %out0 - store double 0.0, double addrspace(1)* %out1 +define amdgpu_kernel void @test_f32_denormals(ptr addrspace(1) %out0, ptr 
addrspace(1) %out1) #3 { + store float 0.0, ptr addrspace(1) %out0 + store double 0.0, ptr addrspace(1) %out1 ret void } @@ -44,9 +44,9 @@ ; GCN: float_mode = 240 ; GCN: enable_dx10_clamp = 1 ; GCN: enable_ieee_mode = 1 -define amdgpu_kernel void @test_f32_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #4 { - store float 0.0, float addrspace(1)* %out0 - store double 0.0, double addrspace(1)* %out1 +define amdgpu_kernel void @test_f32_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #4 { + store float 0.0, ptr addrspace(1) %out0 + store double 0.0, ptr addrspace(1) %out1 ret void } @@ -54,9 +54,9 @@ ; GCN: float_mode = 0 ; GCN: enable_dx10_clamp = 1 ; GCN: enable_ieee_mode = 1 -define amdgpu_kernel void @test_no_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #5 { - store float 0.0, float addrspace(1)* %out0 - store double 0.0, double addrspace(1)* %out1 +define amdgpu_kernel void @test_no_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #5 { + store float 0.0, ptr addrspace(1) %out0 + store double 0.0, ptr addrspace(1) %out1 ret void } @@ -64,9 +64,9 @@ ; GCN: float_mode = 240 ; GCN: enable_dx10_clamp = 0 ; GCN: enable_ieee_mode = 1 -define amdgpu_kernel void @test_no_dx10_clamp_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #6 { - store float 0.0, float addrspace(1)* %out0 - store double 0.0, double addrspace(1)* %out1 +define amdgpu_kernel void @test_no_dx10_clamp_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #6 { + store float 0.0, ptr addrspace(1) %out0 + store double 0.0, ptr addrspace(1) %out1 ret void } @@ -74,9 +74,9 @@ ; GCN: float_mode = 240 ; GCN: enable_dx10_clamp = 1 ; GCN: enable_ieee_mode = 0 -define amdgpu_kernel void @test_no_ieee_mode_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #7 { - store float 0.0, float addrspace(1)* %out0 - store double 0.0, double addrspace(1)* %out1 +define amdgpu_kernel void @test_no_ieee_mode_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #7 { + store float 0.0, ptr addrspace(1) %out0 + store double 0.0, ptr addrspace(1) %out1 ret void } @@ -84,9 +84,9 @@ ; GCN: float_mode = 240 ; GCN: enable_dx10_clamp = 0 ; GCN: enable_ieee_mode = 0 -define amdgpu_kernel void @test_no_ieee_mode_no_dx10_clamp_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #8 { - store float 0.0, float addrspace(1)* %out0 - store double 0.0, double addrspace(1)* %out1 +define amdgpu_kernel void @test_no_ieee_mode_no_dx10_clamp_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #8 { + store float 0.0, ptr addrspace(1) %out0 + store double 0.0, ptr addrspace(1) %out1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/hsa-func-align.ll b/llvm/test/CodeGen/AMDGPU/hsa-func-align.ll --- a/llvm/test/CodeGen/AMDGPU/hsa-func-align.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-func-align.ll @@ -10,9 +10,9 @@ ; HSA: .globl simple_align16 ; HSA: .p2align 5 -define void @simple_align16(i32 addrspace(1)* addrspace(4)* %ptr.out) align 32 { +define void @simple_align16(ptr addrspace(4) %ptr.out) align 32 { entry: - %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %ptr.out - store i32 0, i32 addrspace(1)* %out + %out = load ptr addrspace(1), ptr addrspace(4) %ptr.out + store i32 0, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/hsa-func.ll b/llvm/test/CodeGen/AMDGPU/hsa-func.ll --- a/llvm/test/CodeGen/AMDGPU/hsa-func.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-func.ll @@ -51,19 +51,19 @@ ; HSA: .size simple, .Lfunc_end0-simple ; HSA: ; Function info: ; HSA-NOT: 
COMPUTE_PGM_RSRC2 -define void @simple(i32 addrspace(1)* addrspace(4)* %ptr.out) { +define void @simple(ptr addrspace(4) %ptr.out) { entry: - %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %ptr.out - store i32 0, i32 addrspace(1)* %out + %out = load ptr addrspace(1), ptr addrspace(4) %ptr.out + store i32 0, ptr addrspace(1) %out ret void } ; Ignore explicit alignment that is too low. ; HSA: .globl simple_align2 ; HSA: .p2align 2 -define void @simple_align2(i32 addrspace(1)* addrspace(4)* %ptr.out) align 2 { +define void @simple_align2(ptr addrspace(4) %ptr.out) align 2 { entry: - %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %ptr.out - store i32 0, i32 addrspace(1)* %out + %out = load ptr addrspace(1), ptr addrspace(4) %ptr.out + store i32 0, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/hsa-group-segment.ll b/llvm/test/CodeGen/AMDGPU/hsa-group-segment.ll --- a/llvm/test/CodeGen/AMDGPU/hsa-group-segment.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-group-segment.ll @@ -5,8 +5,8 @@ define amdgpu_kernel void @test() { entry: - store i32 0, i32 addrspace(3)* @internal_group - store i32 0, i32 addrspace(3)* @external_group + store i32 0, ptr addrspace(3) @internal_group + store i32 0, ptr addrspace(3) @external_group ret void } diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll @@ -31,17 +31,17 @@ ; GFX10: .sgpr_spill_count: 0 ; GFX10: .vgpr_count: 4 ; GFX10: .vgpr_spill_count: 0 -define amdgpu_kernel void @test1(float* %x) { - %1 = load volatile float, float* %x +define amdgpu_kernel void @test1(ptr %x) { + %1 = load volatile float, ptr %x %2 = call float @f(float %1) - store volatile float %2, float* %x + store volatile float %2, ptr %x ret void } define internal float @f(float %arg0) #0 { %stack = alloca float, i32 4, align 4, addrspace(5) - store volatile float 3.0, float addrspace(5)* %stack - %val = load volatile float, float addrspace(5)* %stack + store volatile float 3.0, ptr addrspace(5) %stack + %val = load volatile float, ptr addrspace(5) %stack %add = fadd float %arg0, %val ret float %add } @@ -69,10 +69,10 @@ ; GFX10: .sgpr_spill_count: 0 ; GFX10: .vgpr_count: 4 ; GFX10: .vgpr_spill_count: 0 -define amdgpu_kernel void @test2(float* %x) { - %1 = load volatile float, float* %x +define amdgpu_kernel void @test2(ptr %x) { + %1 = load volatile float, ptr %x %2 = call float @f(float %1) - store volatile float %2, float* %x + store volatile float %2, ptr %x ret void } diff --git a/llvm/test/CodeGen/AMDGPU/internalize.ll b/llvm/test/CodeGen/AMDGPU/internalize.ll --- a/llvm/test/CodeGen/AMDGPU/internalize.ll +++ b/llvm/test/CodeGen/AMDGPU/internalize.ll @@ -12,32 +12,32 @@ ; OPT: define internal fastcc void @func_used_noinline( ; OPT-NONE: define fastcc void @func_used_noinline( -define fastcc void @func_used_noinline(i32 addrspace(1)* %out, i32 %tid) #1 { +define fastcc void @func_used_noinline(ptr addrspace(1) %out, i32 %tid) #1 { entry: - store volatile i32 %tid, i32 addrspace(1)* %out + store volatile i32 %tid, ptr addrspace(1) %out ret void } ; OPTNONE: define fastcc void @func_used_alwaysinline( ; OPT-NOT: @func_used_alwaysinline -define fastcc void @func_used_alwaysinline(i32 addrspace(1)* %out, i32 %tid) #2 { +define fastcc void @func_used_alwaysinline(ptr 
addrspace(1) %out, i32 %tid) #2 { entry: - store volatile i32 %tid, i32 addrspace(1)* %out + store volatile i32 %tid, ptr addrspace(1) %out ret void } ; OPTNONE: define void @func_unused( ; OPT-NOT: @func_unused -define void @func_unused(i32 addrspace(1)* %out, i32 %tid) #1 { +define void @func_unused(ptr addrspace(1) %out, i32 %tid) #1 { entry: - store volatile i32 %tid, i32 addrspace(1)* %out + store volatile i32 %tid, ptr addrspace(1) %out ret void } ; ALL: define amdgpu_kernel void @kernel_unused( -define amdgpu_kernel void @kernel_unused(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @kernel_unused(ptr addrspace(1) %out) #1 { entry: - store volatile i32 1, i32 addrspace(1)* %out + store volatile i32 1, ptr addrspace(1) %out ret void } @@ -49,8 +49,8 @@ define amdgpu_kernel void @main_kernel() { entry: %tid = tail call i32 @llvm.amdgcn.workitem.id.x() - tail call fastcc void @func_used_noinline(i32 addrspace(1)* @gvar_used, i32 %tid) - tail call fastcc void @func_used_alwaysinline(i32 addrspace(1)* @gvar_used, i32 %tid) + tail call fastcc void @func_used_noinline(ptr addrspace(1) @gvar_used, i32 %tid) + tail call fastcc void @func_used_alwaysinline(ptr addrspace(1) @gvar_used, i32 %tid) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll b/llvm/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll --- a/llvm/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll +++ b/llvm/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll @@ -10,11 +10,11 @@ ; GCN-DAG: buffer_load_dwordx2 [[PTR:v\[[0-9]+:[0-9]+\]]], ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1c8007b ; GCN: buffer_store_dword [[K]], [[PTR]] -define amdgpu_kernel void @test_merge_store_constant_i16_invariant_global_pointer_load(i16 addrspace(1)* addrspace(1)* dereferenceable(4096) nonnull %in) #0 { - %ptr = load i16 addrspace(1)*, i16 addrspace(1)* addrspace(1)* %in, !invariant.load !0 - %ptr.1 = getelementptr i16, i16 addrspace(1)* %ptr, i64 1 - store i16 123, i16 addrspace(1)* %ptr, align 4 - store i16 456, i16 addrspace(1)* %ptr.1 +define amdgpu_kernel void @test_merge_store_constant_i16_invariant_global_pointer_load(ptr addrspace(1) dereferenceable(4096) nonnull %in) #0 { + %ptr = load ptr addrspace(1), ptr addrspace(1) %in, !invariant.load !0 + %ptr.1 = getelementptr i16, ptr addrspace(1) %ptr, i64 1 + store i16 123, ptr addrspace(1) %ptr, align 4 + store i16 456, ptr addrspace(1) %ptr.1 ret void } @@ -22,11 +22,11 @@ ; GCN: s_load_dwordx2 s[[[SPTR_LO:[0-9]+]]:[[SPTR_HI:[0-9]+]]] ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x1c8007b ; GCN: buffer_store_dword [[K]], off, s[[[SPTR_LO]]: -define amdgpu_kernel void @test_merge_store_constant_i16_invariant_constant_pointer_load(i16 addrspace(1)* addrspace(4)* dereferenceable(4096) nonnull %in) #0 { - %ptr = load i16 addrspace(1)*, i16 addrspace(1)* addrspace(4)* %in, !invariant.load !0 - %ptr.1 = getelementptr i16, i16 addrspace(1)* %ptr, i64 1 - store i16 123, i16 addrspace(1)* %ptr, align 4 - store i16 456, i16 addrspace(1)* %ptr.1 +define amdgpu_kernel void @test_merge_store_constant_i16_invariant_constant_pointer_load(ptr addrspace(4) dereferenceable(4096) nonnull %in) #0 { + %ptr = load ptr addrspace(1), ptr addrspace(4) %in, !invariant.load !0 + %ptr.1 = getelementptr i16, ptr addrspace(1) %ptr, i64 1 + store i16 123, ptr addrspace(1) %ptr, align 4 + store i16 456, ptr addrspace(1) %ptr.1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll b/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll --- a/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll +++ 
b/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll @@ -37,12 +37,12 @@ ret void } -@llvm.used = appending global [6 x i8*] [i8* bitcast (void ()* @csr to i8*), - i8* bitcast (void ()* @subregs_for_super to i8*), - i8* bitcast (void ()* @clobbered_reg_with_sub to i8*), - i8* bitcast (void ()* @nothing to i8*), - i8* bitcast (void ()* @special_regs to i8*), - i8* bitcast (void ()* @vcc to i8*)] +@llvm.used = appending global [6 x ptr] [ptr @csr, + ptr @subregs_for_super, + ptr @clobbered_reg_with_sub, + ptr @nothing, + ptr @special_regs, + ptr @vcc] attributes #0 = { nounwind } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll b/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll --- a/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll +++ b/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll @@ -23,10 +23,10 @@ declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0 ; Function Attrs: argmemonly nofree nosync nounwind willreturn -declare void @llvm.lifetime.end.p5i8(i64 immarg, i8 addrspace(5)* nocapture) #1 +declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture) #1 ; Function Attrs: norecurse -define internal fastcc void @svm_node_closure_bsdf(%struct.ShaderData addrspace(1)* %sd, float* %stack, <4 x i32> %node, i32* %offset, i32 %0, i8 %trunc, float %1, float %2, float %mul80, i1 %cmp412.old, <4 x i32> %3, float %4, i32 %5, i1 %cmp440, i1 %cmp442, i1 %or.cond1306, float %.op, %struct.ShaderClosure addrspace(1)* %arrayidx.i.i2202, %struct.ShaderClosure addrspace(1)* %retval.0.i.i22089, %struct.ShaderClosure addrspace(1)* %retval.1.i221310, i1 %cmp575, i32 addrspace(1)* %num_closure_left.i2215, i32 %6, i1 %cmp.i2216, i32 %7, i64 %idx.ext.i2223, i32 %sub5.i2221) #2 { +define internal fastcc void @svm_node_closure_bsdf(ptr addrspace(1) %sd, ptr %stack, <4 x i32> %node, ptr %offset, i32 %0, i8 %trunc, float %1, float %2, float %mul80, i1 %cmp412.old, <4 x i32> %3, float %4, i32 %5, i1 %cmp440, i1 %cmp442, i1 %or.cond1306, float %.op, ptr addrspace(1) %arrayidx.i.i2202, ptr addrspace(1) %retval.0.i.i22089, ptr addrspace(1) %retval.1.i221310, i1 %cmp575, ptr addrspace(1) %num_closure_left.i2215, i32 %6, i1 %cmp.i2216, i32 %7, i64 %idx.ext.i2223, i32 %sub5.i2221) #2 { ; GCN-LABEL: {{^}}svm_node_closure_bsdf: ; GCN-DAG: v_writelane_b32 [[CSR_VGPR:v[0-9]+]], s30, ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, @@ -42,7 +42,7 @@ br i1 undef, label %common.ret.critedge, label %cond.true cond.true: ; preds = %entry - %9 = load float, float* null, align 4 + %9 = load float, ptr null, align 4 %phi.cmp = fcmp oeq float %9, 0.000000e+00 br i1 %phi.cmp, label %common.ret, label %cond.true20 @@ -63,15 +63,15 @@ br i1 %SwitchLeaf, label %if.end.i.i2285, label %NewDefault sw.bb: ; preds = %cond.true20 - %10 = load float, float* null, align 4 - %11 = load float, float* null, align 4 + %10 = load float, ptr null, align 4 + %11 = load float, ptr null, align 4 %12 = tail call float @llvm.amdgcn.fmed3.f32(float %1, float 0.000000e+00, float 0.000000e+00) %mul802 = fmul nsz float %1, 0.000000e+00 %cmp412.old3 = fcmp nsz ogt float %1, 0.000000e+00 br i1 %cmp412.old, label %if.then413, label %common.ret if.then413: ; preds = %sw.bb - %13 = load <4 x i32>, <4 x i32> addrspace(1)* null, align 16 + %13 = load <4 x i32>, ptr addrspace(1) null, align 16 %14 = extractelement <4 x i32> %node, i64 0 %cmp4404 = fcmp nsz ole float %1, 
0.000000e+00 %cmp4425 = icmp eq i32 %0, 0 @@ -82,12 +82,12 @@ br i1 true, label %if.end511, label %common.ret common.ret.critedge: ; preds = %entry - store i32 0, i32* null, align 4 + store i32 0, ptr null, align 4 br label %common.ret NewDefault: ; preds = %LeafBlock1, %LeafBlock %phi.store = phi i32 [0, %LeafBlock], [1, %LeafBlock1] - store i32 %phi.store, i32* null, align 4 + store i32 %phi.store, ptr null, align 4 br label %common.ret common.ret: ; preds = %if.end.i.i2285, %if.end627.sink.split, %cond.end579, %bsdf_alloc.exit2188, %if.end511, %common.ret.critedge, %if.then443, %sw.bb, %NewDefault, %cond.true @@ -118,7 +118,7 @@ %.op7 = fmul nsz float undef, 0.000000e+00 %mul558 = select i1 %cmp440, float 0.000000e+00, float %1 %15 = tail call float @llvm.amdgcn.fmed3.f32(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) - store float %mul558, float addrspace(1)* null, align 4 + store float %mul558, ptr addrspace(1) null, align 4 br label %if.end627.sink.split if.else568: ; preds = %if.then413 @@ -128,57 +128,56 @@ br i1 undef, label %closure_alloc.exit.i2210, label %if.end.i.i2207 if.end.i.i2207: ; preds = %if.then.i2198 - %arrayidx.i.i22028 = getelementptr inbounds %struct.ShaderData, %struct.ShaderData addrspace(1)* %sd, i64 0, i32 30, i64 undef + %arrayidx.i.i22028 = getelementptr inbounds %struct.ShaderData, ptr addrspace(1) %sd, i64 0, i32 30, i64 undef br label %closure_alloc.exit.i2210 closure_alloc.exit.i2210: ; preds = %if.end.i.i2207, %if.then.i2198 - %retval.0.i.i220899 = phi %struct.ShaderClosure addrspace(1)* [ %arrayidx.i.i2202, %if.end.i.i2207 ], [ null, %if.then.i2198 ] + %retval.0.i.i220899 = phi ptr addrspace(1) [ %arrayidx.i.i2202, %if.end.i.i2207 ], [ null, %if.then.i2198 ] br i1 false, label %bsdf_alloc.exit2214, label %if.end.i2212 if.end.i2212: ; preds = %closure_alloc.exit.i2210 br label %bsdf_alloc.exit2214 bsdf_alloc.exit2214: ; preds = %if.end.i2212, %closure_alloc.exit.i2210, %if.else568 - %retval.1.i22131010 = phi %struct.ShaderClosure addrspace(1)* [ %arrayidx.i.i2202, %if.end.i2212 ], [ null, %closure_alloc.exit.i2210 ], [ null, %if.else568 ] - %cmp57511 = icmp ne %struct.ShaderClosure addrspace(1)* %arrayidx.i.i2202, null + %retval.1.i22131010 = phi ptr addrspace(1) [ %arrayidx.i.i2202, %if.end.i2212 ], [ null, %closure_alloc.exit.i2210 ], [ null, %if.else568 ] + %cmp57511 = icmp ne ptr addrspace(1) %arrayidx.i.i2202, null br i1 %cmp442, label %cond.true576, label %cond.end579 cond.true576: ; preds = %bsdf_alloc.exit2214 - %num_closure_left.i221512 = getelementptr inbounds %struct.ShaderData, %struct.ShaderData addrspace(1)* %sd, i64 0, i32 25 - %16 = load i32, i32 addrspace(1)* %num_closure_left.i2215, align 8 + %num_closure_left.i221512 = getelementptr inbounds %struct.ShaderData, ptr addrspace(1) %sd, i64 0, i32 25 + %16 = load i32, ptr addrspace(1) %num_closure_left.i2215, align 8 %cmp.i221613 = icmp slt i32 %0, 0 br i1 %cmp440, label %cond.end579, label %if.end.i2227 if.end.i2227: ; preds = %cond.true576 %sub5.i222114 = add nuw nsw i32 %0, 0 - %17 = load i32, i32 addrspace(1)* null, align 4294967296 + %17 = load i32, ptr addrspace(1) null, align 4294967296 %idx.ext.i222315 = sext i32 %0 to i64 - %add.ptr.i2224 = getelementptr inbounds %struct.ShaderData, %struct.ShaderData addrspace(1)* %sd, i64 0, i32 30, i64 %idx.ext.i2223 + %add.ptr.i2224 = getelementptr inbounds %struct.ShaderData, ptr addrspace(1) %sd, i64 0, i32 30, i64 %idx.ext.i2223 %idx.ext8.i22252724 = zext i32 %0 to i64 - %add.ptr9.i2226 = getelementptr inbounds 
%struct.ShaderClosure, %struct.ShaderClosure addrspace(1)* %add.ptr.i2224, i64 %idx.ext8.i22252724 - %phi.cast2731 = bitcast %struct.ShaderClosure addrspace(1)* %add.ptr9.i2226 to %struct.MicrofacetExtra addrspace(1)* + %add.ptr9.i2226 = getelementptr inbounds %struct.ShaderClosure, ptr addrspace(1) %add.ptr.i2224, i64 %idx.ext8.i22252724 br label %cond.end579 cond.end579: ; preds = %if.end.i2227, %cond.true576, %bsdf_alloc.exit2214 - %cond580 = phi %struct.MicrofacetExtra addrspace(1)* [ null, %bsdf_alloc.exit2214 ], [ %phi.cast2731, %if.end.i2227 ], [ null, %cond.true576 ] - %tobool583 = icmp ne %struct.MicrofacetExtra addrspace(1)* %cond580, null + %cond580 = phi ptr addrspace(1) [ null, %bsdf_alloc.exit2214 ], [ %add.ptr9.i2226, %if.end.i2227 ], [ null, %cond.true576 ] + %tobool583 = icmp ne ptr addrspace(1) %cond580, null %or.cond1308 = select i1 %cmp442, i1 %tobool583, i1 false br i1 %or.cond1308, label %if.then584, label %common.ret if.then584: ; preds = %cond.end579 - store %struct.MicrofacetExtra addrspace(1)* null, %struct.MicrofacetExtra addrspace(1)* addrspace(1)* null, align 4294967296 + store ptr addrspace(1) null, ptr addrspace(1) null, align 4294967296 br label %if.end627.sink.split if.end627.sink.split: ; preds = %if.then584, %if.then534 - store i32 0, i32 addrspace(1)* null, align 4 + store i32 0, ptr addrspace(1) null, align 4 br label %common.ret if.end.i.i2285: ; preds = %cond.true20 - store i32 0, i32 addrspace(1)* null, align 4294967296 + store i32 0, ptr addrspace(1) null, align 4294967296 br label %common.ret } -define internal fastcc void @svm_eval_nodes(%struct.ShaderData addrspace(1)* %sd) { +define internal fastcc void @svm_eval_nodes(ptr addrspace(1) %sd) { sw.bb10: ; GCN-LABEL: {{^}}svm_eval_nodes: ; GCN-DAG: v_writelane_b32 [[CSR_VGPR:v[0-9]+]], s30, @@ -188,7 +187,7 @@ ; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]], ; GCN: s_waitcnt vmcnt(0) ; GCN: s_setpc_b64 s[30:31] - call fastcc void @svm_node_closure_bsdf(%struct.ShaderData addrspace(1)* null, float* null, <4 x i32> zeroinitializer, i32* null, i32 undef, i8 undef, float undef, float undef, float undef, i1 undef, <4 x i32> undef, float undef, i32 undef, i1 undef, i1 undef, i1 undef, float undef, %struct.ShaderClosure addrspace(1)* undef, %struct.ShaderClosure addrspace(1)* undef, %struct.ShaderClosure addrspace(1)* undef, i1 undef, i32 addrspace(1)* undef, i32 undef, i1 undef, i32 undef, i64 undef, i32 undef) + call fastcc void @svm_node_closure_bsdf(ptr addrspace(1) null, ptr null, <4 x i32> zeroinitializer, ptr null, i32 undef, i8 undef, float undef, float undef, float undef, i1 undef, <4 x i32> undef, float undef, i32 undef, i1 undef, i1 undef, i1 undef, float undef, ptr addrspace(1) undef, ptr addrspace(1) undef, ptr addrspace(1) undef, i1 undef, ptr addrspace(1) undef, i32 undef, i1 undef, i32 undef, i64 undef, i32 undef) ret void } @@ -197,7 +196,7 @@ ; GCN-LABEL: {{^}}kernel_ocl_path_trace_shadow_blocked_dl: ; GCN: s_swappc_b64 s[30:31] ; GCN: endpgm - tail call fastcc void @svm_eval_nodes(%struct.ShaderData addrspace(1)* null) + tail call fastcc void @svm_eval_nodes(ptr addrspace(1) null) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/ipra.ll b/llvm/test/CodeGen/AMDGPU/ipra.ll --- a/llvm/test/CodeGen/AMDGPU/ipra.ll +++ b/llvm/test/CodeGen/AMDGPU/ipra.ll @@ -4,9 +4,9 @@ ; Kernels are not called, so there is no call preserved mask. 
; GCN-LABEL: {{^}}kernel: ; GCN: flat_store_dword -define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @kernel(ptr addrspace(1) %out) #0 { entry: - store i32 0, i32 addrspace(1)* %out + store i32 0, ptr addrspace(1) %out ret void } @@ -33,9 +33,9 @@ ; GCN: ; NumSgprs: 37 ; GCN: ; NumVgprs: 9 define amdgpu_kernel void @kernel_call() #0 { - %vgpr = load volatile i32, i32 addrspace(1)* undef + %vgpr = load volatile i32, ptr addrspace(1) undef tail call void @func() - store volatile i32 %vgpr, i32 addrspace(1)* undef + store volatile i32 %vgpr, ptr addrspace(1) undef ret void } @@ -51,9 +51,9 @@ ; GCN: ; NumSgprs: 34 ; GCN: ; NumVgprs: 10 define void @func_regular_call() #1 { - %vgpr = load volatile i32, i32 addrspace(1)* undef + %vgpr = load volatile i32, ptr addrspace(1) undef tail call void @func() - store volatile i32 %vgpr, i32 addrspace(1)* undef + store volatile i32 %vgpr, ptr addrspace(1) undef ret void } @@ -80,9 +80,9 @@ ; GCN: ; NumSgprs: 34 ; GCN: ; NumVgprs: 10 define void @func_call_tail_call() #1 { - %vgpr = load volatile i32, i32 addrspace(1)* undef + %vgpr = load volatile i32, ptr addrspace(1) undef tail call void @func() - store volatile i32 %vgpr, i32 addrspace(1)* undef + store volatile i32 %vgpr, ptr addrspace(1) undef tail call void @func() ret void } @@ -106,7 +106,7 @@ } ; GCN-LABEL: {{^}}wombat: -define weak amdgpu_kernel void @wombat(i32* %arg, i32* %arg2) { +define weak amdgpu_kernel void @wombat(ptr %arg, ptr %arg2) { bb: call void @hoge() #0 ret void diff --git a/llvm/test/CodeGen/AMDGPU/jump-address.ll b/llvm/test/CodeGen/AMDGPU/jump-address.ll --- a/llvm/test/CodeGen/AMDGPU/jump-address.ll +++ b/llvm/test/CodeGen/AMDGPU/jump-address.ll @@ -6,7 +6,7 @@ define amdgpu_ps void @main() { main_body: - %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %0 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1) %1 = extractelement <4 x float> %0, i32 0 %2 = bitcast float %1 to i32 %3 = icmp eq i32 %2, 0 @@ -17,7 +17,7 @@ br i1 %7, label %ENDIF, label %ELSE ELSE: ; preds = %main_body - %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %8 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1) %9 = extractelement <4 x float> %8, i32 0 %10 = bitcast float %9 to i32 %11 = icmp eq i32 %10, 1 @@ -40,7 +40,7 @@ ret void IF13: ; preds = %ELSE - %20 = load <4 x float>, <4 x float> addrspace(8)* null + %20 = load <4 x float>, ptr addrspace(8) null %21 = extractelement <4 x float> %20, i32 0 %22 = fsub float -0.000000e+00, %21 %23 = fadd float 0x3FF8000000000000, %22 diff --git a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll --- a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll +++ b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll @@ -20,10 +20,10 @@ ; DOORBELL-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 ; DOORBELL: .end_amdhsa_kernel -define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) { - store volatile i32 1, i32 addrspace(1)* %arg0 +define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) { + store volatile i32 1, ptr addrspace(1) %arg0 call void @llvm.trap() unreachable - store volatile i32 2, i32 addrspace(1)* %arg0 + store volatile i32 2, ptr addrspace(1) %arg0 ret void } diff 
--git a/llvm/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll b/llvm/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll --- a/llvm/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll +++ b/llvm/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll @@ -7,7 +7,7 @@ ; CHECK: ScratchSize: 5{{$}} define amdgpu_kernel void @no_args() { %alloca = alloca i8, addrspace(5) - store volatile i8 0, i8 addrspace(5)* %alloca + store volatile i8 0, ptr addrspace(5) %alloca ret void } @@ -15,7 +15,7 @@ ; CHECK: ScratchSize: 5{{$}} define amdgpu_kernel void @force_align32(<8 x i32>) { %alloca = alloca i8, addrspace(5) - store volatile i8 0, i8 addrspace(5)* %alloca + store volatile i8 0, ptr addrspace(5) %alloca ret void } @@ -23,7 +23,7 @@ ; CHECK: ScratchSize: 5{{$}} define amdgpu_kernel void @force_align64(<16 x i32>) { %alloca = alloca i8, addrspace(5) - store volatile i8 0, i8 addrspace(5)* %alloca + store volatile i8 0, ptr addrspace(5) %alloca ret void } @@ -31,7 +31,7 @@ ; CHECK: ScratchSize: 5{{$}} define amdgpu_kernel void @force_align128(<32 x i32>) { %alloca = alloca i8, addrspace(5) - store volatile i8 0, i8 addrspace(5)* %alloca + store volatile i8 0, ptr addrspace(5) %alloca ret void } @@ -39,6 +39,6 @@ ; CHECK: ScratchSize: 5{{$}} define amdgpu_kernel void @force_align256(<64 x i32>) { %alloca = alloca i8, addrspace(5) - store volatile i8 0, i8 addrspace(5)* %alloca + store volatile i8 0, ptr addrspace(5) %alloca ret void } diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll --- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll +++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll @@ -5,7 +5,7 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood -verify-machineinstrs | FileCheck -check-prefixes=EGCM,EG %s ; RUN: llc < %s -march=r600 -mcpu=cayman -verify-machineinstrs | FileCheck -check-prefixes=EGCM,CM %s -define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { +define amdgpu_kernel void @i8_arg(ptr addrspace(1) nocapture %out, i8 %in) nounwind { ; SI-LABEL: i8_arg: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[0:1], 0xb @@ -73,11 +73,11 @@ ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %ext = zext i8 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { +define amdgpu_kernel void @i8_zext_arg(ptr addrspace(1) nocapture %out, i8 zeroext %in) nounwind { ; SI-LABEL: i8_zext_arg: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[0:1], 0xb @@ -148,11 +148,11 @@ ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %ext = zext i8 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind { +define amdgpu_kernel void @i8_sext_arg(ptr addrspace(1) nocapture %out, i8 signext %in) nounwind { ; SI-LABEL: i8_sext_arg: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[0:1], 0xb @@ -223,11 +223,11 @@ ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %ext = sext i8 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { +define amdgpu_kernel void @i16_arg(ptr addrspace(1) nocapture %out, i16 %in) 
nounwind { ; SI-LABEL: i16_arg: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[0:1], 0xb @@ -295,11 +295,11 @@ ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %ext = zext i16 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { +define amdgpu_kernel void @i16_zext_arg(ptr addrspace(1) nocapture %out, i16 zeroext %in) nounwind { ; SI-LABEL: i16_zext_arg: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[0:1], 0xb @@ -370,11 +370,11 @@ ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %ext = zext i16 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind { +define amdgpu_kernel void @i16_sext_arg(ptr addrspace(1) nocapture %out, i16 signext %in) nounwind { ; SI-LABEL: i16_sext_arg: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[0:1], 0xb @@ -445,11 +445,11 @@ ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %ext = sext i16 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind { +define amdgpu_kernel void @i32_arg(ptr addrspace(1) nocapture %out, i32 %in) nounwind { ; SI-LABEL: i32_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -504,11 +504,11 @@ ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; CM-NEXT: MOV * T1.X, KC0[2].Z, entry: - store i32 %in, i32 addrspace(1)* %out, align 4 + store i32 %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind { +define amdgpu_kernel void @f32_arg(ptr addrspace(1) nocapture %out, float %in) nounwind { ; SI-LABEL: f32_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -563,11 +563,11 @@ ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; CM-NEXT: MOV * T1.X, KC0[2].Z, entry: - store float %in, float addrspace(1)* %out, align 4 + store float %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { +define amdgpu_kernel void @v2i8_arg(ptr addrspace(1) %out, <2 x i8> %in) { ; SI-LABEL: v2i8_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -662,11 +662,11 @@ ; CM-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <2 x i8> %in, <2 x i8> addrspace(1)* %out + store <2 x i8> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) { +define amdgpu_kernel void @v2i16_arg(ptr addrspace(1) %out, <2 x i16> %in) { ; SI-LABEL: v2i16_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -741,11 +741,11 @@ ; CM-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <2 x i16> %in, <2 x i16> addrspace(1)* %out + store <2 x i16> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind { +define amdgpu_kernel void @v2i32_arg(ptr addrspace(1) nocapture %out, <2 x i32> %in) nounwind { ; SI-LABEL: v2i32_arg: ; SI: ; %bb.0: ; 
%entry ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -804,11 +804,11 @@ ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4 + store <2 x i32> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind { +define amdgpu_kernel void @v2f32_arg(ptr addrspace(1) nocapture %out, <2 x float> %in) nounwind { ; SI-LABEL: v2f32_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -867,11 +867,11 @@ ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4 + store <2 x float> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind { +define amdgpu_kernel void @v3i8_arg(ptr addrspace(1) nocapture %out, <3 x i8> %in) nounwind { ; SI-LABEL: v3i8_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -1006,11 +1006,11 @@ ; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4 + store <3 x i8> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind { +define amdgpu_kernel void @v3i16_arg(ptr addrspace(1) nocapture %out, <3 x i16> %in) nounwind { ; SI-LABEL: v3i16_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1125,11 +1125,11 @@ ; CM-NEXT: LSHR * T8.X, T0.W, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4 + store <3 x i16> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind { +define amdgpu_kernel void @v3i32_arg(ptr addrspace(1) nocapture %out, <3 x i32> %in) nounwind { ; SI-LABEL: v3i32_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd @@ -1204,11 +1204,11 @@ ; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4 + store <3 x i32> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind { +define amdgpu_kernel void @v3f32_arg(ptr addrspace(1) nocapture %out, <3 x float> %in) nounwind { ; SI-LABEL: v3f32_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd @@ -1283,11 +1283,11 @@ ; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4 + store <3 x float> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { +define amdgpu_kernel void @v4i8_arg(ptr addrspace(1) %out, <4 x i8> %in) { ; SI-LABEL: v4i8_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -1386,11 +1386,11 @@ ; CM-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <4 x i8> %in, <4 x i8> addrspace(1)* %out + store <4 x i8> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) { 
+define amdgpu_kernel void @v4i16_arg(ptr addrspace(1) %out, <4 x i16> %in) { ; SI-LABEL: v4i16_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1537,11 +1537,11 @@ ; CM-NEXT: MOV T2.X, PV.X, ; CM-NEXT: MOV * T5.Y, T3.X, entry: - store <4 x i16> %in, <4 x i16> addrspace(1)* %out + store <4 x i16> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind { +define amdgpu_kernel void @v4i32_arg(ptr addrspace(1) nocapture %out, <4 x i32> %in) nounwind { ; SI-LABEL: v4i32_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd @@ -1611,11 +1611,11 @@ ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 + store <4 x i32> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind { +define amdgpu_kernel void @v4f32_arg(ptr addrspace(1) nocapture %out, <4 x float> %in) nounwind { ; SI-LABEL: v4f32_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd @@ -1685,11 +1685,11 @@ ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4 + store <4 x float> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v5i8_arg(<5 x i8> addrspace(1)* nocapture %out, <5 x i8> %in) nounwind { +define amdgpu_kernel void @v5i8_arg(ptr addrspace(1) nocapture %out, <5 x i8> %in) nounwind { ; SI-LABEL: v5i8_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1826,11 +1826,11 @@ ; CM-NEXT: LSHR * T8.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <5 x i8> %in, <5 x i8> addrspace(1)* %out, align 4 + store <5 x i8> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v5i16_arg(<5 x i16> addrspace(1)* nocapture %out, <5 x i16> %in) nounwind { +define amdgpu_kernel void @v5i16_arg(ptr addrspace(1) nocapture %out, <5 x i16> %in) nounwind { ; SI-LABEL: v5i16_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dword s2, s[0:1], 0xf @@ -2057,11 +2057,11 @@ ; CM-NEXT: LSHR * T9.X, T0.W, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <5 x i16> %in, <5 x i16> addrspace(1)* %out, align 4 + store <5 x i16> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v5i32_arg(<5 x i32> addrspace(1)* nocapture %out, <5 x i32> %in) nounwind { +define amdgpu_kernel void @v5i32_arg(ptr addrspace(1) nocapture %out, <5 x i32> %in) nounwind { ; SI-LABEL: v5i32_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dword s8, s[0:1], 0x15 @@ -2155,11 +2155,11 @@ ; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <5 x i32> %in, <5 x i32> addrspace(1)* %out, align 4 + store <5 x i32> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v5f32_arg(<5 x float> addrspace(1)* nocapture %out, <5 x float> %in) nounwind { +define amdgpu_kernel void @v5f32_arg(ptr addrspace(1) nocapture %out, <5 x float> %in) nounwind { ; SI-LABEL: v5f32_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dword s8, s[0:1], 0x15 @@ -2254,11 +2254,11 @@ ; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <5 x float> %in, <5 x float> addrspace(1)* %out, align 4 + store <5 x float> 
%in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v5i64_arg(<5 x i64> addrspace(1)* nocapture %out, <5 x i64> %in) nounwind { +define amdgpu_kernel void @v5i64_arg(ptr addrspace(1) nocapture %out, <5 x i64> %in) nounwind { ; SI-LABEL: v5i64_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x19 @@ -2396,11 +2396,11 @@ ; CM-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <5 x i64> %in, <5 x i64> addrspace(1)* %out, align 8 + store <5 x i64> %in, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @v5f64_arg(<5 x double> addrspace(1)* nocapture %out, <5 x double> %in) nounwind { +define amdgpu_kernel void @v5f64_arg(ptr addrspace(1) nocapture %out, <5 x double> %in) nounwind { ; SI-LABEL: v5f64_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x19 @@ -2538,12 +2538,12 @@ ; CM-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <5 x double> %in, <5 x double> addrspace(1)* %out, align 8 + store <5 x double> %in, ptr addrspace(1) %out, align 8 ret void } ; FIXME: Lots of unpack and re-pack junk on VI -define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { +define amdgpu_kernel void @v8i8_arg(ptr addrspace(1) %out, <8 x i8> %in) { ; SI-LABEL: v8i8_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2790,11 +2790,11 @@ ; CM-NEXT: LSHR * T6.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <8 x i8> %in, <8 x i8> addrspace(1)* %out + store <8 x i8> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) { +define amdgpu_kernel void @v8i16_arg(ptr addrspace(1) %out, <8 x i16> %in) { ; SI-LABEL: v8i16_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd @@ -3038,11 +3038,11 @@ ; CM-NEXT: MOV * T7.W, T3.X, ; CM-NEXT: MOV * T7.Y, T5.X, entry: - store <8 x i16> %in, <8 x i16> addrspace(1)* %out + store <8 x i16> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind { +define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture %out, <8 x i32> %in) nounwind { ; SI-LABEL: v8i32_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x11 @@ -3149,11 +3149,11 @@ ; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4 + store <8 x i32> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind { +define amdgpu_kernel void @v8f32_arg(ptr addrspace(1) nocapture %out, <8 x float> %in) nounwind { ; SI-LABEL: v8f32_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x11 @@ -3260,12 +3260,12 @@ ; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4 + store <8 x float> %in, ptr addrspace(1) %out, align 4 ret void } ; FIXME: Pack/repack on VI -define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) { +define amdgpu_kernel void @v16i8_arg(ptr addrspace(1) %out, <16 x i8> %in) { ; SI-LABEL: v16i8_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd @@ -3711,11 +3711,11 @@ ; CM-NEXT: LSHR * T8.X, 
KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <16 x i8> %in, <16 x i8> addrspace(1)* %out + store <16 x i8> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) { +define amdgpu_kernel void @v16i16_arg(ptr addrspace(1) %out, <16 x i16> %in) { ; SI-LABEL: v16i16_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x11 @@ -4167,11 +4167,11 @@ ; CM-NEXT: MOV * T11.W, T7.X, BS:VEC_120/SCL_212 ; CM-NEXT: MOV * T11.Y, T9.X, entry: - store <16 x i16> %in, <16 x i16> addrspace(1)* %out + store <16 x i16> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind { +define amdgpu_kernel void @v16i32_arg(ptr addrspace(1) nocapture %out, <16 x i32> %in) nounwind { ; SI-LABEL: v16i32_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x19 @@ -4355,11 +4355,11 @@ ; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4 + store <16 x i32> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind { +define amdgpu_kernel void @v16f32_arg(ptr addrspace(1) nocapture %out, <16 x float> %in) nounwind { ; SI-LABEL: v16f32_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x19 @@ -4543,11 +4543,11 @@ ; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4 + store <16 x float> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind { +define amdgpu_kernel void @kernel_arg_i64(ptr addrspace(1) %out, i64 %a) nounwind { ; SI-LABEL: kernel_arg_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -4605,11 +4605,11 @@ ; CM-NEXT: MOV * T0.X, KC0[2].W, ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) - store i64 %a, i64 addrspace(1)* %out, align 8 + store i64 %a, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double %in) { +define amdgpu_kernel void @f64_kernel_arg(ptr addrspace(1) %out, double %in) { ; SI-LABEL: f64_kernel_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -4668,7 +4668,7 @@ ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - store double %in, double addrspace(1)* %out + store double %in, ptr addrspace(1) %out ret void } @@ -4676,12 +4676,12 @@ ; XGCN: s_load_dwordx2 ; XGCN: s_load_dwordx2 ; XGCN: buffer_store_dwordx2 -; define amdgpu_kernel void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind { -; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8 +; define amdgpu_kernel void @kernel_arg_v1i64(ptr addrspace(1) %out, <1 x i64> %a) nounwind { +; store <1 x i64> %a, ptr addrspace(1) %out, align 8 ; ret void ; } -define amdgpu_kernel void @i65_arg(i65 addrspace(1)* nocapture %out, i65 %in) nounwind { +define amdgpu_kernel void @i65_arg(ptr addrspace(1) nocapture %out, i65 %in) nounwind { ; SI-LABEL: i65_arg: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dword s4, s[0:1], 0xd @@ -4795,11 +4795,11 @@ ; CM-NEXT: LSHR * T5.X, T0.W, literal.x, ; CM-NEXT: 2(2.802597e-45), 
0(0.000000e+00) entry: - store i65 %in, i65 addrspace(1)* %out, align 4 + store i65 %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg(ptr addrspace(1) %out, i1 %x) nounwind { ; SI-LABEL: i1_arg: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[0:1], 0xb @@ -4886,11 +4886,11 @@ ; CM-NEXT: MOV * T0.Z, 0.0, ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) - store i1 %x, i1 addrspace(1)* %out, align 1 + store i1 %x, ptr addrspace(1) %out, align 1 ret void } -define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg_zext_i32(ptr addrspace(1) %out, i1 %x) nounwind { ; SI-LABEL: i1_arg_zext_i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[0:1], 0xb @@ -4958,11 +4958,11 @@ ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %ext = zext i1 %x to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg_zext_i64(ptr addrspace(1) %out, i1 %x) nounwind { ; SI-LABEL: i1_arg_zext_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -5034,11 +5034,11 @@ ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %ext = zext i1 %x to i64 - store i64 %ext, i64 addrspace(1)* %out, align 8 + store i64 %ext, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg_sext_i32(ptr addrspace(1) %out, i1 %x) nounwind { ; SI-LABEL: i1_arg_sext_i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[0:1], 0xb @@ -5108,11 +5108,11 @@ ; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %ext = sext i1 %x to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg_sext_i64(ptr addrspace(1) %out, i1 %x) nounwind { ; SI-LABEL: i1_arg_sext_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s2, s[0:1], 0xb @@ -5187,7 +5187,7 @@ ; CM-NEXT: MOV * T0.Y, PV.X, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) %ext = sext i1 %x to i64 - store i64 %ext, i64 addrspace(1)* %out, align 8 + store i64 %ext, ptr addrspace(1) %out, align 8 ret void } @@ -5346,10 +5346,10 @@ %val1 = extractvalue {i32, i64} %arg0, 1 %val2 = extractvalue {i32, i64} %arg1, 0 %val3 = extractvalue {i32, i64} %arg1, 1 - store volatile i32 %val0, i32 addrspace(1)* null - store volatile i64 %val1, i64 addrspace(1)* null - store volatile i32 %val2, i32 addrspace(1)* null - store volatile i64 %val3, i64 addrspace(1)* null + store volatile i32 %val0, ptr addrspace(1) null + store volatile i64 %val1, ptr addrspace(1) null + store volatile i32 %val2, ptr addrspace(1) null + store volatile i64 %val3, ptr addrspace(1) null ret void } @@ -5531,10 +5531,10 @@ %val1 = extractvalue <{i32, i64}> %arg0, 1 %val2 = extractvalue <{i32, i64}> %arg1, 0 %val3 = extractvalue <{i32, i64}> %arg1, 1 - store volatile i32 %val0, i32 addrspace(1)* null - store volatile i64 %val1, i64 addrspace(1)* null - store volatile i32 %val2, i32 addrspace(1)* null - store volatile i64 %val3, i64 addrspace(1)* null + store volatile i32 %val0, ptr
addrspace(1) null + store volatile i64 %val1, ptr addrspace(1) null + store volatile i32 %val2, ptr addrspace(1) null + store volatile i64 %val3, ptr addrspace(1) null ret void } @@ -5696,11 +5696,11 @@ %val1 = extractvalue {i32, i64} %arg0, 1 %val2 = extractvalue {i32, i64} %arg2, 0 %val3 = extractvalue {i32, i64} %arg2, 1 - store volatile i32 %val0, i32 addrspace(1)* null - store volatile i64 %val1, i64 addrspace(1)* null - store volatile i32 %val2, i32 addrspace(1)* null - store volatile i64 %val3, i64 addrspace(1)* null - store volatile <4 x i32> %arg4, <4 x i32> addrspace(1)* null + store volatile i32 %val0, ptr addrspace(1) null + store volatile i64 %val1, ptr addrspace(1) null + store volatile i32 %val2, ptr addrspace(1) null + store volatile i64 %val3, ptr addrspace(1) null + store volatile <4 x i32> %arg4, ptr addrspace(1) null ret void } @@ -5812,8 +5812,8 @@ ; CM-NEXT: MOV * T3.X, KC0[3].X, ; CM-NEXT: MOV * T4.X, literal.x, ; CM-NEXT: 0(0.000000e+00), 0(0.000000e+00) - store volatile i16 %arg0, i16 addrspace(1)* undef - store volatile [3 x i32] %arg1, [3 x i32] addrspace(1)* undef + store volatile i16 %arg0, ptr addrspace(1) undef + store volatile [3 x i32] %arg1, ptr addrspace(1) undef ret void } @@ -5983,8 +5983,8 @@ ; CM-NEXT: MOV T2.Y, 0.0, ; CM-NEXT: MOV * T2.Z, 0.0, ; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) - store volatile i8 %arg0, i8 addrspace(1)* undef - store volatile [3 x i16] %arg1, [3 x i16] addrspace(1)* undef + store volatile i8 %arg0, ptr addrspace(1) undef + store volatile [3 x i16] %arg1, ptr addrspace(1) undef ret void } @@ -6041,11 +6041,11 @@ ; EGCM-NEXT: MOV * T1.X, literal.x, ; EGCM-NEXT: 0(0.000000e+00), 0(0.000000e+00) %val = extractvalue [1 x i8] %arg, 0 - store volatile i8 %val, i8 addrspace(1)* undef + store volatile i8 %val, ptr addrspace(1) undef ret void } -define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(256) %in.byref, i32 %after.offset) { +define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) { ; SI-LABEL: byref_align_constant_i32_arg: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x49 @@ -6123,13 +6123,13 @@ ; CM-NEXT: MOV * T1.X, KC0[18].Z, ; CM-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %in = load i32, i32 addrspace(4)* %in.byref - store volatile i32 %in, i32 addrspace(1)* %out, align 4 - store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 + %in = load i32, ptr addrspace(4) %in.byref + store volatile i32 %in, ptr addrspace(1) %out, align 4 + store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace(1)* nocapture %out, i8, <16 x i32> addrspace(4)* byref(<16 x i32>) %in.byref, i32 %after.offset) { +define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) %in.byref, i32 %after.offset) { ; SI-LABEL: byref_natural_align_constant_v16i32_arg: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x19 @@ -6347,9 +6347,8 @@ ; CM-NEXT: MOV * T1.X, KC0[10].Y, ; CM-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %in = load <16 x i32>, <16 x i32> addrspace(4)* %in.byref - %cast.out = bitcast i32 addrspace(1)* %out to <16 x i32> addrspace(1)* - store volatile <16 x i32> %in, <16 x i32> 
addrspace(1)* %cast.out, align 4 - store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 + %in = load <16 x i32>, ptr addrspace(4) %in.byref + store volatile <16 x i32> %in, ptr addrspace(1) %out, align 4 + store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll b/llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll --- a/llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll +++ b/llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll @@ -10,8 +10,8 @@ ; GCN: s_load_dword s ; GCN: s_and_b32 -define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { - store i1 %x, i1 addrspace(1)* %out, align 1 +define amdgpu_kernel void @i1_arg(ptr addrspace(1) %out, i1 %x) nounwind { + store i1 %x, ptr addrspace(1) %out, align 1 ret void } @@ -20,9 +20,9 @@ ; HSA-VI: kernarg_segment_alignment = 4 ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x8 ; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0 -define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind { +define amdgpu_kernel void @v3i8_arg(ptr addrspace(1) nocapture %out, <3 x i8> %in) nounwind { entry: - store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4 + store <3 x i8> %in, ptr addrspace(1) %out, align 4 ret void } @@ -30,9 +30,9 @@ ; HSA-VI: kernarg_segment_byte_size = 24 ; HSA-VI: kernarg_segment_alignment = 4 ; HSA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0 -define amdgpu_kernel void @i65_arg(i65 addrspace(1)* nocapture %out, i65 %in) nounwind { +define amdgpu_kernel void @i65_arg(ptr addrspace(1) nocapture %out, i65 %in) nounwind { entry: - store i65 %in, i65 addrspace(1)* %out, align 4 + store i65 %in, ptr addrspace(1) %out, align 4 ret void } @@ -63,10 +63,10 @@ %val1 = extractvalue {i32, i64} %arg0, 1 %val2 = extractvalue {i32, i64} %arg1, 0 %val3 = extractvalue {i32, i64} %arg1, 1 - store volatile i32 %val0, i32 addrspace(1)* null - store volatile i64 %val1, i64 addrspace(1)* null - store volatile i32 %val2, i32 addrspace(1)* null - store volatile i64 %val3, i64 addrspace(1)* null + store volatile i32 %val0, ptr addrspace(1) null + store volatile i64 %val1, ptr addrspace(1) null + store volatile i32 %val2, ptr addrspace(1) null + store volatile i64 %val3, ptr addrspace(1) null ret void } @@ -84,10 +84,10 @@ %val1 = extractvalue <{i32, i64}> %arg0, 1 %val2 = extractvalue <{i32, i64}> %arg1, 0 %val3 = extractvalue <{i32, i64}> %arg1, 1 - store volatile i32 %val0, i32 addrspace(1)* null - store volatile i64 %val1, i64 addrspace(1)* null - store volatile i32 %val2, i32 addrspace(1)* null - store volatile i64 %val3, i64 addrspace(1)* null + store volatile i32 %val0, ptr addrspace(1) null + store volatile i64 %val1, ptr addrspace(1) null + store volatile i32 %val2, ptr addrspace(1) null + store volatile i64 %val3, ptr addrspace(1) null ret void } @@ -103,27 +103,27 @@ %val1 = extractvalue {i32, i64} %arg0, 1 %val2 = extractvalue {i32, i64} %arg2, 0 %val3 = extractvalue {i32, i64} %arg2, 1 - store volatile i32 %val0, i32 addrspace(1)* null - store volatile i64 %val1, i64 addrspace(1)* null - store volatile i32 %val2, i32 addrspace(1)* null - store volatile i64 %val3, i64 addrspace(1)* null - store volatile <4 x i32> %arg4, <4 x i32> addrspace(1)* null + store volatile i32 %val0, ptr addrspace(1) null + store volatile i64 %val1, ptr addrspace(1) null + store volatile i32 %val2, ptr addrspace(1) null + store volatile i64 %val3, ptr addrspace(1) null + store 
volatile <4 x i32> %arg4, ptr addrspace(1) null ret void } ; GCN-LABEL: {{^}}array_3xi32: ; HSA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0 define amdgpu_kernel void @array_3xi32(i16 %arg0, [3 x i32] %arg1) { - store volatile i16 %arg0, i16 addrspace(1)* undef - store volatile [3 x i32] %arg1, [3 x i32] addrspace(1)* undef + store volatile i16 %arg0, ptr addrspace(1) undef + store volatile [3 x i32] %arg1, ptr addrspace(1) undef ret void } ; GCN-LABEL: {{^}}array_3xi16: ; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0 define amdgpu_kernel void @array_3xi16(i8 %arg0, [3 x i16] %arg1) { - store volatile i8 %arg0, i8 addrspace(1)* undef - store volatile [3 x i16] %arg1, [3 x i16] addrspace(1)* undef + store volatile i8 %arg0, ptr addrspace(1) undef + store volatile [3 x i16] %arg1, ptr addrspace(1) undef ret void } @@ -131,9 +131,9 @@ ; GCN: s_load_dword [[DWORD:s[0-9]+]] ; GCN-DAG: s_bfe_u32 [[BFE:s[0-9]+]], [[DWORD]], 0x100010{{$}} ; GCN-DAG: s_and_b32 [[AND:s[0-9]+]], [[DWORD]], 0x7fff{{$}} -define amdgpu_kernel void @v2i15_arg(<2 x i15> addrspace(1)* nocapture %out, <2 x i15> %in) { +define amdgpu_kernel void @v2i15_arg(ptr addrspace(1) nocapture %out, <2 x i15> %in) { entry: - store <2 x i15> %in, <2 x i15> addrspace(1)* %out, align 4 + store <2 x i15> %in, ptr addrspace(1) %out, align 4 ret void } @@ -143,9 +143,9 @@ ; GCN: s_and_b32 ; GCN: s_and_b32 ; GCN: s_or_b32 -define amdgpu_kernel void @v3i15_arg(<3 x i15> addrspace(1)* nocapture %out, <3 x i15> %in) { +define amdgpu_kernel void @v3i15_arg(ptr addrspace(1) nocapture %out, <3 x i15> %in) { entry: - store <3 x i15> %in, <3 x i15> addrspace(1)* %out, align 4 + store <3 x i15> %in, ptr addrspace(1) %out, align 4 ret void } @@ -154,10 +154,10 @@ ; GCN: kernarg_segment_byte_size = 12 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; GCN: global_load_ubyte v{{[0-9]+}}, [[ZERO]], s[4:5] offset:8 -define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %out, i8 addrspace(4)* byref(i8) %in.byref) { - %in = load i8, i8 addrspace(4)* %in.byref +define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i8) %in.byref) { + %in = load i8, ptr addrspace(4) %in.byref %ext = zext i8 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } @@ -165,20 +165,20 @@ ; GCN: kernarg_segment_byte_size = 12 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; GCN: global_load_ushort v{{[0-9]+}}, [[ZERO]], s[4:5] offset:8 -define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %out, i16 addrspace(4)* byref(i16) %in.byref) { - %in = load i16, i16 addrspace(4)* %in.byref +define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i16) %in.byref) { + %in = load i16, ptr addrspace(4) %in.byref %ext = zext i16 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } ; GCN-LABEL: {{^}}byref_constant_i32_arg: ; GCN: kernarg_segment_byte_size = 16 ; GCN: s_load_dwordx4 [[LOAD:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0{{$}} -define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) %in.byref, i32 %after.offset) { - %in = load i32, i32 addrspace(4)* %in.byref - store volatile i32 %in, i32 addrspace(1)* %out, align 4 - store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 +define amdgpu_kernel void @byref_constant_i32_arg(ptr 
addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) %in.byref, i32 %after.offset) { + %in = load i32, ptr addrspace(4) %in.byref + store volatile i32 %in, ptr addrspace(1) %out, align 4 + store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } @@ -186,11 +186,10 @@ ; GCN: kernarg_segment_byte_size = 36 ; GCN: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x10{{$}} ; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x20{{$}} -define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> addrspace(4)* byref(<4 x i32>) %in.byref, i32 %after.offset) { - %in = load <4 x i32>, <4 x i32> addrspace(4)* %in.byref - store volatile <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 - %out.cast = bitcast <4 x i32> addrspace(1)* %out to i32 addrspace(1)* - store volatile i32 %after.offset, i32 addrspace(1)* %out.cast, align 4 +define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(<4 x i32>) %in.byref, i32 %after.offset) { + %in = load <4 x i32>, ptr addrspace(4) %in.byref + store volatile <4 x i32> %in, ptr addrspace(1) %out, align 4 + store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } @@ -201,10 +200,10 @@ ; GCN-DAG: v_mov_b32_e32 [[V_AFTER_OFFSET:v[0-9]+]], s[[AFTER_OFFSET]] ; GCN: global_store_dword v{{[0-9]+}}, [[V_IN]], s ; GCN: global_store_dword v{{[0-9]+}}, [[V_AFTER_OFFSET]], s -define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(256) %in.byref, i32 %after.offset) { - %in = load i32, i32 addrspace(4)* %in.byref - store volatile i32 %in, i32 addrspace(1)* %out, align 4 - store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 +define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) { + %in = load i32, ptr addrspace(4) %in.byref + store volatile i32 %in, ptr addrspace(1) %out, align 4 + store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } @@ -212,11 +211,10 @@ ; GCN: kernarg_segment_byte_size = 132 ; GCN-DAG: s_load_dword s{{[0-9]+}}, s[4:5], 0x80 ; GCN-DAG: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x40{{$}} -define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace(1)* nocapture %out, i8, <16 x i32> addrspace(4)* byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) { - %in = load <16 x i32>, <16 x i32> addrspace(4)* %in.byref - %cast.out = bitcast i32 addrspace(1)* %out to <16 x i32> addrspace(1)* - store volatile <16 x i32> %in, <16 x i32> addrspace(1)* %cast.out, align 4 - store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 +define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) { + %in = load <16 x i32>, ptr addrspace(4) %in.byref + store volatile <16 x i32> %in, ptr addrspace(1) %out, align 4 + store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } @@ -224,17 +222,17 @@ ; GCN-LABEL: {{^}}byref_global_i32_arg: ; GCN: kernarg_segment_byte_size = 12 ; GCN: s_load_dword [[IN:s[0-9]+]], s[4:5], 0x8{{$}} -define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* byref(i32) %in.byref) { - %in = load i32, i32 addrspace(1)* %in.byref - store i32 %in, i32 addrspace(1)* %out, align 4 +define amdgpu_kernel void 
@byref_global_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(1) byref(i32) %in.byref) { + %in = load i32, ptr addrspace(1) %in.byref + store i32 %in, ptr addrspace(1) %out, align 4 ret void } ; GCN-LABEL: {{^}}byref_flat_i32_arg: ; GCN: flat_load_dword [[IN:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}} offset:8{{$}} -define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, i32* byref(i32) %in.byref) { - %in = load i32, i32* %in.byref - store i32 %in, i32 addrspace(1)* %out, align 4 +define amdgpu_kernel void @byref_flat_i32_arg(ptr addrspace(1) nocapture %out, ptr byref(i32) %in.byref) { + %in = load i32, ptr %in.byref + store i32 %in, ptr addrspace(1) %out, align 4 ret void } @@ -242,27 +240,27 @@ ; GCN: s_add_i32 s[[PTR_LO:[0-9]+]], s4, 8 ; GCN: s_mov_b32 s[[PTR_HI:[0-9]+]], 0{{$}} ; GCN: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}} -define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(6)* byref(i32) %in.byref) { - %in = load i32, i32 addrspace(6)* %in.byref - store i32 %in, i32 addrspace(1)* %out, align 4 +define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(6) byref(i32) %in.byref) { + %in = load i32, ptr addrspace(6) %in.byref + store i32 %in, ptr addrspace(1) %out, align 4 ret void } -; define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(999)* byref %in.byref) { -; %in = load i32, i32 addrspace(999)* %in.byref -; store i32 %in, i32 addrspace(1)* %out, align 4 +; define amdgpu_kernel void @byref_unknown_as_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(999) byref %in.byref) { +; %in = load i32, ptr addrspace(999) %in.byref +; store i32 %in, ptr addrspace(1) %out, align 4 ; ret void ; } ; GCN-LABEL: {{^}}multi_byref_constant_i32_arg: ; GCN: kernarg_segment_byte_size = 20 ; GCN: s_load_dwordx4 {{s\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0 -define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) %in0.byref, i32 addrspace(4)* byref(i32) %in1.byref, i32 %after.offset) { - %in0 = load i32, i32 addrspace(4)* %in0.byref - %in1 = load i32, i32 addrspace(4)* %in1.byref - store volatile i32 %in0, i32 addrspace(1)* %out, align 4 - store volatile i32 %in1, i32 addrspace(1)* %out, align 4 - store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 +define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) %in0.byref, ptr addrspace(4) byref(i32) %in1.byref, i32 %after.offset) { + %in0 = load i32, ptr addrspace(4) %in0.byref + %in1 = load i32, ptr addrspace(4) %in1.byref + store volatile i32 %in0, ptr addrspace(1) %out, align 4 + store volatile i32 %in1, ptr addrspace(1) %out, align 4 + store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } @@ -271,8 +269,8 @@ ; GCN-NOT: s4 ; GCN-NOT: s5 ; GCN: s_load_dword {{s[0-9]+}}, s[4:5], 0x0{{$}} -define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byref(i32) %in.byref) { - %in = load i32, i32 addrspace(4)* %in.byref - store i32 %in, i32 addrspace(1)* undef, align 4 +define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref(i32) %in.byref) { + %in = load i32, ptr addrspace(4) %in.byref + store i32 %in, ptr addrspace(1) undef, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/knownbits-recursion.ll b/llvm/test/CodeGen/AMDGPU/knownbits-recursion.ll --- 
a/llvm/test/CodeGen/AMDGPU/knownbits-recursion.ll +++ b/llvm/test/CodeGen/AMDGPU/knownbits-recursion.ll @@ -8,14 +8,14 @@ ; node produced. ; GCN: v_mul_u32_u24 -define amdgpu_kernel void @test(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @test(ptr addrspace(1) nocapture %arg) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() br label %bb4 bb1: ; preds = %bb4 - %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp46 - store i32 %tmp46, i32 addrspace(1)* %tmp3, align 4 + %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %tmp46 + store i32 %tmp46, ptr addrspace(1) %tmp3, align 4 ret void bb4: ; preds = %bb4, %bb diff --git a/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll --- a/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll +++ b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll @@ -59,11 +59,11 @@ ; ALL: ; ScratchSize: 32772 define amdgpu_kernel void @large_alloca_compute_shader(i32 %x, i32 %y) #0 { %large = alloca [8192 x i32], align 4, addrspace(5) - %gep = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 8191 - store volatile i32 %x, i32 addrspace(5)* %gep - %gep1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 %y - %val = load volatile i32, i32 addrspace(5)* %gep1 - store volatile i32 %val, i32 addrspace(1)* undef + %gep = getelementptr [8192 x i32], ptr addrspace(5) %large, i32 0, i32 8191 + store volatile i32 %x, ptr addrspace(5) %gep + %gep1 = getelementptr [8192 x i32], ptr addrspace(5) %large, i32 0, i32 %y + %val = load volatile i32, ptr addrspace(5) %gep1 + store volatile i32 %val, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll b/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll --- a/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll +++ b/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll @@ -20,11 +20,11 @@ ; ALL: ; ScratchSize: 32772 define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 { %large = alloca [8192 x i32], align 4, addrspace(5) - %gep = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 8191 - store volatile i32 %x, i32 addrspace(5)* %gep - %gep1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 %y - %val = load volatile i32, i32 addrspace(5)* %gep1 - store volatile i32 %val, i32 addrspace(1)* undef + %gep = getelementptr [8192 x i32], ptr addrspace(5) %large, i32 0, i32 8191 + store volatile i32 %x, ptr addrspace(5) %gep + %gep1 = getelementptr [8192 x i32], ptr addrspace(5) %large, i32 0, i32 %y + %val = load volatile i32, ptr addrspace(5) %gep1 + store volatile i32 %val, ptr addrspace(1) undef ret void } @@ -46,11 +46,11 @@ ; ALL: ; ScratchSize: 32772 define amdgpu_ps void @large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %y) #0 { %large = alloca [8192 x i32], align 4, addrspace(5) - %gep = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 8191 - store volatile i32 %x, i32 addrspace(5)* %gep - %gep1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 %y - %val = load volatile i32, i32 addrspace(5)* %gep1 - store volatile i32 %val, i32 addrspace(1)* undef + %gep = getelementptr [8192 x i32], ptr addrspace(5) %large, i32 0, i32 8191 + store volatile i32 %x, ptr addrspace(5) %gep + %gep1 = getelementptr [8192 x i32], ptr addrspace(5) %large, i32 0, i32 %y + %val = load volatile i32, ptr addrspace(5) %gep1 + store volatile i32 %val, ptr 
addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/large-constant-initializer.ll b/llvm/test/CodeGen/AMDGPU/large-constant-initializer.ll --- a/llvm/test/CodeGen/AMDGPU/large-constant-initializer.ll +++ b/llvm/test/CodeGen/AMDGPU/large-constant-initializer.ll @@ -4,8 +4,8 @@ @gv = external unnamed_addr addrspace(4) constant [239 x i32], align 4 -define amdgpu_kernel void @opencv_cvtfloat_crash(i32 addrspace(1)* %out, i32 %x) nounwind { - %val = load i32, i32 addrspace(4)* getelementptr ([239 x i32], [239 x i32] addrspace(4)* @gv, i64 0, i64 239), align 4 +define amdgpu_kernel void @opencv_cvtfloat_crash(ptr addrspace(1) %out, i32 %x) nounwind { + %val = load i32, ptr addrspace(4) getelementptr ([239 x i32], ptr addrspace(4) @gv, i64 0, i64 239), align 4 %mul12 = mul nsw i32 %val, 7 br i1 undef, label %exit, label %bb diff --git a/llvm/test/CodeGen/AMDGPU/lcssa-optnone.ll b/llvm/test/CodeGen/AMDGPU/lcssa-optnone.ll --- a/llvm/test/CodeGen/AMDGPU/lcssa-optnone.ll +++ b/llvm/test/CodeGen/AMDGPU/lcssa-optnone.ll @@ -2,7 +2,7 @@ ; CHECK-LABEL: non_uniform_loop ; CHECK: s_endpgm -define amdgpu_kernel void @non_uniform_loop(float addrspace(1)* %array) { +define amdgpu_kernel void @non_uniform_loop(ptr addrspace(1) %array) { entry: %w = tail call i32 @llvm.amdgcn.workitem.id.x() br label %for.cond diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -374,6 +374,7 @@ ; GCN-O1-NEXT: Machine Optimization Remark Emitter ; GCN-O1-NEXT: Shrink Wrapping analysis ; GCN-O1-NEXT: Prologue/Epilogue Insertion & Frame Finalization +; GCN-O1-NEXT: Machine Late Instructions Cleanup Pass ; GCN-O1-NEXT: Control Flow Optimizer ; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis ; GCN-O1-NEXT: Tail Duplication @@ -670,6 +671,7 @@ ; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter ; GCN-O1-OPTS-NEXT: Shrink Wrapping analysis ; GCN-O1-OPTS-NEXT: Prologue/Epilogue Insertion & Frame Finalization +; GCN-O1-OPTS-NEXT: Machine Late Instructions Cleanup Pass ; GCN-O1-OPTS-NEXT: Control Flow Optimizer ; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis ; GCN-O1-OPTS-NEXT: Tail Duplication @@ -968,6 +970,7 @@ ; GCN-O2-NEXT: Machine Optimization Remark Emitter ; GCN-O2-NEXT: Shrink Wrapping analysis ; GCN-O2-NEXT: Prologue/Epilogue Insertion & Frame Finalization +; GCN-O2-NEXT: Machine Late Instructions Cleanup Pass ; GCN-O2-NEXT: Control Flow Optimizer ; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis ; GCN-O2-NEXT: Tail Duplication @@ -1279,6 +1282,7 @@ ; GCN-O3-NEXT: Machine Optimization Remark Emitter ; GCN-O3-NEXT: Shrink Wrapping analysis ; GCN-O3-NEXT: Prologue/Epilogue Insertion & Frame Finalization +; GCN-O3-NEXT: Machine Late Instructions Cleanup Pass ; GCN-O3-NEXT: Control Flow Optimizer ; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis ; GCN-O3-NEXT: Tail Duplication diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll --- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll @@ -3476,122 +3476,103 @@ ; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 -; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 -; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:16 -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v15 
-; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v14 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v13 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v4, 16, v12 -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v19 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v18 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, 0xffff, v15 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v14 -; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) -; GCN-NOHSA-SI-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill -; GCN-NOHSA-SI-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill -; GCN-NOHSA-SI-NEXT: buffer_store_dword v3, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v5, 0xffff, v13 -; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v3, 0xffff, v12 -; GCN-NOHSA-SI-NEXT: buffer_store_dword v3, off, s[12:15], 0 offset:20 ; 4-byte Folded Spill -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) -; GCN-NOHSA-SI-NEXT: buffer_store_dword v4, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill -; GCN-NOHSA-SI-NEXT: buffer_store_dword v5, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill -; GCN-NOHSA-SI-NEXT: buffer_store_dword v6, off, s[12:15], 0 offset:32 ; 4-byte Folded Spill -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v15, 16, v17 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v13, 16, v16 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, 0xffff, v19 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v18 -; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[18:21], off, s[8:11], 0 offset:32 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v14, 0xffff, v17 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, 0xffff, v16 -; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[22:25], off, s[8:11], 0 offset:48 -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v29, 16, v21 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v27, 16, v20 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v33, 16, v19 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v31, 16, v18 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v28, 0xffff, v21 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v26, 0xffff, v20 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v32, 0xffff, v19 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v30, 0xffff, v18 -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v25 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v24 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v37, 16, v23 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v35, 16, v22 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, 0xffff, v25 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v24 -; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[38:41], off, s[8:11], 0 offset:64 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v36, 0xffff, v23 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v34, 0xffff, v22 +; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 +; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 +; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 +; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64 ; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80 -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v44, 16, v41 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v42, 16, v40 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v48, 16, v39 -; GCN-NOHSA-SI-NEXT: 
v_lshrrev_b32_e32 v46, 16, v38 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v43, 0xffff, v41 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v41, 0xffff, v40 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v47, 0xffff, v39 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v45, 0xffff, v38 +; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96 +; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112 +; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(7) +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v35, 16, v3 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v33, 16, v2 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v39, 16, v1 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v37, 16, v0 +; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(6) +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v43, 16, v7 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v41, 16, v6 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v34, 0xffff, v3 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v32, 0xffff, v2 +; GCN-NOHSA-SI-NEXT: buffer_store_dword v32, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v52, 16, v23 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v50, 16, v22 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v56, 16, v21 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v54, 16, v20 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v51, 0xffff, v23 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v49, 0xffff, v22 -; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[22:25], off, s[8:11], 0 offset:96 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v55, 0xffff, v21 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v53, 0xffff, v20 -; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[57:60], off, s[8:11], 0 offset:112 -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v25 -; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(1) -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v24 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v23 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v22 +; GCN-NOHSA-SI-NEXT: buffer_store_dword v33, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill +; GCN-NOHSA-SI-NEXT: buffer_store_dword v34, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill +; GCN-NOHSA-SI-NEXT: buffer_store_dword v35, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v38, 0xffff, v1 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v36, 0xffff, v0 ; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v6, 0xffff, v25 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v24 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, 0xffff, v23 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v22 -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v23, 16, v60 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v59 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v63, 16, v58 -; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v61, 16, v57 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v22, 0xffff, v60 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v20, 0xffff, v59 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v62, 0xffff, v58 -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v60, 0xffff, v57 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v35, 16, v5 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v33, 16, v4 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v42, 0xffff, v7 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v40, 0xffff, v6 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v34, 0xffff, v5 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v32, 0xffff, v4 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v11 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v10 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v47, 16, v9 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v45, 16, v8 +; GCN-NOHSA-SI-NEXT: 
v_and_b32_e32 v6, 0xffff, v11 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v10 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v46, 0xffff, v9 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v44, 0xffff, v8 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v15 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v14 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v51, 16, v13 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v49, 16, v12 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, 0xffff, v15 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v14 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v50, 0xffff, v13 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v48, 0xffff, v12 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v15, 16, v19 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v13, 16, v18 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v55, 16, v17 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v53, 16, v16 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v14, 0xffff, v19 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, 0xffff, v18 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v54, 0xffff, v17 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v52, 0xffff, v16 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v23 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v22 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v59, 16, v21 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v57, 16, v20 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, 0xffff, v23 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v22 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v58, 0xffff, v21 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v56, 0xffff, v20 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v23, 16, v27 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v26 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v63, 16, v25 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v61, 16, v24 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v22, 0xffff, v27 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v20, 0xffff, v26 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v62, 0xffff, v25 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v60, 0xffff, v24 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v27, 16, v31 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v25, 16, v30 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v29 +; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v28 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v26, 0xffff, v31 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v24, 0xffff, v30 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, 0xffff, v29 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v28 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 ; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 -; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[60:63], off, s[0:3], 0 offset:224 -; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:240 -; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 -; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:208 -; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[53:56], off, s[0:3], 0 offset:160 -; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[49:52], off, s[0:3], 0 offset:176 -; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[45:48], off, s[0:3], 0 offset:128 -; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[41:44], off, s[0:3], 0 offset:144 -; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[34:37], off, s[0:3], 0 offset:96 -; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112 -; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[30:33], off, s[0:3], 0 offset:64 -; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[26:29], off, s[0:3], 0 offset:80 -; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32 -; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48 -; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:20 ; 4-byte 
Folded Reload -; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload -; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload -; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:32 ; 4-byte Folded Reload -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) -; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 -; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) +; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 +; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:240 +; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[60:63], off, s[0:3], 0 offset:192 +; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:208 +; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:160 +; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 +; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:128 +; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:144 +; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:96 +; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 +; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:64 +; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80 +; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:32 +; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:48 +; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[36:39], off, s[0:3], 0 ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload @@ -3808,108 +3789,102 @@ ; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 -; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 -; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:16 +; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 +; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 +; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 +; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64 +; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80 +; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96 +; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112 ; GCN-NOHSA-VI-NEXT: s_addc_u32 s89, s89, 0 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 -; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v15 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v14 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v15 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v14 -; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill -; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) -; GCN-NOHSA-VI-NEXT: buffer_store_dword v1, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill -; GCN-NOHSA-VI-NEXT: buffer_store_dword v2, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill -; GCN-NOHSA-VI-NEXT: buffer_store_dword v3, off, s[88:91], 0 offset:16 ; 4-byte 
Folded Spill -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v13 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v12 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v19 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v18 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v6, 0xffff, v13 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v12 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v17 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v16 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, 0xffff, v19 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v18 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v14, 0xffff, v17 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, 0xffff, v16 -; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:32 -; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:48 -; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v27, 16, v19 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v25, 16, v18 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v31, 16, v17 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v29, 16, v16 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v26, 0xffff, v19 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v24, 0xffff, v18 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v30, 0xffff, v17 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v28, 0xffff, v16 +; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(7) +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v35, 16, v3 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v33, 16, v2 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v34, 0xffff, v3 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v32, 0xffff, v2 +; GCN-NOHSA-VI-NEXT: buffer_store_dword v32, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) +; GCN-NOHSA-VI-NEXT: buffer_store_dword v33, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill +; GCN-NOHSA-VI-NEXT: buffer_store_dword v34, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill +; GCN-NOHSA-VI-NEXT: buffer_store_dword v35, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v39, 16, v1 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v37, 16, v0 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v38, 0xffff, v1 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v36, 0xffff, v0 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v29 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v28 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v29 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v28 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v43, 16, v7 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v41, 16, v6 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v35, 16, v5 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v33, 16, v4 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v42, 0xffff, v7 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v40, 0xffff, v6 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v34, 0xffff, v5 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v32, 0xffff, v4 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v11 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v10 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v47, 16, v9 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v45, 16, v8 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v6, 0xffff, v11 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v10 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v46, 0xffff, v9 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v44, 0xffff, v8 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v15 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v14 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v51, 16, v13 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v49, 16, v12 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, 0xffff, v15 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v14 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v50, 0xffff, v13 +; 
GCN-NOHSA-VI-NEXT: v_and_b32_e32 v48, 0xffff, v12 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v19 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v18 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v55, 16, v17 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v53, 16, v16 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v14, 0xffff, v19 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, 0xffff, v18 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v54, 0xffff, v17 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v52, 0xffff, v16 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v23 ; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v22 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v35, 16, v21 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v33, 16, v20 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v59, 16, v21 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v57, 16, v20 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v18, 0xffff, v23 ; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, 0xffff, v22 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v34, 0xffff, v21 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v32, 0xffff, v20 -; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:64 -; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[36:39], off, s[8:11], 0 offset:80 -; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v43, 16, v23 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v41, 16, v22 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v47, 16, v21 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v45, 16, v20 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v42, 0xffff, v23 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v40, 0xffff, v22 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v46, 0xffff, v21 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v44, 0xffff, v20 -; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v23, 16, v39 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v21, 16, v38 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v51, 16, v37 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v49, 16, v36 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v22, 0xffff, v39 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v20, 0xffff, v38 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v50, 0xffff, v37 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v48, 0xffff, v36 -; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[36:39], off, s[8:11], 0 offset:96 -; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[52:55], off, s[8:11], 0 offset:112 -; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v59, 16, v39 -; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v53 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v52 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v53 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v52 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v57, 16, v38 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v63, 16, v37 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v61, 16, v36 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v58, 0xffff, v39 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v56, 0xffff, v38 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v62, 0xffff, v37 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v60, 0xffff, v36 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v39, 16, v55 -; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v37, 16, v54 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v38, 0xffff, v55 -; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v36, 0xffff, v54 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v58, 0xffff, v21 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v56, 0xffff, v20 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v23, 16, v27 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v21, 16, v26 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v63, 16, v25 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v61, 16, v24 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v22, 0xffff, v27 +; 
GCN-NOHSA-VI-NEXT: v_and_b32_e32 v20, 0xffff, v26 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v62, 0xffff, v25 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v60, 0xffff, v24 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v27, 16, v31 +; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v25, 16, v30 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v26, 0xffff, v31 +; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v24, 0xffff, v30 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:240 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:240 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[60:63], off, s[0:3], 0 offset:192 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:208 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:160 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:176 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:128 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:144 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:80 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:208 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:160 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:128 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:144 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:96 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:64 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:32 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:48 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[0:3], 0 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload ; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:8 ; 4-byte Folded Reload ; GCN-NOHSA-VI-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:12 ; 4-byte Folded Reload @@ -4601,110 +4576,107 @@ ; GCN-NOHSA-VI-NEXT: s_add_u32 s88, s88, s3 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 +; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 ; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 +; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 +; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 +; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 +; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 +; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 +; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64 +; 
GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80 +; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96 +; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112 +; GCN-NOHSA-VI-NEXT: s_addc_u32 s89, s89, 0 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 ; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 -; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6 -; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7 -; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2 -; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3 -; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:112 -; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:96 -; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:80 -; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:64 -; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 -; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:16 -; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[32:35], off, s[4:7], 0 offset:32 -; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[36:39], off, s[4:7], 0 offset:48 -; GCN-NOHSA-VI-NEXT: s_addc_u32 s89, s89, 0 -; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(6) -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v63, 16, v13 -; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(5) -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v59, 16, v17 -; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(4) -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v55, 16, v21 -; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v27 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v26 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v27, 0, 16 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v26, 0, 16 -; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill +; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(7) +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v35, 16, v3 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v33, 16, v2 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v34, v3, 0, 16 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v32, v2, 0, 16 +; GCN-NOHSA-VI-NEXT: buffer_store_dword v32, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill ; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) -; GCN-NOHSA-VI-NEXT: buffer_store_dword v1, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill -; GCN-NOHSA-VI-NEXT: buffer_store_dword v2, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill -; GCN-NOHSA-VI-NEXT: buffer_store_dword v3, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v9 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v8 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v9, 0, 16 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v8, 0, 16 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 16, v25 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 16, v24 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v25, 0, 16 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v24, 0, 16 +; GCN-NOHSA-VI-NEXT: buffer_store_dword v33, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill +; GCN-NOHSA-VI-NEXT: buffer_store_dword v34, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill +; GCN-NOHSA-VI-NEXT: buffer_store_dword v35, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v39, 16, v1 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v37, 16, v0 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v38, v1, 0, 16 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v36, v0, 0, 16 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v29 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v28 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v29, 0, 16 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v28, 0, 16 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v35, 16, v7 +; 
GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v33, 16, v6 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v34, v7, 0, 16 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v32, v6, 0, 16 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v43, 16, v5 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v41, 16, v4 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v42, v5, 0, 16 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v40, v4, 0, 16 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 16, v11 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 16, v10 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v11, 0, 16 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v10, 0, 16 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v47, 16, v9 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v45, 16, v8 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v46, v9, 0, 16 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v44, v8, 0, 16 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 16, v15 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 16, v14 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v15, 0, 16 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v14, 0, 16 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v51, 16, v13 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v49, 16, v12 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v50, v13, 0, 16 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v48, v12, 0, 16 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 16, v19 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 16, v18 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v19, 0, 16 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v18, 0, 16 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v55, 16, v17 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v53, 16, v16 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v54, v17, 0, 16 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v52, v16, 0, 16 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 16, v23 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 16, v22 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v23, 0, 16 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v22, 0, 16 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v59, 16, v21 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v57, 16, v20 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v58, v21, 0, 16 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v56, v20, 0, 16 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v23, 16, v27 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v21, 16, v26 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v22, v27, 0, 16 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v20, v26, 0, 16 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v63, 16, v25 +; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v61, 16, v24 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v62, v25, 0, 16 +; GCN-NOHSA-VI-NEXT: v_bfe_i32 v60, v24, 0, 16 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v27, 16, v31 ; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v25, 16, v30 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v26, v31, 0, 16 ; GCN-NOHSA-VI-NEXT: v_bfe_i32 v24, v30, 0, 16 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v43, 16, v29 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v41, 16, v28 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v42, v29, 0, 16 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v40, v28, 0, 16 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v31, 16, v35 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v29, 16, v34 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v30, v35, 0, 16 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v28, v34, 0, 16 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v47, 16, v33 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v45, 16, v32 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v46, v33, 0, 16 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v44, v32, 0, 16 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v35, 16, v39 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v33, 16, v38 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v34, v39, 0, 16 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v32, v38, 0, 16 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v51, 16, v37 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v49, 16, v36 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v50, v37, 0, 16 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v48, v36, 
0, 16 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v39, 16, v23 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v37, 16, v22 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v38, v23, 0, 16 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v36, v22, 0, 16 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v53, 16, v20 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v54, v21, 0, 16 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v52, v20, 0, 16 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v22, 16, v19 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v20, 16, v18 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v21, v19, 0, 16 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v19, v18, 0, 16 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v57, 16, v16 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v58, v17, 0, 16 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v56, v16, 0, 16 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v18, 16, v15 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 16, v14 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v17, v15, 0, 16 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v14, 0, 16 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v61, 16, v12 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v62, v13, 0, 16 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v60, v12, 0, 16 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 16, v11 -; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 16, v10 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v11, 0, 16 -; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v10, 0, 16 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:240 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:240 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[60:63], off, s[0:3], 0 offset:192 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:208 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:208 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:160 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:176 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:128 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:144 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:144 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:96 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:112 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:64 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:80 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80 ; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:32 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:48 -; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:48 +; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[0:3], 0 ; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload ; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:8 ; 4-byte Folded Reload ; GCN-NOHSA-VI-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:12 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll b/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll --- a/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll +++ 
b/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll @@ -5,22 +5,22 @@ ; CHECK-LABEL: {{^}}test_workitem_id_x_known_max_range: ; CHECK-NOT: v0 ; CHECK: {{flat|buffer}}_store_dword {{.*}}v0 -define amdgpu_kernel void @test_workitem_id_x_known_max_range(i32 addrspace(1)* nocapture %out) #0 { +define amdgpu_kernel void @test_workitem_id_x_known_max_range(ptr addrspace(1) nocapture %out) #0 { entry: %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0 %and = and i32 %id, 1023 - store i32 %and, i32 addrspace(1)* %out, align 4 + store i32 %and, ptr addrspace(1) %out, align 4 ret void } ; CHECK-LABEL: {{^}}test_workitem_id_x_known_trunc_1_bit_range: ; CHECK-NOT: v_and_b32 ; CHECK: {{flat|buffer}}_store_dword {{.*}}v0 -define amdgpu_kernel void @test_workitem_id_x_known_trunc_1_bit_range(i32 addrspace(1)* nocapture %out) #0 { +define amdgpu_kernel void @test_workitem_id_x_known_trunc_1_bit_range(ptr addrspace(1) nocapture %out) #0 { entry: %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0 %and = and i32 %id, 511 - store i32 %and, i32 addrspace(1)* %out, align 4 + store i32 %and, ptr addrspace(1) %out, align 4 ret void } @@ -28,11 +28,11 @@ ; CHECK-NOT: v0 ; CHECK-NOT: v_and_b32 ; CHECK: {{flat|buffer}}_store_dword {{.*}}v0 -define amdgpu_kernel void @test_workitem_id_x_known_max_range_m1(i32 addrspace(1)* nocapture %out) #0 { +define amdgpu_kernel void @test_workitem_id_x_known_max_range_m1(ptr addrspace(1) nocapture %out) #0 { entry: %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !1 %and = and i32 %id, 255 - store i32 %and, i32 addrspace(1)* %out, align 4 + store i32 %and, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir --- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir +++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir @@ -4146,10 +4146,10 @@ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4) - ; GFX908-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec ; GFX908-NEXT: undef %4.sub1:sreg_64 = S_MOV_B32 0 - ; GFX908-NEXT: undef %4.sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 ; GFX908-NEXT: %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0 + ; GFX908-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec + ; GFX908-NEXT: undef %4.sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 ; GFX908-NEXT: %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0 @@ -4488,10 +4488,10 @@ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4) - ; GFX908-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 
= V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec ; GFX908-NEXT: undef %4.sub1:sreg_64 = S_MOV_B32 0 - ; GFX908-NEXT: undef %4.sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 ; GFX908-NEXT: %5:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0 + ; GFX908-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec + ; GFX908-NEXT: undef %4.sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 ; GFX908-NEXT: %6:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: %7:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0 ; GFX908-NEXT: %8:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0 diff --git a/llvm/test/CodeGen/AMDGPU/mad-combine.ll b/llvm/test/CodeGen/AMDGPU/mad-combine.ll --- a/llvm/test/CodeGen/AMDGPU/mad-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-combine.ll @@ -33,20 +33,20 @@ ; SI-DENORM: buffer_store_dword [[RESULT]] ; SI-STD: buffer_store_dword [[C]] -define amdgpu_kernel void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { +define amdgpu_kernel void @combine_to_mad_f32_0(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 - %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2 + %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid - %a = load volatile float, float addrspace(1)* %gep.0 - %b = load volatile float, float addrspace(1)* %gep.1 - %c = load volatile float, float addrspace(1)* %gep.2 + %a = load volatile float, ptr addrspace(1) %gep.0 + %b = load volatile float, ptr addrspace(1) %gep.1 + %c = load volatile float, ptr addrspace(1) %gep.2 %mul = fmul float %a, %b %fma = fadd float %mul, %c - store float %fma, float addrspace(1)* %gep.out + store float %fma, ptr addrspace(1) %gep.out ret void } @@ -72,26 +72,26 @@ ; SI-STD-DAG: buffer_store_dword [[C]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI-STD-DAG: buffer_store_dword [[D]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI: s_endpgm -define amdgpu_kernel void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { +define amdgpu_kernel void @combine_to_mad_f32_0_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 - %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid - %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1 - - %a = load volatile float, float addrspace(1)* %gep.0 - %b = load volatile float, float addrspace(1)* %gep.1 - %c = load volatile float, float addrspace(1)* %gep.2 - %d = 
load volatile float, float addrspace(1)* %gep.3 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2 + %gep.3 = getelementptr float, ptr addrspace(1) %gep.0, i32 3 + %gep.out.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid + %gep.out.1 = getelementptr float, ptr addrspace(1) %gep.out.0, i32 1 + + %a = load volatile float, ptr addrspace(1) %gep.0 + %b = load volatile float, ptr addrspace(1) %gep.1 + %c = load volatile float, ptr addrspace(1) %gep.2 + %d = load volatile float, ptr addrspace(1) %gep.3 %mul = fmul float %a, %b %fma0 = fadd float %mul, %c %fma1 = fadd float %mul, %d - store volatile float %fma0, float addrspace(1)* %gep.out.0 - store volatile float %fma1, float addrspace(1)* %gep.out.1 + store volatile float %fma0, ptr addrspace(1) %gep.out.0 + store volatile float %fma1, ptr addrspace(1) %gep.out.1 ret void } @@ -109,20 +109,20 @@ ; SI-DENORM: buffer_store_dword [[RESULT]] ; SI-STD: buffer_store_dword [[C]] -define amdgpu_kernel void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { +define amdgpu_kernel void @combine_to_mad_f32_1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 - %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2 + %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid - %a = load volatile float, float addrspace(1)* %gep.0 - %b = load volatile float, float addrspace(1)* %gep.1 - %c = load volatile float, float addrspace(1)* %gep.2 + %a = load volatile float, ptr addrspace(1) %gep.0 + %b = load volatile float, ptr addrspace(1) %gep.1 + %c = load volatile float, ptr addrspace(1) %gep.2 %mul = fmul float %a, %b %fma = fadd float %c, %mul - store float %fma, float addrspace(1)* %gep.out + store float %fma, ptr addrspace(1) %gep.out ret void } @@ -139,20 +139,20 @@ ; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]] ; SI: buffer_store_dword [[RESULT]] -define amdgpu_kernel void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { +define amdgpu_kernel void @combine_to_mad_fsub_0_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 - %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2 + %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid - %a = load volatile float, float addrspace(1)* %gep.0 - %b = load volatile float, float addrspace(1)* %gep.1 - %c = load volatile float, float addrspace(1)* %gep.2 + %a = load volatile float, ptr addrspace(1) %gep.0 + %b = load 
volatile float, ptr addrspace(1) %gep.1 + %c = load volatile float, ptr addrspace(1) %gep.2 %mul = fmul float %a, %b %fma = fsub float %mul, %c - store float %fma, float addrspace(1)* %gep.out + store float %fma, ptr addrspace(1) %gep.out ret void } @@ -176,25 +176,25 @@ ; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI: s_endpgm -define amdgpu_kernel void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { +define amdgpu_kernel void @combine_to_mad_fsub_0_f32_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 - %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid - %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1 - - %a = load volatile float, float addrspace(1)* %gep.0 - %b = load volatile float, float addrspace(1)* %gep.1 - %c = load volatile float, float addrspace(1)* %gep.2 - %d = load volatile float, float addrspace(1)* %gep.3 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2 + %gep.3 = getelementptr float, ptr addrspace(1) %gep.0, i32 3 + %gep.out.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid + %gep.out.1 = getelementptr float, ptr addrspace(1) %gep.out.0, i32 1 + + %a = load volatile float, ptr addrspace(1) %gep.0 + %b = load volatile float, ptr addrspace(1) %gep.1 + %c = load volatile float, ptr addrspace(1) %gep.2 + %d = load volatile float, ptr addrspace(1) %gep.3 %mul = fmul float %a, %b %fma0 = fsub float %mul, %c %fma1 = fsub float %mul, %d - store volatile float %fma0, float addrspace(1)* %gep.out.0 - store volatile float %fma1, float addrspace(1)* %gep.out.1 + store volatile float %fma0, ptr addrspace(1) %gep.out.0 + store volatile float %fma1, ptr addrspace(1) %gep.out.1 ret void } @@ -211,20 +211,20 @@ ; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]] ; SI: buffer_store_dword [[RESULT]] -define amdgpu_kernel void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { +define amdgpu_kernel void @combine_to_mad_fsub_1_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 - %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2 + %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid - %a = load volatile float, float addrspace(1)* %gep.0 - %b = load volatile float, float addrspace(1)* %gep.1 - %c = load volatile float, float addrspace(1)* %gep.2 + %a = load volatile float, ptr addrspace(1) %gep.0 + %b = load 
volatile float, ptr addrspace(1) %gep.1 + %c = load volatile float, ptr addrspace(1) %gep.2 %mul = fmul float %a, %b %fma = fsub float %c, %mul - store float %fma, float addrspace(1)* %gep.out + store float %fma, ptr addrspace(1) %gep.out ret void } @@ -248,25 +248,25 @@ ; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI: s_endpgm -define amdgpu_kernel void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { +define amdgpu_kernel void @combine_to_mad_fsub_1_f32_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 - %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid - %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1 - - %a = load volatile float, float addrspace(1)* %gep.0 - %b = load volatile float, float addrspace(1)* %gep.1 - %c = load volatile float, float addrspace(1)* %gep.2 - %d = load volatile float, float addrspace(1)* %gep.3 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2 + %gep.3 = getelementptr float, ptr addrspace(1) %gep.0, i32 3 + %gep.out.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid + %gep.out.1 = getelementptr float, ptr addrspace(1) %gep.out.0, i32 1 + + %a = load volatile float, ptr addrspace(1) %gep.0 + %b = load volatile float, ptr addrspace(1) %gep.1 + %c = load volatile float, ptr addrspace(1) %gep.2 + %d = load volatile float, ptr addrspace(1) %gep.3 %mul = fmul float %a, %b %fma0 = fsub float %c, %mul %fma1 = fsub float %d, %mul - store volatile float %fma0, float addrspace(1)* %gep.out.0 - store volatile float %fma1, float addrspace(1)* %gep.out.1 + store volatile float %fma0, ptr addrspace(1) %gep.out.0 + store volatile float %fma1, ptr addrspace(1) %gep.out.1 ret void } @@ -284,22 +284,22 @@ ; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]] ; SI: buffer_store_dword [[RESULT]] -define amdgpu_kernel void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { +define amdgpu_kernel void @combine_to_mad_fsub_2_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 - %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2 + %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid - %a = load volatile float, float addrspace(1)* %gep.0 - %b = load volatile float, float addrspace(1)* %gep.1 - %c = load volatile float, float addrspace(1)* %gep.2 + %a = load volatile float, ptr addrspace(1) %gep.0 + %b = load 
volatile float, ptr addrspace(1) %gep.1 + %c = load volatile float, ptr addrspace(1) %gep.2 %mul = fmul float %a, %b %mul.neg = fneg float %mul %fma = fsub float %mul.neg, %c - store float %fma, float addrspace(1)* %gep.out + store float %fma, ptr addrspace(1) %gep.out ret void } @@ -323,27 +323,27 @@ ; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI: s_endpgm -define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { +define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_neg(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 - %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid - %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1 - - %a = load volatile float, float addrspace(1)* %gep.0 - %b = load volatile float, float addrspace(1)* %gep.1 - %c = load volatile float, float addrspace(1)* %gep.2 - %d = load volatile float, float addrspace(1)* %gep.3 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2 + %gep.3 = getelementptr float, ptr addrspace(1) %gep.0, i32 3 + %gep.out.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid + %gep.out.1 = getelementptr float, ptr addrspace(1) %gep.out.0, i32 1 + + %a = load volatile float, ptr addrspace(1) %gep.0 + %b = load volatile float, ptr addrspace(1) %gep.1 + %c = load volatile float, ptr addrspace(1) %gep.2 + %d = load volatile float, ptr addrspace(1) %gep.3 %mul = fmul float %a, %b %mul.neg = fneg float %mul %fma0 = fsub float %mul.neg, %c %fma1 = fsub float %mul.neg, %d - store volatile float %fma0, float addrspace(1)* %gep.out.0 - store volatile float %fma1, float addrspace(1)* %gep.out.1 + store volatile float %fma0, ptr addrspace(1) %gep.out.0 + store volatile float %fma1, ptr addrspace(1) %gep.out.1 ret void } @@ -367,27 +367,27 @@ ; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI: s_endpgm -define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { +define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_mul(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 - %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid - %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1 - - %a = load volatile float, float addrspace(1)* %gep.0 - %b = load volatile float, float addrspace(1)* %gep.1 - %c = 
load volatile float, float addrspace(1)* %gep.2 - %d = load volatile float, float addrspace(1)* %gep.3 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2 + %gep.3 = getelementptr float, ptr addrspace(1) %gep.0, i32 3 + %gep.out.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid + %gep.out.1 = getelementptr float, ptr addrspace(1) %gep.out.0, i32 1 + + %a = load volatile float, ptr addrspace(1) %gep.0 + %b = load volatile float, ptr addrspace(1) %gep.1 + %c = load volatile float, ptr addrspace(1) %gep.2 + %d = load volatile float, ptr addrspace(1) %gep.3 %mul = fmul float %a, %b %mul.neg = fneg float %mul %fma0 = fsub float %mul.neg, %c %fma1 = fsub float %mul, %d - store volatile float %fma0, float addrspace(1)* %gep.out.0 - store volatile float %fma1, float addrspace(1)* %gep.out.1 + store volatile float %fma0, ptr addrspace(1) %gep.out.0 + store volatile float %fma1, ptr addrspace(1) %gep.out.1 ret void } @@ -412,26 +412,26 @@ ; SI-DENORM: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[C]] ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -define amdgpu_kernel void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { +define amdgpu_kernel void @aggressive_combine_to_mad_fsub_0_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 - %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4 - %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid - - %x = load volatile float, float addrspace(1)* %gep.0 - %y = load volatile float, float addrspace(1)* %gep.1 - %z = load volatile float, float addrspace(1)* %gep.2 - %u = load volatile float, float addrspace(1)* %gep.3 - %v = load volatile float, float addrspace(1)* %gep.4 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2 + %gep.3 = getelementptr float, ptr addrspace(1) %gep.0, i32 3 + %gep.4 = getelementptr float, ptr addrspace(1) %gep.0, i32 4 + %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid + + %x = load volatile float, ptr addrspace(1) %gep.0 + %y = load volatile float, ptr addrspace(1) %gep.1 + %z = load volatile float, ptr addrspace(1) %gep.2 + %u = load volatile float, ptr addrspace(1) %gep.3 + %v = load volatile float, ptr addrspace(1) %gep.4 %tmp0 = fmul float %u, %v %tmp1 = call float @llvm.fma.f32(float %x, float %y, float %tmp0) #0 %tmp2 = fsub float %tmp1, %z - store float %tmp2, float addrspace(1)* %gep.out + store float %tmp2, ptr addrspace(1) %gep.out ret void } @@ -455,26 +455,26 @@ ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: s_endpgm -define amdgpu_kernel void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { +define amdgpu_kernel void @aggressive_combine_to_mad_fsub_1_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 { %tid = tail call i32 
@llvm.amdgcn.workitem.id.x() #0 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 - %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4 - %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid - - %x = load volatile float, float addrspace(1)* %gep.0 - %y = load volatile float, float addrspace(1)* %gep.1 - %z = load volatile float, float addrspace(1)* %gep.2 - %u = load volatile float, float addrspace(1)* %gep.3 - %v = load volatile float, float addrspace(1)* %gep.4 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2 + %gep.3 = getelementptr float, ptr addrspace(1) %gep.0, i32 3 + %gep.4 = getelementptr float, ptr addrspace(1) %gep.0, i32 4 + %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid + + %x = load volatile float, ptr addrspace(1) %gep.0 + %y = load volatile float, ptr addrspace(1) %gep.1 + %z = load volatile float, ptr addrspace(1) %gep.2 + %u = load volatile float, ptr addrspace(1) %gep.3 + %v = load volatile float, ptr addrspace(1) %gep.4 %tmp0 = fmul float %u, %v %tmp1 = call float @llvm.fma.f32(float %y, float %z, float %tmp0) #0 %tmp2 = fsub float %x, %tmp1 - store float %tmp2, float addrspace(1)* %gep.out + store float %tmp2, ptr addrspace(1) %gep.out ret void } @@ -505,26 +505,26 @@ ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: s_endpgm -define amdgpu_kernel void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { +define amdgpu_kernel void @aggressive_combine_to_mad_fsub_2_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 - %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4 - %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid - - %x = load volatile float, float addrspace(1)* %gep.0 - %y = load volatile float, float addrspace(1)* %gep.1 - %z = load volatile float, float addrspace(1)* %gep.2 - %u = load volatile float, float addrspace(1)* %gep.3 - %v = load volatile float, float addrspace(1)* %gep.4 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2 + %gep.3 = getelementptr float, ptr addrspace(1) %gep.0, i32 3 + %gep.4 = getelementptr float, ptr addrspace(1) %gep.0, i32 4 + %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid + + %x = load volatile float, ptr addrspace(1) %gep.0 + %y = load volatile float, ptr addrspace(1) %gep.1 + %z = load volatile float, ptr addrspace(1) %gep.2 + %u = load volatile float, ptr addrspace(1) %gep.3 + %v = load volatile float, ptr addrspace(1) %gep.4 %tmp0 = fmul float %u, %v %tmp1 = call float @llvm.fmuladd.f32(float %x, float %y, float %tmp0) #0 %tmp2 = fsub float %tmp1, %z - store float %tmp2, float addrspace(1)* %gep.out + store float 
%tmp2, ptr addrspace(1) %gep.out ret void } @@ -556,27 +556,27 @@ ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: s_endpgm -define amdgpu_kernel void @aggressive_combine_to_mad_fsub_3_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { +define amdgpu_kernel void @aggressive_combine_to_mad_fsub_3_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 - %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3 - %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4 - %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid - - %x = load volatile float, float addrspace(1)* %gep.0 - %y = load volatile float, float addrspace(1)* %gep.1 - %z = load volatile float, float addrspace(1)* %gep.2 - %u = load volatile float, float addrspace(1)* %gep.3 - %v = load volatile float, float addrspace(1)* %gep.4 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 + %gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2 + %gep.3 = getelementptr float, ptr addrspace(1) %gep.0, i32 3 + %gep.4 = getelementptr float, ptr addrspace(1) %gep.0, i32 4 + %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid + + %x = load volatile float, ptr addrspace(1) %gep.0 + %y = load volatile float, ptr addrspace(1) %gep.1 + %z = load volatile float, ptr addrspace(1) %gep.2 + %u = load volatile float, ptr addrspace(1) %gep.3 + %v = load volatile float, ptr addrspace(1) %gep.4 ; nsz flag is needed since this combine may change sign of zero %tmp0 = fmul nsz float %u, %v %tmp1 = call nsz float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) #0 %tmp2 = fsub nsz float %x, %tmp1 - store float %tmp2, float addrspace(1)* %gep.out + store float %tmp2, ptr addrspace(1) %gep.out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll --- a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll @@ -12,7 +12,7 @@ br label %if.end if.end: ; preds = %entry - %0 = load i32, i32* undef, align 4 + %0 = load i32, ptr undef, align 4 %mul = mul i32 %0, 3 %cmp13 = icmp eq i32 %mul, 989619 br i1 %cmp13, label %cleanup.cont, label %if.end15 @@ -33,11 +33,9 @@ br label %if.end60 if.end60: ; preds = %if.end60.loopexit857, %while.cond.i - %1 = load i8, i8 addrspace(1)* getelementptr inbounds ([4096 x i8], [4096 x i8] addrspace(1)* @_RSENC_gDcd_______________________________, i64 0, i64 655), align 1 - %2 = getelementptr inbounds [128 x i8], [128 x i8] addrspace(5)* %runtimeVersionCopy, i32 0, i32 0 - %arrayidx144260.5 = getelementptr inbounds [128 x i8], [128 x i8] addrspace(5)* %runtimeVersionCopy, i32 0, i32 5 - %3 = getelementptr inbounds [128 x i8], [128 x i8] addrspace(5)* %licenseVersionCopy, i32 0, i32 0 - %arrayidx156258.5 = getelementptr inbounds [128 x i8], [128 x i8] addrspace(5)* %licenseVersionCopy, i32 0, i32 5 + %1 = load i8, ptr addrspace(1) getelementptr inbounds ([4096 x i8], ptr addrspace(1) @_RSENC_gDcd_______________________________, i64 0, i64 655), align 1 + %arrayidx144260.5 = getelementptr inbounds [128 x i8], ptr addrspace(5) %runtimeVersionCopy, i32 0, i32 5 
+ %arrayidx156258.5 = getelementptr inbounds [128 x i8], ptr addrspace(5) %licenseVersionCopy, i32 0, i32 5 switch i8 0, label %if.end5.i [ i8 45, label %if.then.i i8 43, label %if.then3.i @@ -50,9 +48,9 @@ br label %if.end5.i if.end5.i: ; preds = %if.then3.i, %if.end60 - %pS.addr.0.i = phi i8 addrspace(5)* [ undef, %if.then3.i ], [ %2, %if.end60 ] - %4 = load i8, i8 addrspace(5)* %pS.addr.0.i, align 1 - %conv612.i = sext i8 %4 to i32 + %pS.addr.0.i = phi ptr addrspace(5) [ undef, %if.then3.i ], [ %runtimeVersionCopy, %if.end60 ] + %2 = load i8, ptr addrspace(5) %pS.addr.0.i, align 1 + %conv612.i = sext i8 %2 to i32 %sub13.i = add nsw i32 %conv612.i, -48 %cmp714.i = icmp ugt i32 %sub13.i, 9 switch i8 undef, label %if.end5.i314 [ @@ -67,9 +65,9 @@ br label %if.end5.i314 if.end5.i314: ; preds = %if.then3.i308, %if.end5.i - %pS.addr.0.i309 = phi i8 addrspace(5)* [ undef, %if.then3.i308 ], [ %3, %if.end5.i ] - %5 = load i8, i8 addrspace(5)* %pS.addr.0.i309, align 1 - %conv612.i311 = sext i8 %5 to i32 + %pS.addr.0.i309 = phi ptr addrspace(5) [ undef, %if.then3.i308 ], [ %licenseVersionCopy, %if.end5.i ] + %3 = load i8, ptr addrspace(5) %pS.addr.0.i309, align 1 + %conv612.i311 = sext i8 %3 to i32 %sub13.i312 = add nsw i32 %conv612.i311, -48 %cmp714.i313 = icmp ugt i32 %sub13.i312, 9 switch i8 undef, label %if.end5.i338 [ @@ -84,9 +82,9 @@ br label %if.end5.i338 if.end5.i338: ; preds = %if.then3.i332, %if.end5.i314 - %pS.addr.0.i333 = phi i8 addrspace(5)* [ undef, %if.then3.i332 ], [ %arrayidx144260.5, %if.end5.i314 ] - %6 = load i8, i8 addrspace(5)* %pS.addr.0.i333, align 1 - %conv612.i335 = sext i8 %6 to i32 + %pS.addr.0.i333 = phi ptr addrspace(5) [ undef, %if.then3.i332 ], [ %arrayidx144260.5, %if.end5.i314 ] + %4 = load i8, ptr addrspace(5) %pS.addr.0.i333, align 1 + %conv612.i335 = sext i8 %4 to i32 %sub13.i336 = add nsw i32 %conv612.i335, -48 %cmp714.i337 = icmp ugt i32 %sub13.i336, 9 switch i8 undef, label %if.end5.i362 [ @@ -101,15 +99,15 @@ br label %if.end5.i362 if.end5.i362: ; preds = %if.then3.i356, %if.end5.i338 - %pS.addr.0.i357 = phi i8 addrspace(5)* [ undef, %if.then3.i356 ], [ %arrayidx156258.5, %if.end5.i338 ] - %7 = load i8, i8 addrspace(5)* %pS.addr.0.i357, align 1 - %conv612.i359 = sext i8 %7 to i32 + %pS.addr.0.i357 = phi ptr addrspace(5) [ undef, %if.then3.i356 ], [ %arrayidx156258.5, %if.end5.i338 ] + %5 = load i8, ptr addrspace(5) %pS.addr.0.i357, align 1 + %conv612.i359 = sext i8 %5 to i32 %sub13.i360 = add nsw i32 %conv612.i359, -48 %cmp714.i361 = icmp ugt i32 %sub13.i360, 9 - store i8 0, i8 addrspace(5)* undef, align 16 - %8 = load i8, i8 addrspace(1)* getelementptr inbounds ([4096 x i8], [4096 x i8] addrspace(1)* @_RSENC_gDcd_______________________________, i64 0, i64 1153), align 1 - %arrayidx232250.1 = getelementptr inbounds [128 x i8], [128 x i8] addrspace(5)* %pD10, i32 0, i32 1 - store i8 %8, i8 addrspace(5)* %arrayidx232250.1, align 1 + store i8 0, ptr addrspace(5) undef, align 16 + %6 = load i8, ptr addrspace(1) getelementptr inbounds ([4096 x i8], ptr addrspace(1) @_RSENC_gDcd_______________________________, i64 0, i64 1153), align 1 + %arrayidx232250.1 = getelementptr inbounds [128 x i8], ptr addrspace(5) %pD10, i32 0, i32 1 + store i8 %6, ptr addrspace(5) %arrayidx232250.1, align 1 switch i8 undef, label %if.end5.i400 [ i8 45, label %if.then.i392 i8 43, label %if.then3.i394 @@ -122,13 +120,13 @@ br label %if.end5.i400 if.end5.i400: ; preds = %if.then3.i394, %if.end5.i362 - %pS.addr.0.i395 = phi i8 addrspace(5)* [ %arrayidx232250.1, %if.then3.i394 ], [ 
undef, %if.end5.i362 ] - %9 = load i8, i8 addrspace(5)* %pS.addr.0.i395, align 1 - %conv612.i397 = sext i8 %9 to i32 + %pS.addr.0.i395 = phi ptr addrspace(5) [ %arrayidx232250.1, %if.then3.i394 ], [ undef, %if.end5.i362 ] + %7 = load i8, ptr addrspace(5) %pS.addr.0.i395, align 1 + %conv612.i397 = sext i8 %7 to i32 %sub13.i398 = add nsw i32 %conv612.i397, -48 %cmp714.i399 = icmp ugt i32 %sub13.i398, 9 - %10 = load i8, i8* undef, align 1 - %cmp9.not.i500 = icmp eq i8 0, %10 + %8 = load i8, ptr undef, align 1 + %cmp9.not.i500 = icmp eq i8 0, %8 br label %land.lhs.true402.critedge land.lhs.true402.critedge: ; preds = %if.end5.i400 diff --git a/llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.ll b/llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.ll --- a/llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.ll +++ b/llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.ll @@ -12,17 +12,16 @@ ; GCN-COUNT3: ds_write_b64 define amdgpu_kernel void @out_of_order_merge() { entry: - %gep1 = getelementptr inbounds [96 x double], [96 x double] addrspace(3)* @Ldisp, i32 0, i32 0 - %gep2 = getelementptr inbounds [96 x double], [96 x double] addrspace(3)* @Ldisp, i32 0, i32 1 - %tmp12 = load <2 x double>, <2 x double> addrspace(3)* bitcast (double addrspace(3)* getelementptr inbounds ([9 x double], [9 x double] addrspace(3)* @L, i32 0, i32 1) to <2 x double> addrspace(3)*), align 8 + %gep2 = getelementptr inbounds [96 x double], ptr addrspace(3) @Ldisp, i32 0, i32 1 + %tmp12 = load <2 x double>, ptr addrspace(3) getelementptr inbounds ([9 x double], ptr addrspace(3) @L, i32 0, i32 1), align 8 %tmp14 = extractelement <2 x double> %tmp12, i32 0 %tmp15 = extractelement <2 x double> %tmp12, i32 1 %add50.i = fadd double %tmp14, %tmp15 - store double %add50.i, double addrspace(3)* %gep1, align 8 - %tmp16 = load double, double addrspace(3)* getelementptr inbounds ([9 x double], [9 x double] addrspace(3)* @L, i32 1, i32 0), align 8 - store double %tmp16, double addrspace(3)* %gep2, align 8 - %tmp17 = load <2 x double>, <2 x double> addrspace(3)* bitcast (double addrspace(3)* getelementptr inbounds ([9 x double], [9 x double] addrspace(3)* @L, i32 2, i32 1) to <2 x double> addrspace(3)*), align 8 + store double %add50.i, ptr addrspace(3) @Ldisp, align 8 + %tmp16 = load double, ptr addrspace(3) getelementptr inbounds ([9 x double], ptr addrspace(3) @L, i32 1, i32 0), align 8 + store double %tmp16, ptr addrspace(3) %gep2, align 8 + %tmp17 = load <2 x double>, ptr addrspace(3) getelementptr inbounds ([9 x double], ptr addrspace(3) @L, i32 2, i32 1), align 8 %tmp19 = extractelement <2 x double> %tmp17, i32 1 - store double %tmp19, double addrspace(3)* undef, align 8 + store double %tmp19, ptr addrspace(3) undef, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll b/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll --- a/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll @@ -13,15 +13,14 @@ ; CHECK: tbuffer_store_format_xyzw v[0:3], define amdgpu_vs void @main(i32 inreg %arg) { main_body: - %tmp = load float, float addrspace(3)* undef, align 4 - %tmp1 = load float, float addrspace(3)* undef, align 4 - store float %tmp, float addrspace(3)* null, align 4 + %tmp = load float, ptr addrspace(3) undef, align 4 + %tmp1 = load float, ptr addrspace(3) undef, align 4 + store float %tmp, ptr addrspace(3) null, align 4 %tmp2 = bitcast float %tmp to i32 %tmp3 = add nuw nsw i32 0, 1 %tmp4 = zext i32 %tmp3 to i64 - %tmp5 = getelementptr [8192 x i32], [8192 x i32] addrspace(3)* 
@tess_lds, i64 0, i64 %tmp4 - %tmp6 = bitcast i32 addrspace(3)* %tmp5 to float addrspace(3)* - store float %tmp1, float addrspace(3)* %tmp6, align 4 + %tmp5 = getelementptr [8192 x i32], ptr addrspace(3) @tess_lds, i64 0, i64 %tmp4 + store float %tmp1, ptr addrspace(3) %tmp5, align 4 %tmp7 = bitcast float %tmp1 to i32 %tmp8 = insertelement <4 x i32> undef, i32 %tmp2, i32 0 %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp7, i32 1 diff --git a/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll b/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll --- a/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll +++ b/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll @@ -6,15 +6,14 @@ ; CHECK: ds_read_b32 ; CHECK: ds_write_b32 define amdgpu_vs void @test1(i32 %v) #0 { - %p0 = getelementptr i32, i32 addrspace(3)* null, i32 0 - %p1 = getelementptr i32, i32 addrspace(3)* null, i32 1 + %p1 = getelementptr i32, ptr addrspace(3) null, i32 1 - store i32 %v, i32 addrspace(3)* %p0 + store i32 %v, ptr addrspace(3) null call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 %v, <4 x i32> undef, i32 0, i32 0, i32 68, i32 1) - %w = load i32, i32 addrspace(3)* %p0 - store i32 %w, i32 addrspace(3)* %p1 + %w = load i32, ptr addrspace(3) null + store i32 %w, ptr addrspace(3) %p1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/merge-stores.ll b/llvm/test/CodeGen/AMDGPU/merge-stores.ll --- a/llvm/test/CodeGen/AMDGPU/merge-stores.ll +++ b/llvm/test/CodeGen/AMDGPU/merge-stores.ll @@ -12,11 +12,11 @@ ; GCN-LABEL: {{^}}merge_global_store_2_constants_i8: ; GCN: buffer_store_short ; GCN: s_endpgm -define amdgpu_kernel void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 { - %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1 +define amdgpu_kernel void @merge_global_store_2_constants_i8(ptr addrspace(1) %out) #0 { + %out.gep.1 = getelementptr i8, ptr addrspace(1) %out, i32 1 - store i8 123, i8 addrspace(1)* %out.gep.1 - store i8 456, i8 addrspace(1)* %out, align 2 + store i8 123, ptr addrspace(1) %out.gep.1 + store i8 456, ptr addrspace(1) %out, align 2 ret void } @@ -24,31 +24,31 @@ ; GCN: buffer_store_byte ; GCN: buffer_store_byte ; GCN: s_endpgm -define amdgpu_kernel void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %out) #0 { - %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1 +define amdgpu_kernel void @merge_global_store_2_constants_i8_natural_align(ptr addrspace(1) %out) #0 { + %out.gep.1 = getelementptr i8, ptr addrspace(1) %out, i32 1 - store i8 123, i8 addrspace(1)* %out.gep.1 - store i8 456, i8 addrspace(1)* %out + store i8 123, ptr addrspace(1) %out.gep.1 + store i8 456, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}merge_global_store_2_constants_i16: ; GCN: buffer_store_dword v -define amdgpu_kernel void @merge_global_store_2_constants_i16(i16 addrspace(1)* %out) #0 { - %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1 +define amdgpu_kernel void @merge_global_store_2_constants_i16(ptr addrspace(1) %out) #0 { + %out.gep.1 = getelementptr i16, ptr addrspace(1) %out, i32 1 - store i16 123, i16 addrspace(1)* %out.gep.1 - store i16 456, i16 addrspace(1)* %out, align 4 + store i16 123, ptr addrspace(1) %out.gep.1 + store i16 456, ptr addrspace(1) %out, align 4 ret void } ; GCN-LABEL: {{^}}merge_global_store_2_constants_0_i16: ; GCN: buffer_store_dword v -define amdgpu_kernel void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 { - %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1 +define amdgpu_kernel void 
@merge_global_store_2_constants_0_i16(ptr addrspace(1) %out) #0 { + %out.gep.1 = getelementptr i16, ptr addrspace(1) %out, i32 1 - store i16 0, i16 addrspace(1)* %out.gep.1 - store i16 0, i16 addrspace(1)* %out, align 4 + store i16 0, ptr addrspace(1) %out.gep.1 + store i16 0, ptr addrspace(1) %out, align 4 ret void } @@ -56,11 +56,11 @@ ; GCN: buffer_store_short ; GCN: buffer_store_short ; GCN: s_endpgm -define amdgpu_kernel void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* %out) #0 { - %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1 +define amdgpu_kernel void @merge_global_store_2_constants_i16_natural_align(ptr addrspace(1) %out) #0 { + %out.gep.1 = getelementptr i16, ptr addrspace(1) %out, i32 1 - store i16 123, i16 addrspace(1)* %out.gep.1 - store i16 456, i16 addrspace(1)* %out + store i16 123, ptr addrspace(1) %out.gep.1 + store i16 456, ptr addrspace(1) %out ret void } @@ -68,21 +68,20 @@ ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b ; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]] -define amdgpu_kernel void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 { - %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 +define amdgpu_kernel void @merge_global_store_2_constants_i32(ptr addrspace(1) %out) #0 { + %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 - store i32 123, i32 addrspace(1)* %out.gep.1 - store i32 456, i32 addrspace(1)* %out + store i32 123, ptr addrspace(1) %out.gep.1 + store i32 456, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}merge_global_store_2_constants_i32_f32: ; GCN: buffer_store_dwordx2 -define amdgpu_kernel void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 { - %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 - %out.gep.1.bc = bitcast i32 addrspace(1)* %out.gep.1 to float addrspace(1)* - store float 1.0, float addrspace(1)* %out.gep.1.bc - store i32 456, i32 addrspace(1)* %out +define amdgpu_kernel void @merge_global_store_2_constants_i32_f32(ptr addrspace(1) %out) #0 { + %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 + store float 1.0, ptr addrspace(1) %out.gep.1 + store i32 456, ptr addrspace(1) %out ret void } @@ -90,11 +89,10 @@ ; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 4.0 ; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], 0x7b ; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]] -define amdgpu_kernel void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 { - %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 - %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)* - store i32 123, i32 addrspace(1)* %out.gep.1.bc - store float 4.0, float addrspace(1)* %out +define amdgpu_kernel void @merge_global_store_2_constants_f32_i32(ptr addrspace(1) %out) #0 { + %out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1 + store i32 123, ptr addrspace(1) %out.gep.1 + store float 4.0, ptr addrspace(1) %out ret void } @@ -104,62 +102,60 @@ ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x7b{{$}} ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x4d2{{$}} ; GCN: buffer_store_dwordx4 v[[[LO]]:[[HI]]] -define amdgpu_kernel void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 { - %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 - %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 - %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 +define amdgpu_kernel void @merge_global_store_4_constants_i32(ptr addrspace(1) %out) #0 { + 
%out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 + %out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2 + %out.gep.3 = getelementptr i32, ptr addrspace(1) %out, i32 3 - store i32 123, i32 addrspace(1)* %out.gep.1 - store i32 456, i32 addrspace(1)* %out.gep.2 - store i32 333, i32 addrspace(1)* %out.gep.3 - store i32 1234, i32 addrspace(1)* %out + store i32 123, ptr addrspace(1) %out.gep.1 + store i32 456, ptr addrspace(1) %out.gep.2 + store i32 333, ptr addrspace(1) %out.gep.3 + store i32 1234, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}merge_global_store_4_constants_f32_order: ; GCN: buffer_store_dwordx4 -define amdgpu_kernel void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 { - %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 - %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 - %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 +define amdgpu_kernel void @merge_global_store_4_constants_f32_order(ptr addrspace(1) %out) #0 { + %out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1 + %out.gep.2 = getelementptr float, ptr addrspace(1) %out, i32 2 + %out.gep.3 = getelementptr float, ptr addrspace(1) %out, i32 3 - store float 8.0, float addrspace(1)* %out - store float 1.0, float addrspace(1)* %out.gep.1 - store float 2.0, float addrspace(1)* %out.gep.2 - store float 4.0, float addrspace(1)* %out.gep.3 + store float 8.0, ptr addrspace(1) %out + store float 1.0, ptr addrspace(1) %out.gep.1 + store float 2.0, ptr addrspace(1) %out.gep.2 + store float 4.0, ptr addrspace(1) %out.gep.3 ret void } ; First store is out of order. ; GCN-LABEL: {{^}}merge_global_store_4_constants_f32: ; GCN: buffer_store_dwordx4 -define amdgpu_kernel void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { - %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 - %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 - %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 +define amdgpu_kernel void @merge_global_store_4_constants_f32(ptr addrspace(1) %out) #0 { + %out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1 + %out.gep.2 = getelementptr float, ptr addrspace(1) %out, i32 2 + %out.gep.3 = getelementptr float, ptr addrspace(1) %out, i32 3 - store float 1.0, float addrspace(1)* %out.gep.1 - store float 2.0, float addrspace(1)* %out.gep.2 - store float 4.0, float addrspace(1)* %out.gep.3 - store float 8.0, float addrspace(1)* %out + store float 1.0, ptr addrspace(1) %out.gep.1 + store float 2.0, ptr addrspace(1) %out.gep.2 + store float 4.0, ptr addrspace(1) %out.gep.3 + store float 8.0, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32: ; GCN-AA: buffer_store_dwordx4 v ; GCN: s_endpgm -define amdgpu_kernel void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 { - %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 - %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 - %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 +define amdgpu_kernel void @merge_global_store_4_constants_mixed_i32_f32(ptr addrspace(1) %out) #0 { + %out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1 + %out.gep.2 = getelementptr float, ptr addrspace(1) %out, i32 2 + %out.gep.3 = getelementptr float, ptr addrspace(1) %out, i32 3 - %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)* - %out.gep.3.bc = bitcast float addrspace(1)* 
%out.gep.3 to i32 addrspace(1)* - store i32 11, i32 addrspace(1)* %out.gep.1.bc - store float 2.0, float addrspace(1)* %out.gep.2 - store i32 17, i32 addrspace(1)* %out.gep.3.bc - store float 8.0, float addrspace(1)* %out + store i32 11, ptr addrspace(1) %out.gep.1 + store float 2.0, ptr addrspace(1) %out.gep.2 + store i32 17, ptr addrspace(1) %out.gep.3 + store float 8.0, ptr addrspace(1) %out ret void } @@ -169,108 +165,108 @@ ; CI-DAG: buffer_store_dwordx3 ; GCN-NOT: buffer_store_dword ; GCN: s_endpgm -define amdgpu_kernel void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 { - %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 - %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 +define amdgpu_kernel void @merge_global_store_3_constants_i32(ptr addrspace(1) %out) #0 { + %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 + %out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2 - store i32 123, i32 addrspace(1)* %out.gep.1 - store i32 456, i32 addrspace(1)* %out.gep.2 - store i32 1234, i32 addrspace(1)* %out + store i32 123, ptr addrspace(1) %out.gep.1 + store i32 456, ptr addrspace(1) %out.gep.2 + store i32 1234, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}merge_global_store_2_constants_i64: ; GCN: buffer_store_dwordx4 -define amdgpu_kernel void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 { - %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1 +define amdgpu_kernel void @merge_global_store_2_constants_i64(ptr addrspace(1) %out) #0 { + %out.gep.1 = getelementptr i64, ptr addrspace(1) %out, i64 1 - store i64 123, i64 addrspace(1)* %out.gep.1 - store i64 456, i64 addrspace(1)* %out + store i64 123, ptr addrspace(1) %out.gep.1 + store i64 456, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}merge_global_store_4_constants_i64: ; GCN: buffer_store_dwordx4 ; GCN: buffer_store_dwordx4 -define amdgpu_kernel void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 { - %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1 - %out.gep.2 = getelementptr i64, i64 addrspace(1)* %out, i64 2 - %out.gep.3 = getelementptr i64, i64 addrspace(1)* %out, i64 3 +define amdgpu_kernel void @merge_global_store_4_constants_i64(ptr addrspace(1) %out) #0 { + %out.gep.1 = getelementptr i64, ptr addrspace(1) %out, i64 1 + %out.gep.2 = getelementptr i64, ptr addrspace(1) %out, i64 2 + %out.gep.3 = getelementptr i64, ptr addrspace(1) %out, i64 3 - store i64 123, i64 addrspace(1)* %out.gep.1 - store i64 456, i64 addrspace(1)* %out.gep.2 - store i64 333, i64 addrspace(1)* %out.gep.3 - store i64 1234, i64 addrspace(1)* %out + store i64 123, ptr addrspace(1) %out.gep.1 + store i64 456, ptr addrspace(1) %out.gep.2 + store i64 333, ptr addrspace(1) %out.gep.3 + store i64 1234, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32: ; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]] ; GCN: buffer_store_dwordx2 [[LOAD]] -define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 - %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1 +define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 + %in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 1 - %lo = load i32, i32 addrspace(1)* %in - %hi = load i32, i32 addrspace(1)* 
%in.gep.1 + %lo = load i32, ptr addrspace(1) %in + %hi = load i32, ptr addrspace(1) %in.gep.1 - store i32 %lo, i32 addrspace(1)* %out - store i32 %hi, i32 addrspace(1)* %out.gep.1 + store i32 %lo, ptr addrspace(1) %out + store i32 %hi, ptr addrspace(1) %out.gep.1 ret void } ; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32_nonzero_base: ; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 ; GCN: buffer_store_dwordx2 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 -define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 2 - %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 3 +define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32_nonzero_base(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %in.gep.0 = getelementptr i32, ptr addrspace(1) %in, i32 2 + %in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 3 - %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i32 2 - %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 3 - %lo = load i32, i32 addrspace(1)* %in.gep.0 - %hi = load i32, i32 addrspace(1)* %in.gep.1 + %out.gep.0 = getelementptr i32, ptr addrspace(1) %out, i32 2 + %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 3 + %lo = load i32, ptr addrspace(1) %in.gep.0 + %hi = load i32, ptr addrspace(1) %in.gep.1 - store i32 %lo, i32 addrspace(1)* %out.gep.0 - store i32 %hi, i32 addrspace(1)* %out.gep.1 + store i32 %lo, ptr addrspace(1) %out.gep.0 + store i32 %hi, ptr addrspace(1) %out.gep.1 ret void } ; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_shuffle_i32: ; GCN: buffer_load_dwordx2 v ; GCN: buffer_store_dwordx2 v -define amdgpu_kernel void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 - %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1 +define amdgpu_kernel void @merge_global_store_2_adjacent_loads_shuffle_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 + %in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 1 - %lo = load i32, i32 addrspace(1)* %in - %hi = load i32, i32 addrspace(1)* %in.gep.1 + %lo = load i32, ptr addrspace(1) %in + %hi = load i32, ptr addrspace(1) %in.gep.1 - store i32 %hi, i32 addrspace(1)* %out - store i32 %lo, i32 addrspace(1)* %out.gep.1 + store i32 %hi, ptr addrspace(1) %out + store i32 %lo, ptr addrspace(1) %out.gep.1 ret void } ; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32: ; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]] ; GCN: buffer_store_dwordx4 [[LOAD]] -define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 - %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 - %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 - %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1 - %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2 - %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 3 +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 + %out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2 + %out.gep.3 = 
getelementptr i32, ptr addrspace(1) %out, i32 3 + %in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 1 + %in.gep.2 = getelementptr i32, ptr addrspace(1) %in, i32 2 + %in.gep.3 = getelementptr i32, ptr addrspace(1) %in, i32 3 - %x = load i32, i32 addrspace(1)* %in - %y = load i32, i32 addrspace(1)* %in.gep.1 - %z = load i32, i32 addrspace(1)* %in.gep.2 - %w = load i32, i32 addrspace(1)* %in.gep.3 + %x = load i32, ptr addrspace(1) %in + %y = load i32, ptr addrspace(1) %in.gep.1 + %z = load i32, ptr addrspace(1) %in.gep.2 + %w = load i32, ptr addrspace(1) %in.gep.3 - store i32 %x, i32 addrspace(1)* %out - store i32 %y, i32 addrspace(1)* %out.gep.1 - store i32 %z, i32 addrspace(1)* %out.gep.2 - store i32 %w, i32 addrspace(1)* %out.gep.3 + store i32 %x, ptr addrspace(1) %out + store i32 %y, ptr addrspace(1) %out.gep.1 + store i32 %z, ptr addrspace(1) %out.gep.2 + store i32 %w, ptr addrspace(1) %out.gep.3 ret void } @@ -283,67 +279,67 @@ ; SI-DAG: buffer_store_dword v ; CI-DAG: buffer_store_dwordx3 ; GCN: s_endpgm -define amdgpu_kernel void @merge_global_store_3_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 - %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 - %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1 - %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2 +define amdgpu_kernel void @merge_global_store_3_adjacent_loads_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 + %out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2 + %in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 1 + %in.gep.2 = getelementptr i32, ptr addrspace(1) %in, i32 2 - %x = load i32, i32 addrspace(1)* %in - %y = load i32, i32 addrspace(1)* %in.gep.1 - %z = load i32, i32 addrspace(1)* %in.gep.2 + %x = load i32, ptr addrspace(1) %in + %y = load i32, ptr addrspace(1) %in.gep.1 + %z = load i32, ptr addrspace(1) %in.gep.2 - store i32 %x, i32 addrspace(1)* %out - store i32 %y, i32 addrspace(1)* %out.gep.1 - store i32 %z, i32 addrspace(1)* %out.gep.2 + store i32 %x, ptr addrspace(1) %out + store i32 %y, ptr addrspace(1) %out.gep.1 + store i32 %z, ptr addrspace(1) %out.gep.2 ret void } ; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_f32: ; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]] ; GCN: buffer_store_dwordx4 [[LOAD]] -define amdgpu_kernel void @merge_global_store_4_adjacent_loads_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { - %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 - %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 - %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 - %in.gep.1 = getelementptr float, float addrspace(1)* %in, i32 1 - %in.gep.2 = getelementptr float, float addrspace(1)* %in, i32 2 - %in.gep.3 = getelementptr float, float addrspace(1)* %in, i32 3 +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1 + %out.gep.2 = getelementptr float, ptr addrspace(1) %out, i32 2 + %out.gep.3 = getelementptr float, ptr addrspace(1) %out, i32 3 + %in.gep.1 = getelementptr float, ptr addrspace(1) %in, i32 1 + %in.gep.2 = getelementptr float, ptr addrspace(1) %in, i32 2 + %in.gep.3 = getelementptr float, ptr addrspace(1) %in, i32 3 - %x = load float, float addrspace(1)* %in - %y = load float, float addrspace(1)* 
%in.gep.1 - %z = load float, float addrspace(1)* %in.gep.2 - %w = load float, float addrspace(1)* %in.gep.3 + %x = load float, ptr addrspace(1) %in + %y = load float, ptr addrspace(1) %in.gep.1 + %z = load float, ptr addrspace(1) %in.gep.2 + %w = load float, ptr addrspace(1) %in.gep.3 - store float %x, float addrspace(1)* %out - store float %y, float addrspace(1)* %out.gep.1 - store float %z, float addrspace(1)* %out.gep.2 - store float %w, float addrspace(1)* %out.gep.3 + store float %x, ptr addrspace(1) %out + store float %y, ptr addrspace(1) %out.gep.1 + store float %z, ptr addrspace(1) %out.gep.2 + store float %w, ptr addrspace(1) %out.gep.3 ret void } ; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32_nonzero_base: ; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44 ; GCN: buffer_store_dwordx4 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28 -define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 11 - %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 12 - %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 13 - %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 14 - %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i32 7 - %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 8 - %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 9 - %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 10 +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32_nonzero_base(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %in.gep.0 = getelementptr i32, ptr addrspace(1) %in, i32 11 + %in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 12 + %in.gep.2 = getelementptr i32, ptr addrspace(1) %in, i32 13 + %in.gep.3 = getelementptr i32, ptr addrspace(1) %in, i32 14 + %out.gep.0 = getelementptr i32, ptr addrspace(1) %out, i32 7 + %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 8 + %out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 9 + %out.gep.3 = getelementptr i32, ptr addrspace(1) %out, i32 10 - %x = load i32, i32 addrspace(1)* %in.gep.0 - %y = load i32, i32 addrspace(1)* %in.gep.1 - %z = load i32, i32 addrspace(1)* %in.gep.2 - %w = load i32, i32 addrspace(1)* %in.gep.3 + %x = load i32, ptr addrspace(1) %in.gep.0 + %y = load i32, ptr addrspace(1) %in.gep.1 + %z = load i32, ptr addrspace(1) %in.gep.2 + %w = load i32, ptr addrspace(1) %in.gep.3 - store i32 %x, i32 addrspace(1)* %out.gep.0 - store i32 %y, i32 addrspace(1)* %out.gep.1 - store i32 %z, i32 addrspace(1)* %out.gep.2 - store i32 %w, i32 addrspace(1)* %out.gep.3 + store i32 %x, ptr addrspace(1) %out.gep.0 + store i32 %y, ptr addrspace(1) %out.gep.1 + store i32 %z, ptr addrspace(1) %out.gep.2 + store i32 %w, ptr addrspace(1) %out.gep.3 ret void } @@ -351,26 +347,26 @@ ; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]] ; GCN: s_barrier ; GCN: buffer_store_dwordx4 [[LOAD]] -define amdgpu_kernel void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 - %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 - %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 - %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1 - %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2 - %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 3 - - %x 
= load i32, i32 addrspace(1)* %in - %y = load i32, i32 addrspace(1)* %in.gep.1 - %z = load i32, i32 addrspace(1)* %in.gep.2 - %w = load i32, i32 addrspace(1)* %in.gep.3 +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_inverse_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 + %out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2 + %out.gep.3 = getelementptr i32, ptr addrspace(1) %out, i32 3 + %in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 1 + %in.gep.2 = getelementptr i32, ptr addrspace(1) %in, i32 2 + %in.gep.3 = getelementptr i32, ptr addrspace(1) %in, i32 3 + + %x = load i32, ptr addrspace(1) %in + %y = load i32, ptr addrspace(1) %in.gep.1 + %z = load i32, ptr addrspace(1) %in.gep.2 + %w = load i32, ptr addrspace(1) %in.gep.3 ; Make sure the barrier doesn't stop this tail call void @llvm.amdgcn.s.barrier() #1 - store i32 %w, i32 addrspace(1)* %out.gep.3 - store i32 %z, i32 addrspace(1)* %out.gep.2 - store i32 %y, i32 addrspace(1)* %out.gep.1 - store i32 %x, i32 addrspace(1)* %out + store i32 %w, ptr addrspace(1) %out.gep.3 + store i32 %z, ptr addrspace(1) %out.gep.2 + store i32 %y, ptr addrspace(1) %out.gep.1 + store i32 %x, ptr addrspace(1) %out ret void } @@ -382,26 +378,26 @@ ; GCN: buffer_load_dwordx4 v ; GCN: s_barrier ; GCN: buffer_store_dwordx4 v -define amdgpu_kernel void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 - %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 - %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 - %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1 - %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2 - %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 3 - - %x = load i32, i32 addrspace(1)* %in - %y = load i32, i32 addrspace(1)* %in.gep.1 - %z = load i32, i32 addrspace(1)* %in.gep.2 - %w = load i32, i32 addrspace(1)* %in.gep.3 +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_shuffle_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 + %out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2 + %out.gep.3 = getelementptr i32, ptr addrspace(1) %out, i32 3 + %in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 1 + %in.gep.2 = getelementptr i32, ptr addrspace(1) %in, i32 2 + %in.gep.3 = getelementptr i32, ptr addrspace(1) %in, i32 3 + + %x = load i32, ptr addrspace(1) %in + %y = load i32, ptr addrspace(1) %in.gep.1 + %z = load i32, ptr addrspace(1) %in.gep.2 + %w = load i32, ptr addrspace(1) %in.gep.3 ; Make sure the barrier doesn't stop this tail call void @llvm.amdgcn.s.barrier() #1 - store i32 %w, i32 addrspace(1)* %out - store i32 %z, i32 addrspace(1)* %out.gep.1 - store i32 %y, i32 addrspace(1)* %out.gep.2 - store i32 %x, i32 addrspace(1)* %out.gep.3 + store i32 %w, ptr addrspace(1) %out + store i32 %z, ptr addrspace(1) %out.gep.1 + store i32 %y, ptr addrspace(1) %out.gep.2 + store i32 %x, ptr addrspace(1) %out.gep.3 ret void } @@ -410,23 +406,23 @@ ; GCN: buffer_load_dword [[LOAD:v[0-9]+]] ; GCN: buffer_store_dword [[LOAD]] ; GCN: s_endpgm -define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { - %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1 - %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2 - %out.gep.3 = getelementptr 
i8, i8 addrspace(1)* %out, i8 3 - %in.gep.1 = getelementptr i8, i8 addrspace(1)* %in, i8 1 - %in.gep.2 = getelementptr i8, i8 addrspace(1)* %in, i8 2 - %in.gep.3 = getelementptr i8, i8 addrspace(1)* %in, i8 3 +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %out.gep.1 = getelementptr i8, ptr addrspace(1) %out, i8 1 + %out.gep.2 = getelementptr i8, ptr addrspace(1) %out, i8 2 + %out.gep.3 = getelementptr i8, ptr addrspace(1) %out, i8 3 + %in.gep.1 = getelementptr i8, ptr addrspace(1) %in, i8 1 + %in.gep.2 = getelementptr i8, ptr addrspace(1) %in, i8 2 + %in.gep.3 = getelementptr i8, ptr addrspace(1) %in, i8 3 - %x = load i8, i8 addrspace(1)* %in, align 4 - %y = load i8, i8 addrspace(1)* %in.gep.1 - %z = load i8, i8 addrspace(1)* %in.gep.2 - %w = load i8, i8 addrspace(1)* %in.gep.3 + %x = load i8, ptr addrspace(1) %in, align 4 + %y = load i8, ptr addrspace(1) %in.gep.1 + %z = load i8, ptr addrspace(1) %in.gep.2 + %w = load i8, ptr addrspace(1) %in.gep.3 - store i8 %x, i8 addrspace(1)* %out, align 4 - store i8 %y, i8 addrspace(1)* %out.gep.1 - store i8 %z, i8 addrspace(1)* %out.gep.2 - store i8 %w, i8 addrspace(1)* %out.gep.3 + store i8 %x, ptr addrspace(1) %out, align 4 + store i8 %y, ptr addrspace(1) %out.gep.1 + store i8 %z, ptr addrspace(1) %out.gep.2 + store i8 %w, ptr addrspace(1) %out.gep.3 ret void } @@ -440,23 +436,23 @@ ; GCN: buffer_store_byte ; GCN: buffer_store_byte ; GCN: s_endpgm -define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { - %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1 - %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2 - %out.gep.3 = getelementptr i8, i8 addrspace(1)* %out, i8 3 - %in.gep.1 = getelementptr i8, i8 addrspace(1)* %in, i8 1 - %in.gep.2 = getelementptr i8, i8 addrspace(1)* %in, i8 2 - %in.gep.3 = getelementptr i8, i8 addrspace(1)* %in, i8 3 +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8_natural_align(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %out.gep.1 = getelementptr i8, ptr addrspace(1) %out, i8 1 + %out.gep.2 = getelementptr i8, ptr addrspace(1) %out, i8 2 + %out.gep.3 = getelementptr i8, ptr addrspace(1) %out, i8 3 + %in.gep.1 = getelementptr i8, ptr addrspace(1) %in, i8 1 + %in.gep.2 = getelementptr i8, ptr addrspace(1) %in, i8 2 + %in.gep.3 = getelementptr i8, ptr addrspace(1) %in, i8 3 - %x = load i8, i8 addrspace(1)* %in - %y = load i8, i8 addrspace(1)* %in.gep.1 - %z = load i8, i8 addrspace(1)* %in.gep.2 - %w = load i8, i8 addrspace(1)* %in.gep.3 + %x = load i8, ptr addrspace(1) %in + %y = load i8, ptr addrspace(1) %in.gep.1 + %z = load i8, ptr addrspace(1) %in.gep.2 + %w = load i8, ptr addrspace(1) %in.gep.3 - store i8 %x, i8 addrspace(1)* %out - store i8 %y, i8 addrspace(1)* %out.gep.1 - store i8 %z, i8 addrspace(1)* %out.gep.2 - store i8 %w, i8 addrspace(1)* %out.gep.3 + store i8 %x, ptr addrspace(1) %out + store i8 %y, ptr addrspace(1) %out.gep.1 + store i8 %z, ptr addrspace(1) %out.gep.2 + store i8 %w, ptr addrspace(1) %out.gep.3 ret void } @@ -464,32 +460,32 @@ ; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]] ; GCN: buffer_store_dwordx4 [[LOAD]] ; GCN: s_endpgm -define amdgpu_kernel void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { - %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 - %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 - %out.gep.3 = 
getelementptr i32, i32 addrspace(1)* %out, i32 3 - %vec = load <4 x i32>, <4 x i32> addrspace(1)* %in +define amdgpu_kernel void @merge_global_store_4_vector_elts_loads_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1 + %out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2 + %out.gep.3 = getelementptr i32, ptr addrspace(1) %out, i32 3 + %vec = load <4 x i32>, ptr addrspace(1) %in %x = extractelement <4 x i32> %vec, i32 0 %y = extractelement <4 x i32> %vec, i32 1 %z = extractelement <4 x i32> %vec, i32 2 %w = extractelement <4 x i32> %vec, i32 3 - store i32 %x, i32 addrspace(1)* %out - store i32 %y, i32 addrspace(1)* %out.gep.1 - store i32 %z, i32 addrspace(1)* %out.gep.2 - store i32 %w, i32 addrspace(1)* %out.gep.3 + store i32 %x, ptr addrspace(1) %out + store i32 %y, ptr addrspace(1) %out.gep.1 + store i32 %z, ptr addrspace(1) %out.gep.2 + store i32 %w, ptr addrspace(1) %out.gep.3 ret void } ; GCN-LABEL: {{^}}merge_local_store_2_constants_i8: ; GCN: ds_write_b16 ; GCN: s_endpgm -define amdgpu_kernel void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 { - %out.gep.1 = getelementptr i8, i8 addrspace(3)* %out, i32 1 +define amdgpu_kernel void @merge_local_store_2_constants_i8(ptr addrspace(3) %out) #0 { + %out.gep.1 = getelementptr i8, ptr addrspace(3) %out, i32 1 - store i8 123, i8 addrspace(3)* %out.gep.1 - store i8 456, i8 addrspace(3)* %out, align 2 + store i8 123, ptr addrspace(3) %out.gep.1 + store i8 456, ptr addrspace(3) %out, align 2 ret void } @@ -497,11 +493,11 @@ ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b ; GCN: ds_write2_b32 v{{[0-9]+}}, v[[LO]], v[[HI]] offset1:1{{$}} -define amdgpu_kernel void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 { - %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1 +define amdgpu_kernel void @merge_local_store_2_constants_i32(ptr addrspace(3) %out) #0 { + %out.gep.1 = getelementptr i32, ptr addrspace(3) %out, i32 1 - store i32 123, i32 addrspace(3)* %out.gep.1 - store i32 456, i32 addrspace(3)* %out + store i32 123, ptr addrspace(3) %out.gep.1 + store i32 456, ptr addrspace(3) %out ret void } @@ -515,15 +511,15 @@ ; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, [[K0]], [[K1]] offset1:1 ; GCN: s_endpgm -define amdgpu_kernel void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 { - %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1 - %out.gep.2 = getelementptr i32, i32 addrspace(3)* %out, i32 2 - %out.gep.3 = getelementptr i32, i32 addrspace(3)* %out, i32 3 +define amdgpu_kernel void @merge_local_store_4_constants_i32(ptr addrspace(3) %out) #0 { + %out.gep.1 = getelementptr i32, ptr addrspace(3) %out, i32 1 + %out.gep.2 = getelementptr i32, ptr addrspace(3) %out, i32 2 + %out.gep.3 = getelementptr i32, ptr addrspace(3) %out, i32 3 - store i32 123, i32 addrspace(3)* %out.gep.1 - store i32 456, i32 addrspace(3)* %out.gep.2 - store i32 333, i32 addrspace(3)* %out.gep.3 - store i32 1234, i32 addrspace(3)* %out + store i32 123, ptr addrspace(3) %out.gep.1 + store i32 456, ptr addrspace(3) %out.gep.2 + store i32 333, ptr addrspace(3) %out.gep.3 + store i32 1234, ptr addrspace(3) %out ret void } @@ -533,34 +529,34 @@ ; GCN: buffer_store_dwordx4 v[[[LO]]:[[HI4]]] ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 11{{$}} ; GCN: buffer_store_dword v[[HI]] -define amdgpu_kernel void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) { - store i32 9, i32 addrspace(1)* %out, align 4 - %idx1 
= getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 - store i32 12, i32 addrspace(1)* %idx1, align 4 - %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2 - store i32 16, i32 addrspace(1)* %idx2, align 4 - %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3 - store i32 -12, i32 addrspace(1)* %idx3, align 4 - %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4 - store i32 11, i32 addrspace(1)* %idx4, align 4 +define amdgpu_kernel void @merge_global_store_5_constants_i32(ptr addrspace(1) %out) { + store i32 9, ptr addrspace(1) %out, align 4 + %idx1 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 1 + store i32 12, ptr addrspace(1) %idx1, align 4 + %idx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 2 + store i32 16, ptr addrspace(1) %idx2, align 4 + %idx3 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 3 + store i32 -12, ptr addrspace(1) %idx3, align 4 + %idx4 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 4 + store i32 11, ptr addrspace(1) %idx4, align 4 ret void } ; GCN-LABEL: {{^}}merge_global_store_6_constants_i32: ; GCN: buffer_store_dwordx4 ; GCN: buffer_store_dwordx2 -define amdgpu_kernel void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) { - store i32 13, i32 addrspace(1)* %out, align 4 - %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 - store i32 15, i32 addrspace(1)* %idx1, align 4 - %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2 - store i32 62, i32 addrspace(1)* %idx2, align 4 - %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3 - store i32 63, i32 addrspace(1)* %idx3, align 4 - %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4 - store i32 11, i32 addrspace(1)* %idx4, align 4 - %idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5 - store i32 123, i32 addrspace(1)* %idx5, align 4 +define amdgpu_kernel void @merge_global_store_6_constants_i32(ptr addrspace(1) %out) { + store i32 13, ptr addrspace(1) %out, align 4 + %idx1 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 1 + store i32 15, ptr addrspace(1) %idx1, align 4 + %idx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 2 + store i32 62, ptr addrspace(1) %idx2, align 4 + %idx3 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 3 + store i32 63, ptr addrspace(1) %idx3, align 4 + %idx4 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 4 + store i32 11, ptr addrspace(1) %idx4, align 4 + %idx5 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 5 + store i32 123, ptr addrspace(1) %idx5, align 4 ret void } @@ -568,20 +564,20 @@ ; GCN: buffer_store_dwordx4 ; SI-DAG: buffer_store_dwordx2 ; CI: buffer_store_dwordx3 -define amdgpu_kernel void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) { - store i32 34, i32 addrspace(1)* %out, align 4 - %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 - store i32 999, i32 addrspace(1)* %idx1, align 4 - %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2 - store i32 65, i32 addrspace(1)* %idx2, align 4 - %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3 - store i32 33, i32 addrspace(1)* %idx3, align 4 - %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4 - store i32 98, i32 addrspace(1)* %idx4, align 4 - %idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5 - store i32 91, i32 addrspace(1)* %idx5, align 4 - %idx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 6 - store i32 212, i32 
addrspace(1)* %idx6, align 4 +define amdgpu_kernel void @merge_global_store_7_constants_i32(ptr addrspace(1) %out) { + store i32 34, ptr addrspace(1) %out, align 4 + %idx1 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 1 + store i32 999, ptr addrspace(1) %idx1, align 4 + %idx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 2 + store i32 65, ptr addrspace(1) %idx2, align 4 + %idx3 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 3 + store i32 33, ptr addrspace(1) %idx3, align 4 + %idx4 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 4 + store i32 98, ptr addrspace(1) %idx4, align 4 + %idx5 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 5 + store i32 91, ptr addrspace(1) %idx5, align 4 + %idx6 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 6 + store i32 212, ptr addrspace(1) %idx6, align 4 ret void } @@ -589,22 +585,22 @@ ; GCN: buffer_store_dwordx4 ; GCN: buffer_store_dwordx4 ; GCN: s_endpgm -define amdgpu_kernel void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) { - store i32 34, i32 addrspace(1)* %out, align 4 - %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 - store i32 999, i32 addrspace(1)* %idx1, align 4 - %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2 - store i32 65, i32 addrspace(1)* %idx2, align 4 - %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3 - store i32 33, i32 addrspace(1)* %idx3, align 4 - %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4 - store i32 98, i32 addrspace(1)* %idx4, align 4 - %idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5 - store i32 91, i32 addrspace(1)* %idx5, align 4 - %idx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 6 - store i32 212, i32 addrspace(1)* %idx6, align 4 - %idx7 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 7 - store i32 999, i32 addrspace(1)* %idx7, align 4 +define amdgpu_kernel void @merge_global_store_8_constants_i32(ptr addrspace(1) %out) { + store i32 34, ptr addrspace(1) %out, align 4 + %idx1 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 1 + store i32 999, ptr addrspace(1) %idx1, align 4 + %idx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 2 + store i32 65, ptr addrspace(1) %idx2, align 4 + %idx3 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 3 + store i32 33, ptr addrspace(1) %idx3, align 4 + %idx4 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 4 + store i32 98, ptr addrspace(1) %idx4, align 4 + %idx5 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 5 + store i32 91, ptr addrspace(1) %idx5, align 4 + %idx6 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 6 + store i32 212, ptr addrspace(1) %idx6, align 4 + %idx7 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 7 + store i32 999, ptr addrspace(1) %idx7, align 4 ret void } @@ -625,9 +621,9 @@ ; CI-DAG: buffer_store_dwordx3 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} ; GCN: ScratchSize: 0{{$}} -define amdgpu_kernel void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 { - %vec = load <3 x i32>, <3 x i32> addrspace(1)* %in, align 4 - store <3 x i32> %vec, <3 x i32> addrspace(1)* %out +define amdgpu_kernel void @copy_v3i32_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 { + %vec = load <3 x i32>, ptr addrspace(1) %in, align 4 + store <3 x i32> %vec, ptr addrspace(1) %out ret void } @@ -641,9 +637,9 @@ ; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, 
off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} ; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} ; GCN: ScratchSize: 0{{$}} -define amdgpu_kernel void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 { - %vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4 - store <3 x i64> %vec, <3 x i64> addrspace(1)* %out +define amdgpu_kernel void @copy_v3i64_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 { + %vec = load <3 x i64>, ptr addrspace(1) %in, align 4 + store <3 x i64> %vec, ptr addrspace(1) %out ret void } @@ -659,10 +655,10 @@ ; SI-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 ; CI-DAG: buffer_store_dwordx3 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} ; GCN: ScratchSize: 0{{$}} -define amdgpu_kernel void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 { - %vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4 +define amdgpu_kernel void @copy_v3f32_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 { + %vec = load <3 x float>, ptr addrspace(1) %in, align 4 %fadd = fadd <3 x float> %vec, - store <3 x float> %fadd, <3 x float> addrspace(1)* %out + store <3 x float> %fadd, ptr addrspace(1) %out ret void } @@ -676,10 +672,10 @@ ; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} ; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} ; GCN: ScratchSize: 0{{$}} -define amdgpu_kernel void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 { - %vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4 +define amdgpu_kernel void @copy_v3f64_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 { + %vec = load <3 x double>, ptr addrspace(1) %in, align 4 %fadd = fadd <3 x double> %vec, - store <3 x double> %fadd, <3 x double> addrspace(1)* %out + store <3 x double> %fadd, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/mesa3d.ll b/llvm/test/CodeGen/AMDGPU/mesa3d.ll --- a/llvm/test/CodeGen/AMDGPU/mesa3d.ll +++ b/llvm/test/CodeGen/AMDGPU/mesa3d.ll @@ -16,10 +16,10 @@ ; GCN-DAG: s_mov_b32 s7, 0xe8f000 ; GCN-DAG: v_mov_b32_e32 [[V:v[0-9]+]], 2 ; GCN: buffer_store_dword [[V]], v0, s[4:7], 0 offen -define amdgpu_ps void @scratch_ps(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_ps void @scratch_ps(ptr addrspace(1) %out, i32 %in) { entry: %alloca = alloca [32 x i32], addrspace(5) - %ptr = getelementptr [32 x i32], [32 x i32] addrspace(5)* %alloca, i32 0, i32 %in - store volatile i32 2, i32 addrspace(5)* %ptr + %ptr = getelementptr [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %in + store volatile i32 2, ptr addrspace(5) %ptr ret void } diff --git a/llvm/test/CodeGen/AMDGPU/missing-store.ll b/llvm/test/CodeGen/AMDGPU/missing-store.ll --- a/llvm/test/CodeGen/AMDGPU/missing-store.ll +++ b/llvm/test/CodeGen/AMDGPU/missing-store.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s -@ptr_load = addrspace(3) global i32 addrspace(4)* undef, align 8 +@ptr_load = addrspace(3) global ptr addrspace(4) undef, align 8 ; Make sure when the load from %ptr2 is folded the chain isn't lost, ; resulting in losing the store to gptr @@ -14,14 +14,14 @@ ; SI-DAG: buffer_store_dword ; SI-DAG: buffer_store_dword ; SI: 
s_endpgm -define amdgpu_kernel void @missing_store_reduced(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { - %ptr0 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(3)* @ptr_load, align 8 - %ptr2 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 2 +define amdgpu_kernel void @missing_store_reduced(ptr addrspace(1) %out, ptr addrspace(1) %gptr) #0 { + %ptr0 = load ptr addrspace(4), ptr addrspace(3) @ptr_load, align 8 + %ptr2 = getelementptr inbounds i32, ptr addrspace(4) %ptr0, i64 2 - store i32 99, i32 addrspace(1)* %gptr, align 4 - %tmp2 = load i32, i32 addrspace(4)* %ptr2, align 4 + store i32 99, ptr addrspace(1) %gptr, align 4 + %tmp2 = load i32, ptr addrspace(4) %ptr2, align 4 - store i32 %tmp2, i32 addrspace(1)* %out, align 4 + store i32 %tmp2, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll b/llvm/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll --- a/llvm/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll +++ b/llvm/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll @@ -18,15 +18,15 @@ ; GCN: v_addc_u32_e32 v[[PTRHI:[0-9]+]], vcc, v[[LDPTRHI]], v[[VARG1HI]] ; GCN: buffer_load_ubyte v{{[0-9]+}}, v[[[PTRLO]]:[[PTRHI]]], -define amdgpu_kernel void @clobber_vgpr_pair_pointer_add(i64 %arg1, [8 x i32], i8 addrspace(1)* addrspace(1)* %ptrarg, i32 %arg3) #0 { +define amdgpu_kernel void @clobber_vgpr_pair_pointer_add(i64 %arg1, [8 x i32], ptr addrspace(1) %ptrarg, i32 %arg3) #0 { bb: %tmp = icmp sgt i32 %arg3, 0 br i1 %tmp, label %bb4, label %bb17 bb4: - %tmp14 = load volatile i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %ptrarg - %tmp15 = getelementptr inbounds i8, i8 addrspace(1)* %tmp14, i64 %arg1 - %tmp16 = load volatile i8, i8 addrspace(1)* %tmp15 + %tmp14 = load volatile ptr addrspace(1), ptr addrspace(1) %ptrarg + %tmp15 = getelementptr inbounds i8, ptr addrspace(1) %tmp14, i64 %arg1 + %tmp16 = load volatile i8, ptr addrspace(1) %tmp15 br label %bb17 bb17: diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll --- a/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll @@ -11,18 +11,18 @@ ; GCN-LABEL: {{^}}atomic_max_i32: ; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400 glc{{$}} -define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 { +define amdgpu_kernel void @atomic_max_i32(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %x, i32 %y) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid - %ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep + %tid.gep = getelementptr ptr addrspace(1), ptr addrspace(1) %in, i32 %tid + %ptr = load volatile ptr addrspace(1), ptr addrspace(1) %tid.gep %xor = xor i32 %tid, 1 %cmp = icmp ne i32 %xor, 0 br i1 %cmp, label %atomic, label %exit atomic: - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 100 - %ret = atomicrmw max i32 addrspace(1)* %gep, i32 %y seq_cst - store i32 %ret, i32 addrspace(1)* %out + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 100 + %ret = atomicrmw max ptr addrspace(1) %gep, i32 %y seq_cst + store i32 %ret, ptr addrspace(1) %out br label %exit exit: @@ -31,17 +31,17 @@ ; GCN-LABEL: {{^}}atomic_max_i32_noret: ; GCN: 
buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400{{$}} -define amdgpu_kernel void @atomic_max_i32_noret(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 { +define amdgpu_kernel void @atomic_max_i32_noret(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %x, i32 %y) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid - %ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep + %tid.gep = getelementptr ptr addrspace(1), ptr addrspace(1) %in, i32 %tid + %ptr = load volatile ptr addrspace(1), ptr addrspace(1) %tid.gep %xor = xor i32 %tid, 1 %cmp = icmp ne i32 %xor, 0 br i1 %cmp, label %atomic, label %exit atomic: - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 100 - %ret = atomicrmw max i32 addrspace(1)* %gep, i32 %y seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 100 + %ret = atomicrmw max ptr addrspace(1) %gep, i32 %y seq_cst br label %exit exit: diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-worklist.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-worklist.ll --- a/llvm/test/CodeGen/AMDGPU/move-to-valu-worklist.ll +++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-worklist.ll @@ -13,7 +13,7 @@ ; GCN-NEXT: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @in_worklist_once() #0 { bb: - %tmp = load i64, i64 addrspace(5)* undef + %tmp = load i64, ptr addrspace(5) undef br label %bb1 bb1: ; preds = %bb1, %bb diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll @@ -122,12 +122,12 @@ ; W32-DAG: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[RES0]], off ; W32-DAG: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[RES1]], off -define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, float addrspace(1)* %out0, float addrspace(1)* %out1) #0 { +define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr addrspace(1) %out0, ptr addrspace(1) %out1) #0 { entry: %val0 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i, i32 %c, i32 0, i32 0, i32 0) #1 %val1 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %j, i32 %c, i32 0, i32 0, i32 0) #1 - store volatile float %val0, float addrspace(1)* %out0 - store volatile float %val1, float addrspace(1)* %out1 + store volatile float %val0, ptr addrspace(1) %out0 + store volatile float %val1, ptr addrspace(1) %out1 ret void } @@ -307,7 +307,7 @@ ; W64-O0: buffer_load_dword [[RES:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF]] ; 4-byte Folded Reload ; W64-O0: global_store_dword v[{{[0-9]+:[0-9]+}}], [[RES]], off -define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, float addrspace(1)* %in, float addrspace(1)* %out) #0 { +define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr addrspace(1) %in, ptr addrspace(1) %out) #0 { entry: %live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={s4}" () %val0 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i, i32 %live.out.reg, i32 0, i32 0, i32 0) #1 @@ -321,7 +321,7 @@ bb2: %val = phi float [ %val0, %entry ], [ %val1, %bb1 ] - store volatile float %val, float addrspace(1)* %out + store volatile float %val, ptr 
addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-offset-private.ll b/llvm/test/CodeGen/AMDGPU/mubuf-offset-private.ll --- a/llvm/test/CodeGen/AMDGPU/mubuf-offset-private.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-offset-private.ll @@ -7,113 +7,113 @@ ; GCN-LABEL: {{^}}store_private_offset_i8: ; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], 0 offset:8 define amdgpu_kernel void @store_private_offset_i8() #0 { - store volatile i8 5, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*) + store volatile i8 5, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5)) ret void } ; GCN-LABEL: {{^}}store_private_offset_i16: ; GCN: buffer_store_short v{{[0-9]+}}, off, s[4:7], 0 offset:8 define amdgpu_kernel void @store_private_offset_i16() #0 { - store volatile i16 5, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*) + store volatile i16 5, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5)) ret void } ; GCN-LABEL: {{^}}store_private_offset_i32: ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], 0 offset:8 define amdgpu_kernel void @store_private_offset_i32() #0 { - store volatile i32 5, i32 addrspace(5)* inttoptr (i32 8 to i32 addrspace(5)*) + store volatile i32 5, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5)) ret void } ; GCN-LABEL: {{^}}store_private_offset_v2i32: ; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], 0 offset:8 define amdgpu_kernel void @store_private_offset_v2i32() #0 { - store volatile <2 x i32> , <2 x i32> addrspace(5)* inttoptr (i32 8 to <2 x i32> addrspace(5)*) + store volatile <2 x i32> , ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5)) ret void } ; GCN-LABEL: {{^}}store_private_offset_v4i32: ; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], 0 offset:8 define amdgpu_kernel void @store_private_offset_v4i32() #0 { - store volatile <4 x i32> , <4 x i32> addrspace(5)* inttoptr (i32 8 to <4 x i32> addrspace(5)*) + store volatile <4 x i32> , ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5)) ret void } ; GCN-LABEL: {{^}}load_private_offset_i8: ; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], 0 offset:8 define amdgpu_kernel void @load_private_offset_i8() #0 { - %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*) + %load = load volatile i8, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5)) ret void } ; GCN-LABEL: {{^}}sextload_private_offset_i8: ; GCN: buffer_load_sbyte v{{[0-9]+}}, off, s[4:7], 0 offset:8 -define amdgpu_kernel void @sextload_private_offset_i8(i32 addrspace(1)* %out) #0 { - %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*) +define amdgpu_kernel void @sextload_private_offset_i8(ptr addrspace(1) %out) #0 { + %load = load volatile i8, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5)) %sextload = sext i8 %load to i32 - store i32 %sextload, i32 addrspace(1)* undef + store i32 %sextload, ptr addrspace(1) undef ret void } ; GCN-LABEL: {{^}}zextload_private_offset_i8: ; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], 0 offset:8 -define amdgpu_kernel void @zextload_private_offset_i8(i32 addrspace(1)* %out) #0 { - %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*) +define amdgpu_kernel void @zextload_private_offset_i8(ptr addrspace(1) %out) #0 { + %load = load volatile i8, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5)) %zextload = zext i8 %load to i32 - store i32 %zextload, i32 addrspace(1)* undef + store i32 %zextload, ptr addrspace(1) undef ret void } ; GCN-LABEL: {{^}}load_private_offset_i16: ; 
GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], 0 offset:8 define amdgpu_kernel void @load_private_offset_i16() #0 { - %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*) + %load = load volatile i16, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5)) ret void } ; GCN-LABEL: {{^}}sextload_private_offset_i16: ; GCN: buffer_load_sshort v{{[0-9]+}}, off, s[4:7], 0 offset:8 -define amdgpu_kernel void @sextload_private_offset_i16(i32 addrspace(1)* %out) #0 { - %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*) +define amdgpu_kernel void @sextload_private_offset_i16(ptr addrspace(1) %out) #0 { + %load = load volatile i16, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5)) %sextload = sext i16 %load to i32 - store i32 %sextload, i32 addrspace(1)* undef + store i32 %sextload, ptr addrspace(1) undef ret void } ; GCN-LABEL: {{^}}zextload_private_offset_i16: ; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], 0 offset:8 -define amdgpu_kernel void @zextload_private_offset_i16(i32 addrspace(1)* %out) #0 { - %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*) +define amdgpu_kernel void @zextload_private_offset_i16(ptr addrspace(1) %out) #0 { + %load = load volatile i16, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5)) %zextload = zext i16 %load to i32 - store i32 %zextload, i32 addrspace(1)* undef + store i32 %zextload, ptr addrspace(1) undef ret void } ; GCN-LABEL: {{^}}load_private_offset_i32: ; GCN: buffer_load_dword v{{[0-9]+}}, off, s[4:7], 0 offset:8 define amdgpu_kernel void @load_private_offset_i32() #0 { - %load = load volatile i32, i32 addrspace(5)* inttoptr (i32 8 to i32 addrspace(5)*) + %load = load volatile i32, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5)) ret void } ; GCN-LABEL: {{^}}load_private_offset_v2i32: ; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], 0 offset:8 define amdgpu_kernel void @load_private_offset_v2i32() #0 { - %load = load volatile <2 x i32>, <2 x i32> addrspace(5)* inttoptr (i32 8 to <2 x i32> addrspace(5)*) + %load = load volatile <2 x i32>, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5)) ret void } ; GCN-LABEL: {{^}}load_private_offset_v4i32: ; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], 0 offset:8 define amdgpu_kernel void @load_private_offset_v4i32() #0 { - %load = load volatile <4 x i32>, <4 x i32> addrspace(5)* inttoptr (i32 8 to <4 x i32> addrspace(5)*) + %load = load volatile <4 x i32>, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5)) ret void } ; GCN-LABEL: {{^}}store_private_offset_i8_max_offset: ; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], 0 offset:4095 define amdgpu_kernel void @store_private_offset_i8_max_offset() #0 { - store volatile i8 5, i8 addrspace(5)* inttoptr (i32 4095 to i8 addrspace(5)*) + store volatile i8 5, ptr addrspace(5) inttoptr (i32 4095 to ptr addrspace(5)) ret void } @@ -121,7 +121,7 @@ ; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000 ; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], 0 offen{{$}} define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() #0 { - store volatile i8 5, i8 addrspace(5)* inttoptr (i32 4096 to i8 addrspace(5)*) + store volatile i8 5, ptr addrspace(5) inttoptr (i32 4096 to ptr addrspace(5)) ret void } @@ -129,7 +129,7 @@ ; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000 ; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], 0 offen offset:1{{$}} define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 { - store 
volatile i8 5, i8 addrspace(5)* inttoptr (i32 4097 to i8 addrspace(5)*) + store volatile i8 5, ptr addrspace(5) inttoptr (i32 4097 to ptr addrspace(5)) ret void } @@ -145,10 +145,10 @@ ; GFX9: buffer_store_dword v{{[0-9]+}}, [[ADDR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen offset:32 define amdgpu_kernel void @store_private_unknown_bits_vaddr() #0 { %alloca = alloca [16 x i32], align 4, addrspace(5) - %vaddr = load volatile i32, i32 addrspace(1)* undef + %vaddr = load volatile i32, ptr addrspace(1) undef %vaddr.off = add i32 %vaddr, 8 - %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %vaddr.off - store volatile i32 9, i32 addrspace(5)* %gep + %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %vaddr.off + store volatile i32 9, ptr addrspace(5) %gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll b/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll --- a/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll @@ -5,40 +5,40 @@ ; CHECK-LABEL: {{^}}test_none: ; CHECK: buffer_load_format_x v0, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}} -define amdgpu_vs float @test_none(<4 x i32> addrspace(4)* inreg %base, i32 %i) { +define amdgpu_vs float @test_none(ptr addrspace(4) inreg %base, i32 %i) { main_body: - %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i - %tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32 + %ptr = getelementptr <4 x i32>, ptr addrspace(4) %base, i32 %i + %tmp2 = load <4 x i32>, ptr addrspace(4) %ptr, align 32 %tmp7 = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 0, i32 0) ret float %tmp7 } ; CHECK-LABEL: {{^}}test_idxen: ; CHECK: buffer_load_format_x v0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen{{$}} -define amdgpu_vs float @test_idxen(<4 x i32> addrspace(4)* inreg %base, i32 %i) { +define amdgpu_vs float @test_idxen(ptr addrspace(4) inreg %base, i32 %i) { main_body: - %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i - %tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32 + %ptr = getelementptr <4 x i32>, ptr addrspace(4) %base, i32 %i + %tmp2 = load <4 x i32>, ptr addrspace(4) %ptr, align 32 %tmp7 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 0, i32 0, i32 0) ret float %tmp7 } ; CHECK-LABEL: {{^}}test_offen: ; CHECK: buffer_load_format_x v0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen{{$}} -define amdgpu_vs float @test_offen(<4 x i32> addrspace(4)* inreg %base, i32 %i) { +define amdgpu_vs float @test_offen(ptr addrspace(4) inreg %base, i32 %i) { main_body: - %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i - %tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32 + %ptr = getelementptr <4 x i32>, ptr addrspace(4) %base, i32 %i + %tmp2 = load <4 x i32>, ptr addrspace(4) %ptr, align 32 %tmp7 = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 0, i32 0) ret float %tmp7 } ; CHECK-LABEL: {{^}}test_both: ; CHECK: buffer_load_format_x v0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen offen{{$}} -define amdgpu_vs float @test_both(<4 x i32> addrspace(4)* inreg %base, i32 %i) { +define amdgpu_vs float @test_both(ptr addrspace(4) inreg %base, i32 %i) { main_body: - %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i - %tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32 + %ptr = getelementptr <4 x i32>, ptr addrspace(4) %base, i32 %i + %tmp2 = 
load <4 x i32>, ptr addrspace(4) %ptr, align 32 %tmp7 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 undef, i32 0, i32 0) ret float %tmp7 } diff --git a/llvm/test/CodeGen/AMDGPU/mubuf.ll b/llvm/test/CodeGen/AMDGPU/mubuf.ll --- a/llvm/test/CodeGen/AMDGPU/mubuf.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf.ll @@ -7,22 +7,22 @@ ; MUBUF load with an immediate byte offset that fits into 12-bits ; CHECK-LABEL: {{^}}mubuf_load0: ; CHECK: buffer_load_dword v{{[0-9]}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4 ; encoding: [0x04,0x00,0x30,0xe0 -define amdgpu_kernel void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +define amdgpu_kernel void @mubuf_load0(ptr addrspace(1) %out, ptr addrspace(1) %in) { entry: - %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1 - %1 = load i32, i32 addrspace(1)* %0 - store i32 %1, i32 addrspace(1)* %out + %0 = getelementptr i32, ptr addrspace(1) %in, i64 1 + %1 = load i32, ptr addrspace(1) %0 + store i32 %1, ptr addrspace(1) %out ret void } ; MUBUF load with the largest possible immediate offset ; CHECK-LABEL: {{^}}mubuf_load1: ; CHECK: buffer_load_ubyte v{{[0-9]}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0 -define amdgpu_kernel void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) { +define amdgpu_kernel void @mubuf_load1(ptr addrspace(1) %out, ptr addrspace(1) %in) { entry: - %0 = getelementptr i8, i8 addrspace(1)* %in, i64 4095 - %1 = load i8, i8 addrspace(1)* %0 - store i8 %1, i8 addrspace(1)* %out + %0 = getelementptr i8, ptr addrspace(1) %in, i64 4095 + %1 = load i8, ptr addrspace(1) %0 + store i8 %1, ptr addrspace(1) %out ret void } @@ -30,11 +30,11 @@ ; CHECK-LABEL: {{^}}mubuf_load2: ; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000 ; CHECK: buffer_load_dword v{{[0-9]}}, off, s[{{[0-9]+:[0-9]+}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x30,0xe0 -define amdgpu_kernel void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +define amdgpu_kernel void @mubuf_load2(ptr addrspace(1) %out, ptr addrspace(1) %in) { entry: - %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1024 - %1 = load i32, i32 addrspace(1)* %0 - store i32 %1, i32 addrspace(1)* %out + %0 = getelementptr i32, ptr addrspace(1) %in, i64 1024 + %1 = load i32, ptr addrspace(1) %0 + store i32 %1, ptr addrspace(1) %out ret void } @@ -42,21 +42,20 @@ ; CHECK-LABEL: {{^}}mubuf_load3: ; CHECK-NOT: ADD ; CHECK: buffer_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x30,0xe0 -define amdgpu_kernel void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) { +define amdgpu_kernel void @mubuf_load3(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %offset) { entry: - %0 = getelementptr i32, i32 addrspace(1)* %in, i64 %offset - %1 = getelementptr i32, i32 addrspace(1)* %0, i64 1 - %2 = load i32, i32 addrspace(1)* %1 - store i32 %2, i32 addrspace(1)* %out + %0 = getelementptr i32, ptr addrspace(1) %in, i64 %offset + %1 = getelementptr i32, ptr addrspace(1) %0, i64 1 + %2 = load i32, ptr addrspace(1) %1 + store i32 %2, ptr addrspace(1) %out ret void } ; CHECK-LABEL: {{^}}soffset_max_imm: ; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 64 offen glc -define amdgpu_gs void @soffset_max_imm([6 x <4 x i32>] addrspace(4)* inreg, [17 x <4 x i32>] addrspace(4)* inreg, [16 x <4 x i32>] addrspace(4)* inreg, [32 x <8 x i32>] addrspace(4)* inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { +define 
amdgpu_gs void @soffset_max_imm(ptr addrspace(4) inreg, ptr addrspace(4) inreg, ptr addrspace(4) inreg, ptr addrspace(4) inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { main_body: - %tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(4)* %0, i32 0, i32 0 - %tmp1 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp0 + %tmp1 = load <4 x i32>, ptr addrspace(4) %0 %tmp2 = shl i32 %6, 2 %tmp3 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %tmp1, i32 %tmp2, i32 64, i32 1) %tmp4 = add i32 %6, 16 @@ -72,10 +71,9 @@ ; CHECK-LABEL: {{^}}soffset_no_fold: ; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41 ; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc -define amdgpu_gs void @soffset_no_fold([6 x <4 x i32>] addrspace(4)* inreg, [17 x <4 x i32>] addrspace(4)* inreg, [16 x <4 x i32>] addrspace(4)* inreg, [32 x <8 x i32>] addrspace(4)* inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { +define amdgpu_gs void @soffset_no_fold(ptr addrspace(4) inreg, ptr addrspace(4) inreg, ptr addrspace(4) inreg, ptr addrspace(4) inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { main_body: - %tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(4)* %0, i32 0, i32 0 - %tmp1 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp0 + %tmp1 = load <4 x i32>, ptr addrspace(4) %0 %tmp2 = shl i32 %6, 2 %tmp3 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %tmp1, i32 %tmp2, i32 65, i32 1) %tmp4 = add i32 %6, 16 @@ -91,10 +89,10 @@ ; MUBUF store with an immediate byte offset that fits into 12-bits ; CHECK-LABEL: {{^}}mubuf_store0: ; CHECK: buffer_store_dword v{{[0-9]}}, off, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x70,0xe0 -define amdgpu_kernel void @mubuf_store0(i32 addrspace(1)* %out) { +define amdgpu_kernel void @mubuf_store0(ptr addrspace(1) %out) { entry: - %0 = getelementptr i32, i32 addrspace(1)* %out, i64 1 - store i32 0, i32 addrspace(1)* %0 + %0 = getelementptr i32, ptr addrspace(1) %out, i64 1 + store i32 0, ptr addrspace(1) %0 ret void } @@ -102,10 +100,10 @@ ; CHECK-LABEL: {{^}}mubuf_store1: ; CHECK: buffer_store_byte v{{[0-9]}}, off, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0 -define amdgpu_kernel void @mubuf_store1(i8 addrspace(1)* %out) { +define amdgpu_kernel void @mubuf_store1(ptr addrspace(1) %out) { entry: - %0 = getelementptr i8, i8 addrspace(1)* %out, i64 4095 - store i8 0, i8 addrspace(1)* %0 + %0 = getelementptr i8, ptr addrspace(1) %out, i64 4095 + store i8 0, ptr addrspace(1) %0 ret void } @@ -113,10 +111,10 @@ ; CHECK-LABEL: {{^}}mubuf_store2: ; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000 ; CHECK: buffer_store_dword v{{[0-9]}}, off, s[{{[0-9]:[0-9]}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x70,0xe0 -define amdgpu_kernel void @mubuf_store2(i32 addrspace(1)* %out) { +define amdgpu_kernel void @mubuf_store2(ptr addrspace(1) %out) { entry: - %0 = getelementptr i32, i32 addrspace(1)* %out, i64 1024 - store i32 0, i32 addrspace(1)* %0 + %0 = getelementptr i32, ptr addrspace(1) %out, i64 1024 + store i32 0, ptr addrspace(1) %0 ret void } @@ -124,53 +122,53 @@ ; CHECK-LABEL: {{^}}mubuf_store3: ; CHECK-NOT: ADD ; CHECK: buffer_store_dword v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x70,0xe0 -define amdgpu_kernel void @mubuf_store3(i32 addrspace(1)* %out, i64 %offset) { +define amdgpu_kernel void @mubuf_store3(ptr addrspace(1) %out, i64 %offset) { entry: - %0 = getelementptr i32, i32 
addrspace(1)* %out, i64 %offset - %1 = getelementptr i32, i32 addrspace(1)* %0, i64 1 - store i32 0, i32 addrspace(1)* %1 + %0 = getelementptr i32, ptr addrspace(1) %out, i64 %offset + %1 = getelementptr i32, ptr addrspace(1) %0, i64 1 + store i32 0, ptr addrspace(1) %1 ret void } ; CHECK-LABEL: {{^}}store_sgpr_ptr: ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 -define amdgpu_kernel void @store_sgpr_ptr(i32 addrspace(1)* %out) { - store i32 99, i32 addrspace(1)* %out, align 4 +define amdgpu_kernel void @store_sgpr_ptr(ptr addrspace(1) %out) { + store i32 99, ptr addrspace(1) %out, align 4 ret void } ; CHECK-LABEL: {{^}}store_sgpr_ptr_offset: ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:40 -define amdgpu_kernel void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) { - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 10 - store i32 99, i32 addrspace(1)* %out.gep, align 4 +define amdgpu_kernel void @store_sgpr_ptr_offset(ptr addrspace(1) %out) { + %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 10 + store i32 99, ptr addrspace(1) %out.gep, align 4 ret void } ; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset: ; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000 ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]] -define amdgpu_kernel void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) { - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768 - store i32 99, i32 addrspace(1)* %out.gep, align 4 +define amdgpu_kernel void @store_sgpr_ptr_large_offset(ptr addrspace(1) %out) { + %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 32768 + store i32 99, ptr addrspace(1) %out.gep, align 4 ret void } ; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset_atomic: ; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000 ; CHECK: buffer_atomic_add v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]] -define amdgpu_kernel void @store_sgpr_ptr_large_offset_atomic(i32 addrspace(1)* %out) { - %gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768 - %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 5 seq_cst +define amdgpu_kernel void @store_sgpr_ptr_large_offset_atomic(ptr addrspace(1) %out) { + %gep = getelementptr i32, ptr addrspace(1) %out, i32 32768 + %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 5 seq_cst ret void } ; CHECK-LABEL: {{^}}store_vgpr_ptr: ; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 -define amdgpu_kernel void @store_vgpr_ptr(i32 addrspace(1)* %out) { +define amdgpu_kernel void @store_vgpr_ptr(ptr addrspace(1) %out) { %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid - store i32 99, i32 addrspace(1)* %out.gep, align 4 + %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %tid + store i32 99, ptr addrspace(1) %out.gep, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll --- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll +++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll @@ -188,7 +188,6 @@ ; GCN-NEXT: ; %bb.3: ; %LeafBlock1 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_cmp_eq_u32 s8, 1 -; GCN-NEXT: s_mov_b64 s[4:5], -1 ; GCN-NEXT: s_cbranch_scc0 .LBB1_5 ; GCN-NEXT: ; %bb.4: ; %case1 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 diff --git a/llvm/test/CodeGen/AMDGPU/nand.ll b/llvm/test/CodeGen/AMDGPU/nand.ll --- a/llvm/test/CodeGen/AMDGPU/nand.ll +++ 
b/llvm/test/CodeGen/AMDGPU/nand.ll @@ -6,11 +6,11 @@ ; GCN-LABEL: {{^}}scalar_nand_i32_one_use ; GCN: s_nand_b32 define amdgpu_kernel void @scalar_nand_i32_one_use( - i32 addrspace(1)* %r0, i32 %a, i32 %b) { + ptr addrspace(1) %r0, i32 %a, i32 %b) { entry: %and = and i32 %a, %b %r0.val = xor i32 %and, -1 - store i32 %r0.val, i32 addrspace(1)* %r0 + store i32 %r0.val, ptr addrspace(1) %r0 ret void } @@ -20,24 +20,24 @@ ; GCN: s_not_b32 ; GCN: s_add_i32 define amdgpu_kernel void @scalar_nand_i32_mul_use( - i32 addrspace(1)* %r0, i32 addrspace(1)* %r1, i32 %a, i32 %b) { + ptr addrspace(1) %r0, ptr addrspace(1) %r1, i32 %a, i32 %b) { entry: %and = and i32 %a, %b %r0.val = xor i32 %and, -1 %r1.val = add i32 %and, %a - store i32 %r0.val, i32 addrspace(1)* %r0 - store i32 %r1.val, i32 addrspace(1)* %r1 + store i32 %r0.val, ptr addrspace(1) %r0 + store i32 %r1.val, ptr addrspace(1) %r1 ret void } ; GCN-LABEL: {{^}}scalar_nand_i64_one_use ; GCN: s_nand_b64 define amdgpu_kernel void @scalar_nand_i64_one_use( - i64 addrspace(1)* %r0, i64 %a, i64 %b) { + ptr addrspace(1) %r0, i64 %a, i64 %b) { entry: %and = and i64 %a, %b %r0.val = xor i64 %and, -1 - store i64 %r0.val, i64 addrspace(1)* %r0 + store i64 %r0.val, ptr addrspace(1) %r0 ret void } @@ -48,13 +48,13 @@ ; GCN: s_add_u32 ; GCN: s_addc_u32 define amdgpu_kernel void @scalar_nand_i64_mul_use( - i64 addrspace(1)* %r0, i64 addrspace(1)* %r1, i64 %a, i64 %b) { + ptr addrspace(1) %r0, ptr addrspace(1) %r1, i64 %a, i64 %b) { entry: %and = and i64 %a, %b %r0.val = xor i64 %and, -1 %r1.val = add i64 %and, %a - store i64 %r0.val, i64 addrspace(1)* %r0 - store i64 %r1.val, i64 addrspace(1)* %r1 + store i64 %r0.val, ptr addrspace(1) %r0 + store i64 %r1.val, ptr addrspace(1) %r1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll b/llvm/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll --- a/llvm/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll +++ b/llvm/test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll @@ -9,9 +9,9 @@ @extern_const_addrspace = external unnamed_addr addrspace(4) constant [5 x i32], align 4 ; CHECK-DAG: Name: load_extern_const_init -define amdgpu_kernel void @load_extern_const_init(i32 addrspace(1)* %out) nounwind { - %val = load i32, i32 addrspace(4)* getelementptr ([5 x i32], [5 x i32] addrspace(4)* @extern_const_addrspace, i64 0, i64 3), align 4 - store i32 %val, i32 addrspace(1)* %out, align 4 +define amdgpu_kernel void @load_extern_const_init(ptr addrspace(1) %out) nounwind { + %val = load i32, ptr addrspace(4) getelementptr ([5 x i32], ptr addrspace(4) @extern_const_addrspace, i64 0, i64 3), align 4 + store i32 %val, ptr addrspace(1) %out, align 4 ret void } @@ -19,8 +19,8 @@ @undef_const_addrspace = unnamed_addr addrspace(4) constant [5 x i32] undef, align 4 ; CHECK-DAG: Name: undef_const_addrspace -define amdgpu_kernel void @load_undef_const_init(i32 addrspace(1)* %out) nounwind { - %val = load i32, i32 addrspace(4)* getelementptr ([5 x i32], [5 x i32] addrspace(4)* @undef_const_addrspace, i64 0, i64 3), align 4 - store i32 %val, i32 addrspace(1)* %out, align 4 +define amdgpu_kernel void @load_undef_const_init(ptr addrspace(1) %out) nounwind { + %val = load i32, ptr addrspace(4) getelementptr ([5 x i32], ptr addrspace(4) @undef_const_addrspace, i64 0, i64 3), align 4 + store i32 %val, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll --- 
a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll @@ -17,32 +17,32 @@ ; GCN: s_load_dword s ; GCN-NOT: global_load_dword ; GCN: global_store_dword -define amdgpu_kernel void @simple_barrier(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @simple_barrier(ptr addrspace(1) %arg) { ; CHECK-LABEL: @simple_barrier( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[ARG:%.*]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire ; CHECK-NEXT: tail call void @llvm.amdgcn.wave.barrier() -; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 1, !amdgpu.uniform !0 -; CHECK-NEXT: [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0 +; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]] -; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 2 -; CHECK-NEXT: store i32 [[I3]], i32 addrspace(1)* [[I4]], align 4 +; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2 +; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4 ; CHECK-NEXT: ret void ; bb: - %i = load i32, i32 addrspace(1)* %arg, align 4 + %i = load i32, ptr addrspace(1) %arg, align 4 fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire tail call void @llvm.amdgcn.wave.barrier() - %i1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1 - %i2 = load i32, i32 addrspace(1)* %i1, align 4 + %i1 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1 + %i2 = load i32, ptr addrspace(1) %i1, align 4 %i3 = add i32 %i2, %i - %i4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2 - store i32 %i3, i32 addrspace(1)* %i4, align 4 + %i4 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 2 + store i32 %i3, ptr addrspace(1) %i4, align 4 ret void } @@ -55,10 +55,10 @@ ; GCN: s_load_dword s ; GCN-NOT: global_load_dword ; GCN: global_store_dword -define amdgpu_kernel void @memory_phi_no_clobber(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @memory_phi_no_clobber(ptr addrspace(1) %arg) { ; CHECK-LABEL: @memory_phi_no_clobber( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[ARG:%.*]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0 ; CHECK: if.then: ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() @@ -67,15 +67,15 @@ ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: br label [[IF_END]], !amdgpu.uniform !0 ; CHECK: if.end: -; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 1, !amdgpu.uniform !0 -; CHECK-NEXT: [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0 +; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: 
[[I3:%.*]] = add i32 [[I2]], [[I]] -; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 2 -; CHECK-NEXT: store i32 [[I3]], i32 addrspace(1)* [[I4]], align 4 +; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2 +; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4 ; CHECK-NEXT: ret void ; bb: - %i = load i32, i32 addrspace(1)* %arg, align 4 + %i = load i32, ptr addrspace(1) %arg, align 4 br i1 undef, label %if.then, label %if.else if.then: @@ -87,11 +87,11 @@ br label %if.end if.end: - %i1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1 - %i2 = load i32, i32 addrspace(1)* %i1, align 4 + %i1 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1 + %i2 = load i32, ptr addrspace(1) %i1, align 4 %i3 = add i32 %i2, %i - %i4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2 - store i32 %i3, i32 addrspace(1)* %i4, align 4 + %i4 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 2 + store i32 %i3, ptr addrspace(1) %i4, align 4 ret void } @@ -101,33 +101,33 @@ ; GCN: global_store_dword ; GCN: global_load_dword ; GCN: global_store_dword -define amdgpu_kernel void @memory_phi_clobber1(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @memory_phi_clobber1(ptr addrspace(1) %arg) { ; CHECK-LABEL: @memory_phi_clobber1( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[ARG:%.*]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0 ; CHECK: if.then: -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 3 -; CHECK-NEXT: store i32 1, i32 addrspace(1)* [[GEP]], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 3 +; CHECK-NEXT: store i32 1, ptr addrspace(1) [[GEP]], align 4 ; CHECK-NEXT: br label [[IF_END:%.*]], !amdgpu.uniform !0 ; CHECK: if.else: ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: br label [[IF_END]], !amdgpu.uniform !0 ; CHECK: if.end: -; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 1, !amdgpu.uniform !0 -; CHECK-NEXT: [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4 +; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0 +; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4 ; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]] -; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 2 -; CHECK-NEXT: store i32 [[I3]], i32 addrspace(1)* [[I4]], align 4 +; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2 +; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4 ; CHECK-NEXT: ret void ; bb: - %i = load i32, i32 addrspace(1)* %arg, align 4 + %i = load i32, ptr addrspace(1) %arg, align 4 br i1 undef, label %if.then, label %if.else if.then: - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3 - store i32 1, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 3 + store i32 1, ptr addrspace(1) %gep, align 4 br label %if.end if.else: @@ -135,11 +135,11 @@ br label %if.end if.end: - %i1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1 - %i2 = load i32, i32 addrspace(1)* %i1, align 4 + %i1 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1 + %i2 = 
load i32, ptr addrspace(1) %i1, align 4 %i3 = add i32 %i2, %i - %i4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2 - store i32 %i3, i32 addrspace(1)* %i4, align 4 + %i4 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 2 + store i32 %i3, ptr addrspace(1) %i4, align 4 ret void } @@ -149,28 +149,28 @@ ; GCN: s_barrier ; GCN: global_load_dword ; GCN: global_store_dword -define amdgpu_kernel void @memory_phi_clobber2(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @memory_phi_clobber2(ptr addrspace(1) %arg) { ; CHECK-LABEL: @memory_phi_clobber2( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[ARG:%.*]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0 ; CHECK: if.then: ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: br label [[IF_END:%.*]], !amdgpu.uniform !0 ; CHECK: if.else: -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 3 -; CHECK-NEXT: store i32 1, i32 addrspace(1)* [[GEP]], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 3 +; CHECK-NEXT: store i32 1, ptr addrspace(1) [[GEP]], align 4 ; CHECK-NEXT: br label [[IF_END]], !amdgpu.uniform !0 ; CHECK: if.end: -; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 1, !amdgpu.uniform !0 -; CHECK-NEXT: [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4 +; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0 +; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4 ; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]] -; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 2 -; CHECK-NEXT: store i32 [[I3]], i32 addrspace(1)* [[I4]], align 4 +; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2 +; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4 ; CHECK-NEXT: ret void ; bb: - %i = load i32, i32 addrspace(1)* %arg, align 4 + %i = load i32, ptr addrspace(1) %arg, align 4 br i1 undef, label %if.then, label %if.else if.then: @@ -178,16 +178,16 @@ br label %if.end if.else: - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3 - store i32 1, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 3 + store i32 1, ptr addrspace(1) %gep, align 4 br label %if.end if.end: - %i1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1 - %i2 = load i32, i32 addrspace(1)* %i1, align 4 + %i1 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1 + %i2 = load i32, ptr addrspace(1) %i1, align 4 %i3 = add i32 %i2, %i - %i4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2 - store i32 %i3, i32 addrspace(1)* %i4, align 4 + %i4 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 2 + store i32 %i3, ptr addrspace(1) %i4, align 4 ret void } @@ -196,32 +196,32 @@ ; GCN: s_load_dword s ; GCN-NOT: global_load_dword ; GCN: global_store_dword -define amdgpu_kernel void @no_clobbering_loop1(i32 addrspace(1)* %arg, i1 %cc) { +define amdgpu_kernel void @no_clobbering_loop1(ptr addrspace(1) %arg, i1 %cc) { ; CHECK-LABEL: @no_clobbering_loop1( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[ARG:%.*]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) 
[[ARG:%.*]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: br label [[WHILE_COND:%.*]], !amdgpu.uniform !0 ; CHECK: while.cond: -; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 1, !amdgpu.uniform !0 -; CHECK-NEXT: [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0 +; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]] -; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 2 -; CHECK-NEXT: store i32 [[I3]], i32 addrspace(1)* [[I4]], align 4 +; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2 +; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4 ; CHECK-NEXT: tail call void @llvm.amdgcn.wave.barrier() ; CHECK-NEXT: br i1 [[CC:%.*]], label [[WHILE_COND]], label [[END:%.*]], !amdgpu.uniform !0 ; CHECK: end: ; CHECK-NEXT: ret void ; bb: - %i = load i32, i32 addrspace(1)* %arg, align 4 + %i = load i32, ptr addrspace(1) %arg, align 4 br label %while.cond while.cond: - %i1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1 - %i2 = load i32, i32 addrspace(1)* %i1, align 4 + %i1 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1 + %i2 = load i32, ptr addrspace(1) %i1, align 4 %i3 = add i32 %i2, %i - %i4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2 - store i32 %i3, i32 addrspace(1)* %i4, align 4 + %i4 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 2 + store i32 %i3, ptr addrspace(1) %i4, align 4 tail call void @llvm.amdgcn.wave.barrier() br i1 %cc, label %while.cond, label %end @@ -234,34 +234,34 @@ ; GCN: s_load_dword s ; GCN-NOT: global_load_dword ; GCN: global_store_dword -define amdgpu_kernel void @no_clobbering_loop2(i32 addrspace(1)* noalias %arg, i32 addrspace(1)* noalias %out, i32 %n) { +define amdgpu_kernel void @no_clobbering_loop2(ptr addrspace(1) noalias %arg, ptr addrspace(1) noalias %out, i32 %n) { ; CHECK-LABEL: @no_clobbering_loop2( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[ARG:%.*]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: br label [[WHILE_COND:%.*]], !amdgpu.uniform !0 ; CHECK: while.cond: ; CHECK-NEXT: [[C:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[INC:%.*]], [[WHILE_COND]] ] ; CHECK-NEXT: [[ACC:%.*]] = phi i32 [ [[I]], [[BB]] ], [ [[I3:%.*]], [[WHILE_COND]] ] -; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i32 [[C]], !amdgpu.uniform !0 -; CHECK-NEXT: [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i32 [[C]], !amdgpu.uniform !0 +; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: [[I3]] = add i32 [[I2]], [[ACC]] ; CHECK-NEXT: tail call void @llvm.amdgcn.wave.barrier() ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[C]], 1 ; CHECK-NEXT: [[CC:%.*]] = icmp eq i32 [[INC]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CC]], label [[WHILE_COND]], label [[END:%.*]], !amdgpu.uniform !0 ; CHECK: end: -; CHECK-NEXT: store i32 [[I3]], i32 addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; bb: - %i = load i32, i32 addrspace(1)* %arg, 
align 4 + %i = load i32, ptr addrspace(1) %arg, align 4 br label %while.cond while.cond: %c = phi i32 [ 0, %bb ], [ %inc, %while.cond ] %acc = phi i32 [ %i, %bb ], [ %i3, %while.cond ] - %i1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %c - %i2 = load i32, i32 addrspace(1)* %i1, align 4 + %i1 = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %c + %i2 = load i32, ptr addrspace(1) %i1, align 4 %i3 = add i32 %i2, %acc tail call void @llvm.amdgcn.wave.barrier() %inc = add nuw nsw i32 %c, 1 @@ -269,7 +269,7 @@ br i1 %cc, label %while.cond, label %end end: - store i32 %i3, i32 addrspace(1)* %out, align 4 + store i32 %i3, ptr addrspace(1) %out, align 4 ret void } @@ -277,32 +277,32 @@ ; GCN: s_load_dword s ; GCN: global_load_dword ; GCN: global_store_dword -define amdgpu_kernel void @clobbering_loop(i32 addrspace(1)* %arg, i32 addrspace(1)* %out, i1 %cc) { +define amdgpu_kernel void @clobbering_loop(ptr addrspace(1) %arg, ptr addrspace(1) %out, i1 %cc) { ; CHECK-LABEL: @clobbering_loop( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[ARG:%.*]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: br label [[WHILE_COND:%.*]], !amdgpu.uniform !0 ; CHECK: while.cond: -; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 1, !amdgpu.uniform !0 -; CHECK-NEXT: [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4 +; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0 +; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4 ; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]] -; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[OUT:%.*]], i64 1 -; CHECK-NEXT: store i32 [[I3]], i32 addrspace(1)* [[I4]], align 4 +; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT:%.*]], i64 1 +; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4 ; CHECK-NEXT: tail call void @llvm.amdgcn.wave.barrier() ; CHECK-NEXT: br i1 [[CC:%.*]], label [[WHILE_COND]], label [[END:%.*]], !amdgpu.uniform !0 ; CHECK: end: ; CHECK-NEXT: ret void ; bb: - %i = load i32, i32 addrspace(1)* %arg, align 4 + %i = load i32, ptr addrspace(1) %arg, align 4 br label %while.cond while.cond: - %i1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1 - %i2 = load i32, i32 addrspace(1)* %i1, align 4 + %i1 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1 + %i2 = load i32, ptr addrspace(1) %i1, align 4 %i3 = add i32 %i2, %i - %i4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 - store i32 %i3, i32 addrspace(1)* %i4, align 4 + %i4 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 1 + store i32 %i3, ptr addrspace(1) %i4, align 4 tail call void @llvm.amdgcn.wave.barrier() br i1 %cc, label %while.cond, label %end @@ -315,28 +315,28 @@ ; GCN: global_load_dword {{.*}} glc ; GCN: global_load_dword ; GCN: global_store_dword -define amdgpu_kernel void @clobber_by_atomic_load(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @clobber_by_atomic_load(ptr addrspace(1) %arg) { ; CHECK-LABEL: @clobber_by_atomic_load( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[I:%.*]] = load i32, i32 addrspace(1)* [[ARG:%.*]], align 4, !amdgpu.noclobber !0 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 2, !amdgpu.uniform !0 -; CHECK-NEXT: [[VAL:%.*]] = load atomic i32, i32 addrspace(1)* [[GEP]] seq_cst, align 4, 
!amdgpu.noclobber !0 -; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 3, !amdgpu.uniform !0 -; CHECK-NEXT: [[I2:%.*]] = load i32, i32 addrspace(1)* [[I1]], align 4 +; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2, !amdgpu.uniform !0 +; CHECK-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[GEP]] seq_cst, align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 3, !amdgpu.uniform !0 +; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4 ; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]] -; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG]], i64 4 -; CHECK-NEXT: store i32 [[I3]], i32 addrspace(1)* [[I4]], align 4 +; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 4 +; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4 ; CHECK-NEXT: ret void ; bb: - %i = load i32, i32 addrspace(1)* %arg, align 4 - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2 - %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4 - %i1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3 - %i2 = load i32, i32 addrspace(1)* %i1, align 4 + %i = load i32, ptr addrspace(1) %arg, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 2 + %val = load atomic i32, ptr addrspace(1) %gep seq_cst, align 4 + %i1 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 3 + %i2 = load i32, ptr addrspace(1) %i1, align 4 %i3 = add i32 %i2, %i - %i4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4 - store i32 %i3, i32 addrspace(1)* %i4, align 4 + %i4 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 4 + store i32 %i3, ptr addrspace(1) %i4, align 4 ret void } @@ -346,24 +346,24 @@ ; GCN: s_load_dword s ; GCN-NOT: global_load_dword ; GCN: global_store_dword -define protected amdgpu_kernel void @no_alias_store(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { +define protected amdgpu_kernel void @no_alias_store(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @no_alias_store( ; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 0, i32 addrspace(3)* @LDS, align 4 +; CHECK-NEXT: store i32 0, ptr addrspace(3) @LDS, align 4 ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[IN:%.*]], align 4, !amdgpu.noclobber !0 -; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: - store i32 0, i32 addrspace(3)* @LDS, align 4 + store i32 0, ptr addrspace(3) @LDS, align 4 fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %ld = load i32, i32 addrspace(1)* %in, align 4 - store i32 %ld, i32 addrspace(1)* %out, align 4 + %ld = load i32, ptr addrspace(1) %in, align 4 + store i32 %ld, ptr addrspace(1) %out, align 4 ret void } @@ -372,24 +372,24 @@ ; GCN: s_barrier ; GCN: global_load_dword ; GCN: global_store_dword -define protected amdgpu_kernel void @may_alias_store(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { +define protected 
amdgpu_kernel void @may_alias_store(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @may_alias_store( ; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 0, i32 addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: store i32 0, ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[IN:%.*]], align 4 -; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4 +; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT]], align 4 ; CHECK-NEXT: ret void ; entry: - store i32 0, i32 addrspace(1)* %out, align 4 + store i32 0, ptr addrspace(1) %out, align 4 fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %ld = load i32, i32 addrspace(1)* %in, align 4 - store i32 %ld, i32 addrspace(1)* %out, align 4 + %ld = load i32, ptr addrspace(1) %in, align 4 + store i32 %ld, ptr addrspace(1) %out, align 4 ret void } @@ -399,24 +399,24 @@ ; GCN: s_load_dword s ; GCN-NOT: global_load_dword ; GCN: global_store_dword -define protected amdgpu_kernel void @no_alias_volatile_store(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { +define protected amdgpu_kernel void @no_alias_volatile_store(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @no_alias_volatile_store( ; CHECK-NEXT: entry: -; CHECK-NEXT: store volatile i32 0, i32 addrspace(3)* @LDS, align 4 +; CHECK-NEXT: store volatile i32 0, ptr addrspace(3) @LDS, align 4 ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[IN:%.*]], align 4, !amdgpu.noclobber !0 -; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: - store volatile i32 0, i32 addrspace(3)* @LDS, align 4 + store volatile i32 0, ptr addrspace(3) @LDS, align 4 fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %ld = load i32, i32 addrspace(1)* %in, align 4 - store i32 %ld, i32 addrspace(1)* %out, align 4 + %ld = load i32, ptr addrspace(1) %in, align 4 + store i32 %ld, ptr addrspace(1) %out, align 4 ret void } @@ -425,18 +425,18 @@ ; GCN: s_load_dword s ; GCN-NOT: global_load_dword ; GCN: global_store_dword -define protected amdgpu_kernel void @no_alias_atomic_rmw_relaxed(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { +define protected amdgpu_kernel void @no_alias_atomic_rmw_relaxed(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @no_alias_atomic_rmw_relaxed( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add i32 addrspace(3)* @LDS, i32 5 monotonic, align 4 -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[IN:%.*]], align 4, !amdgpu.noclobber !0 -; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add ptr addrspace(3) @LDS, i32 5 monotonic, align 4 +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4 ; 
CHECK-NEXT: ret void ; entry: - %unused = atomicrmw add i32 addrspace(3)* @LDS, i32 5 monotonic - %ld = load i32, i32 addrspace(1)* %in, align 4 - store i32 %ld, i32 addrspace(1)* %out, align 4 + %unused = atomicrmw add ptr addrspace(3) @LDS, i32 5 monotonic + %ld = load i32, ptr addrspace(1) %in, align 4 + store i32 %ld, ptr addrspace(1) %out, align 4 ret void } @@ -445,24 +445,24 @@ ; GCN: s_load_dword s ; GCN-NOT: global_load_dword ; GCN: global_store_dword -define protected amdgpu_kernel void @no_alias_atomic_cmpxchg(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i32 %swap) { +define protected amdgpu_kernel void @no_alias_atomic_cmpxchg(ptr addrspace(1) %in, ptr addrspace(1) %out, i32 %swap) { ; CHECK-LABEL: @no_alias_atomic_cmpxchg( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[UNUSED:%.*]] = cmpxchg i32 addrspace(3)* @LDS, i32 7, i32 [[SWAP:%.*]] seq_cst monotonic, align 4 +; CHECK-NEXT: [[UNUSED:%.*]] = cmpxchg ptr addrspace(3) @LDS, i32 7, i32 [[SWAP:%.*]] seq_cst monotonic, align 4 ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[IN:%.*]], align 4, !amdgpu.noclobber !0 -; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: - %unused = cmpxchg i32 addrspace(3)* @LDS, i32 7, i32 %swap seq_cst monotonic + %unused = cmpxchg ptr addrspace(3) @LDS, i32 7, i32 %swap seq_cst monotonic fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %ld = load i32, i32 addrspace(1)* %in, align 4 - store i32 %ld, i32 addrspace(1)* %out, align 4 + %ld = load i32, ptr addrspace(1) %in, align 4 + store i32 %ld, ptr addrspace(1) %out, align 4 ret void } @@ -471,24 +471,24 @@ ; GCN: s_load_dword s ; GCN-NOT: global_load_dword ; GCN: global_store_dword -define protected amdgpu_kernel void @no_alias_atomic_rmw(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { +define protected amdgpu_kernel void @no_alias_atomic_rmw(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @no_alias_atomic_rmw( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add i32 addrspace(3)* @LDS, i32 5 seq_cst, align 4 +; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add ptr addrspace(3) @LDS, i32 5 seq_cst, align 4 ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[IN:%.*]], align 4, !amdgpu.noclobber !0 -; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: - %unused = atomicrmw add i32 addrspace(3)* @LDS, i32 5 seq_cst + %unused = atomicrmw add ptr addrspace(3) @LDS, i32 5 seq_cst fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %ld = load i32, i32 addrspace(1)* %in, align 4 - store i32 %ld, i32 addrspace(1)* %out, align 4 + %ld = load i32, ptr addrspace(1) %in, align 4 + store i32 %ld, ptr addrspace(1) %out, align 4 ret void } @@ -496,24 +496,24 @@ ; GCN: 
global_atomic_cmpswap ; GCN: global_load_dword ; GCN: global_store_dword -define protected amdgpu_kernel void @may_alias_atomic_cmpxchg(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i32 %swap) { +define protected amdgpu_kernel void @may_alias_atomic_cmpxchg(ptr addrspace(1) %in, ptr addrspace(1) %out, i32 %swap) { ; CHECK-LABEL: @may_alias_atomic_cmpxchg( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[UNUSED:%.*]] = cmpxchg i32 addrspace(1)* [[OUT:%.*]], i32 7, i32 [[SWAP:%.*]] seq_cst monotonic, align 4 +; CHECK-NEXT: [[UNUSED:%.*]] = cmpxchg ptr addrspace(1) [[OUT:%.*]], i32 7, i32 [[SWAP:%.*]] seq_cst monotonic, align 4 ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[IN:%.*]], align 4 -; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4 +; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT]], align 4 ; CHECK-NEXT: ret void ; entry: - %unused = cmpxchg i32 addrspace(1)* %out, i32 7, i32 %swap seq_cst monotonic + %unused = cmpxchg ptr addrspace(1) %out, i32 7, i32 %swap seq_cst monotonic fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %ld = load i32, i32 addrspace(1)* %in, align 4 - store i32 %ld, i32 addrspace(1)* %out, align 4 + %ld = load i32, ptr addrspace(1) %in, align 4 + store i32 %ld, ptr addrspace(1) %out, align 4 ret void } @@ -521,24 +521,24 @@ ; GCN: global_atomic_add ; GCN: global_load_dword ; GCN: global_store_dword -define protected amdgpu_kernel void @may_alias_atomic_rmw(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { +define protected amdgpu_kernel void @may_alias_atomic_rmw(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @may_alias_atomic_rmw( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add i32 addrspace(1)* [[OUT:%.*]], i32 5 seq_cst, align 4 +; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add ptr addrspace(1) [[OUT:%.*]], i32 5 seq_cst, align 4 ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[IN:%.*]], align 4 -; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4 +; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT]], align 4 ; CHECK-NEXT: ret void ; entry: - %unused = atomicrmw add i32 addrspace(1)* %out, i32 5 seq_cst + %unused = atomicrmw add ptr addrspace(1) %out, i32 5 seq_cst fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %ld = load i32, i32 addrspace(1)* %in, align 4 - store i32 %ld, i32 addrspace(1)* %out, align 4 + %ld = load i32, ptr addrspace(1) %in, align 4 + store i32 %ld, ptr addrspace(1) %out, align 4 ret void } @@ -548,28 +548,28 @@ ; GCN: ds_add_u32 ; GCN: global_load_dword ; GCN: global_store_dword -define protected amdgpu_kernel void @no_alias_atomic_rmw_then_clobber(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i32 addrspace(1)* noalias %noalias) { +define protected amdgpu_kernel void @no_alias_atomic_rmw_then_clobber(ptr addrspace(1) %in, ptr addrspace(1) %out, ptr addrspace(1) noalias %noalias) { ; CHECK-LABEL: @no_alias_atomic_rmw_then_clobber( ; CHECK-NEXT: entry: -; CHECK-NEXT: store 
i32 1, i32 addrspace(1)* [[OUT:%.*]], align 4 -; CHECK-NEXT: store i32 2, i32 addrspace(1)* [[NOALIAS:%.*]], align 4 -; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add i32 addrspace(3)* @LDS, i32 5 seq_cst, align 4 +; CHECK-NEXT: store i32 1, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: store i32 2, ptr addrspace(1) [[NOALIAS:%.*]], align 4 +; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add ptr addrspace(3) @LDS, i32 5 seq_cst, align 4 ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[IN:%.*]], align 4 -; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4 +; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT]], align 4 ; CHECK-NEXT: ret void ; entry: - store i32 1, i32 addrspace(1)* %out, align 4 - store i32 2, i32 addrspace(1)* %noalias, align 4 - %unused = atomicrmw add i32 addrspace(3)* @LDS, i32 5 seq_cst + store i32 1, ptr addrspace(1) %out, align 4 + store i32 2, ptr addrspace(1) %noalias, align 4 + %unused = atomicrmw add ptr addrspace(3) @LDS, i32 5 seq_cst fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %ld = load i32, i32 addrspace(1)* %in, align 4 - store i32 %ld, i32 addrspace(1)* %out, align 4 + %ld = load i32, ptr addrspace(1) %in, align 4 + store i32 %ld, ptr addrspace(1) %out, align 4 ret void } @@ -579,26 +579,26 @@ ; GCN: s_load_dword s ; GCN-NOT: global_load_dword ; GCN: global_store_dword -define protected amdgpu_kernel void @no_alias_atomic_rmw_then_no_alias_store(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i32 addrspace(1)* noalias %noalias) { +define protected amdgpu_kernel void @no_alias_atomic_rmw_then_no_alias_store(ptr addrspace(1) %in, ptr addrspace(1) %out, ptr addrspace(1) noalias %noalias) { ; CHECK-LABEL: @no_alias_atomic_rmw_then_no_alias_store( ; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 2, i32 addrspace(1)* [[NOALIAS:%.*]], align 4 -; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add i32 addrspace(3)* @LDS, i32 5 seq_cst, align 4 +; CHECK-NEXT: store i32 2, ptr addrspace(1) [[NOALIAS:%.*]], align 4 +; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add ptr addrspace(3) @LDS, i32 5 seq_cst, align 4 ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[IN:%.*]], align 4, !amdgpu.noclobber !0 -; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: - store i32 2, i32 addrspace(1)* %noalias, align 4 - %unused = atomicrmw add i32 addrspace(3)* @LDS, i32 5 seq_cst + store i32 2, ptr addrspace(1) %noalias, align 4 + %unused = atomicrmw add ptr addrspace(3) @LDS, i32 5 seq_cst fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %ld = load i32, i32 addrspace(1)* %in, align 4 - store i32 %ld, i32 addrspace(1)* %out, align 4 + %ld = load i32, ptr addrspace(1) %in, align 4 + store i32 %ld, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll 
b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll --- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll @@ -14,7 +14,7 @@ ; FIXME: FunctionLoweringInfo unhelpfully doesn't preserve an ; alignment less than the stack alignment. -define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align4(i32 addrspace(1)* %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) #1 { +define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align4(ptr addrspace(1) %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) #1 { ; MUBUF-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align4: ; MUBUF: ; %bb.0: ; %entry ; MUBUF-NEXT: s_add_u32 s0, s0, s9 @@ -90,24 +90,23 @@ bb.0: %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 - %gep1 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1 + %gep1 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 1 %cond1 = icmp eq i32 %arg.cond1, 0 br i1 %cond1, label %bb.1, label %bb.2 bb.1: ; Use the alloca outside of the defining block. - store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %in - %load = load i32, i32 addrspace(5)* %gep2 + store i32 0, ptr addrspace(5) %alloca + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %in + %load = load i32, ptr addrspace(5) %gep2 %tid = call i32 @llvm.amdgcn.workitem.id.x() %add = add i32 %load, %tid - store i32 %add, i32 addrspace(1)* %out + store i32 %add, ptr addrspace(1) %out br label %bb.2 bb.2: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } ; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4112 @@ -119,7 +118,7 @@ ; ASSUME1024: .amdhsa_private_segment_fixed_size 1040 ; ASSUME1024: ; ScratchSize: 1040 -define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align64(i32 addrspace(1)* %out, i32 %arg.cond, i32 %in) { +define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align64(ptr addrspace(1) %out, i32 %arg.cond, i32 %in) { ; MUBUF-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align64: ; MUBUF: ; %bb.0: ; %entry ; MUBUF-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x8 @@ -190,19 +189,18 @@ bb.0: %alloca = alloca [16 x i32], align 64, addrspace(5) - %gep0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 - %gep1 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1 - store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %in - %load = load i32, i32 addrspace(5)* %gep2 + %gep1 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 1 + store i32 0, ptr addrspace(5) %alloca + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %in + %load = load i32, ptr addrspace(5) %gep2 %tid = call i32 @llvm.amdgcn.workitem.id.x() %add = add i32 %load, %tid - store i32 %add, i32 addrspace(1)* %out + store i32 %add, ptr addrspace(1) %out br label %bb.1 bb.1: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } @@ -216,7 +214,7 @@ ; ASSUME1024: ; ScratchSize: 1088 -define void 
@func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) { +define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) { ; MUBUF-LABEL: func_non_entry_block_static_alloca_align4: ; MUBUF: ; %bb.0: ; %entry ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -293,28 +291,27 @@ bb.0: %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 - %gep1 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1 + %gep1 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 1 %cond1 = icmp eq i32 %arg.cond1, 0 br i1 %cond1, label %bb.1, label %bb.2 bb.1: ; Use the alloca outside of the defining block. - store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %in - %load = load i32, i32 addrspace(5)* %gep2 + store i32 0, ptr addrspace(5) %alloca + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %in + %load = load i32, ptr addrspace(5) %gep2 %tid = call i32 @llvm.amdgcn.workitem.id.x() %add = add i32 %load, %tid - store i32 %add, i32 addrspace(1)* %out + store i32 %add, ptr addrspace(1) %out br label %bb.2 bb.2: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } -define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out, i32 %arg.cond, i32 %in) { +define void @func_non_entry_block_static_alloca_align64(ptr addrspace(1) %out, i32 %arg.cond, i32 %in) { ; MUBUF-LABEL: func_non_entry_block_static_alloca_align64: ; MUBUF: ; %bb.0: ; %entry ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -386,19 +383,18 @@ bb.0: %alloca = alloca [16 x i32], align 64, addrspace(5) - %gep0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 - %gep1 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1 - store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %in - %load = load i32, i32 addrspace(5)* %gep2 + %gep1 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 1 + store i32 0, ptr addrspace(5) %alloca + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %in + %load = load i32, ptr addrspace(5) %gep2 %tid = call i32 @llvm.amdgcn.workitem.id.x() %add = add i32 %load, %tid - store i32 %add, i32 addrspace(1)* %out + store i32 %add, ptr addrspace(1) %out br label %bb.1 bb.1: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/nop-data.ll b/llvm/test/CodeGen/AMDGPU/nop-data.ll --- a/llvm/test/CodeGen/AMDGPU/nop-data.ll +++ b/llvm/test/CodeGen/AMDGPU/nop-data.ll @@ -81,7 +81,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: : ; CHECK: s_endpgm -define amdgpu_kernel void @kernel1(i32 addrspace(1)* addrspace(4)* %ptr.out) align 256 { +define amdgpu_kernel void @kernel1(ptr addrspace(4) %ptr.out) align 256 { entry: ret void } diff --git a/llvm/test/CodeGen/AMDGPU/nullptr.ll b/llvm/test/CodeGen/AMDGPU/nullptr.ll --- a/llvm/test/CodeGen/AMDGPU/nullptr.ll +++ b/llvm/test/CodeGen/AMDGPU/nullptr.ll @@ -1,101 +1,101 @@ ;RUN: llc < %s -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs | 
FileCheck -check-prefixes=CHECK,GCN %s ;RUN: llc < %s -march=r600 -mtriple=r600-- -verify-machineinstrs | FileCheck -check-prefixes=CHECK,R600 %s -%struct.S = type { i32 addrspace(5)*, i32 addrspace(1)*, i32 addrspace(4)*, i32 addrspace(3)*, i32*, i32 addrspace(2)*} +%struct.S = type { ptr addrspace(5), ptr addrspace(1), ptr addrspace(4), ptr addrspace(3), ptr, ptr addrspace(2)} ; CHECK-LABEL: nullptr_priv: ; CHECK-NEXT: .long -1 -@nullptr_priv = global i32 addrspace(5)* addrspacecast (i32* null to i32 addrspace(5)*) +@nullptr_priv = global ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)) ; CHECK-LABEL: nullptr_glob: ; GCN-NEXT: .quad 0 ; R600-NEXT: .long 0 -@nullptr_glob = global i32 addrspace(1)* addrspacecast (i32* null to i32 addrspace(1)*) +@nullptr_glob = global ptr addrspace(1) addrspacecast (ptr null to ptr addrspace(1)) ; CHECK-LABEL: nullptr_const: ; GCN-NEXT: .quad 0 ; R600-NEXT: .long 0 -@nullptr_const = global i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*) +@nullptr_const = global ptr addrspace(4) addrspacecast (ptr null to ptr addrspace(4)) ; CHECK-LABEL: nullptr_local: ; CHECK-NEXT: .long -1 -@nullptr_local = global i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*) +@nullptr_local = global ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)) ; CHECK-LABEL: nullptr_region: ; CHECK-NEXT: .long -1 -@nullptr_region = global i32 addrspace(2)* addrspacecast (i32* null to i32 addrspace(2)*) +@nullptr_region = global ptr addrspace(2) addrspacecast (ptr null to ptr addrspace(2)) ; CHECK-LABEL: nullptr6: ; R600-NEXT: .long 0 -@nullptr6 = global i32 addrspace(6)* addrspacecast (i32* null to i32 addrspace(6)*) +@nullptr6 = global ptr addrspace(6) addrspacecast (ptr null to ptr addrspace(6)) ; CHECK-LABEL: nullptr7: ; R600-NEXT: .long 0 -@nullptr7 = global i32 addrspace(7)* addrspacecast (i32* null to i32 addrspace(7)*) +@nullptr7 = global ptr addrspace(7) addrspacecast (ptr null to ptr addrspace(7)) ; CHECK-LABEL: nullptr8: ; R600-NEXT: .long 0 -@nullptr8 = global i32 addrspace(8)* addrspacecast (i32* null to i32 addrspace(8)*) +@nullptr8 = global ptr addrspace(8) addrspacecast (ptr null to ptr addrspace(8)) ; CHECK-LABEL: nullptr9: ; R600-NEXT: .long 0 -@nullptr9 = global i32 addrspace(9)* addrspacecast (i32* null to i32 addrspace(9)*) +@nullptr9 = global ptr addrspace(9) addrspacecast (ptr null to ptr addrspace(9)) ; CHECK-LABEL: nullptr10: ; R600-NEXT: .long 0 -@nullptr10 = global i32 addrspace(10)* addrspacecast (i32* null to i32 addrspace(10)*) +@nullptr10 = global ptr addrspace(10) addrspacecast (ptr null to ptr addrspace(10)) ; CHECK-LABEL: nullptr11: ; R600-NEXT: .long 0 -@nullptr11 = global i32 addrspace(11)* addrspacecast (i32* null to i32 addrspace(11)*) +@nullptr11 = global ptr addrspace(11) addrspacecast (ptr null to ptr addrspace(11)) ; CHECK-LABEL: nullptr12: ; R600-NEXT: .long 0 -@nullptr12 = global i32 addrspace(12)* addrspacecast (i32* null to i32 addrspace(12)*) +@nullptr12 = global ptr addrspace(12) addrspacecast (ptr null to ptr addrspace(12)) ; CHECK-LABEL: nullptr13: ; R600-NEXT: .long 0 -@nullptr13 = global i32 addrspace(13)* addrspacecast (i32* null to i32 addrspace(13)*) +@nullptr13 = global ptr addrspace(13) addrspacecast (ptr null to ptr addrspace(13)) ; CHECK-LABEL: nullptr14: ; R600-NEXT: .long 0 -@nullptr14 = global i32 addrspace(14)* addrspacecast (i32* null to i32 addrspace(14)*) +@nullptr14 = global ptr addrspace(14) addrspacecast (ptr null to ptr addrspace(14)) ; CHECK-LABEL: nullptr15: ; 
R600-NEXT: .long 0 -@nullptr15 = global i32 addrspace(15)* addrspacecast (i32* null to i32 addrspace(15)*) +@nullptr15 = global ptr addrspace(15) addrspacecast (ptr null to ptr addrspace(15)) ; CHECK-LABEL: nullptr16: ; R600-NEXT: .long 0 -@nullptr16 = global i32 addrspace(16)* addrspacecast (i32* null to i32 addrspace(16)*) +@nullptr16 = global ptr addrspace(16) addrspacecast (ptr null to ptr addrspace(16)) ; CHECK-LABEL: nullptr17: ; R600-NEXT: .long 0 -@nullptr17 = global i32 addrspace(17)* addrspacecast (i32* null to i32 addrspace(17)*) +@nullptr17 = global ptr addrspace(17) addrspacecast (ptr null to ptr addrspace(17)) ; CHECK-LABEL: nullptr18: ; R600-NEXT: .long 0 -@nullptr18 = global i32 addrspace(18)* addrspacecast (i32* null to i32 addrspace(18)*) +@nullptr18 = global ptr addrspace(18) addrspacecast (ptr null to ptr addrspace(18)) ; CHECK-LABEL: nullptr19: ; R600-NEXT: .long 0 -@nullptr19 = global i32 addrspace(19)* addrspacecast (i32* null to i32 addrspace(19)*) +@nullptr19 = global ptr addrspace(19) addrspacecast (ptr null to ptr addrspace(19)) ; CHECK-LABEL: nullptr20: ; R600-NEXT: .long 0 -@nullptr20 = global i32 addrspace(20)* addrspacecast (i32* null to i32 addrspace(20)*) +@nullptr20 = global ptr addrspace(20) addrspacecast (ptr null to ptr addrspace(20)) ; CHECK-LABEL: nullptr21: ; R600-NEXT: .long 0 -@nullptr21 = global i32 addrspace(21)* addrspacecast (i32* null to i32 addrspace(21)*) +@nullptr21 = global ptr addrspace(21) addrspacecast (ptr null to ptr addrspace(21)) ; CHECK-LABEL: nullptr22: ; R600-NEXT: .long 0 -@nullptr22 = global i32 addrspace(22)* addrspacecast (i32* null to i32 addrspace(22)*) +@nullptr22 = global ptr addrspace(22) addrspacecast (ptr null to ptr addrspace(22)) ; CHECK-LABEL: nullptr23: ; R600-NEXT: .long 0 -@nullptr23 = global i32 addrspace(23)* addrspacecast (i32* null to i32 addrspace(23)*) +@nullptr23 = global ptr addrspace(23) addrspacecast (ptr null to ptr addrspace(23)) ; CHECK-LABEL: structWithPointers: ; CHECK-NEXT: .long -1 @@ -111,9 +111,9 @@ ; CHECK-NEXT: .long -1 ; GCN-NEXT: .zero 4 @structWithPointers = addrspace(1) global %struct.S { - i32 addrspace(5)* addrspacecast (i32* null to i32 addrspace(5)*), - i32 addrspace(1)* addrspacecast (i32* null to i32 addrspace(1)*), - i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*), - i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*), - i32* null, - i32 addrspace(2)* addrspacecast (i32* null to i32 addrspace(2)*)}, align 4 + ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), + ptr addrspace(1) addrspacecast (ptr null to ptr addrspace(1)), + ptr addrspace(4) addrspacecast (ptr null to ptr addrspace(4)), + ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), + ptr null, + ptr addrspace(2) addrspacecast (ptr null to ptr addrspace(2))}, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll b/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll --- a/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll +++ b/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll @@ -334,8 +334,7 @@ ; GFX1100: ; Occupancy: 16 @lds6552 = internal addrspace(3) global [6552 x i8] undef, align 4 define amdgpu_kernel void @used_lds_6552() { - %p = bitcast [6552 x i8] addrspace(3)* @lds6552 to i8 addrspace(3)* - store volatile i8 1, i8 addrspace(3)* %p + store volatile i8 1, ptr addrspace(3) @lds6552 ret void } @@ -346,8 +345,7 @@ ; GFX1100: ; Occupancy: 16 @lds6556 = internal addrspace(3) global [6556 x i8] undef, align 4 define amdgpu_kernel void @used_lds_6556() { - %p = bitcast [6556 
x i8] addrspace(3)* @lds6556 to i8 addrspace(3)* - store volatile i8 1, i8 addrspace(3)* %p + store volatile i8 1, ptr addrspace(3) @lds6556 ret void } @@ -358,8 +356,7 @@ ; GFX1100: ; Occupancy: 16 @lds13112 = internal addrspace(3) global [13112 x i8] undef, align 4 define amdgpu_kernel void @used_lds_13112() { - %p = bitcast [13112 x i8] addrspace(3)* @lds13112 to i8 addrspace(3)* - store volatile i8 1, i8 addrspace(3)* %p + store volatile i8 1, ptr addrspace(3) @lds13112 ret void } @@ -371,8 +368,7 @@ ; GFX1100W32: ; Occupancy: 14{{$}} @lds8252 = internal addrspace(3) global [8252 x i8] undef, align 4 define amdgpu_kernel void @used_lds_8252_max_group_size_64() #3 { - %p = bitcast [8252 x i8] addrspace(3)* @lds8252 to i8 addrspace(3)* - store volatile i8 1, i8 addrspace(3)* %p + store volatile i8 1, ptr addrspace(3) @lds8252 ret void } @@ -384,8 +380,7 @@ ; GFX1100W64: ; Occupancy: 14{{$}} ; GFX1100W32: ; Occupancy: 16{{$}} define amdgpu_kernel void @used_lds_8252_max_group_size_96() #4 { - %p = bitcast [8252 x i8] addrspace(3)* @lds8252 to i8 addrspace(3)* - store volatile i8 1, i8 addrspace(3)* %p + store volatile i8 1, ptr addrspace(3) @lds8252 ret void } @@ -397,8 +392,7 @@ ; GFX1100W64: ; Occupancy: 14{{$}} ; GFX1100W32: ; Occupancy: 16{{$}} define amdgpu_kernel void @used_lds_8252_max_group_size_128() #5 { - %p = bitcast [8252 x i8] addrspace(3)* @lds8252 to i8 addrspace(3)* - store volatile i8 1, i8 addrspace(3)* %p + store volatile i8 1, ptr addrspace(3) @lds8252 ret void } @@ -408,8 +402,7 @@ ; GFX1030: ; Occupancy: 16{{$}} ; GFX1100: ; Occupancy: 16{{$}} define amdgpu_kernel void @used_lds_8252_max_group_size_192() #6 { - %p = bitcast [8252 x i8] addrspace(3)* @lds8252 to i8 addrspace(3)* - store volatile i8 1, i8 addrspace(3)* %p + store volatile i8 1, ptr addrspace(3) @lds8252 ret void } @@ -419,8 +412,7 @@ ; GFX1030: ; Occupancy: 16{{$}} ; GFX1100: ; Occupancy: 16{{$}} define amdgpu_kernel void @used_lds_8252_max_group_size_256() #7 { - %p = bitcast [8252 x i8] addrspace(3)* @lds8252 to i8 addrspace(3)* - store volatile i8 1, i8 addrspace(3)* %p + store volatile i8 1, ptr addrspace(3) @lds8252 ret void } @@ -430,8 +422,7 @@ ; GFX1030: ; Occupancy: 16{{$}} ; GFX1100: ; Occupancy: 16{{$}} define amdgpu_kernel void @used_lds_8252_max_group_size_512() #8 { - %p = bitcast [8252 x i8] addrspace(3)* @lds8252 to i8 addrspace(3)* - store volatile i8 1, i8 addrspace(3)* %p + store volatile i8 1, ptr addrspace(3) @lds8252 ret void } @@ -441,8 +432,7 @@ ; GFX1030: ; Occupancy: 16{{$}} ; GFX1100: ; Occupancy: 16{{$}} define amdgpu_kernel void @used_lds_8252_max_group_size_1024() #9 { - %p = bitcast [8252 x i8] addrspace(3)* @lds8252 to i8 addrspace(3)* - store volatile i8 1, i8 addrspace(3)* %p + store volatile i8 1, ptr addrspace(3) @lds8252 ret void } @@ -451,8 +441,7 @@ ; GFX10: ; Occupancy: 7{{$}} ; GFX1100: ; Occupancy: 7{{$}} define amdgpu_kernel void @used_lds_8252_max_group_size_32() #10 { - %p = bitcast [8252 x i8] addrspace(3)* @lds8252 to i8 addrspace(3)* - store volatile i8 1, i8 addrspace(3)* %p + store volatile i8 1, ptr addrspace(3) @lds8252 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/omod.ll b/llvm/test/CodeGen/AMDGPU/omod.ll --- a/llvm/test/CodeGen/AMDGPU/omod.ll +++ b/llvm/test/CodeGen/AMDGPU/omod.ll @@ -3,7 +3,7 @@ ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck --check-prefixes=VI %s ; IEEE bit enabled for compute kernel, so shouldn't use. 
-define amdgpu_kernel void @v_omod_div2_f32_enable_ieee_signed_zeros(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 { +define amdgpu_kernel void @v_omod_div2_f32_enable_ieee_signed_zeros(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #4 { ; SI-LABEL: v_omod_div2_f32_enable_ieee_signed_zeros: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -39,17 +39,17 @@ ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid - %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load float, float addrspace(1)* %gep0 + %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid + %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %a = load float, ptr addrspace(1) %gep0 %add = fadd float %a, 1.0 %div2 = fmul float %add, 0.5 - store float %div2, float addrspace(1)* %out.gep + store float %div2, ptr addrspace(1) %out.gep ret void } ; IEEE bit enabled for compute kernel, so shouldn't use. -define amdgpu_kernel void @v_omod_div2_f64_enable_ieee_signed_zeros(double addrspace(1)* %out, double addrspace(1)* %aptr) #4 { +define amdgpu_kernel void @v_omod_div2_f64_enable_ieee_signed_zeros(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #4 { ; SI-LABEL: v_omod_div2_f64_enable_ieee_signed_zeros: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -85,17 +85,17 @@ ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid - %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid - %a = load double, double addrspace(1)* %gep0 + %gep0 = getelementptr double, ptr addrspace(1) %aptr, i32 %tid + %out.gep = getelementptr double, ptr addrspace(1) %out, i32 %tid + %a = load double, ptr addrspace(1) %gep0 %add = fadd double %a, 1.0 %div2 = fmul double %add, 0.5 - store double %div2, double addrspace(1)* %out.gep + store double %div2, ptr addrspace(1) %out.gep ret void } ; IEEE bit enabled for compute kernel, so shouldn't use even though nsz is allowed -define amdgpu_kernel void @v_omod_div2_f32_enable_ieee_nsz(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 { +define amdgpu_kernel void @v_omod_div2_f32_enable_ieee_nsz(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 { ; SI-LABEL: v_omod_div2_f32_enable_ieee_nsz: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -131,17 +131,17 @@ ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid - %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load float, float addrspace(1)* %gep0 + %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid + %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %a = load float, ptr addrspace(1) %gep0 %add = fadd float %a, 1.0 %div2 = fmul float %add, 0.5 - store float %div2, float addrspace(1)* %out.gep + store float %div2, ptr addrspace(1) %out.gep ret void } ; IEEE bit enabled for compute kernel, so shouldn't use even though nsz is allowed. 
-define amdgpu_kernel void @v_omod_div2_f64_enable_ieee_nsz(double addrspace(1)* %out, double addrspace(1)* %aptr) #5 { +define amdgpu_kernel void @v_omod_div2_f64_enable_ieee_nsz(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #5 { ; SI-LABEL: v_omod_div2_f64_enable_ieee_nsz: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -177,12 +177,12 @@ ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid - %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid - %a = load double, double addrspace(1)* %gep0 + %gep0 = getelementptr double, ptr addrspace(1) %aptr, i32 %tid + %out.gep = getelementptr double, ptr addrspace(1) %out, i32 %tid + %a = load double, ptr addrspace(1) %gep0 %add = fadd double %a, 1.0 %div2 = fmul double %add, 0.5 - store double %div2, double addrspace(1)* %out.gep + store double %div2, ptr addrspace(1) %out.gep ret void } @@ -205,7 +205,7 @@ ; VI-NEXT: s_endpgm %add = fadd float %a, 1.0 %div2 = fmul float %add, 0.5 - store float %div2, float addrspace(1)* undef + store float %div2, ptr addrspace(1) undef ret void } @@ -228,7 +228,7 @@ ; VI-NEXT: s_endpgm %add = fadd double %a, 1.0 %div2 = fmul double %add, 0.5 - store double %div2, double addrspace(1)* undef + store double %div2, ptr addrspace(1) undef ret void } @@ -248,7 +248,7 @@ ; VI-NEXT: s_endpgm %add = fadd float %a, 1.0 %div2 = fmul float %add, 0.5 - store float %div2, float addrspace(1)* undef + store float %div2, ptr addrspace(1) undef ret void } @@ -268,7 +268,7 @@ ; VI-NEXT: s_endpgm %add = fadd nsz double %a, 1.0 %div2 = fmul nsz double %add, 0.5 - store double %div2, double addrspace(1)* undef + store double %div2, ptr addrspace(1) undef ret void } @@ -288,7 +288,7 @@ ; VI-NEXT: s_endpgm %add = fadd float %a, 1.0 %div2 = fmul float %add, 2.0 - store float %div2, float addrspace(1)* undef + store float %div2, ptr addrspace(1) undef ret void } @@ -308,7 +308,7 @@ ; VI-NEXT: s_endpgm %add = fadd nsz double %a, 1.0 %div2 = fmul nsz double %add, 2.0 - store double %div2, double addrspace(1)* undef + store double %div2, ptr addrspace(1) undef ret void } @@ -328,7 +328,7 @@ ; VI-NEXT: s_endpgm %add = fadd float %a, 1.0 %div2 = fmul float %add, 4.0 - store float %div2, float addrspace(1)* undef + store float %div2, ptr addrspace(1) undef ret void } @@ -348,7 +348,7 @@ ; VI-NEXT: s_endpgm %add = fadd nsz double %a, 1.0 %div2 = fmul nsz double %add, 4.0 - store double %div2, double addrspace(1)* undef + store double %div2, ptr addrspace(1) undef ret void } @@ -374,8 +374,8 @@ ; VI-NEXT: s_endpgm %add = fadd float %a, 1.0 %div2 = fmul float %add, 4.0 - store float %div2, float addrspace(1)* undef - store volatile float %add, float addrspace(1)* undef + store float %div2, ptr addrspace(1) undef + store volatile float %add, ptr addrspace(1) undef ret void } @@ -396,7 +396,7 @@ %add = fadd float %a, 1.0 call void @llvm.dbg.value(metadata float %add, i64 0, metadata !4, metadata !9), !dbg !10 %div2 = fmul float %add, 4.0 - store float %div2, float addrspace(1)* undef + store float %div2, ptr addrspace(1) undef ret void } @@ -420,7 +420,7 @@ %max = call float @llvm.maxnum.f32(float %div2, float 0.0) %clamp = call float @llvm.minnum.f32(float %max, float 1.0) - store float %clamp, float addrspace(1)* undef + store float %clamp, ptr addrspace(1) undef ret void } @@ -445,7 +445,7 @@ %max = call float @llvm.maxnum.f32(float %add, float 0.0) %clamp = call float @llvm.minnum.f32(float 
%max, float 1.0) %div2 = fmul float %clamp, 0.5 - store float %div2, float addrspace(1)* undef + store float %div2, ptr addrspace(1) undef ret void } @@ -468,7 +468,7 @@ %add = fadd float %a, 1.0 %abs.add = call float @llvm.fabs.f32(float %add) %div2 = fmul float %abs.add, 0.5 - store float %div2, float addrspace(1)* undef + store float %div2, ptr addrspace(1) undef ret void } @@ -489,7 +489,7 @@ %add = fadd float %a, %a %max = call float @llvm.maxnum.f32(float %add, float 0.0) %clamp = call float @llvm.minnum.f32(float %max, float 1.0) - store float %clamp, float addrspace(1)* undef + store float %clamp, ptr addrspace(1) undef ret void } @@ -512,7 +512,7 @@ %max = call float @llvm.maxnum.f32(float %a, float 0.0) %clamp = call float @llvm.minnum.f32(float %max, float 1.0) %add = fadd float %clamp, %clamp - store float %add, float addrspace(1)* undef + store float %add, ptr addrspace(1) undef ret void } @@ -535,7 +535,7 @@ %x = fadd float %a, 1.0 %abs.x = call float @llvm.fabs.f32(float %x) %add = fadd float %abs.x, %abs.x - store float %add, float addrspace(1)* undef + store float %add, ptr addrspace(1) undef ret void } @@ -558,7 +558,7 @@ %x = fadd float %a, 1.0 %abs.x = call float @llvm.fabs.f32(float %x) %add = fadd float %abs.x, %x - store float %add, float addrspace(1)* undef + store float %add, ptr addrspace(1) undef ret void } @@ -581,7 +581,7 @@ %x = fadd float %a, 1.0 %abs.x = call float @llvm.fabs.f32(float %x) %add = fadd float %x, %abs.x - store float %add, float addrspace(1)* undef + store float %add, ptr addrspace(1) undef ret void } @@ -605,7 +605,7 @@ %add = fadd float %a, 1.0 %div2.0 = fmul float %add, 0.5 %div2.1 = fmul float %div2.0, 0.5 - store float %div2.1, float addrspace(1)* undef + store float %div2.1, ptr addrspace(1) undef ret void } @@ -628,7 +628,7 @@ ; VI-NEXT: s_endpgm %add = fadd float %a, 1.0 %div2 = fmul float %add, 0.5 - store float %div2, float addrspace(1)* undef + store float %div2, ptr addrspace(1) undef ret void } @@ -651,7 +651,7 @@ ; VI-NEXT: s_endpgm %add = fadd double %a, 1.0 %div2 = fmul double %add, 0.5 - store double %div2, double addrspace(1)* undef + store double %div2, ptr addrspace(1) undef ret void } @@ -674,7 +674,7 @@ ; VI-NEXT: s_endpgm %add = fadd float %a, 1.0 %mul2 = fadd float %add, %add - store float %mul2, float addrspace(1)* undef + store float %mul2, ptr addrspace(1) undef ret void } @@ -697,7 +697,7 @@ ; VI-NEXT: s_endpgm %add = fadd double %a, 1.0 %mul2 = fadd double %add, %add - store double %mul2, double addrspace(1)* undef + store double %mul2, ptr addrspace(1) undef ret void } @@ -722,7 +722,7 @@ ; VI-NEXT: s_endpgm %add = fadd half %a, 1.0 %div2 = fmul half %add, 0.5 - store half %div2, half addrspace(1)* undef + store half %div2, ptr addrspace(1) undef ret void } @@ -747,7 +747,7 @@ ; VI-NEXT: s_endpgm %add = fadd half %a, 1.0 %mul2 = fadd half %add, %add - store half %mul2, half addrspace(1)* undef + store half %mul2, ptr addrspace(1) undef ret void } @@ -770,7 +770,7 @@ ; VI-NEXT: s_endpgm %add = fadd half %a, 1.0 %div2 = fmul half %add, 0.5 - store half %div2, half addrspace(1)* undef + store half %div2, ptr addrspace(1) undef ret void } @@ -794,7 +794,7 @@ %add = fadd float %mul, %b %mad = fmul float %add, 2.0 %res = fmul float %mad, %b - store float %res, float addrspace(1)* undef + store float %res, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/operand-spacing.ll b/llvm/test/CodeGen/AMDGPU/operand-spacing.ll --- a/llvm/test/CodeGen/AMDGPU/operand-spacing.ll +++ 
b/llvm/test/CodeGen/AMDGPU/operand-spacing.ll @@ -15,8 +15,8 @@ ; VI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]] ; GCN: buffer_store_dword [[RESULT]], -define amdgpu_kernel void @add_f32(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b) { +define amdgpu_kernel void @add_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b) { %result = fadd float %a, %b - store float %result, float addrspace(1)* %out + store float %result, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/optimize-compare.ll b/llvm/test/CodeGen/AMDGPU/optimize-compare.ll --- a/llvm/test/CodeGen/AMDGPU/optimize-compare.ll +++ b/llvm/test/CodeGen/AMDGPU/optimize-compare.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -define amdgpu_kernel void @if_masked_1(i32 %arg, i32 addrspace(1)* %p) { +define amdgpu_kernel void @if_masked_1(i32 %arg, ptr addrspace(1) %p) { ; GCN-LABEL: if_masked_1: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s4, s[0:1], 0x24 @@ -16,11 +16,11 @@ %and = and i32 %arg, 1 %cmp = icmp eq i32 %and, 0 %sel = select i1 %cmp, i32 22, i32 33 - store i32 %sel, i32 addrspace(1)* %p + store i32 %sel, ptr addrspace(1) %p ret void } -define amdgpu_kernel void @if_masked_1024(i32 %arg, i32 addrspace(1)* %p) { +define amdgpu_kernel void @if_masked_1024(i32 %arg, ptr addrspace(1) %p) { ; GCN-LABEL: if_masked_1024: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s4, s[0:1], 0x24 @@ -35,11 +35,11 @@ %and = and i32 %arg, 1024 %cmp = icmp eq i32 %and, 0 %sel = select i1 %cmp, i32 22, i32 33 - store i32 %sel, i32 addrspace(1)* %p + store i32 %sel, ptr addrspace(1) %p ret void } -define amdgpu_kernel void @if_masked_0x80000000(i32 %arg, i32 addrspace(1)* %p) { +define amdgpu_kernel void @if_masked_0x80000000(i32 %arg, ptr addrspace(1) %p) { ; GCN-LABEL: if_masked_0x80000000: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s4, s[0:1], 0x24 @@ -54,12 +54,12 @@ %and = and i32 %arg, 2147483648 %cmp = icmp eq i32 %and, 0 %sel = select i1 %cmp, i32 22, i32 33 - store i32 %sel, i32 addrspace(1)* %p + store i32 %sel, ptr addrspace(1) %p ret void } ; FIXME: this should result in "s_bitcmp0_b64 $arg, 63" or "s_bitcmp0_b32 $arg.sub1, 31" -define amdgpu_kernel void @if_masked_0x8000000000000000(i64 %arg, i32 addrspace(1)* %p) { +define amdgpu_kernel void @if_masked_0x8000000000000000(i64 %arg, ptr addrspace(1) %p) { ; GCN-LABEL: if_masked_0x8000000000000000: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -75,6 +75,6 @@ %and = and i64 %arg, 9223372036854775808 %cmp = icmp eq i64 %and, 0 %sel = select i1 %cmp, i32 22, i32 33 - store i32 %sel, i32 addrspace(1)* %p + store i32 %sel, ptr addrspace(1) %p ret void } diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll --- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll +++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll @@ -8,12 +8,12 @@ ; GCN-NOT: v_cmp ; GCN: s_andn2_b64 vcc, exec, [[CC]] ; GCN: s_cbranch_vccnz .LBB0_2 -define amdgpu_kernel void @negated_cond(i32 addrspace(1)* %arg1) { +define amdgpu_kernel void @negated_cond(ptr addrspace(1) %arg1) { bb: br label %bb1 bb1: - %tmp1 = load i32, i32 addrspace(1)* %arg1 + %tmp1 = load i32, ptr addrspace(1) %arg1 %tmp2 = icmp eq i32 %tmp1, 0 br label %bb2 @@ -28,8 +28,8 @@ bb4: %tmp6 = phi i32 [ %tmp5, %bb3 ], [ %tmp4, %bb2 ] - %gep = getelementptr inbounds i32, i32 
addrspace(1)* %arg1, i32 %tmp6 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg1, i32 %tmp6 + store i32 0, ptr addrspace(1) %gep %tmp7 = icmp eq i32 %tmp6, 32 br i1 %tmp7, label %bb1, label %bb2 } @@ -47,12 +47,12 @@ ; GCN: s_mov_b64 vcc, exec ; GCN: s_cbranch_execnz [[BB0]] ; GCN: [[BB2]]: -define amdgpu_kernel void @negated_cond_dominated_blocks(i32 addrspace(1)* %arg1) { +define amdgpu_kernel void @negated_cond_dominated_blocks(ptr addrspace(1) %arg1) { bb: br label %bb2 bb2: - %tmp1 = load i32, i32 addrspace(1)* %arg1 + %tmp1 = load i32, ptr addrspace(1) %arg1 %tmp2 = icmp eq i32 %tmp1, 0 br label %bb4 @@ -74,8 +74,8 @@ bb7: %tmp7 = phi i32 [ %tmp5, %bb5 ], [ %tmp6, %bb6 ] - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i32 %tmp7 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg1, i32 %tmp7 + store i32 0, ptr addrspace(1) %gep %tmp8 = icmp eq i32 %tmp7, 32 br i1 %tmp8, label %bb3, label %bb4 } diff --git a/llvm/test/CodeGen/AMDGPU/predicates.ll b/llvm/test/CodeGen/AMDGPU/predicates.ll --- a/llvm/test/CodeGen/AMDGPU/predicates.ll +++ b/llvm/test/CodeGen/AMDGPU/predicates.ll @@ -6,7 +6,7 @@ ; CHECK-LABEL: {{^}}simple_if: ; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred, ; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel -define amdgpu_kernel void @simple_if(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @simple_if(ptr addrspace(1) %out, i32 %in) { entry: %cmp0 = icmp sgt i32 %in, 0 br i1 %cmp0, label %IF, label %ENDIF @@ -17,7 +17,7 @@ ENDIF: %tmp2 = phi i32 [ %in, %entry ], [ %tmp1, %IF ] - store i32 %tmp2, i32 addrspace(1)* %out + store i32 %tmp2, ptr addrspace(1) %out ret void } @@ -25,7 +25,7 @@ ; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred, ; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel ; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel -define amdgpu_kernel void @simple_if_else(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @simple_if_else(ptr addrspace(1) %out, i32 %in) { entry: %0 = icmp sgt i32 %in, 0 br i1 %0, label %IF, label %ELSE @@ -40,7 +40,7 @@ ENDIF: %3 = phi i32 [ %1, %IF ], [ %2, %ELSE ] - store i32 %3, i32 addrspace(1)* %out + store i32 %3, ptr addrspace(1) %out ret void } @@ -51,7 +51,7 @@ ; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Exec ; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred, ; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel -define amdgpu_kernel void @nested_if(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @nested_if(ptr addrspace(1) %out, i32 %in) { entry: %0 = icmp sgt i32 %in, 0 br i1 %0, label %IF0, label %ENDIF @@ -67,7 +67,7 @@ ENDIF: %4 = phi i32 [%in, %entry], [%1, %IF0], [%3, %IF1] - store i32 %4, i32 addrspace(1)* %out + store i32 %4, ptr addrspace(1) %out ret void } @@ -79,7 +79,7 @@ ; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred, ; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel ; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel -define amdgpu_kernel void @nested_if_else(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @nested_if_else(ptr addrspace(1) %out, i32 %in) { entry: %0 = icmp sgt i32 %in, 0 br i1 %0, label %IF0, label %ENDIF @@ -99,6 +99,6 @@ ENDIF: %5 = phi i32 [%in, %entry], [%3, %IF1], [%4, %ELSE1] - store i32 %5, i32 addrspace(1)* %out + store i32 %5, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/private-access-no-objects.ll 
b/llvm/test/CodeGen/AMDGPU/private-access-no-objects.ll --- a/llvm/test/CodeGen/AMDGPU/private-access-no-objects.ll +++ b/llvm/test/CodeGen/AMDGPU/private-access-no-objects.ll @@ -18,7 +18,7 @@ ; OPTNONE-NOT: s_mov_b32 ; OPTNONE: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} define amdgpu_kernel void @store_to_undef() #0 { - store volatile i32 0, i32 addrspace(5)* undef + store volatile i32 0, ptr addrspace(5) undef ret void } @@ -27,7 +27,7 @@ ; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]], s[2:3] ; OPT: buffer_store_dword v{{[0-9]+}}, off, s[[[RSRC_LO]]:[[RSRC_HI]]], 0 offset:124{{$}} define amdgpu_kernel void @store_to_inttoptr() #0 { - store volatile i32 0, i32 addrspace(5)* inttoptr (i32 124 to i32 addrspace(5)*) + store volatile i32 0, ptr addrspace(5) inttoptr (i32 124 to ptr addrspace(5)) ret void } @@ -36,7 +36,7 @@ ; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]], s[2:3] ; OPT: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[[[RSRC_LO]]:[[RSRC_HI]]], 0 offen glc{{$}} define amdgpu_kernel void @load_from_undef() #0 { - %ld = load volatile i32, i32 addrspace(5)* undef + %ld = load volatile i32, ptr addrspace(5) undef ret void } @@ -45,7 +45,7 @@ ; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]], s[2:3] ; OPT: buffer_load_dword v{{[0-9]+}}, off, s[[[RSRC_LO]]:[[RSRC_HI]]], 0 offset:124 glc{{$}} define amdgpu_kernel void @load_from_inttoptr() #0 { - %ld = load volatile i32, i32 addrspace(5)* inttoptr (i32 124 to i32 addrspace(5)*) + %ld = load volatile i32, ptr addrspace(5) inttoptr (i32 124 to ptr addrspace(5)) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/private-element-size.ll b/llvm/test/CodeGen/AMDGPU/private-element-size.ll --- a/llvm/test/CodeGen/AMDGPU/private-element-size.ll +++ b/llvm/test/CodeGen/AMDGPU/private-element-size.ll @@ -36,21 +36,20 @@ ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:8{{$}} ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:12{{$}} -define amdgpu_kernel void @private_elt_size_v4i32(<4 x i32> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 { +define amdgpu_kernel void @private_elt_size_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %index.array) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() %idxprom = sext i32 %tid to i64 - %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom - %index.load = load i32, i32 addrspace(1)* %gep.index + %gep.index = getelementptr inbounds i32, ptr addrspace(1) %index.array, i64 %idxprom + %index.load = load i32, ptr addrspace(1) %gep.index %index = and i32 %index.load, 2 %alloca = alloca [2 x <4 x i32>], align 16, addrspace(5) - %gep0 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>] addrspace(5)* %alloca, i32 0, i32 0 - %gep1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>] addrspace(5)* %alloca, i32 0, i32 1 - store <4 x i32> zeroinitializer, <4 x i32> addrspace(5)* %gep0 - store <4 x i32> , <4 x i32> addrspace(5)* %gep1 - %gep2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>] addrspace(5)* %alloca, i32 0, i32 %index - %load = load <4 x i32>, <4 x i32> addrspace(5)* %gep2 - store <4 x i32> %load, <4 x i32> addrspace(1)* %out + %gep1 = getelementptr inbounds [2 x <4 x i32>], ptr addrspace(5) %alloca, i32 0, i32 1 + store <4 x i32> zeroinitializer, ptr addrspace(5) %alloca + store <4 x i32> , ptr addrspace(5) %gep1 + %gep2 = getelementptr inbounds [2 
x <4 x i32>], ptr addrspace(5) %alloca, i32 0, i32 %index + %load = load <4 x i32>, ptr addrspace(5) %gep2 + store <4 x i32> %load, ptr addrspace(1) %out ret void } @@ -106,21 +105,20 @@ ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:20{{$}} ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:24{{$}} ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:28{{$}} -define amdgpu_kernel void @private_elt_size_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 { +define amdgpu_kernel void @private_elt_size_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %index.array) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() %idxprom = sext i32 %tid to i64 - %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom - %index.load = load i32, i32 addrspace(1)* %gep.index + %gep.index = getelementptr inbounds i32, ptr addrspace(1) %index.array, i64 %idxprom + %index.load = load i32, ptr addrspace(1) %gep.index %index = and i32 %index.load, 2 %alloca = alloca [2 x <8 x i32>], align 32, addrspace(5) - %gep0 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>] addrspace(5)* %alloca, i32 0, i32 0 - %gep1 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>] addrspace(5)* %alloca, i32 0, i32 1 - store <8 x i32> zeroinitializer, <8 x i32> addrspace(5)* %gep0 - store <8 x i32> , <8 x i32> addrspace(5)* %gep1 - %gep2 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>] addrspace(5)* %alloca, i32 0, i32 %index - %load = load <8 x i32>, <8 x i32> addrspace(5)* %gep2 - store <8 x i32> %load, <8 x i32> addrspace(1)* %out + %gep1 = getelementptr inbounds [2 x <8 x i32>], ptr addrspace(5) %alloca, i32 0, i32 1 + store <8 x i32> zeroinitializer, ptr addrspace(5) %alloca + store <8 x i32> , ptr addrspace(5) %gep1 + %gep2 = getelementptr inbounds [2 x <8 x i32>], ptr addrspace(5) %alloca, i32 0, i32 %index + %load = load <8 x i32>, ptr addrspace(5) %gep2 + store <8 x i32> %load, ptr addrspace(1) %out ret void } @@ -144,21 +142,20 @@ ; HSA-ELT4-DAG: buffer_load_dword v[[HI:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} ; HSA-ELT4-DAG: buffer_load_dword v[[LO:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen{{$}} ; HSA-ELT4: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]] -define amdgpu_kernel void @private_elt_size_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 { +define amdgpu_kernel void @private_elt_size_i64(ptr addrspace(1) %out, ptr addrspace(1) %index.array) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() %idxprom = sext i32 %tid to i64 - %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom - %index.load = load i32, i32 addrspace(1)* %gep.index + %gep.index = getelementptr inbounds i32, ptr addrspace(1) %index.array, i64 %idxprom + %index.load = load i32, ptr addrspace(1) %gep.index %index = and i32 %index.load, 2 %alloca = alloca [2 x i64], align 16, addrspace(5) - %gep0 = getelementptr inbounds [2 x i64], [2 x i64] addrspace(5)* %alloca, i32 0, i32 0 - %gep1 = getelementptr inbounds [2 x i64], [2 x i64] addrspace(5)* %alloca, i32 0, i32 1 - store i64 0, i64 addrspace(5)* %gep0 - store i64 34359738602, i64 addrspace(5)* %gep1 - %gep2 = getelementptr inbounds [2 x i64], [2 x i64] addrspace(5)* %alloca, i32 0, i32 %index - %load = load i64, i64 addrspace(5)* %gep2 - store i64 %load, i64 addrspace(1)* %out + %gep1 = getelementptr inbounds [2 x i64], ptr addrspace(5) %alloca, i32 0, i32 1 + store 
i64 0, ptr addrspace(5) %alloca + store i64 34359738602, ptr addrspace(5) %gep1 + %gep2 = getelementptr inbounds [2 x i64], ptr addrspace(5) %alloca, i32 0, i32 %index + %load = load i64, ptr addrspace(5) %gep2 + store i64 %load, ptr addrspace(1) %out ret void } @@ -181,21 +178,20 @@ ; HSA-ELT4-DAG: buffer_load_dword v[[HI:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} ; HSA-ELT4-DAG: buffer_load_dword v[[LO:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen{{$}} ; HSA-ELT4: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]] -define amdgpu_kernel void @private_elt_size_f64(double addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 { +define amdgpu_kernel void @private_elt_size_f64(ptr addrspace(1) %out, ptr addrspace(1) %index.array) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() %idxprom = sext i32 %tid to i64 - %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom - %index.load = load i32, i32 addrspace(1)* %gep.index + %gep.index = getelementptr inbounds i32, ptr addrspace(1) %index.array, i64 %idxprom + %index.load = load i32, ptr addrspace(1) %gep.index %index = and i32 %index.load, 2 %alloca = alloca [2 x double], align 16, addrspace(5) - %gep0 = getelementptr inbounds [2 x double], [2 x double] addrspace(5)* %alloca, i32 0, i32 0 - %gep1 = getelementptr inbounds [2 x double], [2 x double] addrspace(5)* %alloca, i32 0, i32 1 - store double 0.0, double addrspace(5)* %gep0 - store double 4.0, double addrspace(5)* %gep1 - %gep2 = getelementptr inbounds [2 x double], [2 x double] addrspace(5)* %alloca, i32 0, i32 %index - %load = load double, double addrspace(5)* %gep2 - store double %load, double addrspace(1)* %out + %gep1 = getelementptr inbounds [2 x double], ptr addrspace(5) %alloca, i32 0, i32 1 + store double 0.0, ptr addrspace(5) %alloca + store double 4.0, ptr addrspace(5) %gep1 + %gep2 = getelementptr inbounds [2 x double], ptr addrspace(5) %alloca, i32 0, i32 %index + %load = load double, ptr addrspace(5) %gep2 + store double %load, ptr addrspace(1) %out ret void } @@ -230,21 +226,20 @@ ; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:8{{$}} ; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}} ; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}} -define amdgpu_kernel void @private_elt_size_v2i64(<2 x i64> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 { +define amdgpu_kernel void @private_elt_size_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %index.array) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() %idxprom = sext i32 %tid to i64 - %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom - %index.load = load i32, i32 addrspace(1)* %gep.index + %gep.index = getelementptr inbounds i32, ptr addrspace(1) %index.array, i64 %idxprom + %index.load = load i32, ptr addrspace(1) %gep.index %index = and i32 %index.load, 2 %alloca = alloca [2 x <2 x i64>], align 16, addrspace(5) - %gep0 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>] addrspace(5)* %alloca, i32 0, i32 0 - %gep1 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>] addrspace(5)* %alloca, i32 0, i32 1 - store <2 x i64> zeroinitializer, <2 x i64> addrspace(5)* %gep0 - store <2 x i64> , <2 x i64> addrspace(5)* %gep1 - %gep2 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>] addrspace(5)* %alloca, i32 0, i32 %index - %load = load <2 x i64>, <2 x i64> addrspace(5)* %gep2 - store <2 x i64> %load, <2 x i64> addrspace(1)* %out + 
%gep1 = getelementptr inbounds [2 x <2 x i64>], ptr addrspace(5) %alloca, i32 0, i32 1 + store <2 x i64> zeroinitializer, ptr addrspace(5) %alloca + store <2 x i64> , ptr addrspace(5) %gep1 + %gep2 = getelementptr inbounds [2 x <2 x i64>], ptr addrspace(5) %alloca, i32 0, i32 %index + %load = load <2 x i64>, ptr addrspace(5) %gep2 + store <2 x i64> %load, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll --- a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll @@ -2,9 +2,9 @@ ; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -atomic-expand < %s | FileCheck -check-prefix=IR %s ; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti < %s | FileCheck -check-prefix=GCN %s -define i32 @load_atomic_private_seq_cst_i32(i32 addrspace(5)* %ptr) { +define i32 @load_atomic_private_seq_cst_i32(ptr addrspace(5) %ptr) { ; IR-LABEL: @load_atomic_private_seq_cst_i32( -; IR-NEXT: [[LOAD:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: ret i32 [[LOAD]] ; ; GCN-LABEL: load_atomic_private_seq_cst_i32: @@ -13,13 +13,13 @@ ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %load = load atomic i32, i32 addrspace(5)* %ptr seq_cst, align 4 + %load = load atomic i32, ptr addrspace(5) %ptr seq_cst, align 4 ret i32 %load } -define i64 @load_atomic_private_seq_cst_i64(i64 addrspace(5)* %ptr) { +define i64 @load_atomic_private_seq_cst_i64(ptr addrspace(5) %ptr) { ; IR-LABEL: @load_atomic_private_seq_cst_i64( -; IR-NEXT: [[LOAD:%.*]] = load i64, i64 addrspace(5)* [[PTR:%.*]], align 8 +; IR-NEXT: [[LOAD:%.*]] = load i64, ptr addrspace(5) [[PTR:%.*]], align 8 ; IR-NEXT: ret i64 [[LOAD]] ; ; GCN-LABEL: load_atomic_private_seq_cst_i64: @@ -30,13 +30,13 @@ ; GCN-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %load = load atomic i64, i64 addrspace(5)* %ptr seq_cst, align 8 + %load = load atomic i64, ptr addrspace(5) %ptr seq_cst, align 8 ret i64 %load } -define void @atomic_store_seq_cst_i32(i32 addrspace(5)* %ptr, i32 %val) { +define void @atomic_store_seq_cst_i32(ptr addrspace(5) %ptr, i32 %val) { ; IR-LABEL: @atomic_store_seq_cst_i32( -; IR-NEXT: store i32 [[VAL:%.*]], i32 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: store i32 [[VAL:%.*]], ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: ret void ; ; GCN-LABEL: atomic_store_seq_cst_i32: @@ -45,13 +45,13 @@ ; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - store atomic i32 %val, i32 addrspace(5)* %ptr seq_cst, align 4 + store atomic i32 %val, ptr addrspace(5) %ptr seq_cst, align 4 ret void } -define void @atomic_store_seq_cst_i64(i64 addrspace(5)* %ptr, i64 %val) { +define void @atomic_store_seq_cst_i64(ptr addrspace(5) %ptr, i64 %val) { ; IR-LABEL: @atomic_store_seq_cst_i64( -; IR-NEXT: store i64 [[VAL:%.*]], i64 addrspace(5)* [[PTR:%.*]], align 8 +; IR-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[PTR:%.*]], align 8 ; IR-NEXT: ret void ; ; GCN-LABEL: atomic_store_seq_cst_i64: @@ -62,13 +62,13 @@ ; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - store atomic i64 %val, i64 addrspace(5)* %ptr seq_cst, align 8 + store atomic i64 %val, ptr addrspace(5) 
%ptr seq_cst, align 8 ret void } -define i32 @load_atomic_private_seq_cst_syncscope_i32(i32 addrspace(5)* %ptr) { +define i32 @load_atomic_private_seq_cst_syncscope_i32(ptr addrspace(5) %ptr) { ; IR-LABEL: @load_atomic_private_seq_cst_syncscope_i32( -; IR-NEXT: [[LOAD:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: ret i32 [[LOAD]] ; ; GCN-LABEL: load_atomic_private_seq_cst_syncscope_i32: @@ -77,13 +77,13 @@ ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %load = load atomic i32, i32 addrspace(5)* %ptr syncscope("agent") seq_cst, align 4 + %load = load atomic i32, ptr addrspace(5) %ptr syncscope("agent") seq_cst, align 4 ret i32 %load } -define void @atomic_store_seq_cst_syncscope_i32(i32 addrspace(5)* %ptr, i32 %val) { +define void @atomic_store_seq_cst_syncscope_i32(ptr addrspace(5) %ptr, i32 %val) { ; IR-LABEL: @atomic_store_seq_cst_syncscope_i32( -; IR-NEXT: store i32 [[VAL:%.*]], i32 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: store i32 [[VAL:%.*]], ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: ret void ; ; GCN-LABEL: atomic_store_seq_cst_syncscope_i32: @@ -92,21 +92,21 @@ ; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - store atomic i32 %val, i32 addrspace(5)* %ptr syncscope("agent") seq_cst, align 4 + store atomic i32 %val, ptr addrspace(5) %ptr syncscope("agent") seq_cst, align 4 ret void } -define i32 @cmpxchg_private_i32(i32 addrspace(5)* %ptr) { +define i32 @cmpxchg_private_i32(ptr addrspace(5) %ptr) { ; IR-LABEL: @cmpxchg_private_i32( -; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; IR-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 1, i32 [[TMP1]] -; IR-NEXT: store i32 [[TMP3]], i32 addrspace(5)* [[PTR]], align 4 +; IR-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[PTR]], align 4 ; IR-NEXT: [[TMP4:%.*]] = insertvalue { i32, i1 } poison, i32 [[TMP1]], 0 ; IR-NEXT: [[TMP5:%.*]] = insertvalue { i32, i1 } [[TMP4]], i1 [[TMP2]], 1 ; IR-NEXT: [[RESULT_0:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 ; IR-NEXT: [[RESULT_1:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; IR-NEXT: store i1 [[RESULT_1]], i1 addrspace(1)* poison, align 1 +; IR-NEXT: store i1 [[RESULT_1]], ptr addrspace(1) poison, align 1 ; IR-NEXT: ret i32 [[RESULT_0]] ; ; GCN-LABEL: cmpxchg_private_i32: @@ -125,24 +125,24 @@ ; GCN-NEXT: v_mov_b32_e32 v0, v1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %result = cmpxchg i32 addrspace(5)* %ptr, i32 0, i32 1 acq_rel monotonic + %result = cmpxchg ptr addrspace(5) %ptr, i32 0, i32 1 acq_rel monotonic %result.0 = extractvalue { i32, i1 } %result, 0 %result.1 = extractvalue { i32, i1 } %result, 1 - store i1 %result.1, i1 addrspace(1)* poison + store i1 %result.1, ptr addrspace(1) poison ret i32 %result.0 } -define i64 @cmpxchg_private_i64(i64 addrspace(5)* %ptr) { +define i64 @cmpxchg_private_i64(ptr addrspace(5) %ptr) { ; IR-LABEL: @cmpxchg_private_i64( -; IR-NEXT: [[TMP1:%.*]] = load i64, i64 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0 ; IR-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 1, i64 [[TMP1]] -; IR-NEXT: store i64 [[TMP3]], i64 
addrspace(5)* [[PTR]], align 4 +; IR-NEXT: store i64 [[TMP3]], ptr addrspace(5) [[PTR]], align 4 ; IR-NEXT: [[TMP4:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP1]], 0 ; IR-NEXT: [[TMP5:%.*]] = insertvalue { i64, i1 } [[TMP4]], i1 [[TMP2]], 1 ; IR-NEXT: [[RESULT_0:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 ; IR-NEXT: [[RESULT_1:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 -; IR-NEXT: store i1 [[RESULT_1]], i1 addrspace(1)* poison, align 1 +; IR-NEXT: store i1 [[RESULT_1]], ptr addrspace(1) poison, align 1 ; IR-NEXT: ret i64 [[RESULT_0]] ; ; GCN-LABEL: cmpxchg_private_i64: @@ -165,18 +165,18 @@ ; GCN-NEXT: buffer_store_byte v4, off, s[4:7], 0 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %result = cmpxchg i64 addrspace(5)* %ptr, i64 0, i64 1 acq_rel monotonic + %result = cmpxchg ptr addrspace(5) %ptr, i64 0, i64 1 acq_rel monotonic %result.0 = extractvalue { i64, i1 } %result, 0 %result.1 = extractvalue { i64, i1 } %result, 1 - store i1 %result.1, i1 addrspace(1)* poison + store i1 %result.1, ptr addrspace(1) poison ret i64 %result.0 } -define i32 @atomicrmw_xchg_private_i32(i32 addrspace(5)* %ptr) { +define i32 @atomicrmw_xchg_private_i32(ptr addrspace(5) %ptr) { ; IR-LABEL: @atomicrmw_xchg_private_i32( -; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4 -; IR-NEXT: store i32 4, i32 addrspace(5)* [[PTR]], align 4 +; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR:%.*]], align 4 +; IR-NEXT: store i32 4, ptr addrspace(5) [[PTR]], align 4 ; IR-NEXT: ret i32 [[TMP1]] ; ; GCN-LABEL: atomicrmw_xchg_private_i32: @@ -189,15 +189,15 @@ ; GCN-NEXT: v_mov_b32_e32 v0, v1 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %result = atomicrmw xchg i32 addrspace(5)* %ptr, i32 4 seq_cst + %result = atomicrmw xchg ptr addrspace(5) %ptr, i32 4 seq_cst ret i32 %result } -define i32 @atomicrmw_add_private_i32(i32 addrspace(5)* %ptr) { +define i32 @atomicrmw_add_private_i32(ptr addrspace(5) %ptr) { ; IR-LABEL: @atomicrmw_add_private_i32( -; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: [[NEW:%.*]] = add i32 [[TMP1]], 4 -; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4 +; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4 ; IR-NEXT: ret i32 [[TMP1]] ; ; GCN-LABEL: atomicrmw_add_private_i32: @@ -210,15 +210,15 @@ ; GCN-NEXT: v_mov_b32_e32 v0, v1 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %result = atomicrmw add i32 addrspace(5)* %ptr, i32 4 seq_cst + %result = atomicrmw add ptr addrspace(5) %ptr, i32 4 seq_cst ret i32 %result } -define i32 @atomicrmw_sub_private_i32(i32 addrspace(5)* %ptr) { +define i32 @atomicrmw_sub_private_i32(ptr addrspace(5) %ptr) { ; IR-LABEL: @atomicrmw_sub_private_i32( -; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: [[NEW:%.*]] = sub i32 [[TMP1]], 4 -; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4 +; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4 ; IR-NEXT: ret i32 [[TMP1]] ; ; GCN-LABEL: atomicrmw_sub_private_i32: @@ -231,15 +231,15 @@ ; GCN-NEXT: v_mov_b32_e32 v0, v1 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %result = atomicrmw sub i32 addrspace(5)* %ptr, i32 4 seq_cst + %result = atomicrmw sub ptr addrspace(5) %ptr, i32 4 seq_cst ret i32 %result } 
-define i32 @atomicrmw_and_private_i32(i32 addrspace(5)* %ptr) { +define i32 @atomicrmw_and_private_i32(ptr addrspace(5) %ptr) { ; IR-LABEL: @atomicrmw_and_private_i32( -; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: [[NEW:%.*]] = and i32 [[TMP1]], 4 -; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4 +; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4 ; IR-NEXT: ret i32 [[TMP1]] ; ; GCN-LABEL: atomicrmw_and_private_i32: @@ -252,16 +252,16 @@ ; GCN-NEXT: v_mov_b32_e32 v0, v1 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %result = atomicrmw and i32 addrspace(5)* %ptr, i32 4 seq_cst + %result = atomicrmw and ptr addrspace(5) %ptr, i32 4 seq_cst ret i32 %result } -define i32 @atomicrmw_nand_private_i32(i32 addrspace(5)* %ptr) { +define i32 @atomicrmw_nand_private_i32(ptr addrspace(5) %ptr) { ; IR-LABEL: @atomicrmw_nand_private_i32( -; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 4 ; IR-NEXT: [[NEW:%.*]] = xor i32 [[TMP2]], -1 -; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4 +; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4 ; IR-NEXT: ret i32 [[TMP1]] ; ; GCN-LABEL: atomicrmw_nand_private_i32: @@ -275,15 +275,15 @@ ; GCN-NEXT: v_mov_b32_e32 v0, v1 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %result = atomicrmw nand i32 addrspace(5)* %ptr, i32 4 seq_cst + %result = atomicrmw nand ptr addrspace(5) %ptr, i32 4 seq_cst ret i32 %result } -define i32 @atomicrmw_or_private_i32(i32 addrspace(5)* %ptr) { +define i32 @atomicrmw_or_private_i32(ptr addrspace(5) %ptr) { ; IR-LABEL: @atomicrmw_or_private_i32( -; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: [[NEW:%.*]] = or i32 [[TMP1]], 4 -; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4 +; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4 ; IR-NEXT: ret i32 [[TMP1]] ; ; GCN-LABEL: atomicrmw_or_private_i32: @@ -296,15 +296,15 @@ ; GCN-NEXT: v_mov_b32_e32 v0, v1 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %result = atomicrmw or i32 addrspace(5)* %ptr, i32 4 seq_cst + %result = atomicrmw or ptr addrspace(5) %ptr, i32 4 seq_cst ret i32 %result } -define i32 @atomicrmw_xor_private_i32(i32 addrspace(5)* %ptr) { +define i32 @atomicrmw_xor_private_i32(ptr addrspace(5) %ptr) { ; IR-LABEL: @atomicrmw_xor_private_i32( -; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: [[NEW:%.*]] = xor i32 [[TMP1]], 4 -; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4 +; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4 ; IR-NEXT: ret i32 [[TMP1]] ; ; GCN-LABEL: atomicrmw_xor_private_i32: @@ -317,16 +317,16 @@ ; GCN-NEXT: v_mov_b32_e32 v0, v1 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %result = atomicrmw xor i32 addrspace(5)* %ptr, i32 4 seq_cst + %result = atomicrmw xor ptr addrspace(5) %ptr, i32 4 seq_cst ret i32 %result } -define i32 @atomicrmw_max_private_i32(i32 addrspace(5)* %ptr) { +define i32 @atomicrmw_max_private_i32(ptr addrspace(5) %ptr) { ; IR-LABEL: 
@atomicrmw_max_private_i32( -; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], 4 ; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4 -; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4 +; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4 ; IR-NEXT: ret i32 [[TMP1]] ; ; GCN-LABEL: atomicrmw_max_private_i32: @@ -339,16 +339,16 @@ ; GCN-NEXT: v_mov_b32_e32 v0, v1 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %result = atomicrmw max i32 addrspace(5)* %ptr, i32 4 seq_cst + %result = atomicrmw max ptr addrspace(5) %ptr, i32 4 seq_cst ret i32 %result } -define i32 @atomicrmw_min_private_i32(i32 addrspace(5)* %ptr) { +define i32 @atomicrmw_min_private_i32(ptr addrspace(5) %ptr) { ; IR-LABEL: @atomicrmw_min_private_i32( -; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: [[TMP2:%.*]] = icmp sle i32 [[TMP1]], 4 ; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4 -; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4 +; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4 ; IR-NEXT: ret i32 [[TMP1]] ; ; GCN-LABEL: atomicrmw_min_private_i32: @@ -361,16 +361,16 @@ ; GCN-NEXT: v_mov_b32_e32 v0, v1 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %result = atomicrmw min i32 addrspace(5)* %ptr, i32 4 seq_cst + %result = atomicrmw min ptr addrspace(5) %ptr, i32 4 seq_cst ret i32 %result } -define i32 @atomicrmw_umax_private_i32(i32 addrspace(5)* %ptr) { +define i32 @atomicrmw_umax_private_i32(ptr addrspace(5) %ptr) { ; IR-LABEL: @atomicrmw_umax_private_i32( -; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], 4 ; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4 -; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4 +; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4 ; IR-NEXT: ret i32 [[TMP1]] ; ; GCN-LABEL: atomicrmw_umax_private_i32: @@ -383,16 +383,16 @@ ; GCN-NEXT: v_mov_b32_e32 v0, v1 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %result = atomicrmw umax i32 addrspace(5)* %ptr, i32 4 seq_cst + %result = atomicrmw umax ptr addrspace(5) %ptr, i32 4 seq_cst ret i32 %result } -define i32 @atomicrmw_umin_private_i32(i32 addrspace(5)* %ptr) { +define i32 @atomicrmw_umin_private_i32(ptr addrspace(5) %ptr) { ; IR-LABEL: @atomicrmw_umin_private_i32( -; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: [[TMP2:%.*]] = icmp ule i32 [[TMP1]], 4 ; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4 -; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4 +; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4 ; IR-NEXT: ret i32 [[TMP1]] ; ; GCN-LABEL: atomicrmw_umin_private_i32: @@ -405,15 +405,15 @@ ; GCN-NEXT: v_mov_b32_e32 v0, v1 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %result = atomicrmw umin i32 addrspace(5)* %ptr, i32 4 seq_cst + %result = atomicrmw umin ptr addrspace(5) %ptr, i32 4 seq_cst ret i32 %result } -define float 
@atomicrmw_fadd_private_i32(float addrspace(5)* %ptr) { +define float @atomicrmw_fadd_private_i32(ptr addrspace(5) %ptr) { ; IR-LABEL: @atomicrmw_fadd_private_i32( -; IR-NEXT: [[TMP1:%.*]] = load float, float addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: [[NEW:%.*]] = fadd float [[TMP1]], 2.000000e+00 -; IR-NEXT: store float [[NEW]], float addrspace(5)* [[PTR]], align 4 +; IR-NEXT: store float [[NEW]], ptr addrspace(5) [[PTR]], align 4 ; IR-NEXT: ret float [[TMP1]] ; ; GCN-LABEL: atomicrmw_fadd_private_i32: @@ -426,15 +426,15 @@ ; GCN-NEXT: v_mov_b32_e32 v0, v1 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %result = atomicrmw fadd float addrspace(5)* %ptr, float 2.0 seq_cst + %result = atomicrmw fadd ptr addrspace(5) %ptr, float 2.0 seq_cst ret float %result } -define float @atomicrmw_fsub_private_i32(float addrspace(5)* %ptr, float %val) { +define float @atomicrmw_fsub_private_i32(ptr addrspace(5) %ptr, float %val) { ; IR-LABEL: @atomicrmw_fsub_private_i32( -; IR-NEXT: [[TMP1:%.*]] = load float, float addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[PTR:%.*]], align 4 ; IR-NEXT: [[NEW:%.*]] = fsub float [[TMP1]], [[VAL:%.*]] -; IR-NEXT: store float [[NEW]], float addrspace(5)* [[PTR]], align 4 +; IR-NEXT: store float [[NEW]], ptr addrspace(5) [[PTR]], align 4 ; IR-NEXT: ret float [[TMP1]] ; ; GCN-LABEL: atomicrmw_fsub_private_i32: @@ -447,23 +447,22 @@ ; GCN-NEXT: v_mov_b32_e32 v0, v2 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %result = atomicrmw fsub float addrspace(5)* %ptr, float %val seq_cst + %result = atomicrmw fsub ptr addrspace(5) %ptr, float %val seq_cst ret float %result } -define amdgpu_kernel void @alloca_promote_atomicrmw_private_lds_promote(i32 addrspace(1)* %out, i32 %in) nounwind { +define amdgpu_kernel void @alloca_promote_atomicrmw_private_lds_promote(ptr addrspace(1) %out, i32 %in) nounwind { ; IR-LABEL: @alloca_promote_atomicrmw_private_lds_promote( ; IR-NEXT: entry: ; IR-NEXT: [[TMP:%.*]] = alloca [2 x i32], align 4, addrspace(5) -; IR-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* [[TMP]], i32 0, i32 0 -; IR-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* [[TMP]], i32 0, i32 1 -; IR-NEXT: store i32 0, i32 addrspace(5)* [[GEP1]], align 4 -; IR-NEXT: store i32 1, i32 addrspace(5)* [[GEP2]], align 4 -; IR-NEXT: [[GEP3:%.*]] = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* [[TMP]], i32 0, i32 [[IN:%.*]] -; IR-NEXT: [[TMP0:%.*]] = load i32, i32 addrspace(5)* [[GEP3]], align 4 +; IR-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i32], ptr addrspace(5) [[TMP]], i32 0, i32 1 +; IR-NEXT: store i32 0, ptr addrspace(5) [[TMP]], align 4 +; IR-NEXT: store i32 1, ptr addrspace(5) [[GEP2]], align 4 +; IR-NEXT: [[GEP3:%.*]] = getelementptr inbounds [2 x i32], ptr addrspace(5) [[TMP]], i32 0, i32 [[IN:%.*]] +; IR-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[GEP3]], align 4 ; IR-NEXT: [[NEW:%.*]] = add i32 [[TMP0]], 7 -; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[GEP3]], align 4 -; IR-NEXT: store i32 [[TMP0]], i32 addrspace(1)* [[OUT:%.*]], align 4 +; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[GEP3]], align 4 +; IR-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 4 ; IR-NEXT: ret void ; ; GCN-LABEL: alloca_promote_atomicrmw_private_lds_promote: @@ -480,33 +479,31 @@ ; GCN-NEXT: s_endpgm entry: %tmp = 
alloca [2 x i32], addrspace(5) - %gep1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0 - %gep2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1 - store i32 0, i32 addrspace(5)* %gep1 - store i32 1, i32 addrspace(5)* %gep2 - %gep3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in - %rmw = atomicrmw add i32 addrspace(5)* %gep3, i32 7 acq_rel - store i32 %rmw, i32 addrspace(1)* %out + %gep2 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 1 + store i32 0, ptr addrspace(5) %tmp + store i32 1, ptr addrspace(5) %gep2 + %gep3 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 %in + %rmw = atomicrmw add ptr addrspace(5) %gep3, i32 7 acq_rel + store i32 %rmw, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @alloca_promote_cmpxchg_private(i32 addrspace(1)* %out, i32 %in) nounwind { +define amdgpu_kernel void @alloca_promote_cmpxchg_private(ptr addrspace(1) %out, i32 %in) nounwind { ; IR-LABEL: @alloca_promote_cmpxchg_private( ; IR-NEXT: entry: ; IR-NEXT: [[TMP:%.*]] = alloca [2 x i32], align 4, addrspace(5) -; IR-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* [[TMP]], i32 0, i32 0 -; IR-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* [[TMP]], i32 0, i32 1 -; IR-NEXT: store i32 0, i32 addrspace(5)* [[GEP1]], align 4 -; IR-NEXT: store i32 1, i32 addrspace(5)* [[GEP2]], align 4 -; IR-NEXT: [[GEP3:%.*]] = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* [[TMP]], i32 0, i32 [[IN:%.*]] -; IR-NEXT: [[TMP0:%.*]] = load i32, i32 addrspace(5)* [[GEP3]], align 4 +; IR-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i32], ptr addrspace(5) [[TMP]], i32 0, i32 1 +; IR-NEXT: store i32 0, ptr addrspace(5) [[TMP]], align 4 +; IR-NEXT: store i32 1, ptr addrspace(5) [[GEP2]], align 4 +; IR-NEXT: [[GEP3:%.*]] = getelementptr inbounds [2 x i32], ptr addrspace(5) [[TMP]], i32 0, i32 [[IN:%.*]] +; IR-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[GEP3]], align 4 ; IR-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 ; IR-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 1, i32 [[TMP0]] -; IR-NEXT: store i32 [[TMP2]], i32 addrspace(5)* [[GEP3]], align 4 +; IR-NEXT: store i32 [[TMP2]], ptr addrspace(5) [[GEP3]], align 4 ; IR-NEXT: [[TMP3:%.*]] = insertvalue { i32, i1 } poison, i32 [[TMP0]], 0 ; IR-NEXT: [[TMP4:%.*]] = insertvalue { i32, i1 } [[TMP3]], i1 [[TMP1]], 1 ; IR-NEXT: [[VAL:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 -; IR-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[OUT:%.*]], align 4 +; IR-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 4 ; IR-NEXT: ret void ; ; GCN-LABEL: alloca_promote_cmpxchg_private: @@ -523,13 +520,12 @@ ; GCN-NEXT: s_endpgm entry: %tmp = alloca [2 x i32], addrspace(5) - %gep1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0 - %gep2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1 - store i32 0, i32 addrspace(5)* %gep1 - store i32 1, i32 addrspace(5)* %gep2 - %gep3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in - %xchg = cmpxchg i32 addrspace(5)* %gep3, i32 0, i32 1 acq_rel monotonic + %gep2 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 1 + store i32 0, ptr addrspace(5) %tmp + store i32 1, ptr addrspace(5) %gep2 + %gep3 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 %in + %xchg = cmpxchg ptr addrspace(5) %gep3, i32 0, i32 1 acq_rel 
monotonic %val = extractvalue { i32, i1 } %xchg, 0 - store i32 %val, i32 addrspace(1)* %out + store i32 %val, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll b/llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll --- a/llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll @@ -6,73 +6,69 @@ ; GCN-LABEL: ptr_nest_3: ; GCN-COUNT-2: global_load_dwordx2 ; GCN: global_store_dword -define amdgpu_kernel void @ptr_nest_3(float** addrspace(1)* nocapture readonly %Arg) { +define amdgpu_kernel void @ptr_nest_3(ptr addrspace(1) nocapture readonly %Arg) { ; CHECK-LABEL: @ptr_nest_3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float**, float** addrspace(1)* [[ARG:%.*]], i32 [[I]] -; CHECK-NEXT: [[P2:%.*]] = load float**, float** addrspace(1)* [[P1]], align 8, !amdgpu.noclobber !0 -; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast float** [[P2]] to float* addrspace(1)* -; CHECK-NEXT: [[P3:%.*]] = load float*, float* addrspace(1)* [[P2_GLOBAL]], align 8, !amdgpu.noclobber !0 -; CHECK-NEXT: [[P3_GLOBAL:%.*]] = addrspacecast float* [[P3]] to float addrspace(1)* -; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[P3_GLOBAL]], align 4 +; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i32 [[I]] +; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1) +; CHECK-NEXT: [[P3:%.*]] = load ptr, ptr addrspace(1) [[P2_GLOBAL]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P3_GLOBAL:%.*]] = addrspacecast ptr [[P3]] to ptr addrspace(1) +; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[P3_GLOBAL]], align 4 ; CHECK-NEXT: ret void ; entry: %i = tail call i32 @llvm.amdgcn.workitem.id.x() - %p1 = getelementptr inbounds float**, float** addrspace(1)* %Arg, i32 %i - %p2 = load float**, float** addrspace(1)* %p1, align 8 - %p3 = load float*, float** %p2, align 8 - store float 0.000000e+00, float* %p3, align 4 + %p1 = getelementptr inbounds ptr, ptr addrspace(1) %Arg, i32 %i + %p2 = load ptr, ptr addrspace(1) %p1, align 8 + %p3 = load ptr, ptr %p2, align 8 + store float 0.000000e+00, ptr %p3, align 4 ret void } ; GCN-LABEL: ptr_bitcast: ; GCN: global_load_dwordx2 ; GCN: global_store_dword -define amdgpu_kernel void @ptr_bitcast(float** nocapture readonly %Arg) { +define amdgpu_kernel void @ptr_bitcast(ptr nocapture readonly %Arg) { ; CHECK-LABEL: @ptr_bitcast( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARG_GLOBAL:%.*]] = addrspacecast float** [[ARG:%.*]] to float* addrspace(1)* +; CHECK-NEXT: [[ARG_GLOBAL:%.*]] = addrspacecast ptr [[ARG:%.*]] to ptr addrspace(1) ; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float*, float* addrspace(1)* [[ARG_GLOBAL]], i32 [[I]] -; CHECK-NEXT: [[P1_CAST:%.*]] = bitcast float* addrspace(1)* [[P1]] to i32* addrspace(1)* -; CHECK-NEXT: [[P2:%.*]] = load i32*, i32* addrspace(1)* [[P1_CAST]], align 8, !amdgpu.noclobber !0 -; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast i32* [[P2]] to i32 addrspace(1)* -; CHECK-NEXT: store i32 0, i32 addrspace(1)* [[P2_GLOBAL]], align 4 +; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG_GLOBAL]], i32 [[I]] +; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber !0 +; 
CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1) +; CHECK-NEXT: store i32 0, ptr addrspace(1) [[P2_GLOBAL]], align 4 ; CHECK-NEXT: ret void ; entry: %i = tail call i32 @llvm.amdgcn.workitem.id.x() - %p1 = getelementptr inbounds float*, float** %Arg, i32 %i - %p1.cast = bitcast float** %p1 to i32** - %p2 = load i32*, i32** %p1.cast, align 8 - store i32 0, i32* %p2, align 4 + %p1 = getelementptr inbounds ptr, ptr %Arg, i32 %i + %p2 = load ptr, ptr %p1, align 8 + store i32 0, ptr %p2, align 4 ret void } -%struct.S = type { float* } +%struct.S = type { ptr } ; GCN-LABEL: ptr_in_struct: ; GCN: s_load_dwordx2 ; GCN: global_store_dword -define amdgpu_kernel void @ptr_in_struct(%struct.S addrspace(1)* nocapture readonly %Arg) { +define amdgpu_kernel void @ptr_in_struct(ptr addrspace(1) nocapture readonly %Arg) { ; CHECK-LABEL: @ptr_in_struct( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], [[STRUCT_S]] addrspace(1)* [[ARG:%.*]], i64 0, i32 0 -; CHECK-NEXT: [[P1:%.*]] = load float*, float* addrspace(1)* [[P]], align 8, !amdgpu.noclobber !0 -; CHECK-NEXT: [[P1_GLOBAL:%.*]] = addrspacecast float* [[P1]] to float addrspace(1)* +; CHECK-NEXT: [[P1:%.*]] = load ptr, ptr addrspace(1) [[ARG:%.*]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P1_GLOBAL:%.*]] = addrspacecast ptr [[P1]] to ptr addrspace(1) ; CHECK-NEXT: [[ID:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float addrspace(1)* [[P1_GLOBAL]], i32 [[ID]] -; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[P1_GLOBAL]], i32 [[ID]] +; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[ARRAYIDX]], align 4 ; CHECK-NEXT: ret void ; entry: - %p = getelementptr inbounds %struct.S, %struct.S addrspace(1)* %Arg, i64 0, i32 0 - %p1 = load float*, float* addrspace(1)* %p, align 8 + %p1 = load ptr, ptr addrspace(1) %Arg, align 8 %id = tail call i32 @llvm.amdgcn.workitem.id.x() - %arrayidx = getelementptr inbounds float, float* %p1, i32 %id - store float 0.000000e+00, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %p1, i32 %id + store float 0.000000e+00, ptr %arrayidx, align 4 ret void } @@ -82,76 +78,76 @@ ; GCN-COUNT-2: global_load_dwordx2 ; GCN: global_load_dwordx4 ; GCN: global_store_dword -define amdgpu_kernel void @flat_ptr_arg(float** nocapture readonly noalias %Arg, float** nocapture noalias %Out, i32 %X) { +define amdgpu_kernel void @flat_ptr_arg(ptr nocapture readonly noalias %Arg, ptr nocapture noalias %Out, i32 %X) { ; CHECK-LABEL: @flat_ptr_arg( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[OUT_GLOBAL:%.*]] = addrspacecast float** [[OUT:%.*]] to float* addrspace(1)* -; CHECK-NEXT: [[ARG_GLOBAL:%.*]] = addrspacecast float** [[ARG:%.*]] to float* addrspace(1)* +; CHECK-NEXT: [[OUT_GLOBAL:%.*]] = addrspacecast ptr [[OUT:%.*]] to ptr addrspace(1) +; CHECK-NEXT: [[ARG_GLOBAL:%.*]] = addrspacecast ptr [[ARG:%.*]] to ptr addrspace(1) ; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I]] to i64 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float*, float* addrspace(1)* [[ARG_GLOBAL]], i64 [[IDXPROM]] -; CHECK-NEXT: [[I1:%.*]] = load float*, float* addrspace(1)* [[ARRAYIDX10]], align 8, !amdgpu.noclobber !0 -; CHECK-NEXT: [[I1_GLOBAL:%.*]] = addrspacecast float* [[I1]] to float addrspace(1)* -; CHECK-NEXT: 
[[I2:%.*]] = load float, float addrspace(1)* [[I1_GLOBAL]], align 4, !amdgpu.noclobber !0 -; CHECK-NEXT: [[ARRAYIDX512:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[X:%.*]] -; CHECK-NEXT: store float [[I2]], float addrspace(3)* [[ARRAYIDX512]], align 4 -; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float addrspace(1)* [[I1_GLOBAL]], i64 1 -; CHECK-NEXT: [[I3:%.*]] = load float, float addrspace(1)* [[ARRAYIDX3_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG_GLOBAL]], i64 [[IDXPROM]] +; CHECK-NEXT: [[I1:%.*]] = load ptr, ptr addrspace(1) [[ARRAYIDX10]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I1_GLOBAL:%.*]] = addrspacecast ptr [[I1]] to ptr addrspace(1) +; CHECK-NEXT: [[I2:%.*]] = load float, ptr addrspace(1) [[I1_GLOBAL]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[ARRAYIDX512:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[X:%.*]] +; CHECK-NEXT: store float [[I2]], ptr addrspace(3) [[ARRAYIDX512]], align 4 +; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 1 +; CHECK-NEXT: [[I3:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX3_1]], align 4 ; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[X]], 1 -; CHECK-NEXT: [[ARRAYIDX512_1:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[ADD_1]] -; CHECK-NEXT: store float [[I3]], float addrspace(3)* [[ARRAYIDX512_1]], align 4 -; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float addrspace(1)* [[I1_GLOBAL]], i64 2 -; CHECK-NEXT: [[I4:%.*]] = load float, float addrspace(1)* [[ARRAYIDX3_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX512_1:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[ADD_1]] +; CHECK-NEXT: store float [[I3]], ptr addrspace(3) [[ARRAYIDX512_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 2 +; CHECK-NEXT: [[I4:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX3_2]], align 4 ; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[X]], 2 -; CHECK-NEXT: [[ARRAYIDX512_2:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[ADD_2]] -; CHECK-NEXT: store float [[I4]], float addrspace(3)* [[ARRAYIDX512_2]], align 4 -; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float addrspace(1)* [[I1_GLOBAL]], i64 3 -; CHECK-NEXT: [[I5:%.*]] = load float, float addrspace(1)* [[ARRAYIDX3_3]], align 4 +; CHECK-NEXT: [[ARRAYIDX512_2:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[ADD_2]] +; CHECK-NEXT: store float [[I4]], ptr addrspace(3) [[ARRAYIDX512_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 3 +; CHECK-NEXT: [[I5:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX3_3]], align 4 ; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[X]], 3 -; CHECK-NEXT: [[ARRAYIDX512_3:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[ADD_3]] -; CHECK-NEXT: store float [[I5]], float addrspace(3)* [[ARRAYIDX512_3]], align 4 +; CHECK-NEXT: [[ARRAYIDX512_3:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[ADD_3]] +; CHECK-NEXT: store float [[I5]], ptr addrspace(3) [[ARRAYIDX512_3]], align 4 ; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[X]], -1 -; CHECK-NEXT: [[ARRAYIDX711:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* 
@LDS, i32 0, i32 [[SUB]] -; CHECK-NEXT: [[I6:%.*]] = load float, float addrspace(3)* [[ARRAYIDX711]], align 4 -; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds float*, float* addrspace(1)* [[OUT_GLOBAL]], i64 [[IDXPROM]] -; CHECK-NEXT: [[I7:%.*]] = load float*, float* addrspace(1)* [[ARRAYIDX11]], align 8, !amdgpu.noclobber !0 -; CHECK-NEXT: [[I7_GLOBAL:%.*]] = addrspacecast float* [[I7]] to float addrspace(1)* +; CHECK-NEXT: [[ARRAYIDX711:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[SUB]] +; CHECK-NEXT: [[I6:%.*]] = load float, ptr addrspace(3) [[ARRAYIDX711]], align 4 +; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[OUT_GLOBAL]], i64 [[IDXPROM]] +; CHECK-NEXT: [[I7:%.*]] = load ptr, ptr addrspace(1) [[ARRAYIDX11]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I7_GLOBAL:%.*]] = addrspacecast ptr [[I7]] to ptr addrspace(1) ; CHECK-NEXT: [[IDXPROM8:%.*]] = sext i32 [[X]] to i64 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float addrspace(1)* [[I7_GLOBAL]], i64 [[IDXPROM8]] -; CHECK-NEXT: store float [[I6]], float addrspace(1)* [[ARRAYIDX9]], align 4 +; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I7_GLOBAL]], i64 [[IDXPROM8]] +; CHECK-NEXT: store float [[I6]], ptr addrspace(1) [[ARRAYIDX9]], align 4 ; CHECK-NEXT: ret void ; entry: %i = tail call i32 @llvm.amdgcn.workitem.id.x() %idxprom = zext i32 %i to i64 - %arrayidx10 = getelementptr inbounds float*, float** %Arg, i64 %idxprom - %i1 = load float*, float** %arrayidx10, align 8 - %i2 = load float, float* %i1, align 4 - %arrayidx512 = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 %X - store float %i2, float addrspace(3)* %arrayidx512, align 4 - %arrayidx3.1 = getelementptr inbounds float, float* %i1, i64 1 - %i3 = load float, float* %arrayidx3.1, align 4 + %arrayidx10 = getelementptr inbounds ptr, ptr %Arg, i64 %idxprom + %i1 = load ptr, ptr %arrayidx10, align 8 + %i2 = load float, ptr %i1, align 4 + %arrayidx512 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %X + store float %i2, ptr addrspace(3) %arrayidx512, align 4 + %arrayidx3.1 = getelementptr inbounds float, ptr %i1, i64 1 + %i3 = load float, ptr %arrayidx3.1, align 4 %add.1 = add nsw i32 %X, 1 - %arrayidx512.1 = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 %add.1 - store float %i3, float addrspace(3)* %arrayidx512.1, align 4 - %arrayidx3.2 = getelementptr inbounds float, float* %i1, i64 2 - %i4 = load float, float* %arrayidx3.2, align 4 + %arrayidx512.1 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %add.1 + store float %i3, ptr addrspace(3) %arrayidx512.1, align 4 + %arrayidx3.2 = getelementptr inbounds float, ptr %i1, i64 2 + %i4 = load float, ptr %arrayidx3.2, align 4 %add.2 = add nsw i32 %X, 2 - %arrayidx512.2 = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 %add.2 - store float %i4, float addrspace(3)* %arrayidx512.2, align 4 - %arrayidx3.3 = getelementptr inbounds float, float* %i1, i64 3 - %i5 = load float, float* %arrayidx3.3, align 4 + %arrayidx512.2 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %add.2 + store float %i4, ptr addrspace(3) %arrayidx512.2, align 4 + %arrayidx3.3 = getelementptr inbounds float, ptr %i1, i64 3 + %i5 = load float, ptr %arrayidx3.3, align 4 %add.3 = add nsw i32 %X, 3 - %arrayidx512.3 = getelementptr inbounds [4 x float], [4 x float] 
addrspace(3)* @LDS, i32 0, i32 %add.3 - store float %i5, float addrspace(3)* %arrayidx512.3, align 4 + %arrayidx512.3 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %add.3 + store float %i5, ptr addrspace(3) %arrayidx512.3, align 4 %sub = add nsw i32 %X, -1 - %arrayidx711 = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 %sub - %i6 = load float, float addrspace(3)* %arrayidx711, align 4 - %arrayidx11 = getelementptr inbounds float*, float** %Out, i64 %idxprom - %i7 = load float*, float** %arrayidx11, align 8 + %arrayidx711 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %sub + %i6 = load float, ptr addrspace(3) %arrayidx711, align 4 + %arrayidx11 = getelementptr inbounds ptr, ptr %Out, i64 %idxprom + %i7 = load ptr, ptr %arrayidx11, align 8 %idxprom8 = sext i32 %X to i64 - %arrayidx9 = getelementptr inbounds float, float* %i7, i64 %idxprom8 - store float %i6, float* %arrayidx9, align 4 + %arrayidx9 = getelementptr inbounds float, ptr %i7, i64 %idxprom8 + store float %i6, ptr %arrayidx9, align 4 ret void } @@ -159,69 +155,69 @@ ; GCN: global_load_dwordx2 ; GCN: global_load_dwordx4 ; GCN: global_store_dword -define amdgpu_kernel void @global_ptr_arg(float* addrspace(1)* nocapture readonly %Arg, i32 %X) { +define amdgpu_kernel void @global_ptr_arg(ptr addrspace(1) nocapture readonly %Arg, i32 %X) { ; CHECK-LABEL: @global_ptr_arg( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I]] to i64 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float*, float* addrspace(1)* [[ARG:%.*]], i64 [[IDXPROM]] -; CHECK-NEXT: [[I1:%.*]] = load float*, float* addrspace(1)* [[ARRAYIDX10]], align 8, !amdgpu.noclobber !0 -; CHECK-NEXT: [[I1_GLOBAL:%.*]] = addrspacecast float* [[I1]] to float addrspace(1)* -; CHECK-NEXT: [[I2:%.*]] = load float, float addrspace(1)* [[I1_GLOBAL]], align 4, !amdgpu.noclobber !0 -; CHECK-NEXT: [[ARRAYIDX512:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[X:%.*]] -; CHECK-NEXT: store float [[I2]], float addrspace(3)* [[ARRAYIDX512]], align 4 -; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float addrspace(1)* [[I1_GLOBAL]], i64 1 -; CHECK-NEXT: [[I3:%.*]] = load float, float addrspace(1)* [[ARRAYIDX3_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i64 [[IDXPROM]] +; CHECK-NEXT: [[I1:%.*]] = load ptr, ptr addrspace(1) [[ARRAYIDX10]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I1_GLOBAL:%.*]] = addrspacecast ptr [[I1]] to ptr addrspace(1) +; CHECK-NEXT: [[I2:%.*]] = load float, ptr addrspace(1) [[I1_GLOBAL]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[ARRAYIDX512:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[X:%.*]] +; CHECK-NEXT: store float [[I2]], ptr addrspace(3) [[ARRAYIDX512]], align 4 +; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 1 +; CHECK-NEXT: [[I3:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX3_1]], align 4 ; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[X]], 1 -; CHECK-NEXT: [[ARRAYIDX512_1:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[ADD_1]] -; CHECK-NEXT: store float [[I3]], float addrspace(3)* [[ARRAYIDX512_1]], align 4 -; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float addrspace(1)* [[I1_GLOBAL]], i64 2 -; CHECK-NEXT: 
[[I4:%.*]] = load float, float addrspace(1)* [[ARRAYIDX3_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX512_1:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[ADD_1]] +; CHECK-NEXT: store float [[I3]], ptr addrspace(3) [[ARRAYIDX512_1]], align 4 +; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 2 +; CHECK-NEXT: [[I4:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX3_2]], align 4 ; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[X]], 2 -; CHECK-NEXT: [[ARRAYIDX512_2:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[ADD_2]] -; CHECK-NEXT: store float [[I4]], float addrspace(3)* [[ARRAYIDX512_2]], align 4 -; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float addrspace(1)* [[I1_GLOBAL]], i64 3 -; CHECK-NEXT: [[I5:%.*]] = load float, float addrspace(1)* [[ARRAYIDX3_3]], align 4 +; CHECK-NEXT: [[ARRAYIDX512_2:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[ADD_2]] +; CHECK-NEXT: store float [[I4]], ptr addrspace(3) [[ARRAYIDX512_2]], align 4 +; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 3 +; CHECK-NEXT: [[I5:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX3_3]], align 4 ; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[X]], 3 -; CHECK-NEXT: [[ARRAYIDX512_3:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[ADD_3]] -; CHECK-NEXT: store float [[I5]], float addrspace(3)* [[ARRAYIDX512_3]], align 4 +; CHECK-NEXT: [[ARRAYIDX512_3:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[ADD_3]] +; CHECK-NEXT: store float [[I5]], ptr addrspace(3) [[ARRAYIDX512_3]], align 4 ; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[X]], -1 -; CHECK-NEXT: [[ARRAYIDX711:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[SUB]] -; CHECK-NEXT: [[I6:%.*]] = load float, float addrspace(3)* [[ARRAYIDX711]], align 4 +; CHECK-NEXT: [[ARRAYIDX711:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[SUB]] +; CHECK-NEXT: [[I6:%.*]] = load float, ptr addrspace(3) [[ARRAYIDX711]], align 4 ; CHECK-NEXT: [[IDXPROM8:%.*]] = sext i32 [[X]] to i64 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float addrspace(1)* [[I1_GLOBAL]], i64 [[IDXPROM8]] -; CHECK-NEXT: store float [[I6]], float addrspace(1)* [[ARRAYIDX9]], align 4 +; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 [[IDXPROM8]] +; CHECK-NEXT: store float [[I6]], ptr addrspace(1) [[ARRAYIDX9]], align 4 ; CHECK-NEXT: ret void ; entry: %i = tail call i32 @llvm.amdgcn.workitem.id.x() %idxprom = zext i32 %i to i64 - %arrayidx10 = getelementptr inbounds float*, float* addrspace(1)* %Arg, i64 %idxprom - %i1 = load float*, float* addrspace(1)* %arrayidx10, align 8 - %i2 = load float, float* %i1, align 4 - %arrayidx512 = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 %X - store float %i2, float addrspace(3)* %arrayidx512, align 4 - %arrayidx3.1 = getelementptr inbounds float, float* %i1, i64 1 - %i3 = load float, float* %arrayidx3.1, align 4 + %arrayidx10 = getelementptr inbounds ptr, ptr addrspace(1) %Arg, i64 %idxprom + %i1 = load ptr, ptr addrspace(1) %arrayidx10, align 8 + %i2 = load float, ptr %i1, align 4 + %arrayidx512 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %X + store float %i2, ptr addrspace(3) %arrayidx512, align 4 + 
%arrayidx3.1 = getelementptr inbounds float, ptr %i1, i64 1 + %i3 = load float, ptr %arrayidx3.1, align 4 %add.1 = add nsw i32 %X, 1 - %arrayidx512.1 = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 %add.1 - store float %i3, float addrspace(3)* %arrayidx512.1, align 4 - %arrayidx3.2 = getelementptr inbounds float, float* %i1, i64 2 - %i4 = load float, float* %arrayidx3.2, align 4 + %arrayidx512.1 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %add.1 + store float %i3, ptr addrspace(3) %arrayidx512.1, align 4 + %arrayidx3.2 = getelementptr inbounds float, ptr %i1, i64 2 + %i4 = load float, ptr %arrayidx3.2, align 4 %add.2 = add nsw i32 %X, 2 - %arrayidx512.2 = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 %add.2 - store float %i4, float addrspace(3)* %arrayidx512.2, align 4 - %arrayidx3.3 = getelementptr inbounds float, float* %i1, i64 3 - %i5 = load float, float* %arrayidx3.3, align 4 + %arrayidx512.2 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %add.2 + store float %i4, ptr addrspace(3) %arrayidx512.2, align 4 + %arrayidx3.3 = getelementptr inbounds float, ptr %i1, i64 3 + %i5 = load float, ptr %arrayidx3.3, align 4 %add.3 = add nsw i32 %X, 3 - %arrayidx512.3 = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 %add.3 - store float %i5, float addrspace(3)* %arrayidx512.3, align 4 + %arrayidx512.3 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %add.3 + store float %i5, ptr addrspace(3) %arrayidx512.3, align 4 %sub = add nsw i32 %X, -1 - %arrayidx711 = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 %sub - %i6 = load float, float addrspace(3)* %arrayidx711, align 4 + %arrayidx711 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %sub + %i6 = load float, ptr addrspace(3) %arrayidx711, align 4 %idxprom8 = sext i32 %X to i64 - %arrayidx9 = getelementptr inbounds float, float* %i1, i64 %idxprom8 - store float %i6, float* %arrayidx9, align 4 + %arrayidx9 = getelementptr inbounds float, ptr %i1, i64 %idxprom8 + store float %i6, ptr %arrayidx9, align 4 ret void } @@ -230,42 +226,42 @@ ; GCN: global_load_dwordx2 ; GCN: flat_load_dword ; GCN: flat_store_dword -define amdgpu_kernel void @global_ptr_arg_clobbered(float* addrspace(1)* nocapture readonly %Arg, i32 %X) { +define amdgpu_kernel void @global_ptr_arg_clobbered(ptr addrspace(1) nocapture readonly %Arg, i32 %X) { ; CHECK-LABEL: @global_ptr_arg_clobbered( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I]] to i64 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float*, float* addrspace(1)* [[ARG:%.*]], i64 [[IDXPROM]] -; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds float*, float* addrspace(1)* [[ARRAYIDX10]], i32 [[X:%.*]] -; CHECK-NEXT: store float* null, float* addrspace(1)* [[ARRAYIDX11]], align 4 -; CHECK-NEXT: [[I1:%.*]] = load float*, float* addrspace(1)* [[ARRAYIDX10]], align 8 -; CHECK-NEXT: [[I2:%.*]] = load float, float* [[I1]], align 4 -; CHECK-NEXT: [[ARRAYIDX512:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[X]] -; CHECK-NEXT: store float [[I2]], float addrspace(3)* [[ARRAYIDX512]], align 4 +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i64 [[IDXPROM]] +; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr 
addrspace(1) [[ARRAYIDX10]], i32 [[X:%.*]] +; CHECK-NEXT: store ptr null, ptr addrspace(1) [[ARRAYIDX11]], align 4 +; CHECK-NEXT: [[I1:%.*]] = load ptr, ptr addrspace(1) [[ARRAYIDX10]], align 8 +; CHECK-NEXT: [[I2:%.*]] = load float, ptr [[I1]], align 4 +; CHECK-NEXT: [[ARRAYIDX512:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[X]] +; CHECK-NEXT: store float [[I2]], ptr addrspace(3) [[ARRAYIDX512]], align 4 ; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[X]], -1 -; CHECK-NEXT: [[ARRAYIDX711:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[SUB]] -; CHECK-NEXT: [[I6:%.*]] = load float, float addrspace(3)* [[ARRAYIDX711]], align 4 +; CHECK-NEXT: [[ARRAYIDX711:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[SUB]] +; CHECK-NEXT: [[I6:%.*]] = load float, ptr addrspace(3) [[ARRAYIDX711]], align 4 ; CHECK-NEXT: [[IDXPROM8:%.*]] = sext i32 [[X]] to i64 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[I1]], i64 [[IDXPROM8]] -; CHECK-NEXT: store float [[I6]], float* [[ARRAYIDX9]], align 4 +; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[I1]], i64 [[IDXPROM8]] +; CHECK-NEXT: store float [[I6]], ptr [[ARRAYIDX9]], align 4 ; CHECK-NEXT: ret void ; entry: %i = tail call i32 @llvm.amdgcn.workitem.id.x() %idxprom = zext i32 %i to i64 - %arrayidx10 = getelementptr inbounds float*, float* addrspace(1)* %Arg, i64 %idxprom - %arrayidx11 = getelementptr inbounds float*, float* addrspace(1)* %arrayidx10, i32 %X - store float* null, float* addrspace(1)* %arrayidx11, align 4 - %i1 = load float*, float* addrspace(1)* %arrayidx10, align 8 - %i2 = load float, float* %i1, align 4 - %arrayidx512 = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 %X - store float %i2, float addrspace(3)* %arrayidx512, align 4 + %arrayidx10 = getelementptr inbounds ptr, ptr addrspace(1) %Arg, i64 %idxprom + %arrayidx11 = getelementptr inbounds ptr, ptr addrspace(1) %arrayidx10, i32 %X + store ptr null, ptr addrspace(1) %arrayidx11, align 4 + %i1 = load ptr, ptr addrspace(1) %arrayidx10, align 8 + %i2 = load float, ptr %i1, align 4 + %arrayidx512 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %X + store float %i2, ptr addrspace(3) %arrayidx512, align 4 %sub = add nsw i32 %X, -1 - %arrayidx711 = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 %sub - %i6 = load float, float addrspace(3)* %arrayidx711, align 4 + %arrayidx711 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %sub + %i6 = load float, ptr addrspace(3) %arrayidx711, align 4 %idxprom8 = sext i32 %X to i64 - %arrayidx9 = getelementptr inbounds float, float* %i1, i64 %idxprom8 - store float %i6, float* %arrayidx9, align 4 + %arrayidx9 = getelementptr inbounds float, ptr %i1, i64 %idxprom8 + store float %i6, ptr %arrayidx9, align 4 ret void } @@ -274,69 +270,69 @@ ; GCN: global_store_dwordx2 ; GCN: global_load_dword ; GCN: global_store_dword -define amdgpu_kernel void @global_ptr_arg_clobbered_after_load(float* addrspace(1)* nocapture readonly %Arg, i32 %X) { +define amdgpu_kernel void @global_ptr_arg_clobbered_after_load(ptr addrspace(1) nocapture readonly %Arg, i32 %X) { ; CHECK-LABEL: @global_ptr_arg_clobbered_after_load( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I]] to i64 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float*, 
float* addrspace(1)* [[ARG:%.*]], i64 [[IDXPROM]] -; CHECK-NEXT: [[I1:%.*]] = load float*, float* addrspace(1)* [[ARRAYIDX10]], align 8, !amdgpu.noclobber !0 -; CHECK-NEXT: [[I1_GLOBAL:%.*]] = addrspacecast float* [[I1]] to float addrspace(1)* -; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds float*, float* addrspace(1)* [[ARRAYIDX10]], i32 [[X:%.*]] -; CHECK-NEXT: store float* null, float* addrspace(1)* [[ARRAYIDX11]], align 4 -; CHECK-NEXT: [[I2:%.*]] = load float, float addrspace(1)* [[I1_GLOBAL]], align 4 -; CHECK-NEXT: [[ARRAYIDX512:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[X]] -; CHECK-NEXT: store float [[I2]], float addrspace(3)* [[ARRAYIDX512]], align 4 +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i64 [[IDXPROM]] +; CHECK-NEXT: [[I1:%.*]] = load ptr, ptr addrspace(1) [[ARRAYIDX10]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I1_GLOBAL:%.*]] = addrspacecast ptr [[I1]] to ptr addrspace(1) +; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARRAYIDX10]], i32 [[X:%.*]] +; CHECK-NEXT: store ptr null, ptr addrspace(1) [[ARRAYIDX11]], align 4 +; CHECK-NEXT: [[I2:%.*]] = load float, ptr addrspace(1) [[I1_GLOBAL]], align 4 +; CHECK-NEXT: [[ARRAYIDX512:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[X]] +; CHECK-NEXT: store float [[I2]], ptr addrspace(3) [[ARRAYIDX512]], align 4 ; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[X]], -1 -; CHECK-NEXT: [[ARRAYIDX711:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[SUB]] -; CHECK-NEXT: [[I6:%.*]] = load float, float addrspace(3)* [[ARRAYIDX711]], align 4 +; CHECK-NEXT: [[ARRAYIDX711:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[SUB]] +; CHECK-NEXT: [[I6:%.*]] = load float, ptr addrspace(3) [[ARRAYIDX711]], align 4 ; CHECK-NEXT: [[IDXPROM8:%.*]] = sext i32 [[X]] to i64 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float addrspace(1)* [[I1_GLOBAL]], i64 [[IDXPROM8]] -; CHECK-NEXT: store float [[I6]], float addrspace(1)* [[ARRAYIDX9]], align 4 +; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 [[IDXPROM8]] +; CHECK-NEXT: store float [[I6]], ptr addrspace(1) [[ARRAYIDX9]], align 4 ; CHECK-NEXT: ret void ; entry: %i = tail call i32 @llvm.amdgcn.workitem.id.x() %idxprom = zext i32 %i to i64 - %arrayidx10 = getelementptr inbounds float*, float* addrspace(1)* %Arg, i64 %idxprom - %i1 = load float*, float* addrspace(1)* %arrayidx10, align 8 - %arrayidx11 = getelementptr inbounds float*, float* addrspace(1)* %arrayidx10, i32 %X - store float* null, float* addrspace(1)* %arrayidx11, align 4 - %i2 = load float, float* %i1, align 4 - %arrayidx512 = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 %X - store float %i2, float addrspace(3)* %arrayidx512, align 4 + %arrayidx10 = getelementptr inbounds ptr, ptr addrspace(1) %Arg, i64 %idxprom + %i1 = load ptr, ptr addrspace(1) %arrayidx10, align 8 + %arrayidx11 = getelementptr inbounds ptr, ptr addrspace(1) %arrayidx10, i32 %X + store ptr null, ptr addrspace(1) %arrayidx11, align 4 + %i2 = load float, ptr %i1, align 4 + %arrayidx512 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %X + store float %i2, ptr addrspace(3) %arrayidx512, align 4 %sub = add nsw i32 %X, -1 - %arrayidx711 = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, 
i32 %sub - %i6 = load float, float addrspace(3)* %arrayidx711, align 4 + %arrayidx711 = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 %sub + %i6 = load float, ptr addrspace(3) %arrayidx711, align 4 %idxprom8 = sext i32 %X to i64 - %arrayidx9 = getelementptr inbounds float, float* %i1, i64 %idxprom8 - store float %i6, float* %arrayidx9, align 4 + %arrayidx9 = getelementptr inbounds float, ptr %i1, i64 %idxprom8 + store float %i6, ptr %arrayidx9, align 4 ret void } ; GCN-LABEL: ptr_nest_3_barrier: ; GCN-COUNT-2: global_load_dwordx2 ; GCN: global_store_dword -define amdgpu_kernel void @ptr_nest_3_barrier(float** addrspace(1)* nocapture readonly %Arg) { +define amdgpu_kernel void @ptr_nest_3_barrier(ptr addrspace(1) nocapture readonly %Arg) { ; CHECK-LABEL: @ptr_nest_3_barrier( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float**, float** addrspace(1)* [[ARG:%.*]], i32 [[I]] +; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i32 [[I]] ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() -; CHECK-NEXT: [[P2:%.*]] = load float**, float** addrspace(1)* [[P1]], align 8, !amdgpu.noclobber !0 -; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast float** [[P2]] to float* addrspace(1)* -; CHECK-NEXT: [[P3:%.*]] = load float*, float* addrspace(1)* [[P2_GLOBAL]], align 8, !amdgpu.noclobber !0 -; CHECK-NEXT: [[P3_GLOBAL:%.*]] = addrspacecast float* [[P3]] to float addrspace(1)* -; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[P3_GLOBAL]], align 4 +; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1) +; CHECK-NEXT: [[P3:%.*]] = load ptr, ptr addrspace(1) [[P2_GLOBAL]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P3_GLOBAL:%.*]] = addrspacecast ptr [[P3]] to ptr addrspace(1) +; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[P3_GLOBAL]], align 4 ; CHECK-NEXT: ret void ; entry: %i = tail call i32 @llvm.amdgcn.workitem.id.x() - %p1 = getelementptr inbounds float**, float** addrspace(1)* %Arg, i32 %i + %p1 = getelementptr inbounds ptr, ptr addrspace(1) %Arg, i32 %i tail call void @llvm.amdgcn.s.barrier() - %p2 = load float**, float** addrspace(1)* %p1, align 8 - %p3 = load float*, float** %p2, align 8 - store float 0.000000e+00, float* %p3, align 4 + %p2 = load ptr, ptr addrspace(1) %p1, align 8 + %p3 = load ptr, ptr %p2, align 8 + store float 0.000000e+00, ptr %p3, align 4 ret void } @@ -344,20 +340,20 @@ ; GCN: s_lshl_b64 ; GCN: s_load_dwordx2 ; GCN: global_store_dword -define amdgpu_kernel void @flat_ptr_nest_2(float** nocapture readonly %Arg, i32 %i) { +define amdgpu_kernel void @flat_ptr_nest_2(ptr nocapture readonly %Arg, i32 %i) { ; CHECK-LABEL: @flat_ptr_nest_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARG_GLOBAL:%.*]] = addrspacecast float** [[ARG:%.*]] to float* addrspace(1)* -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float*, float* addrspace(1)* [[ARG_GLOBAL]], i32 [[I:%.*]] -; CHECK-NEXT: [[P2:%.*]] = load float*, float* addrspace(1)* [[P1]], align 8, !amdgpu.noclobber !0 -; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast float* [[P2]] to float addrspace(1)* -; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[P2_GLOBAL]], align 4 +; CHECK-NEXT: [[ARG_GLOBAL:%.*]] = addrspacecast ptr [[ARG:%.*]] to ptr addrspace(1) +; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) 
[[ARG_GLOBAL]], i32 [[I:%.*]] +; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1) +; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[P2_GLOBAL]], align 4 ; CHECK-NEXT: ret void ; entry: - %p1 = getelementptr inbounds float*, float** %Arg, i32 %i - %p2 = load float*, float** %p1, align 8 - store float 0.000000e+00, float* %p2, align 4 + %p1 = getelementptr inbounds ptr, ptr %Arg, i32 %i + %p2 = load ptr, ptr %p1, align 8 + store float 0.000000e+00, ptr %p2, align 4 ret void } @@ -366,21 +362,21 @@ ; GCN: s_load_dwordx2 ; GCN: s_load_dwordx2 ; GCN: global_store_dword -define amdgpu_kernel void @const_ptr_nest_3(float* addrspace(4)* addrspace(4)* nocapture readonly %Arg, i32 %i) { +define amdgpu_kernel void @const_ptr_nest_3(ptr addrspace(4) nocapture readonly %Arg, i32 %i) { ; CHECK-LABEL: @const_ptr_nest_3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float* addrspace(4)*, float* addrspace(4)* addrspace(4)* [[ARG:%.*]], i32 [[I:%.*]] -; CHECK-NEXT: [[P2:%.*]] = load float* addrspace(4)*, float* addrspace(4)* addrspace(4)* [[P1]], align 8, !amdgpu.noclobber !0 -; CHECK-NEXT: [[P3:%.*]] = load float*, float* addrspace(4)* [[P2]], align 8, !amdgpu.noclobber !0 -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float* [[P3]] to float addrspace(1)* -; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[TMP0]], align 4 +; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[ARG:%.*]], i32 [[I:%.*]] +; CHECK-NEXT: [[P2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[P1]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P3:%.*]] = load ptr, ptr addrspace(4) [[P2]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[P3]] to ptr addrspace(1) +; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[TMP0]], align 4 ; CHECK-NEXT: ret void ; entry: - %p1 = getelementptr inbounds float* addrspace(4)*, float* addrspace(4)* addrspace(4)* %Arg, i32 %i - %p2 = load float* addrspace(4)*, float * addrspace(4)* addrspace(4)* %p1, align 8 - %p3 = load float*, float* addrspace(4)* %p2, align 8 - store float 0.000000e+00, float* %p3, align 4 + %p1 = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) %Arg, i32 %i + %p2 = load ptr addrspace(4), ptr addrspace(4) %p1, align 8 + %p3 = load ptr, ptr addrspace(4) %p2, align 8 + store float 0.000000e+00, ptr %p3, align 4 ret void } @@ -389,23 +385,23 @@ ; GCN: s_load_dwordx2 ; GCN: s_load_dwordx2 ; GCN: global_store_dword -define amdgpu_kernel void @cast_from_const_const_ptr_nest_3(float* addrspace(4)* addrspace(4)* nocapture readonly %Arg, i32 %i) { +define amdgpu_kernel void @cast_from_const_const_ptr_nest_3(ptr addrspace(4) nocapture readonly %Arg, i32 %i) { ; CHECK-LABEL: @cast_from_const_const_ptr_nest_3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float* addrspace(4)*, float* addrspace(4)* addrspace(4)* [[ARG:%.*]], i32 [[I:%.*]] -; CHECK-NEXT: [[P2:%.*]] = load float* addrspace(4)*, float* addrspace(4)* addrspace(4)* [[P1]], align 8, !amdgpu.noclobber !0 -; CHECK-NEXT: [[P3:%.*]] = load float*, float* addrspace(4)* [[P2]], align 8, !amdgpu.noclobber !0 -; CHECK-NEXT: [[P3_GLOBAL:%.*]] = addrspacecast float* [[P3]] to float addrspace(1)* -; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[P3_GLOBAL]], align 4 +; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) 
[[ARG:%.*]], i32 [[I:%.*]] +; CHECK-NEXT: [[P2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[P1]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P3:%.*]] = load ptr, ptr addrspace(4) [[P2]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P3_GLOBAL:%.*]] = addrspacecast ptr [[P3]] to ptr addrspace(1) +; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[P3_GLOBAL]], align 4 ; CHECK-NEXT: ret void ; entry: - %p1 = getelementptr inbounds float* addrspace(4)*, float* addrspace(4)* addrspace(4)* %Arg, i32 %i - %a1 = addrspacecast float* addrspace(4)* addrspace(4)* %p1 to float* addrspace(4)** - %p2 = load float* addrspace(4)*, float* addrspace(4)** %a1, align 8 - %a2 = addrspacecast float* addrspace(4)* %p2 to float** - %p3 = load float*, float** %a2, align 8 - store float 0.000000e+00, float* %p3, align 4 + %p1 = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) %Arg, i32 %i + %a1 = addrspacecast ptr addrspace(4) %p1 to ptr + %p2 = load ptr addrspace(4), ptr %a1, align 8 + %a2 = addrspacecast ptr addrspace(4) %p2 to ptr + %p3 = load ptr, ptr %a2, align 8 + store float 0.000000e+00, ptr %p3, align 4 ret void } @@ -413,21 +409,21 @@ ; GCN: s_lshl_b64 ; GCN: flat_load_dwordx2 ; GCN: global_store_dword -define amdgpu_kernel void @flat_ptr_volatile_load(float** nocapture readonly %Arg, i32 %i) { +define amdgpu_kernel void @flat_ptr_volatile_load(ptr nocapture readonly %Arg, i32 %i) { ; CHECK-LABEL: @flat_ptr_volatile_load( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARG_GLOBAL:%.*]] = addrspacecast float** [[ARG:%.*]] to float* addrspace(1)* -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float*, float* addrspace(1)* [[ARG_GLOBAL]], i32 [[I:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float* addrspace(1)* [[P1]] to float** -; CHECK-NEXT: [[P2:%.*]] = load volatile float*, float** [[TMP0]], align 8 -; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast float* [[P2]] to float addrspace(1)* -; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[P2_GLOBAL]], align 4 +; CHECK-NEXT: [[ARG_GLOBAL:%.*]] = addrspacecast ptr [[ARG:%.*]] to ptr addrspace(1) +; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG_GLOBAL]], i32 [[I:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[P1]] to ptr +; CHECK-NEXT: [[P2:%.*]] = load volatile ptr, ptr [[TMP0]], align 8 +; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1) +; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[P2_GLOBAL]], align 4 ; CHECK-NEXT: ret void ; entry: - %p1 = getelementptr inbounds float*, float** %Arg, i32 %i - %p2 = load volatile float*, float** %p1, align 8 - store float 0.000000e+00, float* %p2, align 4 + %p1 = getelementptr inbounds ptr, ptr %Arg, i32 %i + %p2 = load volatile ptr, ptr %p1, align 8 + store float 0.000000e+00, ptr %p2, align 4 ret void } @@ -435,20 +431,20 @@ ; GCN: s_lshl_b64 ; GCN: global_load_dwordx2 ; GCN: global_store_dword -define amdgpu_kernel void @flat_ptr_atomic_load(float** nocapture readonly %Arg, i32 %i) { +define amdgpu_kernel void @flat_ptr_atomic_load(ptr nocapture readonly %Arg, i32 %i) { ; CHECK-LABEL: @flat_ptr_atomic_load( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARG_GLOBAL:%.*]] = addrspacecast float** [[ARG:%.*]] to float* addrspace(1)* -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float*, float* addrspace(1)* [[ARG_GLOBAL]], i32 [[I:%.*]] -; CHECK-NEXT: [[P2:%.*]] = load atomic float*, float* addrspace(1)* [[P1]] monotonic, align 8 -; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast float* [[P2]] to float 
addrspace(1)* -; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[P2_GLOBAL]], align 4 +; CHECK-NEXT: [[ARG_GLOBAL:%.*]] = addrspacecast ptr [[ARG:%.*]] to ptr addrspace(1) +; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG_GLOBAL]], i32 [[I:%.*]] +; CHECK-NEXT: [[P2:%.*]] = load atomic ptr, ptr addrspace(1) [[P1]] monotonic, align 8 +; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1) +; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[P2_GLOBAL]], align 4 ; CHECK-NEXT: ret void ; entry: - %p1 = getelementptr inbounds float*, float** %Arg, i32 %i - %p2 = load atomic float*, float** %p1 monotonic, align 8 - store float 0.000000e+00, float* %p2, align 4 + %p1 = getelementptr inbounds ptr, ptr %Arg, i32 %i + %p2 = load atomic ptr, ptr %p1 monotonic, align 8 + store float 0.000000e+00, ptr %p2, align 4 ret void } @@ -457,25 +453,23 @@ ; GCN: s_load_dwordx2 ; GCN: s_load_dwordx2 ; GCN: global_store_dword -define amdgpu_kernel void @cast_changing_pointee_type(float* addrspace(1)* addrspace(1)* nocapture readonly %Arg, i32 %i) { +define amdgpu_kernel void @cast_changing_pointee_type(ptr addrspace(1) nocapture readonly %Arg, i32 %i) { ; CHECK-LABEL: @cast_changing_pointee_type( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float* addrspace(1)*, float* addrspace(1)* addrspace(1)* [[ARG:%.*]], i32 [[I:%.*]] -; CHECK-NEXT: [[A1:%.*]] = bitcast float* addrspace(1)* addrspace(1)* [[P1]] to i32* addrspace(1)* addrspace(1)* -; CHECK-NEXT: [[P2:%.*]] = load i32* addrspace(1)*, i32* addrspace(1)* addrspace(1)* [[A1]], align 8, !amdgpu.noclobber !0 -; CHECK-NEXT: [[A2:%.*]] = bitcast i32* addrspace(1)* [[P2]] to float* addrspace(1)* -; CHECK-NEXT: [[P3:%.*]] = load float*, float* addrspace(1)* [[A2]], align 8, !amdgpu.noclobber !0 -; CHECK-NEXT: [[P3_GLOBAL:%.*]] = addrspacecast float* [[P3]] to float addrspace(1)* -; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[P3_GLOBAL]], align 4 +; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]] +; CHECK-NEXT: [[P2:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P3:%.*]] = load ptr, ptr addrspace(1) [[P2]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P3_GLOBAL:%.*]] = addrspacecast ptr [[P3]] to ptr addrspace(1) +; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[P3_GLOBAL]], align 4 ; CHECK-NEXT: ret void ; entry: - %p1 = getelementptr inbounds float* addrspace(1)*, float* addrspace(1)* addrspace(1)* %Arg, i32 %i - %a1 = addrspacecast float* addrspace(1)* addrspace(1)* %p1 to i32* addrspace(1)** - %p2 = load i32* addrspace(1)*, i32* addrspace(1)** %a1, align 8 - %a2 = addrspacecast i32* addrspace(1)* %p2 to float** - %p3 = load float*, float** %a2, align 8 - store float 0.000000e+00, float* %p3, align 4 + %p1 = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) %Arg, i32 %i + %a1 = addrspacecast ptr addrspace(1) %p1 to ptr + %p2 = load ptr addrspace(1), ptr %a1, align 8 + %a2 = addrspacecast ptr addrspace(1) %p2 to ptr + %p3 = load ptr, ptr %a2, align 8 + store float 0.000000e+00, ptr %p3, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/promote-vect3-load.ll b/llvm/test/CodeGen/AMDGPU/promote-vect3-load.ll --- a/llvm/test/CodeGen/AMDGPU/promote-vect3-load.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-vect3-load.ll @@ -6,65 +6,65 @@ ; This type promotion on smaller aligned loads can cause a page fault error ; while 
accessing one extra dword beyond the buffer. -define protected amdgpu_kernel void @load_v3i32_align4(<3 x i32> addrspace(1)* %arg) #0 { +define protected amdgpu_kernel void @load_v3i32_align4(ptr addrspace(1) %arg) #0 { ; GCN-LABEL: load_v3i32_align4: ; GCN: ; %bb.0: ; GCN: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_load_dwordx2 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x0 ; GCN-NEXT: s_load_dword s{{[0-9]+}}, s[0:1], 0x8 - %vec = load <3 x i32>, <3 x i32> addrspace(1)* %arg, align 4 - store <3 x i32> %vec, <3 x i32> addrspace(1)* undef, align 4 + %vec = load <3 x i32>, ptr addrspace(1) %arg, align 4 + store <3 x i32> %vec, ptr addrspace(1) undef, align 4 ret void } -define protected amdgpu_kernel void @load_v3i32_align8(<3 x i32> addrspace(1)* %arg) #0 { +define protected amdgpu_kernel void @load_v3i32_align8(ptr addrspace(1) %arg) #0 { ; GCN-LABEL: load_v3i32_align8: ; GCN: ; %bb.0: ; GCN: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x0 - %vec = load <3 x i32>, <3 x i32> addrspace(1)* %arg, align 8 - store <3 x i32> %vec, <3 x i32> addrspace(1)* undef, align 8 + %vec = load <3 x i32>, ptr addrspace(1) %arg, align 8 + store <3 x i32> %vec, ptr addrspace(1) undef, align 8 ret void } -define protected amdgpu_kernel void @load_v3i32_align16(<3 x i32> addrspace(1)* %arg) #0 { +define protected amdgpu_kernel void @load_v3i32_align16(ptr addrspace(1) %arg) #0 { ; GCN-LABEL: load_v3i32_align16: ; GCN: ; %bb.0: ; GCN: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x0 - %vec = load <3 x i32>, <3 x i32> addrspace(1)* %arg, align 16 - store <3 x i32> %vec, <3 x i32> addrspace(1)* undef, align 16 + %vec = load <3 x i32>, ptr addrspace(1) %arg, align 16 + store <3 x i32> %vec, ptr addrspace(1) undef, align 16 ret void } -define protected amdgpu_kernel void @load_v3f32_align4(<3 x float> addrspace(1)* %arg) #0 { +define protected amdgpu_kernel void @load_v3f32_align4(ptr addrspace(1) %arg) #0 { ; GCN-LABEL: load_v3f32_align4: ; GCN: ; %bb.0: ; GCN: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_load_dwordx2 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x0 ; GCN-NEXT: s_load_dword s{{[0-9]+}}, s[0:1], 0x8 - %vec = load <3 x float>, <3 x float> addrspace(1)* %arg, align 4 - store <3 x float> %vec, <3 x float> addrspace(1)* undef, align 4 + %vec = load <3 x float>, ptr addrspace(1) %arg, align 4 + store <3 x float> %vec, ptr addrspace(1) undef, align 4 ret void } -define protected amdgpu_kernel void @load_v3f32_align8(<3 x float> addrspace(1)* %arg) #0 { +define protected amdgpu_kernel void @load_v3f32_align8(ptr addrspace(1) %arg) #0 { ; GCN-LABEL: load_v3f32_align8: ; GCN: ; %bb.0: ; GCN: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x0 - %vec = load <3 x float>, <3 x float> addrspace(1)* %arg, align 8 - store <3 x float> %vec, <3 x float> addrspace(1)* undef, align 8 + %vec = load <3 x float>, ptr addrspace(1) %arg, align 8 + store <3 x float> %vec, ptr addrspace(1) undef, align 8 ret void } -define protected amdgpu_kernel void @load_v3f32_align16(<3 x float> addrspace(1)* %arg) #0 { +define protected amdgpu_kernel void @load_v3f32_align16(ptr addrspace(1) %arg) #0 { ; GCN-LABEL: load_v3f32_align16: ; GCN: ; %bb.0: ; GCN: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x0 - %vec = load <3 x float>, <3 x float> addrspace(1)* %arg, align 16 - store <3 x float> %vec, <3 x float> addrspace(1)* undef, align 16 + %vec = load <3 x float>, ptr addrspace(1) %arg, align 16 + store <3 x float> %vec, ptr addrspace(1) undef, 
align 16 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-function-pointer-argument.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-function-pointer-argument.ll --- a/llvm/test/CodeGen/AMDGPU/propagate-attributes-function-pointer-argument.ll +++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-function-pointer-argument.ll @@ -3,27 +3,27 @@ ; passed to the original call instruction as an argument. ; ; Example: -; `call void @f(void ()* @g)` +; `call void @f(ptr @g)` ; could become -; `call void @g(void ()* @g.1)` +; `call void @g(ptr @g.1)` ; which is invalid IR. ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-late %s | FileCheck %s ; CHECK-LABEL: define amdgpu_kernel void @thiswasabug() #0 -; CHECK-NOT: call void @g(void ()* @g.1) -; CHECK-DAG: call void @f(void ()* @g.1) +; CHECK-NOT: call void @g(ptr @g.1) +; CHECK-DAG: call void @f(ptr @g.1) ; CHECK-DAG: call void @g() define amdgpu_kernel void @thiswasabug() #0 { ; no replacement, but @g should be renamed to @g.1 - call void @f(void ()* @g) + call void @f(ptr @g) ; this should call the clone, which takes the name @g call void @g() ret void } -define private void @f(void ()* nocapture %0) #0 { +define private void @f(ptr nocapture %0) #0 { ret void } diff --git a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll --- a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll +++ b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll @@ -30,12 +30,12 @@ ; GETREG: v_mov_b32_e32 v[[VCNT2:[0-9]+]], [[CNT2]] ; GETREG: global_store_{{dwordx2|b64}} v{{.+}}, v[[[VCNT2]]:[[ZERO]]] -define amdgpu_kernel void @test_readcyclecounter(i64 addrspace(1)* %out) #0 { +define amdgpu_kernel void @test_readcyclecounter(ptr addrspace(1) %out) #0 { %cycle0 = call i64 @llvm.readcyclecounter() - store volatile i64 %cycle0, i64 addrspace(1)* %out + store volatile i64 %cycle0, ptr addrspace(1) %out %cycle1 = call i64 @llvm.readcyclecounter() - store volatile i64 %cycle1, i64 addrspace(1)* %out + store volatile i64 %cycle1, ptr addrspace(1) %out ret void } @@ -45,9 +45,9 @@ ; MEMTIME-DAG: s_memtime ; GCN-DAG: s_load_{{dword|b32|b64}} ; GETREG-DAG: s_getreg_b32 s{{[0-9]+}}, hwreg(HW_REG_SHADER_CYCLES, 0, 20) -define amdgpu_cs i32 @test_readcyclecounter_smem(i64 addrspace(4)* inreg %in) #0 { +define amdgpu_cs i32 @test_readcyclecounter_smem(ptr addrspace(4) inreg %in) #0 { %cycle0 = call i64 @llvm.readcyclecounter() - %in.v = load i64, i64 addrspace(4)* %in + %in.v = load i64, ptr addrspace(4) %in %r.64 = add i64 %cycle0, %in.v %r.32 = trunc i64 %r.64 to i32 ret i32 %r.32 diff --git a/llvm/test/CodeGen/AMDGPU/recursion.ll b/llvm/test/CodeGen/AMDGPU/recursion.ll --- a/llvm/test/CodeGen/AMDGPU/recursion.ll +++ b/llvm/test/CodeGen/AMDGPU/recursion.ll @@ -5,7 +5,7 @@ ; CHECK: ScratchSize: 16 define void @recursive() { call void @recursive() - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } @@ -24,7 +24,7 @@ ; CHECK-LABEL: {{^}}tail_recursive_with_stack: define void @tail_recursive_with_stack() { %alloca = alloca i32, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %alloca + store volatile i32 0, ptr addrspace(5) %alloca tail call void @tail_recursive_with_stack() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll b/llvm/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll --- a/llvm/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll +++ b/llvm/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll @@ -6,32 +6,32 @@ ; 
GCN: buffer_load_dword [[VAL:v[0-9]+]] ; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, [[VAL]] ; GCN: buffer_store_dwordx2 -define amdgpu_kernel void @reduce_i64_load_align_4_width_to_i32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #0 { - %a = load i64, i64 addrspace(1)* %in, align 4 +define amdgpu_kernel void @reduce_i64_load_align_4_width_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %a = load i64, ptr addrspace(1) %in, align 4 %and = and i64 %a, 1234567 - store i64 %and, i64 addrspace(1)* %out, align 8 + store i64 %and, ptr addrspace(1) %out, align 8 ret void } ; GCN-LABEL: {{^}}reduce_i64_align_4_bitcast_v2i32_elt0: ; GCN: buffer_load_dword [[VAL:v[0-9]+]] ; GCN: buffer_store_dword [[VAL]] -define amdgpu_kernel void @reduce_i64_align_4_bitcast_v2i32_elt0(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #0 { - %a = load i64, i64 addrspace(1)* %in, align 4 +define amdgpu_kernel void @reduce_i64_align_4_bitcast_v2i32_elt0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %a = load i64, ptr addrspace(1) %in, align 4 %vec = bitcast i64 %a to <2 x i32> %elt0 = extractelement <2 x i32> %vec, i32 0 - store i32 %elt0, i32 addrspace(1)* %out + store i32 %elt0, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}reduce_i64_align_4_bitcast_v2i32_elt1: ; GCN: buffer_load_dword [[VAL:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 ; GCN: buffer_store_dword [[VAL]] -define amdgpu_kernel void @reduce_i64_align_4_bitcast_v2i32_elt1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #0 { - %a = load i64, i64 addrspace(1)* %in, align 4 +define amdgpu_kernel void @reduce_i64_align_4_bitcast_v2i32_elt1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %a = load i64, ptr addrspace(1) %in, align 4 %vec = bitcast i64 %a to <2 x i32> %elt0 = extractelement <2 x i32> %vec, i32 1 - store i32 %elt0, i32 addrspace(1)* %out + store i32 %elt0, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll b/llvm/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll --- a/llvm/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll +++ b/llvm/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll @@ -3,9 +3,9 @@ ; GCN-LABEL: {{^}}store_v2i32_as_v4i16_align_4: ; GCN: s_load_dwordx2 ; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}} -define amdgpu_kernel void @store_v2i32_as_v4i16_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) #0 { +define amdgpu_kernel void @store_v2i32_as_v4i16_align_4(ptr addrspace(3) align 4 %out, <2 x i32> %x) #0 { %x.bc = bitcast <2 x i32> %x to <4 x i16> - store <4 x i16> %x.bc, <4 x i16> addrspace(3)* %out, align 4 + store <4 x i16> %x.bc, ptr addrspace(3) %out, align 4 ret void } @@ -13,18 +13,18 @@ ; GCN: s_load_dwordx4 ; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3 ; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}} -define amdgpu_kernel void @store_v4i32_as_v8i16_align_4(<8 x i16> addrspace(3)* align 4 %out, <4 x i32> %x) #0 { +define amdgpu_kernel void @store_v4i32_as_v8i16_align_4(ptr addrspace(3) align 4 %out, <4 x i32> %x) #0 { %x.bc = bitcast <4 x i32> %x to <8 x i16> - store <8 x i16> %x.bc, <8 x i16> addrspace(3)* %out, align 4 + store <8 x i16> %x.bc, ptr addrspace(3) %out, align 4 ret void } ; GCN-LABEL: {{^}}store_v2i32_as_i64_align_4: ; GCN: s_load_dwordx2 ; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}} -define amdgpu_kernel void @store_v2i32_as_i64_align_4(<4 x i16> addrspace(3)* align 4 %out, <2 x i32> %x) #0 { 
+define amdgpu_kernel void @store_v2i32_as_i64_align_4(ptr addrspace(3) align 4 %out, <2 x i32> %x) #0 { %x.bc = bitcast <2 x i32> %x to <4 x i16> - store <4 x i16> %x.bc, <4 x i16> addrspace(3)* %out, align 4 + store <4 x i16> %x.bc, ptr addrspace(3) %out, align 4 ret void } @@ -32,9 +32,9 @@ ; GCN: s_load_dwordx4 ; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3 ; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}} -define amdgpu_kernel void @store_v4i32_as_v2i64_align_4(<2 x i64> addrspace(3)* align 4 %out, <4 x i32> %x) #0 { +define amdgpu_kernel void @store_v4i32_as_v2i64_align_4(ptr addrspace(3) align 4 %out, <4 x i32> %x) #0 { %x.bc = bitcast <4 x i32> %x to <2 x i64> - store <2 x i64> %x.bc, <2 x i64> addrspace(3)* %out, align 4 + store <2 x i64> %x.bc, ptr addrspace(3) %out, align 4 ret void } @@ -44,9 +44,9 @@ ; GCN-NOT: {{buffer|flat|global}} ; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}} -define amdgpu_kernel void @store_v4i16_as_v2i32_align_4(<2 x i32> addrspace(3)* align 4 %out, <4 x i16> %x) #0 { +define amdgpu_kernel void @store_v4i16_as_v2i32_align_4(ptr addrspace(3) align 4 %out, <4 x i16> %x) #0 { %x.bc = bitcast <4 x i16> %x to <2 x i32> - store <2 x i32> %x.bc, <2 x i32> addrspace(3)* %out, align 4 + store <2 x i32> %x.bc, ptr addrspace(3) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/reorder-stores.ll b/llvm/test/CodeGen/AMDGPU/reorder-stores.ll --- a/llvm/test/CodeGen/AMDGPU/reorder-stores.ll +++ b/llvm/test/CodeGen/AMDGPU/reorder-stores.ll @@ -7,11 +7,11 @@ ; GCN: buffer_store_dwordx4 ; GCN: buffer_store_dwordx4 ; GCN: s_endpgm -define amdgpu_kernel void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind { - %tmp1 = load <2 x double>, <2 x double> addrspace(1)* %x, align 16 - %tmp4 = load <2 x double>, <2 x double> addrspace(1)* %y, align 16 - store <2 x double> %tmp4, <2 x double> addrspace(1)* %x, align 16 - store <2 x double> %tmp1, <2 x double> addrspace(1)* %y, align 16 +define amdgpu_kernel void @no_reorder_v2f64_global_load_store(ptr addrspace(1) nocapture %x, ptr addrspace(1) nocapture %y) nounwind { + %tmp1 = load <2 x double>, ptr addrspace(1) %x, align 16 + %tmp4 = load <2 x double>, ptr addrspace(1) %y, align 16 + store <2 x double> %tmp4, ptr addrspace(1) %x, align 16 + store <2 x double> %tmp1, ptr addrspace(1) %y, align 16 ret void } @@ -23,11 +23,11 @@ ; VI: ds_write_b128 ; GCN: s_endpgm -define amdgpu_kernel void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind { - %tmp1 = load <2 x double>, <2 x double> addrspace(3)* %x, align 16 - %tmp4 = load <2 x double>, <2 x double> addrspace(3)* %y, align 16 - store <2 x double> %tmp4, <2 x double> addrspace(3)* %x, align 16 - store <2 x double> %tmp1, <2 x double> addrspace(3)* %y, align 16 +define amdgpu_kernel void @no_reorder_scalarized_v2f64_local_load_store(ptr addrspace(3) nocapture %x, ptr addrspace(3) nocapture %y) nounwind { + %tmp1 = load <2 x double>, ptr addrspace(3) %x, align 16 + %tmp4 = load <2 x double>, ptr addrspace(3) %y, align 16 + store <2 x double> %tmp4, ptr addrspace(3) %x, align 16 + store <2 x double> %tmp1, ptr addrspace(3) %y, align 16 ret void } @@ -43,11 +43,11 @@ ; GCN: buffer_store_dwordx4 ; GCN: buffer_store_dwordx4 ; GCN: s_endpgm -define amdgpu_kernel void @no_reorder_split_v8i32_global_load_store(<8 x i32> 
addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind { - %tmp1 = load <8 x i32>, <8 x i32> addrspace(1)* %x, align 32 - %tmp4 = load <8 x i32>, <8 x i32> addrspace(1)* %y, align 32 - store <8 x i32> %tmp4, <8 x i32> addrspace(1)* %x, align 32 - store <8 x i32> %tmp1, <8 x i32> addrspace(1)* %y, align 32 +define amdgpu_kernel void @no_reorder_split_v8i32_global_load_store(ptr addrspace(1) nocapture %x, ptr addrspace(1) nocapture %y) nounwind { + %tmp1 = load <8 x i32>, ptr addrspace(1) %x, align 32 + %tmp4 = load <8 x i32>, ptr addrspace(1) %y, align 32 + store <8 x i32> %tmp4, ptr addrspace(1) %x, align 32 + store <8 x i32> %tmp1, ptr addrspace(1) %y, align 32 ret void } @@ -58,16 +58,16 @@ ; GCN-NOT: ds_read ; GCN: ds_write_b64 ; GCN: s_endpgm -define amdgpu_kernel void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind { - %tmp1 = load <2 x i32>, <2 x i32> addrspace(3)* %x, align 8 - %tmp4 = load <2 x i32>, <2 x i32> addrspace(3)* %y, align 8 +define amdgpu_kernel void @no_reorder_extload_64(ptr addrspace(3) nocapture %x, ptr addrspace(3) nocapture %y) nounwind { + %tmp1 = load <2 x i32>, ptr addrspace(3) %x, align 8 + %tmp4 = load <2 x i32>, ptr addrspace(3) %y, align 8 %tmp1ext = zext <2 x i32> %tmp1 to <2 x i64> %tmp4ext = zext <2 x i32> %tmp4 to <2 x i64> %tmp7 = add <2 x i64> %tmp1ext, %tmp9 = add <2 x i64> %tmp4ext, %trunctmp9 = trunc <2 x i64> %tmp9 to <2 x i32> %trunctmp7 = trunc <2 x i64> %tmp7 to <2 x i32> - store <2 x i32> %trunctmp9, <2 x i32> addrspace(3)* %x, align 8 - store <2 x i32> %trunctmp7, <2 x i32> addrspace(3)* %y, align 8 + store <2 x i32> %trunctmp9, ptr addrspace(3) %x, align 8 + store <2 x i32> %trunctmp7, ptr addrspace(3) %y, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll --- a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll +++ b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll @@ -5,140 +5,127 @@ ; CHECK-LABEL: @invalid_reqd_work_group_size( ; CHECK: load i16, -define amdgpu_kernel void @invalid_reqd_work_group_size(i16 addrspace(1)* %out) #0 !reqd_work_group_size !1 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 - %gep.group.size.x.bc = bitcast i8 addrspace(4)* %gep.group.size.x to i16 addrspace(4)* - %group.size.x = load i16, i16 addrspace(4)* %gep.group.size.x.bc, align 4 - store i16 %group.size.x, i16 addrspace(1)* %out +define amdgpu_kernel void @invalid_reqd_work_group_size(ptr addrspace(1) %out) #0 !reqd_work_group_size !1 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 + %group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4 + store i16 %group.size.x, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @volatile_load_group_size_x( ; CHECK: load volatile i16, -define amdgpu_kernel void @volatile_load_group_size_x(i16 addrspace(1)* %out) #0 !reqd_work_group_size !0 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 - %gep.group.size.x.bc = bitcast i8 addrspace(4)* %gep.group.size.x to i16 addrspace(4)* - %group.size.x = load volatile i16, i16 addrspace(4)* %gep.group.size.x.bc, align 4 - store i16 %group.size.x, i16 addrspace(1)* %out +define 
amdgpu_kernel void @volatile_load_group_size_x(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 + %group.size.x = load volatile i16, ptr addrspace(4) %gep.group.size.x, align 4 + store i16 %group.size.x, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @load_group_size_x( ; CHECK-NEXT: store i16 8, -define amdgpu_kernel void @load_group_size_x(i16 addrspace(1)* %out) #0 !reqd_work_group_size !0 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 - %gep.group.size.x.bc = bitcast i8 addrspace(4)* %gep.group.size.x to i16 addrspace(4)* - %group.size.x = load i16, i16 addrspace(4)* %gep.group.size.x.bc, align 4 - store i16 %group.size.x, i16 addrspace(1)* %out +define amdgpu_kernel void @load_group_size_x(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 + %group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4 + store i16 %group.size.x, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @load_group_size_y( ; CHECK-NEXT: store i16 16, -define amdgpu_kernel void @load_group_size_y(i16 addrspace(1)* %out) #0 !reqd_work_group_size !0 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.y = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 6 - %gep.group.size.y.bc = bitcast i8 addrspace(4)* %gep.group.size.y to i16 addrspace(4)* - %group.size.y = load i16, i16 addrspace(4)* %gep.group.size.y.bc, align 4 - store i16 %group.size.y, i16 addrspace(1)* %out +define amdgpu_kernel void @load_group_size_y(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.y = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 6 + %group.size.y = load i16, ptr addrspace(4) %gep.group.size.y, align 4 + store i16 %group.size.y, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @load_group_size_z( ; CHECK-NEXT: store i16 2, -define amdgpu_kernel void @load_group_size_z(i16 addrspace(1)* %out) #0 !reqd_work_group_size !0 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.z = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 8 - %gep.group.size.z.bc = bitcast i8 addrspace(4)* %gep.group.size.z to i16 addrspace(4)* - %group.size.z = load i16, i16 addrspace(4)* %gep.group.size.z.bc, align 4 - store i16 %group.size.z, i16 addrspace(1)* %out +define amdgpu_kernel void @load_group_size_z(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.z = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 8 + %group.size.z = load i16, ptr addrspace(4) %gep.group.size.z, align 4 + store i16 %group.size.z, ptr addrspace(1) %out ret void } ; Metadata uses i64 instead of i32 ; CHECK-LABEL: @load_group_size_x_reqd_work_group_size_i64( ; CHECK-NEXT: store i16 8, -define amdgpu_kernel void @load_group_size_x_reqd_work_group_size_i64(i16 addrspace(1)* %out) #0 !reqd_work_group_size !2 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds i8, 
i8 addrspace(4)* %dispatch.ptr, i64 4 - %gep.group.size.x.bc = bitcast i8 addrspace(4)* %gep.group.size.x to i16 addrspace(4)* - %group.size.x = load i16, i16 addrspace(4)* %gep.group.size.x.bc, align 4 - store i16 %group.size.x, i16 addrspace(1)* %out +define amdgpu_kernel void @load_group_size_x_reqd_work_group_size_i64(ptr addrspace(1) %out) #0 !reqd_work_group_size !2 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 + %group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4 + store i16 %group.size.x, ptr addrspace(1) %out ret void } ; Metadata uses i16 instead of i32 ; CHECK-LABEL: @load_group_size_x_reqd_work_group_size_i16( ; CHECK-NEXT: store i16 8, -define amdgpu_kernel void @load_group_size_x_reqd_work_group_size_i16(i16 addrspace(1)* %out) #0 !reqd_work_group_size !3 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 - %gep.group.size.x.bc = bitcast i8 addrspace(4)* %gep.group.size.x to i16 addrspace(4)* - %group.size.x = load i16, i16 addrspace(4)* %gep.group.size.x.bc, align 4 - store i16 %group.size.x, i16 addrspace(1)* %out +define amdgpu_kernel void @load_group_size_x_reqd_work_group_size_i16(ptr addrspace(1) %out) #0 !reqd_work_group_size !3 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 + %group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4 + store i16 %group.size.x, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @use_local_size_x_8_16_2( ; CHECK-NEXT: store i64 8, -define amdgpu_kernel void @use_local_size_x_8_16_2(i64 addrspace(1)* %out) #0 !reqd_work_group_size !0 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 - %gep.group.size.x.bc = bitcast i8 addrspace(4)* %gep.group.size.x to i16 addrspace(4)* - %group.size.x = load i16, i16 addrspace(4)* %gep.group.size.x.bc, align 4 - %gep.grid.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 12 - %gep.grid.size.x.bc = bitcast i8 addrspace(4)* %gep.grid.size.x to i32 addrspace(4)* - %grid.size.x = load i32, i32 addrspace(4)* %gep.grid.size.x.bc, align 4 +define amdgpu_kernel void @use_local_size_x_8_16_2(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 + %group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4 + %gep.grid.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 12 + %grid.size.x = load i32, ptr addrspace(4) %gep.grid.size.x, align 4 %group.id = tail call i32 @llvm.amdgcn.workgroup.id.x() %group.size.x.zext = zext i16 %group.size.x to i32 %group.id_x_group.size.x = mul i32 %group.id, %group.size.x.zext %sub = sub i32 %grid.size.x, %group.id_x_group.size.x %umin = call i32 @llvm.umin.i32(i32 %sub, i32 %group.size.x.zext) %zext = zext i32 %umin to i64 - store i64 %zext, i64 addrspace(1)* %out + store i64 %zext, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @use_local_size_y_8_16_2( ; CHECK-NEXT: store i64 16, -define amdgpu_kernel void @use_local_size_y_8_16_2(i64 addrspace(1)* %out) #0 !reqd_work_group_size !0 { - %dispatch.ptr 
= tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.y = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 6 - %gep.group.size.y.bc = bitcast i8 addrspace(4)* %gep.group.size.y to i16 addrspace(4)* - %group.size.y = load i16, i16 addrspace(4)* %gep.group.size.y.bc, align 4 - %gep.grid.size.y = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 16 - %gep.grid.size.y.bc = bitcast i8 addrspace(4)* %gep.grid.size.y to i32 addrspace(4)* - %grid.size.y = load i32, i32 addrspace(4)* %gep.grid.size.y.bc, align 4 +define amdgpu_kernel void @use_local_size_y_8_16_2(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.y = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 6 + %group.size.y = load i16, ptr addrspace(4) %gep.group.size.y, align 4 + %gep.grid.size.y = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 16 + %grid.size.y = load i32, ptr addrspace(4) %gep.grid.size.y, align 4 %group.id = tail call i32 @llvm.amdgcn.workgroup.id.y() %group.size.y.zext = zext i16 %group.size.y to i32 %group.id_x_group.size.y = mul i32 %group.id, %group.size.y.zext %sub = sub i32 %grid.size.y, %group.id_x_group.size.y %umin = call i32 @llvm.umin.i32(i32 %sub, i32 %group.size.y.zext) %zext = zext i32 %umin to i64 - store i64 %zext, i64 addrspace(1)* %out + store i64 %zext, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @use_local_size_z_8_16_2( ; CHECK-NEXT: store i64 2, -define amdgpu_kernel void @use_local_size_z_8_16_2(i64 addrspace(1)* %out) #0 !reqd_work_group_size !0 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.z = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 8 - %gep.group.size.z.bc = bitcast i8 addrspace(4)* %gep.group.size.z to i16 addrspace(4)* - %group.size.z = load i16, i16 addrspace(4)* %gep.group.size.z.bc, align 4 - %gep.grid.size.z = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 20 - %gep.grid.size.z.bc = bitcast i8 addrspace(4)* %gep.grid.size.z to i32 addrspace(4)* - %grid.size.z = load i32, i32 addrspace(4)* %gep.grid.size.z.bc, align 4 +define amdgpu_kernel void @use_local_size_z_8_16_2(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.z = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 8 + %group.size.z = load i16, ptr addrspace(4) %gep.group.size.z, align 4 + %gep.grid.size.z = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 20 + %grid.size.z = load i32, ptr addrspace(4) %gep.grid.size.z, align 4 %group.id = tail call i32 @llvm.amdgcn.workgroup.id.z() %group.size.z.zext = zext i16 %group.size.z to i32 %group.id_x_group.size.z = mul i32 %group.id, %group.size.z.zext %sub = sub i32 %grid.size.z, %group.id_x_group.size.z %umin = call i32 @llvm.umin.i32(i32 %sub, i32 %group.size.z.zext) %zext = zext i32 %umin to i64 - store i64 %zext, i64 addrspace(1)* %out + store i64 %zext, ptr addrspace(1) %out ret void } @@ -148,67 +135,61 @@ ; CHECK-LABEL: @local_size_x_8_16_2_wrong_group_id( ; CHECK: %group.id = tail call i32 @llvm.amdgcn.workgroup.id.y() ; CHECK: %group.id_x_group.size.x = shl i32 %group.id, 3 -define amdgpu_kernel void @local_size_x_8_16_2_wrong_group_id(i64 addrspace(1)* %out) #0 !reqd_work_group_size !0 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds 
i8, i8 addrspace(4)* %dispatch.ptr, i64 4 - %gep.group.size.x.bc = bitcast i8 addrspace(4)* %gep.group.size.x to i16 addrspace(4)* - %group.size.x = load i16, i16 addrspace(4)* %gep.group.size.x.bc, align 4 - %gep.grid.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 12 - %gep.grid.size.x.bc = bitcast i8 addrspace(4)* %gep.grid.size.x to i32 addrspace(4)* - %grid.size.x = load i32, i32 addrspace(4)* %gep.grid.size.x.bc, align 4 +define amdgpu_kernel void @local_size_x_8_16_2_wrong_group_id(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 + %group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4 + %gep.grid.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 12 + %grid.size.x = load i32, ptr addrspace(4) %gep.grid.size.x, align 4 %group.id = tail call i32 @llvm.amdgcn.workgroup.id.y() %group.size.x.zext = zext i16 %group.size.x to i32 %group.id_x_group.size.x = mul i32 %group.id, %group.size.x.zext %sub = sub i32 %grid.size.x, %group.id_x_group.size.x %umin = call i32 @llvm.umin.i32(i32 %sub, i32 %group.size.x.zext) %zext = zext i32 %umin to i64 - store i64 %zext, i64 addrspace(1)* %out + store i64 %zext, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @local_size_x_8_16_2_wrong_grid_size( -; CHECK: %grid.size.x = load i32, i32 addrspace(4)* %gep.grid.size.x.bc, align 4 +; CHECK: %grid.size.x = load i32, ptr addrspace(4) %gep.grid.size.x, align 4 ; CHECK: %group.id = tail call i32 @llvm.amdgcn.workgroup.id.x() ; CHECK: %group.id_x_group.size.x = shl i32 %group.id, 3 - define amdgpu_kernel void @local_size_x_8_16_2_wrong_grid_size(i64 addrspace(1)* %out) #0 !reqd_work_group_size !0 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 - %gep.group.size.x.bc = bitcast i8 addrspace(4)* %gep.group.size.x to i16 addrspace(4)* - %group.size.x = load i16, i16 addrspace(4)* %gep.group.size.x.bc, align 4 - %gep.grid.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 16 - %gep.grid.size.x.bc = bitcast i8 addrspace(4)* %gep.grid.size.x to i32 addrspace(4)* - %grid.size.x = load i32, i32 addrspace(4)* %gep.grid.size.x.bc, align 4 + define amdgpu_kernel void @local_size_x_8_16_2_wrong_grid_size(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 + %group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4 + %gep.grid.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 16 + %grid.size.x = load i32, ptr addrspace(4) %gep.grid.size.x, align 4 %group.id = tail call i32 @llvm.amdgcn.workgroup.id.x() %group.size.x.zext = zext i16 %group.size.x to i32 %group.id_x_group.size.x = mul i32 %group.id, %group.size.x.zext %sub = sub i32 %grid.size.x, %group.id_x_group.size.x %umin = call i32 @llvm.umin.i32(i32 %sub, i32 %group.size.x.zext) %zext = zext i32 %umin to i64 - store i64 %zext, i64 addrspace(1)* %out + store i64 %zext, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @local_size_x_8_16_2_wrong_cmp_type( -; CHECK: %grid.size.x = load i32, i32 addrspace(4)* %gep.grid.size.x.bc, align 4 +; CHECK: %grid.size.x = load i32, ptr addrspace(4) %gep.grid.size.x, align 4 ; CHECK: 
%group.id = tail call i32 @llvm.amdgcn.workgroup.id.x() ; CHECK: %group.id_x_group.size.x = shl i32 %group.id, 3 ; CHECK: %sub = sub i32 %grid.size.x, %group.id_x_group.size.x ; CHECK: %smin = call i32 @llvm.smin.i32(i32 %sub, i32 8) -define amdgpu_kernel void @local_size_x_8_16_2_wrong_cmp_type(i64 addrspace(1)* %out) #0 !reqd_work_group_size !0 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 - %gep.group.size.x.bc = bitcast i8 addrspace(4)* %gep.group.size.x to i16 addrspace(4)* - %group.size.x = load i16, i16 addrspace(4)* %gep.group.size.x.bc, align 4 - %gep.grid.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 12 - %gep.grid.size.x.bc = bitcast i8 addrspace(4)* %gep.grid.size.x to i32 addrspace(4)* - %grid.size.x = load i32, i32 addrspace(4)* %gep.grid.size.x.bc, align 4 +define amdgpu_kernel void @local_size_x_8_16_2_wrong_cmp_type(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 + %group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4 + %gep.grid.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 12 + %grid.size.x = load i32, ptr addrspace(4) %gep.grid.size.x, align 4 %group.id = tail call i32 @llvm.amdgcn.workgroup.id.x() %group.size.x.zext = zext i16 %group.size.x to i32 %group.id_x_group.size.x = mul i32 %group.id, %group.size.x.zext %sub = sub i32 %grid.size.x, %group.id_x_group.size.x %smin = call i32 @llvm.smin.i32(i32 %sub, i32 %group.size.x.zext) %zext = zext i32 %smin to i64 - store i64 %zext, i64 addrspace(1)* %out + store i64 %zext, ptr addrspace(1) %out ret void } @@ -217,38 +198,34 @@ ; CHECK: %sub = sub i32 %grid.size.x, %group.id_x_group.size.x ; CHECK: %umax = call i32 @llvm.umax.i32(i32 %sub, i32 8) ; CHECK: %zext = zext i32 %umax to i64 -define amdgpu_kernel void @local_size_x_8_16_2_wrong_select(i64 addrspace(1)* %out) #0 !reqd_work_group_size !0 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 - %gep.group.size.x.bc = bitcast i8 addrspace(4)* %gep.group.size.x to i16 addrspace(4)* - %group.size.x = load i16, i16 addrspace(4)* %gep.group.size.x.bc, align 4 - %gep.grid.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 12 - %gep.grid.size.x.bc = bitcast i8 addrspace(4)* %gep.grid.size.x to i32 addrspace(4)* - %grid.size.x = load i32, i32 addrspace(4)* %gep.grid.size.x.bc, align 4 +define amdgpu_kernel void @local_size_x_8_16_2_wrong_select(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 + %group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4 + %gep.grid.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 12 + %grid.size.x = load i32, ptr addrspace(4) %gep.grid.size.x, align 4 %group.id = tail call i32 @llvm.amdgcn.workgroup.id.x() %group.size.x.zext = zext i16 %group.size.x to i32 %group.id_x_group.size.x = mul i32 %group.id, %group.size.x.zext %sub = sub i32 %grid.size.x, %group.id_x_group.size.x %umax = call i32 @llvm.umax.i32(i32 %sub, i32 %group.size.x.zext) %zext = zext i32 %umax to i64 - store i64 
%zext, i64 addrspace(1)* %out + store i64 %zext, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @use_local_size_x_8_16_2_wrong_grid_load_size( -; CHECK: %grid.size.x = load i16, i16 addrspace(4)* %gep.grid.size.x.bc, align 4 +; CHECK: %grid.size.x = load i16, ptr addrspace(4) %gep.grid.size.x, align 4 ; CHECK: %grid.size.x.zext = zext i16 %grid.size.x to i32 ; CHECK: %group.id = tail call i32 @llvm.amdgcn.workgroup.id.x() ; CHECK: %group.id_x_group.size.x = shl i32 %group.id, 3 ; CHECK: %sub = sub i32 %grid.size.x.zext, %group.id_x_group.size.x -define amdgpu_kernel void @use_local_size_x_8_16_2_wrong_grid_load_size(i64 addrspace(1)* %out) #0 !reqd_work_group_size !0 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 - %gep.group.size.x.bc = bitcast i8 addrspace(4)* %gep.group.size.x to i16 addrspace(4)* - %group.size.x = load i16, i16 addrspace(4)* %gep.group.size.x.bc, align 4 - %gep.grid.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 12 - %gep.grid.size.x.bc = bitcast i8 addrspace(4)* %gep.grid.size.x to i16 addrspace(4)* - %grid.size.x = load i16, i16 addrspace(4)* %gep.grid.size.x.bc, align 4 +define amdgpu_kernel void @use_local_size_x_8_16_2_wrong_grid_load_size(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 + %group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4 + %gep.grid.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 12 + %grid.size.x = load i16, ptr addrspace(4) %gep.grid.size.x, align 4 %grid.size.x.zext = zext i16 %grid.size.x to i32 %group.id = tail call i32 @llvm.amdgcn.workgroup.id.x() %group.size.x.zext = zext i16 %group.size.x to i32 @@ -256,17 +233,16 @@ %sub = sub i32 %grid.size.x.zext, %group.id_x_group.size.x %umin = call i32 @llvm.umin.i32(i32 %sub, i32 %group.size.x.zext) %zext = zext i32 %umin to i64 - store i64 %zext, i64 addrspace(1)* %out + store i64 %zext, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @func_group_size_x( ; CHECK-NEXT: ret i32 8 -define i32 @func_group_size_x(i16 addrspace(1)* %out) #0 !reqd_work_group_size !0 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 - %gep.group.size.x.bc = bitcast i8 addrspace(4)* %gep.group.size.x to i16 addrspace(4)* - %group.size.x = load i16, i16 addrspace(4)* %gep.group.size.x.bc, align 4 +define i32 @func_group_size_x(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 + %group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4 %zext = zext i16 %group.size.x to i32 ret i32 %zext } @@ -275,7 +251,7 @@ ; CHECK: %group.size = phi i32 [ 2, %bb17 ], [ 16, %bb9 ], [ 8, %bb1 ], [ 1, %bb ] define i64 @__ockl_get_local_size_reqd_size(i32 %arg) #1 !reqd_work_group_size !0 { bb: - %tmp = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #2 + %tmp = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #2 switch i32 %arg, label %bb25 [ i32 0, label %bb1 i32 1, label %bb9 @@ -284,32 +260,26 @@ bb1: ; preds = %bb %tmp2 = tail call i32 @llvm.amdgcn.workgroup.id.x() - %tmp3 = getelementptr inbounds i8, 
i8 addrspace(4)* %tmp, i64 12 - %tmp4 = bitcast i8 addrspace(4)* %tmp3 to i32 addrspace(4)* - %tmp5 = load i32, i32 addrspace(4)* %tmp4, align 4 - %tmp6 = getelementptr inbounds i8, i8 addrspace(4)* %tmp, i64 4 - %tmp7 = bitcast i8 addrspace(4)* %tmp6 to i16 addrspace(4)* - %tmp8 = load i16, i16 addrspace(4)* %tmp7, align 4 + %tmp3 = getelementptr inbounds i8, ptr addrspace(4) %tmp, i64 12 + %tmp5 = load i32, ptr addrspace(4) %tmp3, align 4 + %tmp6 = getelementptr inbounds i8, ptr addrspace(4) %tmp, i64 4 + %tmp8 = load i16, ptr addrspace(4) %tmp6, align 4 br label %bb25 bb9: ; preds = %bb %tmp10 = tail call i32 @llvm.amdgcn.workgroup.id.y() - %tmp11 = getelementptr inbounds i8, i8 addrspace(4)* %tmp, i64 16 - %tmp12 = bitcast i8 addrspace(4)* %tmp11 to i32 addrspace(4)* - %tmp13 = load i32, i32 addrspace(4)* %tmp12, align 8 - %tmp14 = getelementptr inbounds i8, i8 addrspace(4)* %tmp, i64 6 - %tmp15 = bitcast i8 addrspace(4)* %tmp14 to i16 addrspace(4)* - %tmp16 = load i16, i16 addrspace(4)* %tmp15, align 2 + %tmp11 = getelementptr inbounds i8, ptr addrspace(4) %tmp, i64 16 + %tmp13 = load i32, ptr addrspace(4) %tmp11, align 8 + %tmp14 = getelementptr inbounds i8, ptr addrspace(4) %tmp, i64 6 + %tmp16 = load i16, ptr addrspace(4) %tmp14, align 2 br label %bb25 bb17: ; preds = %bb %tmp18 = tail call i32 @llvm.amdgcn.workgroup.id.z() - %tmp19 = getelementptr inbounds i8, i8 addrspace(4)* %tmp, i64 20 - %tmp20 = bitcast i8 addrspace(4)* %tmp19 to i32 addrspace(4)* - %tmp21 = load i32, i32 addrspace(4)* %tmp20, align 4 - %tmp22 = getelementptr inbounds i8, i8 addrspace(4)* %tmp, i64 8 - %tmp23 = bitcast i8 addrspace(4)* %tmp22 to i16 addrspace(4)* - %tmp24 = load i16, i16 addrspace(4)* %tmp23, align 8 + %tmp19 = getelementptr inbounds i8, ptr addrspace(4) %tmp, i64 20 + %tmp21 = load i32, ptr addrspace(4) %tmp19, align 4 + %tmp22 = getelementptr inbounds i8, ptr addrspace(4) %tmp, i64 8 + %tmp24 = load i16, ptr addrspace(4) %tmp22, align 8 br label %bb25 bb25: ; preds = %bb17, %bb9, %bb1, %bb @@ -325,77 +295,71 @@ } ; CHECK-LABEL: @all_local_size( -; CHECK-NEXT: store volatile i64 8, i64 addrspace(1)* %out, align 4 -; CHECK-NEXT: store volatile i64 16, i64 addrspace(1)* %out, align 4 -; CHECK-NEXT: store volatile i64 2, i64 addrspace(1)* %out, align 4 -define amdgpu_kernel void @all_local_size(i64 addrspace(1)* nocapture readnone %out) #0 !reqd_work_group_size !0 { - %tmp.i = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 +; CHECK-NEXT: store volatile i64 8, ptr addrspace(1) %out, align 4 +; CHECK-NEXT: store volatile i64 16, ptr addrspace(1) %out, align 4 +; CHECK-NEXT: store volatile i64 2, ptr addrspace(1) %out, align 4 +define amdgpu_kernel void @all_local_size(ptr addrspace(1) nocapture readnone %out) #0 !reqd_work_group_size !0 { + %tmp.i = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0 %tmp2.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #0 - %tmp3.i = getelementptr inbounds i8, i8 addrspace(4)* %tmp.i, i64 12 - %tmp4.i = bitcast i8 addrspace(4)* %tmp3.i to i32 addrspace(4)* - %tmp5.i = load i32, i32 addrspace(4)* %tmp4.i, align 4 - %tmp6.i = getelementptr inbounds i8, i8 addrspace(4)* %tmp.i, i64 4 - %tmp7.i = bitcast i8 addrspace(4)* %tmp6.i to i16 addrspace(4)* - %tmp8.i = load i16, i16 addrspace(4)* %tmp7.i, align 4 + %tmp3.i = getelementptr inbounds i8, ptr addrspace(4) %tmp.i, i64 12 + %tmp5.i = load i32, ptr addrspace(4) %tmp3.i, align 4 + %tmp6.i = getelementptr inbounds i8, ptr addrspace(4) %tmp.i, i64 4 + %tmp8.i = load i16, ptr addrspace(4) %tmp6.i, 
align 4 %tmp29.i = zext i16 %tmp8.i to i32 %tmp30.i = mul i32 %tmp2.i, %tmp29.i %tmp31.i = sub i32 %tmp5.i, %tmp30.i %umin0 = call i32 @llvm.umin.i32(i32 %tmp31.i, i32 %tmp29.i) %tmp34.i = zext i32 %umin0 to i64 %tmp10.i = tail call i32 @llvm.amdgcn.workgroup.id.y() #0 - %tmp11.i = getelementptr inbounds i8, i8 addrspace(4)* %tmp.i, i64 16 - %tmp12.i = bitcast i8 addrspace(4)* %tmp11.i to i32 addrspace(4)* - %tmp13.i = load i32, i32 addrspace(4)* %tmp12.i, align 8 - %tmp14.i = getelementptr inbounds i8, i8 addrspace(4)* %tmp.i, i64 6 - %tmp15.i = bitcast i8 addrspace(4)* %tmp14.i to i16 addrspace(4)* - %tmp16.i = load i16, i16 addrspace(4)* %tmp15.i, align 2 + %tmp11.i = getelementptr inbounds i8, ptr addrspace(4) %tmp.i, i64 16 + %tmp13.i = load i32, ptr addrspace(4) %tmp11.i, align 8 + %tmp14.i = getelementptr inbounds i8, ptr addrspace(4) %tmp.i, i64 6 + %tmp16.i = load i16, ptr addrspace(4) %tmp14.i, align 2 %tmp29.i9 = zext i16 %tmp16.i to i32 %tmp30.i10 = mul i32 %tmp10.i, %tmp29.i9 %tmp31.i11 = sub i32 %tmp13.i, %tmp30.i10 %umin1 = call i32 @llvm.umin.i32(i32 %tmp31.i11, i32 %tmp29.i9) %tmp34.i14 = zext i32 %umin1 to i64 %tmp18.i = tail call i32 @llvm.amdgcn.workgroup.id.z() #0 - %tmp19.i = getelementptr inbounds i8, i8 addrspace(4)* %tmp.i, i64 20 - %tmp20.i = bitcast i8 addrspace(4)* %tmp19.i to i32 addrspace(4)* - %tmp21.i = load i32, i32 addrspace(4)* %tmp20.i, align 4 - %tmp22.i = getelementptr inbounds i8, i8 addrspace(4)* %tmp.i, i64 8 - %tmp23.i = bitcast i8 addrspace(4)* %tmp22.i to i16 addrspace(4)* - %tmp24.i = load i16, i16 addrspace(4)* %tmp23.i, align 8 + %tmp19.i = getelementptr inbounds i8, ptr addrspace(4) %tmp.i, i64 20 + %tmp21.i = load i32, ptr addrspace(4) %tmp19.i, align 4 + %tmp22.i = getelementptr inbounds i8, ptr addrspace(4) %tmp.i, i64 8 + %tmp24.i = load i16, ptr addrspace(4) %tmp22.i, align 8 %tmp29.i2 = zext i16 %tmp24.i to i32 %tmp30.i3 = mul i32 %tmp18.i, %tmp29.i2 %tmp31.i4 = sub i32 %tmp21.i, %tmp30.i3 %umin2 = call i32 @llvm.umin.i32(i32 %tmp31.i4, i32 %tmp29.i2) %tmp34.i7 = zext i32 %umin2 to i64 - store volatile i64 %tmp34.i, i64 addrspace(1)* %out, align 4 - store volatile i64 %tmp34.i14, i64 addrspace(1)* %out, align 4 - store volatile i64 %tmp34.i7, i64 addrspace(1)* %out, align 4 + store volatile i64 %tmp34.i, ptr addrspace(1) %out, align 4 + store volatile i64 %tmp34.i14, ptr addrspace(1) %out, align 4 + store volatile i64 %tmp34.i7, ptr addrspace(1) %out, align 4 ret void } ; TODO: Should be able to handle this, but not much reason to. 
; CHECK-LABEL: @partial_load_group_size_x( -; CHECK-NEXT: %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -; CHECK-NEXT: %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 -; CHECK-NEXT: %group.size.x.lo = load i8, i8 addrspace(4)* %gep.group.size.x, align 4 -; CHECK-NEXT: store i8 %group.size.x.lo, i8 addrspace(1)* %out, align 1 -define amdgpu_kernel void @partial_load_group_size_x(i8 addrspace(1)* %out) #0 !reqd_work_group_size !0 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 - %group.size.x.lo = load i8, i8 addrspace(4)* %gep.group.size.x, align 1 - store i8 %group.size.x.lo, i8 addrspace(1)* %out +; CHECK-NEXT: %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +; CHECK-NEXT: %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 +; CHECK-NEXT: %group.size.x.lo = load i8, ptr addrspace(4) %gep.group.size.x, align 4 +; CHECK-NEXT: store i8 %group.size.x.lo, ptr addrspace(1) %out, align 1 +define amdgpu_kernel void @partial_load_group_size_x(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 + %group.size.x.lo = load i8, ptr addrspace(4) %gep.group.size.x, align 1 + store i8 %group.size.x.lo, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @partial_load_group_size_x_explicit_callsite_align( -; CHECK-NEXT: %dispatch.ptr = tail call align 2 i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -; CHECK-NEXT: %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 -; CHECK-NEXT: %group.size.x.lo = load i8, i8 addrspace(4)* %gep.group.size.x, align 2 -; CHECK-NEXT: store i8 %group.size.x.lo, i8 addrspace(1)* %out, align 1 -define amdgpu_kernel void @partial_load_group_size_x_explicit_callsite_align(i8 addrspace(1)* %out) #0 !reqd_work_group_size !0 { - %dispatch.ptr = tail call align 2 i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 - %group.size.x.lo = load i8, i8 addrspace(4)* %gep.group.size.x, align 1 - store i8 %group.size.x.lo, i8 addrspace(1)* %out +; CHECK-NEXT: %dispatch.ptr = tail call align 2 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +; CHECK-NEXT: %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 +; CHECK-NEXT: %group.size.x.lo = load i8, ptr addrspace(4) %gep.group.size.x, align 2 +; CHECK-NEXT: store i8 %group.size.x.lo, ptr addrspace(1) %out, align 1 +define amdgpu_kernel void @partial_load_group_size_x_explicit_callsite_align(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr = tail call align 2 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 + %group.size.x.lo = load i8, ptr addrspace(4) %gep.group.size.x, align 1 + store i8 %group.size.x.lo, ptr addrspace(1) %out ret void } @@ -403,87 +367,79 @@ ; CHECK-LABEL: @load_group_size_xy_i32( ; CHECK: %group.size.xy = load i32, ; CHECK: store i32 %group.size.xy -define amdgpu_kernel void @load_group_size_xy_i32(i32 addrspace(1)* %out) #0 !reqd_work_group_size !0 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, 
i64 4 - %gep.group.size.x.bc = bitcast i8 addrspace(4)* %gep.group.size.x to i32 addrspace(4)* - %group.size.xy = load i32, i32 addrspace(4)* %gep.group.size.x.bc, align 4 - store i32 %group.size.xy, i32 addrspace(1)* %out +define amdgpu_kernel void @load_group_size_xy_i32(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 + %group.size.xy = load i32, ptr addrspace(4) %gep.group.size.x, align 4 + store i32 %group.size.xy, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @load_group_size_x_y_multiple_dispatch_ptr( -; CHECK-NEXT: store volatile i16 8, i16 addrspace(1)* %out, align 2 -; CHECK-NEXT: store volatile i16 16, i16 addrspace(1)* %out, align 2 -define amdgpu_kernel void @load_group_size_x_y_multiple_dispatch_ptr(i16 addrspace(1)* %out) #0 !reqd_work_group_size !0 { - %dispatch.ptr0 = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr0, i64 4 - %gep.group.size.x.bc = bitcast i8 addrspace(4)* %gep.group.size.x to i16 addrspace(4)* - %group.size.x = load i16, i16 addrspace(4)* %gep.group.size.x.bc, align 4 - store volatile i16 %group.size.x, i16 addrspace(1)* %out - - %dispatch.ptr1 = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.y = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr1, i64 6 - %gep.group.size.y.bc = bitcast i8 addrspace(4)* %gep.group.size.y to i16 addrspace(4)* - %group.size.y = load i16, i16 addrspace(4)* %gep.group.size.y.bc, align 4 - store volatile i16 %group.size.y, i16 addrspace(1)* %out +; CHECK-NEXT: store volatile i16 8, ptr addrspace(1) %out, align 2 +; CHECK-NEXT: store volatile i16 16, ptr addrspace(1) %out, align 2 +define amdgpu_kernel void @load_group_size_x_y_multiple_dispatch_ptr(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr0 = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr0, i64 4 + %group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4 + store volatile i16 %group.size.x, ptr addrspace(1) %out + + %dispatch.ptr1 = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.y = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr1, i64 6 + %group.size.y = load i16, ptr addrspace(4) %gep.group.size.y, align 4 + store volatile i16 %group.size.y, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @use_local_size_x_uniform_work_group_size( -; CHECK-NEXT: %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -; CHECK-NEXT: %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 -; CHECK-NEXT: %gep.group.size.x.bc = bitcast i8 addrspace(4)* %gep.group.size.x to i16 addrspace(4)* -; CHECK-NEXT: %group.size.x = load i16, i16 addrspace(4)* %gep.group.size.x.bc, align 4 +; CHECK-NEXT: %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +; CHECK-NEXT: %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 +; CHECK-NEXT: %group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4 ; CHECK-NEXT: %zext = zext i16 %group.size.x to i64 -; CHECK-NEXT: store i64 %zext, i64 addrspace(1)* %out, align 4 -define amdgpu_kernel void @use_local_size_x_uniform_work_group_size(i64 addrspace(1)* %out) #2 { - %dispatch.ptr = tail call i8 addrspace(4)* 
@llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 - %gep.group.size.x.bc = bitcast i8 addrspace(4)* %gep.group.size.x to i16 addrspace(4)* - %group.size.x = load i16, i16 addrspace(4)* %gep.group.size.x.bc, align 4 - %gep.grid.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 12 - %gep.grid.size.x.bc = bitcast i8 addrspace(4)* %gep.grid.size.x to i32 addrspace(4)* - %grid.size.x = load i32, i32 addrspace(4)* %gep.grid.size.x.bc, align 4 +; CHECK-NEXT: store i64 %zext, ptr addrspace(1) %out, align 4 +define amdgpu_kernel void @use_local_size_x_uniform_work_group_size(ptr addrspace(1) %out) #2 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 + %group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4 + %gep.grid.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 12 + %grid.size.x = load i32, ptr addrspace(4) %gep.grid.size.x, align 4 %group.id = tail call i32 @llvm.amdgcn.workgroup.id.x() %group.size.x.zext = zext i16 %group.size.x to i32 %group.id_x_group.size.x = mul i32 %group.id, %group.size.x.zext %sub = sub i32 %grid.size.x, %group.id_x_group.size.x %umin = call i32 @llvm.umin.i32(i32 %sub, i32 %group.size.x.zext) %zext = zext i32 %umin to i64 - store i64 %zext, i64 addrspace(1)* %out + store i64 %zext, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @use_local_size_x_uniform_work_group_size_false( ; CHECK: call i32 @llvm.umin -define amdgpu_kernel void @use_local_size_x_uniform_work_group_size_false(i64 addrspace(1)* %out) #3 { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() - %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 - %gep.group.size.x.bc = bitcast i8 addrspace(4)* %gep.group.size.x to i16 addrspace(4)* - %group.size.x = load i16, i16 addrspace(4)* %gep.group.size.x.bc, align 4 - %gep.grid.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 12 - %gep.grid.size.x.bc = bitcast i8 addrspace(4)* %gep.grid.size.x to i32 addrspace(4)* - %grid.size.x = load i32, i32 addrspace(4)* %gep.grid.size.x.bc, align 4 +define amdgpu_kernel void @use_local_size_x_uniform_work_group_size_false(ptr addrspace(1) %out) #3 { + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4 + %group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4 + %gep.grid.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 12 + %grid.size.x = load i32, ptr addrspace(4) %gep.grid.size.x, align 4 %group.id = tail call i32 @llvm.amdgcn.workgroup.id.x() %group.size.x.zext = zext i16 %group.size.x to i32 %group.id_x_group.size.x = mul i32 %group.id, %group.size.x.zext %sub = sub i32 %grid.size.x, %group.id_x_group.size.x %umin = call i32 @llvm.umin.i32(i32 %sub, i32 %group.size.x.zext) %zext = zext i32 %umin to i64 - store i64 %zext, i64 addrspace(1)* %out + store i64 %zext, ptr addrspace(1) %out ret void } ; CHECK-LABEL: @no_use_dispatch_ptr( ; CHECK-NEXT: ret void define amdgpu_kernel void @no_use_dispatch_ptr() { - %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() + %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() ret void } -declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1 +declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #1 
declare i32 @llvm.amdgcn.workgroup.id.x() #1 declare i32 @llvm.amdgcn.workgroup.id.y() #1 declare i32 @llvm.amdgcn.workgroup.id.z() #1 diff --git a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll --- a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll +++ b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll @@ -4,13 +4,13 @@ ; Make sure there's no assertion when trying to report the resource ; usage for a function which becomes dead during codegen. -@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant void()*, align 4 +@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 ; GCN-LABEL: unreachable: ; Function info: ; codeLenInByte = 4 define internal fastcc void @unreachable() { - %fptr = load void()*, void()* addrspace(4)* @gv.fptr0 + %fptr = load ptr, ptr addrspace(4) @gv.fptr0 call void %fptr() unreachable } diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll --- a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll +++ b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll @@ -3,31 +3,29 @@ ; CHECK: %void_one_out_non_private_arg_i32_1_use = type { i32 } ; CHECK: %bitcast_pointer_as1 = type { <4 x i32> } -; CHECK-LABEL: define private %void_one_out_non_private_arg_i32_1_use @void_one_out_non_private_arg_i32_1_use.body(i32 addrspace(1)* %val) #0 { +; CHECK-LABEL: define private %void_one_out_non_private_arg_i32_1_use @void_one_out_non_private_arg_i32_1_use.body(ptr addrspace(1) %val) #0 { ; CHECK-NEXT: ret %void_one_out_non_private_arg_i32_1_use zeroinitializer -; CHECK-LABEL: define void @void_one_out_non_private_arg_i32_1_use(i32 addrspace(1)* %0) #1 { -; CHECK-NEXT: %2 = call %void_one_out_non_private_arg_i32_1_use @void_one_out_non_private_arg_i32_1_use.body(i32 addrspace(1)* poison) +; CHECK-LABEL: define void @void_one_out_non_private_arg_i32_1_use(ptr addrspace(1) %0) #1 { +; CHECK-NEXT: %2 = call %void_one_out_non_private_arg_i32_1_use @void_one_out_non_private_arg_i32_1_use.body(ptr addrspace(1) poison) ; CHECK-NEXT: %3 = extractvalue %void_one_out_non_private_arg_i32_1_use %2, 0 -; CHECK-NEXT: store i32 %3, i32 addrspace(1)* %0, align 4 +; CHECK-NEXT: store i32 %3, ptr addrspace(1) %0, align 4 ; CHECK-NEXT: ret void -define void @void_one_out_non_private_arg_i32_1_use(i32 addrspace(1)* %val) #0 { - store i32 0, i32 addrspace(1)* %val +define void @void_one_out_non_private_arg_i32_1_use(ptr addrspace(1) %val) #0 { + store i32 0, ptr addrspace(1) %val ret void } -; CHECK-LABEL: define private %bitcast_pointer_as1 @bitcast_pointer_as1.body(<3 x i32> addrspace(1)* %out) #0 { -; CHECK-NEXT: %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* poison -; CHECK-NEXT: %bitcast = bitcast <3 x i32> addrspace(1)* %out to <4 x i32> addrspace(1)* +; CHECK-LABEL: define private %bitcast_pointer_as1 @bitcast_pointer_as1.body(ptr addrspace(1) %out) #0 { +; CHECK-NEXT: %load = load volatile <4 x i32>, ptr addrspace(1) poison ; CHECK-NEXT: %1 = insertvalue %bitcast_pointer_as1 poison, <4 x i32> %load, 0 ; CHECK-NEXT: ret %bitcast_pointer_as1 %1 -; CHECK-LABEL: define void @bitcast_pointer_as1(<3 x i32> addrspace(1)* %0) #1 { -; CHECK-NEXT: %2 = call %bitcast_pointer_as1 @bitcast_pointer_as1.body(<3 x i32> addrspace(1)* poison) -define void @bitcast_pointer_as1(<3 x i32> addrspace(1)* %out) #0 { - %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* poison - %bitcast 
= bitcast <3 x i32> addrspace(1)* %out to <4 x i32> addrspace(1)* - store <4 x i32> %load, <4 x i32> addrspace(1)* %bitcast +; CHECK-LABEL: define void @bitcast_pointer_as1(ptr addrspace(1) %0) #1 { +; CHECK-NEXT: %2 = call %bitcast_pointer_as1 @bitcast_pointer_as1.body(ptr addrspace(1) poison) +define void @bitcast_pointer_as1(ptr addrspace(1) %out) #0 { + %load = load volatile <4 x i32>, ptr addrspace(1) poison + store <4 x i32> %load, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll --- a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll +++ b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll @@ -6,114 +6,114 @@ unreachable } -define void @void_one_out_arg_i32_no_use(i32* %val) #0 { +define void @void_one_out_arg_i32_no_use(ptr %val) #0 { ret void } -define void @skip_byval_arg(i32* byval(i32) %val) #0 { - store i32 0, i32* %val +define void @skip_byval_arg(ptr byval(i32) %val) #0 { + store i32 0, ptr %val ret void } -define void @skip_optnone(i32* byval(i32) %val) #1 { - store i32 0, i32* %val +define void @skip_optnone(ptr byval(i32) %val) #1 { + store i32 0, ptr %val ret void } -define void @skip_volatile(i32* byval(i32) %val) #0 { - store volatile i32 0, i32* %val +define void @skip_volatile(ptr byval(i32) %val) #0 { + store volatile i32 0, ptr %val ret void } -define void @skip_atomic(i32* byval(i32) %val) #0 { - store atomic i32 0, i32* %val seq_cst, align 4 +define void @skip_atomic(ptr byval(i32) %val) #0 { + store atomic i32 0, ptr %val seq_cst, align 4 ret void } -define void @skip_store_pointer_val(i32* %val) #0 { - store i32* %val, i32** poison +define void @skip_store_pointer_val(ptr %val) #0 { + store ptr %val, ptr poison ret void } -define void @skip_store_gep(i32* %val) #0 { - %gep = getelementptr inbounds i32, i32* %val, i32 1 - store i32 0, i32* %gep +define void @skip_store_gep(ptr %val) #0 { + %gep = getelementptr inbounds i32, ptr %val, i32 1 + store i32 0, ptr %gep ret void } -define void @skip_sret(i32* sret(i32) %sret, i32* %out) #0 { - store i32 1, i32* %sret - store i32 0, i32* %out +define void @skip_sret(ptr sret(i32) %sret, ptr %out) #0 { + store i32 1, ptr %sret + store i32 0, ptr %out ret void } -define void @void_one_out_arg_i32_1_use(i32* %val) #0 { - store i32 0, i32* %val +define void @void_one_out_arg_i32_1_use(ptr %val) #0 { + store i32 0, ptr %val ret void } -define void @void_one_out_arg_i32_1_use_align(i32* align 8 %val) #0 { - store i32 0, i32* %val, align 8 +define void @void_one_out_arg_i32_1_use_align(ptr align 8 %val) #0 { + store i32 0, ptr %val, align 8 ret void } -define void @void_one_out_arg_i32_2_use(i1 %arg0, i32* %val) #0 { +define void @void_one_out_arg_i32_2_use(i1 %arg0, ptr %val) #0 { br i1 %arg0, label %ret0, label %ret1 ret0: - store i32 0, i32* %val + store i32 0, ptr %val ret void ret1: - store i32 9, i32* %val + store i32 9, ptr %val ret void } declare void @may.clobber() -define void @void_one_out_arg_i32_2_stores(i32* %val) #0 { - store i32 0, i32* %val - store i32 1, i32* %val +define void @void_one_out_arg_i32_2_stores(ptr %val) #0 { + store i32 0, ptr %val + store i32 1, ptr %val ret void } -define void @void_one_out_arg_i32_2_stores_clobber(i32* %val) #0 { - store i32 0, i32* %val +define void @void_one_out_arg_i32_2_stores_clobber(ptr %val) #0 { + store i32 0, ptr %val call void @may.clobber() - store i32 1, i32* %val + store i32 1, ptr %val ret void } -define void @void_one_out_arg_i32_call_may_clobber(i32* %val) #0 { - 
store i32 0, i32* %val +define void @void_one_out_arg_i32_call_may_clobber(ptr %val) #0 { + store i32 0, ptr %val call void @may.clobber() ret void } -define void @void_one_out_arg_i32_pre_call_may_clobber(i32* %val) #0 { +define void @void_one_out_arg_i32_pre_call_may_clobber(ptr %val) #0 { call void @may.clobber() - store i32 0, i32* %val + store i32 0, ptr %val ret void } -define void @void_one_out_arg_i32_reload(i32* %val) #0 { - store i32 0, i32* %val - %load = load i32, i32* %val, align 4 +define void @void_one_out_arg_i32_reload(ptr %val) #0 { + store i32 0, ptr %val + %load = load i32, ptr %val, align 4 ret void } -define void @void_one_out_arg_i32_store_in_different_block(i32* %out) #0 { - %load = load i32, i32 addrspace(1)* poison - store i32 0, i32* %out +define void @void_one_out_arg_i32_store_in_different_block(ptr %out) #0 { + %load = load i32, ptr addrspace(1) poison + store i32 0, ptr %out br label %ret ret: @@ -121,20 +121,20 @@ } -define void @unused_out_arg_one_branch(i1 %arg0, i32* %val) #0 { +define void @unused_out_arg_one_branch(i1 %arg0, ptr %val) #0 { br i1 %arg0, label %ret0, label %ret1 ret0: ret void ret1: - store i32 9, i32* %val + store i32 9, ptr %val ret void } -define void @void_one_out_arg_v2i32_1_use(<2 x i32>* %val) #0 { - store <2 x i32> , <2 x i32>* %val +define void @void_one_out_arg_v2i32_1_use(ptr %val) #0 { + store <2 x i32> , ptr %val ret void } @@ -142,54 +142,54 @@ ; Normally this is split into element accesses which we don't handle. -define void @void_one_out_arg_struct_1_use(%struct* %out) #0 { - store %struct { i32 9, i8 99, float 4.0 }, %struct* %out +define void @void_one_out_arg_struct_1_use(ptr %out) #0 { + store %struct { i32 9, i8 99, float 4.0 }, ptr %out ret void } -define i32 @i32_one_out_arg_i32_1_use(i32* %val) #0 { - store i32 24, i32* %val +define i32 @i32_one_out_arg_i32_1_use(ptr %val) #0 { + store i32 24, ptr %val ret i32 9 } -define void @unused_different_type(i32* %arg0, float* nocapture %arg1) #0 { - store float 4.0, float* %arg1, align 4 +define void @unused_different_type(ptr %arg0, ptr nocapture %arg1) #0 { + store float 4.0, ptr %arg1, align 4 ret void } -define void @multiple_same_return_noalias(i32* noalias %out0, i32* noalias %out1) #0 { - store i32 1, i32* %out0, align 4 - store i32 2, i32* %out1, align 4 +define void @multiple_same_return_noalias(ptr noalias %out0, ptr noalias %out1) #0 { + store i32 1, ptr %out0, align 4 + store i32 2, ptr %out1, align 4 ret void } -define void @multiple_same_return_mayalias(i32* %out0, i32* %out1) #0 { - store i32 1, i32* %out0, align 4 - store i32 2, i32* %out1, align 4 +define void @multiple_same_return_mayalias(ptr %out0, ptr %out1) #0 { + store i32 1, ptr %out0, align 4 + store i32 2, ptr %out1, align 4 ret void } -define void @multiple_same_return_mayalias_order(i32* %out0, i32* %out1) #0 { - store i32 2, i32* %out1, align 4 - store i32 1, i32* %out0, align 4 +define void @multiple_same_return_mayalias_order(ptr %out0, ptr %out1) #0 { + store i32 2, ptr %out1, align 4 + store i32 1, ptr %out0, align 4 ret void } ; Currently this fails to convert because the store won't be found if ; it isn't in the same block as the return. 
-define i32 @store_in_entry_block(i1 %arg0, i32* %out) #0 { +define i32 @store_in_entry_block(i1 %arg0, ptr %out) #0 { entry: - %val0 = load i32, i32 addrspace(1)* poison - store i32 %val0, i32* %out + %val0 = load i32, ptr addrspace(1) poison + store i32 %val0, ptr %out br i1 %arg0, label %if, label %endif if: - %val1 = load i32, i32 addrspace(1)* poison + %val1 = load i32, ptr addrspace(1) poison br label %endif endif: @@ -198,8 +198,8 @@ } -define i1 @i1_one_out_arg_i32_1_use(i32* %val) #0 { - store i32 24, i32* %val +define i1 @i1_one_out_arg_i32_1_use(ptr %val) #0 { + store i32 24, ptr %val ret i1 true } @@ -207,99 +207,96 @@ ; incompatible with struct return types. -define zeroext i1 @i1_zeroext_one_out_arg_i32_1_use(i32* %val) #0 { - store i32 24, i32* %val +define zeroext i1 @i1_zeroext_one_out_arg_i32_1_use(ptr %val) #0 { + store i32 24, ptr %val ret i1 true } -define signext i1 @i1_signext_one_out_arg_i32_1_use(i32* %val) #0 { - store i32 24, i32* %val +define signext i1 @i1_signext_one_out_arg_i32_1_use(ptr %val) #0 { + store i32 24, ptr %val ret i1 true } -define noalias i32 addrspace(1)* @p1i32_noalias_one_out_arg_i32_1_use(i32* %val) #0 { - store i32 24, i32* %val - ret i32 addrspace(1)* null +define noalias ptr addrspace(1) @p1i32_noalias_one_out_arg_i32_1_use(ptr %val) #0 { + store i32 24, ptr %val + ret ptr addrspace(1) null } -define void @void_one_out_non_private_arg_i32_1_use(i32 addrspace(1)* %val) #0 { - store i32 0, i32 addrspace(1)* %val +define void @void_one_out_non_private_arg_i32_1_use(ptr addrspace(1) %val) #0 { + store i32 0, ptr addrspace(1) %val ret void } -define void @func_ptr_type(void()** %out) #0 { - %func = load void()*, void()** poison - store void()* %func, void()** %out +define void @func_ptr_type(ptr %out) #0 { + %func = load ptr, ptr poison + store ptr %func, ptr %out ret void } -define void @bitcast_func_ptr_type(void()** %out) #0 { - %func = load i32()*, i32()** poison - %cast = bitcast void()** %out to i32()** - store i32()* %func, i32()** %cast +define void @bitcast_func_ptr_type(ptr %out) #0 { + %func = load ptr, ptr poison + store ptr %func, ptr %out ret void } -define void @out_arg_small_array([4 x i32]* %val) #0 { - store [4 x i32] [i32 0, i32 1, i32 2, i32 3], [4 x i32]* %val +define void @out_arg_small_array(ptr %val) #0 { + store [4 x i32] [i32 0, i32 1, i32 2, i32 3], ptr %val ret void } -define void @out_arg_large_array([17 x i32]* %val) #0 { - store [17 x i32] zeroinitializer, [17 x i32]* %val +define void @out_arg_large_array(ptr %val) #0 { + store [17 x i32] zeroinitializer, ptr %val ret void } -define <16 x i32> @num_regs_return_limit(i32* %out, i32 %val) #0 { - %load = load volatile <16 x i32>, <16 x i32> addrspace(1)* poison - store i32 %val, i32* %out +define <16 x i32> @num_regs_return_limit(ptr %out, i32 %val) #0 { + %load = load volatile <16 x i32>, ptr addrspace(1) poison + store i32 %val, ptr %out ret <16 x i32> %load } -define [15 x i32] @num_regs_reach_limit(i32* %out, i32 %val) #0 { - %load = load volatile [15 x i32], [15 x i32] addrspace(1)* poison - store i32 %val, i32* %out +define [15 x i32] @num_regs_reach_limit(ptr %out, i32 %val) #0 { + %load = load volatile [15 x i32], ptr addrspace(1) poison + store i32 %val, ptr %out ret [15 x i32] %load } -define [15 x i32] @num_regs_reach_limit_leftover(i32* %out0, i32* %out1, i32 %val0) #0 { - %load0 = load volatile [15 x i32], [15 x i32] addrspace(1)* poison - %load1 = load volatile i32, i32 addrspace(1)* poison - store i32 %val0, i32* %out0 - store i32 %load1, i32* %out1 
+define [15 x i32] @num_regs_reach_limit_leftover(ptr %out0, ptr %out1, i32 %val0) #0 { + %load0 = load volatile [15 x i32], ptr addrspace(1) poison + %load1 = load volatile i32, ptr addrspace(1) poison + store i32 %val0, ptr %out0 + store i32 %load1, ptr %out1 ret [15 x i32] %load0 } -define void @preserve_debug_info(i32 %arg0, i32* %val) #0 !dbg !5 { +define void @preserve_debug_info(i32 %arg0, ptr %val) #0 !dbg !5 { call void @may.clobber(), !dbg !10 - store i32 %arg0, i32* %val, !dbg !11 + store i32 %arg0, ptr %val, !dbg !11 ret void, !dbg !12 } -define void @preserve_metadata(i32 %arg0, i32* %val) #0 !kernel_arg_access_qual !13 { +define void @preserve_metadata(i32 %arg0, ptr %val) #0 !kernel_arg_access_qual !13 { call void @may.clobber() - store i32 %arg0, i32* %val + store i32 %arg0, ptr %val ret void } ; Clang emits this pattern for 3-vectors for some reason. -define void @bitcast_pointer_v4i32_v3i32(<3 x i32>* %out) #0 { - %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* poison - %bitcast = bitcast <3 x i32>* %out to <4 x i32>* - store <4 x i32> %load, <4 x i32>* %bitcast +define void @bitcast_pointer_v4i32_v3i32(ptr %out) #0 { + %load = load volatile <4 x i32>, ptr addrspace(1) poison + store <4 x i32> %load, ptr %out ret void } -define void @bitcast_pointer_v4i32_v3f32(<3 x float>* %out) #0 { - %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* poison - %bitcast = bitcast <3 x float>* %out to <4 x i32>* - store <4 x i32> %load, <4 x i32>* %bitcast +define void @bitcast_pointer_v4i32_v3f32(ptr %out) #0 { + %load = load volatile <4 x i32>, ptr addrspace(1) poison + store <4 x i32> %load, ptr %out ret void } @@ -308,24 +305,21 @@ ; casts. -define void @bitcast_pointer_i32_f32(float* %out) #0 { - %load = load volatile i32, i32 addrspace(1)* poison - %bitcast = bitcast float* %out to i32* - store i32 %load, i32* %bitcast +define void @bitcast_pointer_i32_f32(ptr %out) #0 { + %load = load volatile i32, ptr addrspace(1) poison + store i32 %load, ptr %out ret void } -define void @bitcast_pointer_i32_f16(half* %out) #0 { - %load = load volatile i32, i32 addrspace(1)* poison - %bitcast = bitcast half* %out to i32* - store i32 %load, i32* %bitcast +define void @bitcast_pointer_i32_f16(ptr %out) #0 { + %load = load volatile i32, ptr addrspace(1) poison + store i32 %load, ptr %out ret void } -define void @bitcast_pointer_f16_i32(i32* %out) #0 { - %load = load volatile half, half addrspace(1)* poison - %bitcast = bitcast i32* %out to half* - store half %load, half* %bitcast +define void @bitcast_pointer_f16_i32(ptr %out) #0 { + %load = load volatile half, ptr addrspace(1) poison + store half %load, ptr %out ret void } @@ -336,93 +330,80 @@ %struct.v4f32 = type { <4 x float> } -define void @bitcast_struct_v3f32_v3f32(%struct.v3f32* %out, <3 x float> %value) #0 { +define void @bitcast_struct_v3f32_v3f32(ptr %out, <3 x float> %value) #0 { %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> - %cast = bitcast %struct.v3f32* %out to <4 x float>* - store <4 x float> %extractVec, <4 x float>* %cast, align 16 + store <4 x float> %extractVec, ptr %out, align 16 ret void } -define void @bitcast_struct_v3f32_v3i32(%struct.v3f32* %out, <3 x i32> %value) #0 { +define void @bitcast_struct_v3f32_v3i32(ptr %out, <3 x i32> %value) #0 { %extractVec = shufflevector <3 x i32> %value, <3 x i32> poison, <4 x i32> - %cast = bitcast %struct.v3f32* %out to <4 x i32>* - store <4 x i32> %extractVec, <4 x i32>* %cast, align 16 + store <4 x i32> %extractVec, ptr %out, align 16 ret 
void } -define void @bitcast_struct_v4f32_v4f32(%struct.v4f32* %out, <4 x float> %value) #0 { - %cast = bitcast %struct.v4f32* %out to <4 x float>* - store <4 x float> %value, <4 x float>* %cast, align 16 +define void @bitcast_struct_v4f32_v4f32(ptr %out, <4 x float> %value) #0 { + store <4 x float> %value, ptr %out, align 16 ret void } -define void @bitcast_struct_v3f32_v4i32(%struct.v3f32* %out, <4 x i32> %value) #0 { - %cast = bitcast %struct.v3f32* %out to <4 x i32>* - store <4 x i32> %value, <4 x i32>* %cast, align 16 +define void @bitcast_struct_v3f32_v4i32(ptr %out, <4 x i32> %value) #0 { + store <4 x i32> %value, ptr %out, align 16 ret void } -define void @bitcast_struct_v4f32_v3f32(%struct.v4f32* %out, <3 x float> %value) #0 { +define void @bitcast_struct_v4f32_v3f32(ptr %out, <3 x float> %value) #0 { %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> - %cast = bitcast %struct.v4f32* %out to <4 x float>* - store <4 x float> %extractVec, <4 x float>* %cast, align 16 + store <4 x float> %extractVec, ptr %out, align 16 ret void } -define void @bitcast_struct_v3f32_v2f32(%struct.v3f32* %out, <2 x float> %value) #0 { - %cast = bitcast %struct.v3f32* %out to <2 x float>* - store <2 x float> %value, <2 x float>* %cast, align 8 +define void @bitcast_struct_v3f32_v2f32(ptr %out, <2 x float> %value) #0 { + store <2 x float> %value, ptr %out, align 8 ret void } -define void @bitcast_struct_v3f32_f32_v3f32(%struct.v3f32.f32* %out, <3 x float> %value) #0 { +define void @bitcast_struct_v3f32_f32_v3f32(ptr %out, <3 x float> %value) #0 { %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> - %cast = bitcast %struct.v3f32.f32* %out to <4 x float>* - store <4 x float> %extractVec, <4 x float>* %cast, align 16 + store <4 x float> %extractVec, ptr %out, align 16 ret void } -define void @bitcast_struct_v3f32_f32_v4f32(%struct.v3f32.f32* %out, <4 x float> %value) #0 { - %cast = bitcast %struct.v3f32.f32* %out to <4 x float>* - store <4 x float> %value, <4 x float>* %cast, align 16 +define void @bitcast_struct_v3f32_f32_v4f32(ptr %out, <4 x float> %value) #0 { + store <4 x float> %value, ptr %out, align 16 ret void } -define void @bitcast_struct_i128_v4f32(%struct.i128* %out, <4 x float> %value) #0 { - %cast = bitcast %struct.i128* %out to <4 x float>* - store <4 x float> %value, <4 x float>* %cast, align 16 +define void @bitcast_struct_i128_v4f32(ptr %out, <4 x float> %value) #0 { + store <4 x float> %value, ptr %out, align 16 ret void } -define void @bitcast_array_v4i32_v4f32([4 x i32]* %out, [4 x float] %value) #0 { - %cast = bitcast [4 x i32]* %out to [4 x float]* - store [4 x float] %value, [4 x float]* %cast, align 4 +define void @bitcast_array_v4i32_v4f32(ptr %out, [4 x float] %value) #0 { + store [4 x float] %value, ptr %out, align 4 ret void } -define void @multi_return_bitcast_struct_v3f32_v3f32(i1 %cond, %struct.v3f32* %out, <3 x float> %value) #0 { +define void @multi_return_bitcast_struct_v3f32_v3f32(i1 %cond, ptr %out, <3 x float> %value) #0 { entry: br i1 %cond, label %ret0, label %ret1 ret0: %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> - %cast0 = bitcast %struct.v3f32* %out to <4 x float>* - store <4 x float> %extractVec, <4 x float>* %cast0, align 16 + store <4 x float> %extractVec, ptr %out, align 16 ret void ret1: - %cast1 = bitcast %struct.v3f32* %out to <4 x float>* - %load = load <4 x float>, <4 x float> addrspace(1)* poison - store <4 x float> %load, <4 x float>* %cast1, align 16 + %load = load <4 
x float>, ptr addrspace(1) poison + store <4 x float> %load, ptr %out, align 16 ret void } -define void @bitcast_v3f32_struct_v3f32(<3 x float>* %out, %struct.v3f32 %value) #0 { - %cast = bitcast <3 x float>* %out to %struct.v3f32* - store %struct.v3f32 %value, %struct.v3f32* %cast, align 4 +define void @bitcast_v3f32_struct_v3f32(ptr %out, %struct.v3f32 %value) #0 { + store %struct.v3f32 %value, ptr %out, align 4 ret void } @@ -454,82 +435,82 @@ ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_no_use -; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@skip_byval_arg -; CHECK-SAME: (i32* byval(i32) [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store i32 0, i32* [[VAL]], align 4 +; CHECK-SAME: (ptr byval(i32) [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 0, ptr [[VAL]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@skip_optnone -; CHECK-SAME: (i32* byval(i32) [[VAL:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: store i32 0, i32* [[VAL]], align 4 +; CHECK-SAME: (ptr byval(i32) [[VAL:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: store i32 0, ptr [[VAL]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@skip_volatile -; CHECK-SAME: (i32* byval(i32) [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store volatile i32 0, i32* [[VAL]], align 4 +; CHECK-SAME: (ptr byval(i32) [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store volatile i32 0, ptr [[VAL]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@skip_atomic -; CHECK-SAME: (i32* byval(i32) [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store atomic i32 0, i32* [[VAL]] seq_cst, align 4 +; CHECK-SAME: (ptr byval(i32) [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store atomic i32 0, ptr [[VAL]] seq_cst, align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@skip_store_pointer_val -; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store i32* [[VAL]], i32** poison, align 8 +; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store ptr [[VAL]], ptr poison, align 8 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@skip_store_gep -; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[VAL]], i32 1 -; CHECK-NEXT: store i32 0, i32* [[GEP]], align 4 +; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[VAL]], i32 1 +; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@skip_sret -; CHECK-SAME: (i32* sret(i32) [[SRET:%.*]], i32* [[OUT:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store i32 1, i32* [[SRET]], align 4 -; CHECK-NEXT: store i32 0, i32* [[OUT]], align 4 +; CHECK-SAME: (ptr sret(i32) [[SRET:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 1, ptr [[SRET]], align 4 +; CHECK-NEXT: store i32 0, ptr [[OUT]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use.body -; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[VOID_ONE_OUT_ARG_I32_1_USE:%.*]] zeroinitializer ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use -; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_1_USE:%.*]] @void_one_out_arg_i32_1_use.body(i32* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_1_USE:%.*]] 
@void_one_out_arg_i32_1_use.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0 -; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use_align.body -; CHECK-SAME: (i32* align 8 [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr align 8 [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[VOID_ONE_OUT_ARG_I32_1_USE_ALIGN:%.*]] zeroinitializer ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use_align -; CHECK-SAME: (i32* align 8 [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_1_USE_ALIGN:%.*]] @void_one_out_arg_i32_1_use_align.body(i32* poison) +; CHECK-SAME: (ptr align 8 [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_1_USE_ALIGN:%.*]] @void_one_out_arg_i32_1_use_align.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_1_USE_ALIGN]] [[TMP2]], 0 -; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP0]], align 8 +; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 8 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_use.body -; CHECK-SAME: (i1 [[ARG0:%.*]], i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (i1 [[ARG0:%.*]], ptr [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[ARG0]], label [[RET0:%.*]], label [[RET1:%.*]] ; CHECK: ret0: ; CHECK-NEXT: ret [[VOID_ONE_OUT_ARG_I32_2_USE:%.*]] zeroinitializer @@ -538,195 +519,195 @@ ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_use -; CHECK-SAME: (i1 [[TMP0:%.*]], i32* [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_USE:%.*]] @void_one_out_arg_i32_2_use.body(i1 [[TMP0]], i32* poison) +; CHECK-SAME: (i1 [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_USE:%.*]] @void_one_out_arg_i32_2_use.body(i1 [[TMP0]], ptr poison) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_2_USE]] [[TMP3]], 0 -; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP1]], align 4 +; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP1]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores.body -; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store i32 0, i32* [[VAL]], align 4 +; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 0, ptr [[VAL]], align 4 ; CHECK-NEXT: ret [[VOID_ONE_OUT_ARG_I32_2_STORES:%.*]] { i32 1 } ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores -; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_STORES:%.*]] @void_one_out_arg_i32_2_stores.body(i32* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_STORES:%.*]] @void_one_out_arg_i32_2_stores.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_2_STORES]] [[TMP2]], 0 -; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores_clobber.body -; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store i32 0, i32* [[VAL]], align 4 +; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 0, ptr [[VAL]], align 4 ; CHECK-NEXT: call void @may.clobber() ; CHECK-NEXT: ret 
[[VOID_ONE_OUT_ARG_I32_2_STORES_CLOBBER:%.*]] { i32 1 } ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores_clobber -; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_STORES_CLOBBER:%.*]] @void_one_out_arg_i32_2_stores_clobber.body(i32* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_STORES_CLOBBER:%.*]] @void_one_out_arg_i32_2_stores_clobber.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_2_STORES_CLOBBER]] [[TMP2]], 0 -; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_call_may_clobber -; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store i32 0, i32* [[VAL]], align 4 +; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 0, ptr [[VAL]], align 4 ; CHECK-NEXT: call void @may.clobber() ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_pre_call_may_clobber.body -; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @may.clobber() ; CHECK-NEXT: ret [[VOID_ONE_OUT_ARG_I32_PRE_CALL_MAY_CLOBBER:%.*]] zeroinitializer ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_pre_call_may_clobber -; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_PRE_CALL_MAY_CLOBBER:%.*]] @void_one_out_arg_i32_pre_call_may_clobber.body(i32* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_PRE_CALL_MAY_CLOBBER:%.*]] @void_one_out_arg_i32_pre_call_may_clobber.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_PRE_CALL_MAY_CLOBBER]] [[TMP2]], 0 -; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_reload -; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store i32 0, i32* [[VAL]], align 4 -; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[VAL]], align 4 +; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 0, ptr [[VAL]], align 4 +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[VAL]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_store_in_different_block -; CHECK-SAME: (i32* [[OUT:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32 addrspace(1)* poison, align 4 -; CHECK-NEXT: store i32 0, i32* [[OUT]], align 4 +; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) poison, align 4 +; CHECK-NEXT: store i32 0, ptr [[OUT]], align 4 ; CHECK-NEXT: br label [[RET:%.*]] ; CHECK: ret: ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@unused_out_arg_one_branch -; CHECK-SAME: (i1 [[ARG0:%.*]], i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (i1 [[ARG0:%.*]], ptr [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[ARG0]], label [[RET0:%.*]], label [[RET1:%.*]] ; CHECK: ret0: ; CHECK-NEXT: ret void ; CHECK: ret1: -; CHECK-NEXT: store i32 9, i32* [[VAL]], align 4 +; CHECK-NEXT: store i32 9, ptr [[VAL]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_v2i32_1_use.body -; CHECK-SAME: (<2 x i32>* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr [[VAL:%.*]]) 
#[[ATTR0]] { ; CHECK-NEXT: ret [[VOID_ONE_OUT_ARG_V2I32_1_USE:%.*]] { <2 x i32> } ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_v2i32_1_use -; CHECK-SAME: (<2 x i32>* [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_V2I32_1_USE:%.*]] @void_one_out_arg_v2i32_1_use.body(<2 x i32>* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_V2I32_1_USE:%.*]] @void_one_out_arg_v2i32_1_use.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_V2I32_1_USE]] [[TMP2]], 0 -; CHECK-NEXT: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP0]], align 8 +; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[TMP0]], align 8 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_struct_1_use.body -; CHECK-SAME: (%struct* [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[VOID_ONE_OUT_ARG_STRUCT_1_USE:%.*]] { [[STRUCT:%.*]] { i32 9, i8 99, float 4.000000e+00 } } ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_struct_1_use -; CHECK-SAME: (%struct* [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_STRUCT_1_USE:%.*]] @void_one_out_arg_struct_1_use.body(%struct* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_STRUCT_1_USE:%.*]] @void_one_out_arg_struct_1_use.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_STRUCT_1_USE]] [[TMP2]], 0 -; CHECK-NEXT: store [[STRUCT:%.*]] [[TMP3]], %struct* [[TMP0]], align 4 +; CHECK-NEXT: store [[STRUCT:%.*]] [[TMP3]], ptr [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@i32_one_out_arg_i32_1_use.body -; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[I32_ONE_OUT_ARG_I32_1_USE:%.*]] { i32 9, i32 24 } ; ; ; CHECK-LABEL: define {{[^@]+}}@i32_one_out_arg_i32_1_use -; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[I32_ONE_OUT_ARG_I32_1_USE:%.*]] @i32_one_out_arg_i32_1_use.body(i32* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[I32_ONE_OUT_ARG_I32_1_USE:%.*]] @i32_one_out_arg_i32_1_use.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[I32_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1 -; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[I32_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0 ; CHECK-NEXT: ret i32 [[TMP4]] ; ; ; CHECK-LABEL: define {{[^@]+}}@unused_different_type.body -; CHECK-SAME: (i32* [[ARG0:%.*]], float* nocapture [[ARG1:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr [[ARG0:%.*]], ptr nocapture [[ARG1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[UNUSED_DIFFERENT_TYPE:%.*]] { float 4.000000e+00 } ; ; ; CHECK-LABEL: define {{[^@]+}}@unused_different_type -; CHECK-SAME: (i32* [[TMP0:%.*]], float* nocapture [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[UNUSED_DIFFERENT_TYPE:%.*]] @unused_different_type.body(i32* [[TMP0]], float* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]], ptr nocapture [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[UNUSED_DIFFERENT_TYPE:%.*]] @unused_different_type.body(ptr [[TMP0]], ptr poison) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[UNUSED_DIFFERENT_TYPE]] [[TMP3]], 0 -; CHECK-NEXT: store float [[TMP4]], float* [[TMP1]], align 4 +; CHECK-NEXT: store float [[TMP4]], ptr [[TMP1]], align 4 ; 
CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_noalias.body -; CHECK-SAME: (i32* noalias [[OUT0:%.*]], i32* noalias [[OUT1:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr noalias [[OUT0:%.*]], ptr noalias [[OUT1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[MULTIPLE_SAME_RETURN_NOALIAS:%.*]] { i32 1, i32 2 } ; ; ; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_noalias -; CHECK-SAME: (i32* noalias [[TMP0:%.*]], i32* noalias [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_NOALIAS:%.*]] @multiple_same_return_noalias.body(i32* poison, i32* poison) +; CHECK-SAME: (ptr noalias [[TMP0:%.*]], ptr noalias [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_NOALIAS:%.*]] @multiple_same_return_noalias.body(ptr poison, ptr poison) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_NOALIAS]] [[TMP3]], 0 -; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_NOALIAS]] [[TMP3]], 1 -; CHECK-NEXT: store i32 [[TMP5]], i32* [[TMP1]], align 4 +; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias.body -; CHECK-SAME: (i32* [[OUT0:%.*]], i32* [[OUT1:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[MULTIPLE_SAME_RETURN_MAYALIAS:%.*]] { i32 2, i32 1 } ; ; ; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias -; CHECK-SAME: (i32* [[TMP0:%.*]], i32* [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_MAYALIAS:%.*]] @multiple_same_return_mayalias.body(i32* poison, i32* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_MAYALIAS:%.*]] @multiple_same_return_mayalias.body(ptr poison, ptr poison) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_MAYALIAS]] [[TMP3]], 0 -; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_MAYALIAS]] [[TMP3]], 1 -; CHECK-NEXT: store i32 [[TMP5]], i32* [[TMP1]], align 4 +; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias_order.body -; CHECK-SAME: (i32* [[OUT0:%.*]], i32* [[OUT1:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER:%.*]] { i32 1, i32 2 } ; ; ; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias_order -; CHECK-SAME: (i32* [[TMP0:%.*]], i32* [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER:%.*]] @multiple_same_return_mayalias_order.body(i32* poison, i32* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER:%.*]] @multiple_same_return_mayalias_order.body(ptr poison, ptr poison) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER]] [[TMP3]], 0 -; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER]] [[TMP3]], 1 -; CHECK-NEXT: store i32 [[TMP5]], i32* [[TMP1]], 
align 4 +; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@store_in_entry_block -; CHECK-SAME: (i1 [[ARG0:%.*]], i32* [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (i1 [[ARG0:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VAL0:%.*]] = load i32, i32 addrspace(1)* poison, align 4 -; CHECK-NEXT: store i32 [[VAL0]], i32* [[OUT]], align 4 +; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(1) poison, align 4 +; CHECK-NEXT: store i32 [[VAL0]], ptr [[OUT]], align 4 ; CHECK-NEXT: br i1 [[ARG0]], label [[IF:%.*]], label [[ENDIF:%.*]] ; CHECK: if: -; CHECK-NEXT: [[VAL1:%.*]] = load i32, i32 addrspace(1)* poison, align 4 +; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr addrspace(1) poison, align 4 ; CHECK-NEXT: br label [[ENDIF]] ; CHECK: endif: ; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VAL1]], [[IF]] ] @@ -734,146 +715,144 @@ ; ; ; CHECK-LABEL: define {{[^@]+}}@i1_one_out_arg_i32_1_use.body -; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[I1_ONE_OUT_ARG_I32_1_USE:%.*]] { i1 true, i32 24 } ; ; ; CHECK-LABEL: define {{[^@]+}}@i1_one_out_arg_i32_1_use -; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[I1_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_one_out_arg_i32_1_use.body(i32* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[I1_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_one_out_arg_i32_1_use.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[I1_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1 -; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[I1_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0 ; CHECK-NEXT: ret i1 [[TMP4]] ; ; ; CHECK-LABEL: define {{[^@]+}}@i1_zeroext_one_out_arg_i32_1_use.body -; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE:%.*]] { i1 true, i32 24 } ; ; ; CHECK-LABEL: define {{[^@]+}}@i1_zeroext_one_out_arg_i32_1_use -; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_zeroext_one_out_arg_i32_1_use.body(i32* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_zeroext_one_out_arg_i32_1_use.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1 -; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0 ; CHECK-NEXT: ret i1 [[TMP4]] ; ; ; CHECK-LABEL: define {{[^@]+}}@i1_signext_one_out_arg_i32_1_use.body -; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE:%.*]] { i1 true, i32 24 } ; ; ; CHECK-LABEL: define {{[^@]+}}@i1_signext_one_out_arg_i32_1_use -; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_signext_one_out_arg_i32_1_use.body(i32* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_signext_one_out_arg_i32_1_use.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue 
[[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1 -; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0 ; CHECK-NEXT: ret i1 [[TMP4]] ; ; ; CHECK-LABEL: define {{[^@]+}}@p1i32_noalias_one_out_arg_i32_1_use.body -; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: ret [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE:%.*]] { i32 addrspace(1)* null, i32 24 } +; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE:%.*]] { ptr addrspace(1) null, i32 24 } ; ; ; CHECK-LABEL: define {{[^@]+}}@p1i32_noalias_one_out_arg_i32_1_use -; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE:%.*]] @p1i32_noalias_one_out_arg_i32_1_use.body(i32* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE:%.*]] @p1i32_noalias_one_out_arg_i32_1_use.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1 -; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0 -; CHECK-NEXT: ret i32 addrspace(1)* [[TMP4]] +; CHECK-NEXT: ret ptr addrspace(1) [[TMP4]] ; ; ; CHECK-LABEL: define {{[^@]+}}@void_one_out_non_private_arg_i32_1_use -; CHECK-SAME: (i32 addrspace(1)* [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store i32 0, i32 addrspace(1)* [[VAL]], align 4 +; CHECK-SAME: (ptr addrspace(1) [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 0, ptr addrspace(1) [[VAL]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@func_ptr_type.body -; CHECK-SAME: (void ()** [[OUT:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[FUNC:%.*]] = load void ()*, void ()** poison, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[FUNC_PTR_TYPE:%.*]] poison, void ()* [[FUNC]], 0 +; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[FUNC:%.*]] = load ptr, ptr poison, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[FUNC_PTR_TYPE:%.*]] poison, ptr [[FUNC]], 0 ; CHECK-NEXT: ret [[FUNC_PTR_TYPE]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@func_ptr_type -; CHECK-SAME: (void ()** [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[FUNC_PTR_TYPE:%.*]] @func_ptr_type.body(void ()** poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[FUNC_PTR_TYPE:%.*]] @func_ptr_type.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[FUNC_PTR_TYPE]] [[TMP2]], 0 -; CHECK-NEXT: store void ()* [[TMP3]], void ()** [[TMP0]], align 8 +; CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP0]], align 8 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_func_ptr_type.body -; CHECK-SAME: (void ()** [[OUT:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[FUNC:%.*]] = load i32 ()*, i32 ()** poison, align 8 -; CHECK-NEXT: [[CAST:%.*]] = bitcast void ()** [[OUT]] to i32 ()** -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_FUNC_PTR_TYPE:%.*]] poison, i32 ()* [[FUNC]], 0 +; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[FUNC:%.*]] = load ptr, ptr poison, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_FUNC_PTR_TYPE:%.*]] poison, ptr [[FUNC]], 0 ; CHECK-NEXT: ret [[BITCAST_FUNC_PTR_TYPE]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_func_ptr_type -; 
CHECK-SAME: (void ()** [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_FUNC_PTR_TYPE:%.*]] @bitcast_func_ptr_type.body(void ()** poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_FUNC_PTR_TYPE:%.*]] @bitcast_func_ptr_type.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_FUNC_PTR_TYPE]] [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast void ()** [[TMP0]] to i32 ()** -; CHECK-NEXT: store i32 ()* [[TMP3]], i32 ()** [[TMP4]], align 8 +; CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP0]], align 8 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@out_arg_small_array.body -; CHECK-SAME: ([4 x i32]* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret [[OUT_ARG_SMALL_ARRAY:%.*]] { [4 x i32] [i32 0, i32 1, i32 2, i32 3] } ; ; ; CHECK-LABEL: define {{[^@]+}}@out_arg_small_array -; CHECK-SAME: ([4 x i32]* [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[OUT_ARG_SMALL_ARRAY:%.*]] @out_arg_small_array.body([4 x i32]* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[OUT_ARG_SMALL_ARRAY:%.*]] @out_arg_small_array.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[OUT_ARG_SMALL_ARRAY]] [[TMP2]], 0 -; CHECK-NEXT: store [4 x i32] [[TMP3]], [4 x i32]* [[TMP0]], align 4 +; CHECK-NEXT: store [4 x i32] [[TMP3]], ptr [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@out_arg_large_array -; CHECK-SAME: ([17 x i32]* [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: store [17 x i32] zeroinitializer, [17 x i32]* [[VAL]], align 4 +; CHECK-SAME: (ptr [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store [17 x i32] zeroinitializer, ptr [[VAL]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@num_regs_return_limit -; CHECK-SAME: (i32* [[OUT:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LOAD:%.*]] = load volatile <16 x i32>, <16 x i32> addrspace(1)* poison, align 64 -; CHECK-NEXT: store i32 [[VAL]], i32* [[OUT]], align 4 +; CHECK-SAME: (ptr [[OUT:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOAD:%.*]] = load volatile <16 x i32>, ptr addrspace(1) poison, align 64 +; CHECK-NEXT: store i32 [[VAL]], ptr [[OUT]], align 4 ; CHECK-NEXT: ret <16 x i32> [[LOAD]] ; ; ; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit.body -; CHECK-SAME: (i32* [[OUT:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LOAD:%.*]] = load volatile [15 x i32], [15 x i32] addrspace(1)* poison, align 4 +; CHECK-SAME: (ptr [[OUT:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOAD:%.*]] = load volatile [15 x i32], ptr addrspace(1) poison, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT:%.*]] poison, [15 x i32] [[LOAD]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT]] [[TMP1]], i32 [[VAL]], 1 ; CHECK-NEXT: ret [[NUM_REGS_REACH_LIMIT]] [[TMP2]] ; ; ; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit -; CHECK-SAME: (i32* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[NUM_REGS_REACH_LIMIT:%.*]] @num_regs_reach_limit.body(i32* poison, i32 [[TMP1]]) +; CHECK-SAME: (ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[NUM_REGS_REACH_LIMIT:%.*]] @num_regs_reach_limit.body(ptr poison, i32 [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT]] [[TMP3]], 1 -; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4 ; 
CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT]] [[TMP3]], 0 ; CHECK-NEXT: ret [15 x i32] [[TMP5]] ; ; ; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit_leftover.body -; CHECK-SAME: (i32* [[OUT0:%.*]], i32* [[OUT1:%.*]], i32 [[VAL0:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LOAD0:%.*]] = load volatile [15 x i32], [15 x i32] addrspace(1)* poison, align 4 -; CHECK-NEXT: [[LOAD1:%.*]] = load volatile i32, i32 addrspace(1)* poison, align 4 +; CHECK-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], i32 [[VAL0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOAD0:%.*]] = load volatile [15 x i32], ptr addrspace(1) poison, align 4 +; CHECK-NEXT: [[LOAD1:%.*]] = load volatile i32, ptr addrspace(1) poison, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER:%.*]] poison, [15 x i32] [[LOAD0]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP1]], i32 [[LOAD1]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP2]], i32 [[VAL0]], 2 @@ -881,332 +860,297 @@ ; ; ; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit_leftover -; CHECK-SAME: (i32* [[TMP0:%.*]], i32* [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP4:%.*]] = call [[NUM_REGS_REACH_LIMIT_LEFTOVER:%.*]] @num_regs_reach_limit_leftover.body(i32* poison, i32* poison, i32 [[TMP2]]) +; CHECK-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP4:%.*]] = call [[NUM_REGS_REACH_LIMIT_LEFTOVER:%.*]] @num_regs_reach_limit_leftover.body(ptr poison, ptr poison, i32 [[TMP2]]) ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP4]], 1 -; CHECK-NEXT: store i32 [[TMP5]], i32* [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP4]], 2 -; CHECK-NEXT: store i32 [[TMP6]], i32* [[TMP1]], align 4 +; CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP4]], 0 ; CHECK-NEXT: ret [15 x i32] [[TMP7]] ; ; ; CHECK-LABEL: define {{[^@]+}}@preserve_debug_info.body -; CHECK-SAME: (i32 [[ARG0:%.*]], i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (i32 [[ARG0:%.*]], ptr [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @may.clobber(), !dbg [[DBG5:![0-9]+]] ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[PRESERVE_DEBUG_INFO:%.*]] poison, i32 [[ARG0]], 0, !dbg [[DBG11:![0-9]+]] ; CHECK-NEXT: ret [[PRESERVE_DEBUG_INFO]] [[TMP1]], !dbg [[DBG11]] ; ; ; CHECK-LABEL: define {{[^@]+}}@preserve_debug_info -; CHECK-SAME: (i32 [[TMP0:%.*]], i32* [[TMP1:%.*]]) #[[ATTR2]] !dbg [[DBG6:![0-9]+]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[PRESERVE_DEBUG_INFO:%.*]] @preserve_debug_info.body(i32 [[TMP0]], i32* poison) +; CHECK-SAME: (i32 [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR2]] !dbg [[DBG6:![0-9]+]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[PRESERVE_DEBUG_INFO:%.*]] @preserve_debug_info.body(i32 [[TMP0]], ptr poison) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[PRESERVE_DEBUG_INFO]] [[TMP3]], 0 -; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP1]], align 4 +; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP1]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@preserve_metadata.body -; CHECK-SAME: (i32 [[ARG0:%.*]], i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (i32 [[ARG0:%.*]], ptr [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @may.clobber() ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[PRESERVE_METADATA:%.*]] poison, i32 [[ARG0]], 0 ; CHECK-NEXT: 
ret [[PRESERVE_METADATA]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@preserve_metadata -; CHECK-SAME: (i32 [[TMP0:%.*]], i32* [[TMP1:%.*]]) #[[ATTR2]] !kernel_arg_access_qual !12 { -; CHECK-NEXT: [[TMP3:%.*]] = call [[PRESERVE_METADATA:%.*]] @preserve_metadata.body(i32 [[TMP0]], i32* poison) +; CHECK-SAME: (i32 [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR2]] !kernel_arg_access_qual !12 { +; CHECK-NEXT: [[TMP3:%.*]] = call [[PRESERVE_METADATA:%.*]] @preserve_metadata.body(i32 [[TMP0]], ptr poison) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[PRESERVE_METADATA]] [[TMP3]], 0 -; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP1]], align 4 +; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP1]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3i32.body -; CHECK-SAME: (<3 x i32>* [[OUT:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LOAD:%.*]] = load volatile <4 x i32>, <4 x i32> addrspace(1)* poison, align 16 -; CHECK-NEXT: [[BITCAST:%.*]] = bitcast <3 x i32>* [[OUT]] to <4 x i32>* +; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOAD:%.*]] = load volatile <4 x i32>, ptr addrspace(1) poison, align 16 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3I32:%.*]] poison, <4 x i32> [[LOAD]], 0 ; CHECK-NEXT: ret [[BITCAST_POINTER_V4I32_V3I32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3i32 -; CHECK-SAME: (<3 x i32>* [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3I32:%.*]] @bitcast_pointer_v4i32_v3i32.body(<3 x i32>* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3I32:%.*]] @bitcast_pointer_v4i32_v3i32.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_V4I32_V3I32]] [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <3 x i32>* [[TMP0]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 16 +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3f32.body -; CHECK-SAME: (<3 x float>* [[OUT:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LOAD:%.*]] = load volatile <4 x i32>, <4 x i32> addrspace(1)* poison, align 16 -; CHECK-NEXT: [[BITCAST:%.*]] = bitcast <3 x float>* [[OUT]] to <4 x i32>* +; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOAD:%.*]] = load volatile <4 x i32>, ptr addrspace(1) poison, align 16 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3F32:%.*]] poison, <4 x i32> [[LOAD]], 0 ; CHECK-NEXT: ret [[BITCAST_POINTER_V4I32_V3F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3f32 -; CHECK-SAME: (<3 x float>* [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3F32:%.*]] @bitcast_pointer_v4i32_v3f32.body(<3 x float>* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3F32:%.*]] @bitcast_pointer_v4i32_v3f32.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_V4I32_V3F32]] [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <3 x float>* [[TMP0]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 16 +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f32.body -; CHECK-SAME: (float* [[OUT:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* poison, align 4 -; 
CHECK-NEXT: [[BITCAST:%.*]] = bitcast float* [[OUT]] to i32* +; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) poison, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_I32_F32:%.*]] poison, i32 [[LOAD]], 0 ; CHECK-NEXT: ret [[BITCAST_POINTER_I32_F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f32 -; CHECK-SAME: (float* [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F32:%.*]] @bitcast_pointer_i32_f32.body(float* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F32:%.*]] @bitcast_pointer_i32_f32.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_I32_F32]] [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP0]] to i32* -; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP4]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f16.body -; CHECK-SAME: (half* [[OUT:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* poison, align 4 -; CHECK-NEXT: [[BITCAST:%.*]] = bitcast half* [[OUT]] to i32* +; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) poison, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_I32_F16:%.*]] poison, i32 [[LOAD]], 0 ; CHECK-NEXT: ret [[BITCAST_POINTER_I32_F16]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f16 -; CHECK-SAME: (half* [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F16:%.*]] @bitcast_pointer_i32_f16.body(half* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F16:%.*]] @bitcast_pointer_i32_f16.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_I32_F16]] [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast half* [[TMP0]] to i32* -; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP4]], align 4 +; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_f16_i32.body -; CHECK-SAME: (i32* [[OUT:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LOAD:%.*]] = load volatile half, half addrspace(1)* poison, align 2 -; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[OUT]] to half* +; CHECK-SAME: (ptr [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOAD:%.*]] = load volatile half, ptr addrspace(1) poison, align 2 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_F16_I32:%.*]] poison, half [[LOAD]], 0 ; CHECK-NEXT: ret [[BITCAST_POINTER_F16_I32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_f16_i32 -; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_F16_I32:%.*]] @bitcast_pointer_f16_i32.body(i32* poison) +; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_F16_I32:%.*]] @bitcast_pointer_f16_i32.body(ptr poison) ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_F16_I32]] [[TMP2]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to half* -; CHECK-NEXT: store half [[TMP3]], half* [[TMP4]], align 2 +; CHECK-NEXT: store half [[TMP3]], ptr [[TMP0]], align 2 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3f32.body -; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; 
CHECK-SAME: (ptr [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> -; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3F32:%.*]] poison, <4 x float> [[EXTRACTVEC]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V3F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3f32 -; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3F32:%.*]] @bitcast_struct_v3f32_v3f32.body(%struct.v3f32* poison, <3 x float> [[TMP1]]) +; CHECK-SAME: (ptr [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3F32:%.*]] @bitcast_struct_v3f32_v3f32.body(ptr poison, <3 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V3F32]] [[TMP3]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16 +; CHECK-NEXT: store <4 x float> [[TMP4]], ptr [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3i32.body -; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <3 x i32> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr [[OUT:%.*]], <3 x i32> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[VALUE]], <3 x i32> poison, <4 x i32> -; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3I32:%.*]] poison, <4 x i32> [[EXTRACTVEC]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V3I32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3i32 -; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <3 x i32> [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3I32:%.*]] @bitcast_struct_v3f32_v3i32.body(%struct.v3f32* poison, <3 x i32> [[TMP1]]) +; CHECK-SAME: (ptr [[TMP0:%.*]], <3 x i32> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3I32:%.*]] @bitcast_struct_v3f32_v3i32.body(ptr poison, <3 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V3I32]] [[TMP3]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 16 +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v4f32.body -; CHECK-SAME: (%struct.v4f32* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v4f32* [[OUT]] to <4 x float>* +; CHECK-SAME: (ptr [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V4F32:%.*]] poison, <4 x float> [[VALUE]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V4F32_V4F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v4f32 -; CHECK-SAME: (%struct.v4f32* [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V4F32:%.*]] @bitcast_struct_v4f32_v4f32.body(%struct.v4f32* poison, <4 x float> [[TMP1]]) +; CHECK-SAME: (ptr [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V4F32:%.*]] 
@bitcast_struct_v4f32_v4f32.body(ptr poison, <4 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V4F32_V4F32]] [[TMP3]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v4f32* [[TMP0]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16 +; CHECK-NEXT: store <4 x float> [[TMP4]], ptr [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v4i32.body -; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <4 x i32> [[VALUE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x i32>* +; CHECK-SAME: (ptr [[OUT:%.*]], <4 x i32> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V4I32:%.*]] poison, <4 x i32> [[VALUE]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V4I32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v4i32 -; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <4 x i32> [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V4I32:%.*]] @bitcast_struct_v3f32_v4i32.body(%struct.v3f32* poison, <4 x i32> [[TMP1]]) +; CHECK-SAME: (ptr [[TMP0:%.*]], <4 x i32> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V4I32:%.*]] @bitcast_struct_v3f32_v4i32.body(ptr poison, <4 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V4I32]] [[TMP3]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 16 +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v3f32.body -; CHECK-SAME: (%struct.v4f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> -; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v4f32* [[OUT]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V3F32:%.*]] poison, <4 x float> [[EXTRACTVEC]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V4F32_V3F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v3f32 -; CHECK-SAME: (%struct.v4f32* [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V3F32:%.*]] @bitcast_struct_v4f32_v3f32.body(%struct.v4f32* poison, <3 x float> [[TMP1]]) +; CHECK-SAME: (ptr [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V3F32:%.*]] @bitcast_struct_v4f32_v3f32.body(ptr poison, <3 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V4F32_V3F32]] [[TMP3]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v4f32* [[TMP0]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16 +; CHECK-NEXT: store <4 x float> [[TMP4]], ptr [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v2f32.body -; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <2 x float>* +; CHECK-SAME: (ptr [[OUT:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V2F32:%.*]] poison, <2 x float> [[VALUE]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V2F32]] [[TMP1]] ; ; ; 
CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v2f32 -; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <2 x float> [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V2F32:%.*]] @bitcast_struct_v3f32_v2f32.body(%struct.v3f32* poison, <2 x float> [[TMP1]]) +; CHECK-SAME: (ptr [[TMP0:%.*]], <2 x float> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V2F32:%.*]] @bitcast_struct_v3f32_v2f32.body(ptr poison, <2 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V2F32]] [[TMP3]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <2 x float>* -; CHECK-NEXT: store <2 x float> [[TMP4]], <2 x float>* [[TMP5]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP4]], ptr [[TMP0]], align 8 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v3f32.body -; CHECK-SAME: (%struct.v3f32.f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (ptr [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> -; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32.f32* [[OUT]] to <4 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_F32_V3F32:%.*]] poison, <4 x float> [[EXTRACTVEC]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_F32_V3F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v3f32 -; CHECK-SAME: (%struct.v3f32.f32* [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V3F32:%.*]] @bitcast_struct_v3f32_f32_v3f32.body(%struct.v3f32.f32* poison, <3 x float> [[TMP1]]) +; CHECK-SAME: (ptr [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V3F32:%.*]] @bitcast_struct_v3f32_f32_v3f32.body(ptr poison, <3 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_F32_V3F32]] [[TMP3]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32.f32* [[TMP0]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16 +; CHECK-NEXT: store <4 x float> [[TMP4]], ptr [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v4f32.body -; CHECK-SAME: (%struct.v3f32.f32* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32.f32* [[OUT]] to <4 x float>* +; CHECK-SAME: (ptr [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_F32_V4F32:%.*]] poison, <4 x float> [[VALUE]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_F32_V4F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v4f32 -; CHECK-SAME: (%struct.v3f32.f32* [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V4F32:%.*]] @bitcast_struct_v3f32_f32_v4f32.body(%struct.v3f32.f32* poison, <4 x float> [[TMP1]]) +; CHECK-SAME: (ptr [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V4F32:%.*]] @bitcast_struct_v3f32_f32_v4f32.body(ptr poison, <4 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_F32_V4F32]] [[TMP3]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32.f32* [[TMP0]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 
16 +; CHECK-NEXT: store <4 x float> [[TMP4]], ptr [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_i128_v4f32.body -; CHECK-SAME: (%struct.i128* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.i128* [[OUT]] to <4 x float>* +; CHECK-SAME: (ptr [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_I128_V4F32:%.*]] poison, <4 x float> [[VALUE]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_I128_V4F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_i128_v4f32 -; CHECK-SAME: (%struct.i128* [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_I128_V4F32:%.*]] @bitcast_struct_i128_v4f32.body(%struct.i128* poison, <4 x float> [[TMP1]]) +; CHECK-SAME: (ptr [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_I128_V4F32:%.*]] @bitcast_struct_i128_v4f32.body(ptr poison, <4 x float> [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_I128_V4F32]] [[TMP3]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.i128* [[TMP0]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16 +; CHECK-NEXT: store <4 x float> [[TMP4]], ptr [[TMP0]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_array_v4i32_v4f32.body -; CHECK-SAME: ([4 x i32]* [[OUT:%.*]], [4 x float] [[VALUE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[CAST:%.*]] = bitcast [4 x i32]* [[OUT]] to [4 x float]* +; CHECK-SAME: (ptr [[OUT:%.*]], [4 x float] [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_ARRAY_V4I32_V4F32:%.*]] poison, [4 x float] [[VALUE]], 0 ; CHECK-NEXT: ret [[BITCAST_ARRAY_V4I32_V4F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_array_v4i32_v4f32 -; CHECK-SAME: ([4 x i32]* [[TMP0:%.*]], [4 x float] [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_ARRAY_V4I32_V4F32:%.*]] @bitcast_array_v4i32_v4f32.body([4 x i32]* poison, [4 x float] [[TMP1]]) +; CHECK-SAME: (ptr [[TMP0:%.*]], [4 x float] [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_ARRAY_V4I32_V4F32:%.*]] @bitcast_array_v4i32_v4f32.body(ptr poison, [4 x float] [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_ARRAY_V4I32_V4F32]] [[TMP3]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast [4 x i32]* [[TMP0]] to [4 x float]* -; CHECK-NEXT: store [4 x float] [[TMP4]], [4 x float]* [[TMP5]], align 4 +; CHECK-NEXT: store [4 x float] [[TMP4]], ptr [[TMP0]], align 4 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@multi_return_bitcast_struct_v3f32_v3f32.body -; CHECK-SAME: (i1 [[COND:%.*]], %struct.v3f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (i1 [[COND:%.*]], ptr [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[COND]], label [[RET0:%.*]], label [[RET1:%.*]] ; CHECK: ret0: ; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> -; CHECK-NEXT: [[CAST0:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>* ; CHECK-NEXT: [[TMP0:%.*]] = insertvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] poison, <4 x float> [[EXTRACTVEC]], 0 ; CHECK-NEXT: ret [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP0]] ; CHECK: ret1: -; CHECK-NEXT: [[CAST1:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>* -; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, <4 x 
float> addrspace(1)* poison, align 16 +; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, ptr addrspace(1) poison, align 16 ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] poison, <4 x float> [[LOAD]], 0 ; CHECK-NEXT: ret [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@multi_return_bitcast_struct_v3f32_v3f32 -; CHECK-SAME: (i1 [[TMP0:%.*]], %struct.v3f32* [[TMP1:%.*]], <3 x float> [[TMP2:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP4:%.*]] = call [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] @multi_return_bitcast_struct_v3f32_v3f32.body(i1 [[TMP0]], %struct.v3f32* poison, <3 x float> [[TMP2]]) +; CHECK-SAME: (i1 [[TMP0:%.*]], ptr [[TMP1:%.*]], <3 x float> [[TMP2:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP4:%.*]] = call [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] @multi_return_bitcast_struct_v3f32_v3f32.body(i1 [[TMP0]], ptr poison, <3 x float> [[TMP2]]) ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP4]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast %struct.v3f32* [[TMP1]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP5]], <4 x float>* [[TMP6]], align 16 +; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP1]], align 16 ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_v3f32_struct_v3f32.body -; CHECK-SAME: (<3 x float>* [[OUT:%.*]], [[STRUCT_V3F32:%.*]] [[VALUE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[CAST:%.*]] = bitcast <3 x float>* [[OUT]] to %struct.v3f32* +; CHECK-SAME: (ptr [[OUT:%.*]], [[STRUCT_V3F32:%.*]] [[VALUE:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_V3F32_STRUCT_V3F32:%.*]] poison, [[STRUCT_V3F32]] [[VALUE]], 0 ; CHECK-NEXT: ret [[BITCAST_V3F32_STRUCT_V3F32]] [[TMP1]] ; ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_v3f32_struct_v3f32 -; CHECK-SAME: (<3 x float>* [[TMP0:%.*]], [[STRUCT_V3F32:%.*]] [[TMP1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_V3F32_STRUCT_V3F32:%.*]] @bitcast_v3f32_struct_v3f32.body(<3 x float>* poison, [[STRUCT_V3F32]] [[TMP1]]) +; CHECK-SAME: (ptr [[TMP0:%.*]], [[STRUCT_V3F32:%.*]] [[TMP1:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_V3F32_STRUCT_V3F32:%.*]] @bitcast_v3f32_struct_v3f32.body(ptr poison, [[STRUCT_V3F32]] [[TMP1]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_V3F32_STRUCT_V3F32]] [[TMP3]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <3 x float>* [[TMP0]] to %struct.v3f32* -; CHECK-NEXT: store [[STRUCT_V3F32]] [[TMP4]], %struct.v3f32* [[TMP5]], align 16 +; CHECK-NEXT: store [[STRUCT_V3F32]] [[TMP4]], ptr [[TMP0]], align 16 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/CodeGen/AMDGPU/s_addk_i32.ll b/llvm/test/CodeGen/AMDGPU/s_addk_i32.ll --- a/llvm/test/CodeGen/AMDGPU/s_addk_i32.ll +++ b/llvm/test/CodeGen/AMDGPU/s_addk_i32.ll @@ -10,9 +10,9 @@ ; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[VAL]] ; SI: buffer_store_dword [[VRESULT]] ; SI: s_endpgm -define amdgpu_kernel void @s_addk_i32_k0(i32 addrspace(1)* %out, i32 %b) { +define amdgpu_kernel void @s_addk_i32_k0(ptr addrspace(1) %out, i32 %b) { %add = add i32 %b, 65 - store i32 %add, i32 addrspace(1)* %out + store i32 %add, ptr addrspace(1) %out ret void } @@ -20,38 +20,38 @@ ; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x41 ; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x41 ; SI: s_endpgm -define amdgpu_kernel void @s_addk_i32_k0_x2(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %a, i32 %b) { +define amdgpu_kernel void @s_addk_i32_k0_x2(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i32 %a, i32 %b) { %add0 = add 
i32 %a, 65 %add1 = add i32 %b, 65 - store i32 %add0, i32 addrspace(1)* %out0 - store i32 %add1, i32 addrspace(1)* %out1 + store i32 %add0, ptr addrspace(1) %out0 + store i32 %add1, ptr addrspace(1) %out1 ret void } ; SI-LABEL: {{^}}s_addk_i32_k1: ; SI: s_addk_i32 {{s[0-9]+}}, 0x7fff{{$}} ; SI: s_endpgm -define amdgpu_kernel void @s_addk_i32_k1(i32 addrspace(1)* %out, i32 %b) { +define amdgpu_kernel void @s_addk_i32_k1(ptr addrspace(1) %out, i32 %b) { %add = add i32 %b, 32767 ; (1 << 15) - 1 - store i32 %add, i32 addrspace(1)* %out + store i32 %add, ptr addrspace(1) %out ret void } ; SI-LABEL: {{^}}s_addk_i32_k2: ; SI: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, 17 ; SI: s_endpgm -define amdgpu_kernel void @s_addk_i32_k2(i32 addrspace(1)* %out, i32 %b) { +define amdgpu_kernel void @s_addk_i32_k2(ptr addrspace(1) %out, i32 %b) { %add = add i32 %b, -17 - store i32 %add, i32 addrspace(1)* %out + store i32 %add, ptr addrspace(1) %out ret void } ; SI-LABEL: {{^}}s_addk_i32_k3: ; SI: s_addk_i32 {{s[0-9]+}}, 0xffbf{{$}} ; SI: s_endpgm -define amdgpu_kernel void @s_addk_i32_k3(i32 addrspace(1)* %out, i32 %b) { +define amdgpu_kernel void @s_addk_i32_k3(ptr addrspace(1) %out, i32 %b) { %add = add i32 %b, -65 - store i32 %add, i32 addrspace(1)* %out + store i32 %add, ptr addrspace(1) %out ret void } @@ -60,9 +60,9 @@ ; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x42 ; SI: s_endpgm ; Note: dummy argument here to prevent combining of descriptor loads for %out and %b -define amdgpu_kernel void @s_addk_v2i32_k0(<2 x i32> addrspace(1)* %out, i32 %dummy, <2 x i32> %b) { +define amdgpu_kernel void @s_addk_v2i32_k0(ptr addrspace(1) %out, i32 %dummy, <2 x i32> %b) { %add = add <2 x i32> %b, - store <2 x i32> %add, <2 x i32> addrspace(1)* %out + store <2 x i32> %add, ptr addrspace(1) %out ret void } @@ -72,9 +72,9 @@ ; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x43 ; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x44 ; SI: s_endpgm -define amdgpu_kernel void @s_addk_v4i32_k0(<4 x i32> addrspace(1)* %out, <4 x i32> %b) { +define amdgpu_kernel void @s_addk_v4i32_k0(ptr addrspace(1) %out, <4 x i32> %b) { %add = add <4 x i32> %b, - store <4 x i32> %add, <4 x i32> addrspace(1)* %out + store <4 x i32> %add, ptr addrspace(1) %out ret void } @@ -88,18 +88,18 @@ ; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x47 ; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x48 ; SI: s_endpgm -define amdgpu_kernel void @s_addk_v8i32_k0(<8 x i32> addrspace(1)* %out, <8 x i32> %b) { +define amdgpu_kernel void @s_addk_v8i32_k0(ptr addrspace(1) %out, <8 x i32> %b) { %add = add <8 x i32> %b, - store <8 x i32> %add, <8 x i32> addrspace(1)* %out + store <8 x i32> %add, ptr addrspace(1) %out ret void } ; SI-LABEL: {{^}}no_s_addk_i32_k0: ; SI: s_add_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x8000{{$}} ; SI: s_endpgm -define amdgpu_kernel void @no_s_addk_i32_k0(i32 addrspace(1)* %out, i32 %b) { +define amdgpu_kernel void @no_s_addk_i32_k0(ptr addrspace(1) %out, i32 %b) { %add = add i32 %b, 32768 ; 1 << 15 - store i32 %add, i32 addrspace(1)* %out + store i32 %add, ptr addrspace(1) %out ret void } @@ -107,10 +107,10 @@ ; SI-LABEL: {{^}}commute_s_addk_i32: ; SI: s_addk_i32 s{{[0-9]+}}, 0x800{{$}} -define amdgpu_kernel void @commute_s_addk_i32(i32 addrspace(1)* %out, i32 %b) #0 { +define amdgpu_kernel void @commute_s_addk_i32(ptr addrspace(1) %out, i32 %b) #0 { %size = call i32 @llvm.amdgcn.groupstaticsize() %add = add i32 %size, %b - call void asm sideeffect "; foo $0, $1", "v,s"([512 x i32] addrspace(3)* @lds, i32 %add) + call void asm sideeffect "; foo $0, $1", "v,s"(ptr addrspace(3) @lds, i32 %add) ret void } diff --git 
a/llvm/test/CodeGen/AMDGPU/s_movk_i32.ll b/llvm/test/CodeGen/AMDGPU/s_movk_i32.ll --- a/llvm/test/CodeGen/AMDGPU/s_movk_i32.ll +++ b/llvm/test/CodeGen/AMDGPU/s_movk_i32.ll @@ -6,10 +6,10 @@ ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xffff, v[[LO_VREG]] ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]] ; SI: s_endpgm -define amdgpu_kernel void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { - %loada = load i64, i64 addrspace(1)* %a, align 4 +define amdgpu_kernel void @s_movk_i32_k0(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) { + %loada = load i64, ptr addrspace(1) %a, align 4 %or = or i64 %loada, 4295032831 ; ((1 << 16) - 1) | (1 << 32) - store i64 %or, i64 addrspace(1)* %out + store i64 %or, ptr addrspace(1) %out call void asm sideeffect "; use $0", "s"(i64 4295032831) ret void } @@ -19,10 +19,10 @@ ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x7fff, v[[LO_VREG]] ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]] ; SI: s_endpgm -define amdgpu_kernel void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { - %loada = load i64, i64 addrspace(1)* %a, align 4 +define amdgpu_kernel void @s_movk_i32_k1(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) { + %loada = load i64, ptr addrspace(1) %a, align 4 %or = or i64 %loada, 4295000063 ; ((1 << 15) - 1) | (1 << 32) - store i64 %or, i64 addrspace(1)* %out + store i64 %or, ptr addrspace(1) %out call void asm sideeffect "; use $0", "s"(i64 4295000063) ret void } @@ -33,10 +33,10 @@ ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x7fff, v[[LO_VREG]] ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 64, v[[HI_VREG]] ; SI: s_endpgm -define amdgpu_kernel void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { - %loada = load i64, i64 addrspace(1)* %a, align 4 +define amdgpu_kernel void @s_movk_i32_k2(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) { + %loada = load i64, ptr addrspace(1) %a, align 4 %or = or i64 %loada, 274877939711 ; ((1 << 15) - 1) | (64 << 32) - store i64 %or, i64 addrspace(1)* %out + store i64 %or, ptr addrspace(1) %out call void asm sideeffect "; use $0", "s"(i64 274877939711) ret void } @@ -46,10 +46,10 @@ ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x8000, v[[LO_VREG]] ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]] ; SI: s_endpgm -define amdgpu_kernel void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { - %loada = load i64, i64 addrspace(1)* %a, align 4 +define amdgpu_kernel void @s_movk_i32_k3(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) { + %loada = load i64, ptr addrspace(1) %a, align 4 %or = or i64 %loada, 4295000064 ; (1 << 15) | (1 << 32) - store i64 %or, i64 addrspace(1)* %out + store i64 %or, ptr addrspace(1) %out call void asm sideeffect "; use $0", "s"(i64 4295000064) ret void } @@ -59,10 +59,10 @@ ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x20000, v[[LO_VREG]] ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]] ; SI: s_endpgm -define amdgpu_kernel void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { - %loada = load i64, i64 addrspace(1)* %a, align 4 +define amdgpu_kernel void @s_movk_i32_k4(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) { + %loada = load i64, ptr addrspace(1) %a, align 4 %or = or i64 %loada, 4295098368 ; (1 << 17) | (1 << 32) - store i64 %or, i64 addrspace(1)* %out + store i64 %or, ptr addrspace(1) %out call void asm sideeffect "; use $0", "s"(i64 4295098368) ret void } @@ -72,10 
+72,10 @@ ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xffffffef, v[[LO_VREG]] ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xff00ffff, v[[HI_VREG]] ; SI: s_endpgm -define amdgpu_kernel void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { - %loada = load i64, i64 addrspace(1)* %a, align 4 +define amdgpu_kernel void @s_movk_i32_k5(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) { + %loada = load i64, ptr addrspace(1) %a, align 4 %or = or i64 %loada, 18374967954648334319 ; -17 & 0xff00ffffffffffff - store i64 %or, i64 addrspace(1)* %out + store i64 %or, ptr addrspace(1) %out call void asm sideeffect "; use $0", "s"(i64 18374967954648334319) ret void } @@ -85,10 +85,10 @@ ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x41, v[[LO_VREG]] ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 63, v[[HI_VREG]] ; SI: s_endpgm -define amdgpu_kernel void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { - %loada = load i64, i64 addrspace(1)* %a, align 4 +define amdgpu_kernel void @s_movk_i32_k6(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) { + %loada = load i64, ptr addrspace(1) %a, align 4 %or = or i64 %loada, 270582939713 ; 65 | (63 << 32) - store i64 %or, i64 addrspace(1)* %out + store i64 %or, ptr addrspace(1) %out call void asm sideeffect "; use $0", "s"(i64 270582939713) ret void } @@ -98,10 +98,10 @@ ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x2000, v[[LO_VREG]] ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x4000, v[[HI_VREG]] ; SI: s_endpgm -define amdgpu_kernel void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { - %loada = load i64, i64 addrspace(1)* %a, align 4 +define amdgpu_kernel void @s_movk_i32_k7(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) { + %loada = load i64, ptr addrspace(1) %a, align 4 %or = or i64 %loada, 70368744185856; ((1 << 13)) | ((1 << 14) << 32) - store i64 %or, i64 addrspace(1)* %out + store i64 %or, ptr addrspace(1) %out call void asm sideeffect "; use $0", "s"(i64 70368744185856) ret void } @@ -111,10 +111,10 @@ ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xffff8000, v[[LO_VREG]] ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x11111111, v[[HI_VREG]] ; SI: s_endpgm -define amdgpu_kernel void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { - %loada = load i64, i64 addrspace(1)* %a, align 4 +define amdgpu_kernel void @s_movk_i32_k8(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) { + %loada = load i64, ptr addrspace(1) %a, align 4 %or = or i64 %loada, 1229782942255906816 ; 0x11111111ffff8000 - store i64 %or, i64 addrspace(1)* %out + store i64 %or, ptr addrspace(1) %out call void asm sideeffect "; use $0", "s"(i64 1229782942255906816) ret void } @@ -124,10 +124,10 @@ ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xffff8001, v[[LO_VREG]] ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x11111111, v[[HI_VREG]] ; SI: s_endpgm -define amdgpu_kernel void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { - %loada = load i64, i64 addrspace(1)* %a, align 4 +define amdgpu_kernel void @s_movk_i32_k9(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) { + %loada = load i64, ptr addrspace(1) %a, align 4 %or = or i64 %loada, 1229782942255906817 ; 0x11111111ffff8001 - store i64 %or, i64 addrspace(1)* %out + store i64 %or, ptr addrspace(1) %out call void asm sideeffect "; use $0", "s"(i64 1229782942255906817) ret void } @@ -137,10 +137,10 @@ ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xffff8888, v[[LO_VREG]] ; SI-DAG: 
v_or_b32_e32 {{v[0-9]+}}, 0x11111111, v[[HI_VREG]] ; SI: s_endpgm -define amdgpu_kernel void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { - %loada = load i64, i64 addrspace(1)* %a, align 4 +define amdgpu_kernel void @s_movk_i32_k10(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) { + %loada = load i64, ptr addrspace(1) %a, align 4 %or = or i64 %loada, 1229782942255909000 ; 0x11111111ffff8888 - store i64 %or, i64 addrspace(1)* %out + store i64 %or, ptr addrspace(1) %out call void asm sideeffect "; use $0", "s"(i64 1229782942255909000) ret void } @@ -150,10 +150,10 @@ ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xffff8fff, v[[LO_VREG]] ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x11111111, v[[HI_VREG]] ; SI: s_endpgm -define amdgpu_kernel void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { - %loada = load i64, i64 addrspace(1)* %a, align 4 +define amdgpu_kernel void @s_movk_i32_k11(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) { + %loada = load i64, ptr addrspace(1) %a, align 4 %or = or i64 %loada, 1229782942255910911 ; 0x11111111ffff8fff - store i64 %or, i64 addrspace(1)* %out + store i64 %or, ptr addrspace(1) %out call void asm sideeffect "; use $0", "s"(i64 1229782942255910911) ret void } @@ -163,10 +163,10 @@ ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xffff7001, v[[LO_VREG]] ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x11111111, v[[HI_VREG]] ; SI: s_endpgm -define amdgpu_kernel void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { - %loada = load i64, i64 addrspace(1)* %a, align 4 +define amdgpu_kernel void @s_movk_i32_k12(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) { + %loada = load i64, ptr addrspace(1) %a, align 4 %or = or i64 %loada, 1229782942255902721 ; 0x11111111ffff7001 - store i64 %or, i64 addrspace(1)* %out + store i64 %or, ptr addrspace(1) %out call void asm sideeffect "; use $0", "s"(i64 1229782942255902721) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll b/llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll --- a/llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll +++ b/llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll @@ -7,36 +7,36 @@ ; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[VAL]] ; SI: buffer_store_dword [[VRESULT]] ; SI: s_endpgm -define amdgpu_kernel void @s_mulk_i32_k0(i32 addrspace(1)* %out, i32 %b) { +define amdgpu_kernel void @s_mulk_i32_k0(ptr addrspace(1) %out, i32 %b) { %mul = mul i32 %b, 65 - store i32 %mul, i32 addrspace(1)* %out + store i32 %mul, ptr addrspace(1) %out ret void } ; SI-LABEL: {{^}}s_mulk_i32_k1: ; SI: s_mulk_i32 {{s[0-9]+}}, 0x7fff{{$}} ; SI: s_endpgm -define amdgpu_kernel void @s_mulk_i32_k1(i32 addrspace(1)* %out, i32 %b) { +define amdgpu_kernel void @s_mulk_i32_k1(ptr addrspace(1) %out, i32 %b) { %mul = mul i32 %b, 32767 ; (1 << 15) - 1 - store i32 %mul, i32 addrspace(1)* %out + store i32 %mul, ptr addrspace(1) %out ret void } ; SI-LABEL: {{^}}s_mulk_i32_k2: ; SI: s_mulk_i32 {{s[0-9]+}}, 0xffef{{$}} ; SI: s_endpgm -define amdgpu_kernel void @s_mulk_i32_k2(i32 addrspace(1)* %out, i32 %b) { +define amdgpu_kernel void @s_mulk_i32_k2(ptr addrspace(1) %out, i32 %b) { %mul = mul i32 %b, -17 - store i32 %mul, i32 addrspace(1)* %out + store i32 %mul, ptr addrspace(1) %out ret void } ; SI-LABEL: {{^}}no_s_mulk_i32_k0: ; SI: s_mul_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x8001{{$}} ; SI: s_endpgm -define amdgpu_kernel void @no_s_mulk_i32_k0(i32 addrspace(1)* %out, i32 %b) { +define amdgpu_kernel void @no_s_mulk_i32_k0(ptr addrspace(1) 
%out, i32 %b) { %mul = mul i32 %b, 32769 ; (1 << 15) + 1 - store i32 %mul, i32 addrspace(1)* %out + store i32 %mul, ptr addrspace(1) %out ret void } @@ -44,10 +44,10 @@ ; SI-LABEL: {{^}}commute_s_mulk_i32: ; SI: s_mulk_i32 s{{[0-9]+}}, 0x800{{$}} -define amdgpu_kernel void @commute_s_mulk_i32(i32 addrspace(1)* %out, i32 %b) #0 { +define amdgpu_kernel void @commute_s_mulk_i32(ptr addrspace(1) %out, i32 %b) #0 { %size = call i32 @llvm.amdgcn.groupstaticsize() %add = mul i32 %size, %b - call void asm sideeffect "; foo $0, $1", "v,s"([512 x i32] addrspace(3)* @lds, i32 %add) + call void asm sideeffect "; foo $0, $1", "v,s"(ptr addrspace(3) @lds, i32 %add) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sad.ll b/llvm/test/CodeGen/AMDGPU/sad.ll --- a/llvm/test/CodeGen/AMDGPU/sad.ll +++ b/llvm/test/CodeGen/AMDGPU/sad.ll @@ -2,7 +2,7 @@ ; GCN-LABEL: {{^}}v_sad_u32_pat1: ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @v_sad_u32_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { +define amdgpu_kernel void @v_sad_u32_pat1(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) { %icmp0 = icmp ugt i32 %a, %b %t0 = select i1 %icmp0, i32 %a, i32 %b @@ -12,13 +12,13 @@ %ret0 = sub i32 %t0, %t1 %ret = add i32 %ret0, %c - store i32 %ret, i32 addrspace(1)* %out + store i32 %ret, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_sad_u32_constant_pat1: ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, 20 -define amdgpu_kernel void @v_sad_u32_constant_pat1(i32 addrspace(1)* %out, i32 %a) { +define amdgpu_kernel void @v_sad_u32_constant_pat1(ptr addrspace(1) %out, i32 %a) { %icmp0 = icmp ugt i32 %a, 90 %t0 = select i1 %icmp0, i32 %a, i32 90 @@ -28,13 +28,13 @@ %ret0 = sub i32 %t0, %t1 %ret = add i32 %ret0, 20 - store i32 %ret, i32 addrspace(1)* %out + store i32 %ret, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_sad_u32_pat2: ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @v_sad_u32_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { +define amdgpu_kernel void @v_sad_u32_pat2(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) { %icmp0 = icmp ugt i32 %a, %b %sub0 = sub i32 %a, %b %sub1 = sub i32 %b, %a @@ -42,7 +42,7 @@ %ret = add i32 %ret0, %c - store i32 %ret, i32 addrspace(1)* %out + store i32 %ret, ptr addrspace(1) %out ret void } @@ -51,7 +51,7 @@ ; GCN: s_min_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} ; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} ; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -define amdgpu_kernel void @v_sad_u32_multi_use_sub_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { +define amdgpu_kernel void @v_sad_u32_multi_use_sub_pat1(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) { %icmp0 = icmp ugt i32 %a, %b %t0 = select i1 %icmp0, i32 %a, i32 %b @@ -59,16 +59,16 @@ %t1 = select i1 %icmp1, i32 %a, i32 %b %ret0 = sub i32 %t0, %t1 - store volatile i32 %ret0, i32 addrspace(5)*undef + store volatile i32 %ret0, ptr addrspace(5) undef %ret = add i32 %ret0, %c - store i32 %ret, i32 addrspace(1)* %out + store i32 %ret, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_sad_u32_multi_use_add_pat1: ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @v_sad_u32_multi_use_add_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { +define amdgpu_kernel void @v_sad_u32_multi_use_add_pat1(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) { %icmp0 = icmp ugt i32 %a, %b %t0 = select i1 %icmp0, i32 %a, i32 %b @@ -77,17 +77,17 @@ %ret0 = sub 
i32 %t0, %t1 %ret = add i32 %ret0, %c - store volatile i32 %ret, i32 addrspace(5)*undef - store i32 %ret, i32 addrspace(1)* %out + store volatile i32 %ret, ptr addrspace(5) undef + store i32 %ret, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_sad_u32_multi_use_max_pat1: ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @v_sad_u32_multi_use_max_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { +define amdgpu_kernel void @v_sad_u32_multi_use_max_pat1(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) { %icmp0 = icmp ugt i32 %a, %b %t0 = select i1 %icmp0, i32 %a, i32 %b - store volatile i32 %t0, i32 addrspace(5)*undef + store volatile i32 %t0, ptr addrspace(5) undef %icmp1 = icmp ule i32 %a, %b %t1 = select i1 %icmp1, i32 %a, i32 %b @@ -95,40 +95,40 @@ %ret0 = sub i32 %t0, %t1 %ret = add i32 %ret0, %c - store i32 %ret, i32 addrspace(1)* %out + store i32 %ret, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_sad_u32_multi_use_min_pat1: ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @v_sad_u32_multi_use_min_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { +define amdgpu_kernel void @v_sad_u32_multi_use_min_pat1(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) { %icmp0 = icmp ugt i32 %a, %b %t0 = select i1 %icmp0, i32 %a, i32 %b %icmp1 = icmp ule i32 %a, %b %t1 = select i1 %icmp1, i32 %a, i32 %b - store volatile i32 %t1, i32 addrspace(5)*undef + store volatile i32 %t1, ptr addrspace(5) undef %ret0 = sub i32 %t0, %t1 %ret = add i32 %ret0, %c - store i32 %ret, i32 addrspace(1)* %out + store i32 %ret, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_sad_u32_multi_use_sub_pat2: ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @v_sad_u32_multi_use_sub_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { +define amdgpu_kernel void @v_sad_u32_multi_use_sub_pat2(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) { %icmp0 = icmp ugt i32 %a, %b %sub0 = sub i32 %a, %b - store volatile i32 %sub0, i32 addrspace(5)*undef + store volatile i32 %sub0, ptr addrspace(5) undef %sub1 = sub i32 %b, %a %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1 %ret = add i32 %ret0, %c - store i32 %ret, i32 addrspace(1)* %out + store i32 %ret, ptr addrspace(1) %out ret void } @@ -136,16 +136,16 @@ ; GCN-DAG: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} ; GCN-DAG: s_cmp_gt_u32 s{{[0-9]+}}, s{{[0-9]+}} ; GCN-DAG: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -define amdgpu_kernel void @v_sad_u32_multi_use_select_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { +define amdgpu_kernel void @v_sad_u32_multi_use_select_pat2(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) { %icmp0 = icmp ugt i32 %a, %b %sub0 = sub i32 %a, %b %sub1 = sub i32 %b, %a %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1 - store volatile i32 %ret0, i32 addrspace(5)*undef + store volatile i32 %ret0, ptr addrspace(5) undef %ret = add i32 %ret0, %c - store i32 %ret, i32 addrspace(1)* %out + store i32 %ret, ptr addrspace(1) %out ret void } @@ -154,7 +154,7 @@ ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @v_sad_u32_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +define amdgpu_kernel void @v_sad_u32_vector_pat1(ptr addrspace(1) %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { %icmp0 = 
icmp ugt <4 x i32> %a, %b %t0 = select <4 x i1> %icmp0, <4 x i32> %a, <4 x i32> %b @@ -164,7 +164,7 @@ %ret0 = sub <4 x i32> %t0, %t1 %ret = add <4 x i32> %ret0, %c - store <4 x i32> %ret, <4 x i32> addrspace(1)* %out + store <4 x i32> %ret, ptr addrspace(1) %out ret void } @@ -173,7 +173,7 @@ ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @v_sad_u32_vector_pat2(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +define amdgpu_kernel void @v_sad_u32_vector_pat2(ptr addrspace(1) %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { %icmp0 = icmp ugt <4 x i32> %a, %b %sub0 = sub <4 x i32> %a, %b %sub1 = sub <4 x i32> %b, %a @@ -181,13 +181,13 @@ %ret = add <4 x i32> %ret0, %c - store <4 x i32> %ret, <4 x i32> addrspace(1)* %out + store <4 x i32> %ret, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_sad_u32_i16_pat1: ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @v_sad_u32_i16_pat1(i16 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) { +define amdgpu_kernel void @v_sad_u32_i16_pat1(ptr addrspace(1) %out, i16 %a, i16 %b, i16 %c) { %icmp0 = icmp ugt i16 %a, %b %t0 = select i1 %icmp0, i16 %a, i16 %b @@ -198,16 +198,16 @@ %ret0 = sub i16 %t0, %t1 %ret = add i16 %ret0, %c - store i16 %ret, i16 addrspace(1)* %out + store i16 %ret, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_sad_u32_i16_pat2: ; GCN: v_sad_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @v_sad_u32_i16_pat2(i16 addrspace(1)* %out) { - %a = load volatile i16, i16 addrspace(1)* undef - %b = load volatile i16, i16 addrspace(1)* undef - %c = load volatile i16, i16 addrspace(1)* undef +define amdgpu_kernel void @v_sad_u32_i16_pat2(ptr addrspace(1) %out) { + %a = load volatile i16, ptr addrspace(1) undef + %b = load volatile i16, ptr addrspace(1) undef + %c = load volatile i16, ptr addrspace(1) undef %icmp0 = icmp ugt i16 %a, %b %sub0 = sub i16 %a, %b %sub1 = sub i16 %b, %a @@ -215,13 +215,13 @@ %ret = add i16 %ret0, %c - store i16 %ret, i16 addrspace(1)* %out + store i16 %ret, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_sad_u32_i8_pat1: ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @v_sad_u32_i8_pat1(i8 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) { +define amdgpu_kernel void @v_sad_u32_i8_pat1(ptr addrspace(1) %out, i8 %a, i8 %b, i8 %c) { %icmp0 = icmp ugt i8 %a, %b %t0 = select i1 %icmp0, i8 %a, i8 %b @@ -231,16 +231,16 @@ %ret0 = sub i8 %t0, %t1 %ret = add i8 %ret0, %c - store i8 %ret, i8 addrspace(1)* %out + store i8 %ret, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_sad_u32_i8_pat2: ; GCN: v_sad_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @v_sad_u32_i8_pat2(i8 addrspace(1)* %out) { - %a = load volatile i8, i8 addrspace(1)* undef - %b = load volatile i8, i8 addrspace(1)* undef - %c = load volatile i8, i8 addrspace(1)* undef +define amdgpu_kernel void @v_sad_u32_i8_pat2(ptr addrspace(1) %out) { + %a = load volatile i8, ptr addrspace(1) undef + %b = load volatile i8, ptr addrspace(1) undef + %c = load volatile i8, ptr addrspace(1) undef %icmp0 = icmp ugt i8 %a, %b %sub0 = sub i8 %a, %b %sub1 = sub i8 %b, %a @@ -248,7 +248,7 @@ %ret = add i8 %ret0, %c - store i8 %ret, i8 addrspace(1)* %out + store i8 %ret, ptr addrspace(1) %out ret void } @@ -260,7 
+260,7 @@ ; GCN-DAG: s_sub_i32 ; GCN-DAG: s_lshr_b32 ; GCN: s_add_i32 -define amdgpu_kernel void @s_sad_u32_i8_pat2(i8 addrspace(1)* %out, i8 zeroext %a, i8 zeroext %b, i8 zeroext %c) { +define amdgpu_kernel void @s_sad_u32_i8_pat2(ptr addrspace(1) %out, i8 zeroext %a, i8 zeroext %b, i8 zeroext %c) { %icmp0 = icmp ugt i8 %a, %b %sub0 = sub i8 %a, %b %sub1 = sub i8 %b, %a @@ -268,7 +268,7 @@ %ret = add i8 %ret0, %c - store i8 %ret, i8 addrspace(1)* %out + store i8 %ret, ptr addrspace(1) %out ret void } @@ -277,7 +277,7 @@ ; GCN-DAG: s_max_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} ; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} ; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -define amdgpu_kernel void @v_sad_u32_mismatched_operands_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) { +define amdgpu_kernel void @v_sad_u32_mismatched_operands_pat1(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %d) { %icmp0 = icmp ugt i32 %a, %b %t0 = select i1 %icmp0, i32 %a, i32 %b @@ -287,7 +287,7 @@ %ret0 = sub i32 %t0, %t1 %ret = add i32 %ret0, %c - store i32 %ret, i32 addrspace(1)* %out + store i32 %ret, ptr addrspace(1) %out ret void } @@ -295,7 +295,7 @@ ; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} ; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} ; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -define amdgpu_kernel void @v_sad_u32_mismatched_operands_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) { +define amdgpu_kernel void @v_sad_u32_mismatched_operands_pat2(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %d) { %icmp0 = icmp ugt i32 %a, %b %sub0 = sub i32 %a, %d %sub1 = sub i32 %b, %a @@ -303,7 +303,7 @@ %ret = add i32 %ret0, %c - store i32 %ret, i32 addrspace(1)* %out + store i32 %ret, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/saddo.ll b/llvm/test/CodeGen/AMDGPU/saddo.ll --- a/llvm/test/CodeGen/AMDGPU/saddo.ll +++ b/llvm/test/CodeGen/AMDGPU/saddo.ll @@ -12,7 +12,7 @@ declare { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -define amdgpu_kernel void @saddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { +define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind { ; SI-LABEL: saddo_i64_zext: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -120,11 +120,11 @@ %carry = extractvalue { i64, i1 } %sadd, 1 %ext = zext i1 %carry to i64 %add2 = add i64 %val, %ext - store i64 %add2, i64 addrspace(1)* %out, align 8 + store i64 %add2, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @s_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind { +define amdgpu_kernel void @s_saddo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) nounwind { ; SI-LABEL: s_saddo_i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -224,12 +224,12 @@ %sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind %val = extractvalue { i32, i1 } %sadd, 0 %carry = extractvalue { i32, i1 } %sadd, 1 - store i32 %val, i32 addrspace(1)* %out, align 4 - store i1 %carry, i1 addrspace(1)* %carryout + store i32 %val, ptr addrspace(1) %out, align 4 + store i1 %carry, ptr addrspace(1) %carryout ret void } -define amdgpu_kernel void @v_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { +define amdgpu_kernel void @v_saddo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, 
ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind { ; SI-LABEL: v_saddo_i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 @@ -336,17 +336,17 @@ ; GFX11-NEXT: global_store_b8 v0, v2, s[2:3] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %a = load i32, i32 addrspace(1)* %aptr, align 4 - %b = load i32, i32 addrspace(1)* %bptr, align 4 + %a = load i32, ptr addrspace(1) %aptr, align 4 + %b = load i32, ptr addrspace(1) %bptr, align 4 %sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind %val = extractvalue { i32, i1 } %sadd, 0 %carry = extractvalue { i32, i1 } %sadd, 1 - store i32 %val, i32 addrspace(1)* %out, align 4 - store i1 %carry, i1 addrspace(1)* %carryout + store i32 %val, ptr addrspace(1) %out, align 4 + store i1 %carry, ptr addrspace(1) %carryout ret void } -define amdgpu_kernel void @s_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind { +define amdgpu_kernel void @s_saddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a, i64 %b) nounwind { ; SI-LABEL: s_saddo_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 @@ -453,12 +453,12 @@ %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind %val = extractvalue { i64, i1 } %sadd, 0 %carry = extractvalue { i64, i1 } %sadd, 1 - store i64 %val, i64 addrspace(1)* %out, align 8 - store i1 %carry, i1 addrspace(1)* %carryout + store i64 %val, ptr addrspace(1) %out, align 8 + store i1 %carry, ptr addrspace(1) %carryout ret void } -define amdgpu_kernel void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { +define amdgpu_kernel void @v_saddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind { ; SI-LABEL: v_saddo_i64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 @@ -574,17 +574,17 @@ ; GFX11-NEXT: global_store_b8 v6, v0, s[6:7] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %a = load i64, i64 addrspace(1)* %aptr, align 4 - %b = load i64, i64 addrspace(1)* %bptr, align 4 + %a = load i64, ptr addrspace(1) %aptr, align 4 + %b = load i64, ptr addrspace(1) %bptr, align 4 %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind %val = extractvalue { i64, i1 } %sadd, 0 %carry = extractvalue { i64, i1 } %sadd, 1 - store i64 %val, i64 addrspace(1)* %out, align 8 - store i1 %carry, i1 addrspace(1)* %carryout + store i64 %val, ptr addrspace(1) %out, align 8 + store i1 %carry, ptr addrspace(1) %carryout ret void } -define amdgpu_kernel void @v_saddo_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %carryout, <2 x i32> addrspace(1)* %aptr, <2 x i32> addrspace(1)* %bptr) nounwind { +define amdgpu_kernel void @v_saddo_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind { ; SI-LABEL: v_saddo_v2i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 @@ -713,13 +713,13 @@ ; GFX11-NEXT: global_store_b64 v5, v[0:1], s[2:3] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %a = load <2 x i32>, <2 x i32> addrspace(1)* %aptr, align 4 - %b = load <2 x i32>, <2 x i32> addrspace(1)* %bptr, align 4 + %a = load <2 x i32>, ptr addrspace(1) %aptr, align 4 + %b = load <2 x i32>, ptr addrspace(1) %bptr, align 4 %sadd = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b) 
nounwind %val = extractvalue { <2 x i32>, <2 x i1> } %sadd, 0 %carry = extractvalue { <2 x i32>, <2 x i1> } %sadd, 1 - store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4 + store <2 x i32> %val, ptr addrspace(1) %out, align 4 %carry.ext = zext <2 x i1> %carry to <2 x i32> - store <2 x i32> %carry.ext, <2 x i32> addrspace(1)* %carryout + store <2 x i32> %carry.ext, ptr addrspace(1) %carryout ret void } diff --git a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll --- a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -24,7 +24,7 @@ ; GCN-HSA: flat_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}} ; GCN-HSA: flat_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}} -define amdgpu_kernel void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { +define amdgpu_kernel void @mubuf(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = call i32 @llvm.amdgcn.workitem.id.y() @@ -35,14 +35,14 @@ loop: ; preds = %loop, %entry %tmp4 = phi i64 [ 0, %entry ], [ %tmp5, %loop ] %tmp5 = add i64 %tmp2, %tmp4 - %tmp6 = getelementptr i8, i8 addrspace(1)* %in, i64 %tmp5 - %tmp7 = load i8, i8 addrspace(1)* %tmp6, align 1 + %tmp6 = getelementptr i8, ptr addrspace(1) %in, i64 %tmp5 + %tmp7 = load i8, ptr addrspace(1) %tmp6, align 1 %tmp8 = or i64 %tmp5, 1 - %tmp9 = getelementptr i8, i8 addrspace(1)* %in, i64 %tmp8 - %tmp10 = load i8, i8 addrspace(1)* %tmp9, align 1 + %tmp9 = getelementptr i8, ptr addrspace(1) %in, i64 %tmp8 + %tmp10 = load i8, ptr addrspace(1) %tmp9, align 1 %tmp11 = add i8 %tmp7, %tmp10 %tmp12 = sext i8 %tmp11 to i32 - store i32 %tmp12, i32 addrspace(1)* %out + store i32 %tmp12, ptr addrspace(1) %out %tmp13 = icmp slt i64 %tmp5, 10 br i1 %tmp13, label %loop, label %done @@ -64,25 +64,25 @@ ; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]] ; GCN-NOHSA: buffer_store_dword [[V_OUT]] ; GCN-HSA: flat_store_dword {{.*}}, [[V_OUT]] -define amdgpu_kernel void @smrd_valu(i32 addrspace(4)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @smrd_valu(ptr addrspace(1) %in, i32 %a, i32 %b, ptr addrspace(1) %out) #1 { entry: %tmp = icmp ne i32 %a, 0 br i1 %tmp, label %if, label %else if: ; preds = %entry - %tmp1 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(1)* %in + %tmp1 = load ptr addrspace(4), ptr addrspace(1) %in br label %endif else: ; preds = %entry - %tmp2 = getelementptr i32 addrspace(4)*, i32 addrspace(4)* addrspace(1)* %in - %tmp3 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(1)* %tmp2 + %tmp2 = getelementptr ptr addrspace(4), ptr addrspace(1) %in + %tmp3 = load ptr addrspace(4), ptr addrspace(1) %tmp2 br label %endif endif: ; preds = %else, %if - %tmp4 = phi i32 addrspace(4)* [ %tmp1, %if ], [ %tmp3, %else ] - %tmp5 = getelementptr i32, i32 addrspace(4)* %tmp4, i32 3000 - %tmp6 = load i32, i32 addrspace(4)* %tmp5 - store i32 %tmp6, i32 addrspace(1)* %out + %tmp4 = phi ptr addrspace(4) [ %tmp1, %if ], [ %tmp3, %else ] + %tmp5 = getelementptr i32, ptr addrspace(4) %tmp4, i32 3000 + %tmp6 = load i32, ptr addrspace(4) %tmp5 + store i32 %tmp6, ptr addrspace(1) %out ret void } @@ -92,13 +92,13 @@ ; GCN-NOHSA-NOT: v_add ; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}} ; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(4)* %in) #1 { +define amdgpu_kernel void 
@smrd_valu2(ptr addrspace(1) %out, ptr addrspace(4) %in) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp, 4 - %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(4)* %in, i32 %tmp, i32 4 - %tmp3 = load i32, i32 addrspace(4)* %tmp2 - store i32 %tmp3, i32 addrspace(1)* %out + %tmp2 = getelementptr [8 x i32], ptr addrspace(4) %in, i32 %tmp, i32 4 + %tmp3 = load i32, ptr addrspace(4) %tmp2 + store i32 %tmp3, ptr addrspace(1) %out ret void } @@ -112,14 +112,14 @@ ; GCN-NOHSA: buffer_store_dword ; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] ; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} -define amdgpu_kernel void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(4)* %in, i32 %c) #1 { +define amdgpu_kernel void @smrd_valu_ci_offset(ptr addrspace(1) %out, ptr addrspace(4) %in, i32 %c) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() - %tmp2 = getelementptr i32, i32 addrspace(4)* %in, i32 %tmp - %tmp3 = getelementptr i32, i32 addrspace(4)* %tmp2, i32 5000 - %tmp4 = load i32, i32 addrspace(4)* %tmp3 + %tmp2 = getelementptr i32, ptr addrspace(4) %in, i32 %tmp + %tmp3 = getelementptr i32, ptr addrspace(4) %tmp2, i32 5000 + %tmp4 = load i32, ptr addrspace(4) %tmp3 %tmp5 = add i32 %tmp4, %c - store i32 %tmp5, i32 addrspace(1)* %out + store i32 %tmp5, ptr addrspace(1) %out ret void } @@ -132,14 +132,14 @@ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} ; GCN-NOHSA: buffer_store_dwordx2 ; GCN-HSA: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(4)* %in, i64 %c) #1 { +define amdgpu_kernel void @smrd_valu_ci_offset_x2(ptr addrspace(1) %out, ptr addrspace(4) %in, i64 %c) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() - %tmp2 = getelementptr i64, i64 addrspace(4)* %in, i32 %tmp - %tmp3 = getelementptr i64, i64 addrspace(4)* %tmp2, i32 5000 - %tmp4 = load i64, i64 addrspace(4)* %tmp3 + %tmp2 = getelementptr i64, ptr addrspace(4) %in, i32 %tmp + %tmp3 = getelementptr i64, ptr addrspace(4) %tmp2, i32 5000 + %tmp4 = load i64, ptr addrspace(4) %tmp3 %tmp5 = or i64 %tmp4, %c - store i64 %tmp5, i64 addrspace(1)* %out + store i64 %tmp5, ptr addrspace(1) %out ret void } @@ -154,14 +154,14 @@ ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} ; GCN-NOHSA: buffer_store_dwordx4 ; GCN-HSA: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(4)* %in, <4 x i32> %c) #1 { +define amdgpu_kernel void @smrd_valu_ci_offset_x4(ptr addrspace(1) %out, ptr addrspace(4) %in, <4 x i32> %c) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() - %tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %in, i32 %tmp - %tmp3 = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %tmp2, i32 1234 - %tmp4 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp3 + %tmp2 = getelementptr <4 x i32>, ptr addrspace(4) %in, i32 %tmp + %tmp3 = getelementptr <4 x i32>, ptr addrspace(4) %tmp2, i32 1234 + %tmp4 = load <4 x i32>, ptr addrspace(4) %tmp3 %tmp5 = or <4 x i32> %tmp4, %c - store <4 x i32> %tmp5, <4 x i32> addrspace(1)* %out + store <4 x i32> %tmp5, ptr addrspace(1) %out ret void } @@ -188,14 +188,14 @@ ; GCN-NOHSA: buffer_store_dwordx4 ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -define amdgpu_kernel void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(4)* %in, <8 x i32> %c) #1 { +define 
amdgpu_kernel void @smrd_valu_ci_offset_x8(ptr addrspace(1) %out, ptr addrspace(4) %in, <8 x i32> %c) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() - %tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %in, i32 %tmp - %tmp3 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %tmp2, i32 1234 - %tmp4 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp3 + %tmp2 = getelementptr <8 x i32>, ptr addrspace(4) %in, i32 %tmp + %tmp3 = getelementptr <8 x i32>, ptr addrspace(4) %tmp2, i32 1234 + %tmp4 = load <8 x i32>, ptr addrspace(4) %tmp3 %tmp5 = or <8 x i32> %tmp4, %c - store <8 x i32> %tmp5, <8 x i32> addrspace(1)* %out + store <8 x i32> %tmp5, ptr addrspace(1) %out ret void } @@ -234,14 +234,14 @@ ; GCN-HSA: flat_load_dwordx4 ; GCN: s_endpgm -define amdgpu_kernel void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(4)* %in, <16 x i32> %c) #1 { +define amdgpu_kernel void @smrd_valu_ci_offset_x16(ptr addrspace(1) %out, ptr addrspace(4) %in, <16 x i32> %c) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() - %tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(4)* %in, i32 %tmp - %tmp3 = getelementptr <16 x i32>, <16 x i32> addrspace(4)* %tmp2, i32 1234 - %tmp4 = load <16 x i32>, <16 x i32> addrspace(4)* %tmp3 + %tmp2 = getelementptr <16 x i32>, ptr addrspace(4) %in, i32 %tmp + %tmp3 = getelementptr <16 x i32>, ptr addrspace(4) %tmp2, i32 1234 + %tmp4 = load <16 x i32>, ptr addrspace(4) %tmp3 %tmp5 = or <16 x i32> %tmp4, %c - store <16 x i32> %tmp5, <16 x i32> addrspace(1)* %out + store <16 x i32> %tmp5, ptr addrspace(1) %out ret void } @@ -251,27 +251,27 @@ ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]] ; GCN-NOHSA: buffer_store_dword [[ADD]] ; GCN-HSA: flat_store_dword {{.*}}, [[ADD]] -define amdgpu_kernel void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(4)* %in, i32 %a) #1 { +define amdgpu_kernel void @smrd_valu2_salu_user(ptr addrspace(1) %out, ptr addrspace(4) %in, i32 %a) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp, 4 - %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(4)* %in, i32 %tmp, i32 4 - %tmp3 = load i32, i32 addrspace(4)* %tmp2 + %tmp2 = getelementptr [8 x i32], ptr addrspace(4) %in, i32 %tmp, i32 4 + %tmp3 = load i32, ptr addrspace(4) %tmp2 %tmp4 = add i32 %tmp3, %a - store i32 %tmp4, i32 addrspace(1)* %out + store i32 %tmp4, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset: ; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}} ; GCN-HSA: flat_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(4)* %in) #1 { +define amdgpu_kernel void @smrd_valu2_max_smrd_offset(ptr addrspace(1) %out, ptr addrspace(4) %in) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp, 4 - %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(4)* %in, i32 %tmp, i32 255 - %tmp3 = load i32, i32 addrspace(4)* %tmp2 - store i32 %tmp3, i32 addrspace(1)* %out + %tmp2 = getelementptr [1024 x i32], ptr addrspace(4) %in, i32 %tmp, i32 255 + %tmp3 = load i32, ptr addrspace(4) %tmp2 + store i32 %tmp3, ptr addrspace(1) %out ret void } @@ -279,13 +279,13 @@ ; GCN-NOHSA-NOT: v_add ; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}} ; GCN-HSA: flat_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void 
@smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(4)* %in) #1 { +define amdgpu_kernel void @smrd_valu2_mubuf_offset(ptr addrspace(1) %out, ptr addrspace(4) %in) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp, 4 - %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(4)* %in, i32 %tmp, i32 256 - %tmp3 = load i32, i32 addrspace(4)* %tmp2 - store i32 %tmp3, i32 addrspace(1)* %out + %tmp2 = getelementptr [1024 x i32], ptr addrspace(4) %in, i32 %tmp, i32 256 + %tmp3 = load i32, ptr addrspace(4) %tmp2 + store i32 %tmp3, ptr addrspace(1) %out ret void } @@ -294,13 +294,12 @@ ; GCN-NOHSA: buffer_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -define amdgpu_kernel void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(4)* nocapture readonly %in) #1 { +define amdgpu_kernel void @s_load_imm_v8i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x() - %tmp1 = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tmp0 - %tmp2 = bitcast i32 addrspace(4)* %tmp1 to <8 x i32> addrspace(4)* - %tmp3 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp2, align 4 - store <8 x i32> %tmp3, <8 x i32> addrspace(1)* %out, align 32 + %tmp1 = getelementptr inbounds i32, ptr addrspace(4) %in, i32 %tmp0 + %tmp3 = load <8 x i32>, ptr addrspace(4) %tmp1, align 4 + store <8 x i32> %tmp3, ptr addrspace(1) %out, align 32 ret void } @@ -317,12 +316,11 @@ ; GCN-NOHSA: buffer_store_dword ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -define amdgpu_kernel void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(4)* nocapture readonly %in) #1 { +define amdgpu_kernel void @s_load_imm_v8i32_salu_user(ptr addrspace(1) %out, ptr addrspace(4) nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x() - %tmp1 = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tmp0 - %tmp2 = bitcast i32 addrspace(4)* %tmp1 to <8 x i32> addrspace(4)* - %tmp3 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp2, align 4 + %tmp1 = getelementptr inbounds i32, ptr addrspace(4) %in, i32 %tmp0 + %tmp3 = load <8 x i32>, ptr addrspace(4) %tmp1, align 4 %elt0 = extractelement <8 x i32> %tmp3, i32 0 %elt1 = extractelement <8 x i32> %tmp3, i32 1 @@ -341,7 +339,7 @@ %add5 = add i32 %add4, %elt6 %add6 = add i32 %add5, %elt7 - store i32 %add6, i32 addrspace(1)* %out + store i32 %add6, ptr addrspace(1) %out ret void } @@ -354,13 +352,12 @@ ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -define amdgpu_kernel void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(4)* nocapture readonly %in) #1 { +define amdgpu_kernel void @s_load_imm_v16i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x() - %tmp1 = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tmp0 - %tmp2 = bitcast i32 addrspace(4)* %tmp1 to <16 x i32> addrspace(4)* - %tmp3 = load <16 x i32>, <16 x i32> addrspace(4)* %tmp2, align 4 - store <16 x i32> %tmp3, <16 x i32> addrspace(1)* %out, align 32 + %tmp1 = getelementptr inbounds i32, ptr addrspace(4) %in, i32 %tmp0 + %tmp3 = load <16 x i32>, ptr addrspace(4) %tmp1, align 4 + store <16 x i32> %tmp3, ptr addrspace(1) %out, align 32 ret void } @@ -389,12 +386,11 @@ ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 ; GCN-HSA: flat_load_dwordx4 -define amdgpu_kernel void @s_load_imm_v16i32_salu_user(i32 
addrspace(1)* %out, i32 addrspace(4)* nocapture readonly %in) #1 { +define amdgpu_kernel void @s_load_imm_v16i32_salu_user(ptr addrspace(1) %out, ptr addrspace(4) nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x() - %tmp1 = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tmp0 - %tmp2 = bitcast i32 addrspace(4)* %tmp1 to <16 x i32> addrspace(4)* - %tmp3 = load <16 x i32>, <16 x i32> addrspace(4)* %tmp2, align 4 + %tmp1 = getelementptr inbounds i32, ptr addrspace(4) %in, i32 %tmp0 + %tmp3 = load <16 x i32>, ptr addrspace(4) %tmp1, align 4 %elt0 = extractelement <16 x i32> %tmp3, i32 0 %elt1 = extractelement <16 x i32> %tmp3, i32 1 @@ -429,7 +425,7 @@ %add13 = add i32 %add12, %elt14 %add14 = add i32 %add13, %elt15 - store i32 %add14, i32 addrspace(1)* %out + store i32 %add14, ptr addrspace(1) %out ret void } @@ -444,7 +440,7 @@ ; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[ONE]] ; GCN: {{^}}[[EXIT]]: ; GCN: s_endpgm -define amdgpu_kernel void @sopc_vopc_legalize_bug(i32 %cond, i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +define amdgpu_kernel void @sopc_vopc_legalize_bug(i32 %cond, ptr addrspace(1) %out, ptr addrspace(1) %in) { bb3: ; preds = %bb2 %tmp0 = bitcast i32 %cond to float %tmp1 = fadd float %tmp0, 2.500000e-01 @@ -453,7 +449,7 @@ br i1 %tmp3, label %bb6, label %bb7 bb6: - store i32 1, i32 addrspace(1)* %out + store i32 1, ptr addrspace(1) %out br label %bb7 bb7: ; preds = %bb3 @@ -478,7 +474,7 @@ bb4: %tmp5 = phi i32 [ %tmp3, %bb2 ], [ %tmp, %bb1 ] - store volatile i32 %tmp5, i32 addrspace(1)* undef + store volatile i32 %tmp5, ptr addrspace(1) undef br label %bb1 } @@ -487,7 +483,7 @@ ; GCN: [[LOOP_LABEL:.L[0-9a-zA-Z_]+]]: ; GCN: s_xor_b32 [[B]], [[B]], 0x400 ; GCN: s_cbranch_scc{{[01]}} [[LOOP_LABEL]] -define amdgpu_kernel void @phi_imm_in_sgprs(i32 addrspace(3)* %out, i32 %cond) { +define amdgpu_kernel void @phi_imm_in_sgprs(ptr addrspace(3) %out, i32 %cond) { entry: br label %loop @@ -496,8 +492,8 @@ %offset = phi i32 [1024, %entry], [%offset.xor, %loop] %offset.xor = xor i32 %offset, 1024 %offset.i = add i32 %offset.xor, %i - %ptr = getelementptr i32, i32 addrspace(3)* %out, i32 %offset.i - store i32 0, i32 addrspace(3)* %ptr + %ptr = getelementptr i32, ptr addrspace(3) %out, i32 %offset.i + store i32 0, ptr addrspace(3) %ptr %i.add = add i32 %i, 1 %cmp = icmp ult i32 %i.add, %cond br i1 %cmp, label %loop, label %exit diff --git a/llvm/test/CodeGen/AMDGPU/shader-addr64-nonuniform.ll b/llvm/test/CodeGen/AMDGPU/shader-addr64-nonuniform.ll --- a/llvm/test/CodeGen/AMDGPU/shader-addr64-nonuniform.ll +++ b/llvm/test/CodeGen/AMDGPU/shader-addr64-nonuniform.ll @@ -15,8 +15,8 @@ define amdgpu_ps float @nonuniform_uniform(i32 %arg18) { .entry: %tmp31 = sext i32 %arg18 to i64 - %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* @indexable, i64 0, i64 %tmp31 - %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16 + %tmp32 = getelementptr [6 x <3 x float>], ptr addrspace(1) @indexable, i64 0, i64 %tmp31 + %tmp33 = load <3 x float>, ptr addrspace(1) %tmp32, align 16 %tmp34 = extractelement <3 x float> %tmp33, i32 0 ret float %tmp34 } @@ -29,9 +29,9 @@ define amdgpu_ps float @uniform_nonuniform(i32 inreg %offset, i32 %arg18) { .entry: %tmp1 = zext i32 %arg18 to i64 - %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)* - %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset - %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16 + 
%tmp2 = inttoptr i64 %tmp1 to ptr addrspace(1) + %tmp32 = getelementptr [6 x <3 x float>], ptr addrspace(1) %tmp2, i32 0, i32 %offset + %tmp33 = load <3 x float>, ptr addrspace(1) %tmp32, align 16 %tmp34 = extractelement <3 x float> %tmp33, i32 0 ret float %tmp34 } @@ -44,9 +44,9 @@ define amdgpu_ps float @const_nonuniform(i32 %arg18) { .entry: %tmp1 = zext i32 %arg18 to i64 - %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)* - %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 1 - %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16 + %tmp2 = inttoptr i64 %tmp1 to ptr addrspace(1) + %tmp32 = getelementptr [6 x <3 x float>], ptr addrspace(1) %tmp2, i32 0, i32 1 + %tmp33 = load <3 x float>, ptr addrspace(1) %tmp32, align 16 %tmp34 = extractelement <3 x float> %tmp33, i32 0 ret float %tmp34 } @@ -59,9 +59,9 @@ define amdgpu_ps float @nonuniform_nonuniform(i32 %offset, i32 %arg18) { .entry: %tmp1 = zext i32 %arg18 to i64 - %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)* - %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset - %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16 + %tmp2 = inttoptr i64 %tmp1 to ptr addrspace(1) + %tmp32 = getelementptr [6 x <3 x float>], ptr addrspace(1) %tmp2, i32 0, i32 %offset + %tmp33 = load <3 x float>, ptr addrspace(1) %tmp32, align 16 %tmp34 = extractelement <3 x float> %tmp33, i32 0 ret float %tmp34 } @@ -73,8 +73,8 @@ define amdgpu_ps float @nonuniform_uniform_const(i32 %arg18) { .entry: %tmp31 = sext i32 %arg18 to i64 - %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* @indexable, i64 0, i64 %tmp31, i64 1 - %tmp33 = load float, float addrspace(1)* %tmp32, align 4 + %tmp32 = getelementptr [6 x <3 x float>], ptr addrspace(1) @indexable, i64 0, i64 %tmp31, i64 1 + %tmp33 = load float, ptr addrspace(1) %tmp32, align 4 ret float %tmp33 } @@ -85,9 +85,9 @@ define amdgpu_ps float @uniform_nonuniform_const(i32 inreg %offset, i32 %arg18) { .entry: %tmp1 = zext i32 %arg18 to i64 - %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)* - %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset, i32 1 - %tmp33 = load float, float addrspace(1)* %tmp32, align 4 + %tmp2 = inttoptr i64 %tmp1 to ptr addrspace(1) + %tmp32 = getelementptr [6 x <3 x float>], ptr addrspace(1) %tmp2, i32 0, i32 %offset, i32 1 + %tmp33 = load float, ptr addrspace(1) %tmp32, align 4 ret float %tmp33 } @@ -98,9 +98,9 @@ define amdgpu_ps float @nonuniform_nonuniform_const(i32 %offset, i32 %arg18) { .entry: %tmp1 = zext i32 %arg18 to i64 - %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)* - %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset, i32 1 - %tmp33 = load float, float addrspace(1)* %tmp32, align 4 + %tmp2 = inttoptr i64 %tmp1 to ptr addrspace(1) + %tmp32 = getelementptr [6 x <3 x float>], ptr addrspace(1) %tmp2, i32 0, i32 %offset, i32 1 + %tmp33 = load float, ptr addrspace(1) %tmp32, align 4 ret float %tmp33 } diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll --- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll @@ -8,7 +8,7 @@ ; Test that add/sub with a constant is swapped to sub/add with negated ; constant to minimize code size. 
-define amdgpu_kernel void @v_test_i32_x_sub_64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_i32_x_sub_64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_i32_x_sub_64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -77,15 +77,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %x = load i32, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %x = load i32, ptr addrspace(1) %gep %result = sub i32 %x, 64 - store i32 %result, i32 addrspace(1)* %gep.out + store i32 %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_i32_x_sub_64_multi_use: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -184,18 +184,18 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %x = load volatile i32, i32 addrspace(1)* %gep - %y = load volatile i32, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %x = load volatile i32, ptr addrspace(1) %gep + %y = load volatile i32, ptr addrspace(1) %gep %result0 = sub i32 %x, 64 %result1 = sub i32 %y, 64 - store volatile i32 %result0, i32 addrspace(1)* %gep.out - store volatile i32 %result1, i32 addrspace(1)* %gep.out + store volatile i32 %result0, ptr addrspace(1) %gep.out + store volatile i32 %result1, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_i32_64_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_i32_64_sub_x(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_i32_64_sub_x: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -264,15 +264,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %x = load i32, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %x = load i32, ptr addrspace(1) %gep %result = sub i32 64, %x - store i32 %result, i32 addrspace(1)* %gep.out + store i32 %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_i32_x_sub_65(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_i32_x_sub_65(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_i32_x_sub_65: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -341,15 +341,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 
%tid.ext - %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %x = load i32, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %x = load i32, ptr addrspace(1) %gep %result = sub i32 %x, 65 - store i32 %result, i32 addrspace(1)* %gep.out + store i32 %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_i32_65_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_i32_65_sub_x(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_i32_65_sub_x: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -418,15 +418,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %x = load i32, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %x = load i32, ptr addrspace(1) %gep %result = sub i32 65, %x - store i32 %result, i32 addrspace(1)* %gep.out + store i32 %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_i32_x_sub_neg16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_i32_x_sub_neg16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_i32_x_sub_neg16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -495,15 +495,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %x = load i32, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %x = load i32, ptr addrspace(1) %gep %result = sub i32 %x, -16 - store i32 %result, i32 addrspace(1)* %gep.out + store i32 %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_i32_neg16_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_i32_neg16_sub_x(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_i32_neg16_sub_x: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -572,15 +572,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %x = load i32, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %x = load i32, ptr addrspace(1) %gep %result = sub i32 -16, %x - store i32 %result, i32 addrspace(1)* %gep.out + store i32 %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_i32_x_sub_neg17(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_i32_x_sub_neg17(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_i32_x_sub_neg17: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -649,15 
+649,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %x = load i32, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %x = load i32, ptr addrspace(1) %gep %result = sub i32 %x, -17 - store i32 %result, i32 addrspace(1)* %gep.out + store i32 %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_i32_neg17_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_i32_neg17_sub_x(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_i32_neg17_sub_x: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -726,11 +726,11 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %x = load i32, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %x = load i32, ptr addrspace(1) %gep %result = sub i32 -17, %x - store i32 %result, i32 addrspace(1)* %gep.out + store i32 %result, ptr addrspace(1) %gep.out ret void } @@ -789,7 +789,7 @@ ret void } -define amdgpu_kernel void @v_test_i16_x_sub_64(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_i16_x_sub_64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -858,15 +858,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 %tid.ext - %x = load i16, i16 addrspace(1)* %gep + %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds i16, ptr addrspace(1) %out, i64 %tid.ext + %x = load i16, ptr addrspace(1) %gep %result = sub i16 %x, 64 - store i16 %result, i16 addrspace(1)* %gep.out + store i16 %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_i16_x_sub_64_zext_to_i32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -944,16 +944,16 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %x = load i16, i16 addrspace(1)* %gep + %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %x = load i16, ptr addrspace(1) %gep %result = sub i16 %x, 64 %zext = zext i16 %result to i32 - store i32 %zext, i32 addrspace(1)* %gep.out + store i32 %zext, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void 
@v_test_i16_x_sub_64_multi_use(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_i16_x_sub_64_multi_use: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1052,18 +1052,18 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 %tid.ext - %x = load volatile i16, i16 addrspace(1)* %gep - %y = load volatile i16, i16 addrspace(1)* %gep + %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds i16, ptr addrspace(1) %out, i64 %tid.ext + %x = load volatile i16, ptr addrspace(1) %gep + %y = load volatile i16, ptr addrspace(1) %gep %result0 = sub i16 %x, 64 %result1 = sub i16 %y, 64 - store volatile i16 %result0, i16 addrspace(1)* %gep.out - store volatile i16 %result1, i16 addrspace(1)* %gep.out + store volatile i16 %result0, ptr addrspace(1) %gep.out + store volatile i16 %result1, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_v2i16_x_sub_64_64(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_sub_64_64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_sub_64_64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1138,15 +1138,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = sub <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_sub_7_64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1222,15 +1222,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = sub <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_sub_64_123: ; SI: ; %bb.0: ; SI-NEXT: 
s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1306,16 +1306,16 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = sub <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } ; Can fold 0 and inline immediate in other half. -define amdgpu_kernel void @v_test_v2i16_x_sub_7_0(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_sub_7_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_sub_7_0: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1388,16 +1388,16 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = sub <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } ; Can fold 0 and inline immediate in other half. 
-define amdgpu_kernel void @v_test_v2i16_x_sub_0_16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_sub_0_16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_sub_0_16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1468,15 +1468,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = sub <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_v2i16_x_sub_0_1_0(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_sub_0_1_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_sub_0_1_0: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1548,15 +1548,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = sub <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_v2i16_x_sub_0_neg1_0(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_sub_0_neg1_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_sub_0_neg1_0: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1628,16 +1628,16 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = sub <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } ; -32 isn't an inline immediate, but 32 is -define amdgpu_kernel void @v_test_v2i16_x_add_neg32_neg32(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_add_neg32_neg32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_add_neg32_neg32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1712,15 +1712,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 
@llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = add <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_v2i16_x_add_0_neg32(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_add_0_neg32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_add_0_neg32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1791,15 +1791,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = add <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_v2i16_x_add_neg32_0(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_add_neg32_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_add_neg32_0: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1872,16 +1872,16 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = add <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } ; 16 and -16 are both inline immediates -define amdgpu_kernel void @v_test_v2i16_x_add_neg16_neg16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_add_neg16_neg16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_add_neg16_neg16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1956,15 +1956,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr 
inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = add <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_v2i16_x_add_0_neg16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_add_0_neg16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_add_0_neg16: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2035,15 +2035,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = add <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_v2i16_x_add_neg16_0(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_add_neg16_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_add_neg16_0: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2116,15 +2116,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = add <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_v2i16_x_add_neg_fpone(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_add_neg_fpone(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_add_neg_fpone: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2200,15 +2200,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = add <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfpone(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfpone(ptr 
addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_add_neg_negfpone: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2284,15 +2284,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = add <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_v2i16_x_add_neg_fptwo(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_add_neg_fptwo(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_add_neg_fptwo: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2368,15 +2368,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = add <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfptwo(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfptwo(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_add_neg_negfptwo: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2452,15 +2452,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = add <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_v2i16_x_add_undef_neg32(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_add_undef_neg32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_add_undef_neg32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2531,15 +2531,15 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> 
addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = add <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } -define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; SI-LABEL: v_test_v2i16_x_add_neg32_undef: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -2609,11 +2609,11 @@ ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext - %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext - %x = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext + %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext + %x = load <2 x i16>, ptr addrspace(1) %gep %result = add <2 x i16> %x, - store <2 x i16> %result, <2 x i16> addrspace(1)* %gep.out + store <2 x i16> %result, ptr addrspace(1) %gep.out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll --- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll @@ -187,8 +187,6 @@ ; SI-NEXT: s_branch .LBB3_3 ; SI-NEXT: .LBB3_1: ; in Loop: Header=BB3_3 Depth=1 ; SI-NEXT: s_mov_b64 s[8:9], 0 -; SI-NEXT: s_mov_b64 s[12:13], -1 -; SI-NEXT: s_mov_b64 s[14:15], -1 ; SI-NEXT: .LBB3_2: ; %Flow ; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1 ; SI-NEXT: s_and_b64 vcc, exec, s[14:15] @@ -206,7 +204,6 @@ ; SI-NEXT: s_cbranch_vccz .LBB3_1 ; SI-NEXT: ; %bb.5: ; %if.end ; SI-NEXT: ; in Loop: Header=BB3_3 Depth=1 -; SI-NEXT: s_mov_b64 s[14:15], -1 ; SI-NEXT: s_mov_b64 vcc, s[6:7] ; SI-NEXT: s_cbranch_vccz .LBB3_7 ; SI-NEXT: ; %bb.6: ; %if.else @@ -263,8 +260,6 @@ ; FLAT-NEXT: s_branch .LBB3_3 ; FLAT-NEXT: .LBB3_1: ; in Loop: Header=BB3_3 Depth=1 ; FLAT-NEXT: s_mov_b64 s[8:9], 0 -; FLAT-NEXT: s_mov_b64 s[12:13], -1 -; FLAT-NEXT: s_mov_b64 s[14:15], -1 ; FLAT-NEXT: .LBB3_2: ; %Flow ; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1 ; FLAT-NEXT: s_and_b64 vcc, exec, s[14:15] @@ -282,7 +277,6 @@ ; FLAT-NEXT: s_cbranch_vccz .LBB3_1 ; FLAT-NEXT: ; %bb.5: ; %if.end ; FLAT-NEXT: ; in Loop: Header=BB3_3 Depth=1 -; FLAT-NEXT: s_mov_b64 s[14:15], -1 ; FLAT-NEXT: s_mov_b64 vcc, s[6:7] ; FLAT-NEXT: s_cbranch_vccz .LBB3_7 ; FLAT-NEXT: ; %bb.6: ; %if.else diff --git a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll --- a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll +++ b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll @@ -1,9 +1,9 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -enable-var-scope 
-check-prefixes=GCN,GFX9 %s -@stored_lds_ptr = addrspace(3) global i32 addrspace(3)* undef, align 4 -@stored_constant_ptr = addrspace(3) global i32 addrspace(4)* undef, align 8 -@stored_global_ptr = addrspace(3) global i32 addrspace(1)* undef, align 8 +@stored_lds_ptr = addrspace(3) global ptr addrspace(3) undef, align 4 +@stored_constant_ptr = addrspace(3) global ptr addrspace(4) undef, align 8 +@stored_global_ptr = addrspace(3) global ptr addrspace(1) undef, align 8 ; GCN-LABEL: {{^}}reorder_local_load_global_store_local_load: ; CI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:1 offset1:3 @@ -12,19 +12,19 @@ ; GFX9: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:1 offset1:3 ; GFX9: global_store_dword ; GFX9: global_store_dword -define amdgpu_kernel void @reorder_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { - %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4 +define amdgpu_kernel void @reorder_local_load_global_store_local_load(ptr addrspace(1) %out, ptr addrspace(1) %gptr) #0 { + %ptr0 = load ptr addrspace(3), ptr addrspace(3) @stored_lds_ptr, align 4 - %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1 - %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 3 + %ptr1 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 1 + %ptr2 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 3 - %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4 - store i32 99, i32 addrspace(1)* %gptr, align 4 - %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4 + %tmp1 = load i32, ptr addrspace(3) %ptr1, align 4 + store i32 99, ptr addrspace(1) %gptr, align 4 + %tmp2 = load i32, ptr addrspace(3) %ptr2, align 4 %add = add nsw i32 %tmp1, %tmp2 - store i32 %add, i32 addrspace(1)* %out, align 4 + store i32 %add, ptr addrspace(1) %out, align 4 ret void } @@ -36,19 +36,19 @@ ; GFX9: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4 ; GFX9: global_store_dword ; GFX9: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12 -define amdgpu_kernel void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { - %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4 +define amdgpu_kernel void @no_reorder_local_load_volatile_global_store_local_load(ptr addrspace(1) %out, ptr addrspace(1) %gptr) #0 { + %ptr0 = load ptr addrspace(3), ptr addrspace(3) @stored_lds_ptr, align 4 - %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1 - %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 3 + %ptr1 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 1 + %ptr2 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 3 - %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4 - store volatile i32 99, i32 addrspace(1)* %gptr, align 4 - %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4 + %tmp1 = load i32, ptr addrspace(3) %ptr1, align 4 + store volatile i32 99, ptr addrspace(1) %gptr, align 4 + %tmp2 = load i32, ptr addrspace(3) %ptr2, align 4 %add = add nsw i32 %tmp1, %tmp2 - store i32 %add, i32 addrspace(1)* %out, align 4 + store i32 %add, ptr addrspace(1) %out, align 4 ret void } @@ -62,20 +62,20 @@ ; GFX9: s_barrier ; GFX9-DAG: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12 ; GFX9-DAG: global_store_dword -define amdgpu_kernel void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { - %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* 
addrspace(3)* @stored_lds_ptr, align 4 +define amdgpu_kernel void @no_reorder_barrier_local_load_global_store_local_load(ptr addrspace(1) %out, ptr addrspace(1) %gptr) #0 { + %ptr0 = load ptr addrspace(3), ptr addrspace(3) @stored_lds_ptr, align 4 - %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1 - %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 3 + %ptr1 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 1 + %ptr2 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 3 - %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4 - store i32 99, i32 addrspace(1)* %gptr, align 4 + %tmp1 = load i32, ptr addrspace(3) %ptr1, align 4 + store i32 99, ptr addrspace(1) %gptr, align 4 call void @llvm.amdgcn.s.barrier() #1 - %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4 + %tmp2 = load i32, ptr addrspace(3) %ptr2, align 4 %add = add nsw i32 %tmp1, %tmp2 - store i32 %add, i32 addrspace(1)* %out, align 4 + store i32 %add, ptr addrspace(1) %out, align 4 ret void } @@ -93,19 +93,19 @@ ; CI: buffer_store_dword ; GFX9: global_store_dword -define amdgpu_kernel void @reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { - %ptr0 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(3)* @stored_constant_ptr, align 8 +define amdgpu_kernel void @reorder_constant_load_global_store_constant_load(ptr addrspace(1) %out, ptr addrspace(1) %gptr) #0 { + %ptr0 = load ptr addrspace(4), ptr addrspace(3) @stored_constant_ptr, align 8 - %ptr1 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 1 - %ptr2 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 3 + %ptr1 = getelementptr inbounds i32, ptr addrspace(4) %ptr0, i64 1 + %ptr2 = getelementptr inbounds i32, ptr addrspace(4) %ptr0, i64 3 - %tmp1 = load i32, i32 addrspace(4)* %ptr1, align 4 - store i32 99, i32 addrspace(1)* %gptr, align 4 - %tmp2 = load i32, i32 addrspace(4)* %ptr2, align 4 + %tmp1 = load i32, ptr addrspace(4) %ptr1, align 4 + store i32 99, ptr addrspace(1) %gptr, align 4 + %tmp2 = load i32, ptr addrspace(4) %ptr2, align 4 %add = add nsw i32 %tmp1, %tmp2 - store i32 %add, i32 addrspace(1)* %out, align 4 + store i32 %add, ptr addrspace(1) %out, align 4 ret void } @@ -122,19 +122,19 @@ ; GCN-DAG: ds_write_b32 ; CI: buffer_store_dword ; GFX9: global_store_dword -define amdgpu_kernel void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr) #0 { - %ptr0 = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(3)* @stored_constant_ptr, align 8 +define amdgpu_kernel void @reorder_constant_load_local_store_constant_load(ptr addrspace(1) %out, ptr addrspace(3) %lptr) #0 { + %ptr0 = load ptr addrspace(4), ptr addrspace(3) @stored_constant_ptr, align 8 - %ptr1 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 1 - %ptr2 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 3 + %ptr1 = getelementptr inbounds i32, ptr addrspace(4) %ptr0, i64 1 + %ptr2 = getelementptr inbounds i32, ptr addrspace(4) %ptr0, i64 3 - %tmp1 = load i32, i32 addrspace(4)* %ptr1, align 4 - store i32 99, i32 addrspace(3)* %lptr, align 4 - %tmp2 = load i32, i32 addrspace(4)* %ptr2, align 4 + %tmp1 = load i32, ptr addrspace(4) %ptr1, align 4 + store i32 99, ptr addrspace(3) %lptr, align 4 + %tmp2 = load i32, ptr addrspace(4) %ptr2, align 4 %add = add nsw i32 %tmp1, %tmp2 - store i32 %add, i32 addrspace(1)* %out, align 4 + store i32 %add, ptr addrspace(1) %out, align 4 ret void } @@ -145,17 +145,17 @@ ; GCN: ds_write_b32 ; CI: 
buffer_store_dword ; GFX9: global_store_dword -define amdgpu_kernel void @reorder_smrd_load_local_store_smrd_load(i32 addrspace(1)* %out, i32 addrspace(3)* noalias %lptr, i32 addrspace(4)* %ptr0) #0 { - %ptr1 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 1 - %ptr2 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 2 +define amdgpu_kernel void @reorder_smrd_load_local_store_smrd_load(ptr addrspace(1) %out, ptr addrspace(3) noalias %lptr, ptr addrspace(4) %ptr0) #0 { + %ptr1 = getelementptr inbounds i32, ptr addrspace(4) %ptr0, i64 1 + %ptr2 = getelementptr inbounds i32, ptr addrspace(4) %ptr0, i64 2 - %tmp1 = load i32, i32 addrspace(4)* %ptr1, align 4 - store i32 99, i32 addrspace(3)* %lptr, align 4 - %tmp2 = load i32, i32 addrspace(4)* %ptr2, align 4 + %tmp1 = load i32, ptr addrspace(4) %ptr1, align 4 + store i32 99, ptr addrspace(3) %lptr, align 4 + %tmp2 = load i32, ptr addrspace(4) %ptr2, align 4 %add = add nsw i32 %tmp1, %tmp2 - store i32 %add, i32 addrspace(1)* %out, align 4 + store i32 %add, ptr addrspace(1) %out, align 4 ret void } @@ -168,17 +168,17 @@ ; GFX9: global_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4 ; GFX9: global_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:12 ; GFX9: ds_write_b32 -define amdgpu_kernel void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr, i32 addrspace(1)* %ptr0) #0 { - %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i64 1 - %ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i64 3 +define amdgpu_kernel void @reorder_global_load_local_store_global_load(ptr addrspace(1) %out, ptr addrspace(3) %lptr, ptr addrspace(1) %ptr0) #0 { + %ptr1 = getelementptr inbounds i32, ptr addrspace(1) %ptr0, i64 1 + %ptr2 = getelementptr inbounds i32, ptr addrspace(1) %ptr0, i64 3 - %tmp1 = load i32, i32 addrspace(1)* %ptr1, align 4 - store i32 99, i32 addrspace(3)* %lptr, align 4 - %tmp2 = load i32, i32 addrspace(1)* %ptr2, align 4 + %tmp1 = load i32, ptr addrspace(1) %ptr1, align 4 + store i32 99, ptr addrspace(3) %lptr, align 4 + %tmp2 = load i32, ptr addrspace(1) %ptr2, align 4 %add = add nsw i32 %tmp1, %tmp2 - store i32 %add, i32 addrspace(1)* %out, align 4 + store i32 %add, ptr addrspace(1) %out, align 4 ret void } @@ -189,21 +189,21 @@ ; CI: buffer_store_dword ; GFX9: global_store_dword ; GCN: s_endpgm -define amdgpu_kernel void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(3)* noalias nocapture %ptr0) #0 { - %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 3 - %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 100 - %ptr3 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 102 - - store i32 123, i32 addrspace(3)* %ptr1, align 4 - %tmp1 = load i32, i32 addrspace(3)* %ptr2, align 4 - %tmp2 = load i32, i32 addrspace(3)* %ptr3, align 4 - store i32 123, i32 addrspace(3)* %ptr2, align 4 - %tmp3 = load i32, i32 addrspace(3)* %ptr1, align 4 - store i32 789, i32 addrspace(3)* %ptr3, align 4 +define amdgpu_kernel void @reorder_local_offsets(ptr addrspace(1) nocapture %out, ptr addrspace(1) noalias nocapture readnone %gptr, ptr addrspace(3) noalias nocapture %ptr0) #0 { + %ptr1 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 3 + %ptr2 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 100 + %ptr3 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 102 + + store i32 123, ptr addrspace(3) %ptr1, align 4 + %tmp1 
= load i32, ptr addrspace(3) %ptr2, align 4 + %tmp2 = load i32, ptr addrspace(3) %ptr3, align 4 + store i32 123, ptr addrspace(3) %ptr2, align 4 + %tmp3 = load i32, ptr addrspace(3) %ptr1, align 4 + store i32 789, ptr addrspace(3) %ptr3, align 4 %add.0 = add nsw i32 %tmp2, %tmp1 %add.1 = add nsw i32 %add.0, %tmp3 - store i32 %add.1, i32 addrspace(1)* %out, align 4 + store i32 %add.1, ptr addrspace(1) %out, align 4 ret void } @@ -223,21 +223,21 @@ ; GFX9-DAG: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:408 ; GFX9: global_store_dword ; GFX9: s_endpgm -define amdgpu_kernel void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 { - %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 3 - %ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 100 - %ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 102 - - store i32 123, i32 addrspace(1)* %ptr1, align 4 - %tmp1 = load i32, i32 addrspace(1)* %ptr2, align 4 - %tmp2 = load i32, i32 addrspace(1)* %ptr3, align 4 - store i32 123, i32 addrspace(1)* %ptr2, align 4 - %tmp3 = load i32, i32 addrspace(1)* %ptr1, align 4 - store i32 789, i32 addrspace(1)* %ptr3, align 4 +define amdgpu_kernel void @reorder_global_offsets(ptr addrspace(1) nocapture %out, ptr addrspace(1) noalias nocapture readnone %gptr, ptr addrspace(1) noalias nocapture %ptr0) #0 { + %ptr1 = getelementptr inbounds i32, ptr addrspace(1) %ptr0, i32 3 + %ptr2 = getelementptr inbounds i32, ptr addrspace(1) %ptr0, i32 100 + %ptr3 = getelementptr inbounds i32, ptr addrspace(1) %ptr0, i32 102 + + store i32 123, ptr addrspace(1) %ptr1, align 4 + %tmp1 = load i32, ptr addrspace(1) %ptr2, align 4 + %tmp2 = load i32, ptr addrspace(1) %ptr3, align 4 + store i32 123, ptr addrspace(1) %ptr2, align 4 + %tmp3 = load i32, ptr addrspace(1) %ptr1, align 4 + store i32 789, ptr addrspace(1) %ptr3, align 4 %add.0 = add nsw i32 %tmp2, %tmp1 %add.1 = add nsw i32 %add.0, %tmp3 - store i32 %add.1, i32 addrspace(1)* %out, align 4 + store i32 %add.1, ptr addrspace(1) %out, align 4 ret void } @@ -266,49 +266,49 @@ ; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:36 ; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:52 -define amdgpu_kernel void @reorder_global_offsets_addr64_soffset0(i32 addrspace(1)* noalias nocapture %ptr.base) #0 { +define amdgpu_kernel void @reorder_global_offsets_addr64_soffset0(ptr addrspace(1) noalias nocapture %ptr.base) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() %id.ext = sext i32 %id to i64 - %ptr0 = getelementptr inbounds i32, i32 addrspace(1)* %ptr.base, i64 %id.ext - %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 3 - %ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 5 - %ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 7 - %ptr4 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 9 - %ptr5 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 11 - %ptr6 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 13 - - store i32 789, i32 addrspace(1)* %ptr0, align 4 - %tmp1 = load i32, i32 addrspace(1)* %ptr1, align 4 - store i32 123, i32 addrspace(1)* %ptr2, align 4 - %tmp2 = load i32, i32 addrspace(1)* %ptr3, align 4 + %ptr0 = getelementptr inbounds i32, ptr addrspace(1) %ptr.base, i64 %id.ext + %ptr1 = getelementptr inbounds i32, ptr addrspace(1) %ptr0, i32 3 + %ptr2 = getelementptr 
inbounds i32, ptr addrspace(1) %ptr0, i32 5 + %ptr3 = getelementptr inbounds i32, ptr addrspace(1) %ptr0, i32 7 + %ptr4 = getelementptr inbounds i32, ptr addrspace(1) %ptr0, i32 9 + %ptr5 = getelementptr inbounds i32, ptr addrspace(1) %ptr0, i32 11 + %ptr6 = getelementptr inbounds i32, ptr addrspace(1) %ptr0, i32 13 + + store i32 789, ptr addrspace(1) %ptr0, align 4 + %tmp1 = load i32, ptr addrspace(1) %ptr1, align 4 + store i32 123, ptr addrspace(1) %ptr2, align 4 + %tmp2 = load i32, ptr addrspace(1) %ptr3, align 4 %add.0 = add nsw i32 %tmp1, %tmp2 - store i32 %add.0, i32 addrspace(1)* %ptr4, align 4 - %tmp3 = load i32, i32 addrspace(1)* %ptr5, align 4 + store i32 %add.0, ptr addrspace(1) %ptr4, align 4 + %tmp3 = load i32, ptr addrspace(1) %ptr5, align 4 %add.1 = add nsw i32 %add.0, %tmp3 - store i32 %add.1, i32 addrspace(1)* %ptr6, align 4 + store i32 %add.1, ptr addrspace(1) %ptr6, align 4 ret void } ; GCN-LABEL: {{^}}reorder_local_load_tbuffer_store_local_load: ; GCN: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:1 offset1:2 ; GCN: tbuffer_store_format -define amdgpu_vs void @reorder_local_load_tbuffer_store_local_load(i32 addrspace(1)* %out, i32 %a1, i32 %vaddr) #0 { - %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4 +define amdgpu_vs void @reorder_local_load_tbuffer_store_local_load(ptr addrspace(1) %out, i32 %a1, i32 %vaddr) #0 { + %ptr0 = load ptr addrspace(3), ptr addrspace(3) @stored_lds_ptr, align 4 - %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1 - %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2 + %ptr1 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 1 + %ptr2 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 2 - %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4 + %tmp1 = load i32, ptr addrspace(3) %ptr1, align 4 %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0 %vaddr.add = add i32 %vaddr, 32 call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %vdata, <4 x i32> undef, i32 %vaddr.add, i32 0, i32 0, i32 228, i32 3) - %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4 + %tmp2 = load i32, ptr addrspace(3) %ptr2, align 4 %add = add nsw i32 %tmp1, %tmp2 - store i32 %add, i32 addrspace(1)* %out, align 4 + store i32 %add, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll b/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll --- a/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll +++ b/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll @@ -60,7 +60,6 @@ ; CHECK-NEXT: s_cmp_lg_u32 s10, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB0_14 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: s_mov_b64 s[2:3], 0 ; CHECK-NEXT: s_mov_b64 s[0:1], -1 ; CHECK-NEXT: .LBB0_4: ; %Flow3 ; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec @@ -103,7 +102,6 @@ ; CHECK-NEXT: s_branch .LBB0_10 ; CHECK-NEXT: .LBB0_14: ; %cond.false.i8 ; CHECK-NEXT: s_mov_b64 s[2:3], -1 -; CHECK-NEXT: s_mov_b64 s[0:1], 0 ; CHECK-NEXT: s_trap 2 ; CHECK-NEXT: s_branch .LBB0_4 entry: diff --git a/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll b/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll --- a/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll +++ b/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll @@ -140,7 +140,6 @@ ; GCN-NEXT: s_cbranch_scc1 .LBB0_10 ; GCN-NEXT: ; %bb.9: ; GCN-NEXT: s_mov_b64 s[6:7], -1 -; GCN-NEXT: s_mov_b64 s[4:5], 0 ; GCN-NEXT: s_and_saveexec_b64 s[8:9], 
s[6:7] ; GCN-NEXT: s_cbranch_execnz .LBB0_3 ; GCN-NEXT: s_branch .LBB0_4 @@ -173,7 +172,6 @@ ; GCN-NEXT: ; %bb.15: ; %LeafBlock9 ; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0 ; GCN-NEXT: s_mov_b64 s[8:9], -1 -; GCN-NEXT: s_mov_b64 s[4:5], 0 ; GCN-NEXT: s_and_saveexec_b64 s[12:13], vcc ; GCN-NEXT: ; %bb.16: ; %do.body.i.i.i.i ; GCN-NEXT: s_mov_b64 s[4:5], exec diff --git a/llvm/test/CodeGen/AMDGPU/si-vector-hang.ll b/llvm/test/CodeGen/AMDGPU/si-vector-hang.ll --- a/llvm/test/CodeGen/AMDGPU/si-vector-hang.ll +++ b/llvm/test/CodeGen/AMDGPU/si-vector-hang.ll @@ -12,81 +12,81 @@ ; CHECK: buffer_store_byte ; ModuleID = 'radeon' -define amdgpu_kernel void @test_8_min_char(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture readonly %in0, i8 addrspace(1)* nocapture readonly %in1) #0 { +define amdgpu_kernel void @test_8_min_char(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture readonly %in0, ptr addrspace(1) nocapture readonly %in1) #0 { entry: - %0 = load i8, i8 addrspace(1)* %in0, align 1 + %0 = load i8, ptr addrspace(1) %in0, align 1 %1 = insertelement <8 x i8> undef, i8 %0, i32 0 - %arrayidx2.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 1 - %2 = load i8, i8 addrspace(1)* %arrayidx2.i.i, align 1 + %arrayidx2.i.i = getelementptr inbounds i8, ptr addrspace(1) %in0, i64 1 + %2 = load i8, ptr addrspace(1) %arrayidx2.i.i, align 1 %3 = insertelement <8 x i8> %1, i8 %2, i32 1 - %arrayidx6.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 2 - %4 = load i8, i8 addrspace(1)* %arrayidx6.i.i, align 1 + %arrayidx6.i.i = getelementptr inbounds i8, ptr addrspace(1) %in0, i64 2 + %4 = load i8, ptr addrspace(1) %arrayidx6.i.i, align 1 %5 = insertelement <8 x i8> %3, i8 %4, i32 2 - %arrayidx10.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 3 - %6 = load i8, i8 addrspace(1)* %arrayidx10.i.i, align 1 + %arrayidx10.i.i = getelementptr inbounds i8, ptr addrspace(1) %in0, i64 3 + %6 = load i8, ptr addrspace(1) %arrayidx10.i.i, align 1 %7 = insertelement <8 x i8> %5, i8 %6, i32 3 - %arrayidx.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 4 - %8 = load i8, i8 addrspace(1)* %arrayidx.i.i, align 1 + %arrayidx.i.i = getelementptr inbounds i8, ptr addrspace(1) %in0, i64 4 + %8 = load i8, ptr addrspace(1) %arrayidx.i.i, align 1 %9 = insertelement <8 x i8> undef, i8 %8, i32 0 - %arrayidx2.i9.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 5 - %10 = load i8, i8 addrspace(1)* %arrayidx2.i9.i, align 1 + %arrayidx2.i9.i = getelementptr inbounds i8, ptr addrspace(1) %in0, i64 5 + %10 = load i8, ptr addrspace(1) %arrayidx2.i9.i, align 1 %11 = insertelement <8 x i8> %9, i8 %10, i32 1 - %arrayidx6.i11.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 6 - %12 = load i8, i8 addrspace(1)* %arrayidx6.i11.i, align 1 + %arrayidx6.i11.i = getelementptr inbounds i8, ptr addrspace(1) %in0, i64 6 + %12 = load i8, ptr addrspace(1) %arrayidx6.i11.i, align 1 %13 = insertelement <8 x i8> %11, i8 %12, i32 2 - %arrayidx10.i13.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 7 - %14 = load i8, i8 addrspace(1)* %arrayidx10.i13.i, align 1 + %arrayidx10.i13.i = getelementptr inbounds i8, ptr addrspace(1) %in0, i64 7 + %14 = load i8, ptr addrspace(1) %arrayidx10.i13.i, align 1 %15 = insertelement <8 x i8> %13, i8 %14, i32 3 %vecinit5.i = shufflevector <8 x i8> %7, <8 x i8> %15, <8 x i32> - %16 = load i8, i8 addrspace(1)* %in1, align 1 + %16 = load i8, ptr addrspace(1) %in1, align 1 %17 = insertelement <8 x i8> undef, i8 %16, i32 0 - %arrayidx2.i.i4 = getelementptr 
inbounds i8, i8 addrspace(1)* %in1, i64 1 - %18 = load i8, i8 addrspace(1)* %arrayidx2.i.i4, align 1 + %arrayidx2.i.i4 = getelementptr inbounds i8, ptr addrspace(1) %in1, i64 1 + %18 = load i8, ptr addrspace(1) %arrayidx2.i.i4, align 1 %19 = insertelement <8 x i8> %17, i8 %18, i32 1 - %arrayidx6.i.i5 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 2 - %20 = load i8, i8 addrspace(1)* %arrayidx6.i.i5, align 1 + %arrayidx6.i.i5 = getelementptr inbounds i8, ptr addrspace(1) %in1, i64 2 + %20 = load i8, ptr addrspace(1) %arrayidx6.i.i5, align 1 %21 = insertelement <8 x i8> %19, i8 %20, i32 2 - %arrayidx10.i.i6 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 3 - %22 = load i8, i8 addrspace(1)* %arrayidx10.i.i6, align 1 + %arrayidx10.i.i6 = getelementptr inbounds i8, ptr addrspace(1) %in1, i64 3 + %22 = load i8, ptr addrspace(1) %arrayidx10.i.i6, align 1 %23 = insertelement <8 x i8> %21, i8 %22, i32 3 - %arrayidx.i.i7 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 4 - %24 = load i8, i8 addrspace(1)* %arrayidx.i.i7, align 1 + %arrayidx.i.i7 = getelementptr inbounds i8, ptr addrspace(1) %in1, i64 4 + %24 = load i8, ptr addrspace(1) %arrayidx.i.i7, align 1 %25 = insertelement <8 x i8> undef, i8 %24, i32 0 - %arrayidx2.i9.i8 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 5 - %26 = load i8, i8 addrspace(1)* %arrayidx2.i9.i8, align 1 + %arrayidx2.i9.i8 = getelementptr inbounds i8, ptr addrspace(1) %in1, i64 5 + %26 = load i8, ptr addrspace(1) %arrayidx2.i9.i8, align 1 %27 = insertelement <8 x i8> %25, i8 %26, i32 1 - %arrayidx6.i11.i9 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 6 - %28 = load i8, i8 addrspace(1)* %arrayidx6.i11.i9, align 1 + %arrayidx6.i11.i9 = getelementptr inbounds i8, ptr addrspace(1) %in1, i64 6 + %28 = load i8, ptr addrspace(1) %arrayidx6.i11.i9, align 1 %29 = insertelement <8 x i8> %27, i8 %28, i32 2 - %arrayidx10.i13.i10 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 7 - %30 = load i8, i8 addrspace(1)* %arrayidx10.i13.i10, align 1 + %arrayidx10.i13.i10 = getelementptr inbounds i8, ptr addrspace(1) %in1, i64 7 + %30 = load i8, ptr addrspace(1) %arrayidx10.i13.i10, align 1 %31 = insertelement <8 x i8> %29, i8 %30, i32 3 %vecinit5.i11 = shufflevector <8 x i8> %23, <8 x i8> %31, <8 x i32> %cmp.i = icmp slt <8 x i8> %vecinit5.i, %vecinit5.i11 %cond.i = select <8 x i1> %cmp.i, <8 x i8> %vecinit5.i, <8 x i8> %vecinit5.i11 %32 = extractelement <8 x i8> %cond.i, i32 0 - store i8 %32, i8 addrspace(1)* %out, align 1 + store i8 %32, ptr addrspace(1) %out, align 1 %33 = extractelement <8 x i8> %cond.i, i32 1 - %arrayidx2.i.i.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1 - store i8 %33, i8 addrspace(1)* %arrayidx2.i.i.i, align 1 + %arrayidx2.i.i.i = getelementptr inbounds i8, ptr addrspace(1) %out, i64 1 + store i8 %33, ptr addrspace(1) %arrayidx2.i.i.i, align 1 %34 = extractelement <8 x i8> %cond.i, i32 2 - %arrayidx.i.i.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 2 - store i8 %34, i8 addrspace(1)* %arrayidx.i.i.i, align 1 + %arrayidx.i.i.i = getelementptr inbounds i8, ptr addrspace(1) %out, i64 2 + store i8 %34, ptr addrspace(1) %arrayidx.i.i.i, align 1 %35 = extractelement <8 x i8> %cond.i, i32 3 - %arrayidx2.i6.i.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 3 - store i8 %35, i8 addrspace(1)* %arrayidx2.i6.i.i, align 1 - %arrayidx.i.i3 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 4 + %arrayidx2.i6.i.i = getelementptr inbounds i8, ptr addrspace(1) %out, i64 3 + store i8 %35, ptr 
addrspace(1) %arrayidx2.i6.i.i, align 1 + %arrayidx.i.i3 = getelementptr inbounds i8, ptr addrspace(1) %out, i64 4 %36 = extractelement <8 x i8> %cond.i, i32 4 - store i8 %36, i8 addrspace(1)* %arrayidx.i.i3, align 1 + store i8 %36, ptr addrspace(1) %arrayidx.i.i3, align 1 %37 = extractelement <8 x i8> %cond.i, i32 5 - %arrayidx2.i.i6.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 5 - store i8 %37, i8 addrspace(1)* %arrayidx2.i.i6.i, align 1 + %arrayidx2.i.i6.i = getelementptr inbounds i8, ptr addrspace(1) %out, i64 5 + store i8 %37, ptr addrspace(1) %arrayidx2.i.i6.i, align 1 %38 = extractelement <8 x i8> %cond.i, i32 6 - %arrayidx.i.i7.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 6 - store i8 %38, i8 addrspace(1)* %arrayidx.i.i7.i, align 1 + %arrayidx.i.i7.i = getelementptr inbounds i8, ptr addrspace(1) %out, i64 6 + store i8 %38, ptr addrspace(1) %arrayidx.i.i7.i, align 1 %39 = extractelement <8 x i8> %cond.i, i32 7 - %arrayidx2.i6.i8.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 7 - store i8 %39, i8 addrspace(1)* %arrayidx2.i6.i8.i, align 1 + %arrayidx2.i6.i8.i = getelementptr inbounds i8, ptr addrspace(1) %out, i64 7 + store i8 %39, ptr addrspace(1) %arrayidx2.i6.i8.i, align 1 ret void } @@ -97,7 +97,7 @@ !0 = !{null} !1 = !{null} !2 = !{null} -!3 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*)* @test_8_min_char} +!3 = !{ptr @test_8_min_char} !4 = !{null} !5 = !{null} !6 = !{null} diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -11,14 +11,14 @@ ; GCN-PRELINK: call fast float @_Z6sincosfPf( ; GCN-NATIVE: call fast float @_Z10native_sinf( ; GCN-NATIVE: call fast float @_Z10native_cosf( -define amdgpu_kernel void @test_sincos(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_sincos(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z3sinf(float %tmp) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 %call2 = call fast float @_Z3cosf(float %tmp) - %arrayidx3 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 - store float %call2, float addrspace(1)* %arrayidx3, align 4 + %arrayidx3 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1 + store float %call2, ptr addrspace(1) %arrayidx3, align 4 ret void } @@ -32,14 +32,14 @@ ; GCN-PRELINK: call fast <2 x float> @_Z6sincosDv2_fPS_( ; GCN-NATIVE: call fast <2 x float> @_Z10native_sinDv2_f( ; GCN-NATIVE: call fast <2 x float> @_Z10native_cosDv2_f( -define amdgpu_kernel void @test_sincos_v2(<2 x float> addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_sincos_v2(ptr addrspace(1) nocapture %a) { entry: - %tmp = load <2 x float>, <2 x float> addrspace(1)* %a, align 8 + %tmp = load <2 x float>, ptr addrspace(1) %a, align 8 %call = call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp) - store <2 x float> %call, <2 x float> addrspace(1)* %a, align 8 + store <2 x float> %call, ptr addrspace(1) %a, align 8 %call2 = call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp) - %arrayidx3 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i64 1 - store <2 x float> %call2, <2 x float> addrspace(1)* %arrayidx3, align 8 + %arrayidx3 = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i64 1 + store <2 x float> 
%call2, ptr addrspace(1) %arrayidx3, align 8 ret void } @@ -53,19 +53,17 @@ ; GCN-PRELINK: call fast <3 x float> @_Z6sincosDv3_fPS_( ; GCN-NATIVE: call fast <3 x float> @_Z10native_sinDv3_f( ; GCN-NATIVE: call fast <3 x float> @_Z10native_cosDv3_f( -define amdgpu_kernel void @test_sincos_v3(<3 x float> addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_sincos_v3(ptr addrspace(1) nocapture %a) { entry: - %castToVec4 = bitcast <3 x float> addrspace(1)* %a to <4 x float> addrspace(1)* - %loadVec4 = load <4 x float>, <4 x float> addrspace(1)* %castToVec4, align 16 + %loadVec4 = load <4 x float>, ptr addrspace(1) %a, align 16 %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> %call = call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4) %extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> - store <4 x float> %extractVec6, <4 x float> addrspace(1)* %castToVec4, align 16 + store <4 x float> %extractVec6, ptr addrspace(1) %a, align 16 %call11 = call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4) - %arrayidx12 = getelementptr inbounds <3 x float>, <3 x float> addrspace(1)* %a, i64 1 + %arrayidx12 = getelementptr inbounds <3 x float>, ptr addrspace(1) %a, i64 1 %extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> - %storetmp14 = bitcast <3 x float> addrspace(1)* %arrayidx12 to <4 x float> addrspace(1)* - store <4 x float> %extractVec13, <4 x float> addrspace(1)* %storetmp14, align 16 + store <4 x float> %extractVec13, ptr addrspace(1) %arrayidx12, align 16 ret void } @@ -79,14 +77,14 @@ ; GCN-PRELINK: call fast <4 x float> @_Z6sincosDv4_fPS_( ; GCN-NATIVE: call fast <4 x float> @_Z10native_sinDv4_f( ; GCN-NATIVE: call fast <4 x float> @_Z10native_cosDv4_f( -define amdgpu_kernel void @test_sincos_v4(<4 x float> addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_sincos_v4(ptr addrspace(1) nocapture %a) { entry: - %tmp = load <4 x float>, <4 x float> addrspace(1)* %a, align 16 + %tmp = load <4 x float>, ptr addrspace(1) %a, align 16 %call = call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp) - store <4 x float> %call, <4 x float> addrspace(1)* %a, align 16 + store <4 x float> %call, ptr addrspace(1) %a, align 16 %call2 = call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp) - %arrayidx3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %a, i64 1 - store <4 x float> %call2, <4 x float> addrspace(1)* %arrayidx3, align 16 + %arrayidx3 = getelementptr inbounds <4 x float>, ptr addrspace(1) %a, i64 1 + store <4 x float> %call2, ptr addrspace(1) %arrayidx3, align 16 ret void } @@ -100,14 +98,14 @@ ; GCN-PRELINK: call fast <8 x float> @_Z6sincosDv8_fPS_( ; GCN-NATIVE: call fast <8 x float> @_Z10native_sinDv8_f( ; GCN-NATIVE: call fast <8 x float> @_Z10native_cosDv8_f( -define amdgpu_kernel void @test_sincos_v8(<8 x float> addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_sincos_v8(ptr addrspace(1) nocapture %a) { entry: - %tmp = load <8 x float>, <8 x float> addrspace(1)* %a, align 32 + %tmp = load <8 x float>, ptr addrspace(1) %a, align 32 %call = call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp) - store <8 x float> %call, <8 x float> addrspace(1)* %a, align 32 + store <8 x float> %call, ptr addrspace(1) %a, align 32 %call2 = call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp) - %arrayidx3 = getelementptr inbounds <8 x float>, <8 x float> addrspace(1)* %a, i64 1 - store <8 x float> %call2, <8 x float> addrspace(1)* %arrayidx3, align 32 + %arrayidx3 = 
getelementptr inbounds <8 x float>, ptr addrspace(1) %a, i64 1 + store <8 x float> %call2, ptr addrspace(1) %arrayidx3, align 32 ret void } @@ -121,14 +119,14 @@ ; GCN-PRELINK: call fast <16 x float> @_Z6sincosDv16_fPS_( ; GCN-NATIVE: call fast <16 x float> @_Z10native_sinDv16_f( ; GCN-NATIVE: call fast <16 x float> @_Z10native_cosDv16_f( -define amdgpu_kernel void @test_sincos_v16(<16 x float> addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_sincos_v16(ptr addrspace(1) nocapture %a) { entry: - %tmp = load <16 x float>, <16 x float> addrspace(1)* %a, align 64 + %tmp = load <16 x float>, ptr addrspace(1) %a, align 64 %call = call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp) - store <16 x float> %call, <16 x float> addrspace(1)* %a, align 64 + store <16 x float> %call, ptr addrspace(1) %a, align 64 %call2 = call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp) - %arrayidx3 = getelementptr inbounds <16 x float>, <16 x float> addrspace(1)* %a, i64 1 - store <16 x float> %call2, <16 x float> addrspace(1)* %arrayidx3, align 64 + %arrayidx3 = getelementptr inbounds <16 x float>, ptr addrspace(1) %a, i64 1 + store <16 x float> %call2, ptr addrspace(1) %arrayidx3, align 64 ret void } @@ -137,22 +135,22 @@ declare <16 x float> @_Z3cosDv16_f(<16 x float>) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_recip -; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a -define amdgpu_kernel void @test_native_recip(float addrspace(1)* nocapture %a) { +; GCN: store float 0x3FD5555560000000, ptr addrspace(1) %a +define amdgpu_kernel void @test_native_recip(ptr addrspace(1) nocapture %a) { entry: %call = call fast float @_Z12native_recipf(float 3.000000e+00) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } declare float @_Z12native_recipf(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_recip -; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a -define amdgpu_kernel void @test_half_recip(float addrspace(1)* nocapture %a) { +; GCN: store float 0x3FD5555560000000, ptr addrspace(1) %a +define amdgpu_kernel void @test_half_recip(ptr addrspace(1) nocapture %a) { entry: %call = call fast float @_Z10half_recipf(float 3.000000e+00) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } @@ -160,11 +158,11 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_divide ; GCN: fmul fast float %tmp, 0x3FD5555560000000 -define amdgpu_kernel void @test_native_divide(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_native_divide(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } @@ -172,129 +170,129 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_divide ; GCN: fmul fast float %tmp, 0x3FD5555560000000 -define amdgpu_kernel void @test_half_divide(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_half_divide(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, 
align 4 ret void } declare float @_Z11half_divideff(float, float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0f -; GCN: store float 1.000000e+00, float addrspace(1)* %a -define amdgpu_kernel void @test_pow_0f(float addrspace(1)* nocapture %a) { +; GCN: store float 1.000000e+00, ptr addrspace(1) %a +define amdgpu_kernel void @test_pow_0f(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } declare float @_Z3powff(float, float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0i -; GCN: store float 1.000000e+00, float addrspace(1)* %a -define amdgpu_kernel void @test_pow_0i(float addrspace(1)* nocapture %a) { +; GCN: store float 1.000000e+00, ptr addrspace(1) %a +define amdgpu_kernel void @test_pow_0i(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1f -; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4 -; GCN: store float %tmp, float addrspace(1)* %a, align 4 -define amdgpu_kernel void @test_pow_1f(float addrspace(1)* nocapture %a) { +; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4 +; GCN: store float %tmp, ptr addrspace(1) %a, align 4 +define amdgpu_kernel void @test_pow_1f(ptr addrspace(1) nocapture %a) { entry: - %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 - %tmp = load float, float addrspace(1)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1 + %tmp = load float, ptr addrspace(1) %arrayidx, align 4 %call = call fast float @_Z3powff(float %tmp, float 1.000000e+00) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1i -; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4 -; GCN: store float %tmp, float addrspace(1)* %a, align 4 -define amdgpu_kernel void @test_pow_1i(float addrspace(1)* nocapture %a) { +; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4 +; GCN: store float %tmp, ptr addrspace(1) %a, align 4 +define amdgpu_kernel void @test_pow_1i(ptr addrspace(1) nocapture %a) { entry: - %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 - %tmp = load float, float addrspace(1)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1 + %tmp = load float, ptr addrspace(1) %arrayidx, align 4 %call = call fast float @_Z3powff(float %tmp, float 1.000000e+00) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2f -; GCN: %tmp = load float, float addrspace(1)* %a, align 4 +; GCN: %tmp = load float, ptr addrspace(1) %a, align 4 ; GCN: %__pow2 = fmul fast float %tmp, %tmp -define amdgpu_kernel void @test_pow_2f(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_pow_2f(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load 
float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2i -; GCN: %tmp = load float, float addrspace(1)* %a, align 4 +; GCN: %tmp = load float, ptr addrspace(1) %a, align 4 ; GCN: %__pow2 = fmul fast float %tmp, %tmp -define amdgpu_kernel void @test_pow_2i(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_pow_2i(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1f -; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4 +; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4 ; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp -define amdgpu_kernel void @test_pow_m1f(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_pow_m1f(ptr addrspace(1) nocapture %a) { entry: - %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 - %tmp = load float, float addrspace(1)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1 + %tmp = load float, ptr addrspace(1) %arrayidx, align 4 %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1i -; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4 +; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4 ; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp -define amdgpu_kernel void @test_pow_m1i(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_pow_m1i(ptr addrspace(1) nocapture %a) { entry: - %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 - %tmp = load float, float addrspace(1)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1 + %tmp = load float, ptr addrspace(1) %arrayidx, align 4 %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_half ; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 5.000000e-01) ; GCN-PRELINK: %__pow2sqrt = tail call fast float @_Z4sqrtf(float %tmp) -define amdgpu_kernel void @test_pow_half(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_pow_half(ptr addrspace(1) nocapture %a) { entry: - %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 - %tmp = load float, float addrspace(1)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1 + %tmp = load float, ptr addrspace(1) %arrayidx, align 4 %call = call fast float @_Z3powff(float %tmp, float 5.000000e-01) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_mhalf ; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float -5.000000e-01) ; GCN-PRELINK: %__pow2rsqrt = tail call fast float 
@_Z5rsqrtf(float %tmp) -define amdgpu_kernel void @test_pow_mhalf(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_pow_mhalf(ptr addrspace(1) nocapture %a) { entry: - %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 - %tmp = load float, float addrspace(1)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1 + %tmp = load float, ptr addrspace(1) %arrayidx, align 4 %call = call fast float @_Z3powff(float %tmp, float -5.000000e-01) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } @@ -304,12 +302,12 @@ ; GCN: %__powx22 = fmul fast float %__powx2, %tmp ; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21 ; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22 -define amdgpu_kernel void @test_pow_c(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_pow_c(ptr addrspace(1) nocapture %a) { entry: - %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 - %tmp = load float, float addrspace(1)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1 + %tmp = load float, ptr addrspace(1) %arrayidx, align 4 %call = call fast float @_Z3powff(float %tmp, float 1.100000e+01) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } @@ -319,12 +317,12 @@ ; GCN: %__powx22 = fmul fast float %__powx2, %tmp ; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21 ; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22 -define amdgpu_kernel void @test_powr_c(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_powr_c(ptr addrspace(1) nocapture %a) { entry: - %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 - %tmp = load float, float addrspace(1)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1 + %tmp = load float, ptr addrspace(1) %arrayidx, align 4 %call = call fast float @_Z4powrff(float %tmp, float 1.100000e+01) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } @@ -336,12 +334,12 @@ ; GCN: %__powx22 = fmul fast float %__powx2, %tmp ; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21 ; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22 -define amdgpu_kernel void @test_pown_c(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_pown_c(ptr addrspace(1) nocapture %a) { entry: - %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 - %tmp = load float, float addrspace(1)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1 + %tmp = load float, ptr addrspace(1) %arrayidx, align 4 %call = call fast float @_Z4pownfi(float %tmp, i32 11) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } @@ -357,13 +355,12 @@ ; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648 ; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32 ; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]] -; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)* -; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4 -define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) { +; GCN-PRELINK: store i32 %[[r2]], ptr addrspace(1) %a, align 4 +define amdgpu_kernel void @test_pow(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float 
addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z3powff(float %tmp, float 1.013000e+03) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } @@ -372,18 +369,18 @@ ; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %tmp) ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %tmp1 ; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx) -; GCN-PRELINK: store float %__exp2, float addrspace(1)* %a, align 4 +; GCN-PRELINK: store float %__exp2, ptr addrspace(1) %a, align 4 ; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp) ; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1 ; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx) -; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4 -define amdgpu_kernel void @test_powr(float addrspace(1)* nocapture %a) { +; GCN-NATIVE: store float %__exp2, ptr addrspace(1) %a, align 4 +define amdgpu_kernel void @test_powr(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 - %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 - %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 + %arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1 + %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4 %call = call fast float @_Z4powrff(float %tmp, float %tmp1) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } @@ -400,28 +397,27 @@ ; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]] ; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32 ; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]] -; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)* -; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4 -define amdgpu_kernel void @test_pown(float addrspace(1)* nocapture %a) { +; GCN-PRELINK: store i32 %[[r2]], ptr addrspace(1) %a, align 4 +define amdgpu_kernel void @test_pown(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 - %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 - %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 + %arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1 + %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4 %conv = fptosi float %tmp1 to i32 %call = call fast float @_Z4pownfi(float %tmp, i32 %conv) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_1 -; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4 -; GCN: store float %tmp, float addrspace(1)* %a, align 4 -define amdgpu_kernel void @test_rootn_1(float addrspace(1)* nocapture %a) { +; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4 +; GCN: store float %tmp, ptr addrspace(1) %a, align 4 +define amdgpu_kernel void @test_rootn_1(ptr addrspace(1) nocapture %a) { entry: - %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 - %tmp = load float, float addrspace(1)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1 + %tmp = load float, ptr addrspace(1) %arrayidx, align 4 %call = call fast float @_Z5rootnfi(float %tmp, 
i32 1) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } @@ -430,129 +426,129 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_2 ; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 2) ; GCN-PRELINK: %__rootn2sqrt = tail call fast float @_Z4sqrtf(float %tmp) -define amdgpu_kernel void @test_rootn_2(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_rootn_2(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z5rootnfi(float %tmp, i32 2) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_3 ; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 3) ; GCN-PRELINK: %__rootn2cbrt = tail call fast float @_Z4cbrtf(float %tmp) -define amdgpu_kernel void @test_rootn_3(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_rootn_3(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z5rootnfi(float %tmp, i32 3) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m1 ; GCN: fdiv fast float 1.000000e+00, %tmp -define amdgpu_kernel void @test_rootn_m1(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_rootn_m1(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z5rootnfi(float %tmp, i32 -1) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m2 ; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 -2) ; GCN-PRELINK: %__rootn2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp) -define amdgpu_kernel void @test_rootn_m2(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_rootn_m2(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z5rootnfi(float %tmp, i32 -2) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_0x -; GCN: store float %y, float addrspace(1)* %a -define amdgpu_kernel void @test_fma_0x(float addrspace(1)* nocapture %a, float %y) { +; GCN: store float %y, ptr addrspace(1) %a +define amdgpu_kernel void @test_fma_0x(ptr addrspace(1) nocapture %a, float %y) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } declare float @_Z3fmafff(float, float, float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x0 -; GCN: store float %y, float addrspace(1)* %a -define amdgpu_kernel void @test_fma_x0(float addrspace(1)* nocapture %a, float %y) { +; GCN: store float %y, ptr addrspace(1) %a +define amdgpu_kernel void @test_fma_x0(ptr addrspace(1) nocapture %a, float %y) { entry: - 
%tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_0x -; GCN: store float %y, float addrspace(1)* %a -define amdgpu_kernel void @test_mad_0x(float addrspace(1)* nocapture %a, float %y) { +; GCN: store float %y, ptr addrspace(1) %a +define amdgpu_kernel void @test_mad_0x(ptr addrspace(1) nocapture %a, float %y) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } declare float @_Z3madfff(float, float, float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_x0 -; GCN: store float %y, float addrspace(1)* %a -define amdgpu_kernel void @test_mad_x0(float addrspace(1)* nocapture %a, float %y) { +; GCN: store float %y, ptr addrspace(1) %a +define amdgpu_kernel void @test_mad_x0(ptr addrspace(1) nocapture %a, float %y) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x1y ; GCN: %fmaadd = fadd fast float %tmp, %y -define amdgpu_kernel void @test_fma_x1y(float addrspace(1)* nocapture %a, float %y) { +define amdgpu_kernel void @test_fma_x1y(ptr addrspace(1) nocapture %a, float %y) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_1xy ; GCN: %fmaadd = fadd fast float %tmp, %y -define amdgpu_kernel void @test_fma_1xy(float addrspace(1)* nocapture %a, float %y) { +define amdgpu_kernel void @test_fma_1xy(ptr addrspace(1) nocapture %a, float %y) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_xy0 ; GCN: %fmamul = fmul fast float %tmp1, %tmp -define amdgpu_kernel void @test_fma_xy0(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_fma_xy0(ptr addrspace(1) nocapture %a) { entry: - %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 - %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %tmp1 = load float, float addrspace(1)* %a, align 4 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1 + %tmp = load float, ptr addrspace(1) %arrayidx, align 4 + %tmp1 = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: 
{{^}}define amdgpu_kernel void @test_use_native_exp ; GCN-NATIVE: call fast float @_Z10native_expf(float %tmp) -define amdgpu_kernel void @test_use_native_exp(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_use_native_exp(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z3expf(float %tmp) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } @@ -560,11 +556,11 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp2 ; GCN-NATIVE: call fast float @_Z11native_exp2f(float %tmp) -define amdgpu_kernel void @test_use_native_exp2(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_use_native_exp2(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z4exp2f(float %tmp) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } @@ -572,11 +568,11 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp10 ; GCN-NATIVE: call fast float @_Z12native_exp10f(float %tmp) -define amdgpu_kernel void @test_use_native_exp10(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_use_native_exp10(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z5exp10f(float %tmp) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } @@ -584,11 +580,11 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log ; GCN-NATIVE: call fast float @_Z10native_logf(float %tmp) -define amdgpu_kernel void @test_use_native_log(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_use_native_log(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z3logf(float %tmp) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } @@ -596,11 +592,11 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log2 ; GCN-NATIVE: call fast float @_Z11native_log2f(float %tmp) -define amdgpu_kernel void @test_use_native_log2(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_use_native_log2(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z4log2f(float %tmp) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } @@ -608,49 +604,49 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log10 ; GCN-NATIVE: call fast float @_Z12native_log10f(float %tmp) -define amdgpu_kernel void @test_use_native_log10(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_use_native_log10(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z5log10f(float %tmp) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } declare float @_Z5log10f(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void 
@test_use_native_powr -; GCN-NATIVE: %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4 +; GCN-NATIVE: %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4 ; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp) ; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1 ; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx) -; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4 -define amdgpu_kernel void @test_use_native_powr(float addrspace(1)* nocapture %a) { +; GCN-NATIVE: store float %__exp2, ptr addrspace(1) %a, align 4 +define amdgpu_kernel void @test_use_native_powr(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 - %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 - %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 + %arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1 + %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4 %call = call fast float @_Z4powrff(float %tmp, float %tmp1) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt ; GCN-NATIVE: call fast float @_Z11native_sqrtf(float %tmp) -define amdgpu_kernel void @test_use_native_sqrt(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_use_native_sqrt(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z4sqrtf(float %tmp) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64 ; GCN: call fast double @_Z4sqrtd(double %tmp) -define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(double addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(ptr addrspace(1) nocapture %a) { entry: - %tmp = load double, double addrspace(1)* %a, align 8 + %tmp = load double, ptr addrspace(1) %a, align 8 %call = call fast double @_Z4sqrtd(double %tmp) - store double %call, double addrspace(1)* %a, align 8 + store double %call, ptr addrspace(1) %a, align 8 ret void } @@ -659,11 +655,11 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_rsqrt ; GCN-NATIVE: call fast float @_Z12native_rsqrtf(float %tmp) -define amdgpu_kernel void @test_use_native_rsqrt(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_use_native_rsqrt(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z5rsqrtf(float %tmp) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 ret void } @@ -671,11 +667,11 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_tan ; GCN-NATIVE: call fast float @_Z10native_tanf(float %tmp) -define amdgpu_kernel void @test_use_native_tan(float addrspace(1)* nocapture %a) { +define amdgpu_kernel void @test_use_native_tan(ptr addrspace(1) nocapture %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 %call = call fast float @_Z3tanf(float %tmp) - store float %call, float addrspace(1)* %a, align 4 + store float %call, ptr addrspace(1) %a, align 4 
ret void } @@ -684,105 +680,95 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sincos ; GCN-NATIVE: call float @_Z10native_sinf(float %tmp) ; GCN-NATIVE: call float @_Z10native_cosf(float %tmp) -define amdgpu_kernel void @test_use_native_sincos(float addrspace(1)* %a) { +define amdgpu_kernel void @test_use_native_sincos(ptr addrspace(1) %a) { entry: - %tmp = load float, float addrspace(1)* %a, align 4 - %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 - %tmp1 = addrspacecast float addrspace(1)* %arrayidx1 to float* - %call = call fast float @_Z6sincosfPf(float %tmp, float* %tmp1) - store float %call, float addrspace(1)* %a, align 4 + %tmp = load float, ptr addrspace(1) %a, align 4 + %arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1 + %tmp1 = addrspacecast ptr addrspace(1) %arrayidx1 to ptr + %call = call fast float @_Z6sincosfPf(float %tmp, ptr %tmp1) + store float %call, ptr addrspace(1) %a, align 4 ret void } -declare float @_Z6sincosfPf(float, float*) +declare float @_Z6sincosfPf(float, ptr) %opencl.pipe_t = type opaque %opencl.reserve_id_t = type opaque -; GCN-LABEL: {{^}}define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) -; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND:[0-9]+]] -; GCN-PRELINK: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]] -define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr { +; GCN-LABEL: {{^}}define amdgpu_kernel void @test_read_pipe(ptr addrspace(1) %p, ptr addrspace(1) %ptr) +; GCN-PRELINK: call i32 @__read_pipe_2_4(ptr addrspace(1) %{{.*}}, ptr %{{.*}}) #[[$NOUNWIND:[0-9]+]] +; GCN-PRELINK: call i32 @__read_pipe_4_4(ptr addrspace(1) %{{.*}}, ptr addrspace(5) %{{.*}}, i32 2, ptr %{{.*}}) #[[$NOUNWIND]] +define amdgpu_kernel void @test_read_pipe(ptr addrspace(1) %p, ptr addrspace(1) %ptr) local_unnamed_addr { entry: - %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)* - %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8* - %tmp2 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0 - %tmp3 = call %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) - %tmp4 = call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0 - call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) + %tmp1 = addrspacecast ptr addrspace(1) %ptr to ptr + %tmp2 = call i32 @__read_pipe_2(ptr addrspace(1) %p, ptr %tmp1, i32 4, i32 4) #0 + %tmp3 = call ptr addrspace(5) @__reserve_read_pipe(ptr addrspace(1) %p, i32 2, i32 4, i32 4) + %tmp4 = call i32 @__read_pipe_4(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 2, ptr %tmp1, i32 4, i32 4) #0 + call void @__commit_read_pipe(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 4, i32 4) ret void } -declare i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32) +declare i32 @__read_pipe_2(ptr addrspace(1), ptr, i32, i32) -declare %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32) +declare ptr addrspace(5) @__reserve_read_pipe(ptr addrspace(1), i32, i32, i32) -declare i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t 
addrspace(5)*, i32, i8*, i32, i32) +declare i32 @__read_pipe_4(ptr addrspace(1), ptr addrspace(5), i32, ptr, i32, i32) -declare void @__commit_read_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32) +declare void @__commit_read_pipe(ptr addrspace(1), ptr addrspace(5), i32, i32) -; GCN-LABEL: {{^}}define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) -; GCN-PRELINK: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND]] -; GCN-PRELINK: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]] -define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr { +; GCN-LABEL: {{^}}define amdgpu_kernel void @test_write_pipe(ptr addrspace(1) %p, ptr addrspace(1) %ptr) +; GCN-PRELINK: call i32 @__write_pipe_2_4(ptr addrspace(1) %{{.*}}, ptr %{{.*}}) #[[$NOUNWIND]] +; GCN-PRELINK: call i32 @__write_pipe_4_4(ptr addrspace(1) %{{.*}}, ptr addrspace(5) %{{.*}}, i32 2, ptr %{{.*}}) #[[$NOUNWIND]] +define amdgpu_kernel void @test_write_pipe(ptr addrspace(1) %p, ptr addrspace(1) %ptr) local_unnamed_addr { entry: - %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)* - %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8* - %tmp2 = call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0 - %tmp3 = call %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0 - %tmp4 = call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0 - call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) #0 + %tmp1 = addrspacecast ptr addrspace(1) %ptr to ptr + %tmp2 = call i32 @__write_pipe_2(ptr addrspace(1) %p, ptr %tmp1, i32 4, i32 4) #0 + %tmp3 = call ptr addrspace(5) @__reserve_write_pipe(ptr addrspace(1) %p, i32 2, i32 4, i32 4) #0 + %tmp4 = call i32 @__write_pipe_4(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 2, ptr %tmp1, i32 4, i32 4) #0 + call void @__commit_write_pipe(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 4, i32 4) #0 ret void } -declare i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32) local_unnamed_addr +declare i32 @__write_pipe_2(ptr addrspace(1), ptr, i32, i32) local_unnamed_addr -declare %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32) local_unnamed_addr +declare ptr addrspace(5) @__reserve_write_pipe(ptr addrspace(1), i32, i32, i32) local_unnamed_addr -declare i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i8*, i32, i32) local_unnamed_addr +declare i32 @__write_pipe_4(ptr addrspace(1), ptr addrspace(5), i32, ptr, i32, i32) local_unnamed_addr -declare void @__commit_write_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32) local_unnamed_addr +declare void @__commit_write_pipe(ptr addrspace(1), ptr addrspace(5), i32, i32) local_unnamed_addr %struct.S = type { [100 x i32] } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pipe_size -; GCN-PRELINK: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* %{{.*}} i8* %{{.*}}) #[[$NOUNWIND]] -; GCN-PRELINK: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* %{{.*}} i16* %{{.*}}) #[[$NOUNWIND]] -; GCN-PRELINK: call i32 
@__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}} i32* %{{.*}}) #[[$NOUNWIND]] -; GCN-PRELINK: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* %{{.*}} i64* %{{.*}}) #[[$NOUNWIND]] -; GCN-PRELINK: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64>* %{{.*}}) #[[$NOUNWIND]] -; GCN-PRELINK: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64>* %{{.*}} #[[$NOUNWIND]] -; GCN-PRELINK: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64>* %{{.*}} #[[$NOUNWIND]] -; GCN-PRELINK: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64>* %{{.*}} #[[$NOUNWIND]] -; GCN-PRELINK: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8* %{{.*}} i32 400, i32 4) #[[$NOUNWIND]] -define amdgpu_kernel void @test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 { -entry: - %tmp = addrspacecast i8 addrspace(1)* %ptr1 to i8* - %tmp1 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8* %tmp, i32 1, i32 1) #0 - %tmp2 = bitcast i16 addrspace(1)* %ptr2 to i8 addrspace(1)* - %tmp3 = addrspacecast i8 addrspace(1)* %tmp2 to i8* - %tmp4 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8* %tmp3, i32 2, i32 2) #0 - %tmp5 = bitcast i32 addrspace(1)* %ptr4 to i8 addrspace(1)* - %tmp6 = addrspacecast i8 addrspace(1)* %tmp5 to i8* - %tmp7 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8* %tmp6, i32 4, i32 4) #0 - %tmp8 = bitcast i64 addrspace(1)* %ptr8 to i8 addrspace(1)* - %tmp9 = addrspacecast i8 addrspace(1)* %tmp8 to i8* - %tmp10 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8* %tmp9, i32 8, i32 8) #0 - %tmp11 = bitcast <2 x i64> addrspace(1)* %ptr16 to i8 addrspace(1)* - %tmp12 = addrspacecast i8 addrspace(1)* %tmp11 to i8* - %tmp13 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8* %tmp12, i32 16, i32 16) #0 - %tmp14 = bitcast <4 x i64> addrspace(1)* %ptr32 to i8 addrspace(1)* - %tmp15 = addrspacecast i8 addrspace(1)* %tmp14 to i8* - %tmp16 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8* %tmp15, i32 32, i32 32) #0 - %tmp17 = bitcast <8 x i64> addrspace(1)* %ptr64 to i8 addrspace(1)* - %tmp18 = addrspacecast i8 addrspace(1)* %tmp17 to i8* - %tmp19 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8* %tmp18, i32 64, i32 64) #0 - %tmp20 = bitcast <16 x i64> addrspace(1)* %ptr128 to i8 addrspace(1)* - %tmp21 = addrspacecast i8 addrspace(1)* %tmp20 to i8* - %tmp22 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8* %tmp21, i32 128, i32 128) #0 - %tmp23 = bitcast %struct.S addrspace(1)* %ptru to i8 addrspace(1)* - %tmp24 = addrspacecast i8 addrspace(1)* %tmp23 to i8* - %tmp25 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8* %tmp24, i32 400, i32 4) #0 +; GCN-PRELINK: call i32 @__read_pipe_2_1(ptr addrspace(1) %{{.*}} ptr %{{.*}}) #[[$NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_2(ptr addrspace(1) %{{.*}} ptr %{{.*}}) #[[$NOUNWIND]] +; GCN-PRELINK: call 
i32 @__read_pipe_2_4(ptr addrspace(1) %{{.*}} ptr %{{.*}}) #[[$NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_8(ptr addrspace(1) %{{.*}} ptr %{{.*}}) #[[$NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_16(ptr addrspace(1) %{{.*}}, ptr %{{.*}}) #[[$NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_32(ptr addrspace(1) %{{.*}}, ptr %{{.*}} #[[$NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_64(ptr addrspace(1) %{{.*}}, ptr %{{.*}} #[[$NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_128(ptr addrspace(1) %{{.*}}, ptr %{{.*}} #[[$NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2(ptr addrspace(1) %{{.*}}, ptr %{{.*}} i32 400, i32 4) #[[$NOUNWIND]] +define amdgpu_kernel void @test_pipe_size(ptr addrspace(1) %p1, ptr addrspace(1) %ptr1, ptr addrspace(1) %p2, ptr addrspace(1) %ptr2, ptr addrspace(1) %p4, ptr addrspace(1) %ptr4, ptr addrspace(1) %p8, ptr addrspace(1) %ptr8, ptr addrspace(1) %p16, ptr addrspace(1) %ptr16, ptr addrspace(1) %p32, ptr addrspace(1) %ptr32, ptr addrspace(1) %p64, ptr addrspace(1) %ptr64, ptr addrspace(1) %p128, ptr addrspace(1) %ptr128, ptr addrspace(1) %pu, ptr addrspace(1) %ptru) local_unnamed_addr #0 { +entry: + %tmp = addrspacecast ptr addrspace(1) %ptr1 to ptr + %tmp1 = call i32 @__read_pipe_2(ptr addrspace(1) %p1, ptr %tmp, i32 1, i32 1) #0 + %tmp3 = addrspacecast ptr addrspace(1) %ptr2 to ptr + %tmp4 = call i32 @__read_pipe_2(ptr addrspace(1) %p2, ptr %tmp3, i32 2, i32 2) #0 + %tmp6 = addrspacecast ptr addrspace(1) %ptr4 to ptr + %tmp7 = call i32 @__read_pipe_2(ptr addrspace(1) %p4, ptr %tmp6, i32 4, i32 4) #0 + %tmp9 = addrspacecast ptr addrspace(1) %ptr8 to ptr + %tmp10 = call i32 @__read_pipe_2(ptr addrspace(1) %p8, ptr %tmp9, i32 8, i32 8) #0 + %tmp12 = addrspacecast ptr addrspace(1) %ptr16 to ptr + %tmp13 = call i32 @__read_pipe_2(ptr addrspace(1) %p16, ptr %tmp12, i32 16, i32 16) #0 + %tmp15 = addrspacecast ptr addrspace(1) %ptr32 to ptr + %tmp16 = call i32 @__read_pipe_2(ptr addrspace(1) %p32, ptr %tmp15, i32 32, i32 32) #0 + %tmp18 = addrspacecast ptr addrspace(1) %ptr64 to ptr + %tmp19 = call i32 @__read_pipe_2(ptr addrspace(1) %p64, ptr %tmp18, i32 64, i32 64) #0 + %tmp21 = addrspacecast ptr addrspace(1) %ptr128 to ptr + %tmp22 = call i32 @__read_pipe_2(ptr addrspace(1) %p128, ptr %tmp21, i32 128, i32 128) #0 + %tmp24 = addrspacecast ptr addrspace(1) %ptru to ptr + %tmp25 = call i32 @__read_pipe_2(ptr addrspace(1) %pu, ptr %tmp24, i32 400, i32 4) #0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls2.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls2.ll --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls2.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls2.ll @@ -2,19 +2,19 @@ ; RUN: opt -S -amdgpu-simplifylib -debug-only=amdgpu-simplifylib -mtriple=amdgcn-unknown-amdhsa -disable-output < %s 2>&1 | FileCheck %s ; RUN: opt -S -passes=amdgpu-simplifylib -debug-only=amdgpu-simplifylib -mtriple=amdgcn-unknown-amdhsa -disable-output < %s 2>&1 | FileCheck %s -; CHECK-NOT: AMDIC: try folding call void @llvm.lifetime.start.p0i8 -; CHECK-NOT: AMDIC: try folding call void @llvm.lifetime.end.p0i8 +; CHECK-NOT: AMDIC: try folding call void @llvm.lifetime.start.p0 +; CHECK-NOT: AMDIC: try folding call void @llvm.lifetime.end.p0 ; CHECK-NOT: AMDIC: try folding call void @llvm.dbg.value define void @foo(i32 %i) { - call void @llvm.lifetime.start.p0i8(i64 1, i8* undef) - call void @llvm.lifetime.end.p0i8(i64 1, i8* undef) + call void @llvm.lifetime.start.p0(i64 1, ptr undef) + call void @llvm.lifetime.end.p0(i64 1, ptr undef) call void 
@llvm.dbg.value(metadata i32 undef, metadata !DILocalVariable(name: "1", scope: !2), metadata !DIExpression()), !dbg !3 ret void } -declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) declare void @llvm.dbg.value(metadata, metadata, metadata) !llvm.module.flags = !{!1} diff --git a/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll --- a/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll +++ b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll @@ -19,7 +19,7 @@ declare float @llvm.fmuladd.f32(float, float, float) #0 ; CHECK: s_endpgm -define amdgpu_kernel void @foo(float addrspace(1)* noalias nocapture readonly %arg, float addrspace(1)* noalias nocapture readonly %arg1, float addrspace(1)* noalias nocapture %arg2, float %arg3) local_unnamed_addr !reqd_work_group_size !0 { +define amdgpu_kernel void @foo(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture %arg2, float %arg3) local_unnamed_addr !reqd_work_group_size !0 { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.y() %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -28,7 +28,6 @@ %tmp7 = sub i32 %tmp6, 0 %tmp8 = add i32 %tmp7, 0 %tmp9 = add i32 %tmp8, 0 - %tmp10 = getelementptr inbounds [462 x float], [462 x float] addrspace(3)* @0, i32 0, i32 0 br label %bb12 bb11: ; preds = %bb30 @@ -58,8 +57,8 @@ bb21: ; preds = %bb21, %bb17 %tmp22 = phi i32 [ %tmp4, %bb17 ], [ %tmp25, %bb21 ] %tmp23 = add i32 %tmp22, %tmp16 - %tmp24 = getelementptr inbounds float, float addrspace(3)* %tmp10, i32 %tmp23 - store float undef, float addrspace(3)* %tmp24, align 4 + %tmp24 = getelementptr inbounds float, ptr addrspace(3) @0, i32 %tmp23 + store float undef, ptr addrspace(3) %tmp24, align 4 %tmp25 = add nuw i32 %tmp22, 8 br i1 undef, label %bb21, label %.loopexit @@ -77,8 +76,8 @@ bb31: ; preds = %bb31, %bb26 %tmp32 = phi i32 [ %tmp9, %bb26 ], [ undef, %bb31 ] - %tmp33 = getelementptr inbounds [462 x float], [462 x float] addrspace(3)* @0, i32 0, i32 %tmp32 - %tmp34 = load float, float addrspace(3)* %tmp33, align 4 + %tmp33 = getelementptr inbounds [462 x float], ptr addrspace(3) @0, i32 0, i32 %tmp32 + %tmp34 = load float, ptr addrspace(3) %tmp33, align 4 %tmp35 = tail call float @llvm.fmuladd.f32(float %tmp34, float undef, float undef) %tmp36 = tail call float @llvm.fmuladd.f32(float undef, float undef, float %tmp35) br i1 undef, label %bb30, label %bb31 diff --git a/llvm/test/CodeGen/AMDGPU/smrd-vccz-bug.ll b/llvm/test/CodeGen/AMDGPU/smrd-vccz-bug.ll --- a/llvm/test/CodeGen/AMDGPU/smrd-vccz-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/smrd-vccz-bug.ll @@ -12,14 +12,14 @@ ; GCN: buffer_store_dword ; GCN: [[EXIT]]: ; GCN: s_endpgm -define amdgpu_kernel void @vccz_workaround(i32 addrspace(4)* %in, i32 addrspace(1)* %out, float %cond) { +define amdgpu_kernel void @vccz_workaround(ptr addrspace(4) %in, ptr addrspace(1) %out, float %cond) { entry: %cnd = fcmp oeq float 0.0, %cond - %sgpr = load volatile i32, i32 addrspace(4)* %in + %sgpr = load volatile i32, ptr addrspace(4) %in br i1 %cnd, label %if, label %endif if: - store i32 %sgpr, i32 addrspace(1)* %out + store i32 %sgpr, ptr addrspace(1) %out br label %endif endif: @@ -34,14 +34,14 @@ ; GCN: buffer_store_dword ; GCN: [[EXIT]]: ; GCN: 
s_endpgm -define amdgpu_kernel void @vccz_noworkaround(float addrspace(1)* %in, float addrspace(1)* %out) { +define amdgpu_kernel void @vccz_noworkaround(ptr addrspace(1) %in, ptr addrspace(1) %out) { entry: - %vgpr = load volatile float, float addrspace(1)* %in + %vgpr = load volatile float, ptr addrspace(1) %in %cnd = fcmp oeq float 0.0, %vgpr br i1 %cnd, label %if, label %endif if: - store float %vgpr, float addrspace(1)* %out + store float %vgpr, ptr addrspace(1) %out br label %endif endif: diff --git a/llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll b/llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll --- a/llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll +++ b/llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll @@ -5,19 +5,19 @@ ; GCN: s_waitcnt lgkmcnt(0) ; GCN: global_store_dword v -define amdgpu_kernel void @zot(i32 addrspace(1)* nocapture %arg, i64 addrspace(1)* nocapture %arg1) { +define amdgpu_kernel void @zot(ptr addrspace(1) nocapture %arg, ptr addrspace(1) nocapture %arg1) { bb: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = icmp eq i32 %tmp, 0 br i1 %tmp2, label %bb3, label %bb8 bb3: ; preds = %bb - %tmp4 = load i32, i32 addrspace(1)* %arg, align 4 - store i32 0, i32 addrspace(1)* %arg, align 4 + %tmp4 = load i32, ptr addrspace(1) %arg, align 4 + store i32 0, ptr addrspace(1) %arg, align 4 %tmp5 = zext i32 %tmp4 to i64 - %tmp6 = load i64, i64 addrspace(1)* %arg1, align 8 + %tmp6 = load i64, ptr addrspace(1) %arg1, align 8 %tmp7 = add i64 %tmp6, %tmp5 - store i64 %tmp7, i64 addrspace(1)* %arg1, align 8 + store i64 %tmp7, ptr addrspace(1) %arg1, align 8 br label %bb8 bb8: ; preds = %bb3, %bb diff --git a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll --- a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll @@ -34,7 +34,6 @@ ; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: s_movk_i32 s0, 0xffc ; FLATSCR-NEXT: scratch_load_dword v0, off, s0 ; 4-byte Folded Reload ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: scratch_store_dword off, v0, vcc_hi offset:8 @@ -71,7 +70,6 @@ ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: s_mov_b32 s4, 0x40000 ; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 @@ -90,7 +88,6 @@ ; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: s_movk_i32 s0, 0x1000 ; FLATSCR-NEXT: scratch_load_dword v0, off, s0 ; 4-byte Folded Reload ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: scratch_store_dword off, v0, vcc_hi offset:8 @@ -237,7 +234,6 @@ ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: s_movk_i32 s8, 0x1004 ; FLATSCR-NEXT: scratch_load_dword v0, off, s8 ; 4-byte Folded Reload ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART @@ -320,7 +316,6 @@ ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_hi offset:8 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_movk_i32 s0, 0xff8 ; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART @@ -367,7 +362,6 @@ ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: 
buffer_load_dword v0, off, s[0:3], 0 offset:8 glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_mov_b32 s4, 0x3ff00 ; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload ; MUBUF-NEXT: s_nop 0 ; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Reload @@ -391,7 +385,6 @@ ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_hi offset:8 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_movk_i32 s0, 0xffc ; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll --- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll @@ -10551,7 +10551,6 @@ ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: ;;#ASMSTART ; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: s_mov_b32 s2, 0x84800 ; GFX6-NEXT: buffer_load_dword v17, off, s[40:43], s2 ; 4-byte Folded Reload ; GFX6-NEXT: buffer_load_dword v18, off, s[40:43], s2 offset:4 ; 4-byte Folded Reload ; GFX6-NEXT: buffer_load_dword v19, off, s[40:43], s2 offset:8 ; 4-byte Folded Reload @@ -10796,7 +10795,7 @@ ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s0 ; 16-byte Folded Spill ; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2100 ; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2100 +; GFX9-FLATSCR-NEXT: s_nop 0 ; GFX9-FLATSCR-NEXT: ;;#ASMSTART ; GFX9-FLATSCR-NEXT: ;;#ASMEND ; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[8:11], off, s0 ; 16-byte Folded Reload @@ -11032,7 +11031,6 @@ ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v60 ; GFX10-FLATSCR-NEXT: ;;#ASMSTART ; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x2010 ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v65 ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v66 ; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v67 diff --git a/llvm/test/CodeGen/AMDGPU/sram-ecc-default.ll b/llvm/test/CodeGen/AMDGPU/sram-ecc-default.ll --- a/llvm/test/CodeGen/AMDGPU/sram-ecc-default.ll +++ b/llvm/test/CodeGen/AMDGPU/sram-ecc-default.ll @@ -13,12 +13,12 @@ ; GCN-LABEL: {{^}}load_global_hi_v2i16_reglo_vreg: ; NO-ECC: global_load_short_d16_hi ; ECC: global_load_ushort -define void @load_global_hi_v2i16_reglo_vreg(i16 addrspace(1)* %in, i16 %reg) { +define void @load_global_hi_v2i16_reglo_vreg(ptr addrspace(1) %in, i16 %reg) { entry: - %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 -2047 - %load = load i16, i16 addrspace(1)* %gep + %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 -2047 + %load = load i16, ptr addrspace(1) %gep %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 1 - store <2 x i16> %build1, <2 x i16> addrspace(1)* undef + store <2 x i16> %build1, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sroa-before-unroll.ll b/llvm/test/CodeGen/AMDGPU/sroa-before-unroll.ll --- a/llvm/test/CodeGen/AMDGPU/sroa-before-unroll.ll +++ b/llvm/test/CodeGen/AMDGPU/sroa-before-unroll.ll @@ -16,11 +16,11 @@ ; LOOP: br i1 %{{[^,]+}}, label %exit, label %loop.header ; FULL-UNROLL: alloca -; FULL-UNROLL-COUNT-256: store i32 {{[0-9]+}}, i32 addrspace(5)* +; FULL-UNROLL-COUNT-256: store i32 {{[0-9]+}}, ptr addrspace(5) ; FULL-UNROLL-NOT: br -; FUNC: store i32 %{{[^,]+}}, i32 addrspace(1)* %out -define amdgpu_kernel void @private_memory(i32 
addrspace(1)* %out, i32 %n) { +; FUNC: store i32 %{{[^,]+}}, ptr addrspace(1) %out +define amdgpu_kernel void @private_memory(ptr addrspace(1) %out, i32 %n) { entry: %alloca = alloca [16 x i32], addrspace(5) br label %loop.header @@ -32,8 +32,8 @@ loop.body: %salt = xor i32 %counter, %n %idx = and i32 %salt, 15 - %ptr = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %idx - store i32 %counter, i32 addrspace(5)* %ptr + %ptr = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx + store i32 %counter, ptr addrspace(5) %ptr br label %loop.inc loop.inc: @@ -42,8 +42,8 @@ br i1 %cmp, label %exit, label %loop.header exit: - %gep = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %n - %load = load i32, i32 addrspace(5)* %gep - store i32 %load, i32 addrspace(1)* %out + %gep = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %n + %load = load i32, ptr addrspace(5) %gep + store i32 %load, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/ssubo.ll b/llvm/test/CodeGen/AMDGPU/ssubo.ll --- a/llvm/test/CodeGen/AMDGPU/ssubo.ll +++ b/llvm/test/CodeGen/AMDGPU/ssubo.ll @@ -8,47 +8,47 @@ declare { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32>, <2 x i32>) nounwind readnone ; FUNC-LABEL: {{^}}ssubo_i64_zext: -define amdgpu_kernel void @ssubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { +define amdgpu_kernel void @ssubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind { %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind %val = extractvalue { i64, i1 } %ssub, 0 %carry = extractvalue { i64, i1 } %ssub, 1 %ext = zext i1 %carry to i64 %add2 = add i64 %val, %ext - store i64 %add2, i64 addrspace(1)* %out, align 8 + store i64 %add2, ptr addrspace(1) %out, align 8 ret void } ; FUNC-LABEL: {{^}}s_ssubo_i32: -define amdgpu_kernel void @s_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind { +define amdgpu_kernel void @s_ssubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) nounwind { %ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind %val = extractvalue { i32, i1 } %ssub, 0 %carry = extractvalue { i32, i1 } %ssub, 1 - store i32 %val, i32 addrspace(1)* %out, align 4 - store i1 %carry, i1 addrspace(1)* %carryout + store i32 %val, ptr addrspace(1) %out, align 4 + store i1 %carry, ptr addrspace(1) %carryout ret void } ; FUNC-LABEL: {{^}}v_ssubo_i32: -define amdgpu_kernel void @v_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { - %a = load i32, i32 addrspace(1)* %aptr, align 4 - %b = load i32, i32 addrspace(1)* %bptr, align 4 +define amdgpu_kernel void @v_ssubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind { + %a = load i32, ptr addrspace(1) %aptr, align 4 + %b = load i32, ptr addrspace(1) %bptr, align 4 %ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind %val = extractvalue { i32, i1 } %ssub, 0 %carry = extractvalue { i32, i1 } %ssub, 1 - store i32 %val, i32 addrspace(1)* %out, align 4 - store i1 %carry, i1 addrspace(1)* %carryout + store i32 %val, ptr addrspace(1) %out, align 4 + store i1 %carry, ptr addrspace(1) %carryout ret void } ; FUNC-LABEL: {{^}}s_ssubo_i64: ; GCN: s_sub_u32 ; GCN: s_subb_u32 -define amdgpu_kernel void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind { 
+define amdgpu_kernel void @s_ssubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a, i64 %b) nounwind { %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind %val = extractvalue { i64, i1 } %ssub, 0 %carry = extractvalue { i64, i1 } %ssub, 1 - store i64 %val, i64 addrspace(1)* %out, align 8 - store i1 %carry, i1 addrspace(1)* %carryout + store i64 %val, ptr addrspace(1) %out, align 8 + store i1 %carry, ptr addrspace(1) %carryout ret void } @@ -61,14 +61,14 @@ ; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, ; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc, -define amdgpu_kernel void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { - %a = load i64, i64 addrspace(1)* %aptr, align 4 - %b = load i64, i64 addrspace(1)* %bptr, align 4 +define amdgpu_kernel void @v_ssubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind { + %a = load i64, ptr addrspace(1) %aptr, align 4 + %b = load i64, ptr addrspace(1) %bptr, align 4 %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind %val = extractvalue { i64, i1 } %ssub, 0 %carry = extractvalue { i64, i1 } %ssub, 1 - store i64 %val, i64 addrspace(1)* %out, align 8 - store i1 %carry, i1 addrspace(1)* %carryout + store i64 %val, ptr addrspace(1) %out, align 8 + store i1 %carry, ptr addrspace(1) %carryout ret void } @@ -79,14 +79,14 @@ ; SICIVI: v_cmp_lt_i32 ; SICIVI: v_cmp_lt_i32 ; SICIVI: v_sub_{{[iu]}}32 -define amdgpu_kernel void @v_ssubo_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %carryout, <2 x i32> addrspace(1)* %aptr, <2 x i32> addrspace(1)* %bptr) nounwind { - %a = load <2 x i32>, <2 x i32> addrspace(1)* %aptr, align 4 - %b = load <2 x i32>, <2 x i32> addrspace(1)* %bptr, align 4 +define amdgpu_kernel void @v_ssubo_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind { + %a = load <2 x i32>, ptr addrspace(1) %aptr, align 4 + %b = load <2 x i32>, ptr addrspace(1) %bptr, align 4 %sadd = call { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b) nounwind %val = extractvalue { <2 x i32>, <2 x i1> } %sadd, 0 %carry = extractvalue { <2 x i32>, <2 x i1> } %sadd, 1 - store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4 + store <2 x i32> %val, ptr addrspace(1) %out, align 4 %carry.ext = zext <2 x i1> %carry to <2 x i32> - store <2 x i32> %carry.ext, <2 x i32> addrspace(1)* %carryout + store <2 x i32> %carry.ext, ptr addrspace(1) %carryout ret void } diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll --- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll @@ -7,7 +7,7 @@ ; During instruction selection, we use immediate const zero for soffset in ; MUBUF stack accesses and let eliminateFrameIndex to fix up this field to use ; the correct frame register whenever required. 
-define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, <4 x i32> addrspace(1)* %input, <4 x float> addrspace(1)* %output, i32 %i) { +define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr addrspace(1) %input, ptr addrspace(1) %output, i32 %i) { ; MUBUF-LABEL: kernel_background_evaluate: ; MUBUF: ; %bb.0: ; %entry ; MUBUF-NEXT: s_load_dword s0, s[0:1], 0x24 @@ -136,26 +136,26 @@ entry: %sd = alloca < 1339 x i32>, align 8192, addrspace(5) %state = alloca <4 x i32>, align 16, addrspace(5) - %rslt = call i32 @svm_eval_nodes(float addrspace(5)* %kg, <1339 x i32> addrspace(5)* %sd, <4 x i32> addrspace(5)* %state, i32 0, i32 4194304) + %rslt = call i32 @svm_eval_nodes(ptr addrspace(5) %kg, ptr addrspace(5) %sd, ptr addrspace(5) %state, i32 0, i32 4194304) %cmp = icmp eq i32 %rslt, 0 br i1 %cmp, label %shader_eval_surface.exit, label %if.then4.i if.then4.i: ; preds = %entry - %rng_hash.i.i = getelementptr inbounds < 4 x i32>, <4 x i32> addrspace(5)* %state, i32 0, i32 1 - %tmp0 = load i32, i32 addrspace(5)* %rng_hash.i.i, align 4 - %rng_offset.i.i = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(5)* %state, i32 0, i32 2 - %tmp1 = load i32, i32 addrspace(5)* %rng_offset.i.i, align 4 + %rng_hash.i.i = getelementptr inbounds < 4 x i32>, ptr addrspace(5) %state, i32 0, i32 1 + %tmp0 = load i32, ptr addrspace(5) %rng_hash.i.i, align 4 + %rng_offset.i.i = getelementptr inbounds <4 x i32>, ptr addrspace(5) %state, i32 0, i32 2 + %tmp1 = load i32, ptr addrspace(5) %rng_offset.i.i, align 4 %add.i.i = add i32 %tmp1, %tmp0 %add1.i.i = add i32 %add.i.i, 0 %mul.i.i.i.i = mul i32 %add1.i.i, 1103515245 %add.i.i.i.i = add i32 %mul.i.i.i.i, 12345 - store i32 %add.i.i.i.i, i32 addrspace(5)* undef, align 16 + store i32 %add.i.i.i.i, ptr addrspace(5) undef, align 16 br label %shader_eval_surface.exit shader_eval_surface.exit: ; preds = %entry ret void } -declare hidden i32 @svm_eval_nodes(float addrspace(5)*, <1339 x i32> addrspace(5)*, <4 x i32> addrspace(5)*, i32, i32) local_unnamed_addr #0 +declare hidden i32 @svm_eval_nodes(ptr addrspace(5), ptr addrspace(5), ptr addrspace(5), i32, i32) local_unnamed_addr #0 attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll --- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll @@ -102,7 +102,7 @@ ; GFX9-NEXT: .end_amdhsa_kernel ; GFX9-NEXT: .text %alloca.align = alloca i32, align 128, addrspace(5) - store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128 + store volatile i32 9, ptr addrspace(5) %alloca.align, align 128 ret void } @@ -205,7 +205,7 @@ ; GFX9-NEXT: .end_amdhsa_kernel ; GFX9-NEXT: .text %alloca.align = alloca i32, align 4, addrspace(5) - store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4 + store volatile i32 9, ptr addrspace(5) %alloca.align, align 4 ret void } @@ -308,7 +308,7 @@ ; GFX9-NEXT: .end_amdhsa_kernel ; GFX9-NEXT: .text %alloca.align = alloca i32, align 4, addrspace(5) - store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4 + store volatile i32 9, ptr addrspace(5) %alloca.align, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll 
b/llvm/test/CodeGen/AMDGPU/stack-realign.ll --- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll @@ -26,8 +26,8 @@ ; GCN: ; ScratchSize: 144 define void @needs_align16_default_stack_align(i32 %idx) #0 { %alloca.align16 = alloca [8 x <4 x i32>], align 16, addrspace(5) - %gep0 = getelementptr inbounds [8 x <4 x i32>], [8 x <4 x i32>] addrspace(5)* %alloca.align16, i32 0, i32 %idx - store volatile <4 x i32> , <4 x i32> addrspace(5)* %gep0, align 16 + %gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align16, i32 0, i32 %idx + store volatile <4 x i32> , ptr addrspace(5) %gep0, align 16 ret void } @@ -47,8 +47,8 @@ ; GCN: ; ScratchSize: 160 define void @needs_align16_stack_align4(i32 %idx) #2 { %alloca.align16 = alloca [8 x <4 x i32>], align 16, addrspace(5) - %gep0 = getelementptr inbounds [8 x <4 x i32>], [8 x <4 x i32>] addrspace(5)* %alloca.align16, i32 0, i32 %idx - store volatile <4 x i32> , <4 x i32> addrspace(5)* %gep0, align 16 + %gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align16, i32 0, i32 %idx + store volatile <4 x i32> , ptr addrspace(5) %gep0, align 16 ret void } @@ -68,8 +68,8 @@ ; GCN: ; ScratchSize: 192 define void @needs_align32(i32 %idx) #0 { %alloca.align16 = alloca [8 x <4 x i32>], align 32, addrspace(5) - %gep0 = getelementptr inbounds [8 x <4 x i32>], [8 x <4 x i32>] addrspace(5)* %alloca.align16, i32 0, i32 %idx - store volatile <4 x i32> , <4 x i32> addrspace(5)* %gep0, align 32 + %gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align16, i32 0, i32 %idx + store volatile <4 x i32> , ptr addrspace(5) %gep0, align 32 ret void } @@ -84,8 +84,8 @@ ; GCN: ; ScratchSize: 52 define void @force_realign4(i32 %idx) #1 { %alloca.align16 = alloca [8 x i32], align 4, addrspace(5) - %gep0 = getelementptr inbounds [8 x i32], [8 x i32] addrspace(5)* %alloca.align16, i32 0, i32 %idx - store volatile i32 3, i32 addrspace(5)* %gep0, align 4 + %gep0 = getelementptr inbounds [8 x i32], ptr addrspace(5) %alloca.align16, i32 0, i32 %idx + store volatile i32 3, ptr addrspace(5) %gep0, align 4 ret void } @@ -95,7 +95,7 @@ ; GCN: s_swappc_b64 define amdgpu_kernel void @kernel_call_align16_from_8() #0 { %alloca = alloca i32, align 4, addrspace(5) - store volatile i32 2, i32 addrspace(5)* %alloca + store volatile i32 2, ptr addrspace(5) %alloca call void @needs_align16_default_stack_align(i32 1) ret void } @@ -106,7 +106,7 @@ ; GCN: s_swappc_b64 define amdgpu_kernel void @kernel_call_align16_from_5() { %alloca0 = alloca i8, align 1, addrspace(5) - store volatile i8 2, i8 addrspace(5)* %alloca0 + store volatile i8 2, ptr addrspace(5) %alloca0 call void @needs_align16_default_stack_align(i32 1) ret void @@ -117,7 +117,7 @@ ; GCN: s_swappc_b64 define amdgpu_kernel void @kernel_call_align4_from_5() { %alloca0 = alloca i8, align 1, addrspace(5) - store volatile i8 2, i8 addrspace(5)* %alloca0 + store volatile i8 2, ptr addrspace(5) %alloca0 call void @needs_align16_stack_align4(i32 1) ret void @@ -134,7 +134,7 @@ ; GCN: s_mov_b32 s33, [[FP_COPY]] define void @default_realign_align128(i32 %idx) #0 { %alloca.align = alloca i32, align 128, addrspace(5) - store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128 + store volatile i32 9, ptr addrspace(5) %alloca.align, align 128 ret void } @@ -144,7 +144,7 @@ ; GCN-NOT: s32 define void @disable_realign_align128(i32 %idx) #3 { %alloca.align = alloca i32, align 128, addrspace(5) - store volatile i32 9, i32 addrspace(5)* %alloca.align, 
align 128 + store volatile i32 9, ptr addrspace(5) %alloca.align, align 128 ret void } @@ -181,13 +181,13 @@ ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN: s_setpc_b64 s[30:31] %temp = alloca i32, align 1024, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %temp, align 1024 + store volatile i32 0, ptr addrspace(5) %temp, align 1024 call void @extern_func(<32 x i32> %a, i32 %b) ret void } %struct.Data = type { [9 x i32] } -define i32 @needs_align1024_stack_args_used_inside_loop(%struct.Data addrspace(5)* nocapture readonly byval(%struct.Data) align 8 %arg) local_unnamed_addr #4 { +define i32 @needs_align1024_stack_args_used_inside_loop(ptr addrspace(5) nocapture readonly byval(%struct.Data) align 8 %arg) local_unnamed_addr #4 { ; The local object allocation needed an alignment of 1024. ; Since the function argument is accessed in a loop with an ; index variable, the base pointer first get loaded into a VGPR @@ -212,7 +212,7 @@ ; GCN-NEXT: s_setpc_b64 s[30:31] begin: %local_var = alloca i32, align 1024, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %local_var, align 1024 + store volatile i32 0, ptr addrspace(5) %local_var, align 1024 br label %loop_body loop_end: ; preds = %loop_body @@ -222,8 +222,8 @@ loop_body: ; preds = %loop_end, %begin %lp_idx = phi i32 [ 0, %begin ], [ %idx_next, %loop_end ] - %ptr = getelementptr inbounds %struct.Data, %struct.Data addrspace(5)* %arg, i32 0, i32 0, i32 %lp_idx - %val = load i32, i32 addrspace(5)* %ptr, align 8 + %ptr = getelementptr inbounds %struct.Data, ptr addrspace(5) %arg, i32 0, i32 0, i32 %lp_idx + %val = load i32, ptr addrspace(5) %ptr, align 8 %lp_cond = icmp eq i32 %val, %lp_idx br i1 %lp_cond, label %loop_end, label %exit @@ -245,7 +245,7 @@ ; GCN-NEXT: ;;#ASMEND ; GCN: s_setpc_b64 s[30:31] %local_val = alloca i32, align 128, addrspace(5) - store volatile i32 %b, i32 addrspace(5)* %local_val, align 128 + store volatile i32 %b, ptr addrspace(5) %local_val, align 128 ; Use all clobberable registers, so BP has to spill to a VGPR. call void asm sideeffect "", "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9} @@ -265,7 +265,7 @@ ; GCN: v_mov_b32_e32 v0, s34 ; GCN-DAG: buffer_store_dword v0, off, s[0:3], s32 %local_val = alloca i32, align 128, addrspace(5) - store volatile i32 %b, i32 addrspace(5)* %local_val, align 128 + store volatile i32 %b, ptr addrspace(5) %local_val, align 128 call void asm sideeffect "; clobber nonpreserved SGPRs and 64 CSRs", "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9} @@ -287,7 +287,7 @@ ret void } -define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i32 %b, [4096 x i8] addrspace(5)* byval([4096 x i8]) align 4 %arg) #5 { +define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i32 %b, ptr addrspace(5) byval([4096 x i8]) align 4 %arg) #5 { ; If the size of the offset exceeds the MUBUF offset field we need another ; scratch VGPR to hold the offset. 
@@ -306,7 +306,7 @@ ; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill %local_val = alloca i32, align 128, addrspace(5) - store volatile i32 %b, i32 addrspace(5)* %local_val, align 128 + store volatile i32 %b, ptr addrspace(5) %local_val, align 128 call void asm sideeffect "; clobber nonpreserved SGPRs and 64 CSRs", "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9} diff --git a/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll b/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll --- a/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll @@ -1,15 +1,14 @@ ; RUN: not llc -march=amdgcn < %s 2>&1 | FileCheck -check-prefix=ERROR %s ; RUN: not llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s -declare void @llvm.memset.p5i8.i32(i8 addrspace(5)* nocapture, i8, i32, i32, i1) #1 +declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture, i8, i32, i32, i1) #1 ; ERROR: error: :0:0: stack frame size (131061) exceeds limit (131056) in function 'stack_size_limit_wave64' ; GCN: ; ScratchSize: 131061 define amdgpu_kernel void @stack_size_limit_wave64() #0 { entry: %alloca = alloca [131057 x i8], align 1, addrspace(5) - %alloca.bc = bitcast [131057 x i8] addrspace(5)* %alloca to i8 addrspace(5)* - call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 131057, i32 1, i1 true) + call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 9, i32 131057, i32 1, i1 true) ret void } @@ -18,8 +17,7 @@ define amdgpu_kernel void @stack_size_limit_wave32() #1 { entry: %alloca = alloca [262113 x i8], align 1, addrspace(5) - %alloca.bc = bitcast [262113 x i8] addrspace(5)* %alloca to i8 addrspace(5)* - call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 262113, i32 1, i1 true) + call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 9, i32 262113, i32 1, i1 true) ret void } @@ -28,8 +26,7 @@ define amdgpu_kernel void @max_stack_size_wave64() #0 { entry: %alloca = alloca [131052 x i8], align 1, addrspace(5) - %alloca.bc = bitcast [131052 x i8] addrspace(5)* %alloca to i8 addrspace(5)* - call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 131052, i32 1, i1 true) + call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 9, i32 131052, i32 1, i1 true) ret void } @@ -38,8 +35,7 @@ define amdgpu_kernel void @max_stack_size_wave32() #1 { entry: %alloca = alloca [262108 x i8], align 1, addrspace(5) - %alloca.bc = bitcast [262108 x i8] addrspace(5)* %alloca to i8 addrspace(5)* - call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 262108, i32 1, i1 true) + call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 9, i32 262108, i32 1, i1 true) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sub-zext-cc-zext-cc.ll b/llvm/test/CodeGen/AMDGPU/sub-zext-cc-zext-cc.ll --- a/llvm/test/CodeGen/AMDGPU/sub-zext-cc-zext-cc.ll +++ b/llvm/test/CodeGen/AMDGPU/sub-zext-cc-zext-cc.ll @@ -21,7 +21,7 @@ define amdgpu_cs float @sub_zext_zext() { .entry: - %t519 = load float, float addrspace(3)* null + %t519 = load float, ptr addrspace(3) null %t524 = fcmp ogt float %t519, 0.000000e+00 %t525 = fcmp olt float %t519, 0.000000e+00 diff --git a/llvm/test/CodeGen/AMDGPU/swdev282079.ll b/llvm/test/CodeGen/AMDGPU/swdev282079.ll --- a/llvm/test/CodeGen/AMDGPU/swdev282079.ll +++ b/llvm/test/CodeGen/AMDGPU/swdev282079.ll @@ -1,13 +1,13 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s -define protected amdgpu_kernel void @foo(i64 addrspace(1)* %arg, i64 addrspace(1)* %arg1) { 
+define protected amdgpu_kernel void @foo(ptr addrspace(1) %arg, ptr addrspace(1) %arg1) { bb: - %tmp = addrspacecast i64* addrspace(5)* null to i64** - %tmp2 = call i64 @eggs(i64* undef) #1 - %tmp3 = load i64*, i64** %tmp, align 8 - %tmp4 = getelementptr inbounds i64, i64* %tmp3, i64 undef - store i64 %tmp2, i64* %tmp4, align 8 + %tmp = addrspacecast ptr addrspace(5) null to ptr + %tmp2 = call i64 @eggs(ptr undef) #1 + %tmp3 = load ptr, ptr %tmp, align 8 + %tmp4 = getelementptr inbounds i64, ptr %tmp3, i64 undef + store i64 %tmp2, ptr %tmp4, align 8 ret void } -declare hidden i64 @eggs(i64*) +declare hidden i64 @eggs(ptr) diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-cgp.ll b/llvm/test/CodeGen/AMDGPU/tail-call-cgp.ll --- a/llvm/test/CodeGen/AMDGPU/tail-call-cgp.ll +++ b/llvm/test/CodeGen/AMDGPU/tail-call-cgp.ll @@ -1,7 +1,7 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -codegenprepare %s | FileCheck %s -define internal fastcc void @callee(i32* nocapture %p, i32 %a) #0 { - store volatile i32 %a, i32* %p, align 4 +define internal fastcc void @callee(ptr nocapture %p, i32 %a) #0 { + store volatile i32 %a, ptr %p, align 4 ret void } @@ -9,13 +9,13 @@ ; CHECK: tail call fastcc void @callee( ; CHECK-NEXT: ret void ; CHECK: ret void -define void @func_caller(i32* nocapture %p, i32 %a, i32 %b) #0 { +define void @func_caller(ptr nocapture %p, i32 %a, i32 %b) #0 { entry: %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %bb, label %ret bb: - tail call fastcc void @callee(i32* %p, i32 %a) + tail call fastcc void @callee(ptr %p, i32 %a) br label %ret ret: @@ -27,13 +27,13 @@ ; CHECK-NEXT: br label %ret ; CHECK: ret void -define amdgpu_kernel void @kernel_caller(i32* nocapture %p, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @kernel_caller(ptr nocapture %p, i32 %a, i32 %b) #0 { entry: %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %bb, label %ret bb: - tail call fastcc void @callee(i32* %p, i32 %a) + tail call fastcc void @callee(ptr %p, i32 %a) br label %ret ret: diff --git a/llvm/test/CodeGen/AMDGPU/tail-duplication-convergent.ll b/llvm/test/CodeGen/AMDGPU/tail-duplication-convergent.ll --- a/llvm/test/CodeGen/AMDGPU/tail-duplication-convergent.ll +++ b/llvm/test/CodeGen/AMDGPU/tail-duplication-convergent.ll @@ -15,16 +15,16 @@ ; GCN-LABEL: {{^}}taildup_barrier: ; GCN: s_barrier ; GCN-NOT: s_barrier -define void @taildup_barrier(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i1 %cond) #0 { +define void @taildup_barrier(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond) #0 { entry: br i1 %cond, label %bb1, label %bb2 bb1: - store i32 0, i32 addrspace(1)* %a + store i32 0, ptr addrspace(1) %a br label %call bb2: - store i32 1, i32 addrspace(1)* %a + store i32 1, ptr addrspace(1) %a br label %call call: @@ -38,16 +38,16 @@ ; GCN-LABEL: {{^}}taildup_convergent_call: ; GCN: s_swappc_b64 ; GCN-NOT: s_swappc_b64 -define void @taildup_convergent_call(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i1 %cond) #1 { +define void @taildup_convergent_call(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond) #1 { entry: br i1 %cond, label %bb1, label %bb2 bb1: - store i32 0, i32 addrspace(1)* %a + store i32 0, ptr addrspace(1) %a br label %call bb2: - store i32 1, i32 addrspace(1)* %a + store i32 1, ptr addrspace(1) %a br label %call call: @@ -63,16 +63,16 @@ ; GCN-LABEL: {{^}}taildup_nonconvergent_call: ; GCN: s_swappc_b64 ; GCN-NOT: s_swappc_b64 -define void @taildup_nonconvergent_call(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i1 %cond) #1 { +define void @taildup_nonconvergent_call(ptr addrspace(1) %a, ptr addrspace(1) %b, 
i1 %cond) #1 { entry: br i1 %cond, label %bb1, label %bb2 bb1: - store i32 0, i32 addrspace(1)* %a + store i32 0, ptr addrspace(1) %a br label %call bb2: - store i32 1, i32 addrspace(1)* %a + store i32 1, ptr addrspace(1) %a br label %call call: @@ -86,16 +86,16 @@ ; GCN-LABEL: {{^}}taildup_convergent_tailcall: ; GCN: s_setpc_b64 ; GCN-NOT: s_setpc_b64 -define void @taildup_convergent_tailcall(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i1 %cond) #1 { +define void @taildup_convergent_tailcall(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond) #1 { entry: br i1 %cond, label %bb1, label %bb2 bb1: - store i32 0, i32 addrspace(1)* %a + store i32 0, ptr addrspace(1) %a br label %call bb2: - store i32 1, i32 addrspace(1)* %a + store i32 1, ptr addrspace(1) %a br label %call call: @@ -106,16 +106,16 @@ ; GCN-LABEL: {{^}}taildup_gws_init: ; GCN: ds_gws_init ; GCN-NOT: ds_gws_init -define amdgpu_kernel void @taildup_gws_init(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i1 %cond, i32 %val, i32 %offset) #0 { +define amdgpu_kernel void @taildup_gws_init(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond, i32 %val, i32 %offset) #0 { entry: br i1 %cond, label %bb1, label %bb2 bb1: - store i32 0, i32 addrspace(1)* %a + store i32 0, ptr addrspace(1) %a br label %call bb2: - store i32 1, i32 addrspace(1)* %a + store i32 1, ptr addrspace(1) %a br label %call call: @@ -129,16 +129,16 @@ ; GCN-LABEL: {{^}}taildup_gws_barrier: ; GCN: ds_gws_barrier ; GCN-NOT: ds_gws_barrier -define amdgpu_kernel void @taildup_gws_barrier(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i1 %cond, i32 %val, i32 %offset) #0 { +define amdgpu_kernel void @taildup_gws_barrier(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond, i32 %val, i32 %offset) #0 { entry: br i1 %cond, label %bb1, label %bb2 bb1: - store i32 0, i32 addrspace(1)* %a + store i32 0, ptr addrspace(1) %a br label %call bb2: - store i32 1, i32 addrspace(1)* %a + store i32 1, ptr addrspace(1) %a br label %call call: @@ -152,16 +152,16 @@ ; GCN-LABEL: {{^}}taildup_gws_sema_release_all: ; GCN: ds_gws_sema_release_all ; GCN-NOT: ds_gws -define amdgpu_kernel void @taildup_gws_sema_release_all(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i1 %cond, i32 %offset) #0 { +define amdgpu_kernel void @taildup_gws_sema_release_all(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond, i32 %offset) #0 { entry: br i1 %cond, label %bb1, label %bb2 bb1: - store i32 0, i32 addrspace(1)* %a + store i32 0, ptr addrspace(1) %a br label %call bb2: - store i32 1, i32 addrspace(1)* %a + store i32 1, ptr addrspace(1) %a br label %call call: diff --git a/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll b/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll --- a/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll +++ b/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll @@ -6,13 +6,13 @@ ; MIR-LABEL: name: ds_append_noalias ; MIR: DS_APPEND {{.*}} :: (load store (s32) on %{{.*}}, !noalias !{{[0-9]+}}, addrspace 3) define amdgpu_kernel void @ds_append_noalias() { - %lds = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(1)* null - %val = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %lds, i1 false), !noalias !0 - store i32 %val, i32 addrspace(1)* null, align 4 + %lds = load ptr addrspace(3), ptr addrspace(1) null + %val = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %lds, i1 false), !noalias !0 + store i32 %val, ptr addrspace(1) null, align 4 ret void } -declare i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* nocapture, i1 immarg) #0 +declare i32 
@llvm.amdgcn.ds.append.p3(ptr addrspace(3) nocapture, i1 immarg) #0 attributes #0 = { argmemonly convergent nounwind willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll --- a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll +++ b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll @@ -25,7 +25,7 @@ ; GCN: bb.2.else: ; GCN: successors: ; GCN: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GCN: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN: S_WAITCNT 3952 ; GCN: bb.3: entry: @@ -34,7 +34,7 @@ if: ; preds = %entry ret float %b else: ; preds = %entry - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef unreachable } @@ -62,7 +62,7 @@ ; GCN: bb.4.else: ; GCN: successors: ; GCN: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GCN: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN: S_WAITCNT 3952 ; GCN: bb.5: entry: @@ -76,7 +76,7 @@ else.if: ; preds = %else.if.cond ret float %d else: ; preds = %else.if.cond - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef unreachable } diff --git a/llvm/test/CodeGen/AMDGPU/trap-abis.ll b/llvm/test/CodeGen/AMDGPU/trap-abis.ll --- a/llvm/test/CodeGen/AMDGPU/trap-abis.ll +++ b/llvm/test/CodeGen/AMDGPU/trap-abis.ll @@ -15,7 +15,7 @@ declare void @llvm.trap() #0 declare void @llvm.debugtrap() #1 -define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) { +define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) { ; NOHSA-TRAP-GFX900-V2-LABEL: trap: ; NOHSA-TRAP-GFX900-V2: ; %bb.0: ; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 @@ -347,14 +347,14 @@ ; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1] ; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) ; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm - store volatile i32 1, i32 addrspace(1)* %arg0 + store volatile i32 1, ptr addrspace(1) %arg0 call void @llvm.trap() unreachable - store volatile i32 2, i32 addrspace(1)* %arg0 + store volatile i32 2, ptr addrspace(1) %arg0 ret void } -define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %arg0) local_unnamed_addr { +define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { ; NOHSA-TRAP-GFX900-V2-LABEL: non_entry_trap: ; NOHSA-TRAP-GFX900-V2: ; %bb.0: ; %entry ; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 @@ -777,7 +777,7 @@ ; HSA-NOTRAP-GFX900-V4-NEXT: .LBB1_2: ; %trap ; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm entry: - %tmp29 = load volatile i32, i32 addrspace(1)* %arg0 + %tmp29 = load volatile i32, ptr addrspace(1) %arg0 %cmp = icmp eq i32 %tmp29, -1 br i1 %cmp, label %ret, label %trap @@ -786,11 +786,11 @@ unreachable ret: - store volatile i32 3, i32 addrspace(1)* %arg0 + store volatile i32 3, ptr addrspace(1) 
%arg0 ret void } -define amdgpu_kernel void @debugtrap(i32 addrspace(1)* nocapture readonly %arg0) { +define amdgpu_kernel void @debugtrap(ptr addrspace(1) nocapture readonly %arg0) { ; NOHSA-TRAP-GFX900-V2-LABEL: debugtrap: ; NOHSA-TRAP-GFX900-V2: ; %bb.0: ; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -1159,9 +1159,9 @@ ; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1] ; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0) ; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm - store volatile i32 1, i32 addrspace(1)* %arg0 + store volatile i32 1, ptr addrspace(1) %arg0 call void @llvm.debugtrap() - store volatile i32 2, i32 addrspace(1)* %arg0 + store volatile i32 2, ptr addrspace(1) %arg0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/trap.ll b/llvm/test/CodeGen/AMDGPU/trap.ll --- a/llvm/test/CodeGen/AMDGPU/trap.ll +++ b/llvm/test/CodeGen/AMDGPU/trap.ll @@ -23,7 +23,7 @@ ; RUN: llc -global-isel=0 -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s ; RUN: llc -global-isel=1 -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s -; GCN-WARNING: warning: :0:0: in function hsa_debugtrap void (i32 addrspace(1)*): debugtrap handler not supported +; GCN-WARNING: warning: :0:0: in function hsa_debugtrap void (ptr addrspace(1)): debugtrap handler not supported declare void @llvm.trap() #0 @@ -50,11 +50,11 @@ ; TRAP-BIT: enable_trap_handler = 1 ; NO-TRAP-BIT: enable_trap_handler = 0 ; NO-MESA-TRAP: s_endpgm -define amdgpu_kernel void @hsa_trap(i32 addrspace(1)* nocapture readonly %arg0) { - store volatile i32 1, i32 addrspace(1)* %arg0 +define amdgpu_kernel void @hsa_trap(ptr addrspace(1) nocapture readonly %arg0) { + store volatile i32 1, ptr addrspace(1) %arg0 call void @llvm.trap() unreachable - store volatile i32 2, i32 addrspace(1)* %arg0 + store volatile i32 2, ptr addrspace(1) %arg0 ret void } @@ -78,10 +78,10 @@ ; TRAP-BIT: enable_trap_handler = 1 ; NO-TRAP-BIT: enable_trap_handler = 0 ; NO-MESA-TRAP: s_endpgm -define amdgpu_kernel void @hsa_debugtrap(i32 addrspace(1)* nocapture readonly %arg0) { - store volatile i32 1, i32 addrspace(1)* %arg0 +define amdgpu_kernel void @hsa_debugtrap(ptr addrspace(1) nocapture readonly %arg0) { + store volatile i32 1, ptr addrspace(1) %arg0 call void @llvm.debugtrap() - store volatile i32 2, i32 addrspace(1)* %arg0 + store volatile i32 2, ptr addrspace(1) %arg0 ret void } @@ -91,11 +91,11 @@ ; NO-TRAP-BIT: enable_trap_handler = 0 ; NO-HSA-TRAP: s_endpgm ; NO-MESA-TRAP: s_endpgm -define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) { - store volatile i32 1, i32 addrspace(1)* %arg0 +define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) { + store volatile i32 1, ptr addrspace(1) %arg0 call void @llvm.trap() unreachable - store volatile i32 2, i32 addrspace(1)* %arg0 + store volatile i32 2, ptr addrspace(1) %arg0 ret void } @@ -106,9 +106,9 @@ ; HSA-TRAP: BB{{[0-9]_[0-9]+}}: ; %trap ; HSA-TRAP: s_mov_b64 s[0:1], s[4:5] ; HSA-TRAP-NEXT: s_trap 2 -define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %arg0) local_unnamed_addr { +define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { entry: - %tmp29 = load volatile i32, i32 addrspace(1)* %arg0 + %tmp29 = load volatile i32, ptr addrspace(1) %arg0 %cmp = icmp eq i32 %tmp29, -1 br i1 %cmp, label %ret, label %trap @@ -117,7 +117,7 @@ unreachable ret: - store volatile i32 3, i32 
addrspace(1)* %arg0 + store volatile i32 3, ptr addrspace(1) %arg0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/tti-unroll-prefs.ll b/llvm/test/CodeGen/AMDGPU/tti-unroll-prefs.ll --- a/llvm/test/CodeGen/AMDGPU/tti-unroll-prefs.ll +++ b/llvm/test/CodeGen/AMDGPU/tti-unroll-prefs.ll @@ -16,10 +16,10 @@ ; loop to not be unrolled at all, but that may change in the future. ; CHECK-LABEL: @test -; CHECK: store i8 0, i8 addrspace(1)* -; CHECK-NOT: store i8 0, i8 addrspace(1)* +; CHECK: store i8 0, ptr addrspace(1) +; CHECK-NOT: store i8 0, ptr addrspace(1) ; CHECK: ret void -define amdgpu_kernel void @test(i8 addrspace(1)* nocapture %dst, i32 %a, i32 %b, i32 %c) { +define amdgpu_kernel void @test(ptr addrspace(1) nocapture %dst, i32 %a, i32 %b, i32 %c) { entry: %add = add nsw i32 %b, 4 %cmp = icmp sgt i32 %add, %a @@ -39,8 +39,8 @@ %add2 = add nsw i32 %b.addr.014, 1 %1 = sext i32 %b.addr.014 to i64 %add.ptr.sum = add nsw i64 %1, %0 - %add.ptr5 = getelementptr inbounds i8, i8 addrspace(1)* %dst, i64 %add.ptr.sum - store i8 0, i8 addrspace(1)* %add.ptr5, align 1 + %add.ptr5 = getelementptr inbounds i8, ptr addrspace(1) %dst, i64 %add.ptr.sum + store i8 0, ptr addrspace(1) %add.ptr5, align 1 %inc = add nsw i32 %i.015, 1 %cmp1 = icmp slt i32 %inc, 4 %cmp3 = icmp slt i32 %add2, %a diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll --- a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -stop-after twoaddressinstruction < %s | FileCheck %s ; Check that %16 gets constrained to register class sgpr_96_with_sub0_sub1. -define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* inreg %ptr) { +define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(ptr addrspace(4) inreg %ptr) { ; CHECK-LABEL: name: s_load_constant_v3i32_align4 ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr0, $sgpr1 @@ -20,6 +20,6 @@ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORD_IMM]] ; CHECK-NEXT: $sgpr2 = COPY killed [[COPY4]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit killed $sgpr0, implicit killed $sgpr1, implicit killed $sgpr2 - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 4 ret <3 x i32> %load } diff --git a/llvm/test/CodeGen/AMDGPU/uaddo.ll b/llvm/test/CodeGen/AMDGPU/uaddo.ll --- a/llvm/test/CodeGen/AMDGPU/uaddo.ll +++ b/llvm/test/CodeGen/AMDGPU/uaddo.ll @@ -9,13 +9,13 @@ ; EG: ADDC_UINT ; EG: ADDC_UINT -define amdgpu_kernel void @s_uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 { +define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) #0 { %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) %val = extractvalue { i64, i1 } %uadd, 0 %carry = extractvalue { i64, i1 } %uadd, 1 %ext = zext i1 %carry to i64 %add2 = add i64 %val, %ext - store i64 %add2, i64 addrspace(1)* %out, align 8 + store i64 %add2, ptr addrspace(1) %out, align 8 ret void } @@ -30,12 +30,12 @@ ; EG: ADDC_UINT ; EG: ADD_INT -define amdgpu_kernel void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @s_uaddo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) #0 { %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) %val = extractvalue { i32, i1 } %uadd, 0 %carry = extractvalue { i32, i1 } %uadd, 1 
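; A minimal sketch (hypothetical names) of the pattern the surrounding
; uaddo/usubo hunks rewrite: the struct result of the overflow intrinsic is
; split with extractvalue and both pieces are stored through untyped
; `ptr addrspace(1)` operands, so only the stored value types (i32 / i1)
; differ between tests after the conversion.
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)

define amdgpu_kernel void @uaddo_sketch(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) {
  %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue { i32, i1 } %res, 0
  %carry = extractvalue { i32, i1 } %res, 1
  store i32 %val, ptr addrspace(1) %out, align 4
  store i1 %carry, ptr addrspace(1) %carryout, align 1
  ret void
}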
- store i32 %val, i32 addrspace(1)* %out, align 4 - store i1 %carry, i1 addrspace(1)* %carryout + store i32 %val, ptr addrspace(1) %out, align 4 + store i1 %carry, ptr addrspace(1) %carryout ret void } @@ -48,18 +48,18 @@ ; EG: ADDC_UINT ; EG: ADD_INT -define amdgpu_kernel void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_uaddo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr - %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr - %a = load i32, i32 addrspace(1)* %a.gep, align 4 - %b = load i32, i32 addrspace(1)* %b.gep, align 4 + %a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr + %b.gep = getelementptr inbounds i32, ptr addrspace(1) %b.ptr + %a = load i32, ptr addrspace(1) %a.gep, align 4 + %b = load i32, ptr addrspace(1) %b.gep, align 4 %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) %val = extractvalue { i32, i1 } %uadd, 0 %carry = extractvalue { i32, i1 } %uadd, 1 - store i32 %val, i32 addrspace(1)* %out, align 4 - store i1 %carry, i1 addrspace(1)* %carryout + store i32 %val, ptr addrspace(1) %out, align 4 + store i1 %carry, ptr addrspace(1) %carryout ret void } @@ -72,19 +72,19 @@ ; EG: ADDC_UINT ; EG: ADD_INT -define amdgpu_kernel void @v_uaddo_i32_novcc(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_uaddo_i32_novcc(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr - %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr - %a = load i32, i32 addrspace(1)* %a.gep, align 4 - %b = load i32, i32 addrspace(1)* %b.gep, align 4 + %a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr + %b.gep = getelementptr inbounds i32, ptr addrspace(1) %b.ptr + %a = load i32, ptr addrspace(1) %a.gep, align 4 + %b = load i32, ptr addrspace(1) %b.gep, align 4 %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) %val = extractvalue { i32, i1 } %uadd, 0 %carry = extractvalue { i32, i1 } %uadd, 1 - store volatile i32 %val, i32 addrspace(1)* %out, align 4 + store volatile i32 %val, ptr addrspace(1) %out, align 4 call void asm sideeffect "", "~{vcc}"() #0 - store volatile i1 %carry, i1 addrspace(1)* %carryout + store volatile i1 %carry, ptr addrspace(1) %carryout ret void } @@ -94,12 +94,12 @@ ; EG: ADDC_UINT ; EG: ADD_INT -define amdgpu_kernel void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) #0 { +define amdgpu_kernel void @s_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a, i64 %b) #0 { %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) %val = extractvalue { i64, i1 } %uadd, 0 %carry = extractvalue { i64, i1 } %uadd, 1 - store i64 %val, i64 addrspace(1)* %out, align 8 - store i1 %carry, i1 addrspace(1)* %carryout + store i64 %val, ptr addrspace(1) %out, align 8 + store i1 %carry, ptr addrspace(1) %carryout ret void } @@ -115,18 +115,18 @@ ; EG: ADDC_UINT ; EG: ADD_INT -define amdgpu_kernel void @v_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* 
%a.ptr, i64 addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds i64, i64 addrspace(1)* %a.ptr - %b.gep = getelementptr inbounds i64, i64 addrspace(1)* %b.ptr - %a = load i64, i64 addrspace(1)* %a.gep - %b = load i64, i64 addrspace(1)* %b.gep + %a.gep = getelementptr inbounds i64, ptr addrspace(1) %a.ptr + %b.gep = getelementptr inbounds i64, ptr addrspace(1) %b.ptr + %a = load i64, ptr addrspace(1) %a.gep + %b = load i64, ptr addrspace(1) %b.gep %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) %val = extractvalue { i64, i1 } %uadd, 0 %carry = extractvalue { i64, i1 } %uadd, 1 - store i64 %val, i64 addrspace(1)* %out - store i1 %carry, i1 addrspace(1)* %carryout + store i64 %val, ptr addrspace(1) %out + store i1 %carry, ptr addrspace(1) %carryout ret void } @@ -136,18 +136,18 @@ ; GFX9: v_add_u16_e32 ; GFX9: v_cmp_lt_u16_e32 -define amdgpu_kernel void @v_uaddo_i16(i16 addrspace(1)* %out, i1 addrspace(1)* %carryout, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_uaddo_i16(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr - %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr - %a = load i16, i16 addrspace(1)* %a.gep - %b = load i16, i16 addrspace(1)* %b.gep + %a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr + %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr + %a = load i16, ptr addrspace(1) %a.gep + %b = load i16, ptr addrspace(1) %b.gep %uadd = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %a, i16 %b) %val = extractvalue { i16, i1 } %uadd, 0 %carry = extractvalue { i16, i1 } %uadd, 1 - store i16 %val, i16 addrspace(1)* %out - store i1 %carry, i1 addrspace(1)* %carryout + store i16 %val, ptr addrspace(1) %out + store i1 %carry, ptr addrspace(1) %carryout ret void } @@ -158,22 +158,22 @@ ; SICIVI: v_cmp_lt_i32 ; SICIVI: v_cmp_lt_i32 ; SICIVI: v_add_{{[iu]}}32 -define amdgpu_kernel void @v_uaddo_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %carryout, <2 x i32> addrspace(1)* %aptr, <2 x i32> addrspace(1)* %bptr) nounwind { - %a = load <2 x i32>, <2 x i32> addrspace(1)* %aptr, align 4 - %b = load <2 x i32>, <2 x i32> addrspace(1)* %bptr, align 4 +define amdgpu_kernel void @v_uaddo_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind { + %a = load <2 x i32>, ptr addrspace(1) %aptr, align 4 + %b = load <2 x i32>, ptr addrspace(1) %bptr, align 4 %sadd = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b) nounwind %val = extractvalue { <2 x i32>, <2 x i1> } %sadd, 0 %carry = extractvalue { <2 x i32>, <2 x i1> } %sadd, 1 - store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4 + store <2 x i32> %val, ptr addrspace(1) %out, align 4 %carry.ext = zext <2 x i1> %carry to <2 x i32> - store <2 x i32> %carry.ext, <2 x i32> addrspace(1)* %carryout + store <2 x i32> %carry.ext, ptr addrspace(1) %carryout ret void } ; FUNC-LABEL: {{^}}s_uaddo_clamp_bit: ; GCN: v_add_{{i|u|co_u}}32_e32 ; GCN: s_endpgm -define amdgpu_kernel void @s_uaddo_clamp_bit(i32 addrspace(1)* %out, i1 
addrspace(1)* %carryout, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @s_uaddo_clamp_bit(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) #0 { entry: %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) %val = extractvalue { i32, i1 } %uadd, 0 @@ -187,22 +187,22 @@ exit: %cout = phi i1 [false, %entry], [%c2, %if] - store i32 %val, i32 addrspace(1)* %out, align 4 - store i1 %cout, i1 addrspace(1)* %carryout + store i32 %val, ptr addrspace(1) %out, align 4 + store i1 %cout, ptr addrspace(1) %carryout ret void } ; FUNC-LABEL: {{^}}v_uaddo_clamp_bit: ; GCN: v_add_{{i|u|co_u}}32_e64 ; GCN: s_endpgm -define amdgpu_kernel void @v_uaddo_clamp_bit(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_uaddo_clamp_bit(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr - %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr - %a = load i32, i32 addrspace(1)* %a.gep - %b = load i32, i32 addrspace(1)* %b.gep + %a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr + %b.gep = getelementptr inbounds i32, ptr addrspace(1) %b.ptr + %a = load i32, ptr addrspace(1) %a.gep + %b = load i32, ptr addrspace(1) %b.gep %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) %val = extractvalue { i32, i1 } %uadd, 0 %carry = extractvalue { i32, i1 } %uadd, 1 @@ -215,8 +215,8 @@ exit: %cout = phi i1 [false, %entry], [%c2, %if] - store i32 %val, i32 addrspace(1)* %out, align 4 - store i1 %cout, i1 addrspace(1)* %carryout + store i32 %val, ptr addrspace(1) %out, align 4 + store i1 %cout, ptr addrspace(1) %carryout ret void } @@ -225,11 +225,11 @@ ; GCN: v_addc ; GCN: v_addc ; GCN: v_addc -define amdgpu_cs void @sv_uaddo_i128(i32 addrspace(1)* %out, i128 inreg %a, i128 %b) { +define amdgpu_cs void @sv_uaddo_i128(ptr addrspace(1) %out, i128 inreg %a, i128 %b) { %uadd = call { i128, i1 } @llvm.uadd.with.overflow.i128(i128 %a, i128 %b) %carry = extractvalue { i128, i1 } %uadd, 1 %carry.ext = zext i1 %carry to i32 - store i32 %carry.ext, i32 addrspace(1)* %out + store i32 %carry.ext, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll @@ -12,10 +12,10 @@ define void @func1() #0 { ; CHECK-LABEL: define {{[^@]+}}@func1 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: store i32 0, i32* @x, align 4 +; CHECK-NEXT: store i32 0, ptr @x, align 4 ; CHECK-NEXT: ret void ; - store i32 0, i32* @x + store i32 0, ptr @x ret void } diff --git a/llvm/test/CodeGen/AMDGPU/unknown-processor.ll b/llvm/test/CodeGen/AMDGPU/unknown-processor.ll --- a/llvm/test/CodeGen/AMDGPU/unknown-processor.ll +++ b/llvm/test/CodeGen/AMDGPU/unknown-processor.ll @@ -16,6 +16,6 @@ ; R600: MOV define amdgpu_kernel void @foo() { %alloca = alloca i32, align 4, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %alloca + store volatile i32 0, ptr addrspace(5) %alloca ret void } diff --git a/llvm/test/CodeGen/AMDGPU/unroll.ll b/llvm/test/CodeGen/AMDGPU/unroll.ll --- a/llvm/test/CodeGen/AMDGPU/unroll.ll +++ 
b/llvm/test/CodeGen/AMDGPU/unroll.ll @@ -9,8 +9,8 @@ ; CHECK-LABEL: @private_memory ; CHECK-NOT: alloca -; CHECK: store i32 5, i32 addrspace(1)* %out -define amdgpu_kernel void @private_memory(i32 addrspace(1)* %out) { +; CHECK: store i32 5, ptr addrspace(1) %out +define amdgpu_kernel void @private_memory(ptr addrspace(1) %out) { entry: %0 = alloca [32 x i32], addrspace(5) br label %loop.header @@ -20,8 +20,8 @@ br label %loop.body loop.body: - %ptr = getelementptr [32 x i32], [32 x i32] addrspace(5)* %0, i32 0, i32 %counter - store i32 %counter, i32 addrspace(5)* %ptr + %ptr = getelementptr [32 x i32], ptr addrspace(5) %0, i32 0, i32 %counter + store i32 %counter, ptr addrspace(5) %ptr br label %loop.inc loop.inc: @@ -30,19 +30,19 @@ br i1 %1, label %exit, label %loop.header exit: - %2 = getelementptr [32 x i32], [32 x i32] addrspace(5)* %0, i32 0, i32 5 - %3 = load i32, i32 addrspace(5)* %2 - store i32 %3, i32 addrspace(1)* %out + %2 = getelementptr [32 x i32], ptr addrspace(5) %0, i32 0, i32 5 + %3 = load i32, ptr addrspace(5) %2 + store i32 %3, ptr addrspace(1) %out ret void } ; Check that loop is unrolled for local memory references ; CHECK-LABEL: @local_memory -; CHECK: getelementptr i32, i32 addrspace(1)* %out, i32 128 +; CHECK: getelementptr i32, ptr addrspace(1) %out, i32 128 ; CHECK-NEXT: store ; CHECK-NEXT: ret -define amdgpu_kernel void @local_memory(i32 addrspace(1)* %out, i32 addrspace(3)* %lds) { +define amdgpu_kernel void @local_memory(ptr addrspace(1) %out, ptr addrspace(3) %lds) { entry: br label %loop.header @@ -51,10 +51,10 @@ br label %loop.body loop.body: - %ptr_lds = getelementptr i32, i32 addrspace(3)* %lds, i32 %counter - %val = load i32, i32 addrspace(3)* %ptr_lds - %ptr_out = getelementptr i32, i32 addrspace(1)* %out, i32 %counter - store i32 %val, i32 addrspace(1)* %ptr_out + %ptr_lds = getelementptr i32, ptr addrspace(3) %lds, i32 %counter + %val = load i32, ptr addrspace(3) %ptr_lds + %ptr_out = getelementptr i32, ptr addrspace(1) %out, i32 %counter + store i32 %val, ptr addrspace(1) %ptr_out br label %loop.inc loop.inc: @@ -75,7 +75,7 @@ ; CHECK-NEXT: getelementptr ; CHECK-NEXT: store ; CHECK-NOT: br -define amdgpu_kernel void @unroll_for_if(i32 addrspace(5)* %a) { +define amdgpu_kernel void @unroll_for_if(ptr addrspace(5) %a) { entry: br label %for.body @@ -86,8 +86,8 @@ if.then: ; preds = %for.body %0 = sext i32 %i1 to i64 - %arrayidx = getelementptr inbounds i32, i32 addrspace(5)* %a, i64 %0 - store i32 0, i32 addrspace(5)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr addrspace(5) %a, i64 %0 + store i32 0, ptr addrspace(5) %arrayidx, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then @@ -103,13 +103,13 @@ ; CHECK-LABEL: @local_memory_runtime ; CHECK: loop.header: -; CHECK: load i32, i32 addrspace(3)* -; CHECK: load i32, i32 addrspace(3)* +; CHECK: load i32, ptr addrspace(3) +; CHECK: load i32, ptr addrspace(3) ; CHECK: br i1 ; CHECK: loop.header.epil -; CHECK: load i32, i32 addrspace(3)* +; CHECK: load i32, ptr addrspace(3) ; CHECK: ret -define amdgpu_kernel void @local_memory_runtime(i32 addrspace(1)* %out, i32 addrspace(3)* %lds, i32 %n) { +define amdgpu_kernel void @local_memory_runtime(ptr addrspace(1) %out, ptr addrspace(3) %lds, i32 %n) { entry: br label %loop.header @@ -118,10 +118,10 @@ br label %loop.body loop.body: - %ptr_lds = getelementptr i32, i32 addrspace(3)* %lds, i32 %counter - %val = load i32, i32 addrspace(3)* %ptr_lds - %ptr_out = getelementptr i32, i32 addrspace(1)* %out, i32 %counter - store i32 %val, 
i32 addrspace(1)* %ptr_out + %ptr_lds = getelementptr i32, ptr addrspace(3) %lds, i32 %counter + %val = load i32, ptr addrspace(3) %ptr_lds + %ptr_out = getelementptr i32, ptr addrspace(1) %out, i32 %counter + store i32 %val, ptr addrspace(1) %ptr_out br label %loop.inc loop.inc: diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll --- a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll @@ -6,13 +6,13 @@ ; GCN-NOT: error ; R600: in function test_call_external{{.*}}: unsupported call to function external_function -define amdgpu_kernel void @test_call_external(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { - %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 - %a = load i32, i32 addrspace(1)* %in - %b = load i32, i32 addrspace(1)* %b_ptr +define amdgpu_kernel void @test_call_external(ptr addrspace(1) %out, ptr addrspace(1) %in) { + %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 + %a = load i32, ptr addrspace(1) %in + %b = load i32, ptr addrspace(1) %b_ptr %c = call i32 @external_function(i32 %b) nounwind %result = add i32 %a, %c - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } @@ -23,22 +23,22 @@ ; GCN-NOT: error ; R600: in function test_call{{.*}}: unsupported call to function defined_function -define amdgpu_kernel void @test_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { - %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 - %a = load i32, i32 addrspace(1)* %in - %b = load i32, i32 addrspace(1)* %b_ptr +define amdgpu_kernel void @test_call(ptr addrspace(1) %out, ptr addrspace(1) %in) { + %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 + %a = load i32, ptr addrspace(1) %in + %b = load i32, ptr addrspace(1) %b_ptr %c = call i32 @defined_function(i32 %b) nounwind %result = add i32 %a, %c - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -; GCN: error: :0:0: in function test_tail_call i32 (i32 addrspace(1)*, i32 addrspace(1)*): unsupported required tail call to function defined_function +; GCN: error: :0:0: in function test_tail_call i32 (ptr addrspace(1), ptr addrspace(1)): unsupported required tail call to function defined_function ; R600: in function test_tail_call{{.*}}: unsupported call to function defined_function -define i32 @test_tail_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { - %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 - %a = load i32, i32 addrspace(1)* %in - %b = load i32, i32 addrspace(1)* %b_ptr +define i32 @test_tail_call(ptr addrspace(1) %out, ptr addrspace(1) %in) { + %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 + %a = load i32, ptr addrspace(1) %in + %b = load i32, ptr addrspace(1) %b_ptr %c = tail call i32 @defined_function(i32 %b) ret i32 %c } @@ -58,7 +58,7 @@ ; R600: in function test_tail_call_bitcast_extern_variadic{{.*}}: unsupported call to function extern_variadic define i32 @test_tail_call_bitcast_extern_variadic(<4 x float> %arg0, <4 x float> %arg1, i32 %arg2) { %add = fadd <4 x float> %arg0, %arg1 - %call = tail call i32 bitcast (i32 (...)* @extern_variadic to i32 (<4 x float>)*)(<4 x float> %add) + %call = tail call i32 @extern_variadic(<4 x float> %add) ret i32 %call } diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-cc.ll b/llvm/test/CodeGen/AMDGPU/unsupported-cc.ll --- a/llvm/test/CodeGen/AMDGPU/unsupported-cc.ll +++ b/llvm/test/CodeGen/AMDGPU/unsupported-cc.ll @@ -6,11 +6,11 @@ ; CHECK: LSHR 
; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT: 5(7.006492e-45) -define amdgpu_kernel void @slt(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @slt(ptr addrspace(1) %out, i32 %in) { entry: %0 = icmp slt i32 %in, 5 %1 = select i1 %0, i32 -1, i32 0 - store i32 %1, i32 addrspace(1)* %out + store i32 %1, ptr addrspace(1) %out ret void } @@ -18,11 +18,11 @@ ; CHECK: LSHR ; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT: 5(7.006492e-45) -define amdgpu_kernel void @ult_i32(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @ult_i32(ptr addrspace(1) %out, i32 %in) { entry: %0 = icmp ult i32 %in, 5 %1 = select i1 %0, i32 -1, i32 0 - store i32 %1, i32 addrspace(1)* %out + store i32 %1, ptr addrspace(1) %out ret void } @@ -31,11 +31,11 @@ ; CHECK-NEXT: 1084227584(5.000000e+00) ; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0 ; CHECK-NEXT: LSHR * -define amdgpu_kernel void @ult_float(float addrspace(1)* %out, float %in) { +define amdgpu_kernel void @ult_float(ptr addrspace(1) %out, float %in) { entry: %0 = fcmp ult float %in, 5.0 %1 = select i1 %0, float 1.0, float 0.0 - store float %1, float addrspace(1)* %out + store float %1, ptr addrspace(1) %out ret void } @@ -43,11 +43,11 @@ ; CHECK: LSHR ; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}} ; CHECK-NEXT: 1084227584(5.000000e+00) -define amdgpu_kernel void @ult_float_native(float addrspace(1)* %out, float %in) { +define amdgpu_kernel void @ult_float_native(ptr addrspace(1) %out, float %in) { entry: %0 = fcmp ult float %in, 5.0 %1 = select i1 %0, float 0.0, float 1.0 - store float %1, float addrspace(1)* %out + store float %1, ptr addrspace(1) %out ret void } @@ -55,11 +55,11 @@ ; CHECK: LSHR ; CHECK-NEXT: SETGT {{\*? 
*}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT: 1084227584(5.000000e+00) -define amdgpu_kernel void @olt(float addrspace(1)* %out, float %in) { +define amdgpu_kernel void @olt(ptr addrspace(1) %out, float %in) { entry: %0 = fcmp olt float %in, 5.0 %1 = select i1 %0, float 1.0, float 0.0 - store float %1, float addrspace(1)* %out + store float %1, ptr addrspace(1) %out ret void } @@ -67,11 +67,11 @@ ; CHECK: LSHR ; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT: 6(8.407791e-45) -define amdgpu_kernel void @sle(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @sle(ptr addrspace(1) %out, i32 %in) { entry: %0 = icmp sle i32 %in, 5 %1 = select i1 %0, i32 -1, i32 0 - store i32 %1, i32 addrspace(1)* %out + store i32 %1, ptr addrspace(1) %out ret void } @@ -79,11 +79,11 @@ ; CHECK: LSHR ; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT: 6(8.407791e-45) -define amdgpu_kernel void @ule_i32(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @ule_i32(ptr addrspace(1) %out, i32 %in) { entry: %0 = icmp ule i32 %in, 5 %1 = select i1 %0, i32 -1, i32 0 - store i32 %1, i32 addrspace(1)* %out + store i32 %1, ptr addrspace(1) %out ret void } @@ -92,11 +92,11 @@ ; CHECK-NEXT: 1084227584(5.000000e+00) ; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0 ; CHECK-NEXT: LSHR * -define amdgpu_kernel void @ule_float(float addrspace(1)* %out, float %in) { +define amdgpu_kernel void @ule_float(ptr addrspace(1) %out, float %in) { entry: %0 = fcmp ule float %in, 5.0 %1 = select i1 %0, float 1.0, float 0.0 - store float %1, float addrspace(1)* %out + store float %1, ptr addrspace(1) %out ret void } @@ -104,11 +104,11 @@ ; CHECK: LSHR ; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}} ; CHECK-NEXT: 1084227584(5.000000e+00) -define amdgpu_kernel void @ule_float_native(float addrspace(1)* %out, float %in) { +define amdgpu_kernel void @ule_float_native(ptr addrspace(1) %out, float %in) { entry: %0 = fcmp ule float %in, 5.0 %1 = select i1 %0, float 0.0, float 1.0 - store float %1, float addrspace(1)* %out + store float %1, ptr addrspace(1) %out ret void } @@ -116,10 +116,10 @@ ; CHECK: LSHR ; CHECK-NEXT: SETGE {{\*? 
*}}T{{[0-9]\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT:1084227584(5.000000e+00) -define amdgpu_kernel void @ole(float addrspace(1)* %out, float %in) { +define amdgpu_kernel void @ole(ptr addrspace(1) %out, float %in) { entry: %0 = fcmp ole float %in, 5.0 %1 = select i1 %0, float 1.0, float 0.0 - store float %1, float addrspace(1)* %out + store float %1, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/update-lds-alignment.ll b/llvm/test/CodeGen/AMDGPU/update-lds-alignment.ll --- a/llvm/test/CodeGen/AMDGPU/update-lds-alignment.ll +++ b/llvm/test/CodeGen/AMDGPU/update-lds-alignment.ll @@ -40,20 +40,15 @@ @k0.lds.size.16.align.16 = internal unnamed_addr addrspace(3) global [16 x i8] undef, align 16 define amdgpu_kernel void @k0() { - %k0.lds.size.1.align.1.bc = bitcast [1 x i8] addrspace(3)* @k0.lds.size.1.align.1 to i8 addrspace(3)* - store i8 1, i8 addrspace(3)* %k0.lds.size.1.align.1.bc, align 1 + store i8 1, ptr addrspace(3) @k0.lds.size.1.align.1, align 1 - %k0.lds.size.2.align.2.bc = bitcast [2 x i8] addrspace(3)* @k0.lds.size.2.align.2 to i8 addrspace(3)* - store i8 2, i8 addrspace(3)* %k0.lds.size.2.align.2.bc, align 2 + store i8 2, ptr addrspace(3) @k0.lds.size.2.align.2, align 2 - %k0.lds.size.4.align.4.bc = bitcast [4 x i8] addrspace(3)* @k0.lds.size.4.align.4 to i8 addrspace(3)* - store i8 3, i8 addrspace(3)* %k0.lds.size.4.align.4.bc, align 4 + store i8 3, ptr addrspace(3) @k0.lds.size.4.align.4, align 4 - %k0.lds.size.8.align.8.bc = bitcast [8 x i8] addrspace(3)* @k0.lds.size.8.align.8 to i8 addrspace(3)* - store i8 4, i8 addrspace(3)* %k0.lds.size.8.align.8.bc, align 8 + store i8 4, ptr addrspace(3) @k0.lds.size.8.align.8, align 8 - %k0.lds.size.16.align.16.bc = bitcast [16 x i8] addrspace(3)* @k0.lds.size.16.align.16 to i8 addrspace(3)* - store i8 5, i8 addrspace(3)* %k0.lds.size.16.align.16.bc, align 16 + store i8 5, ptr addrspace(3) @k0.lds.size.16.align.16, align 16 ret void } @@ -71,20 +66,15 @@ @k1.lds.size.1.align.16 = internal unnamed_addr addrspace(3) global [1 x i8] undef, align 16 define amdgpu_kernel void @k1() { - %k1.lds.size.1.align.1.bc = bitcast [1 x i8] addrspace(3)* @k1.lds.size.1.align.1 to i8 addrspace(3)* - store i8 1, i8 addrspace(3)* %k1.lds.size.1.align.1.bc, align 1 + store i8 1, ptr addrspace(3) @k1.lds.size.1.align.1, align 1 - %k1.lds.size.1.align.2.bc = bitcast [1 x i8] addrspace(3)* @k1.lds.size.1.align.2 to i8 addrspace(3)* - store i8 2, i8 addrspace(3)* %k1.lds.size.1.align.2.bc, align 2 + store i8 2, ptr addrspace(3) @k1.lds.size.1.align.2, align 2 - %k1.lds.size.1.align.4.bc = bitcast [1 x i8] addrspace(3)* @k1.lds.size.1.align.4 to i8 addrspace(3)* - store i8 3, i8 addrspace(3)* %k1.lds.size.1.align.4.bc, align 4 + store i8 3, ptr addrspace(3) @k1.lds.size.1.align.4, align 4 - %k1.lds.size.1.align.8.bc = bitcast [1 x i8] addrspace(3)* @k1.lds.size.1.align.8 to i8 addrspace(3)* - store i8 4, i8 addrspace(3)* %k1.lds.size.1.align.8.bc, align 8 + store i8 4, ptr addrspace(3) @k1.lds.size.1.align.8, align 8 - %k1.lds.size.1.align.16.bc = bitcast [1 x i8] addrspace(3)* @k1.lds.size.1.align.16 to i8 addrspace(3)* - store i8 5, i8 addrspace(3)* %k1.lds.size.1.align.16.bc, align 16 + store i8 5, ptr addrspace(3) @k1.lds.size.1.align.16, align 16 ret void } @@ -100,17 +90,13 @@ @k2.lds.size.9.align.8 = internal unnamed_addr addrspace(3) global [9 x i8] undef, align 8 define amdgpu_kernel void @k2() { - %k2.lds.size.2.align.1.bc = bitcast [2 x i8] addrspace(3)* @k2.lds.size.2.align.1 to i8 addrspace(3)* - store i8 1, i8 
addrspace(3)* %k2.lds.size.2.align.1.bc, align 1 + store i8 1, ptr addrspace(3) @k2.lds.size.2.align.1, align 1 - %k2.lds.size.3.align.2.bc = bitcast [3 x i8] addrspace(3)* @k2.lds.size.3.align.2 to i8 addrspace(3)* - store i8 2, i8 addrspace(3)* %k2.lds.size.3.align.2.bc, align 2 + store i8 2, ptr addrspace(3) @k2.lds.size.3.align.2, align 2 - %k2.lds.size.5.align.4.bc = bitcast [5 x i8] addrspace(3)* @k2.lds.size.5.align.4 to i8 addrspace(3)* - store i8 3, i8 addrspace(3)* %k2.lds.size.5.align.4.bc, align 4 + store i8 3, ptr addrspace(3) @k2.lds.size.5.align.4, align 4 - %k2.lds.size.9.align.8.bc = bitcast [9 x i8] addrspace(3)* @k2.lds.size.9.align.8 to i8 addrspace(3)* - store i8 4, i8 addrspace(3)* %k2.lds.size.9.align.8.bc, align 8 + store i8 4, ptr addrspace(3) @k2.lds.size.9.align.8, align 8 ret void } @@ -126,17 +112,13 @@ @k3.lds.size.7.align.4 = internal unnamed_addr addrspace(3) global [7 x i8] undef, align 4 define amdgpu_kernel void @k3() { - %k3.lds.size.5.align.2.bc = bitcast [5 x i8] addrspace(3)* @k3.lds.size.5.align.2 to i8 addrspace(3)* - store i8 1, i8 addrspace(3)* %k3.lds.size.5.align.2.bc, align 2 + store i8 1, ptr addrspace(3) @k3.lds.size.5.align.2, align 2 - %k3.lds.size.6.align.2.bc = bitcast [6 x i8] addrspace(3)* @k3.lds.size.6.align.2 to i8 addrspace(3)* - store i8 2, i8 addrspace(3)* %k3.lds.size.6.align.2.bc, align 2 + store i8 2, ptr addrspace(3) @k3.lds.size.6.align.2, align 2 - %k3.lds.size.7.align.2.bc = bitcast [7 x i8] addrspace(3)* @k3.lds.size.7.align.2 to i8 addrspace(3)* - store i8 3, i8 addrspace(3)* %k3.lds.size.7.align.2.bc, align 2 + store i8 3, ptr addrspace(3) @k3.lds.size.7.align.2, align 2 - %k3.lds.size.7.align.4.bc = bitcast [7 x i8] addrspace(3)* @k3.lds.size.7.align.4 to i8 addrspace(3)* - store i8 4, i8 addrspace(3)* %k3.lds.size.7.align.4.bc, align 4 + store i8 4, ptr addrspace(3) @k3.lds.size.7.align.4, align 4 ret void } @@ -152,17 +134,13 @@ @k4.lds.size.12.align.8 = internal unnamed_addr addrspace(3) global [12 x i8] undef, align 8 define amdgpu_kernel void @k4() { - %k4.lds.size.9.align.1.bc = bitcast [9 x i8] addrspace(3)* @k4.lds.size.9.align.1 to i8 addrspace(3)* - store i8 1, i8 addrspace(3)* %k4.lds.size.9.align.1.bc, align 1 + store i8 1, ptr addrspace(3) @k4.lds.size.9.align.1, align 1 - %k4.lds.size.10.align.2.bc = bitcast [10 x i8] addrspace(3)* @k4.lds.size.10.align.2 to i8 addrspace(3)* - store i8 2, i8 addrspace(3)* %k4.lds.size.10.align.2.bc, align 2 + store i8 2, ptr addrspace(3) @k4.lds.size.10.align.2, align 2 - %k4.lds.size.11.align.4.bc = bitcast [11 x i8] addrspace(3)* @k4.lds.size.11.align.4 to i8 addrspace(3)* - store i8 3, i8 addrspace(3)* %k4.lds.size.11.align.4.bc, align 4 + store i8 3, ptr addrspace(3) @k4.lds.size.11.align.4, align 4 - %k4.lds.size.12.align.8.bc = bitcast [12 x i8] addrspace(3)* @k4.lds.size.12.align.8 to i8 addrspace(3)* - store i8 4, i8 addrspace(3)* %k4.lds.size.12.align.8.bc, align 8 + store i8 4, ptr addrspace(3) @k4.lds.size.12.align.8, align 8 ret void } @@ -177,17 +155,13 @@ @k5.lds.size.20.align.16 = internal unnamed_addr addrspace(3) global [20 x i8] undef, align 16 define amdgpu_kernel void @k5() { - %k5.lds.size.17.align.16.bc = bitcast [17 x i8] addrspace(3)* @k5.lds.size.17.align.16 to i8 addrspace(3)* - store i8 1, i8 addrspace(3)* %k5.lds.size.17.align.16.bc, align 16 + store i8 1, ptr addrspace(3) @k5.lds.size.17.align.16, align 16 - %k5.lds.size.18.align.16.bc = bitcast [18 x i8] addrspace(3)* @k5.lds.size.18.align.16 to i8 addrspace(3)* - store i8 2, i8 addrspace(3)* 
%k5.lds.size.18.align.16.bc, align 16 + store i8 2, ptr addrspace(3) @k5.lds.size.18.align.16, align 16 - %k5.lds.size.19.align.16.bc = bitcast [19 x i8] addrspace(3)* @k5.lds.size.19.align.16 to i8 addrspace(3)* - store i8 3, i8 addrspace(3)* %k5.lds.size.19.align.16.bc, align 16 + store i8 3, ptr addrspace(3) @k5.lds.size.19.align.16, align 16 - %k5.lds.size.20.align.16.bc = bitcast [20 x i8] addrspace(3)* @k5.lds.size.20.align.16 to i8 addrspace(3)* - store i8 4, i8 addrspace(3)* %k5.lds.size.20.align.16.bc, align 16 + store i8 4, ptr addrspace(3) @k5.lds.size.20.align.16, align 16 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/usubo.ll b/llvm/test/CodeGen/AMDGPU/usubo.ll --- a/llvm/test/CodeGen/AMDGPU/usubo.ll +++ b/llvm/test/CodeGen/AMDGPU/usubo.ll @@ -10,13 +10,13 @@ ; EG: SUBB_UINT ; EG: ADDC_UINT -define amdgpu_kernel void @s_usubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) #0 { +define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) #0 { %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) #0 %val = extractvalue { i64, i1 } %usub, 0 %carry = extractvalue { i64, i1 } %usub, 1 %ext = zext i1 %carry to i64 %add2 = add i64 %val, %ext - store i64 %add2, i64 addrspace(1)* %out, align 8 + store i64 %add2, ptr addrspace(1) %out, align 8 ret void } @@ -31,12 +31,12 @@ ; EG-DAG: SUBB_UINT ; EG-DAG: SUB_INT -define amdgpu_kernel void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @s_usubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) #0 { %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) %val = extractvalue { i32, i1 } %usub, 0 %carry = extractvalue { i32, i1 } %usub, 1 - store i32 %val, i32 addrspace(1)* %out, align 4 - store i1 %carry, i1 addrspace(1)* %carryout + store i32 %val, ptr addrspace(1) %out, align 4 + store i1 %carry, ptr addrspace(1) %carryout ret void } @@ -49,18 +49,18 @@ ; EG-DAG: SUBB_UINT ; EG-DAG: SUB_INT -define amdgpu_kernel void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_usubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr - %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr - %a = load i32, i32 addrspace(1)* %a.gep, align 4 - %b = load i32, i32 addrspace(1)* %b.gep, align 4 + %a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr + %b.gep = getelementptr inbounds i32, ptr addrspace(1) %b.ptr + %a = load i32, ptr addrspace(1) %a.gep, align 4 + %b = load i32, ptr addrspace(1) %b.gep, align 4 %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) %val = extractvalue { i32, i1 } %usub, 0 %carry = extractvalue { i32, i1 } %usub, 1 - store i32 %val, i32 addrspace(1)* %out, align 4 - store i1 %carry, i1 addrspace(1)* %carryout + store i32 %val, ptr addrspace(1) %out, align 4 + store i1 %carry, ptr addrspace(1) %carryout ret void } @@ -73,19 +73,19 @@ ; EG-DAG: SUBB_UINT ; EG-DAG: SUB_INT -define amdgpu_kernel void @v_usubo_i32_novcc(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_usubo_i32_novcc(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) 
%b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr - %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr - %a = load i32, i32 addrspace(1)* %a.gep, align 4 - %b = load i32, i32 addrspace(1)* %b.gep, align 4 + %a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr + %b.gep = getelementptr inbounds i32, ptr addrspace(1) %b.ptr + %a = load i32, ptr addrspace(1) %a.gep, align 4 + %b = load i32, ptr addrspace(1) %b.gep, align 4 %uadd = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) %val = extractvalue { i32, i1 } %uadd, 0 %carry = extractvalue { i32, i1 } %uadd, 1 - store volatile i32 %val, i32 addrspace(1)* %out, align 4 + store volatile i32 %val, ptr addrspace(1) %out, align 4 call void asm sideeffect "", "~{vcc}"() #0 - store volatile i1 %carry, i1 addrspace(1)* %carryout + store volatile i1 %carry, ptr addrspace(1) %carryout ret void } @@ -97,12 +97,12 @@ ; EG-DAG: SUB_INT ; EG-DAG: SUB_INT ; EG: SUB_INT -define amdgpu_kernel void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) #0 { +define amdgpu_kernel void @s_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a, i64 %b) #0 { %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) %val = extractvalue { i64, i1 } %usub, 0 %carry = extractvalue { i64, i1 } %usub, 1 - store i64 %val, i64 addrspace(1)* %out, align 8 - store i1 %carry, i1 addrspace(1)* %carryout + store i64 %val, ptr addrspace(1) %out, align 8 + store i1 %carry, ptr addrspace(1) %carryout ret void } @@ -119,18 +119,18 @@ ; EG-DAG: SUB_INT ; EG-DAG: SUB_INT ; EG: SUB_INT -define amdgpu_kernel void @v_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %a.ptr, i64 addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds i64, i64 addrspace(1)* %a.ptr - %b.gep = getelementptr inbounds i64, i64 addrspace(1)* %b.ptr - %a = load i64, i64 addrspace(1)* %a.gep - %b = load i64, i64 addrspace(1)* %b.gep + %a.gep = getelementptr inbounds i64, ptr addrspace(1) %a.ptr + %b.gep = getelementptr inbounds i64, ptr addrspace(1) %b.ptr + %a = load i64, ptr addrspace(1) %a.gep + %b = load i64, ptr addrspace(1) %b.gep %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) %val = extractvalue { i64, i1 } %usub, 0 %carry = extractvalue { i64, i1 } %usub, 1 - store i64 %val, i64 addrspace(1)* %out, align 8 - store i1 %carry, i1 addrspace(1)* %carryout + store i64 %val, ptr addrspace(1) %out, align 8 + store i1 %carry, ptr addrspace(1) %carryout ret void } @@ -144,18 +144,18 @@ ; GFX9: v_sub_u16_e32 ; GFX9: v_cmp_gt_u16_e32 -define amdgpu_kernel void @v_usubo_i16(i16 addrspace(1)* %out, i1 addrspace(1)* %carryout, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_usubo_i16(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr - %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %b.ptr - %a = load i16, i16 addrspace(1)* %a.gep - %b = load i16, i16 addrspace(1)* %b.gep + %a.gep = getelementptr inbounds i16, ptr 
addrspace(1) %a.ptr + %b.gep = getelementptr inbounds i16, ptr addrspace(1) %b.ptr + %a = load i16, ptr addrspace(1) %a.gep + %b = load i16, ptr addrspace(1) %b.gep %usub = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 %a, i16 %b) %val = extractvalue { i16, i1 } %usub, 0 %carry = extractvalue { i16, i1 } %usub, 1 - store i16 %val, i16 addrspace(1)* %out - store i1 %carry, i1 addrspace(1)* %carryout + store i16 %val, ptr addrspace(1) %out + store i1 %carry, ptr addrspace(1) %carryout ret void } @@ -164,22 +164,22 @@ ; SICIVI: v_cndmask_b32 ; SICIVI: v_sub_{{[iu]}}32 ; SICIVI: v_cndmask_b32 -define amdgpu_kernel void @v_usubo_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %carryout, <2 x i32> addrspace(1)* %aptr, <2 x i32> addrspace(1)* %bptr) nounwind { - %a = load <2 x i32>, <2 x i32> addrspace(1)* %aptr, align 4 - %b = load <2 x i32>, <2 x i32> addrspace(1)* %bptr, align 4 +define amdgpu_kernel void @v_usubo_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind { + %a = load <2 x i32>, ptr addrspace(1) %aptr, align 4 + %b = load <2 x i32>, ptr addrspace(1) %bptr, align 4 %sadd = call { <2 x i32>, <2 x i1> } @llvm.usub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b) nounwind %val = extractvalue { <2 x i32>, <2 x i1> } %sadd, 0 %carry = extractvalue { <2 x i32>, <2 x i1> } %sadd, 1 - store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4 + store <2 x i32> %val, ptr addrspace(1) %out, align 4 %carry.ext = zext <2 x i1> %carry to <2 x i32> - store <2 x i32> %carry.ext, <2 x i32> addrspace(1)* %carryout + store <2 x i32> %carry.ext, ptr addrspace(1) %carryout ret void } ; FUNC-LABEL: {{^}}s_usubo_clamp_bit: ; GCN: v_sub_{{i|u|co_u}}32_e32 ; GCN: s_endpgm -define amdgpu_kernel void @s_usubo_clamp_bit(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @s_usubo_clamp_bit(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) #0 { entry: %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) %val = extractvalue { i32, i1 } %usub, 0 @@ -193,8 +193,8 @@ exit: %cout = phi i1 [false, %entry], [%c2, %if] - store i32 %val, i32 addrspace(1)* %out, align 4 - store i1 %cout, i1 addrspace(1)* %carryout + store i32 %val, ptr addrspace(1) %out, align 4 + store i1 %cout, ptr addrspace(1) %carryout ret void } @@ -202,14 +202,14 @@ ; FUNC-LABEL: {{^}}v_usubo_clamp_bit: ; GCN: v_sub_{{i|u|co_u}}32_e64 ; GCN: s_endpgm -define amdgpu_kernel void @v_usubo_clamp_bit(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_usubo_clamp_bit(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr - %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr - %a = load i32, i32 addrspace(1)* %a.gep, align 4 - %b = load i32, i32 addrspace(1)* %b.gep, align 4 + %a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr + %b.gep = getelementptr inbounds i32, ptr addrspace(1) %b.ptr + %a = load i32, ptr addrspace(1) %a.gep, align 4 + %b = load i32, ptr addrspace(1) %b.gep, align 4 %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) %val = extractvalue { i32, i1 } %usub, 0 %carry = extractvalue { i32, i1 } %usub, 1 @@ -222,8 +222,8 @@ exit: %cout = phi i1 [false, %entry], 
[%c2, %if] - store i32 %val, i32 addrspace(1)* %out, align 4 - store i1 %cout, i1 addrspace(1)* %carryout + store i32 %val, ptr addrspace(1) %out, align 4 + store i1 %cout, ptr addrspace(1) %carryout ret void } diff --git a/llvm/test/CodeGen/AMDGPU/v1024.ll b/llvm/test/CodeGen/AMDGPU/v1024.ll --- a/llvm/test/CodeGen/AMDGPU/v1024.ll +++ b/llvm/test/CodeGen/AMDGPU/v1024.ll @@ -9,23 +9,22 @@ define amdgpu_kernel void @test_v1024() { entry: %alloca = alloca <32 x i32>, align 16, addrspace(5) - %cast = bitcast <32 x i32> addrspace(5)* %alloca to i8 addrspace(5)* - call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %cast, i8 0, i32 128, i1 false) + call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 0, i32 128, i1 false) br i1 undef, label %if.then.i.i, label %if.else.i if.then.i.i: ; preds = %entry - call void @llvm.memcpy.p5i8.p5i8.i64(i8 addrspace(5)* align 16 %cast, i8 addrspace(5)* align 4 undef, i64 128, i1 false) + call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 16 %alloca, ptr addrspace(5) align 4 undef, i64 128, i1 false) br label %if.then.i62.i if.else.i: ; preds = %entry br label %if.then.i62.i if.then.i62.i: ; preds = %if.else.i, %if.then.i.i - call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* align 4 undef, i8 addrspace(5)* align 16 %cast, i64 128, i1 false) + call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 undef, ptr addrspace(5) align 16 %alloca, i64 128, i1 false) ret void } -declare void @llvm.memset.p5i8.i32(i8 addrspace(5)* nocapture readonly, i8, i32, i1 immarg) -declare void @llvm.memcpy.p5i8.p5i8.i64(i8 addrspace(5)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i1 immarg) +declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture readonly, i8, i32, i1 immarg) +declare void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) -declare void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i1 immarg) +declare void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll --- a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll +++ b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll @@ -15,13 +15,13 @@ ; GCN-DAG: v{{[0-9]}} ; All nan values are converted to 0xffffffff ; GCN: s_endpgm -define amdgpu_kernel void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 { +define amdgpu_kernel void @v_cnd_nan_nosgpr(ptr addrspace(1) %out, i32 %c, ptr addrspace(1) %fptr) #0 { %idx = call i32 @llvm.amdgcn.workitem.id.x() #1 - %f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx - %f = load float, float addrspace(1)* %f.gep + %f.gep = getelementptr float, ptr addrspace(1) %fptr, i32 %idx + %f = load float, ptr addrspace(1) %f.gep %setcc = icmp ne i32 %c, 0 %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f - store float %select, float addrspace(1)* %out + store float %select, ptr addrspace(1) %out ret void } @@ -41,10 +41,10 @@ ; GCN-DAG: v{{[0-9]}} ; All nan values are converted to 0xffffffff ; GCN: s_endpgm -define amdgpu_kernel void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) #0 { +define amdgpu_kernel void @v_cnd_nan(ptr addrspace(1) %out, i32 %c, float %f) #0 { %setcc = icmp ne i32 %c, 0 %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f - store float %select, float addrspace(1)* %out + store float %select, ptr addrspace(1) 
%out ret void } @@ -60,13 +60,13 @@ ; SIVI-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], s[[Z]] ; SIVI: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], [[CC]] ; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 1.0, s[[Z]], [[CC]] -define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, [8 x i32], float %x, float %z) #0 { +define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %out, [8 x i32], float %x, float %z) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext %setcc = fcmp one float %x, 0.0 %select = select i1 %setcc, float 1.0, float %z - store float %select, float addrspace(1)* %out.gep + store float %select, ptr addrspace(1) %out.gep ret void } @@ -77,13 +77,13 @@ ; SIVI-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[X]] ; SIVI: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VX]], [[CC]] ; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 1.0, [[X]], [[CC]] -define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(float addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(ptr addrspace(1) %out, float %x) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext %setcc = fcmp one float %x, 0.0 %select = select i1 %setcc, float 1.0, float %x - store float %select, float addrspace(1)* %out.gep + store float %select, ptr addrspace(1) %out.gep ret void } @@ -94,13 +94,13 @@ ; SIVI-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], s[[Z]] ; SIVI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VZ]], [[CC]] ; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 0, s[[Z]], [[CC]] -define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(float addrspace(1)* %out, [8 x i32], float %x, float %z) #0 { +define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(ptr addrspace(1) %out, [8 x i32], float %x, float %z) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext %setcc = fcmp one float %x, 0.0 %select = select i1 %setcc, float 0.0, float %z - store float %select, float addrspace(1)* %out.gep + store float %select, ptr addrspace(1) %out.gep ret void } @@ -111,13 +111,13 @@ ; SIVI-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[X]] ; SIVI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VX]], [[CC]] ; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 0, [[X]], [[CC]] -define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(float addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(ptr addrspace(1) %out, float %x) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext %setcc = fcmp one float %x, 0.0 %select = select i1 %setcc, float 0.0, float %x - store float %select, float addrspace(1)* %out.gep + store float %select, ptr addrspace(1) %out.gep ret void } @@ -126,15 +126,15 @@ ; GCN-DAG: {{buffer|flat|global}}_load_{{dword|b32}} [[Z:v[0-9]+]] ; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0 ; GCN: 
v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 0, [[Z]], [[COND]] -define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(ptr addrspace(1) %out, float %x, ptr addrspace(1) %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext - %z = load float, float addrspace(1)* %z.gep + %z.gep = getelementptr inbounds float, ptr addrspace(1) %z.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %z = load float, ptr addrspace(1) %z.gep %setcc = fcmp one float %x, 0.0 %select = select i1 %setcc, float 0.0, float %z - store float %select, float addrspace(1)* %out.gep + store float %select, ptr addrspace(1) %out.gep ret void } @@ -143,15 +143,15 @@ ; GCN-DAG: s_load_{{dword|b32}} [[X:s[0-9]+]] ; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0 ; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 1.0, [[Z]], [[COND]] -define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_vgprZ_f32(ptr addrspace(1) %out, float %x, ptr addrspace(1) %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext - %z = load float, float addrspace(1)* %z.gep + %z.gep = getelementptr inbounds float, ptr addrspace(1) %z.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %z = load float, ptr addrspace(1) %z.gep %setcc = fcmp one float %x, 0.0 %select = select i1 %setcc, float 1.0, float %z - store float %select, float addrspace(1)* %out.gep + store float %select, ptr addrspace(1) %out.gep ret void } @@ -162,15 +162,15 @@ ; SIVI-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]] ; SIVI: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc ; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 1.0, [[Z]], vcc -define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float %z) #0 { +define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, float %z) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext - %x = load float, float addrspace(1)* %x.gep + %x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %x = load float, ptr addrspace(1) %x.gep %setcc = fcmp olt float %x, 0.0 %select = select i1 %setcc, float 1.0, float %z - store float %select, float addrspace(1)* %out.gep + store float %select, ptr addrspace(1) %out.gep ret void } @@ -179,17 +179,17 @@ ; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[Z:v[0-9]+]] ; GCN: v_cmp_le_f32_e32 vcc, 0, [[X]] ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[Z]], vcc -define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, 
float addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext - %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext - %x = load volatile float, float addrspace(1)* %x.gep - %z = load volatile float, float addrspace(1)* %z.gep + %x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext + %z.gep = getelementptr inbounds float, ptr addrspace(1) %z.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %x = load volatile float, ptr addrspace(1) %x.gep + %z = load volatile float, ptr addrspace(1) %z.gep %setcc = fcmp ult float %x, 0.0 %select = select i1 %setcc, float 1.0, float %z - store float %select, float addrspace(1)* %out.gep + store float %select, ptr addrspace(1) %out.gep ret void } @@ -198,17 +198,17 @@ ; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[Z:v[0-9]+]] ; GCN: v_cmp_lt_i32_e32 vcc, -1, [[X]] ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 2, [[Z]], vcc -define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i32 addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext - %z.gep = getelementptr inbounds i32, i32 addrspace(1)* %z.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %x = load volatile i32, i32 addrspace(1)* %x.gep - %z = load volatile i32, i32 addrspace(1)* %z.gep + %x.gep = getelementptr inbounds i32, ptr addrspace(1) %x.ptr, i64 %tid.ext + %z.gep = getelementptr inbounds i32, ptr addrspace(1) %z.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %x = load volatile i32, ptr addrspace(1) %x.gep + %z = load volatile i32, ptr addrspace(1) %z.gep %setcc = icmp slt i32 %x, 0 %select = select i1 %setcc, i32 2, i32 %z - store i32 %select, i32 addrspace(1)* %out.gep + store i32 %select, ptr addrspace(1) %out.gep ret void } @@ -218,17 +218,17 @@ ; GCN-DAG: v_cmp_lt_i64_e32 vcc, -1, v[[[X_LO]]:[[X_HI]]] ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v[[Z_HI]], vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2, v[[Z_LO]], vcc -define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %x.ptr, i64 addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %x.gep = getelementptr inbounds i64, i64 addrspace(1)* %x.ptr, i64 %tid.ext - %z.gep = getelementptr inbounds i64, i64 addrspace(1)* %z.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i64, i64 addrspace(1)* %out, i64 %tid.ext - %x = load volatile i64, i64 addrspace(1)* %x.gep - %z = load volatile i64, i64 addrspace(1)* %z.gep + %x.gep = getelementptr inbounds i64, ptr addrspace(1) %x.ptr, i64 %tid.ext + %z.gep = getelementptr inbounds i64, ptr 
addrspace(1) %z.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i64, ptr addrspace(1) %out, i64 %tid.ext + %x = load volatile i64, ptr addrspace(1) %x.gep + %z = load volatile i64, ptr addrspace(1) %z.gep %setcc = icmp slt i64 %x, 0 %select = select i1 %setcc, i64 2, i64 %z - store i64 %select, i64 addrspace(1)* %out.gep + store i64 %select, ptr addrspace(1) %out.gep ret void } @@ -241,17 +241,17 @@ ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc -define amdgpu_kernel void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext - %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext - %x = load volatile float, float addrspace(1)* %x.gep - %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep + %x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext + %z.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %z.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %out, i64 %tid.ext + %x = load volatile float, ptr addrspace(1) %x.gep + %z = load volatile <4 x float>, ptr addrspace(1) %z.gep %setcc = fcmp ugt float %x, 4.0 %select = select i1 %setcc, <4 x float> %z, <4 x float> - store <4 x float> %select, <4 x float> addrspace(1)* %out.gep + store <4 x float> %select, ptr addrspace(1) %out.gep ret void } @@ -264,17 +264,17 @@ ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc -define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext - %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext - %x = load volatile float, float addrspace(1)* %x.gep - %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep + %x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext + %z.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %z.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %out, i64 %tid.ext + %x = load volatile float, ptr addrspace(1) %x.gep + %z = load volatile <4 x float>, ptr addrspace(1) %z.gep %setcc = fcmp ugt float %x, 4.0 %select = select i1 %setcc, <4 x float> , <4 x float> %z - store <4 x float> %select, <4 x float> addrspace(1)* %out.gep + store <4 x float> %select, ptr addrspace(1) %out.gep ret void } @@ -290,17 +290,17 @@ ; 
GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc -define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext - %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext - %x = load volatile float, float addrspace(1)* %x.gep - %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep + %x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext + %z.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %z.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %out, i64 %tid.ext + %x = load volatile float, ptr addrspace(1) %x.gep + %z = load volatile <4 x float>, ptr addrspace(1) %z.gep %setcc = fcmp ugt float 4.0, %x %select = select i1 %setcc, <4 x float> , <4 x float> %z - store <4 x float> %select, <4 x float> addrspace(1)* %out.gep + store <4 x float> %select, ptr addrspace(1) %out.gep ret void } @@ -313,17 +313,17 @@ ; GCN-DAG: s_or_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc, s{{\[[0-9]+:[0-9]+\]}} ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s ; GCN: store_{{byte|b8}} -define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i1 addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext - %z.gep = getelementptr inbounds i1, i1 addrspace(1)* %z.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i1, i1 addrspace(1)* %out, i64 %tid.ext - %x = load volatile i32, i32 addrspace(1)* %x.gep - %z = load volatile i1, i1 addrspace(1)* %z.gep + %x.gep = getelementptr inbounds i32, ptr addrspace(1) %x.ptr, i64 %tid.ext + %z.gep = getelementptr inbounds i1, ptr addrspace(1) %z.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i1, ptr addrspace(1) %out, i64 %tid.ext + %x = load volatile i32, ptr addrspace(1) %x.gep + %z = load volatile i1, ptr addrspace(1) %z.gep %setcc = icmp slt i32 %x, 0 %select = select i1 %setcc, i1 true, i1 %z - store i1 %select, i1 addrspace(1)* %out.gep + store i1 %select, ptr addrspace(1) %out.gep ret void } @@ -337,17 +337,17 @@ ; SIVI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, vcc ; GFX10-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0x3ff00000, v{{[0-9]+}}, vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc -define amdgpu_kernel void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(double addrspace(1)* %out, float addrspace(1)* %x.ptr, double addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %x.gep = getelementptr inbounds float, float 
addrspace(1)* %x.ptr, i64 %tid.ext - %z.gep = getelementptr inbounds double, double addrspace(1)* %z.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext - %x = load volatile float, float addrspace(1)* %x.gep - %z = load volatile double, double addrspace(1)* %z.gep + %x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext + %z.gep = getelementptr inbounds double, ptr addrspace(1) %z.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext + %x = load volatile float, ptr addrspace(1) %x.gep + %z = load volatile double, ptr addrspace(1) %z.gep %setcc = fcmp ult float %x, 0.0 %select = select i1 %setcc, double 1.0, double %z - store double %select, double addrspace(1)* %out.gep + store double %select, ptr addrspace(1) %out.gep ret void } @@ -359,17 +359,17 @@ ; GCN: v_cmp_nlg_f32_e32 vcc, 0, [[X]] ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 3, v{{[0-9]+}}, vcc ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc -define amdgpu_kernel void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(i64 addrspace(1)* %out, float addrspace(1)* %x.ptr, i64 addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext - %z.gep = getelementptr inbounds i64, i64 addrspace(1)* %z.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i64, i64 addrspace(1)* %out, i64 %tid.ext - %x = load volatile float, float addrspace(1)* %x.gep - %z = load volatile i64, i64 addrspace(1)* %z.gep + %x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext + %z.gep = getelementptr inbounds i64, ptr addrspace(1) %z.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i64, ptr addrspace(1) %out, i64 %tid.ext + %x = load volatile float, ptr addrspace(1) %x.gep + %z = load volatile i64, ptr addrspace(1) %z.gep %setcc = fcmp one float %x, 0.0 %select = select i1 %setcc, i64 3, i64 %z - store i64 %select, i64 addrspace(1)* %out.gep + store i64 %select, ptr addrspace(1) %out.gep ret void } @@ -380,17 +380,17 @@ ; GCN: v_cmp_gt_u32_e32 vcc, 2, [[X]] ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, [[Z]], vcc -define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(float addrspace(1)* %out, i32 addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext - %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext - %x = load volatile i32, i32 addrspace(1)* %x.gep - %z = load volatile float, float addrspace(1)* %z.gep + %x.gep = getelementptr inbounds i32, ptr addrspace(1) %x.ptr, i64 %tid.ext + %z.gep = getelementptr inbounds float, ptr addrspace(1) %z.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %x = load volatile i32, ptr addrspace(1) %x.gep + %z = load volatile float, ptr addrspace(1) %z.gep %setcc = icmp ugt i32 %x, 1 %select = select i1 %setcc, float 4.0, float %z - store float %select, float 
addrspace(1)* %out.gep + store float %select, ptr addrspace(1) %out.gep ret void } @@ -402,19 +402,19 @@ ; GCN: v_cmp_nle_f32_e32 vcc, 4.0, [[X]] ; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -1.0, vcc ; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -2.0, vcc -define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 { +define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 - %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext - %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext - %x = load volatile float, float addrspace(1)* %x.gep - %z = load volatile float, float addrspace(1)* %z.gep + %x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext + %z.gep = getelementptr inbounds float, ptr addrspace(1) %z.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext + %x = load volatile float, ptr addrspace(1) %x.gep + %z = load volatile float, ptr addrspace(1) %z.gep %setcc = fcmp ugt float 4.0, %x %select0 = select i1 %setcc, float -1.0, float %z %select1 = select i1 %setcc, float -2.0, float %z - store volatile float %select0, float addrspace(1)* %out.gep - store volatile float %select1, float addrspace(1)* %out.gep + store volatile float %select0, ptr addrspace(1) %out.gep + store volatile float %select1, ptr addrspace(1) %out.gep ret void } @@ -422,44 +422,44 @@ ; GCN-LABEL: {{^}}v_cndmask_abs_neg_f16: ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -define amdgpu_kernel void @v_cndmask_abs_neg_f16(half addrspace(1)* %out, i32 %c, half addrspace(1)* %fptr) #0 { +define amdgpu_kernel void @v_cndmask_abs_neg_f16(ptr addrspace(1) %out, i32 %c, ptr addrspace(1) %fptr) #0 { %idx = call i32 @llvm.amdgcn.workitem.id.x() #1 - %f.gep = getelementptr half, half addrspace(1)* %fptr, i32 %idx - %f = load half, half addrspace(1)* %f.gep + %f.gep = getelementptr half, ptr addrspace(1) %fptr, i32 %idx + %f = load half, ptr addrspace(1) %f.gep %f.abs = call half @llvm.fabs.f16(half %f) %f.neg = fneg half %f %setcc = icmp ne i32 %c, 0 %select = select i1 %setcc, half %f.abs, half %f.neg - store half %select, half addrspace(1)* %out + store half %select, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_cndmask_abs_neg_f32: ; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, |v{{[0-9]+}}|, -define amdgpu_kernel void @v_cndmask_abs_neg_f32(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 { +define amdgpu_kernel void @v_cndmask_abs_neg_f32(ptr addrspace(1) %out, i32 %c, ptr addrspace(1) %fptr) #0 { %idx = call i32 @llvm.amdgcn.workitem.id.x() #1 - %f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx - %f = load float, float addrspace(1)* %f.gep + %f.gep = getelementptr float, ptr addrspace(1) %fptr, i32 %idx + %f = load float, ptr addrspace(1) %f.gep %f.abs = call float @llvm.fabs.f32(float %f) %f.neg = fneg float %f %setcc = icmp ne i32 %c, 0 %select = select i1 %setcc, float %f.abs, float %f.neg - store float %select, float addrspace(1)* %out + store float %select, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_cndmask_abs_neg_f64: ; GCN-DAG: v_cndmask_b32_e32 
v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -define amdgpu_kernel void @v_cndmask_abs_neg_f64(double addrspace(1)* %out, i32 %c, double addrspace(1)* %fptr) #0 { +define amdgpu_kernel void @v_cndmask_abs_neg_f64(ptr addrspace(1) %out, i32 %c, ptr addrspace(1) %fptr) #0 { %idx = call i32 @llvm.amdgcn.workitem.id.x() #1 - %f.gep = getelementptr double, double addrspace(1)* %fptr, i32 %idx - %f = load double, double addrspace(1)* %f.gep + %f.gep = getelementptr double, ptr addrspace(1) %fptr, i32 %idx + %f = load double, ptr addrspace(1) %f.gep %f.abs = call double @llvm.fabs.f64(double %f) %f.neg = fneg double %f %setcc = icmp ne i32 %c, 0 %select = select i1 %setcc, double %f.abs, double %f.neg - store double %select, double addrspace(1)* %out + store double %select, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll b/llvm/test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll --- a/llvm/test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll +++ b/llvm/test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll @@ -5,33 +5,33 @@ ; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx_0: ; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 0, v{{[0-9]+}} -define amdgpu_kernel void @v_cvt_pk_u8_f32_idx_0(i32 addrspace(1)* %out, float %src, i32 %reg) { +define amdgpu_kernel void @v_cvt_pk_u8_f32_idx_0(ptr addrspace(1) %out, float %src, i32 %reg) { %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 0, i32 %reg) #0 - store i32 %result, i32 addrspace(1)* %out, align 4 + store i32 %result, ptr addrspace(1) %out, align 4 ret void } ; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx_1: ; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 1, v{{[0-9]+}} -define amdgpu_kernel void @v_cvt_pk_u8_f32_idx_1(i32 addrspace(1)* %out, float %src, i32 %reg) { +define amdgpu_kernel void @v_cvt_pk_u8_f32_idx_1(ptr addrspace(1) %out, float %src, i32 %reg) { %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 1, i32 %reg) #0 - store i32 %result, i32 addrspace(1)* %out, align 4 + store i32 %result, ptr addrspace(1) %out, align 4 ret void } ; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx_2: ; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 2, v{{[0-9]+}} -define amdgpu_kernel void @v_cvt_pk_u8_f32_idx_2(i32 addrspace(1)* %out, float %src, i32 %reg) { +define amdgpu_kernel void @v_cvt_pk_u8_f32_idx_2(ptr addrspace(1) %out, float %src, i32 %reg) { %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 2, i32 %reg) #0 - store i32 %result, i32 addrspace(1)* %out, align 4 + store i32 %result, ptr addrspace(1) %out, align 4 ret void } ; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx_3: ; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 3, v{{[0-9]+}} -define amdgpu_kernel void @v_cvt_pk_u8_f32_idx_3(i32 addrspace(1)* %out, float %src, i32 %reg) { +define amdgpu_kernel void @v_cvt_pk_u8_f32_idx_3(ptr addrspace(1) %out, float %src, i32 %reg) { %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 3, i32 %reg) #0 - store i32 %result, i32 addrspace(1)* %out, align 4 + store i32 %result, ptr addrspace(1) %out, align 4 ret void } @@ -40,20 +40,20 @@ ; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 1, v{{[0-9]+}} ; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 2, v{{[0-9]+}} ; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 3, v{{[0-9]+}} -define amdgpu_kernel void @v_cvt_pk_u8_f32_combine(i32 addrspace(1)* %out, float %src, i32 %reg) { +define amdgpu_kernel void @v_cvt_pk_u8_f32_combine(ptr addrspace(1) %out, float %src, i32 %reg) { %result0 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 0, i32 
%reg) #0 %result1 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 1, i32 %result0) #0 %result2 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 2, i32 %result1) #0 %result3 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 3, i32 %result2) #0 - store i32 %result3, i32 addrspace(1)* %out, align 4 + store i32 %result3, ptr addrspace(1) %out, align 4 ret void } ; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx: ; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @v_cvt_pk_u8_f32_idx(i32 addrspace(1)* %out, float %src, i32 %idx, i32 %reg) { +define amdgpu_kernel void @v_cvt_pk_u8_f32_idx(ptr addrspace(1) %out, float %src, i32 %idx, i32 %reg) { %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 %idx, i32 %reg) #0 - store i32 %result, i32 addrspace(1)* %out, align 4 + store i32 %result, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll --- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll @@ -24,7 +24,7 @@ ; SI: [[FLOW_BB]]: ; SI-NEXT: s_andn2_saveexec_b64 [[SAVE2]], [[SAVE2]] ; -define amdgpu_kernel void @test_if(i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 { +define amdgpu_kernel void @test_if(i32 %b, ptr addrspace(1) %src, ptr addrspace(1) %dst) #1 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone switch i32 %tid, label %default [ @@ -33,26 +33,26 @@ ] case1: - %arrayidx1 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b - store i32 13, i32 addrspace(1)* %arrayidx1, align 4 + %arrayidx1 = getelementptr i32, ptr addrspace(1) %dst, i32 %b + store i32 13, ptr addrspace(1) %arrayidx1, align 4 br label %end case2: - %arrayidx5 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b - store i32 17, i32 addrspace(1)* %arrayidx5, align 4 + %arrayidx5 = getelementptr i32, ptr addrspace(1) %dst, i32 %b + store i32 17, ptr addrspace(1) %arrayidx5, align 4 br label %end default: %cmp8 = icmp eq i32 %tid, 2 - %arrayidx10 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b + %arrayidx10 = getelementptr i32, ptr addrspace(1) %dst, i32 %b br i1 %cmp8, label %if, label %else if: - store i32 19, i32 addrspace(1)* %arrayidx10, align 4 + store i32 19, ptr addrspace(1) %arrayidx10, align 4 br label %end else: - store i32 21, i32 addrspace(1)* %arrayidx10, align 4 + store i32 21, ptr addrspace(1) %arrayidx10, align 4 br label %end end: @@ -69,14 +69,14 @@ ; SI-NEXT: {{^}}[[EXIT]]: ; SI: s_endpgm -define amdgpu_kernel void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 { +define amdgpu_kernel void @simple_test_v_if(ptr addrspace(1) %dst, ptr addrspace(1) %src) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %is.0 = icmp ne i32 %tid, 0 br i1 %is.0, label %then, label %exit then: - %gep = getelementptr i32, i32 addrspace(1)* %dst, i32 %tid - store i32 999, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %dst, i32 %tid + store i32 999, ptr addrspace(1) %gep br label %exit exit: @@ -95,14 +95,14 @@ ; SI-NEXT: {{^}}[[EXIT]]: ; SI: s_endpgm -define amdgpu_kernel void @simple_test_v_if_ret_else_ret(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 { +define amdgpu_kernel void @simple_test_v_if_ret_else_ret(ptr addrspace(1) %dst, ptr addrspace(1) %src) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %is.0 = icmp ne i32 %tid, 0 br i1 %is.0, label %then, label %exit then: - %gep = getelementptr i32, i32 addrspace(1)* %dst, i32 %tid - store i32 999, i32 
addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %dst, i32 %tid + store i32 999, ptr addrspace(1) %gep ret void exit: @@ -132,18 +132,18 @@ ; SI-NEXT: {{^}}[[EXIT]]: ; SI: ds_write_b32 -define amdgpu_kernel void @simple_test_v_if_ret_else_code_ret(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 { +define amdgpu_kernel void @simple_test_v_if_ret_else_code_ret(ptr addrspace(1) %dst, ptr addrspace(1) %src) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %is.0 = icmp ne i32 %tid, 0 br i1 %is.0, label %then, label %exit then: - %gep = getelementptr i32, i32 addrspace(1)* %dst, i32 %tid - store i32 999, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %dst, i32 %tid + store i32 999, ptr addrspace(1) %gep ret void exit: - store volatile i32 7, i32 addrspace(3)* undef + store volatile i32 7, ptr addrspace(3) undef ret void } @@ -161,7 +161,7 @@ ; SI: s_cbranch_scc1 [[LABEL_LOOP]] ; SI: [[LABEL_EXIT]]: ; SI: s_endpgm -define amdgpu_kernel void @simple_test_v_loop(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 { +define amdgpu_kernel void @simple_test_v_loop(ptr addrspace(1) %dst, ptr addrspace(1) %src) #1 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %is.0 = icmp ne i32 %tid, 0 @@ -170,10 +170,10 @@ loop: %i = phi i32 [%tid, %entry], [%i.inc, %loop] - %gep.src = getelementptr i32, i32 addrspace(1)* %src, i32 %i - %gep.dst = getelementptr i32, i32 addrspace(1)* %dst, i32 %i - %load = load i32, i32 addrspace(1)* %src - store i32 %load, i32 addrspace(1)* %gep.dst + %gep.src = getelementptr i32, ptr addrspace(1) %src, i32 %i + %gep.dst = getelementptr i32, ptr addrspace(1) %dst, i32 %i + %load = load i32, ptr addrspace(1) %src + store i32 %load, ptr addrspace(1) %gep.dst %i.inc = add nsw i32 %i, 1 %cmp = icmp eq i32 %limit, %i.inc br i1 %cmp, label %exit, label %loop @@ -220,12 +220,12 @@ ; SI: [[LABEL_EXIT]]: ; SI-NOT: [[COND_STATE]] ; SI: s_endpgm -define amdgpu_kernel void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 { +define amdgpu_kernel void @multi_vcond_loop(ptr addrspace(1) noalias nocapture %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture readonly %arg2, ptr addrspace(1) noalias nocapture readonly %arg3) #1 { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tmp4 = sext i32 %tmp to i64 - %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg3, i64 %tmp4 - %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4 + %tmp5 = getelementptr inbounds i32, ptr addrspace(1) %arg3, i64 %tmp4 + %tmp6 = load i32, ptr addrspace(1) %tmp5, align 4 %tmp7 = icmp sgt i32 %tmp6, 0 %tmp8 = sext i32 %tmp6 to i64 br i1 %tmp7, label %bb10, label %bb26 @@ -233,10 +233,10 @@ bb10: ; preds = %bb, %bb20 %tmp11 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb ] %tmp12 = add nsw i64 %tmp11, %tmp4 - %tmp13 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp12 - %tmp14 = load i32, i32 addrspace(1)* %tmp13, align 4 - %tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp12 - %tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4 + %tmp13 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i64 %tmp12 + %tmp14 = load i32, ptr addrspace(1) %tmp13, align 4 + %tmp15 = getelementptr inbounds i32, ptr addrspace(1) %arg2, i64 %tmp12 + %tmp16 = load i32, ptr addrspace(1) %tmp15, align 4 %tmp17 = icmp ne i32 %tmp14, -1 %tmp18 = 
icmp ne i32 %tmp16, -1 %tmp19 = and i1 %tmp17, %tmp18 @@ -244,8 +244,8 @@ bb20: ; preds = %bb10 %tmp21 = add nsw i32 %tmp16, %tmp14 - %tmp22 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp12 - store i32 %tmp21, i32 addrspace(1)* %tmp22, align 4 + %tmp22 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp12 + store i32 %tmp21, ptr addrspace(1) %tmp22, align 4 %tmp23 = add nuw nsw i64 %tmp11, 1 %tmp24 = icmp slt i64 %tmp23, %tmp8 br i1 %tmp24, label %bb10, label %bb26 diff --git a/llvm/test/CodeGen/AMDGPU/vop-shrink.ll b/llvm/test/CodeGen/AMDGPU/vop-shrink.ll --- a/llvm/test/CodeGen/AMDGPU/vop-shrink.ll +++ b/llvm/test/CodeGen/AMDGPU/vop-shrink.ll @@ -8,22 +8,22 @@ ; ModuleID = 'vop-shrink.ll' -define amdgpu_kernel void @sub_rev(i32 addrspace(1)* %out, <4 x i32> %sgpr, i32 %cond) { +define amdgpu_kernel void @sub_rev(ptr addrspace(1) %out, <4 x i32> %sgpr, i32 %cond) { entry: %vgpr = call i32 @llvm.amdgcn.workitem.id.x() #1 %tmp = icmp eq i32 %cond, 0 br i1 %tmp, label %if, label %else if: ; preds = %entry - %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 + %tmp1 = getelementptr i32, ptr addrspace(1) %out, i32 1 %tmp2 = extractelement <4 x i32> %sgpr, i32 1 - store i32 %tmp2, i32 addrspace(1)* %out + store i32 %tmp2, ptr addrspace(1) %out br label %endif else: ; preds = %entry %tmp3 = extractelement <4 x i32> %sgpr, i32 2 %tmp4 = sub i32 %vgpr, %tmp3 - store i32 %tmp4, i32 addrspace(1)* %out + store i32 %tmp4, ptr addrspace(1) %out br label %endif endif: ; preds = %else, %if @@ -35,12 +35,12 @@ ; FUNC-LABEL: {{^}}add_fold: ; SI: v_add_f32_e32 v{{[0-9]+}}, 0x44800000 -define amdgpu_kernel void @add_fold(float addrspace(1)* %out) { +define amdgpu_kernel void @add_fold(ptr addrspace(1) %out) { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = uitofp i32 %tmp to float %tmp2 = fadd float %tmp1, 1.024000e+03 - store float %tmp2, float addrspace(1)* %out + store float %tmp2, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/wait.ll b/llvm/test/CodeGen/AMDGPU/wait.ll --- a/llvm/test/CodeGen/AMDGPU/wait.ll +++ b/llvm/test/CodeGen/AMDGPU/wait.ll @@ -13,19 +13,18 @@ ; DEFAULT-DAG: exp ; DEFAULT: exp ; DEFAULT-NEXT: s_endpgm -define amdgpu_vs void @main(<16 x i8> addrspace(4)* inreg %arg, <16 x i8> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, <16 x i8> addrspace(4)* inreg %arg3, <16 x i8> addrspace(4)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(4)* inreg %constptr) #0 { +define amdgpu_vs void @main(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, ptr addrspace(4) inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, ptr addrspace(4) inreg %constptr) #0 { main_body: - %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(4)* %arg3, i32 0 - %tmp10 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp, !tbaa !0 + %tmp10 = load <16 x i8>, ptr addrspace(4) %arg3, !tbaa !0 %tmp10.cast = bitcast <16 x i8> %tmp10 to <4 x i32> %tmp11 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp10.cast, i32 %arg6, i32 0, i32 0, i32 0) %tmp12 = extractelement <4 x float> %tmp11, i32 0 %tmp13 = extractelement <4 x float> %tmp11, i32 1 call void @llvm.amdgcn.s.barrier() #1 %tmp14 = extractelement <4 x float> %tmp11, i32 2 - %tmp15 = load float, float addrspace(4)* %constptr, align 4 - %tmp16 = getelementptr <16 x i8>, <16 x i8> addrspace(4)* %arg3, i32 1 - %tmp17 = load <16 x i8>, <16 x i8> 
addrspace(4)* %tmp16, !tbaa !0 + %tmp15 = load float, ptr addrspace(4) %constptr, align 4 + %tmp16 = getelementptr <16 x i8>, ptr addrspace(4) %arg3, i32 1 + %tmp17 = load <16 x i8>, ptr addrspace(4) %tmp16, !tbaa !0 %tmp17.cast = bitcast <16 x i8> %tmp17 to <4 x i32> %tmp18 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp17.cast, i32 %arg6, i32 0, i32 0, i32 0) %tmp19 = extractelement <4 x float> %tmp18, i32 0 @@ -46,10 +45,9 @@ ; ILPMAX: exp pos0 ; ILPMAX-NEXT: exp param0 ; ILPMAX: s_endpgm -define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(4)* inreg %arg, [17 x <16 x i8>] addrspace(4)* inreg %arg1, [17 x <4 x i32>] addrspace(4)* inreg %arg2, [34 x <8 x i32>] addrspace(4)* inreg %arg3, [16 x <16 x i8>] addrspace(4)* inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 { +define amdgpu_vs void @main2(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, ptr addrspace(4) inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 { main_body: - %tmp = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(4)* %arg4, i64 0, i64 0 - %tmp11 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp, align 16, !tbaa !0 + %tmp11 = load <16 x i8>, ptr addrspace(4) %arg4, align 16, !tbaa !0 %tmp12 = add i32 %arg5, %arg7 %tmp11.cast = bitcast <16 x i8> %tmp11 to <4 x i32> %tmp13 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp11.cast, i32 %tmp12, i32 0, i32 0, i32 0) @@ -57,8 +55,8 @@ %tmp15 = extractelement <4 x float> %tmp13, i32 1 %tmp16 = extractelement <4 x float> %tmp13, i32 2 %tmp17 = extractelement <4 x float> %tmp13, i32 3 - %tmp18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(4)* %arg4, i64 0, i64 1 - %tmp19 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp18, align 16, !tbaa !0 + %tmp18 = getelementptr [16 x <16 x i8>], ptr addrspace(4) %arg4, i64 0, i64 1 + %tmp19 = load <16 x i8>, ptr addrspace(4) %tmp18, align 16, !tbaa !0 %tmp20 = add i32 %arg5, %arg7 %tmp19.cast = bitcast <16 x i8> %tmp19 to <4 x i32> %tmp21 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp19.cast, i32 %tmp20, i32 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-flat.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-flat.ll --- a/llvm/test/CodeGen/AMDGPU/waitcnt-flat.ll +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-flat.ll @@ -10,9 +10,9 @@ ; XGCN: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[DATA:v[0-9]+]] ; XGCN: s_waitcnt vmcnt(0) lgkmcnt(0) ; XGCN: flat_load_dword [[DATA]], v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void @test(i32* %out, i32 %in) { - store volatile i32 0, i32* %out - %val = load volatile i32, i32* %out +define amdgpu_kernel void @test(ptr %out, i32 %in) { + store volatile i32 0, ptr %out + %val = load volatile i32, ptr %out ret void } @@ -21,8 +21,8 @@ ; GFX9: global_load_dword [[LD:v[0-9]+]] ; GFX9-NEXT: s_waitcnt vmcnt(0){{$}} ; GFX9-NEXT: ds_write_b32 [[LD]] -define amdgpu_kernel void @test_waitcnt_type_flat_global(i32 addrspace(1)* %in) { - %val = load volatile i32, i32 addrspace(1)* %in - store volatile i32 %val, i32 addrspace(3)* undef +define amdgpu_kernel void @test_waitcnt_type_flat_global(ptr addrspace(1) %in) { + %val = load volatile i32, ptr addrspace(1) %in + store volatile i32 %val, ptr addrspace(3) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll --- 
a/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll @@ -15,113 +15,111 @@ @data_generic = addrspace(1) global [100 x float] [float 0.000000e+00, float 0x3FB99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FD99999A0000000, float 5.000000e-01, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000, float 0x3FECCCCCC0000000, float 1.000000e+00, float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF6666660000000, float 1.500000e+00, float 0x3FF99999A0000000, float 0x3FFB333340000000, float 0x3FFCCCCCC0000000, float 0x3FFE666660000000, float 2.000000e+00, float 0x4000CCCCC0000000, float 0x40019999A0000000, float 0x4002666660000000, float 0x4003333340000000, float 2.500000e+00, float 0x4004CCCCC0000000, float 0x40059999A0000000, float 0x4006666660000000, float 0x4007333340000000, float 3.000000e+00, float 0x4008CCCCC0000000, float 0x40099999A0000000, float 0x400A666660000000, float 0x400B333340000000, float 3.500000e+00, float 0x400CCCCCC0000000, float 0x400D9999A0000000, float 0x400E666660000000, float 0x400F333340000000, float 4.000000e+00, float 0x4010666660000000, float 0x4010CCCCC0000000, float 0x4011333340000000, float 0x40119999A0000000, float 4.500000e+00, float 0x4012666660000000, float 0x4012CCCCC0000000, float 0x4013333340000000, float 0x40139999A0000000, float 5.000000e+00, float 0x4014666660000000, float 0x4014CCCCC0000000, float 0x4015333340000000, float 0x40159999A0000000, float 5.500000e+00, float 0x4016666660000000, float 0x4016CCCCC0000000, float 0x4017333340000000, float 0x40179999A0000000, float 6.000000e+00, float 0x4018666660000000, float 0x4018CCCCC0000000, float 0x4019333340000000, float 0x40199999A0000000, float 6.500000e+00, float 0x401A666660000000, float 0x401ACCCCC0000000, float 0x401B333340000000, float 0x401B9999A0000000, float 7.000000e+00, float 0x401C666660000000, float 0x401CCCCCC0000000, float 0x401D333340000000, float 0x401D9999A0000000, float 7.500000e+00, float 0x401E666660000000, float 0x401ECCCCC0000000, float 0x401F333340000000, float 0x401F9999A0000000, float 8.000000e+00, float 0x4020333340000000, float 0x4020666660000000, float 0x40209999A0000000, float 0x4020CCCCC0000000, float 8.500000e+00, float 0x4021333340000000, float 0x4021666660000000, float 0x40219999A0000000, float 0x4021CCCCC0000000, float 9.000000e+00, float 0x4022333340000000, float 0x4022666660000000, float 0x40229999A0000000, float 0x4022CCCCC0000000, float 9.500000e+00, float 0x4023333340000000, float 0x4023666660000000, float 0x40239999A0000000, float 0x4023CCCCC0000000], align 4 @data_reference = addrspace(1) global [100 x float] [float 0.000000e+00, float 0x3FB99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FD99999A0000000, float 5.000000e-01, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000, float 0x3FECCCCCC0000000, float 1.000000e+00, float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF6666660000000, float 1.500000e+00, float 0x3FF99999A0000000, float 0x3FFB333340000000, float 0x3FFCCCCCC0000000, float 0x3FFE666660000000, float 2.000000e+00, float 0x4000CCCCC0000000, float 0x40019999A0000000, float 0x4002666660000000, float 0x4003333340000000, float 2.500000e+00, float 0x4004CCCCC0000000, float 0x40059999A0000000, float 0x4006666660000000, float 0x4007333340000000, float 3.000000e+00, float 0x4008CCCCC0000000, float 0x40099999A0000000, float 0x400A666660000000, 
float 0x400B333340000000, float 3.500000e+00, float 0x400CCCCCC0000000, float 0x400D9999A0000000, float 0x400E666660000000, float 0x400F333340000000, float 4.000000e+00, float 0x4010666660000000, float 0x4010CCCCC0000000, float 0x4011333340000000, float 0x40119999A0000000, float 4.500000e+00, float 0x4012666660000000, float 0x4012CCCCC0000000, float 0x4013333340000000, float 0x40139999A0000000, float 5.000000e+00, float 0x4014666660000000, float 0x4014CCCCC0000000, float 0x4015333340000000, float 0x40159999A0000000, float 5.500000e+00, float 0x4016666660000000, float 0x4016CCCCC0000000, float 0x4017333340000000, float 0x40179999A0000000, float 6.000000e+00, float 0x4018666660000000, float 0x4018CCCCC0000000, float 0x4019333340000000, float 0x40199999A0000000, float 6.500000e+00, float 0x401A666660000000, float 0x401ACCCCC0000000, float 0x401B333340000000, float 0x401B9999A0000000, float 7.000000e+00, float 0x401C666660000000, float 0x401CCCCCC0000000, float 0x401D333340000000, float 0x401D9999A0000000, float 7.500000e+00, float 0x401E666660000000, float 0x401ECCCCC0000000, float 0x401F333340000000, float 0x401F9999A0000000, float 8.000000e+00, float 0x4020333340000000, float 0x4020666660000000, float 0x40209999A0000000, float 0x4020CCCCC0000000, float 8.500000e+00, float 0x4021333340000000, float 0x4021666660000000, float 0x40219999A0000000, float 0x4021CCCCC0000000, float 9.000000e+00, float 0x4022333340000000, float 0x4022666660000000, float 0x40229999A0000000, float 0x4022CCCCC0000000, float 9.500000e+00, float 0x4023333340000000, float 0x4023666660000000, float 0x40239999A0000000, float 0x4023CCCCC0000000], align 4 -define amdgpu_kernel void @testKernel(i32 addrspace(1)* nocapture %arg) local_unnamed_addr #0 { +define amdgpu_kernel void @testKernel(ptr addrspace(1) nocapture %arg) local_unnamed_addr #0 { bb: - store <2 x float> , <2 x float>* bitcast (float* getelementptr ([100 x float], [100 x float]* addrspacecast ([100 x float] addrspace(1)* @data_generic to [100 x float]*), i64 0, i64 4) to <2 x float>*), align 4 - store <2 x float> , <2 x float>* bitcast (float* getelementptr ([100 x float], [100 x float]* addrspacecast ([100 x float] addrspace(1)* @data_reference to [100 x float]*), i64 0, i64 4) to <2 x float>*), align 4 + store <2 x float> , ptr bitcast (ptr getelementptr ([100 x float], ptr addrspacecast ([100 x float] addrspace(1)* @data_generic to ptr), i64 0, i64 4) to ptr), align 4 + store <2 x float> , ptr bitcast (ptr getelementptr ([100 x float], ptr addrspacecast ([100 x float] addrspace(1)* @data_reference to ptr), i64 0, i64 4) to ptr), align 4 br label %bb18 bb1: ; preds = %bb18 - %tmp = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() + %tmp = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp3 = tail call i32 @llvm.amdgcn.workgroup.id.x() - %tmp4 = getelementptr inbounds i8, i8 addrspace(4)* %tmp, i64 4 - %tmp5 = bitcast i8 addrspace(4)* %tmp4 to i16 addrspace(4)* - %tmp6 = load i16, i16 addrspace(4)* %tmp5, align 4 + %tmp4 = getelementptr inbounds i8, ptr addrspace(4) %tmp, i64 4 + %tmp6 = load i16, ptr addrspace(4) %tmp4, align 4 %tmp7 = zext i16 %tmp6 to i32 %tmp8 = mul i32 %tmp3, %tmp7 %tmp9 = add i32 %tmp8, %tmp2 - %tmp10 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %tmp10 = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() %tmp11 = zext i32 %tmp9 to i64 - %tmp12 = bitcast i8 addrspace(4)* %tmp10 to i64 addrspace(4)* - %tmp13 = load i64, i64 addrspace(4)* %tmp12, align 
8 + %tmp13 = load i64, ptr addrspace(4) %tmp10, align 8 %tmp14 = add i64 %tmp13, %tmp11 %tmp15 = zext i1 %tmp99 to i32 %tmp16 = and i64 %tmp14, 4294967295 - %tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp16 - store i32 %tmp15, i32 addrspace(1)* %tmp17, align 4 + %tmp17 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp16 + store i32 %tmp15, ptr addrspace(1) %tmp17, align 4 ret void bb18: ; preds = %bb18, %bb %tmp19 = phi i64 [ 0, %bb ], [ %tmp102, %bb18 ] %tmp20 = phi i32 [ 0, %bb ], [ %tmp100, %bb18 ] %tmp21 = phi i1 [ true, %bb ], [ %tmp99, %bb18 ] - %tmp22 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp19 - %tmp23 = load float, float addrspace(1)* %tmp22, align 4 - %tmp24 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp19 - %tmp25 = load float, float addrspace(1)* %tmp24, align 4 + %tmp22 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp19 + %tmp23 = load float, ptr addrspace(1) %tmp22, align 4 + %tmp24 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp19 + %tmp25 = load float, ptr addrspace(1) %tmp24, align 4 %tmp26 = fcmp oeq float %tmp23, %tmp25 %tmp27 = and i1 %tmp21, %tmp26 %tmp28 = or i32 %tmp20, 1 %tmp29 = sext i32 %tmp28 to i64 - %tmp30 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp29 - %tmp31 = load float, float addrspace(1)* %tmp30, align 4 - %tmp32 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp29 - %tmp33 = load float, float addrspace(1)* %tmp32, align 4 + %tmp30 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp29 + %tmp31 = load float, ptr addrspace(1) %tmp30, align 4 + %tmp32 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp29 + %tmp33 = load float, ptr addrspace(1) %tmp32, align 4 %tmp34 = fcmp oeq float %tmp31, %tmp33 %tmp35 = and i1 %tmp27, %tmp34 %tmp36 = add nuw nsw i32 %tmp20, 2 %tmp37 = sext i32 %tmp36 to i64 - %tmp38 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp37 - %tmp39 = load float, float addrspace(1)* %tmp38, align 4 - %tmp40 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp37 - %tmp41 = load float, float addrspace(1)* %tmp40, align 4 + %tmp38 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp37 + %tmp39 = load float, ptr addrspace(1) %tmp38, align 4 + %tmp40 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp37 + %tmp41 = load float, ptr addrspace(1) %tmp40, align 4 %tmp42 = fcmp oeq float %tmp39, %tmp41 %tmp43 = and i1 %tmp35, %tmp42 %tmp44 = add nuw nsw i32 %tmp20, 3 %tmp45 = sext i32 %tmp44 to i64 - %tmp46 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp45 - %tmp47 = load float, float addrspace(1)* %tmp46, align 4 - %tmp48 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp45 - %tmp49 = load float, float addrspace(1)* %tmp48, align 4 + %tmp46 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp45 + %tmp47 = load float, ptr addrspace(1) %tmp46, align 4 + %tmp48 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp45 + %tmp49 = 
load float, ptr addrspace(1) %tmp48, align 4 %tmp50 = fcmp oeq float %tmp47, %tmp49 %tmp51 = and i1 %tmp43, %tmp50 %tmp52 = add nuw nsw i32 %tmp20, 4 %tmp53 = sext i32 %tmp52 to i64 - %tmp54 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp53 - %tmp55 = load float, float addrspace(1)* %tmp54, align 4 - %tmp56 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp53 - %tmp57 = load float, float addrspace(1)* %tmp56, align 4 + %tmp54 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp53 + %tmp55 = load float, ptr addrspace(1) %tmp54, align 4 + %tmp56 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp53 + %tmp57 = load float, ptr addrspace(1) %tmp56, align 4 %tmp58 = fcmp oeq float %tmp55, %tmp57 %tmp59 = and i1 %tmp51, %tmp58 %tmp60 = add nuw nsw i32 %tmp20, 5 %tmp61 = sext i32 %tmp60 to i64 - %tmp62 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp61 - %tmp63 = load float, float addrspace(1)* %tmp62, align 4 - %tmp64 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp61 - %tmp65 = load float, float addrspace(1)* %tmp64, align 4 + %tmp62 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp61 + %tmp63 = load float, ptr addrspace(1) %tmp62, align 4 + %tmp64 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp61 + %tmp65 = load float, ptr addrspace(1) %tmp64, align 4 %tmp66 = fcmp oeq float %tmp63, %tmp65 %tmp67 = and i1 %tmp59, %tmp66 %tmp68 = add nuw nsw i32 %tmp20, 6 %tmp69 = sext i32 %tmp68 to i64 - %tmp70 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp69 - %tmp71 = load float, float addrspace(1)* %tmp70, align 4 - %tmp72 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp69 - %tmp73 = load float, float addrspace(1)* %tmp72, align 4 + %tmp70 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp69 + %tmp71 = load float, ptr addrspace(1) %tmp70, align 4 + %tmp72 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp69 + %tmp73 = load float, ptr addrspace(1) %tmp72, align 4 %tmp74 = fcmp oeq float %tmp71, %tmp73 %tmp75 = and i1 %tmp67, %tmp74 %tmp76 = add nuw nsw i32 %tmp20, 7 %tmp77 = sext i32 %tmp76 to i64 - %tmp78 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp77 - %tmp79 = load float, float addrspace(1)* %tmp78, align 4 - %tmp80 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp77 - %tmp81 = load float, float addrspace(1)* %tmp80, align 4 + %tmp78 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp77 + %tmp79 = load float, ptr addrspace(1) %tmp78, align 4 + %tmp80 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp77 + %tmp81 = load float, ptr addrspace(1) %tmp80, align 4 %tmp82 = fcmp oeq float %tmp79, %tmp81 %tmp83 = and i1 %tmp75, %tmp82 %tmp84 = add nuw nsw i32 %tmp20, 8 %tmp85 = sext i32 %tmp84 to i64 - %tmp86 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp85 - %tmp87 = load float, float addrspace(1)* %tmp86, align 4 - %tmp88 = getelementptr inbounds [100 x 
float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp85 - %tmp89 = load float, float addrspace(1)* %tmp88, align 4 + %tmp86 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp85 + %tmp87 = load float, ptr addrspace(1) %tmp86, align 4 + %tmp88 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp85 + %tmp89 = load float, ptr addrspace(1) %tmp88, align 4 %tmp90 = fcmp oeq float %tmp87, %tmp89 %tmp91 = and i1 %tmp83, %tmp90 %tmp92 = add nuw nsw i32 %tmp20, 9 %tmp93 = sext i32 %tmp92 to i64 - %tmp94 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp93 - %tmp95 = load float, float addrspace(1)* %tmp94, align 4 - %tmp96 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp93 - %tmp97 = load float, float addrspace(1)* %tmp96, align 4 + %tmp94 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp93 + %tmp95 = load float, ptr addrspace(1) %tmp94, align 4 + %tmp96 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp93 + %tmp97 = load float, ptr addrspace(1) %tmp96, align 4 %tmp98 = fcmp oeq float %tmp95, %tmp97 %tmp99 = and i1 %tmp91, %tmp98 %tmp100 = add nuw nsw i32 %tmp20, 10 @@ -131,7 +129,7 @@ } ; Function Attrs: nounwind readnone speculatable -declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1 +declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #1 ; Function Attrs: nounwind readnone speculatable declare i32 @llvm.amdgcn.workitem.id.x() #1 @@ -140,7 +138,7 @@ declare i32 @llvm.amdgcn.workgroup.id.x() #1 ; Function Attrs: nounwind readnone speculatable -declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1 +declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1 attributes #0 = { "target-cpu"="fiji" "target-features"="-flat-for-global" } attributes #1 = { nounwind readnone speculatable } diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll --- a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll @@ -9,20 +9,20 @@ ; GFX8: s_waitcnt vmcnt(0){{$}} ; GFX9PLUS: s_waitcnt vmcnt(0){{$}} ; GCN-NEXT: s_barrier -define amdgpu_kernel void @barrier_vmcnt_global(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @barrier_vmcnt_global(ptr addrspace(1) %arg) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = zext i32 %tmp to i64 %tmp2 = shl nuw nsw i64 %tmp1, 32 - %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp1 - %tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4 + %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp1 + %tmp4 = load i32, ptr addrspace(1) %tmp3, align 4 fence syncscope("singlethread") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("singlethread") acquire %tmp5 = add nuw nsw i64 %tmp2, 4294967296 %tmp6 = lshr exact i64 %tmp5, 32 - %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp6 - store i32 %tmp4, i32 addrspace(1)* %tmp7, align 4 + %tmp7 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp6 + store i32 %tmp4, ptr addrspace(1) %tmp7, align 4 ret void } @@ -33,22 +33,22 @@ ; GFX9: s_waitcnt vmcnt(0){{$}} ; GFX10PLUS: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: s_barrier -define amdgpu_kernel void @barrier_vscnt_global(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @barrier_vscnt_global(ptr addrspace(1) %arg) { bb: %tmp = tail call i32 
@llvm.amdgcn.workitem.id.x() %tmp1 = zext i32 %tmp to i64 %tmp2 = shl nuw nsw i64 %tmp1, 32 %tmp3 = add nuw nsw i64 %tmp2, 8589934592 %tmp4 = lshr exact i64 %tmp3, 32 - %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp4 - store i32 0, i32 addrspace(1)* %tmp5, align 4 + %tmp5 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp4 + store i32 0, ptr addrspace(1) %tmp5, align 4 fence syncscope("singlethread") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("singlethread") acquire %tmp6 = add nuw nsw i64 %tmp2, 4294967296 %tmp7 = lshr exact i64 %tmp6, 32 - %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp7 - store i32 1, i32 addrspace(1)* %tmp8, align 4 + %tmp8 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp7 + store i32 1, ptr addrspace(1) %tmp8, align 4 ret void } @@ -59,24 +59,24 @@ ; GFX9PLUS: s_waitcnt vmcnt(0){{$}} ; GFX10PLUS: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: s_barrier -define amdgpu_kernel void @barrier_vmcnt_vscnt_global(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @barrier_vmcnt_vscnt_global(ptr addrspace(1) %arg) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = zext i32 %tmp to i64 %tmp2 = shl nuw nsw i64 %tmp1, 32 %tmp3 = add nuw nsw i64 %tmp2, 8589934592 %tmp4 = lshr exact i64 %tmp3, 32 - %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp4 - store i32 0, i32 addrspace(1)* %tmp5, align 4 - %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp1 - %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4 + %tmp5 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp4 + store i32 0, ptr addrspace(1) %tmp5, align 4 + %tmp6 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp1 + %tmp7 = load i32, ptr addrspace(1) %tmp6, align 4 fence syncscope("singlethread") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("singlethread") acquire %tmp8 = add nuw nsw i64 %tmp2, 4294967296 %tmp9 = lshr exact i64 %tmp8, 32 - %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp9 - store i32 %tmp7, i32 addrspace(1)* %tmp10, align 4 + %tmp10 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp9 + store i32 %tmp7, ptr addrspace(1) %tmp10, align 4 ret void } @@ -84,20 +84,20 @@ ; GCN: flat_load_{{dword|b32}} ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} ; GCN-NEXT: s_barrier -define amdgpu_kernel void @barrier_vmcnt_flat(i32* %arg) { +define amdgpu_kernel void @barrier_vmcnt_flat(ptr %arg) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = zext i32 %tmp to i64 %tmp2 = shl nuw nsw i64 %tmp1, 32 - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp1 - %tmp4 = load i32, i32* %tmp3, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp1 + %tmp4 = load i32, ptr %tmp3, align 4 fence syncscope("singlethread") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("singlethread") acquire %tmp5 = add nuw nsw i64 %tmp2, 4294967296 %tmp6 = lshr exact i64 %tmp5, 32 - %tmp7 = getelementptr inbounds i32, i32* %arg, i64 %tmp6 - store i32 %tmp4, i32* %tmp7, align 4 + %tmp7 = getelementptr inbounds i32, ptr %arg, i64 %tmp6 + store i32 %tmp4, ptr %tmp7, align 4 ret void } @@ -107,22 +107,22 @@ ; GFX10PLUS: s_waitcnt lgkmcnt(0){{$}} ; GFX10PLUS: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: s_barrier -define amdgpu_kernel void @barrier_vscnt_flat(i32* %arg) { +define amdgpu_kernel void @barrier_vscnt_flat(ptr %arg) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = zext i32 %tmp to i64 %tmp2 = shl nuw nsw 
i64 %tmp1, 32 %tmp3 = add nuw nsw i64 %tmp2, 8589934592 %tmp4 = lshr exact i64 %tmp3, 32 - %tmp5 = getelementptr inbounds i32, i32* %arg, i64 %tmp4 - store i32 0, i32* %tmp5, align 4 + %tmp5 = getelementptr inbounds i32, ptr %arg, i64 %tmp4 + store i32 0, ptr %tmp5, align 4 fence syncscope("singlethread") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("singlethread") acquire %tmp6 = add nuw nsw i64 %tmp2, 4294967296 %tmp7 = lshr exact i64 %tmp6, 32 - %tmp8 = getelementptr inbounds i32, i32* %arg, i64 %tmp7 - store i32 1, i32* %tmp8, align 4 + %tmp8 = getelementptr inbounds i32, ptr %arg, i64 %tmp7 + store i32 1, ptr %tmp8, align 4 ret void } @@ -131,24 +131,24 @@ ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} ; GFX10PLUS: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: s_barrier -define amdgpu_kernel void @barrier_vmcnt_vscnt_flat(i32* %arg) { +define amdgpu_kernel void @barrier_vmcnt_vscnt_flat(ptr %arg) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = zext i32 %tmp to i64 %tmp2 = shl nuw nsw i64 %tmp1, 32 %tmp3 = add nuw nsw i64 %tmp2, 8589934592 %tmp4 = lshr exact i64 %tmp3, 32 - %tmp5 = getelementptr inbounds i32, i32* %arg, i64 %tmp4 - store i32 0, i32* %tmp5, align 4 - %tmp6 = getelementptr inbounds i32, i32* %arg, i64 %tmp1 - %tmp7 = load i32, i32* %tmp6, align 4 + %tmp5 = getelementptr inbounds i32, ptr %arg, i64 %tmp4 + store i32 0, ptr %tmp5, align 4 + %tmp6 = getelementptr inbounds i32, ptr %arg, i64 %tmp1 + %tmp7 = load i32, ptr %tmp6, align 4 fence syncscope("singlethread") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("singlethread") acquire %tmp8 = add nuw nsw i64 %tmp2, 4294967296 %tmp9 = lshr exact i64 %tmp8, 32 - %tmp10 = getelementptr inbounds i32, i32* %arg, i64 %tmp9 - store i32 %tmp7, i32* %tmp10, align 4 + %tmp10 = getelementptr inbounds i32, ptr %arg, i64 %tmp9 + store i32 %tmp7, ptr %tmp10, align 4 ret void } @@ -159,24 +159,24 @@ ; GFX10PLUS: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} ; GFX10PLUS: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: s_barrier -define amdgpu_kernel void @barrier_vmcnt_vscnt_flat_workgroup(i32* %arg) { +define amdgpu_kernel void @barrier_vmcnt_vscnt_flat_workgroup(ptr %arg) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = zext i32 %tmp to i64 %tmp2 = shl nuw nsw i64 %tmp1, 32 %tmp3 = add nuw nsw i64 %tmp2, 8589934592 %tmp4 = lshr exact i64 %tmp3, 32 - %tmp5 = getelementptr inbounds i32, i32* %arg, i64 %tmp4 - store i32 0, i32* %tmp5, align 4 - %tmp6 = getelementptr inbounds i32, i32* %arg, i64 %tmp1 - %tmp7 = load i32, i32* %tmp6, align 4 + %tmp5 = getelementptr inbounds i32, ptr %arg, i64 %tmp4 + store i32 0, ptr %tmp5, align 4 + %tmp6 = getelementptr inbounds i32, ptr %arg, i64 %tmp1 + %tmp7 = load i32, ptr %tmp6, align 4 fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire %tmp8 = add nuw nsw i64 %tmp2, 4294967296 %tmp9 = lshr exact i64 %tmp8, 32 - %tmp10 = getelementptr inbounds i32, i32* %arg, i64 %tmp9 - store i32 %tmp7, i32* %tmp10, align 4 + %tmp10 = getelementptr inbounds i32, ptr %arg, i64 %tmp9 + store i32 %tmp7, ptr %tmp10, align 4 ret void } @@ -186,17 +186,17 @@ ; GFX8: s_waitcnt vmcnt(0){{$}} ; GFX9PLUS: s_waitcnt vmcnt(0){{$}} ; GCN-NEXT: {{global|flat}}_store_{{dword|b32}} -define amdgpu_kernel void @load_vmcnt_global(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @load_vmcnt_global(ptr addrspace(1) %arg) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = zext i32 %tmp to i64 %tmp2 = shl nuw nsw i64 %tmp1, 32 - 
%tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp1 - %tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4 + %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp1 + %tmp4 = load i32, ptr addrspace(1) %tmp3, align 4 %tmp5 = add nuw nsw i64 %tmp2, 4294967296 %tmp6 = lshr exact i64 %tmp5, 32 - %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp6 - store i32 %tmp4, i32 addrspace(1)* %tmp7, align 4 + %tmp7 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp6 + store i32 %tmp4, ptr addrspace(1) %tmp7, align 4 ret void } @@ -205,17 +205,17 @@ ; GCN-NOT: vscnt ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} ; GCN-NEXT: {{global|flat}}_store_{{dword|b32}} -define amdgpu_kernel void @load_vmcnt_flat(i32* %arg) { +define amdgpu_kernel void @load_vmcnt_flat(ptr %arg) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = zext i32 %tmp to i64 %tmp2 = shl nuw nsw i64 %tmp1, 32 - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp1 - %tmp4 = load i32, i32* %tmp3, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp1 + %tmp4 = load i32, ptr %tmp3, align 4 %tmp5 = add nuw nsw i64 %tmp2, 4294967296 %tmp6 = lshr exact i64 %tmp5, 32 - %tmp7 = getelementptr inbounds i32, i32* %arg, i64 %tmp6 - store i32 %tmp4, i32* %tmp7, align 4 + %tmp7 = getelementptr inbounds i32, ptr %arg, i64 %tmp6 + store i32 %tmp4, ptr %tmp7, align 4 ret void } @@ -224,8 +224,8 @@ ; GFX8_9: s_waitcnt vmcnt(0) ; GFX10PLUS: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: s_setpc_b64 -define void @store_vscnt_private(i32 addrspace(5)* %p) { - store i32 0, i32 addrspace(5)* %p +define void @store_vscnt_private(ptr addrspace(5) %p) { + store i32 0, ptr addrspace(5) %p ret void } @@ -235,8 +235,8 @@ ; GFX8_9: s_waitcnt vmcnt(0) ; GFX10PLUS: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: s_setpc_b64 -define void @store_vscnt_global(i32 addrspace(1)* %p) { - store i32 0, i32 addrspace(1)* %p +define void @store_vscnt_global(ptr addrspace(1) %p) { + store i32 0, ptr addrspace(1) %p ret void } @@ -246,8 +246,8 @@ ; GFX10PLUS: s_waitcnt lgkmcnt(0){{$}} ; GFX10PLUS: s_waitcnt_vscnt null, 0x0 ; GCN-NEXT: s_setpc_b64 -define void @store_vscnt_flat(i32* %p) { - store i32 0, i32* %p +define void @store_vscnt_flat(ptr %p) { + store i32 0, ptr %p ret void } diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -9,13 +9,13 @@ ; GFX1032: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 1, vcc_lo ; GFX1064: v_cmp_lt_i32_e32 vcc, 0, v{{[0-9]+}} ; GFX1064: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 1, vcc{{$}} -define amdgpu_kernel void @test_vopc_i32(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @test_vopc_i32(ptr addrspace(1) %arg) { %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %lid - %load = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %lid + %load = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp sgt i32 %load, 0 %sel = select i1 %cmp, i32 1, i32 2 - store i32 %sel, i32 addrspace(1)* %gep, align 4 + store i32 %sel, ptr addrspace(1) %gep, align 4 ret void } @@ -24,13 +24,13 @@ ; GFX1032: v_cndmask_b32_e64 v{{[0-9]+}}, 2.0, 1.0, vcc_lo ; GFX1064: v_cmp_nge_f32_e32 vcc, 0, v{{[0-9]+}} ; GFX1064: v_cndmask_b32_e64 v{{[0-9]+}}, 2.0, 1.0, vcc{{$}} -define amdgpu_kernel void @test_vopc_f32(float addrspace(1)* %arg) { +define amdgpu_kernel void @test_vopc_f32(ptr addrspace(1) 
%arg) { %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %lid - %load = load float, float addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %lid + %load = load float, ptr addrspace(1) %gep, align 4 %cmp = fcmp ugt float %load, 0.0 %sel = select i1 %cmp, float 1.0, float 2.0 - store float %sel, float addrspace(1)* %gep, align 4 + store float %sel, ptr addrspace(1) %gep, align 4 ret void } @@ -48,14 +48,14 @@ ; GFX1032: v_cndmask_b32_e32 v{{[0-9]+}}, 0x3c003c00, v{{[0-9]+}}, [[SC]] ; GFX1064: v_cmp_le_f16_sdwa [[SC:vcc|s\[[0-9:]+\]]], {{[vs][0-9]+}}, v{{[0-9]+}} src0_sel:WORD_1 src1_sel:DWORD ; GFX1064: v_cndmask_b32_e32 v{{[0-9]+}}, 0x3c003c00, v{{[0-9]+}}, [[SC]] -define amdgpu_kernel void @test_vopc_2xf16(<2 x half> addrspace(1)* %arg) { +define amdgpu_kernel void @test_vopc_2xf16(ptr addrspace(1) %arg) { %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %lid - %load = load <2 x half>, <2 x half> addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %arg, i32 %lid + %load = load <2 x half>, ptr addrspace(1) %gep, align 4 %elt = extractelement <2 x half> %load, i32 1 %cmp = fcmp ugt half %elt, 0.0 %sel = select i1 %cmp, <2 x half> , <2 x half> %load - store <2 x half> %sel, <2 x half> addrspace(1)* %gep, align 4 + store <2 x half> %sel, ptr addrspace(1) %gep, align 4 ret void } @@ -64,11 +64,11 @@ ; GFX1032: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[C]] ; GFX1064: v_cmp_class_f32_e64 [[C:vcc|s\[[0-9:]+\]]], s{{[0-9]+}}, 0x204 ; GFX1064: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[C]]{{$}} -define amdgpu_kernel void @test_vopc_class(i32 addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @test_vopc_class(ptr addrspace(1) %out, float %x) #0 { %fabs = tail call float @llvm.fabs.f32(float %x) %cmp = fcmp oeq float %fabs, 0x7FF0000000000000 %ext = zext i1 %cmp to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } @@ -78,10 +78,10 @@ ; GFX1064: v_cmp_neq_f16_e64 [[C:vcc|s\[[0-9:]+\]]], 0x7c00, s{{[0-9]+}} ; GFX1064: v_cndmask_b32_e32 v{{[0-9]+}}, 0x3c00, v{{[0-9]+}}, [[C]]{{$}} -define amdgpu_kernel void @test_vcmp_vcnd_f16(half addrspace(1)* %out, half %x) #0 { +define amdgpu_kernel void @test_vcmp_vcnd_f16(ptr addrspace(1) %out, half %x) #0 { %cmp = fcmp oeq half %x, 0x7FF0000000000000 %sel = select i1 %cmp, half 1.0, half %x - store half %sel, half addrspace(1)* %out, align 2 + store half %sel, ptr addrspace(1) %out, align 2 ret void } @@ -94,15 +94,15 @@ ; GFX1064: v_cmp_nle_f32_e64 [[C2:s\[[0-9:]+\]]], 1.0, v{{[0-9]+}} ; GFX1064: s_and_b64 [[AND:s\[[0-9:]+\]]], vcc, [[C2]] ; GFX1064: v_cndmask_b32_e64 v{{[0-9]+}}, 2.0, 1.0, [[AND]] -define amdgpu_kernel void @test_vop3_cmp_f32_sop_and(float addrspace(1)* %arg) { +define amdgpu_kernel void @test_vop3_cmp_f32_sop_and(ptr addrspace(1) %arg) { %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %lid - %load = load float, float addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %lid + %load = load float, ptr addrspace(1) %gep, align 4 %cmp = fcmp ugt float %load, 0.0 %cmp2 = fcmp ult float %load, 1.0 %and = and i1 %cmp, %cmp2 %sel = select i1 %and, float 1.0, float 2.0 - store float %sel, float addrspace(1)* %gep, align 4 + store float %sel, ptr 
addrspace(1) %gep, align 4 ret void } @@ -115,15 +115,15 @@ ; GFX1064: v_cmp_gt_i32_e64 [[C2:s\[[0-9:]+\]]], 1, v{{[0-9]+}} ; GFX1064: s_xor_b64 [[AND:s\[[0-9:]+\]]], vcc, [[C2]] ; GFX1064: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 1, [[AND]] -define amdgpu_kernel void @test_vop3_cmp_i32_sop_xor(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @test_vop3_cmp_i32_sop_xor(ptr addrspace(1) %arg) { %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %lid - %load = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %lid + %load = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp sgt i32 %load, 0 %cmp2 = icmp slt i32 %load, 1 %xor = xor i1 %cmp, %cmp2 %sel = select i1 %xor, i32 1, i32 2 - store i32 %sel, i32 addrspace(1)* %gep, align 4 + store i32 %sel, ptr addrspace(1) %gep, align 4 ret void } @@ -136,15 +136,15 @@ ; GFX1064: v_cmp_gt_u32_e64 [[C2:s\[[0-9:]+\]]], 2, v{{[0-9]+}} ; GFX1064: s_or_b64 [[AND:s\[[0-9:]+\]]], vcc, [[C2]] ; GFX1064: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 1, [[AND]] -define amdgpu_kernel void @test_vop3_cmp_u32_sop_or(i32 addrspace(1)* %arg) { +define amdgpu_kernel void @test_vop3_cmp_u32_sop_or(ptr addrspace(1) %arg) { %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %lid - %load = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %lid + %load = load i32, ptr addrspace(1) %gep, align 4 %cmp = icmp ugt i32 %load, 3 %cmp2 = icmp ult i32 %load, 2 %or = or i1 %cmp, %cmp2 %sel = select i1 %or, i32 1, i32 2 - store i32 %sel, i32 addrspace(1)* %gep, align 4 + store i32 %sel, ptr addrspace(1) %gep, align 4 ret void } @@ -152,13 +152,13 @@ ; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, vcc_lo ; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}} ; GCN: s_cbranch_execz -define amdgpu_kernel void @test_mask_if(i32 addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mask_if(ptr addrspace(1) %arg) #0 { %lid = tail call i32 @llvm.amdgcn.workitem.id.x() %cmp = icmp ugt i32 %lid, 10 br i1 %cmp, label %if, label %endif if: - store i32 0, i32 addrspace(1)* %arg, align 4 + store i32 0, ptr addrspace(1) %arg, align 4 br label %endif endif: @@ -189,7 +189,7 @@ ; GCN: ; %bb.{{[0-9]+}}: ; GCN: .LBB{{.*}}: ; GCN: s_endpgm -define amdgpu_kernel void @test_loop_with_if(i32 addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_loop_with_if(ptr addrspace(1) %arg) #0 { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() br label %bb2 @@ -204,13 +204,13 @@ bb5: %tmp6 = sext i32 %tmp3 to i64 - %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp6 - %tmp8 = load i32, i32 addrspace(1)* %tmp7, align 4 + %tmp7 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp6 + %tmp8 = load i32, ptr addrspace(1) %tmp7, align 4 %tmp9 = icmp sgt i32 %tmp8, 10 br i1 %tmp9, label %bb10, label %bb11 bb10: - store i32 %tmp, i32 addrspace(1)* %tmp7, align 4 + store i32 %tmp, ptr addrspace(1) %tmp7, align 4 br label %bb13 bb11: @@ -255,7 +255,7 @@ ; GCN-DAG: global_load_dword [[LOAD:v[0-9]+]] ; GFX1032: v_cmp_gt_i32_e32 vcc_lo, 11, [[LOAD]] ; GFX1064: v_cmp_gt_i32_e32 vcc, 11, [[LOAD]] -define amdgpu_kernel void @test_loop_with_if_else_break(i32 addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_loop_with_if_else_break(ptr addrspace(1) %arg) #0 { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = icmp eq i32 %tmp, 0 @@ -267,13 +267,13 @@ bb2: 
%tmp3 = phi i32 [ %tmp9, %bb8 ], [ 0, %.preheader ] %tmp4 = zext i32 %tmp3 to i64 - %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp4 - %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4 + %tmp5 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp4 + %tmp6 = load i32, ptr addrspace(1) %tmp5, align 4 %tmp7 = icmp sgt i32 %tmp6, 10 br i1 %tmp7, label %bb8, label %.loopexit bb8: - store i32 %tmp, i32 addrspace(1)* %tmp5, align 4 + store i32 %tmp, ptr addrspace(1) %tmp5, align 4 %tmp9 = add nuw nsw i32 %tmp3, 1 %tmp10 = icmp ult i32 %tmp9, 256 %tmp11 = icmp ult i32 %tmp9, %tmp @@ -289,13 +289,13 @@ ; GFX1032: v_add_co_ci_u32_e32 v{{[0-9]+}}, vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}, vcc_lo ; GFX1064: v_add_co_u32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, s{{[0-9]+}} ; GFX1064: v_add_co_ci_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}, vcc{{$}} -define amdgpu_kernel void @test_addc_vop2b(i64 addrspace(1)* %arg, i64 %arg1) #0 { +define amdgpu_kernel void @test_addc_vop2b(ptr addrspace(1) %arg, i64 %arg1) #0 { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() - %tmp3 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %tmp - %tmp4 = load i64, i64 addrspace(1)* %tmp3, align 8 + %tmp3 = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %tmp + %tmp4 = load i64, ptr addrspace(1) %tmp3, align 8 %tmp5 = add nsw i64 %tmp4, %arg1 - store i64 %tmp5, i64 addrspace(1)* %tmp3, align 8 + store i64 %tmp5, ptr addrspace(1) %tmp3, align 8 ret void } @@ -304,13 +304,13 @@ ; GFX1032: v_subrev_co_ci_u32_e32 v{{[0-9]+}}, vcc_lo, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[A0]]{{$}} ; GFX1064: v_sub_co_u32 v{{[0-9]+}}, [[A0:s\[[0-9:]+\]|vcc]], v{{[0-9]+}}, s{{[0-9]+}}{{$}} ; GFX1064: v_subrev_co_ci_u32_e32 v{{[0-9]+}}, vcc, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[A0]]{{$}} -define amdgpu_kernel void @test_subbrev_vop2b(i64 addrspace(1)* %arg, i64 %arg1) #0 { +define amdgpu_kernel void @test_subbrev_vop2b(ptr addrspace(1) %arg, i64 %arg1) #0 { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() - %tmp3 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %tmp - %tmp4 = load i64, i64 addrspace(1)* %tmp3, align 8 + %tmp3 = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %tmp + %tmp4 = load i64, ptr addrspace(1) %tmp3, align 8 %tmp5 = sub nsw i64 %tmp4, %arg1 - store i64 %tmp5, i64 addrspace(1)* %tmp3, align 8 + store i64 %tmp5, ptr addrspace(1) %tmp3, align 8 ret void } @@ -319,13 +319,13 @@ ; GFX1032: v_sub_co_ci_u32_e32 v{{[0-9]+}}, vcc_lo, {{[vs][0-9]+}}, v{{[0-9]+}}, [[A0]]{{$}} ; GFX1064: v_sub_co_u32 v{{[0-9]+}}, [[A0:s\[[0-9:]+\]|vcc]], s{{[0-9]+}}, v{{[0-9]+}}{{$}} ; GFX1064: v_sub_co_ci_u32_e32 v{{[0-9]+}}, vcc, {{[vs][0-9]+}}, v{{[0-9]+}}, [[A0]]{{$}} -define amdgpu_kernel void @test_subb_vop2b(i64 addrspace(1)* %arg, i64 %arg1) #0 { +define amdgpu_kernel void @test_subb_vop2b(ptr addrspace(1) %arg, i64 %arg1) #0 { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() - %tmp3 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %tmp - %tmp4 = load i64, i64 addrspace(1)* %tmp3, align 8 + %tmp3 = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %tmp + %tmp4 = load i64, ptr addrspace(1) %tmp3, align 8 %tmp5 = sub nsw i64 %arg1, %tmp4 - store i64 %tmp5, i64 addrspace(1)* %tmp3, align 8 + store i64 %tmp5, ptr addrspace(1) %tmp3, align 8 ret void } @@ -339,48 +339,48 @@ ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} ; GCN: s_addc_u32 s{{[0-9]+}}, 0, s{{[0-9]+}} ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -define amdgpu_kernel void @test_udiv64(i64 
addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_udiv64(ptr addrspace(1) %arg) #0 { bb: - %tmp = getelementptr inbounds i64, i64 addrspace(1)* %arg, i64 1 - %tmp1 = load i64, i64 addrspace(1)* %tmp, align 8 - %tmp2 = load i64, i64 addrspace(1)* %arg, align 8 + %tmp = getelementptr inbounds i64, ptr addrspace(1) %arg, i64 1 + %tmp1 = load i64, ptr addrspace(1) %tmp, align 8 + %tmp2 = load i64, ptr addrspace(1) %arg, align 8 %tmp3 = udiv i64 %tmp1, %tmp2 - %tmp4 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i64 2 - store i64 %tmp3, i64 addrspace(1)* %tmp4, align 8 + %tmp4 = getelementptr inbounds i64, ptr addrspace(1) %arg, i64 2 + store i64 %tmp3, ptr addrspace(1) %tmp4, align 8 ret void } ; GCN-LABEL: {{^}}test_div_scale_f32: ; GFX1032: v_div_scale_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; GFX1064: v_div_scale_f32 v{{[0-9]+}}, s[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @test_div_scale_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @test_div_scale_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile float, float addrspace(1)* %gep.0, align 4 - %b = load volatile float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, ptr addrspace(1) %gep.0, align 4 + %b = load volatile float, ptr addrspace(1) %gep.1, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } ; GCN-LABEL: {{^}}test_div_scale_f64: ; GFX1032: v_div_scale_f64 v[{{[0-9:]+}}], s{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] ; GFX1064: v_div_scale_f64 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] -define amdgpu_kernel void @test_div_scale_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) #0 { +define amdgpu_kernel void @test_div_scale_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile double, double addrspace(1)* %gep.0, align 8 - %b = load volatile double, double addrspace(1)* %gep.1, align 8 + %a = load volatile double, ptr addrspace(1) %gep.0, align 8 + %b = load volatile double, ptr addrspace(1) %gep.1, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } @@ -411,9 +411,9 @@ ; GFX1032: s_cselect_b32 vcc_lo, -1, 0 ; GFX1064: s_cselect_b64 vcc, -1, 0 ; GCN: v_div_fmas_f32 v{{[0-9]+}}, {{[vs][0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void 
@test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind { +define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i1 %d) nounwind { %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } @@ -422,9 +422,9 @@ ; GFX1032: s_cselect_b32 vcc_lo, -1, 0 ; GFX1064: s_cselect_b64 vcc, -1, 0 ; GCN-DAG: v_div_fmas_f64 v[{{[0-9:]+}}], {{[vs]}}[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] -define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind { +define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, double %b, double %c, i1 %d) nounwind { %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone - store double %result, double addrspace(1)* %out, align 8 + store double %result, ptr addrspace(1) %out, align 8 ret void } @@ -441,30 +441,30 @@ ; GFX1032: s_or_b32 exec_lo, exec_lo, [[SAVE]] ; GFX1064: s_or_b64 exec, exec, [[SAVE]] ; GCN: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} -define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 addrspace(1)* %dummy) #0 { +define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %dummy) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.out = getelementptr float, float addrspace(1)* %out, i32 2 - %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1 - %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2 + %gep.out = getelementptr float, ptr addrspace(1) %out, i32 2 + %gep.a = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.b = getelementptr float, ptr addrspace(1) %gep.a, i32 1 + %gep.c = getelementptr float, ptr addrspace(1) %gep.a, i32 2 - %a = load float, float addrspace(1)* %gep.a - %b = load float, float addrspace(1)* %gep.b - %c = load float, float addrspace(1)* %gep.c + %a = load float, ptr addrspace(1) %gep.a + %b = load float, ptr addrspace(1) %gep.b + %c = load float, ptr addrspace(1) %gep.c %cmp0 = icmp eq i32 %tid, 0 br i1 %cmp0, label %bb, label %exit bb: - %val = load volatile i32, i32 addrspace(1)* %dummy + %val = load volatile i32, ptr addrspace(1) %dummy %cmp1 = icmp ne i32 %val, 0 br label %exit exit: %cond = phi i1 [false, %entry], [%cmp1, %bb] %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cond) nounwind readnone - store float %result, float addrspace(1)* %gep.out, align 4 + store float %result, ptr addrspace(1) %gep.out, align 4 ret void } @@ -477,10 +477,10 @@ ; GCN-NOT: vcc ; GCN: v_div_fmas_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 { +define amdgpu_kernel void @fdiv_f32(ptr addrspace(1) %out, float %a, float %b) #0 { entry: %fdiv = fdiv float %a, %b - store float %fdiv, float addrspace(1)* %out + store float %fdiv, ptr addrspace(1) %out ret void } @@ -489,33 +489,33 @@ ; GFX1064: v_cmp_nlt_f16_e32 vcc, ; GCN-NEXT: s_cbranch_vccnz define amdgpu_kernel void @test_br_cc_f16( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b) { + ptr addrspace(1) %r, + 
ptr addrspace(1) %a, + ptr addrspace(1) %b) { entry: - %a.val = load half, half addrspace(1)* %a - %b.val = load half, half addrspace(1)* %b + %a.val = load half, ptr addrspace(1) %a + %b.val = load half, ptr addrspace(1) %b %fcmp = fcmp olt half %a.val, %b.val br i1 %fcmp, label %one, label %two one: - store half %a.val, half addrspace(1)* %r + store half %a.val, ptr addrspace(1) %r ret void two: - store half %b.val, half addrspace(1)* %r + store half %b.val, ptr addrspace(1) %r ret void } ; GCN-LABEL: {{^}}test_brcc_i1: ; GCN: s_bitcmp0_b32 s{{[0-9]+}}, 0 ; GCN-NEXT: s_cbranch_scc1 -define amdgpu_kernel void @test_brcc_i1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i1 %val) #0 { +define amdgpu_kernel void @test_brcc_i1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i1 %val) #0 { %cmp0 = icmp ne i1 %val, 0 br i1 %cmp0, label %store, label %end store: - store i32 222, i32 addrspace(1)* %out + store i32 222, ptr addrspace(1) %out ret void end: @@ -549,7 +549,7 @@ br i1 %tmp9, label %bb1, label %bb2 bb1: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef br label %bb2 bb2: @@ -574,7 +574,7 @@ br i1 %cmp0, label %bb4, label %Flow bb4: ; preds = %bb1 - %load = load volatile i32, i32 addrspace(1)* undef, align 4 + %load = load volatile i32, ptr addrspace(1) undef, align 4 %cmp1 = icmp sge i32 %tmp, %load br label %Flow @@ -584,7 +584,7 @@ br i1 %tmp3, label %bb1, label %bb9 bb9: ; preds = %Flow - store volatile i32 7, i32 addrspace(3)* undef + store volatile i32 7, ptr addrspace(3) undef ret void } @@ -601,12 +601,12 @@ ; GFX1064: v_cndmask_b32_e32 v{{[0-9]+}}, 2, v{{[0-9]+}}, vcc ; GFX1064: v_cmp_ne_u32_e32 vcc, 3, v{{[0-9]+}} ; GFX1064: v_cndmask_b32_e32 v{{[0-9]+}}, 3, v{{[0-9]+}}, vcc -define amdgpu_kernel void @test_movrels_extract_neg_offset_vgpr(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @test_movrels_extract_neg_offset_vgpr(ptr addrspace(1) %out) #0 { entry: %id = call i32 @llvm.amdgcn.workitem.id.x() #1 %index = add i32 %id, -512 %value = extractelement <4 x i32> , i32 %index - store i32 %value, i32 addrspace(1)* %out + store i32 %value, ptr addrspace(1) %out ret void } @@ -617,9 +617,9 @@ ; GFX1064: s_not_b64 exec, exec{{$}} ; GFX1064: v_mov_b32_e32 {{v[0-9]+}}, 42 ; GFX1064: s_not_b64 exec, exec{{$}} -define amdgpu_kernel void @test_set_inactive(i32 addrspace(1)* %out, i32 %in) #0 { +define amdgpu_kernel void @test_set_inactive(ptr addrspace(1) %out, i32 %in) #0 { %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) - store i32 %tmp, i32 addrspace(1)* %out + store i32 %tmp, ptr addrspace(1) %out ret void } @@ -632,9 +632,9 @@ ; GFX1064: v_mov_b32_e32 {{v[0-9]+}}, 0 ; GFX1064: v_mov_b32_e32 {{v[0-9]+}}, 0 ; GFX1064: s_not_b64 exec, exec{{$}} -define amdgpu_kernel void @test_set_inactive_64(i64 addrspace(1)* %out, i64 %in) #0 { +define amdgpu_kernel void @test_set_inactive_64(ptr addrspace(1) %out, i64 %in) #0 { %tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 %in, i64 0) - store i64 %tmp, i64 addrspace(1)* %out + store i64 %tmp, ptr addrspace(1) %out ret void } @@ -821,10 +821,10 @@ ; GFX1064-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]] ; GFX1064-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[C_HI]] ; GCN: store_dwordx2 v{{[0-9]+}}, v[[[V_LO]]:[[V_HI]]], s -define amdgpu_kernel void @test_intr_fcmp_i64(i64 addrspace(1)* %out, float %src, float %a) { +define amdgpu_kernel void @test_intr_fcmp_i64(ptr addrspace(1) %out, float %src, float %a) { %temp = call float @llvm.fabs.f32(float %a) 
%result = call i64 @llvm.amdgcn.fcmp.i64.f32(float %src, float %temp, i32 1) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } @@ -836,9 +836,9 @@ ; GFX1064-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]] ; GFX1064-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[C_HI]] ; GCN: store_dwordx2 v{{[0-9]+}}, v[[[V_LO]]:[[V_HI]]], s -define amdgpu_kernel void @test_intr_icmp_i64(i64 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @test_intr_icmp_i64(ptr addrspace(1) %out, i32 %src) { %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %src, i32 100, i32 32) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } @@ -848,10 +848,10 @@ ; GFX1064: v_cmp_eq_f32_e64 s[[[C_LO:[0-9]+]]:[[C_HI:[0-9]+]]], {{s[0-9]+}}, |{{[vs][0-9]+}}| ; GFX1064-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]] ; GCN: store_dword v{{[0-9]+}}, v[[V_LO]], s -define amdgpu_kernel void @test_intr_fcmp_i32(i32 addrspace(1)* %out, float %src, float %a) { +define amdgpu_kernel void @test_intr_fcmp_i32(ptr addrspace(1) %out, float %src, float %a) { %temp = call float @llvm.fabs.f32(float %a) %result = call i32 @llvm.amdgcn.fcmp.i32.f32(float %src, float %temp, i32 1) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } @@ -861,9 +861,9 @@ ; GFX1064: v_cmp_eq_u32_e64 s[[[C_LO:[0-9]+]]:{{[0-9]+}}], 0x64, {{s[0-9]+}} ; GFX1064-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[C_LO]]{{$}} ; GCN: store_dword v{{[0-9]+}}, v[[V_LO]], s -define amdgpu_kernel void @test_intr_icmp_i32(i32 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @test_intr_icmp_i32(ptr addrspace(1) %out, i32 %src) { %result = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %src, i32 100, i32 32) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } @@ -921,9 +921,9 @@ ; GFX1032: s_and_b32 vcc_lo, exec_lo, [[C]] ; GFX1064: v_cmp_neq_f64_e64 [[C:s\[[0-9:]+\]]], s[{{[0-9:]+}}], 1.0 ; GFX1064: s_and_b64 vcc, exec, [[C]] -define amdgpu_kernel void @test_vccnz_ifcvt_triangle64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { +define amdgpu_kernel void @test_vccnz_ifcvt_triangle64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { entry: - %v = load double, double addrspace(1)* %in + %v = load double, ptr addrspace(1) %in %cc = fcmp oeq double %v, 1.000000e+00 br i1 %cc, label %if, label %endif @@ -933,7 +933,7 @@ endif: %r = phi double [ %v, %entry ], [ %u, %if ] - store double %r, double addrspace(1)* %out + store double %r, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -150,7 +150,7 @@ } ; Check that WQM is re-enabled when required. 
-define amdgpu_ps <4 x float> @test4(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, i32 %c, i32 %d, float %data) { +define amdgpu_ps <4 x float> @test4(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, ptr addrspace(1) inreg %ptr, i32 %c, i32 %d, float %data) { ; GFX9-W64-LABEL: test4: ; GFX9-W64: ; %bb.0: ; %main_body ; GFX9-W64-NEXT: s_mov_b64 s[12:13], exec @@ -586,7 +586,7 @@ ; GFX10-W32-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: - %src0 = load volatile float, float addrspace(1)* undef + %src0 = load volatile float, ptr addrspace(1) undef ; use mbcnt to make sure the branch is divergent %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) @@ -594,7 +594,7 @@ br i1 %cc, label %endif, label %if if: - %src1 = load volatile float, float addrspace(1)* undef + %src1 = load volatile float, ptr addrspace(1) undef %out = fadd float %src0, %src1 %out.0 = call float @llvm.amdgcn.wwm.f32(float %out) br label %endif @@ -667,7 +667,7 @@ ; GFX10-W32-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: - %src0 = load volatile float, float addrspace(1)* undef + %src0 = load volatile float, ptr addrspace(1) undef ; use mbcnt to make sure the branch is divergent %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) @@ -675,7 +675,7 @@ loop: %counter = phi i32 [ %hi, %main_body ], [ %counter.1, %loop ] - %src1 = load volatile float, float addrspace(1)* undef + %src1 = load volatile float, ptr addrspace(1) undef %out = fadd float %src0, %src1 %out.0 = call float @llvm.amdgcn.wwm.f32(float %out) %counter.1 = sub i32 %counter, 1 @@ -1058,7 +1058,7 @@ ; GFX10-W32-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: - %src0 = load volatile float, float addrspace(1)* undef + %src0 = load volatile float, ptr addrspace(1) undef ; use mbcnt to make sure the branch is divergent %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) @@ -1066,7 +1066,7 @@ br i1 %cc, label %endif, label %if if: - %src1 = load volatile float, float addrspace(1)* undef + %src1 = load volatile float, ptr addrspace(1) undef %out = fadd float %src0, %src1 %out.0 = call float @llvm.amdgcn.strict.wqm.f32(float %out) br label %endif @@ -1145,7 +1145,7 @@ ; GFX10-W32-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: - %src0 = load volatile float, float addrspace(1)* undef + %src0 = load volatile float, ptr addrspace(1) undef ; use mbcnt to make sure the branch is divergent %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) @@ -1153,7 +1153,7 @@ loop: %counter = phi i32 [ %hi, %main_body ], [ %counter.1, %loop ] - %src1 = load volatile float, float addrspace(1)* undef + %src1 = load volatile float, ptr addrspace(1) undef %out = fadd float %src0, %src1 %out.0 = call float @llvm.amdgcn.strict.wqm.f32(float %out) %counter.1 = sub i32 %counter, 1 @@ -1623,7 +1623,7 @@ } ; Kill is performed in WQM mode so that uniform kill behaves correctly ... 
-define amdgpu_ps <4 x float> @test_kill_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <2 x i32> %idx, <2 x float> %data, float %coord, float %coord2, float %z) { +define amdgpu_ps <4 x float> @test_kill_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, ptr addrspace(1) inreg %ptr, <2 x i32> %idx, <2 x float> %data, float %coord, float %coord2, float %z) { ; GFX9-W64-LABEL: test_kill_0: ; GFX9-W64: ; %bb.0: ; %main_body ; GFX9-W64-NEXT: s_mov_b64 s[12:13], exec @@ -2003,13 +2003,12 @@ call void @llvm.amdgcn.raw.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i32 0) - %s.gep = getelementptr [32 x i32], [32 x i32] addrspace(5)* %array, i32 0, i32 0 - store volatile i32 %a, i32 addrspace(5)* %s.gep, align 4 + store volatile i32 %a, ptr addrspace(5) %array, align 4 call void @llvm.amdgcn.struct.buffer.store.f32(float %data, <4 x i32> undef, i32 1, i32 0, i32 0, i32 0) - %c.gep = getelementptr [32 x i32], [32 x i32] addrspace(5)* %array, i32 0, i32 %idx - %c = load i32, i32 addrspace(5)* %c.gep, align 4 + %c.gep = getelementptr [32 x i32], ptr addrspace(5) %array, i32 0, i32 %idx + %c = load i32, ptr addrspace(5) %c.gep, align 4 %c.bc = bitcast i32 %c to float %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) #0 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %t, <4 x i32> undef, i32 0, i32 0, i32 0) @@ -2103,7 +2102,7 @@ br i1 %cc, label %if, label %else if: - store volatile <4 x float> %dtex, <4 x float> addrspace(1)* undef + store volatile <4 x float> %dtex, ptr addrspace(1) undef unreachable else: @@ -2590,7 +2589,7 @@ ; GFX10-W32-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: - %src0 = load volatile float, float addrspace(1)* undef + %src0 = load volatile float, ptr addrspace(1) undef ; use mbcnt to make sure the branch is divergent %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) @@ -2598,7 +2597,7 @@ br i1 %cc, label %endif, label %if if: - %src1 = load volatile float, float addrspace(1)* undef + %src1 = load volatile float, ptr addrspace(1) undef %out = fadd float %src0, %src1 %out.0 = call float @llvm.amdgcn.strict.wwm.f32(float %out) br label %endif @@ -2667,7 +2666,7 @@ ; GFX10-W32-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: - %src0 = load volatile float, float addrspace(1)* undef + %src0 = load volatile float, ptr addrspace(1) undef ; use mbcnt to make sure the branch is divergent %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) @@ -2675,7 +2674,7 @@ loop: %counter = phi i32 [ %hi, %main_body ], [ %counter.1, %loop ] - %src1 = load volatile float, float addrspace(1)* undef + %src1 = load volatile float, ptr addrspace(1) undef %out = fadd float %src0, %src1 %out.0 = call float @llvm.amdgcn.strict.wwm.f32(float %out) %counter.1 = sub i32 %counter, 1 @@ -3216,7 +3215,7 @@ ; Check if the correct VCC register is selected. WQM pass incorrectly uses VCC for ; vector comparisons in Wave32 mode. 
-define amdgpu_ps void @test_for_deactivating_lanes_in_wave32(float addrspace(6)* inreg %0) { +define amdgpu_ps void @test_for_deactivating_lanes_in_wave32(ptr addrspace(6) inreg %0) { ; GFX9-W64-LABEL: test_for_deactivating_lanes_in_wave32: ; GFX9-W64: ; %bb.0: ; %main_body ; GFX9-W64-NEXT: s_mov_b32 s3, 0x31016fac @@ -3249,7 +3248,7 @@ ; GFX10-W32-NEXT: exp null off, off, off, off done vm ; GFX10-W32-NEXT: s_endpgm main_body: - %1 = ptrtoint float addrspace(6)* %0 to i32 + %1 = ptrtoint ptr addrspace(6) %0 to i32 %2 = insertelement <4 x i32> , i32 %1, i32 0 %3 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %2, i32 0, i32 0) #3 %4 = fcmp nsz arcp ugt float %3, 0.000000e+00 diff --git a/llvm/test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll b/llvm/test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll --- a/llvm/test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll +++ b/llvm/test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll @@ -4,7 +4,7 @@ ;CHECK: {{^}}fill3d: ;CHECK-NOT: MULLO_INT T[0-9]+ -define amdgpu_kernel void @fill3d(i32 addrspace(1)* nocapture %out) #0 { +define amdgpu_kernel void @fill3d(ptr addrspace(1) nocapture %out) #0 { entry: %x.i = tail call i32 @llvm.r600.read.global.size.x() #1 %y.i18 = tail call i32 @llvm.r600.read.global.size.y() #1 @@ -30,8 +30,8 @@ %z.i8.i = tail call i32 @llvm.r600.read.tidig.z() #1 %add.i = add i32 %z.i8.i, %mul33.i %add13 = add i32 %add.i, %add - %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %add13 - store i32 %mul3, i32 addrspace(1)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %add13 + store i32 %mul3, ptr addrspace(1) %arrayidx, align 4 ret void } @@ -78,4 +78,4 @@ !0 = !{null} !1 = !{null} -!2 = !{void (i32 addrspace(1)*)* @fill3d} +!2 = !{ptr @fill3d} diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -149,6 +149,7 @@ ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: Shrink Wrapping analysis ; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization +; CHECK-NEXT: Machine Late Instructions Cleanup Pass ; CHECK-NEXT: Control Flow Optimizer ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Tail Duplication diff --git a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll --- a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll +++ b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll @@ -1652,7 +1652,6 @@ ; THUMB-ENABLE-NEXT: movs r0, #0 ; THUMB-ENABLE-NEXT: cbnz r0, LBB11_5 ; THUMB-ENABLE-NEXT: @ %bb.1: @ %loop2a.preheader -; THUMB-ENABLE-NEXT: movs r0, #0 ; THUMB-ENABLE-NEXT: movs r1, #0 ; THUMB-ENABLE-NEXT: mov r2, r0 ; THUMB-ENABLE-NEXT: b LBB11_3 @@ -1679,7 +1678,6 @@ ; THUMB-DISABLE-NEXT: movs r0, #0 ; THUMB-DISABLE-NEXT: cbnz r0, LBB11_5 ; THUMB-DISABLE-NEXT: @ %bb.1: @ %loop2a.preheader -; THUMB-DISABLE-NEXT: movs r0, #0 ; THUMB-DISABLE-NEXT: movs r1, #0 ; THUMB-DISABLE-NEXT: mov r2, r0 ; THUMB-DISABLE-NEXT: b LBB11_3 diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll --- a/llvm/test/CodeGen/ARM/fpclamptosat.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -3764,7 +3764,6 @@ ; SOFT-NEXT: @ %bb.18: @ %entry ; SOFT-NEXT: mov r3, r6 ; SOFT-NEXT: .LBB48_19: @ %entry -; SOFT-NEXT: ldr r0, .LCPI48_0 ; SOFT-NEXT: cmp r4, r0 ; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload ; SOFT-NEXT: beq .LBB48_21 @@ -4347,7 +4346,6 @@ ; SOFT-NEXT: @ %bb.18: @ %entry ; SOFT-NEXT: 
mov r3, r6 ; SOFT-NEXT: .LBB51_19: @ %entry -; SOFT-NEXT: ldr r0, .LCPI51_0 ; SOFT-NEXT: cmp r4, r0 ; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload ; SOFT-NEXT: beq .LBB51_21 diff --git a/llvm/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll b/llvm/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll --- a/llvm/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll +++ b/llvm/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll @@ -22,7 +22,7 @@ ; for.body -> for.cond.backedge (100%) ; -> cond.false.i (0%) ; CHECK: bb.1.for.body: -; CHECK: successors: %bb.2(0x80000000), %bb.4(0x00000000) +; CHECK: successors: %bb.2(0x80000000), %bb.5(0x00000000) for.body: br i1 undef, label %for.cond.backedge, label %lor.lhs.false.i, !prof !1 diff --git a/llvm/test/CodeGen/ARM/jump-table-islands.ll b/llvm/test/CodeGen/ARM/jump-table-islands.ll --- a/llvm/test/CodeGen/ARM/jump-table-islands.ll +++ b/llvm/test/CodeGen/ARM/jump-table-islands.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=armv7-apple-ios8.0 -o - %s | FileCheck %s -%BigInt = type i5500 +%BigInt = type i8500 define %BigInt @test_moved_jumptable(i1 %tst, i32 %sw, %BigInt %l) { ; CHECK-LABEL: test_moved_jumptable: diff --git a/llvm/test/CodeGen/ARM/loopvectorize_pr33804.ll b/llvm/test/CodeGen/ARM/loopvectorize_pr33804.ll --- a/llvm/test/CodeGen/ARM/loopvectorize_pr33804.ll +++ b/llvm/test/CodeGen/ARM/loopvectorize_pr33804.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-vectorize -S < %s | FileCheck %s +; RUN: opt -passes=loop-vectorize -S < %s | FileCheck %s ; These tests check that we don't crash if vectorizer decides to cast ; a float value to be stored into a pointer type or vice-versa. diff --git a/llvm/test/CodeGen/ARM/reg_sequence.ll b/llvm/test/CodeGen/ARM/reg_sequence.ll --- a/llvm/test/CodeGen/ARM/reg_sequence.ll +++ b/llvm/test/CodeGen/ARM/reg_sequence.ll @@ -283,7 +283,6 @@ ; CHECK-NEXT: vst1.32 {d17[1]}, [r0:32] ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r0, #0 ; CHECK-NEXT: bxne lr ; CHECK-NEXT: LBB9_1: ; CHECK-NEXT: trap diff --git a/llvm/test/CodeGen/BPF/objdump_cond_op_2.ll b/llvm/test/CodeGen/BPF/objdump_cond_op_2.ll --- a/llvm/test/CodeGen/BPF/objdump_cond_op_2.ll +++ b/llvm/test/CodeGen/BPF/objdump_cond_op_2.ll @@ -14,9 +14,8 @@ ;