diff --git a/clang-tools-extra/clangd/index/remote/server/Server.cpp b/clang-tools-extra/clangd/index/remote/server/Server.cpp
--- a/clang-tools-extra/clangd/index/remote/server/Server.cpp
+++ b/clang-tools-extra/clangd/index/remote/server/Server.cpp
@@ -90,7 +90,7 @@
     }
     unsigned Sent = 0;
     unsigned FailedToSend = 0;
-    Index->lookup(*Req, [&](const auto &Item) {
+    Index->lookup(*Req, [&](const clangd::Symbol &Item) {
       auto SerializedItem = ProtobufMarshaller->toProtobuf(Item);
       if (!SerializedItem) {
         ++FailedToSend;
@@ -121,7 +121,7 @@
     }
     unsigned Sent = 0;
     unsigned FailedToSend = 0;
-    bool HasMore = Index->fuzzyFind(*Req, [&](const auto &Item) {
+    bool HasMore = Index->fuzzyFind(*Req, [&](const clangd::Symbol &Item) {
       auto SerializedItem = ProtobufMarshaller->toProtobuf(Item);
       if (!SerializedItem) {
         ++FailedToSend;
@@ -150,7 +150,7 @@
     }
     unsigned Sent = 0;
     unsigned FailedToSend = 0;
-    bool HasMore = Index->refs(*Req, [&](const auto &Item) {
+    bool HasMore = Index->refs(*Req, [&](const clangd::Ref &Item) {
       auto SerializedItem = ProtobufMarshaller->toProtobuf(Item);
       if (!SerializedItem) {
         ++FailedToSend;
diff --git a/clang-tools-extra/unittests/clang-tidy/ClangTidyDiagnosticConsumerTest.cpp b/clang-tools-extra/unittests/clang-tidy/ClangTidyDiagnosticConsumerTest.cpp
--- a/clang-tools-extra/unittests/clang-tidy/ClangTidyDiagnosticConsumerTest.cpp
+++ b/clang-tools-extra/unittests/clang-tidy/ClangTidyDiagnosticConsumerTest.cpp
@@ -6,6 +6,7 @@
 namespace tidy {
 namespace test {
 
+namespace {
 class TestCheck : public ClangTidyCheck {
 public:
   TestCheck(StringRef Name, ClangTidyContext *Context)
@@ -20,17 +21,8 @@
     diag(Var->getTypeSpecStartLoc(), "type specifier");
   }
 };
+} // namespace
 
-// FIXME: This test seems to cause a strange linking interference
-// with the ValidConfiguration.ValidEnumOptions test on macOS.
-// If both tests are enabled, this test will fail as if
-// runCheckOnCode() is not invoked at all. Looks like a linker bug.
-// For now both tests are disabled on macOS. It is not sufficient
-// to only disable the other test because this test keeps failing
-// under Address Sanitizer, which may be an indication of more
-// such linking interference with other tests and this test
-// seems to be in the center of it.
-#ifndef __APPLE__
 TEST(ClangTidyDiagnosticConsumer, SortsErrors) {
   std::vector<ClangTidyError> Errors;
   runCheckOnCode<TestCheck>("int a;", &Errors);
@@ -38,7 +30,6 @@
   EXPECT_EQ("type specifier", Errors[0].Message.Message);
   EXPECT_EQ("variable", Errors[1].Message.Message);
 }
-#endif
 
 } // namespace test
 } // namespace tidy
diff --git a/clang-tools-extra/unittests/clang-tidy/ClangTidyOptionsTest.cpp b/clang-tools-extra/unittests/clang-tidy/ClangTidyOptionsTest.cpp
--- a/clang-tools-extra/unittests/clang-tidy/ClangTidyOptionsTest.cpp
+++ b/clang-tools-extra/unittests/clang-tidy/ClangTidyOptionsTest.cpp
@@ -118,6 +118,7 @@
   EXPECT_TRUE(*Options.UseColor);
 }
 
+namespace {
 class TestCheck : public ClangTidyCheck {
 public:
   TestCheck(ClangTidyContext *Context) : ClangTidyCheck("test", Context) {}
@@ -140,6 +141,7 @@
     return Options.getLocalOrGlobal(std::forward<Args>(Arguments)...);
   }
 };
+} // namespace
 
 #define CHECK_VAL(Value, Expected)                                             \
   do {                                                                         \
@@ -222,9 +224,6 @@
 #undef CHECK_ERROR_INT
 }
 
-// FIXME: Figure out why this test causes crashes on mac os.
-// See also comments around the ClangTidyDiagnosticConsumer.SortsErrors test.
-#ifndef __APPLE__
 TEST(ValidConfiguration, ValidEnumOptions) {
   ClangTidyOptions Options;
 
@@ -276,7 +275,6 @@
 
 #undef CHECK_ERROR_ENUM
 }
-#endif
 
 #undef CHECK_VAL
 #undef CHECK_ERROR
diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst
--- a/clang/docs/OpenMPSupport.rst
+++ b/clang/docs/OpenMPSupport.rst
@@ -270,5 +270,5 @@
 +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
 | device extension             | 'present' map type modifier                                  | :part:`mostly done`      | D83061, D83062, D84422                                                |
 +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
-| device extension             | 'present' motion modifier                                    | :part:`worked on`        | D84711, D84712                                                        |
+| device extension             | 'present' motion modifier                                    | :good:`done`             | D84711, D84712                                                        |
 +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1174,6 +1174,7 @@
 #include "llvm/Support/Extension.def"
 
   LoopAnalysisManager LAM(CodeGenOpts.DebugPassManager);
+  LoopNestAnalysisManager LNAM(LAM);
   FunctionAnalysisManager FAM(CodeGenOpts.DebugPassManager);
   CGSCCAnalysisManager CGAM(CodeGenOpts.DebugPassManager);
   ModuleAnalysisManager MAM(CodeGenOpts.DebugPassManager);
@@ -1193,7 +1194,8 @@
   PB.registerCGSCCAnalyses(CGAM);
   PB.registerFunctionAnalyses(FAM);
   PB.registerLoopAnalyses(LAM);
-  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
+  PB.registerLoopNestAnalyses(LNAM);
+  PB.crossRegisterProxies(LAM, LNAM, FAM, CGAM, MAM);
 
   ModulePassManager MPM(CodeGenOpts.DebugPassManager);
 
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1455,6 +1455,19 @@
   }
 }
 
+static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
+                                                  SourceLocation Loc,
+                                                  SmallString<128> &Buffer) {
+  llvm::raw_svector_ostream OS(Buffer);
+  // Build debug location
+  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
+  OS << ";" << PLoc.getFilename() << ";";
+  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
+    OS << FD->getQualifiedNameAsString();
+  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
+  return OS.str();
+}
+
 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  unsigned Flags) {
@@ -1464,6 +1477,16 @@
       Loc.isInvalid())
     return getOrCreateDefaultLocation(Flags).getPointer();
 
+  // If the OpenMPIRBuilder is used, we need to use it for all location handling
+  // as the clang invariants used below might be broken.
+  if (CGM.getLangOpts().OpenMPIRBuilder) {
+    SmallString<128> Buffer;
+    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
+    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
+        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
+    return OMPBuilder.getOrCreateIdent(SrcLocStr, IdentFlag(Flags));
+  }
+
   assert(CGF.CurFn && "No function in current CodeGenFunction.");
 
   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
@@ -1497,15 +1520,9 @@
 
   llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
   if (OMPDebugLoc == nullptr) {
-    SmallString<128> Buffer2;
-    llvm::raw_svector_ostream OS2(Buffer2);
-    // Build debug location
-    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
-    OS2 << ";" << PLoc.getFilename() << ";";
-    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
-      OS2 << FD->getQualifiedNameAsString();
-    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
-    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
+    SmallString<128> Buffer;
+    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(
+        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
   }
   // *psource = ";;;;;;";
@@ -1519,6 +1536,16 @@
 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
   assert(CGF.CurFn && "No function in current CodeGenFunction.");
+  // If the OpenMPIRBuilder is used, we need to use it for all thread id calls
+  // as the clang invariants used below might be broken.
+  if (CGM.getLangOpts().OpenMPIRBuilder) {
+    SmallString<128> Buffer;
+    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
+    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
+        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
+    return OMPBuilder.getOrCreateThreadID(
+        OMPBuilder.getOrCreateIdent(SrcLocStr));
+  }
 
   llvm::Value *ThreadID = nullptr;
   // Check whether we've already cached a load of the thread id in this
@@ -7392,10 +7419,9 @@
   // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
   //
   // map(p[1:24])
+  // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
+  // in unified shared memory mode or for local pointers
   // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
-  // for data directives
-  // p, p, sizeof(float*), TARGET_PARAM | TO | FROM
-  // p, &p[1], 24*sizeof(float), PTR_AND_OBJ | TO | FROM
   //
   // map(s)
   // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
@@ -7530,6 +7556,7 @@
     // Track if the map information being generated is the first for a list of
     // components.
     bool IsExpressionFirstInfo = true;
+    bool FirstPointerInComplexData = false;
     Address BP = Address::invalid();
     const Expr *AssocExpr = I->getAssociatedExpression();
     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
@@ -7572,17 +7599,16 @@
         QualType Ty =
             I->getAssociatedDeclaration()->getType().getNonReferenceType();
         if (Ty->isAnyPointerType() && std::next(I) != CE) {
-          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
-
-          // For non-data directives, we do not need to generate individual map
-          // information for the pointer, it can be associated with the combined
-          // storage.
+          // No need to generate individual map information for the pointer, it
+          // can be associated with the combined storage if shared memory mode is
+          // active or the base declaration is not a global variable.
+          const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
           if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
-              !CurDir.is<const OMPExecutableDirective *>() ||
-              !isOpenMPTargetDataManagementDirective(
-                  CurDir.get<const OMPExecutableDirective *>()
-                      ->getDirectiveKind()))
-            ++I;
+              !VD || VD->hasLocalStorage())
+            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
+          else
+            FirstPointerInComplexData = true;
+          ++I;
         }
       }
 
@@ -7617,8 +7643,19 @@
         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
         // as MEMBER_OF the parent struct.
-        if (EncounteredME)
+        if (EncounteredME) {
           ShouldBeMemberOf = true;
+          // Do not emit as complex pointer if this is actually not an
+          // array-like expression.
+          if (FirstPointerInComplexData) {
+            QualType Ty = std::prev(I)
+                              ->getAssociatedDeclaration()
+                              ->getType()
+                              .getNonReferenceType();
+            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
+            FirstPointerInComplexData = false;
+          }
+        }
       }
 
       auto Next = std::next(I);
@@ -7760,7 +7797,8 @@
       // (there is a set of entries for each capture).
       OpenMPOffloadMappingFlags Flags =
           getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
-                         !IsExpressionFirstInfo || RequiresReference,
+                         !IsExpressionFirstInfo || RequiresReference ||
+                             FirstPointerInComplexData,
                          IsCaptureFirstInfo && !RequiresReference);
 
       if (!IsExpressionFirstInfo) {
@@ -7819,6 +7857,7 @@
 
       IsExpressionFirstInfo = false;
       IsCaptureFirstInfo = false;
+      FirstPointerInComplexData = false;
     }
   }
 }
@@ -8067,6 +8106,7 @@
     // emission of that entry until the whole struct has been processed.
     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
+    MapCombinedInfoTy UseDevicePtrCombinedInfo;
 
    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
@@ -8086,15 +8126,27 @@
       // We potentially have map information for this declaration already.
       // Look for the first set of components that refer to it.
       if (It != Info.end()) {
-        auto CI = std::find_if(
-            It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
-              return MI.Components.back().getAssociatedDeclaration() == VD;
-            });
+        auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
+          return MI.Components.back().getAssociatedDeclaration() == VD;
+        });
         // If we found a map entry, signal that the pointer has to be returned
         // and move on to the next declaration.
+        // Exclude cases where the base pointer is mapped as an array subscript,
+        // array section or array shaping. The base address is passed as a
+        // pointer to base in this case and cannot be used as a base for a
+        // use_device_ptr list item.
        if (CI != It->second.end()) {
-          CI->ReturnDevicePointer = true;
-          continue;
+          auto PrevCI = std::next(CI->Components.rbegin());
+          const auto *VarD = dyn_cast<VarDecl>(VD);
+          if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
+              isa<MemberExpr>(IE) ||
+              !VD->getType().getNonReferenceType()->isPointerType() ||
+              PrevCI == CI->Components.rend() ||
+              isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
+              VarD->hasLocalStorage()) {
+            CI->ReturnDevicePointer = true;
+            continue;
+          }
         }
       }
 
@@ -8115,13 +8167,13 @@
       } else {
         llvm::Value *Ptr =
             CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
-        CombinedInfo.BasePointers.emplace_back(Ptr, VD);
-        CombinedInfo.Pointers.push_back(Ptr);
-        CombinedInfo.Sizes.push_back(
+        UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
+        UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
+        UseDevicePtrCombinedInfo.Sizes.push_back(
            llvm::Constant::getNullValue(CGF.Int64Ty));
-        CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM |
-                                     OMP_MAP_TARGET_PARAM);
-        CombinedInfo.Mappers.push_back(nullptr);
+        UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM |
+                                                 OMP_MAP_TARGET_PARAM);
+        UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
       }
     }
   }
@@ -8273,6 +8325,8 @@
       // We need to append the results of this capture to what we already have.
       CombinedInfo.append(CurInfo);
     }
+    // Append data for use_device_ptr clauses.
+    CombinedInfo.append(UseDevicePtrCombinedInfo);
   }
 
   /// Generate all the base pointers, section pointers, sizes, map types, and
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1707,9 +1707,11 @@
     CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
 
-    Builder.restoreIP(OMPBuilder.CreateParallel(Builder, BodyGenCB, PrivCB,
-                                                FiniCB, IfCond, NumThreads,
-                                                ProcBind, S.hasCancel()));
+    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
+        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
+    Builder.restoreIP(
+        OMPBuilder.CreateParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
+                                  IfCond, NumThreads, ProcBind, S.hasCancel()));
     return;
   }
 
diff --git a/clang/test/CodeGen/Inputs/thinlto_expect1.proftext b/clang/test/CodeGen/Inputs/thinlto_expect1.proftext
--- a/clang/test/CodeGen/Inputs/thinlto_expect1.proftext
+++ b/clang/test/CodeGen/Inputs/thinlto_expect1.proftext
@@ -2,7 +2,7 @@
 :ir
 foo
 # Func Hash:
-25571299074
+784007059655560962
 # Num Counters:
 2
 # Counter Values:
diff --git a/clang/test/CodeGen/Inputs/thinlto_expect2.proftext b/clang/test/CodeGen/Inputs/thinlto_expect2.proftext
--- a/clang/test/CodeGen/Inputs/thinlto_expect2.proftext
+++ b/clang/test/CodeGen/Inputs/thinlto_expect2.proftext
@@ -2,7 +2,7 @@
 :csir
 foo
 # Func Hash:
-25571299074
+784007059655560962
 # Num Counters:
 2
 # Counter Values:
@@ -11,7 +11,7 @@
 
 foo
 # Func Hash:
-1152921530178146050
+1936928564262407938
 # Num Counters:
 2
 # Counter Values:
diff --git a/clang/test/CodeGen/thinlto-distributed-newpm.ll b/clang/test/CodeGen/thinlto-distributed-newpm.ll
--- a/clang/test/CodeGen/thinlto-distributed-newpm.ll
+++ b/clang/test/CodeGen/thinlto-distributed-newpm.ll
@@ -1,3 +1,4 @@
+; FIXME: This test should use CHECK-NEXT to keep up-to-date.
; REQUIRES: x86-registered-target ; Validate ThinLTO post link pipeline at O2 and O3 @@ -18,7 +19,6 @@ ; RUN: -c -fthinlto-index=%t.o.thinlto.bc \ ; RUN: -o %t.native.o -x ir %t.o 2>&1 | FileCheck -check-prefixes=CHECK-O,CHECK-O3 %s --dump-input=fail -; CHECK-O: Running analysis: PassInstrumentationAnalysis ; CHECK-O: Starting {{.*}}Module pass manager run. ; CHECK-O: Running pass: WholeProgramDevirtPass ; CHECK-O: Running analysis: InnerAnalysisManagerProxy @@ -26,15 +26,12 @@ ; CHECK-O: Invalidating all non-preserved analyses for: ; CHECK-O: Invalidating analysis: InnerAnalysisManagerProxy ; CHECK-O: Running pass: ForceFunctionAttrsPass -; CHECK-O: Running pass: PassManager<{{.*}}Module> ; CHECK-O: Starting {{.*}}Module pass manager run. ; CHECK-O: Running pass: PGOIndirectCallPromotion ; CHECK-O: Running analysis: ProfileSummaryAnalysis ; CHECK-O: Running analysis: InnerAnalysisManagerProxy ; CHECK-O: Running analysis: OptimizationRemarkEmitterAnalysis on main -; CHECK-O: Running analysis: PassInstrumentationAnalysis on main ; CHECK-O: Running pass: InferFunctionAttrsPass -; CHECK-O: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager<{{.*}}Function>{{ ?}}> ; CHECK-O: Starting {{.*}}Function pass manager run. ; CHECK-O: Running pass: SimplifyCFGPass on main ; CHECK-O: Running analysis: TargetIRAnalysis on main @@ -46,18 +43,17 @@ ; CHECK-O: Running pass: LowerExpectIntrinsicPass on main ; CHECK-O3: Running pass: CallSiteSplittingPass on main ; CHECK-O: Finished {{.*}}Function pass manager run. +; CHECK-O: Running pass: LowerTypeTestsPass ; CHECK-O: Running pass: IPSCCPPass ; CHECK-O: Running pass: CalledValuePropagationPass ; CHECK-O: Running pass: GlobalOptPass ; CHECK-O: Invalidating all non-preserved analyses for: ; CHECK-O: Invalidating analysis: InnerAnalysisManagerProxy -; CHECK-O: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass> ; CHECK-O: Running analysis: InnerAnalysisManagerProxy +; CHECK-O: Running pass: PromotePass ; CHECK-O: Running analysis: DominatorTreeAnalysis on main -; CHECK-O: Running analysis: PassInstrumentationAnalysis on main ; CHECK-O: Running analysis: AssumptionAnalysis on main ; CHECK-O: Running pass: DeadArgumentEliminationPass -; CHECK-O: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager<{{.*}}Function>{{ ?}}> ; CHECK-O: Starting {{.*}}Function pass manager run. ; CHECK-O: Running pass: InstCombinePass on main ; CHECK-O: Running analysis: TargetLibraryAnalysis on main @@ -74,11 +70,9 @@ ; CHECK-O: Running analysis: GlobalsAA ; CHECK-O: Running analysis: CallGraphAnalysis ; CHECK-O: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis -; CHECK-O: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}DevirtSCCRepeatedPass<{{.*}}PassManager<{{.*}}LazyCallGraph::SCC ; CHECK-O: Running analysis: InnerAnalysisManagerProxy ; CHECK-O: Running analysis: LazyCallGraphAnalysis ; CHECK-O: Running analysis: FunctionAnalysisManagerCGSCCProxy on (main) -; CHECK-O: Running analysis: PassInstrumentationAnalysis on (main) ; CHECK-O: Running analysis: OuterAnalysisManagerProxy ; CHECK-O: Starting CGSCC pass manager run. 
; CHECK-O: Running pass: InlinerPass on (main) @@ -87,8 +81,6 @@ ; CHECK-O: Clearing all analysis results for: main ; CHECK-O3: Running pass: ArgumentPromotionPass on (main) ; CHECK-O3: Running analysis: TargetIRAnalysis on main -; CHECK-O: Running analysis: PassInstrumentationAnalysis on main -; CHECK-O3: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O: Starting {{.*}}Function pass manager run. ; CHECK-O: Running pass: SROA on main ; These next two can appear in any order since they are accessed as parameters @@ -117,7 +109,6 @@ ; CHECK-O: Running pass: SimplifyCFGPass on main ; CHECK-O: Running pass: ReassociatePass on main ; CHECK-O: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis -; CHECK-O: Running pass: FunctionToLoopPassAdaptor<{{.*}}PassManager<{{.*}}Loop ; CHECK-O: Starting {{.*}}Function pass manager run. ; CHECK-O: Running pass: LoopSimplifyPass on main ; CHECK-O: Running analysis: LoopAnalysis on main @@ -125,7 +116,6 @@ ; CHECK-O: Finished {{.*}}Function pass manager run. ; CHECK-O: Running pass: SimplifyCFGPass on main ; CHECK-O: Running pass: InstCombinePass on main -; CHECK-O: Running pass: FunctionToLoopPassAdaptor<{{.*}}PassManager<{{.*}}Loop ; CHECK-O: Starting {{.*}}Function pass manager run. ; CHECK-O: Running pass: LoopSimplifyPass on main ; CHECK-O: Running pass: LCSSAPass on main @@ -143,7 +133,6 @@ ; CHECK-O: Running pass: JumpThreadingPass on main ; CHECK-O: Running pass: CorrelatedValuePropagationPass on main ; CHECK-O: Running pass: DSEPass on main -; CHECK-O: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass> on main ; CHECK-O: Starting {{.*}}Function pass manager run. ; CHECK-O: Running pass: LoopSimplifyPass on main ; CHECK-O: Running pass: LCSSAPass on main @@ -168,7 +157,6 @@ ; CHECK-O: Invalidating analysis: PostDominatorTreeAnalysis on main ; CHECK-O: Invalidating analysis: CallGraphAnalysis ; CHECK-O: Finished {{.*}}Module pass manager run. -; CHECK-O: Running pass: PassManager<{{.*}}Module> ; CHECK-O: Starting {{.*}}Module pass manager run. ; CHECK-O: Running pass: GlobalOptPass ; CHECK-O: Running pass: GlobalDCEPass @@ -176,40 +164,43 @@ ; CHECK-O: Running pass: ReversePostOrderFunctionAttrsPass ; CHECK-O: Running analysis: CallGraphAnalysis ; CHECK-O: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA -; CHECK-O: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager<{{.*}}Function>{{ ?}}> ; CHECK-O: Starting {{.*}}Function pass manager run. ; CHECK-O: Running pass: Float2IntPass on main ; CHECK-O: Running pass: LowerConstantIntrinsicsPass on main -; CHECK-O: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopRotatePass> on main ; CHECK-O: Starting {{.*}}Function pass manager run. ; CHECK-O: Running pass: LoopSimplifyPass on main ; CHECK-O: Running analysis: LoopAnalysis on main ; CHECK-O: Running pass: LCSSAPass on main ; CHECK-O: Finished {{.*}}Function pass manager run. 
-; CHECK-O: Running pass: LoopDistributePass on main -; CHECK-O: Running analysis: ScalarEvolutionAnalysis on main +; CHECK-O: Running analysis: MemorySSAAnalysis on main ; CHECK-O: Running analysis: AAManager on main ; CHECK-O: Running analysis: BasicAA on main +; CHECK-O: Running analysis: ScalarEvolutionAnalysis on main ; CHECK-O: Running analysis: InnerAnalysisManagerProxy +; CHECK-O: Running pass: LoopRotatePass on Loop at depth 1 containing: %b +; CHECK-O: Running pass: LoopDistributePass on main +; CHECK-O: Running pass: InjectTLIMappings on main ; CHECK-O: Running pass: LoopVectorizePass on main ; CHECK-O: Running analysis: BlockFrequencyAnalysis on main ; CHECK-O: Running analysis: BranchProbabilityAnalysis on main +; CHECK-O: Running analysis: PostDominatorTreeAnalysis on main ; CHECK-O: Running analysis: DemandedBitsAnalysis on main -; CHECK-O: Running analysis: MemorySSAAnalysis on main ; CHECK-O: Running pass: LoopLoadEliminationPass on main +; CHECK-O: Running analysis: LoopAccessAnalysis on Loop at depth 1 containing: %b ; CHECK-O: Running pass: InstCombinePass on main ; CHECK-O: Running pass: SimplifyCFGPass on main ; CHECK-O: Running pass: SLPVectorizerPass on main +; CHECK-O: Running pass: VectorCombinePass on main ; CHECK-O: Running pass: InstCombinePass on main ; CHECK-O: Running pass: LoopUnrollPass on main ; CHECK-O: Running pass: WarnMissedTransformationsPass on main ; CHECK-O: Running pass: InstCombinePass on main ; CHECK-O: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis -; CHECK-O: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass> on main ; CHECK-O: Starting {{.*}}Function pass manager run. ; CHECK-O: Running pass: LoopSimplifyPass on main ; CHECK-O: Running pass: LCSSAPass on main ; CHECK-O: Finished {{.*}}Function pass manager run. 
+; CHECK-O: Running pass: LICMPass on Loop at depth 1 containing: %b ; CHECK-O: Running pass: AlignmentFromAssumptionsPass on main ; CHECK-O: Running pass: LoopSinkPass on main ; CHECK-O: Running pass: InstSimplifyPass on main @@ -227,6 +218,8 @@ target triple = "x86_64-grtev4-linux-gnu" define i32 @main() { -entry: + br label %b +b: + br label %b ret i32 0 } diff --git a/clang/test/CodeGenCXX/Inputs/profile-remap.proftext b/clang/test/CodeGenCXX/Inputs/profile-remap.proftext --- a/clang/test/CodeGenCXX/Inputs/profile-remap.proftext +++ b/clang/test/CodeGenCXX/Inputs/profile-remap.proftext @@ -1,6 +1,6 @@ :ir _ZN3Foo8functionENS_1XE -29667547796 +146835647075900052 2 10 90 diff --git a/clang/test/CodeGenCXX/Inputs/profile-remap_entry.proftext b/clang/test/CodeGenCXX/Inputs/profile-remap_entry.proftext --- a/clang/test/CodeGenCXX/Inputs/profile-remap_entry.proftext +++ b/clang/test/CodeGenCXX/Inputs/profile-remap_entry.proftext @@ -1,7 +1,7 @@ :ir :entry_first _ZN3Foo8functionENS_1XE -29667547796 +146835647075900052 2 100 90 diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -16,7 +16,6 @@ float flag; int main (int argc, char **argv) { -// ALL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num( #pragma omp parallel { #pragma omp cancel parallel if(flag) @@ -42,14 +41,14 @@ } } // ALL: call void @__kmpc_for_static_init_4( -// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3) +// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 3) // ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 // ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]] // ALL: [[EXIT]] // ALL: br label // ALL: [[CONTINUE]] // ALL: br label -// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3) +// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 3) // ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 // ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]] // ALL: [[EXIT]] @@ -66,7 +65,7 @@ // ALL: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00 // ALL: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]] // ALL: [[THEN]] -// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 2) +// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 2) // ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 // ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]] // ALL: [[EXIT]] @@ -148,7 +147,7 @@ // CHECK: br label // CHECK: [[CONTINUE]] // CHECK: br label -// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3) +// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID:%.*]], i32 3) // CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 // CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]] // CHECK: [[EXIT]] diff --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c @@ -0,0 +1,299 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - | 
FileCheck --check-prefixes=CHECK %s +// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -verify %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK-DEBUG %s + +// expected-no-diagnostics + +// TODO: Teach the update script to check new functions too. + +#ifndef HEADER +#define HEADER + +// CHECK-LABEL: @_Z14parallel_for_0v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +// CHECK: omp_parallel: +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*)) +// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +// CHECK: omp.par.outlined.exit: +// CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +// CHECK: omp.par.exit.split: +// CHECK-NEXT: ret void +// +// CHECK-DEBUG-LABEL: @_Z14parallel_for_0v( +// CHECK-DEBUG-NEXT: entry: +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] +// CHECK-DEBUG: omp_parallel: +// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*)), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +// CHECK-DEBUG: omp.par.outlined.exit: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +// CHECK-DEBUG: omp.par.exit.split: +// CHECK-DEBUG-NEXT: ret void, !dbg !{{[0-9]*}} +// +void parallel_for_0(void) { +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 100; ++i) { + } + } +} + +// CHECK-LABEL: @_Z14parallel_for_1Pfid( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 +// CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +// CHECK: omp_parallel: +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_1Pfid..omp_par.1 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]) +// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT19:%.*]] +// CHECK: omp.par.outlined.exit19: +// CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +// CHECK: omp.par.exit.split: +// CHECK-NEXT: ret void +// +// CHECK-DEBUG-LABEL: @_Z14parallel_for_1Pfid( +// CHECK-DEBUG-NEXT: entry: +// CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 +// CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +// CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @12), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] +// CHECK-DEBUG: omp_parallel: +// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @12, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_1Pfid..omp_par.1 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT19:%.*]] +// CHECK-DEBUG: omp.par.outlined.exit19: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +// CHECK-DEBUG: omp.par.exit.split: +// CHECK-DEBUG-NEXT: ret void, !dbg !{{[0-9]*}} +// +void parallel_for_1(float *r, int a, double b) { +#pragma omp parallel + { +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 100; ++i) { + *r = a + b; + } + } + } +} + +// CHECK-LABEL: @_Z14parallel_for_2Pfid( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: [[DOTOMP_IV212:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP213:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_LB214:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_UB215:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_STRIDE216:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_IS_LAST217:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I218:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 +// CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +// CHECK: omp_parallel: +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_2Pfid..omp_par.4 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]) +// CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT211:%.*]] +// CHECK: omp.par.outlined.exit211: +// CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +// CHECK: omp.par.exit.split: +// CHECK-NEXT: store i32 0, i32* [[DOTOMP_LB214]], align 4 +// CHECK-NEXT: store i32 99, i32* [[DOTOMP_UB215]], align 4 +// CHECK-NEXT: store i32 1, i32* [[DOTOMP_STRIDE216]], align 4 +// CHECK-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST217]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM219:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @41) +// CHECK-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @2, i32 [[OMP_GLOBAL_THREAD_NUM219]], i32 34, i32* [[DOTOMP_IS_LAST217]], i32* [[DOTOMP_LB214]], i32* [[DOTOMP_UB215]], i32* [[DOTOMP_STRIDE216]], i32 1, i32 1) +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4 +// CHECK-NEXT: [[CMP220:%.*]] = icmp sgt i32 [[TMP0]], 99 +// CHECK-NEXT: br i1 [[CMP220]], label [[COND_TRUE221:%.*]], label [[COND_FALSE222:%.*]] +// CHECK: cond.true221: +// CHECK-NEXT: br label [[COND_END223:%.*]] +// CHECK: cond.false222: +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4 +// CHECK-NEXT: br label [[COND_END223]] +// CHECK: cond.end223: +// CHECK-NEXT: [[COND224:%.*]] = phi i32 [ 99, [[COND_TRUE221]] ], [ [[TMP1]], [[COND_FALSE222]] ] +// CHECK-NEXT: store i32 [[COND224]], i32* [[DOTOMP_UB215]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB214]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_IV212]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND225:%.*]] +// CHECK: omp.inner.for.cond225: +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4 +// CHECK-NEXT: [[CMP226:%.*]] = icmp sle i32 [[TMP3]], [[TMP4]] +// CHECK-NEXT: br i1 [[CMP226]], label [[OMP_INNER_FOR_BODY227:%.*]], label [[OMP_INNER_FOR_END236:%.*]] +// CHECK: omp.inner.for.body227: +// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4 +// CHECK-NEXT: [[MUL228:%.*]] = mul nsw i32 [[TMP5]], 1 +// CHECK-NEXT: [[ADD229:%.*]] = add nsw i32 0, [[MUL228]] +// CHECK-NEXT: store i32 [[ADD229]], i32* [[I218]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK-NEXT: [[CONV230:%.*]] = sitofp i32 [[TMP6]] to double +// CHECK-NEXT: [[TMP7:%.*]] = load double, double* [[B_ADDR]], align 8 +// CHECK-NEXT: [[ADD231:%.*]] = fadd double [[CONV230]], [[TMP7]] +// CHECK-NEXT: [[CONV232:%.*]] = fptrunc double [[ADD231]] to float +// CHECK-NEXT: [[TMP8:%.*]] = load float*, float** [[R_ADDR]], align 8 +// CHECK-NEXT: store float [[CONV232]], float* [[TMP8]], align 4 +// CHECK-NEXT: br label [[OMP_BODY_CONTINUE233:%.*]] +// CHECK: omp.body.continue233: +// CHECK-NEXT: br label [[OMP_INNER_FOR_INC234:%.*]] +// CHECK: omp.inner.for.inc234: +// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4 +// CHECK-NEXT: [[ADD235:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NEXT: store i32 [[ADD235]], i32* [[DOTOMP_IV212]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND225]] +// CHECK: omp.inner.for.end236: +// CHECK-NEXT: br label [[OMP_LOOP_EXIT237:%.*]] +// CHECK: omp.loop.exit237: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM238:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @43) +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @2, i32 [[OMP_GLOBAL_THREAD_NUM238]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM239:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) +// CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @7, i32 [[OMP_GLOBAL_THREAD_NUM239]]) +// CHECK-NEXT: ret void +// +// CHECK-DEBUG-LABEL: @_Z14parallel_for_2Pfid( +// CHECK-DEBUG-NEXT: entry: +// CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 +// CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +// CHECK-DEBUG-NEXT: [[DOTOMP_IV212:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[TMP213:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[DOTOMP_LB214:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[DOTOMP_UB215:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[DOTOMP_STRIDE216:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[DOTOMP_IS_LAST217:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[I218:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @25), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] +// CHECK-DEBUG: omp_parallel: +// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @25, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_2Pfid..omp_par.4 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT211:%.*]] +// CHECK-DEBUG: omp.par.outlined.exit211: +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +// CHECK-DEBUG: omp.par.exit.split: +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IV212]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_LB214]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: store i32 0, i32* [[DOTOMP_LB214]], align 4, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_UB215]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: store i32 99, i32* [[DOTOMP_UB215]], align 4, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_STRIDE216]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: store i32 1, i32* [[DOTOMP_STRIDE216]], align 4, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IS_LAST217]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST217]], align 4, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[I218]], metadata !{{[0-9]*}}, metadata !DIExpression()), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM219:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @97) +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @96, i32 [[OMP_GLOBAL_THREAD_NUM219]], i32 34, i32* [[DOTOMP_IS_LAST217]], i32* [[DOTOMP_LB214]], i32* [[DOTOMP_UB215]], i32* [[DOTOMP_STRIDE216]], i32 1, i32 1), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[CMP220:%.*]] = icmp sgt i32 [[TMP0]], 99, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: br i1 [[CMP220]], label [[COND_TRUE221:%.*]], label [[COND_FALSE222:%.*]], !dbg !{{[0-9]*}} +// CHECK-DEBUG: cond.true221: +// CHECK-DEBUG-NEXT: br label [[COND_END223:%.*]], !dbg !{{[0-9]*}} +// CHECK-DEBUG: cond.false222: +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: br label [[COND_END223]], !dbg !{{[0-9]*}} +// CHECK-DEBUG: cond.end223: +// CHECK-DEBUG-NEXT: [[COND224:%.*]] = phi i32 [ 99, [[COND_TRUE221]] ], [ [[TMP1]], [[COND_FALSE222]] ], !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: store i32 [[COND224]], i32* [[DOTOMP_UB215]], align 4, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB214]], align 4, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_IV212]], align 4, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: br label [[OMP_INNER_FOR_COND225:%.*]], !dbg !{{[0-9]*}} +// CHECK-DEBUG: omp.inner.for.cond225: +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB215]], align 4, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[CMP226:%.*]] = icmp sle i32 [[TMP3]], [[TMP4]], !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: br i1 [[CMP226]], label 
[[OMP_INNER_FOR_BODY227:%.*]], label [[OMP_INNER_FOR_END236:%.*]], !dbg !{{[0-9]*}} +// CHECK-DEBUG: omp.inner.for.body227: +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[MUL228:%.*]] = mul nsw i32 [[TMP5]], 1, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[ADD229:%.*]] = add nsw i32 0, [[MUL228]], !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: store i32 [[ADD229]], i32* [[I218]], align 4, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[CONV230:%.*]] = sitofp i32 [[TMP6]] to double, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[ADD231:%.*]] = fadd double [[CONV230]], [[TMP7]], !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[CONV232:%.*]] = fptrunc double [[ADD231]] to float, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: store float [[CONV232]], float* [[TMP8]], align 4, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: br label [[OMP_BODY_CONTINUE233:%.*]], !dbg !{{[0-9]*}} +// CHECK-DEBUG: omp.body.continue233: +// CHECK-DEBUG-NEXT: br label [[OMP_INNER_FOR_INC234:%.*]], !dbg !{{[0-9]*}} +// CHECK-DEBUG: omp.inner.for.inc234: +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV212]], align 4, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[ADD235:%.*]] = add nsw i32 [[TMP9]], 1, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: store i32 [[ADD235]], i32* [[DOTOMP_IV212]], align 4, !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: br label [[OMP_INNER_FOR_COND225]], !dbg !{{[0-9]*}}, !llvm.loop !{{[0-9]*}} +// CHECK-DEBUG: omp.inner.for.end236: +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_EXIT237:%.*]], !dbg !{{[0-9]*}} +// CHECK-DEBUG: omp.loop.exit237: +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM238:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @100) +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @99, i32 [[OMP_GLOBAL_THREAD_NUM238]]), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM239:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @103), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(%struct.ident_t* @102, i32 [[OMP_GLOBAL_THREAD_NUM239]]), !dbg !{{[0-9]*}} +// CHECK-DEBUG-NEXT: ret void, !dbg !{{[0-9]*}} +// +void parallel_for_2(float *r, int a, double b) { +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 100; ++i) + *r = a + b; +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 100; ++i) + *r = a + b; +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 100; ++i) + *r = a + b; + } +#pragma omp for + for (int i = 0; i < 100; ++i) + *r = a + b; +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 100; ++i) + *r = a + b; + } +#pragma omp for + for (int i = 0; i < 100; ++i) + *r = a + b; + } +#pragma omp for + for (int i = 0; i < 100; ++i) + *r = a + b; + } +#pragma omp for + for (int i = 0; i < 100; ++i) + *r = a + b; +} + +#endif diff --git a/clang/test/OpenMP/target_data_codegen.cpp b/clang/test/OpenMP/target_data_codegen.cpp --- a/clang/test/OpenMP/target_data_codegen.cpp +++ b/clang/test/OpenMP/target_data_codegen.cpp @@ -555,7 +555,7 @@ void test_close_modifier(int arg) { S2 *ps; - // CK5: private unnamed_addr constant [6 x i64] [i64 1059, i64 32, i64 562949953422339, i64 562949953421328, i64 16, i64 1043] + // CK5: private unnamed_addr constant [5 x i64] [i64 1059, 
i64 32, i64 562949953421328, i64 16, i64 1043] #pragma omp target data map(close,tofrom: arg, ps->ps->ps->ps->s) { ++(arg); @@ -634,20 +634,17 @@ // Make sure the struct picks up present even if another element of the struct // doesn't have present. - // CK8: private unnamed_addr constant [15 x i64] + // CK8: private unnamed_addr constant [11 x i64] // ps1 // // PRESENT=0x1000 | TARGET_PARAM=0x20 = 0x1020 // MEMBER_OF_1=0x1000000000000 | FROM=0x2 | TO=0x1 = 0x1000000000003 - // MEMBER_OF_1=0x1000000000000 | PTR_AND_OBJ=0x10 | FROM=0x2 | TO=0x1 = 0x1000000000013 - // MEMBER_OF_1=0x1000000000000 | PRESENT=0x1000 | FROM=0x2 | TO=0x1 = 0x1000000001003 // MEMBER_OF_1=0x1000000000000 | PRESENT=0x1000 | PTR_AND_OBJ=0x10 = 0x1000000001010 // PRESENT=0x1000 | PTR_AND_OBJ=0x10 = 0x1010 // PRESENT=0x1000 | PTR_AND_OBJ=0x10 | FROM=0x2 | TO=0x1 = 0x1013 // // CK8-SAME: {{^}} [i64 [[#0x1020]], i64 [[#0x1000000000003]], - // CK8-SAME: {{^}} i64 [[#0x1000000000013]], i64 [[#0x1000000001003]], // CK8-SAME: {{^}} i64 [[#0x1000000001010]], i64 [[#0x1010]], i64 [[#0x1013]], // arg @@ -659,16 +656,13 @@ // ps2 // // PRESENT=0x1000 | TARGET_PARAM=0x20 = 0x1020 - // MEMBER_OF_9=0x9000000000000 | PRESENT=0x1000 | FROM=0x2 | TO=0x1 = 0x9000000001003 - // MEMBER_OF_9=0x9000000000000 | PRESENT=0x1000 | PTR_AND_OBJ=0x10 | FROM=0x2 | TO=0x1 = 0x9000000001013 - // MEMBER_OF_9=0x9000000000000 | FROM=0x2 | TO=0x1 = 0x9000000000003 - // MEMBER_OF_9=0x9000000000000 | PTR_AND_OBJ=0x10 = 0x9000000000010 + // MEMBER_OF_7=0x7000000000000 | PRESENT=0x1000 | FROM=0x2 | TO=0x1 = 0x7000000001003 + // MEMBER_OF_7=0x7000000000000 | PTR_AND_OBJ=0x10 = 0x7000000000010 // PTR_AND_OBJ=0x10 = 0x10 // PTR_AND_OBJ=0x10 | FROM=0x2 | TO=0x1 = 0x13 // - // CK8-SAME: {{^}} i64 [[#0x1020]], i64 [[#0x9000000001003]], - // CK8-SAME: {{^}} i64 [[#0x9000000001013]], i64 [[#0x9000000000003]], - // CK8-SAME: {{^}} i64 [[#0x9000000000010]], i64 [[#0x10]], i64 [[#0x13]]] + // CK8-SAME: {{^}} i64 [[#0x1020]], i64 [[#0x7000000001003]], + // CK8-SAME: {{^}} i64 [[#0x7000000000010]], i64 [[#0x10]], i64 [[#0x13]]] #pragma omp target data map(tofrom: ps1->s) \ map(present,tofrom: arg, ps1->ps->ps->ps->s, ps2->s) \ map(tofrom: ps2->ps->ps->ps->s) diff --git a/clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp b/clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp --- a/clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp +++ b/clang/test/OpenMP/target_data_use_device_ptr_codegen.cpp @@ -22,18 +22,18 @@ double *g; // CK1: @g = global double* -// CK1: [[MTYPE00:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 19] -// CK1: [[MTYPE01:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 19] -// CK1: [[MTYPE03:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 19] -// CK1: [[MTYPE04:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 19] -// CK1: [[MTYPE05:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 19] -// CK1: [[MTYPE06:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 19] -// CK1: [[MTYPE07:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 19] -// CK1: [[MTYPE08:@.+]] = {{.*}}constant [4 x i64] [i64 99, i64 19, i64 35, i64 19] -// CK1: [[MTYPE09:@.+]] = {{.*}}constant [4 x i64] [i64 99, i64 19, i64 99, i64 19] -// CK1: [[MTYPE10:@.+]] = {{.*}}constant [4 x i64] [i64 99, i64 19, i64 99, i64 19] -// CK1: [[MTYPE11:@.+]] = {{.*}}constant [3 x i64] [i64 96, i64 35, i64 19] -// CK1: [[MTYPE12:@.+]] = {{.*}}constant [3 x i64] [i64 96, i64 35, i64 19] +// CK1: [[MTYPE00:@.+]] = {{.*}}constant [2 x i64] [i64 51, i64 96] +// CK1: [[MTYPE01:@.+]] = {{.*}}constant [1 x i64] [i64 99] 
+// CK1: [[MTYPE03:@.+]] = {{.*}}constant [1 x i64] [i64 99] +// CK1: [[MTYPE04:@.+]] = {{.*}}constant [1 x i64] [i64 99] +// CK1: [[MTYPE05:@.+]] = {{.*}}constant [1 x i64] [i64 99] +// CK1: [[MTYPE06:@.+]] = {{.*}}constant [1 x i64] [i64 99] +// CK1: [[MTYPE07:@.+]] = {{.*}}constant [1 x i64] [i64 99] +// CK1: [[MTYPE08:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 35] +// CK1: [[MTYPE09:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 99] +// CK1: [[MTYPE10:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 99] +// CK1: [[MTYPE11:@.+]] = {{.*}}constant [2 x i64] [i64 35, i64 96] +// CK1: [[MTYPE12:@.+]] = {{.*}}constant [2 x i64] [i64 35, i64 96] // CK1-LABEL: @_Z3foo template @@ -42,7 +42,7 @@ T *t; // CK1: [[T:%.+]] = load double*, double** [[DECL:@g]], - // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 1 // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to double** // CK1: store double* [[T]], double** [[CBP]], // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE00]] @@ -61,7 +61,7 @@ ++g; // CK1: [[T1:%.+]] = load float*, float** [[DECL:%.+]], - // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0 // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float** // CK1: store float* [[T1]], float** [[CBP]], // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE01]] @@ -92,7 +92,7 @@ ++l; // CK1: [[T1:%.+]] = load float*, float** [[DECL:%.+]], - // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0 // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float** // CK1: store float* [[T1]], float** [[CBP]], // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE03]] @@ -115,7 +115,7 @@ // CK1: [[BTHEN]]: // CK1: [[T1:%.+]] = load float*, float** [[DECL:%.+]], - // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0 // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float** // CK1: store float* [[T1]], float** [[CBP]], // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE04]] @@ -152,7 +152,7 @@ // CK1: [[T2:%.+]] = load float**, float*** [[DECL:%.+]], // CK1: [[T1:%.+]] = load float*, float** [[T2]], - // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0 // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float** // CK1: store float* [[T1]], float** [[CBP]], // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE05]] @@ -174,7 +174,7 @@ ++lr; // CK1: [[T1:%.+]] = load i32*, i32** [[DECL:%.+]], - // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0 // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32** // CK1: store i32* [[T1]], i32** [[CBP]], // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE06]] @@ -194,7 +194,7 @@ // CK1: [[T2:%.+]] = load i32**, i32*** [[DECL:%.+]], // CK1: [[T1:%.+]] = load i32*, i32** [[T2]], - // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: [[BP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* %{{.+}}, i32 0, i32 0 // 
CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32** // CK1: store i32* [[T1]], i32** [[CBP]], // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE07]] @@ -216,7 +216,7 @@ ++tr; // CK1: [[T1:%.+]] = load float*, float** [[DECL:%.+]], - // CK1: [[BP:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* %{{.+}}, i32 0, i32 + // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 0 // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to float** // CK1: store float* [[T1]], float** [[CBP]], // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE08]] @@ -280,7 +280,7 @@ ++l; ++t; // CK1: [[T1:%.+]] = load i32*, i32** [[DECL:%.+]], - // CK1: [[BP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 1 // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32** // CK1: store i32* [[T1]], i32** [[CBP]], // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE11]] @@ -300,7 +300,7 @@ // CK1: [[T2:%.+]] = load i32**, i32*** [[DECL:%.+]], // CK1: [[T1:%.+]] = load i32*, i32** [[T2]], - // CK1: [[BP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* %{{.+}}, i32 0, i32 0 + // CK1: [[BP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* %{{.+}}, i32 0, i32 1 // CK1: [[CBP:%.+]] = bitcast i8** [[BP]] to i32** // CK1: store i32* [[T1]], i32** [[CBP]], // CK1: call void @__tgt_target_data_begin{{.+}}[[MTYPE12]] @@ -348,7 +348,7 @@ // CK2: [[ST:%.+]] = type { double*, double** } // CK2: [[MTYPE00:@.+]] = {{.*}}constant [2 x i64] [i64 32, i64 281474976710739] // CK2: [[MTYPE01:@.+]] = {{.*}}constant [2 x i64] [i64 32, i64 281474976710739] -// CK2: [[MTYPE02:@.+]] = {{.*}}constant [4 x i64] [i64 35, i64 19, i64 32, i64 844424930132048] +// CK2: [[MTYPE02:@.+]] = {{.*}}constant [3 x i64] [i64 35, i64 32, i64 562949953421392] // CK2: [[MTYPE03:@.+]] = {{.*}}constant [3 x i64] [i64 32, i64 281474976710739, i64 281474976710736] template @@ -404,7 +404,7 @@ // CK2: getelementptr inbounds double, double* [[TTTT]], i32 1 b++; - // CK2: [[BP:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* %{{.+}}, i32 0, i32 3 + // CK2: [[BP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* %{{.+}}, i32 0, i32 2 // CK2: [[CBP:%.+]] = bitcast i8** [[BP]] to double*** // CK2: store double** [[RVAL:%.+]], double*** [[CBP]], // CK2: call void @__tgt_target_data_begin{{.+}}[[MTYPE02]] diff --git a/clang/test/OpenMP/target_map_codegen.cpp b/clang/test/OpenMP/target_map_codegen.cpp --- a/clang/test/OpenMP/target_map_codegen.cpp +++ b/clang/test/OpenMP/target_map_codegen.cpp @@ -3874,7 +3874,7 @@ // CK22-LABEL: @.__omp_offloading_{{.*}}explicit_maps_globals{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 // CK22: [[SIZE04:@.+]] = private {{.*}}constant [1 x i64] [i64 20] -// CK22: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i64] [i64 35] +// CK22: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i64] [i64 51] // CK22-LABEL: @.__omp_offloading_{{.*}}explicit_maps_globals{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 // CK22: [[SIZE05:@.+]] = private {{.*}}constant [1 x i64] [i64 4] @@ -3894,7 +3894,7 @@ // CK22-LABEL: @.__omp_offloading_{{.*}}explicit_maps_globals{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 // CK22: [[SIZE09:@.+]] = private {{.*}}constant [1 x i64] [i64 20] -// CK22: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i64] [i64 35] +// CK22: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i64] [i64 51] // CK22-LABEL: @.__omp_offloading_{{.*}}explicit_maps_globals{{.*}}_l{{[0-9]+}}.region_id = weak 
constant i8 0 // CK22: [[SIZE10:@.+]] = private {{.*}}constant [1 x i64] [i64 4] @@ -3914,7 +3914,7 @@ // CK22-LABEL: @.__omp_offloading_{{.*}}explicit_maps_globals{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 // CK22: [[SIZE14:@.+]] = private {{.*}}constant [1 x i64] [i64 20] -// CK22: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i64] [i64 35] +// CK22: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i64] [i64 51] int a; int c[100]; @@ -4010,11 +4010,10 @@ // CK22-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 // CK22-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32** + // CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32*** // CK22-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32** - // CK22-DAG: store i32* [[RVAR0:%.+]], i32** [[CBP0]] + // CK22-DAG: store i32** @d, i32*** [[CBP0]] // CK22-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]] - // CK22-DAG: [[RVAR0]] = load i32*, i32** @d // CK22-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[RVAR00:%.+]], i{{.+}} 2 // CK22-DAG: [[RVAR00]] = load i32*, i32** @d @@ -4093,11 +4092,10 @@ // CK22-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 // CK22-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[ST]]** + // CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[ST]]*** // CK22-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [[ST]]** - // CK22-DAG: store [[ST]]* [[RVAR0:%.+]], [[ST]]** [[CBP0]] + // CK22-DAG: store [[ST]]** @sd, [[ST]]*** [[CBP0]] // CK22-DAG: store [[ST]]* [[SEC0:%.+]], [[ST]]** [[CP0]] - // CK22-DAG: [[RVAR0]] = load [[ST]]*, [[ST]]** @sd // CK22-DAG: [[SEC0]] = getelementptr {{.*}}[[ST]]* [[RVAR00:%.+]], i{{.+}} 2 // CK22-DAG: [[RVAR00]] = load [[ST]]*, [[ST]]** @sd @@ -4176,11 +4174,10 @@ // CK22-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 // CK22-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[STT]]** + // CK22-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[STT]]*** // CK22-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [[STT]]** - // CK22-DAG: store [[STT]]* [[RVAR0:%.+]], [[STT]]** [[CBP0]] + // CK22-DAG: store [[STT]]** @std, [[STT]]*** [[CBP0]] // CK22-DAG: store [[STT]]* [[SEC0:%.+]], [[STT]]** [[CP0]] - // CK22-DAG: [[RVAR0]] = load [[STT]]*, [[STT]]** @std // CK22-DAG: [[SEC0]] = getelementptr {{.*}}[[STT]]* [[RVAR00:%.+]], i{{.+}} 2 // CK22-DAG: [[RVAR00]] = load [[STT]]*, [[STT]]** @std diff --git a/clang/test/OpenMP/target_update_codegen.cpp b/clang/test/OpenMP/target_update_codegen.cpp --- a/clang/test/OpenMP/target_update_codegen.cpp +++ b/clang/test/OpenMP/target_update_codegen.cpp @@ -310,22 +310,23 @@ #ifdef CK5 -// CK5: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 4] -// CK5: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17] +// CK5: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] +// CK5: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 33] // CK5-LABEL: lvalue void lvalue(int *B, int l, int e) { - // CK5-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) + // CK5-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** 
[[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) // CK5-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK5-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - // CK5-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK5-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK5-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to i32*** + // CK5-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK5-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK5-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to i32** // CK5-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to i32** - // CK5-DAG: store i32** [[B_ADDR:%.+]], i32*** [[BPC0]] + // CK5-DAG: store i32* [[B_VAL:%.+]], i32** [[BPC0]] // CK5-DAG: store i32* [[B_VAL_2:%.+]], i32** [[PC0]] + // CK5-DAG: [[B_VAL]] = load i32*, i32** [[B_ADDR:%.+]] // CK5-DAG: [[B_VAL_2]] = load i32*, i32** [[B_ADDR]] #pragma omp target update to(*B) *B += e; @@ -351,28 +352,29 @@ #ifdef CK6 -// CK6: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 4] -// CK6: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17] +// CK6: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] +// CK6: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 33] // CK6-LABEL: lvalue void lvalue(int *B, int l, int e) { - // CK6-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) + // CK6-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) // CK6-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK6-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - // CK6-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK6-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK6-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to i32*** + // CK6-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK6-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK6-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to i32** // CK6-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to i32** - // CK6-DAG: store i32** [[B_ADDR:%.+]], i32*** [[BPC0]] + // CK6-DAG: store i32* [[TWO:%.+]], i32** [[BPC0]] // CK6-DAG: store i32* [[ADD_PTR:%.+]], i32** [[PC0]] // CK6-64-DAG: [[ADD_PTR]] = getelementptr inbounds i32, i32* [[ONE:%.+]], i{{32|64}} [[IDX_EXT:%.+]] // CK6-32-DAG: [[ADD_PTR]] = getelementptr inbounds i32, i32* [[ONE:%.+]], i{{32|64}} [[L_VAL:%.+]] // CK6-64-DAG: [[IDX_EXT]] = sext i32 [[L_VAL:%.+]] to i64 // CK6-DAG: [[L_VAL]] = load i32, i32* [[L_ADDR:%.+]] // CK6-DAG: store i32 {{.+}}, i32* [[L_ADDR]] - // CK6-DAG: [[ONE]] = load i32*, i32** [[B_ADDR]] + // CK6-DAG: [[ONE]] = load i32*, i32** [[B_ADDR:%.+]] + // CK6-DAG: [[TWO]] = load i32*, i32** [[B_ADDR]] #pragma omp target update to(*(B+l)) *(B+l) += e; #pragma omp target update from(*(B+l)) @@ -397,25 +399,26 @@ #ifdef CK7 -// CK7: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 4] -// CK7: [[MTYPE00:@.+]] = 
{{.+}}constant [2 x i64] [i64 33, i64 17] +// CK7: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] +// CK7: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 33] // CK7-LABEL: lvalue void lvalue(int *B, int l, int e) { - // CK7-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) + // CK7-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) // CK7-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK7-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - // CK7-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK7-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK7-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to i32*** + // CK7-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK7-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK7-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to i32** // CK7-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to i32** - // CK7-DAG: store i32** [[B_ADDR:%.+]], i32*** [[BPC0]] + // CK7-DAG: store i32* [[B_VAL:%.+]], i32** [[BPC0]] // CK7-DAG: store i32* [[ARRAY_IDX:%.+]], i32** [[PC0]] // CK7-DAG: [[ARRAY_IDX]] = getelementptr inbounds i32, i32* [[ADD_PTR:%.+]], i{{32|64}} [[IDX_PROM:%.+]] // CK7-64-DAG: [[ADD_PTR]] = getelementptr inbounds i32, i32* [[ONE:%.+]], i64 [[IDX_EXT:%.+]] // CK7-32-DAG: [[ADD_PTR]] = getelementptr inbounds i32, i32* [[B_VAL_2:%.+]], i32 [[L_VAL:%.+]] + // CK7-32-DAG: [[B_VAL]] = load i32*, i32** [[B_ADDR:%.+]] // CK7-32-DAG: [[B_VAL_2]] = load i32*, i32** [[B_ADDR]] // CK7-32-DAG: [[L_VAL]] = load i32, i32* [[L_ADDR:%.+]] // CK7-32-DAG: [[IDX_PROM]] = load i32, i32* [[L_ADDR]] @@ -446,18 +449,18 @@ // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} #ifdef CK8 -// CK8: [[SIZE00:@.+]] = {{.+}}constant [3 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} {{8|4}}, i{{64|32}} 4] -// CK8: [[MTYPE00:@.+]] = {{.+}}constant [3 x i64] [i64 33, i64 16, i64 17] +// CK8: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 4] +// CK8: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17] // CK8-LABEL: lvalue void lvalue(int **B, int l, int e) { - // CK8-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}], [3 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}, [3 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) + // CK8-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}], [2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}, [2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) // CK8-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK8-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - // CK8-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 - // CK8-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 + // CK8-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK8-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 // CK8-DAG: 
[[BPC0:%.+]] = bitcast i8** [[BP0]] to i32*** // CK8-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to i32** // CK8-DAG: store i32** [[ARRAY_IDX_1:%.+]], i32*** [[BPC0]] @@ -501,19 +504,19 @@ double *p; }; -// CK9: [[MTYPE00:@.+]] = {{.+}}constant [3 x i64] [i64 32, i64 281474976710657, i64 281474976710673] +// CK9: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 32, i64 281474976710673] // CK9-LABEL: lvalue void lvalue(struct S *s, int l, int e) { - // CK9-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i{{.+}} [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}, [3 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) + // CK9-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i{{.+}} [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}, [2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) // CK9-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK9-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK9-DAG: [[GSIZE]] = getelementptr inbounds {{.+}}[[SIZE:%[^,]+]] // - // CK9-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 - // CK9-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 - // CK9-DAG: [[SIZE0:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 2 + // CK9-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK9-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK9-DAG: [[SIZE0:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 1 // CK9-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to double*** // CK9-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to double** // CK9-DAG: store double** [[P:%.+]], double*** [[BPC0]] @@ -551,19 +554,19 @@ double *p; }; -// CK10: [[MTYPE00:@.+]] = {{.+}}constant [3 x i64] [i64 32, i64 281474976710657, i64 281474976710673] +// CK10: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 32, i64 281474976710673] // CK10-LABEL: lvalue void lvalue(struct S *s, int l, int e) { - // CK10-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i{{.+}} [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}, [3 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) + // CK10-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i{{.+}} [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}, [2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) // CK10-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK10-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK10-DAG: [[GSIZE]] = getelementptr inbounds {{.+}}[[SIZE:%[^,]+]] // - // CK10-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 - // CK10-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 - // CK10-DAG: [[SIZE0:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 2 + // CK10-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK10-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK10-DAG: [[SIZE0:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 1 // CK10-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to double*** // CK10-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to double** // CK10-DAG: store double** [[P_VAL:%.+]], double*** [[BPC0]] @@ -601,19 +604,19 @@ struct S { double *p; }; -// CK11: [[MTYPE00:@.+]] = {{.+}}constant [3 x i64] [i64 32, i64 281474976710657, 
i64 281474976710673] +// CK11: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 32, i64 281474976710673] // CK11-LABEL: lvalue void lvalue(struct S *s, int l, int e) { - // CK11-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i{{.+}} [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}, [3 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) + // CK11-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i{{.+}} [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}, [2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) // CK11-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK11-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK11-DAG: [[GSIZE]] = getelementptr inbounds {{.+}}[[SIZE:%[^,]+]] // - // CK11-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 - // CK11-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 - // CK11-DAG: [[SIZE0:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 2 + // CK11-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK11-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK11-DAG: [[SIZE0:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 1 // CK11-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to double*** // CK11-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to double** // CK11-DAG: store double** [[P:%.+]], double*** [[BPC0]] @@ -653,41 +656,44 @@ double *p; struct S *sp; }; -// CK12: [[MTYPE00:@.+]] = {{.+}}constant [4 x i64] [i64 32, i64 281474976710657, i64 281474976710672, i64 17] +// CK12: [[MTYPE00:@.+]] = {{.+}}constant [3 x i64] [i64 32, i64 281474976710672, i64 17] // CK12-LABEL: lvalue void lvalue(struct S *s, int l, int e) { - // CK12-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i{{.+}} [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}, [4 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) + // CK12-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i{{.+}} [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}, [3 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) // CK12-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK12-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK12-DAG: [[GSIZE]] = getelementptr inbounds {{.+}}[[SIZE:%[^,]+]] // - // CK12-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3 - // CK12-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3 - // CK12-DAG: [[SIZE2:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 3 + // CK12-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 + // CK12-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 + // CK12-DAG: [[SIZE2:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 2 // CK12-DAG: [[BPC2:%.+]] = bitcast i8** [[BP2]] to double*** // CK12-DAG: [[PC2:%.+]] = bitcast i8** [[P2]] to double** // CK12-DAG: store double** [[P_VAL:%.+]], double*** [[BPC2]] // CK12-DAG: store double* [[SIX:%.+]], double** [[PC2]] // CK12-DAG: store i{{.+}} 8, i{{.+}}* [[SIZE2]] - // CK12-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 - // CK12-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 - // CK12-DAG: [[SIZE1:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, 
i{{.+}} 2 + // CK12-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK12-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK12-DAG: [[SIZE1:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 1 // CK12-DAG: [[BPC1:%.+]] = bitcast i8** [[BP1]] to [[STRUCT_S:%.+]]*** // CK12-DAG: [[PC1:%.+]] = bitcast i8** [[P1]] to double*** // CK12-DAG: store [[STRUCT_S]]** [[SP:%.+]], [[STRUCT_S]]*** [[BPC1]] // CK12-DAG: store double** [[P_VAL:%.+]], double*** [[PC1]] // CK12-DAG: store i{{.+}} {{4|8}}, i{{.+}}* [[SIZE1]] - // CK12-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK12-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK12-DAG: [[SIZE0:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 1 + // CK12-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK12-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK12-DAG: [[SIZE0:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 0 // CK12-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to [[STRUCT_S:%.+]]** // CK12-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to [[STRUCT_S]]*** + // CK12-DAG: store [[STRUCT_S]]* [[ZERO:%.+]], [[STRUCT_S]]** [[BPC0]] + // CK12-DAG: store [[STRUCT_S]]** [[SP]], [[STRUCT_S]]*** [[PC0]] // CK12-DAG: store [[STRUCT_S]]** [[S:%.+]], [[STRUCT_S]]*** [[S_VAL:%.+]] // CK12-DAG: store i{{.+}} {{.+}}, i{{.+}}* [[SIZE0]] // CK12-DAG: [[SP]] = getelementptr inbounds [[STRUCT_S]], [[STRUCT_S]]* [[ONE:%.+]], i32 0, i32 1 - // CK12-DAG: [[ONE]] = load %struct.S*, %struct.S** [[S]], + // CK12-DAG: [[ONE]] = load [[STRUCT_S]]*, [[STRUCT_S]]** [[S:%.+]], + // CK12-DAG: [[ZERO]] = load [[STRUCT_S]]*, [[STRUCT_S]]** [[S]], #pragma omp target update to(*(s->sp->p)) *(s->sp->p) = e; #pragma omp target update from(*(s->sp->p)) @@ -711,21 +717,21 @@ // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} #ifdef CK13 -// CK13: [[SIZE00:@.+]] = {{.+}}constant [2 x i64] [i64 {{8|4}}, i64 4] -// CK13: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17] +// CK13: [[SIZE00:@.+]] = {{.+}}constant [1 x i64] [i64 4] +// CK13: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 33] // CK13-LABEL: lvalue void lvalue(int **BB, int a, int b) { - // CK13-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) + // CK13-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) // CK13-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK13-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - // CK13-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK13-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK13-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to i32**** + // CK13-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK13-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK13-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to i32*** // CK13-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to i32** - // CK13-DAG: store i32*** [[BB_ADDR:%.+]], i32**** [[BPC0]] + // CK13-DAG: store i32** 
[[B_VAL1:%.+]], i32*** [[BPC0]] // CK13-DAG: store i32* [[ADD_PTR_2:%.+]], i32** [[PC0]] // CK13-64-DAG: [[ADD_PTR_2]] = getelementptr inbounds i32, i32* [[RESULT:%.+]], i64 [[IDX_EXT_1:%.+]] // CK13-32-DAG: [[ADD_PTR_2]] = getelementptr inbounds i32, i32* [[RESULT:%.+]], i32 [[B_ADDR:%.+]] @@ -734,7 +740,8 @@ // CK13-64-DAG: [[ADD_PTR]] = getelementptr inbounds i32*, i32** [[B_VAL:%.+]], i64 [[IDX_EXT:%.+]] // CK13-32-DAG: [[ADD_PTR]] = getelementptr inbounds i32*, i32** [[B_VAL:%.+]], i32 [[A_ADDR:%.+]] // CK13-64-DAG: [[IDX_EXT]] = sext i32 [[TWO:%.+]] to i64 - // CK13-DAG: [[B_VAL]] = load i32**, i32*** [[BB_ADDR]] + // CK13-DAG: [[B_VAL]] = load i32**, i32*** [[BB_ADDR:%.+]] + // CK13-DAG: [[B_VAL1]] = load i32**, i32*** [[BB_ADDR]] #pragma omp target update to(*(*(BB+a)+b)) *(*(BB+a)+b) = 1; #pragma omp target update from(*(*(BB+a)+b)) @@ -831,7 +838,7 @@ // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} #ifdef CK15 -// CK15: [[MTYPE00:@.+]] = {{.+}}constant [3 x i64] [i64 32, i64 281474976710657, i64 281474976710673] +// CK15: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 32, i64 281474976710673] struct SSA { double *p; @@ -842,36 +849,27 @@ //CK-15-LABEL: lvalue_member void lvalue_member(SSA *sap) { - // CK15-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) + // CK15-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GSIZE:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) // CK15-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK15-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK15-DAG: [[GSIZE]] = getelementptr inbounds {{.+}}[[SIZE:%[^,]+]] - // CK15-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 - // CK15-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 - // CK15-DAG: [[SIZE2:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 2 - // CK15-DAG: [[BPC2:%.+]] = bitcast i8** [[BP2]] to double*** - // CK15-DAG: [[PC2:%.+]] = bitcast i8** [[P2]] to double** - // CK15-DAG: store double** [[P_VAL:%.+]], double*** [[BPC2]] - // CK15-DAG: store double* [[ADD_PTR:%.+]], double** [[PC2]] - // CK15-DAG: store i64 8, i64* [[SIZE2]] - // CK15-DAG: [[ADD_PTR]] = getelementptr inbounds double, double* [[THREE:%.+]], i{{.+}} 3 - // CK15-DAG: [[THREE]] = load double*, double** [[P_VAL_1:%.+]] - // CK15-DAG: [[P_VAL]] = getelementptr inbounds [[SSA:%.+]], [[SSA:%.+]]* [[THIS:%.+]], i32 0, i32 0 // CK15-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 // CK15-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 // CK15-DAG: [[SIZE1:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 1 - // CK15-DAG: [[BPC1:%.+]] = bitcast i8** [[BP1]] to [[SSA]]** - // CK15-DAG: [[PC1:%.+]] = bitcast i8** [[P1]] to [[SSA]]*** - // CK15-DAG: store [[SSA]]* [[SAP_VAL:%.+]], [[SSA]]** [[BPC1]], - // CK15-DAG: store [[SSA]]** [[SAP_ADDR:%.+]], [[SSA]]*** [[PC1]] - // CK15-DAG: store i{{.+}} {{8|4}}, i{{.+}}* [[SIZE1]] - // CK15-DAG: [[SAP_VAL]] = load [[SSA]]*, [[SSA]]** [[SAP_ADDR]], + // CK15-DAG: [[BPC1:%.+]] = bitcast i8** [[BP1]] to double*** + // CK15-DAG: [[PC1:%.+]] = bitcast i8** [[P1]] to double** + // CK15-DAG: store double** [[P_VAL:%.+]], double*** [[BPC1]] + // CK15-DAG: store double* [[ADD_PTR:%.+]], double** [[PC1]] + // 
CK15-DAG: store i64 {{4|8}}, i64* [[SIZE1]] + // CK15-DAG: [[ADD_PTR]] = getelementptr inbounds double, double* [[THREE:%.+]], i{{.+}} 3 + // CK15-DAG: [[THREE]] = load double*, double** [[P_VAL_1:%.+]] + // CK15-DAG: [[P_VAL]] = getelementptr inbounds [[SSA:%.+]], [[SSA:%.+]]* [[THIS:%.+]], i32 0, i32 0 // CK15-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 // CK15-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 // CK15-DAG: [[SIZE0:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 0 - // CK15-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to [[SSA]]*** - // CK15-DAG: store [[SSA]]** [[SAP_ADDR]], [[SSA]]*** [[BPC0]], + // CK15-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to [[SSA]]** + // CK15-DAG: store [[SSA]]* [[ZERO:%.+]], [[SSA]]** [[BPC0]], // CK15-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to double*** // CK15-DAG: store double** [[P_VAL]], double*** [[PC0]], // CK15-DAG: store i{{.+}} [[COMPUTE_SIZE:%.+]], i{{.+}}* [[SIZE0]] @@ -881,6 +879,7 @@ // CK15-DAG: [[EIGHT]] = ptrtoint i8* [[FIVE:%.+]] to i64 // CK15-DAG: [[SIX]] = bitcast double** {{.+}} to i8* // CK15-DAG: [[FIVE]] = bitcast double** {{.+}} to i8* + // CK15-DAG: [[ZERO]] = load [[SSA]]*, [[SSA]]** %{{.+}}, #pragma omp target update to(*(3+sap->p)) *(3+sap->p) = 1; #pragma omp target update from(*(3+sap->p)) @@ -904,25 +903,26 @@ // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} #ifdef CK16 -// CK16: [[SIZE00:@.+]] = {{.+}}constant [2 x i64] [i64 {{8|4}}, i64 4] -// CK16: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17] +// CK16: [[SIZE00:@.+]] = {{.+}}constant [1 x i64] [i64 4] +// CK16: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 33] //CK16-LABEL: lvalue_find_base void lvalue_find_base(float *f, int *i) { - // CK16-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) + // CK16-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) // CK16-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK16-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - // CK16-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK16-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK16-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to float*** + // CK16-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK16-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK16-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to float** // CK16-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to float** - // CK16-DAG: store float** [[F_ADDR:%.+]], float*** [[BPC0]] + // CK16-DAG: store float* [[F:%.+]], float** [[BPC0]] // CK16-DAG: store float* [[ADD_PTR:%.+]], float** [[PC0]] // CK16-32-DAG: [[ADD_PTR]] = getelementptr inbounds float, float* [[THREE:%.+]], i32 [[I:%.+]] // CK16-64-DAG: [[ADD_PTR]] = getelementptr inbounds float, float* [[THREE:%.+]], i64 [[IDX_EXT:%.+]] - // CK16-DAG: [[THREE]] = load float*, float** [[F_ADDR]], + // CK16-DAG: [[THREE]] = load float*, float** [[F_ADDR:%.+]], + // CK16-DAG: [[F]] = load float*, float** [[F_ADDR]], // CK16-64-DAG: [[IDX_EXT]] = sext i32 [[I:%.+]] to i64 #pragma omp target update to(*(*i+f)) @@ 
-948,8 +948,8 @@ // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} #ifdef CK17 -// CK17: [[SIZE00:@.+]] = {{.+}}constant [2 x i64] [i64 {{4|8}}, i64 4] -// CK17: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17] +// CK17: [[SIZE00:@.+]] = {{.+}}constant [1 x i64] [i64 4] +// CK17: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 33] struct SSA { int i; @@ -959,15 +959,15 @@ //CK17-LABEL: lvalue_find_base void lvalue_find_base(float **f, SSA *sa) { - // CK17-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) + // CK17-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null) // CK17-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK17-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - // CK17-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK17-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK17-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to float**** + // CK17-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK17-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK17-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to float*** // CK17-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to float** - // CK17-DAG: store float*** [[F_ADDR:%.+]], float**** [[BPC0]], + // CK17-DAG: store float** [[F_VAL:%.+]], float*** [[BPC0]], // CK17-DAG: store float* [[ADD_PTR_4:%.+]], float** [[PC0]], // CK17-64-DAG: [[ADD_PTR_4]] = getelementptr inbounds float, float* [[SEVEN:%.+]], i64 [[IDX_EXT_3:%.+]] // CK17-64-DAG: [[IDX_EXT_3]] = sext i32 [[I_VAL:%.+]] to i64 @@ -981,6 +981,8 @@ // CK17-DAG: [[FIVE]] = load i32, i32* [[I_2:%.+]], // CK17-DAG: [[I_2]] = getelementptr inbounds [[SSA:%.+]], [[SSA]]* [[FOUR:%.+]], i32 0, i32 0 // CK17-DAG: [[FOUR]] = load [[SSA]]*, [[SSA]]** [[SSA_ADDR:%.+]], + // CK17-DAG: [[F]] = load float**, float*** [[F_ADDR:%.+]], + // CK17-DAG: [[F_VAL]] = load float**, float*** [[F_ADDR]], #pragma omp target update to(*(sa->sa->i+*(1+sa->i+f))) *(sa->sa->i+*(1+sa->i+f)) = 1; @@ -1005,13 +1007,13 @@ // SIMD-ONLY18-NOT: {{__kmpc|__tgt}} #ifdef CK18 -// CK18-DAG: [[MTYPE_TO:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 16] -// CK18-DAG: [[MTYPE_FROM:@.+]] = {{.+}}constant [2 x i64] [i64 34, i64 16] +// CK18-DAG: [[MTYPE_TO:@.+]] = {{.+}}constant [1 x i64] [i64 33] +// CK18-DAG: [[MTYPE_FROM:@.+]] = {{.+}}constant [1 x i64] [i64 34] //CK18-LABEL: array_shaping void array_shaping(float *f, int sa) { - // CK18-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE_TO]]{{.+}}, i8** null) + // CK18-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE_TO]]{{.+}}, i8** null) // CK18-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK18-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK18-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -1021,23 +1023,12 @@ // CK18-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 // CK18-DAG: [[BPC0:%.+]] = 
bitcast i8** [[BP0]] to float** - // CK18-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to float*** + // CK18-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to float** // CK18-DAG: store float* [[F1:%.+]], float** [[BPC0]], - // CK18-DAG: store float** [[F_ADDR:%.+]], float*** [[PC0]], - // CK18-DAG: store i64 {{8|4}}, i64* [[S0]], - // CK18-DAG: [[F1]] = load float*, float** [[F_ADDR]], - - // CK18-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK18-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK18-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 - - // CK18-DAG: [[BPC1:%.+]] = bitcast i8** [[BP1]] to float*** - // CK18-DAG: [[PC1:%.+]] = bitcast i8** [[P1]] to float** - - // CK18-DAG: store float** [[F_ADDR]], float*** [[BPC1]], - // CK18-DAG: store float* [[F2:%.+]], float** [[PC1]], - // CK18-DAG: store i64 [[SIZE:%.+]], i64* [[S1]], + // CK18-DAG: store float* [[F2:%.+]], float** [[PC0]], + // CK18-DAG: store i64 [[SIZE:%.+]], i64* [[S0]], + // CK18-DAG: [[F1]] = load float*, float** [[F_ADDR:%.+]], // CK18-DAG: [[F2]] = load float*, float** [[F_ADDR]], // CK18-64-DAG: [[SIZE]] = mul nuw i64 [[SZ1:%.+]], 4 @@ -1047,7 +1038,7 @@ // CK18-32-DAG: [[SZ2]] = mul nuw i32 12, %{{.+}} #pragma omp target update to(([3][sa][4])f) sa = 1; - // CK18-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE_FROM]]{{.+}}, i8** null) + // CK18-DAG: call void @__tgt_target_data_update_mapper(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE_FROM]]{{.+}}, i8** null) // CK18-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK18-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK18-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -1057,23 +1048,12 @@ // CK18-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 // CK18-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to float** - // CK18-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to float*** + // CK18-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to float** // CK18-DAG: store float* [[F1:%.+]], float** [[BPC0]], - // CK18-DAG: store float** [[F_ADDR:%.+]], float*** [[PC0]], - // CK18-DAG: store i64 {{8|4}}, i64* [[S0]], + // CK18-DAG: store float* [[F2:%.+]], float** [[PC0]], + // CK18-DAG: store i64 [[SIZE:%.+]], i64* [[S0]], // CK18-DAG: [[F1]] = load float*, float** [[F_ADDR]], - - // CK18-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK18-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK18-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 - - // CK18-DAG: [[BPC1:%.+]] = bitcast i8** [[BP1]] to float*** - // CK18-DAG: [[PC1:%.+]] = bitcast i8** [[P1]] to float** - - // CK18-DAG: store float** [[F_ADDR]], float*** [[BPC1]], - // CK18-DAG: store float* [[F2:%.+]], float** [[PC1]], - // CK18-DAG: store i64 [[SIZE:%.+]], i64* [[S1]], // CK18-DAG: [[F2]] = load float*, float** [[F_ADDR]], // CK18-64-DAG: [[SIZE]] = mul nuw i64 [[SZ1:%.+]], 5 diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -33,12 +33,11 @@ char b; S s[2]; int arr[10][a]; -// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T]]* @{{.+}}) // CHECK: [[B_REF:%.+]] = 
getelementptr inbounds [[STRUCT_SHAREDS]], [[STRUCT_SHAREDS]]* [[CAPTURES:%.+]], i32 0, i32 0 // CHECK: store i8* [[B]], i8** [[B_REF]] // CHECK: [[S_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]], [[STRUCT_SHAREDS]]* [[CAPTURES]], i32 0, i32 1 // CHECK: store [2 x [[STRUCT_S]]]* [[S]], [2 x [[STRUCT_S]]]** [[S_REF]] -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 33, i64 40, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY1:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 33, i64 40, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY1:@.+]] to i32 (i32, i8*)*)) // CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_PTR]] // CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS]]* [[CAPTURES]] to i8* @@ -46,7 +45,7 @@ // CHECK: [[PRIORITY_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 4 // CHECK: [[PRIORITY:%.+]] = bitcast %union{{.+}}* [[PRIORITY_REF_PTR]] to i32* // CHECK: store i32 {{.+}}, i32* [[PRIORITY]] -// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) +// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]]) #pragma omp task shared(a, b, s) priority(b) { a = 15; @@ -55,7 +54,7 @@ } // CHECK: [[S_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS1]], [[STRUCT_SHAREDS1]]* [[CAPTURES:%.+]], i32 0, i32 0 // CHECK: store [2 x [[STRUCT_S]]]* [[S]], [2 x [[STRUCT_S]]]** [[S_REF]] -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{[^,]+}}, i32 [[GTID]], i32 1, i64 40, i64 8, +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{[^,]+}}, i32 {{%.*}}, i32 1, i64 40, i64 8, // CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_PTR]] // CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS1]]* [[CAPTURES]] to i8* @@ -101,20 +100,20 @@ // CHECK: [[T0:%.*]] = getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2 // CHECK: store i8 1, i8* [[T0]] // CHECK: bitcast [[KMP_DEPEND_INFO]]* [[DEP_BASE]] to i8* -// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 4, i8* %{{[^,]+}}, i32 0, i8* null) +// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]], i32 4, i8* %{{[^,]+}}, i32 0, i8* null) #pragma omp task shared(a, s) depend(in : a, b, s, arr[:]) { a = 15; s[1].a = 10; } -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*)) -// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*)) +// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]]) #pragma omp task 
untied { #pragma omp critical a = 1; } -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 40, i64 1, +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 40, i64 1, // CHECK: getelementptr inbounds [2 x [[STRUCT_S]]], [2 x [[STRUCT_S]]]* [[S]], i64 0, i64 0 // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 0 // CHECK: ptrtoint [[STRUCT_S]]* %{{.+}} to i64 @@ -146,12 +145,12 @@ // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2 // CHECK: store i8 3, i8* // CHECK: bitcast [[KMP_DEPEND_INFO]]* %{{.+}} to i8* -// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 2, i8* %{{[^,]+}}, i32 0, i8* null) +// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]], i32 2, i8* %{{[^,]+}}, i32 0, i8* null) #pragma omp task untied depend(out : s[0], arr[4:][b]) { a = 1; } -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 40, i64 1, +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 40, i64 1, // CHECK: getelementptr inbounds [2 x [[STRUCT_S]]], [2 x [[STRUCT_S]]]* [[S]], i64 0, i64 0 // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 0 // CHECK: ptrtoint [[STRUCT_S]]* %{{.+}} to i64 @@ -183,12 +182,12 @@ // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2 // CHECK: store i8 4, i8* // CHECK: bitcast [[KMP_DEPEND_INFO]]* %{{.+}} to i8* -// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 2, i8* %{{[^,]+}}, i32 0, i8* null) +// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]], i32 2, i8* %{{[^,]+}}, i32 0, i8* null) #pragma omp task untied depend(mutexinoutset: s[0], arr[4:][b]) { a = 1; } -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 3, i64 40, i64 1, +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 3, i64 40, i64 1, // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 0 // CHECK: store i64 ptrtoint (i32* @{{.+}} to i64), i64* // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 1 @@ -229,38 +228,38 @@ // CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2 // CHECK: store i8 3, i8* // CHECK: bitcast [[KMP_DEPEND_INFO]]* %{{.+}} to i8* -// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 3, i8* %{{[^,]+}}, i32 0, i8* null) +// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]], i32 3, i8* %{{[^,]+}}, i32 0, i8* null) #pragma omp task final(true) depend(inout: a, s[1], arr[:a][3:]) { a = 2; } -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 3, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*)) -// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) +// CHECK: 
[[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 3, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*)) +// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]]) #pragma omp task final(true) { a = 2; } -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*)) -// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) + // CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*)) + // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]]) const bool flag = false; #pragma omp task final(flag) { a = 3; } -// CHECK: [[B_VAL:%.+]] = load i8, i8* [[B]] -// CHECK: [[CMP:%.+]] = icmp ne i8 [[B_VAL]], 0 -// CHECK: [[FINAL:%.+]] = select i1 [[CMP]], i32 2, i32 0 -// CHECK: [[FLAGS:%.+]] = or i32 [[FINAL]], 1 -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 [[FLAGS]], i64 40, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY5:@.+]] to i32 (i32, i8*)*)) -// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) + // CHECK: [[B_VAL:%.+]] = load i8, i8* [[B]] + // CHECK: [[CMP:%.+]] = icmp ne i8 [[B_VAL]], 0 + // CHECK: [[FINAL:%.+]] = select i1 [[CMP]], i32 2, i32 0 + // CHECK: [[FLAGS:%.+]] = or i32 [[FINAL]], 1 + // CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 [[FLAGS]], i64 40, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY5:@.+]] to i32 (i32, i8*)*)) + // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]]) int c __attribute__((aligned(128))); #pragma omp task final(b) shared(c) { a = 4; c = 5; } -// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) -// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]]) +// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*)) +// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]]) #pragma omp task untied { S s1; diff --git a/clang/test/Profile/Inputs/gcc-flag-compatibility_IR.proftext b/clang/test/Profile/Inputs/gcc-flag-compatibility_IR.proftext --- a/clang/test/Profile/Inputs/gcc-flag-compatibility_IR.proftext +++ b/clang/test/Profile/Inputs/gcc-flag-compatibility_IR.proftext @@ -2,7 +2,7 @@ :ir main # Func Hash: -34137660316 +1063705162469825436 # Num Counters: 2 # Counter Values: diff --git a/clang/test/Profile/Inputs/gcc-flag-compatibility_IR_entry.proftext b/clang/test/Profile/Inputs/gcc-flag-compatibility_IR_entry.proftext --- a/clang/test/Profile/Inputs/gcc-flag-compatibility_IR_entry.proftext +++ 
b/clang/test/Profile/Inputs/gcc-flag-compatibility_IR_entry.proftext @@ -3,7 +3,7 @@ :entry_first main # Func Hash: -34137660316 +1063705162469825436 # Num Counters: 2 # Counter Values: diff --git a/clang/unittests/CodeGen/IncrementalProcessingTest.cpp b/clang/unittests/CodeGen/IncrementalProcessingTest.cpp --- a/clang/unittests/CodeGen/IncrementalProcessingTest.cpp +++ b/clang/unittests/CodeGen/IncrementalProcessingTest.cpp @@ -159,6 +159,11 @@ // First code should not end up in second module: ASSERT_FALSE(M[2]->getFunction("funcForProg1")); + // TODO: Remove this after the static initialization frontend implementation + // is recovered on AIX. + if (compiler.getTarget().getTriple().isOSAIX()) + return; + // Make sure global inits exist and are unique: const Function* GlobalInit1 = getGlobalInit(*M[1]); ASSERT_TRUE(GlobalInit1); diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp @@ -31,6 +31,8 @@ #include "tsan_mman.h" #include "tsan_fd.h" +#include <stdarg.h> + using namespace __tsan; #if SANITIZER_FREEBSD || SANITIZER_MAC @@ -135,6 +137,7 @@ #endif const int MAP_FIXED = 0x10; typedef long long_t; +typedef __sanitizer::u16 mode_t; // From /usr/include/unistd.h # define F_ULOCK 0 /* Unlock a previously locked region. */ @@ -1508,20 +1511,28 @@ #define TSAN_MAYBE_INTERCEPT_FSTAT64 #endif -TSAN_INTERCEPTOR(int, open, const char *name, int flags, int mode) { - SCOPED_TSAN_INTERCEPTOR(open, name, flags, mode); +TSAN_INTERCEPTOR(int, open, const char *name, int oflag, ...) { + va_list ap; + va_start(ap, oflag); + mode_t mode = va_arg(ap, int); + va_end(ap); + SCOPED_TSAN_INTERCEPTOR(open, name, oflag, mode); READ_STRING(thr, pc, name, 0); - int fd = REAL(open)(name, flags, mode); + int fd = REAL(open)(name, oflag, mode); if (fd >= 0) FdFileCreate(thr, pc, fd); return fd; } #if SANITIZER_LINUX -TSAN_INTERCEPTOR(int, open64, const char *name, int flags, int mode) { - SCOPED_TSAN_INTERCEPTOR(open64, name, flags, mode); +TSAN_INTERCEPTOR(int, open64, const char *name, int oflag, ...) 
{ + va_list ap; + va_start(ap, oflag); + mode_t mode = va_arg(ap, int); + va_end(ap); + SCOPED_TSAN_INTERCEPTOR(open64, name, oflag, mode); READ_STRING(thr, pc, name, 0); - int fd = REAL(open64)(name, flags, mode); + int fd = REAL(open64)(name, oflag, mode); if (fd >= 0) FdFileCreate(thr, pc, fd); return fd; diff --git a/compiler-rt/test/profile/Linux/instrprof-value-merge.c b/compiler-rt/test/profile/Linux/instrprof-value-merge.c --- a/compiler-rt/test/profile/Linux/instrprof-value-merge.c +++ b/compiler-rt/test/profile/Linux/instrprof-value-merge.c @@ -45,7 +45,7 @@ // CHECK: Counters: // CHECK: main: -// CHECK: Hash: 0x00030012a7ab6e87 +// CHECK: Hash: 0x0a9bd81e87ab6e87 // CHECK: Counters: 6 // CHECK: Indirect Call Site Count: 3 // CHECK: Number of Memory Intrinsics Calls: 3 diff --git a/compiler-rt/test/tsan/Darwin/variadic-open.cpp b/compiler-rt/test/tsan/Darwin/variadic-open.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/test/tsan/Darwin/variadic-open.cpp @@ -0,0 +1,24 @@ +// RUN: %clangxx_tsan -O1 %s -o %t && %run %t %t.tmp 2>&1 | FileCheck %s +#include <assert.h> +#include <fcntl.h> +#include <stdio.h> +#include <sys/stat.h> +#include <unistd.h> + +int main(int argc, char *argv[]) { + fprintf(stderr, "Hello world.\n"); + assert(argv[1]); + unlink(argv[1]); + int fd = open(argv[1], O_RDWR | O_CREAT, 0600); + assert(fd != -1); + struct stat info; + int result = fstat(fd, &info); + fprintf(stderr, "permissions = 0%o\n", info.st_mode & ~S_IFMT); + assert(result == 0); + close(fd); + fprintf(stderr, "Done.\n"); +} + +// CHECK: Hello world. +// CHECK: permissions = 0600 +// CHECK: Done. diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -909,6 +909,7 @@ void AddSaveName(std::set<SourceName> &, const SourceName &); void SetSaveAttr(Symbol &); bool HandleUnrestrictedSpecificIntrinsicFunction(const parser::Name &); + bool IsUplevelReference(const Symbol &); const parser::Name *FindComponent(const parser::Name *, const parser::Name &); bool CheckInitialDataTarget(const Symbol &, const SomeExpr &, SourceName); void CheckInitialProcTarget(const Symbol &, const parser::Name &, SourceName); @@ -5429,7 +5430,10 @@ if (CheckUseError(name)) { return nullptr; // reported an error } - if (IsDummy(*symbol) || + if (IsUplevelReference(*symbol)) { + name.symbol = nullptr; + MakeSymbol(name, HostAssocDetails{*symbol}); + } else if (IsDummy(*symbol) || (!symbol->GetType() && FindCommonBlockContaining(*symbol))) { ConvertToObjectEntity(*symbol); ApplyImplicitRules(*symbol); @@ -5453,6 +5457,16 @@ return &name; } +bool DeclarationVisitor::IsUplevelReference(const Symbol &symbol) { + const Scope *symbolUnit{FindProgramUnitContaining(symbol)}; + if (symbolUnit == FindProgramUnitContaining(currScope())) { + return false; + } else { + Scope::Kind kind{DEREF(symbolUnit).kind()}; + return kind == Scope::Kind::Subprogram || kind == Scope::Kind::MainProgram; + } +} + // base is a part-ref of a derived type; find the named component in its type. // Also handles intrinsic type parameter inquiries (%kind, %len) and // COMPLEX component references (%re, %im). 
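A note on the TSan interceptor change above: open(2) and open64(2) are variadic, so the interceptors now recover the optional mode argument with va_arg, and because of default argument promotions the value is read back as int. The stand-alone sketch below mirrors that pattern outside of TSan; the wrapper name my_open, the path, and the logging are illustrative assumptions, not part of the patch.

#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <unistd.h>

// Variadic wrapper in the style of the interceptor: the optional third
// argument is fetched with va_arg as int (default argument promotion) and
// forwarded to the real open(). A mode is always passed here, so reading it
// unconditionally is well-defined.
static int my_open(const char *name, int oflag, ...) {
  va_list ap;
  va_start(ap, oflag);
  mode_t mode = va_arg(ap, int);
  va_end(ap);
  fprintf(stderr, "open(%s, %d, 0%o)\n", name, oflag, (unsigned)mode);
  return open(name, oflag, mode);
}

int main() {
  int fd = my_open("/tmp/variadic-open-demo", O_RDWR | O_CREAT, 0600);
  if (fd >= 0)
    close(fd);
  return fd >= 0 ? 0 : 1;
}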
diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -179,10 +179,21 @@ return DoesScopeContain(maybeAncestor, symbol.owner()); } +static const Symbol &FollowHostAssoc(const Symbol &symbol) { + for (const Symbol *s{&symbol};;) { + const auto *details{s->detailsIf<HostAssocDetails>()}; + if (!details) { + return *s; + } + s = &details->symbol(); + } +} + bool IsHostAssociated(const Symbol &symbol, const Scope &scope) { const Scope *subprogram{FindProgramUnitContaining(scope)}; return subprogram && - DoesScopeContain(FindProgramUnitContaining(symbol), *subprogram); + DoesScopeContain( + FindProgramUnitContaining(FollowHostAssoc(symbol)), *subprogram); } bool IsInStmtFunction(const Symbol &symbol) { diff --git a/flang/test/Semantics/symbol02.f90 b/flang/test/Semantics/symbol02.f90 --- a/flang/test/Semantics/symbol02.f90 +++ b/flang/test/Semantics/symbol02.f90 @@ -44,7 +44,7 @@ !REF: /m/x z = x !REF: /m/s/s2/z - !REF: /m/s/y + !DEF: /m/s/s2/y HostAssoc TYPE(t) z = y !REF: /m/s/s call s diff --git a/flang/test/Semantics/symbol03.f90 b/flang/test/Semantics/symbol03.f90 --- a/flang/test/Semantics/symbol03.f90 +++ b/flang/test/Semantics/symbol03.f90 @@ -11,7 +11,14 @@ !REF: /main/s subroutine s !DEF: /main/s/y (Implicit) ObjectEntity REAL(4) - !REF: /main/x + !DEF: /main/s/x HostAssoc INTEGER(4) y = x + contains + !DEF: /main/s/s2 (Subroutine) Subprogram + subroutine s2 + !DEF: /main/s/s2/z (Implicit) ObjectEntity REAL(4) + !DEF: /main/s/s2/x HostAssoc INTEGER(4) + z = x + end subroutine end subroutine end program diff --git a/flang/test/Semantics/symbol05.f90 b/flang/test/Semantics/symbol05.f90 --- a/flang/test/Semantics/symbol05.f90 +++ b/flang/test/Semantics/symbol05.f90 @@ -33,7 +33,7 @@ contains !DEF: /s2/s (Subroutine) Subprogram subroutine s - !REF: /s2/x + !DEF: /s2/s/x HostAssoc INTEGER(4) x = 1 !DEF: /s2/s/w (Implicit) ObjectEntity INTEGER(4) w = 1 diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -1,6 +1,8 @@ set(TARGET_LIBC_ENTRYPOINTS # ctype.h entrypoints + libc.src.ctype.isalnum libc.src.ctype.isalpha + libc.src.ctype.isdigit # errno.h entrypoints libc.src.errno.__errno_location diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -88,7 +88,9 @@ def CTypeAPI : PublicAPI<"ctype.h"> { let Functions = [ + "isalnum", "isalpha", + "isdigit", ]; } diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -3,7 +3,9 @@ libc.src.assert.__assert_fail # ctype.h entrypoints + libc.src.ctype.isalnum libc.src.ctype.isalpha + libc.src.ctype.isdigit # errno.h entrypoints libc.src.errno.__errno_location diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -46,11 +46,21 @@ [], // Types [], // Enumerations [ + FunctionSpec< + "isalnum", + RetValSpec<IntType>, + [ArgSpec<IntType>] + >, FunctionSpec< "isalpha", RetValSpec<IntType>, [ArgSpec<IntType>] >, + FunctionSpec< + "isdigit", + RetValSpec<IntType>, + [ArgSpec<IntType>] + >, ] >; diff --git a/libc/src/ctype/CMakeLists.txt b/libc/src/ctype/CMakeLists.txt --- a/libc/src/ctype/CMakeLists.txt +++ b/libc/src/ctype/CMakeLists.txt @@ -1,7 +1,35 @@ 
+add_entrypoint_object( + isalnum + SRCS + isalnum.cpp + HDRS + isalnum.h + DEPENDS + .ctype_utils +) + add_entrypoint_object( isalpha SRCS isalpha.cpp HDRS isalpha.h + DEPENDS + .ctype_utils +) + +add_entrypoint_object( + isdigit + SRCS + isdigit.cpp + HDRS + isdigit.h + DEPENDS + .ctype_utils ) diff --git a/libc/src/ctype/ctype_utils.h b/libc/src/ctype/ctype_utils.h new file mode 100644 --- /dev/null +++ b/libc/src/ctype/ctype_utils.h @@ -0,0 +1,34 @@ +//===-- Collection of utils for implementing ctype functions-------*-C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_CTYPE_CTYPE_UTILS_H +#define LLVM_LIBC_SRC_CTYPE_CTYPE_UTILS_H + +namespace __llvm_libc { +namespace internal { + +// ------------------------------------------------------ +// Rationale: Since these classification functions are +// called in other functions, we will avoid the overhead +// of a function call by inlining them. +// ------------------------------------------------------ + +static inline int isdigit(int c) { + const unsigned ch = c; + return (ch - '0') < 10; +} + +static inline int isalpha(int c) { + const unsigned ch = c; + return (ch | 32) - 'a' < 26; +} + +} // namespace internal +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_CTYPE_CTYPE_UTILS_H diff --git a/libc/src/ctype/isalnum.h b/libc/src/ctype/isalnum.h new file mode 100644 --- /dev/null +++ b/libc/src/ctype/isalnum.h @@ -0,0 +1,18 @@ +//===-- Implementation header for isalnum -------------------------*-C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_CTYPE_ISALNUM_H +#define LLVM_LIBC_SRC_CTYPE_ISALNUM_H + +namespace __llvm_libc { + +int isalnum(int c); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_CTYPE_ISALNUM_H diff --git a/libc/src/ctype/isalnum.cpp b/libc/src/ctype/isalnum.cpp new file mode 100644 --- /dev/null +++ b/libc/src/ctype/isalnum.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of isalnum------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/ctype/isalnum.h" +#include "src/ctype/ctype_utils.h" + +#include "src/__support/common.h" + +namespace __llvm_libc { + +// TODO: Currently restricted to default locale. +// These should be extended using locale information. +int LLVM_LIBC_ENTRYPOINT(isalnum)(int c) { + return internal::isalpha(c) || internal::isdigit(c); +} + +} // namespace __llvm_libc diff --git a/libc/src/ctype/isalpha.cpp b/libc/src/ctype/isalpha.cpp --- a/libc/src/ctype/isalpha.cpp +++ b/libc/src/ctype/isalpha.cpp @@ -9,14 +9,12 @@ #include "src/ctype/isalpha.h" #include "src/__support/common.h" +#include "src/ctype/ctype_utils.h" namespace __llvm_libc { // TODO: Currently restricted to default locale. // These should be extended using locale information. 
-int LLVM_LIBC_ENTRYPOINT(isalpha)(int c) { - const unsigned ch = c; - return (ch | 32) - 'a' < 26; -} +int LLVM_LIBC_ENTRYPOINT(isalpha)(int c) { return internal::isalpha(c); } } // namespace __llvm_libc diff --git a/libc/src/ctype/isdigit.h b/libc/src/ctype/isdigit.h new file mode 100644 --- /dev/null +++ b/libc/src/ctype/isdigit.h @@ -0,0 +1,18 @@ +//===-- Implementation header for isdigit -------------------------*-C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_CTYPE_ISDIGIT_H +#define LLVM_LIBC_SRC_CTYPE_ISDIGIT_H + +namespace __llvm_libc { + +int isdigit(int c); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_CTYPE_ISDIGIT_H diff --git a/libc/src/ctype/isdigit.cpp b/libc/src/ctype/isdigit.cpp new file mode 100644 --- /dev/null +++ b/libc/src/ctype/isdigit.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of isdigit------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/ctype/isdigit.h" +#include "src/__support/common.h" +#include "src/ctype/ctype_utils.h" + +namespace __llvm_libc { + +// TODO: Currently restricted to default locale. +// These should be extended using locale information. +int LLVM_LIBC_ENTRYPOINT(isdigit)(int c) { return internal::isdigit(c); } + +} // namespace __llvm_libc diff --git a/libc/test/src/ctype/CMakeLists.txt b/libc/test/src/ctype/CMakeLists.txt --- a/libc/test/src/ctype/CMakeLists.txt +++ b/libc/test/src/ctype/CMakeLists.txt @@ -1,5 +1,15 @@ add_libc_testsuite(libc_ctype_unittests) +add_libc_unittest( + isalnum + SUITE + libc_ctype_unittests + SRCS + isalnum_test.cpp + DEPENDS + libc.src.ctype.isalnum +) + add_libc_unittest( isalpha SUITE @@ -9,3 +19,13 @@ DEPENDS libc.src.ctype.isalpha ) + +add_libc_unittest( + isdigit + SUITE + libc_ctype_unittests + SRCS + isdigit_test.cpp + DEPENDS + libc.src.ctype.isdigit +) diff --git a/libc/test/src/ctype/isalnum_test.cpp b/libc/test/src/ctype/isalnum_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/ctype/isalnum_test.cpp @@ -0,0 +1,27 @@ +//===-- Unittests for isalnum----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/ctype/isalnum.h" +#include "utils/UnitTest/Test.h" + +// Helper function that makes a call to isalnum a bit cleaner +// for use with testing utilities, since it explicitly requires +// a boolean value for EXPECT_TRUE and EXPECT_FALSE. +bool call_isalnum(int c) { return __llvm_libc::isalnum(c); } + +TEST(IsAlNum, DefaultLocale) { + // Loops through all characters, verifying that numbers and letters + // return true and everything else returns false. 
+ for (int c = 0; c < 255; ++c) { + if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || + ('0' <= c && c <= '9')) + EXPECT_TRUE(call_isalnum(c)); + else + EXPECT_FALSE(call_isalnum(c)); + } +} diff --git a/libc/test/src/ctype/isdigit_test.cpp b/libc/test/src/ctype/isdigit_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/ctype/isdigit_test.cpp @@ -0,0 +1,26 @@ +//===-- Unittests for isdigit----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/ctype/isdigit.h" +#include "utils/UnitTest/Test.h" + +// Helper function that makes a call to isdigit a bit cleaner +// for use with testing utilities, since it explicitly requires +// a boolean value for EXPECT_TRUE and EXPECT_FALSE. +bool call_isdigit(int c) { return __llvm_libc::isdigit(c); } + +TEST(IsDigit, DefaultLocale) { + // Loops through all characters, verifying that numbers return true + // and everything else returns false. + for (int ch = 0; ch < 255; ++ch) { + if ('0' <= ch && ch <= '9') + EXPECT_TRUE(call_isdigit(ch)); + else + EXPECT_FALSE(call_isdigit(ch)); + } +} diff --git a/libcxx/cmake/Modules/DefineLinkerScript.cmake b/libcxx/cmake/Modules/DefineLinkerScript.cmake --- a/libcxx/cmake/Modules/DefineLinkerScript.cmake +++ b/libcxx/cmake/Modules/DefineLinkerScript.cmake @@ -34,13 +34,8 @@ if ("${lib}" STREQUAL "cxx-headers") continue() endif() - if (TARGET "${lib}" OR - (${lib} MATCHES "cxxabi(_static|_shared)?" AND HAVE_LIBCXXABI) OR - (${lib} MATCHES "unwind(_static|_shared)?" AND HAVE_LIBUNWIND)) - list(APPEND link_libraries "${CMAKE_LINK_LIBRARY_FLAG}$") - else() - list(APPEND link_libraries "${CMAKE_LINK_LIBRARY_FLAG}${lib}") - endif() + set(libname "$,$,${lib}>") + list(APPEND link_libraries "${CMAKE_LINK_LIBRARY_FLAG}${libname}") endforeach() endif() string(REPLACE ";" " " link_libraries "${link_libraries}") diff --git a/libcxx/test/std/depr/depr.c.headers/float_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/float_h.pass.cpp --- a/libcxx/test/std/depr/depr.c.headers/float_h.pass.cpp +++ b/libcxx/test/std/depr/depr.c.headers/float_h.pass.cpp @@ -6,7 +6,10 @@ // //===----------------------------------------------------------------------===// - // test +// Before Clang 9.0, does not define FLT_HAS_SUBNORM & friends in C++. +// XFAIL: clang-4, clang-5, clang-6, clang-7, clang-8 + +// test #include diff --git a/libcxx/test/std/language.support/support.limits/c.limits/cfloat.pass.cpp b/libcxx/test/std/language.support/support.limits/c.limits/cfloat.pass.cpp --- a/libcxx/test/std/language.support/support.limits/c.limits/cfloat.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/c.limits/cfloat.pass.cpp @@ -6,7 +6,10 @@ // //===----------------------------------------------------------------------===// - // test cfloat +// Before Clang 9.0, does not define FLT_HAS_SUBNORM & friends in C++. 
+// XFAIL: clang-4, clang-5, clang-6, clang-7, clang-8 + +// test cfloat #include diff --git a/lldb/include/lldb/API/LLDB.h b/lldb/include/lldb/API/LLDB.h --- a/lldb/include/lldb/API/LLDB.h +++ b/lldb/include/lldb/API/LLDB.h @@ -17,6 +17,7 @@ #include "lldb/API/SBBreakpointName.h" #include "lldb/API/SBBroadcaster.h" #include "lldb/API/SBCommandInterpreter.h" +#include "lldb/API/SBCommandInterpreterRunOptions.h" #include "lldb/API/SBCommandReturnObject.h" #include "lldb/API/SBCommunication.h" #include "lldb/API/SBCompileUnit.h" diff --git a/lldb/include/lldb/API/SBCommandInterpreterRunOptions.h b/lldb/include/lldb/API/SBCommandInterpreterRunOptions.h --- a/lldb/include/lldb/API/SBCommandInterpreterRunOptions.h +++ b/lldb/include/lldb/API/SBCommandInterpreterRunOptions.h @@ -26,8 +26,12 @@ public: SBCommandInterpreterRunOptions(); + SBCommandInterpreterRunOptions(const SBCommandInterpreterRunOptions &rhs); ~SBCommandInterpreterRunOptions(); + SBCommandInterpreterRunOptions & + operator=(const SBCommandInterpreterRunOptions &rhs); + bool GetStopOnContinue() const; void SetStopOnContinue(bool); diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -1608,7 +1608,7 @@ """ yaml2obj_bin = configuration.get_yaml2obj_path() if not yaml2obj_bin: - self.assertTrue(False, "No valid FileCheck executable specified") + self.assertTrue(False, "No valid yaml2obj executable specified") command = [yaml2obj_bin, "-o=%s" % obj_path, yaml_path] system([command]) diff --git a/lldb/source/API/SBCommandInterpreterRunOptions.cpp b/lldb/source/API/SBCommandInterpreterRunOptions.cpp --- a/lldb/source/API/SBCommandInterpreterRunOptions.cpp +++ b/lldb/source/API/SBCommandInterpreterRunOptions.cpp @@ -24,8 +24,29 @@ m_opaque_up = std::make_unique(); } +SBCommandInterpreterRunOptions::SBCommandInterpreterRunOptions( + const SBCommandInterpreterRunOptions &rhs) + : m_opaque_up() { + LLDB_RECORD_CONSTRUCTOR(SBCommandInterpreterRunOptions, + (const lldb::SBCommandInterpreterRunOptions &), rhs); + + m_opaque_up = std::make_unique(rhs.ref()); +} + SBCommandInterpreterRunOptions::~SBCommandInterpreterRunOptions() = default; +SBCommandInterpreterRunOptions &SBCommandInterpreterRunOptions::operator=( + const SBCommandInterpreterRunOptions &rhs) { + LLDB_RECORD_METHOD(lldb::SBCommandInterpreterRunOptions &, + SBCommandInterpreterRunOptions, operator=, + (const lldb::SBCommandInterpreterRunOptions &), rhs); + + if (this == &rhs) + return LLDB_RECORD_RESULT(*this); + *m_opaque_up = *rhs.m_opaque_up; + return LLDB_RECORD_RESULT(*this); +} + bool SBCommandInterpreterRunOptions::GetStopOnContinue() const { LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, GetStopOnContinue); @@ -190,12 +211,11 @@ SBCommandInterpreterRunResult &SBCommandInterpreterRunResult::operator=( const SBCommandInterpreterRunResult &rhs) { LLDB_RECORD_METHOD(lldb::SBCommandInterpreterRunResult &, - SBCommandInterpreterRunResult, - operator=,(const lldb::SBCommandInterpreterRunResult &), - rhs); + SBCommandInterpreterRunResult, operator=, + (const lldb::SBCommandInterpreterRunResult &), rhs); if (this == &rhs) - return *this; + return LLDB_RECORD_RESULT(*this); *m_opaque_up = *rhs.m_opaque_up; return LLDB_RECORD_RESULT(*this); } @@ -220,6 +240,11 @@ template <> void RegisterMethods(Registry &R) { LLDB_REGISTER_CONSTRUCTOR(SBCommandInterpreterRunOptions, ()); + 
LLDB_REGISTER_CONSTRUCTOR(SBCommandInterpreterRunOptions, + (const lldb::SBCommandInterpreterRunOptions &)); + LLDB_REGISTER_METHOD(lldb::SBCommandInterpreterRunOptions &, + SBCommandInterpreterRunOptions, operator=, + (const lldb::SBCommandInterpreterRunOptions &)); LLDB_REGISTER_METHOD_CONST(bool, SBCommandInterpreterRunOptions, GetStopOnContinue, ()); LLDB_REGISTER_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnContinue, @@ -260,8 +285,8 @@ LLDB_REGISTER_CONSTRUCTOR(SBCommandInterpreterRunResult, (const lldb::SBCommandInterpreterRunResult &)); LLDB_REGISTER_METHOD(lldb::SBCommandInterpreterRunResult &, - SBCommandInterpreterRunResult, - operator=,(const lldb::SBCommandInterpreterRunResult &)); + SBCommandInterpreterRunResult, operator=, + (const lldb::SBCommandInterpreterRunResult &)); LLDB_REGISTER_METHOD_CONST(int, SBCommandInterpreterRunResult, GetNumberOfErrors, ()); LLDB_REGISTER_METHOD_CONST(lldb::CommandInterpreterResult, diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/.categories b/lldb/test/API/tools/lldb-server/.categories rename from lldb/packages/Python/lldbsuite/test/tools/lldb-server/.categories rename to lldb/test/API/tools/lldb-server/.categories diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/registers-target-xml-reading/Makefile b/lldb/test/API/tools/lldb-server/registers-target-xml-reading/Makefile rename from lldb/packages/Python/lldbsuite/test/tools/lldb-server/registers-target-xml-reading/Makefile rename to lldb/test/API/tools/lldb-server/registers-target-xml-reading/Makefile diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/registers-target-xml-reading/TestGdbRemoteTargetXmlPacket.py b/lldb/test/API/tools/lldb-server/registers-target-xml-reading/TestGdbRemoteTargetXmlPacket.py rename from lldb/packages/Python/lldbsuite/test/tools/lldb-server/registers-target-xml-reading/TestGdbRemoteTargetXmlPacket.py rename to lldb/test/API/tools/lldb-server/registers-target-xml-reading/TestGdbRemoteTargetXmlPacket.py --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/registers-target-xml-reading/TestGdbRemoteTargetXmlPacket.py +++ b/lldb/test/API/tools/lldb-server/registers-target-xml-reading/TestGdbRemoteTargetXmlPacket.py @@ -66,4 +66,4 @@ self.assertEqual(q_info_reg["format"], xml_info_reg.get("format")) self.assertEqual(q_info_reg["bitsize"], xml_info_reg.get("bitsize")) self.assertEqual(q_info_reg["offset"], xml_info_reg.get("offset")) - self.assertEqual(q_info_reg["encoding"], xml_info_reg.get("encoding")) \ No newline at end of file + self.assertEqual(q_info_reg["encoding"], xml_info_reg.get("encoding")) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/registers-target-xml-reading/main.cpp b/lldb/test/API/tools/lldb-server/registers-target-xml-reading/main.cpp rename from lldb/packages/Python/lldbsuite/test/tools/lldb-server/registers-target-xml-reading/main.cpp rename to lldb/test/API/tools/lldb-server/registers-target-xml-reading/main.cpp diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h --- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h @@ -16,6 +16,7 @@ #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/PassManager.h" namespace llvm { @@ -128,8 +129,18 @@ [](const Loop *L) { return L->isLoopSimplifyForm(); }); } + StringRef getName() const { + // FIXME: Choose a better name for loop nests so that they are + // 
distinguishable from the loops' names. + Loop &Root = getOutermostLoop(); + return Root.getName(); + } + + /// Reconstruct the loop nest inplace. + void reconstructInplace(ScalarEvolution &SE); + protected: - const unsigned MaxPerfectDepth; // maximum perfect nesting depth level. + unsigned MaxPerfectDepth; // maximum perfect nesting depth level. LoopVectorTy Loops; // the loops in the nest (in breadth first order). }; diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysisManager.h b/llvm/include/llvm/Analysis/LoopNestAnalysisManager.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Analysis/LoopNestAnalysisManager.h @@ -0,0 +1,210 @@ +//===- LoopNestAnalysisManager.h - LoopNest analysis management ---------*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LOOPNESTANALYSISMANAGER_H +#define LLVM_ANALYSIS_LOOPNESTANALYSISMANAGER_H + +#include "llvm/Analysis/LoopNestAnalysis.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class LNPMUpdater; + +/// The loop nest analysis manager. +/// +/// The loop nest analyses should run on \c Loop instead of \c LoopNest since +/// \c LoopNest objects are constantly invalidated by both loop nest passes and loop +/// passes. Generally speaking, the passes should update the analysis results +/// dynamically when possible, and running on Loops prevents the analyses from +/// being invalidated when the loop structures change. +/// +/// \c LoopNestAnalysisManager is a wrapper around \c LoopAnalysisManager and +/// provides all the public APIs that \c AnalysisManager has so that it seems to +/// be operating on \c LoopNest. \c LoopNestAnalysisManager also provides the +/// ability to construct \c LoopNest from the top-level \c Loop. The loop nest +/// analyses can also obtain the \c LoopNest object from the \c +/// LoopAnalysisManager. +/// +/// The \c LoopNest object will be invalidated after the loop nest passes unless +/// \c LoopNestAnalysis is explicitly marked as preserved. +template <> class AnalysisManager { +public: + class Invalidator { + public: + /// The following methods should never be called because the + /// invalidation in \c LoopNestAnalysisManager will be passed to the + /// internal \c LoopAnalysisManager. The only purpose of these methods is to + /// satisfy the requirements of being an \c AnalysisManager. + template + bool invalidate(LoopNest &, const PreservedAnalyses &) { + assert(false && "This method should never be called."); + return false; + } + + bool invalidate(AnalysisKey *, LoopNest &, const PreservedAnalyses &) { + assert(false && "This method should never be called."); + return false; + } + }; + + AnalysisManager(LoopAnalysisManager &LAM) : InternalLAM(LAM) {} + + bool empty() const { return InternalLAM.empty(); }; + + void clear(LoopNest &LN, llvm::StringRef Name) { + InternalLAM.clear(LN.getOutermostLoop(), Name); + } + void clear(Loop &L, llvm::StringRef Name) { InternalLAM.clear(L, Name); } + void clear() { InternalLAM.clear(); } + + LoopNest &getLoopNest(Loop &Root, LoopStandardAnalysisResults &LAR) { + return InternalLAM.getResult(Root, LAR); + } + + /// Get the result of an analysis pass for a given LoopNest. + /// + /// Runs the analysis if a cached result is not available.
+ template + typename PassT::Result &getResult(LoopNest &LN, + LoopStandardAnalysisResults &LAR) { + return InternalLAM.getResult(LN.getOutermostLoop(), LAR); + } + template + typename PassT::Result &getResult(Loop &L, LoopStandardAnalysisResults &LAR) { + return InternalLAM.getResult(L, LAR); + } + + /// Get the cached result of an analysis pass for a given LoopNest. + /// + /// This method never runs the analysis. + /// + /// \returns null if there is no cached result. + template + typename PassT::Result *getCachedResult(LoopNest &LN) const { + return InternalLAM.getCachedResult(LN.getOutermostLoop()); + } + template + typename PassT::Result *getCachedResult(Loop &L) const { + return InternalLAM.getCachedResult(L); + } + + template + void verifyNotInvalidated(LoopNest &LN, + typename PassT::Result *Result) const { + InternalLAM.verifyNotInvalidated(LN.getOutermostLoop(), Result); + } + template + void verifyNotInvalidated(Loop &L, typename PassT::Result *Result) const { + InternalLAM.verifyNotInvalidated(L, Result); + } + + template + bool registerPass(PassBuilderT &&PassBuilder) { + return InternalLAM.registerPass(std::forward(PassBuilder)); + } + + void invalidate(LoopNest &LN, const PreservedAnalyses &PA) { + InternalLAM.invalidate(LN.getOutermostLoop(), PA); + } + void invalidate(Loop &L, const PreservedAnalyses &PA) { + InternalLAM.invalidate(L, PA); + } + + LoopAnalysisManager &getLoopAnalysisManager() { return InternalLAM; } + +private: + LoopAnalysisManager &InternalLAM; + friend class InnerAnalysisManagerProxy< + AnalysisManager, Function>; +}; + +using LoopNestAnalysisManager = + AnalysisManager; + +using LoopNestAnalysisManagerFunctionProxy = + InnerAnalysisManagerProxy; + +/// A specialized result for the \c LoopNestAnalysisManagerFunctionProxy which +/// retains a \c LoopInfo reference. +/// +/// This allows it to collect loop nest objects for which analysis results may +/// be cached in the \c LoopNestAnalysisManager. +template <> class LoopNestAnalysisManagerFunctionProxy::Result { +public: + explicit Result(LoopNestAnalysisManager &InnerAM, LoopInfo &LI) + : InnerAM(&InnerAM), LI(&LI), MSSAUsed(false) {} + Result(Result &&Arg) + : InnerAM(std::move(Arg.InnerAM)), LI(Arg.LI), MSSAUsed(Arg.MSSAUsed) { + // We have to null out the analysis manager in the moved-from state + // because we are taking ownership of the responsibility to clear the + // analysis state. + Arg.InnerAM = nullptr; + } + Result &operator=(Result &&RHS) { + InnerAM = RHS.InnerAM; + LI = RHS.LI; + MSSAUsed = RHS.MSSAUsed; + // We have to null out the analysis manager in the moved-from state + // because we are taking ownership of the responsibility to clear the + // analysis state. + RHS.InnerAM = nullptr; + return *this; + } + ~Result() { + // InnerAM is cleared in a moved from state where there is nothing to do. + if (!InnerAM) + return; + + // Clear out the analysis manager if we're being destroyed -- it means we + // didn't even see an invalidate call when we got invalidated. + InnerAM->clear(); + } + + /// Mark MemorySSA as used so we can invalidate self if MSSA is invalidated. + void markMSSAUsed() { MSSAUsed = true; } + + /// Accessor for the analysis manager. + LoopNestAnalysisManager &getManager() { return *InnerAM; } + + /// Handler for invalidation of the proxy for a particular function. + /// + /// If the proxy, \c LoopInfo, and associated analyses are preserved, this + /// will merely forward the invalidation event to any cached loop analysis + /// results for loops within this function.
+ /// + /// If the necessary loop infrastructure is not preserved, this will forcibly + /// clear all of the cached analysis results that are keyed on the \c + /// LoopInfo for this function. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv); + +private: + LoopNestAnalysisManager *InnerAM; + LoopInfo *LI; + bool MSSAUsed; +}; + +template <> +LoopNestAnalysisManagerFunctionProxy::Result +LoopNestAnalysisManagerFunctionProxy::run(Function &F, + FunctionAnalysisManager &AM); + +extern template class InnerAnalysisManagerProxy; + +extern template class OuterAnalysisManagerProxy; +using FunctionAnalysisManagerLoopNestProxy = + OuterAnalysisManagerProxy; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_LOOPNESTANALYSISMANAGER_H diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -156,6 +156,7 @@ /// Generator for '#omp parallel' /// /// \param Loc The insert and source location description. + /// \param AllocaIP The insertion points to be used for alloca instructions. /// \param BodyGenCB Callback that will generate the region code. /// \param PrivCB Callback to copy a given variable (think copy constructor). /// \param FiniCB Callback to finalize variable copies. @@ -166,10 +167,11 @@ /// /// \returns The insertion position *after* the parallel. IRBuilder<>::InsertPoint - CreateParallel(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, - Value *IfCondition, Value *NumThreads, - omp::ProcBindKind ProcBind, bool IsCancellable); + CreateParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, + BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, + FinalizeCallbackTy FiniCB, Value *IfCondition, + Value *NumThreads, omp::ProcBindKind ProcBind, + bool IsCancellable); /// Generator for '#omp flush' /// diff --git a/llvm/include/llvm/IR/PassInstrumentation.h b/llvm/include/llvm/IR/PassInstrumentation.h --- a/llvm/include/llvm/IR/PassInstrumentation.h +++ b/llvm/include/llvm/IR/PassInstrumentation.h @@ -234,6 +234,8 @@ } }; +bool isSpecialPass(StringRef PassID, const std::vector &Specials); + } // namespace llvm #endif diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h --- a/llvm/include/llvm/IR/PassManager.h +++ b/llvm/include/llvm/IR/PassManager.h @@ -510,10 +510,6 @@ if (!PI.runBeforePass(*P, IR)) continue; - if (DebugLogging) - dbgs() << "Running pass: " << P->name() << " on " << IR.getName() - << "\n"; - PreservedAnalyses PassPA; { TimeTraceScope TimeScope(P->name(), IR.getName()); diff --git a/llvm/include/llvm/IR/PassManagerImpl.h b/llvm/include/llvm/IR/PassManagerImpl.h --- a/llvm/include/llvm/IR/PassManagerImpl.h +++ b/llvm/include/llvm/IR/PassManagerImpl.h @@ -64,9 +64,6 @@ // run it to produce a result, which we then add to the cache. 
if (Inserted) { auto &P = this->lookUpPass(ID); - if (DebugLogging) - dbgs() << "Running analysis: " << P.name() << " on " << IR.getName() - << "\n"; PassInstrumentation PI; if (ID != PassInstrumentationAnalysis::ID()) { diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -17,10 +17,12 @@ #include "llvm/ADT/Optional.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/LoopNestAnalysisManager.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/Error.h" #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Scalar/LoopNestPassManager.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include @@ -270,6 +272,7 @@ /// This is an interface that can be used to cross register each /// AnalysisManager with all the others analysis managers. void crossRegisterProxies(LoopAnalysisManager &LAM, + LoopNestAnalysisManager &LNAM, FunctionAnalysisManager &FAM, CGSCCAnalysisManager &CGAM, ModuleAnalysisManager &MAM); @@ -305,6 +308,13 @@ /// additional analyses. void registerLoopAnalyses(LoopAnalysisManager &LAM); + /// Registers all available loop nest analysis passes. + /// + /// This is an interface that can be used to populate a \c + /// LoopNestAnalysisManager with all registered loop nest analyses. Callers + /// can still manually register any additional analyses. + void registerLoopNestAnalyses(LoopNestAnalysisManager &LNAM); + /// Construct the core LLVM function canonicalization and simplification /// pipeline. /// @@ -507,6 +517,9 @@ Error parsePassPipeline(LoopPassManager &LPM, StringRef PipelineText, bool VerifyEachPass = true, bool DebugLogging = false); + Error parsePassPipeline(LoopNestPassManager &LPM, StringRef PipelineText, + bool VerifyEachPass = true, + bool DebugLogging = false); /// @}} /// Parse a textual alias analysis pipeline into the provided AA manager. 
@@ -643,6 +656,10 @@ const std::function &C) { LoopAnalysisRegistrationCallbacks.push_back(C); } + void registerAnalysisRegistrationCallback( + const std::function &C) { + LoopNestAnalysisRegistrationCallbacks.push_back(C); + } void registerAnalysisRegistrationCallback( const std::function &C) { ModuleAnalysisRegistrationCallbacks.push_back(C); @@ -668,6 +685,11 @@ ArrayRef)> &C) { LoopPipelineParsingCallbacks.push_back(C); } + void registerPipelineParsingCallback( + const std::function)> &C) { + LoopNestPipelineParsingCallbacks.push_back(C); + } void registerPipelineParsingCallback( const std::function)> &C) { @@ -715,11 +737,18 @@ bool VerifyEachPass, bool DebugLogging); Error parseLoopPass(LoopPassManager &LPM, const PipelineElement &E, bool VerifyEachPass, bool DebugLogging); + Error parseLoopNestPass(LoopNestPassManager &LNPM, const PipelineElement &E, + bool &UseMemorySSA, bool VerifyEachPass, + bool DebugLogging); bool parseAAPassName(AAManager &AA, StringRef Name); Error parseLoopPassPipeline(LoopPassManager &LPM, ArrayRef Pipeline, bool VerifyEachPass, bool DebugLogging); + Error parseLoopNestPassPipeline(LoopNestPassManager &LNPM, + ArrayRef Pipeline, + bool &UseMemorySSA, bool VerifyEachPass, + bool DebugLogging); Error parseFunctionPassPipeline(FunctionPassManager &FPM, ArrayRef Pipeline, bool VerifyEachPass, bool DebugLogging); @@ -785,6 +814,13 @@ ArrayRef)>, 2> LoopPipelineParsingCallbacks; + // LoopNest callbacks + SmallVector, 2> + LoopNestAnalysisRegistrationCallbacks; + SmallVector)>, + 2> + LoopNestPipelineParsingCallbacks; // AA callbacks SmallVector, 2> AAParsingCallbacks; diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -61,7 +61,16 @@ private: bool skip(StringRef PassID, Any IR); + bool DebugLogging; +}; + +// Debug logging for transformation and analysis passes. +class PrintPassInstrumentation { +public: + PrintPassInstrumentation(bool DebugLogging) : DebugLogging(DebugLogging) {} + void registerCallbacks(PassInstrumentationCallbacks &PIC); +private: bool DebugLogging; }; @@ -69,12 +78,13 @@ /// instrumentations and manages their state (if any). 
class StandardInstrumentations { PrintIRInstrumentation PrintIR; + PrintPassInstrumentation PrintPass; TimePassesHandler TimePasses; OptNoneInstrumentation OptNone; public: StandardInstrumentations(bool DebugLogging) - : PrintIR(), TimePasses(), OptNone(DebugLogging) {} + : PrintPass(DebugLogging), OptNone(DebugLogging) {} void registerCallbacks(PassInstrumentationCallbacks &PIC); diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -99,21 +99,17 @@ #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CGSCCPassManager.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/AbstractCallSite.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/PassManager.h" diff --git a/llvm/include/llvm/Transforms/Scalar/LoopNestPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopNestPassManager.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/LoopNestPassManager.h @@ -0,0 +1,356 @@ +//===- LoopNestPassManager.h - Loop nest pass management -----------------*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_LOOPNESTPASSMANAGER_H +#define LLVM_TRANSFORMS_SCALAR_LOOPNESTPASSMANAGER_H + +#include "llvm/ADT/PriorityWorklist.h" +#include "llvm/Analysis/LoopNestAnalysis.h" +#include "llvm/Analysis/LoopNestAnalysisManager.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" + +namespace llvm { + +class LNPMUpdater; + +template <> +PreservedAnalyses +PassManager::run(LoopNest &LN, LoopNestAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LNPMUpdater &U); + +extern template class PassManager; + +using LoopNestPassManager = + PassManager; + +/// A partial specialization of the require analysis template pass to forward +/// the extra parameters from a transformation's run method to the +/// AnalysisManager's getResult. +template +struct RequireAnalysisPass + : PassInfoMixin< + RequireAnalysisPass> { + PreservedAnalyses run(LoopNest &LN, LoopNestAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LNPMUpdater &) { + (void)AM.template getResult(LN, AR); + return PreservedAnalyses::all(); + } +}; + +/// This class provides an interface for updating the loop nest pass manager +/// based on mutations to the loop nest. +/// +/// A reference to an instance of this class is passed as an argument to each +/// LoopNest pass, and LoopNest passes should use it to update LNPM +/// infrastructure if they modify the loop nest structure. 
+class LNPMUpdater { +public: + /// This can be queried by loop nest passes which run other loop nest passes + /// (like pass managers) to know whether the loop nest needs to be skipped due + /// to updates to the loop nest. + /// + /// If this returns true, the loop nest object may have been deleted, so + /// passes should take care not to touch the object. + bool skipCurrentLoopNest() const { return SkipCurrentLoopNest; } + + void markLoopNestAsDeleted(LoopNest &LN, llvm::StringRef Name) { + LNAM.clear(LN, Name); + assert(&LN.getOutermostLoop() == CurrentLoopNest && + "Cannot delete loop nests other than the current one"); + SkipCurrentLoopNest = true; + } + + /// Loop nest passes should use this method to indicate they have added new + /// loop nests to the current function. + /// + /// \p NewLoopNests must only contain top-level loops. + void addNewLoopNests(ArrayRef NewLoopNests) { + for (Loop *NewL : NewLoopNests) { +#ifndef NDEBUG + assert(!NewL->getParentLoop() && + "All of the new loops must be top-level!"); +#endif + Worklist.insert(NewL); + } + } + + void revisitCurrentLoopNest() { + SkipCurrentLoopNest = true; + Worklist.insert(CurrentLoopNest); + } + +private: + template friend class FunctionToLoopNestPassAdaptor; + + LNPMUpdater(SmallPriorityWorklist &Worklist, + LoopNestAnalysisManager &LNAM) + : Worklist(Worklist), LNAM(LNAM) {} + + /// The \c FunctionToLoopNestPassAdaptor's worklist of loops to process. + SmallPriorityWorklist &Worklist; + + /// The analysis manager for use in the current loop nest; + LoopNestAnalysisManager &LNAM; + + Loop *CurrentLoopNest; + bool SkipCurrentLoopNest; +}; + +/// Adaptor that maps from a function to its loop nests. +/// +/// Designed to allow composition of a LoopNestPass(Manager) and a +/// FunctionPassManager. Note that if this pass is constructed with a \c +/// FunctionAnalysisManager it will run the \c +/// LoopNestAnalysisManagerFunctionProxy analysis prior to running the loop +/// passes over the function to enable a \c LoopNestAnalysisManager to be used +/// within this run safely. +template +class FunctionToLoopNestPassAdaptor + : public PassInfoMixin> { +public: + explicit FunctionToLoopNestPassAdaptor(LoopNestPassT Pass, + bool UseMemorySSA = false, + bool DebugLogging = false) + : Pass(std::move(Pass)), UseMemorySSA(UseMemorySSA), + LoopCanonicalizationFPM(DebugLogging) { + LoopCanonicalizationFPM.addPass(LoopSimplifyPass()); + LoopCanonicalizationFPM.addPass(LCSSAPass()); + } + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) { + // Before we even compute any loop nest analyses, first run a miniature + // function pass pipeline to put loops into their canonical form. Note that + // we can directly build up function analyses after this as the function + // pass manager handles all the invalidation at that layer. + PassInstrumentation PI = AM.getResult(F); + + PreservedAnalyses PA = PreservedAnalyses::all(); + if (PI.runBeforePass(LoopCanonicalizationFPM, F)) { + PA = LoopCanonicalizationFPM.run(F, AM); + PI.runAfterPass(LoopCanonicalizationFPM, F); + } + + // Get the loop structure for this function + LoopInfo &LI = AM.getResult(F); + + // If there are no loops, there is nothing to do here. + if (LI.empty()) + return PA; + + // Get the analysis results needed by loop nest passes. + LoopStandardAnalysisResults LAR = + detail::getLoopStandardAnalysisResults(F, AM, UseMemorySSA); + + // Setup the loop nest analysis manager from its proxy. 
It is important that + // this is only done when there are loops to process and we have built the + // LoopStandardAnalysisResults object. The loop nest analyses cached in this + // manager have access to those analysis results and so it must invalidate + // itself when they go away. + auto &LNAMFP = AM.getResult(F); + if (UseMemorySSA) + LNAMFP.markMSSAUsed(); + LoopNestAnalysisManager &LNAM = LNAMFP.getManager(); + + // The worklist of loop nests in the function. The loop nests are + // represented by their root loops and the actual LoopNest object will be + // constructed lazily when needed. + SmallPriorityWorklist Worklist; + LNPMUpdater Updater(Worklist, LNAM); + + // Append all outer-most loops in the function into the worklist. + for (Loop *L : LI.getTopLevelLoops()) + Worklist.insert(L); + + do { + Loop *L = Worklist.pop_back_val(); + + // Reset the update structure for this loop nest. + Updater.CurrentLoopNest = L; + Updater.SkipCurrentLoopNest = false; + + LoopNest &LN = LNAM.getLoopNest(*L, LAR); + // Check the PassInstrumentation's BeforePass callbacks before running the + // pass, skip its execution completely if asked to (callback returns + // false). + if (!PI.runBeforePass(Pass, LN)) + continue; + + PreservedAnalyses PassPA; + { + TimeTraceScope TimeScope(Pass.name()); + PassPA = Pass.run(LN, LNAM, LAR, Updater); + } + + // Do not pass deleted LoopNest into the instrumentation. + if (Updater.skipCurrentLoopNest()) + PI.runAfterPassInvalidated(Pass); + else + PI.runAfterPass(Pass, LN); + + if (!Updater.skipCurrentLoopNest()) + // We know that the loop nest pass couldn't have invalidated any other + // loop nest's analyses (that's the contract of a loop nest pass), so + // directly handle the loop nest analysis manager's invalidation here. + LNAM.invalidate(LN, PassPA); + + // Then intersect the preserved set so that invalidation of loop nest + // analyses will eventually occur when the loop nest pass completes. + PA.intersect(std::move(PassPA)); + } while (!Worklist.empty()); + + // By definition we preserve the proxy. We also preserve all analyses on + // LoopNests. This precludes *any* invalidation of loop nest analyses by the + // proxy, but that's OK because we've taken care to invalidate analyses in + // the loop nest analysis manager incrementally above. + PA.preserveSet>(); + PA.preserve(); + // We also preserve the set of standard analyses. + detail::preserveLoopStandardAnalysisResults(PA, UseMemorySSA); + detail::preserveAACategory(PA); + return PA; + } + +private: + LoopNestPassT Pass; + bool UseMemorySSA; + FunctionPassManager LoopCanonicalizationFPM; +}; + +/// A function to deduce a loop nest pass type and wrap it in the templated +/// adaptor. +template +FunctionToLoopNestPassAdaptor +createFunctionToLoopNestPassAdaptor(LoopNestPassT Pass, + bool UseMemorySSA = false, + bool DebugLogging = false) { + return FunctionToLoopNestPassAdaptor( + std::move(Pass), UseMemorySSA, DebugLogging); +} + +/// Pass for printing a loop nest's property. This is similar to +/// \c LoopNestPrinterPass in \file LoopNestAnalysis.h but implemented as a +/// LoopNestPass. +class PrintLoopNestPass : public PassInfoMixin { + raw_ostream &OS; + std::string Banner; + +public: + PrintLoopNestPass(); + explicit PrintLoopNestPass(raw_ostream &OS, const std::string &Banner = ""); + + PreservedAnalyses run(LoopNest &LN, LoopNestAnalysisManager &, + LoopStandardAnalysisResults &, LNPMUpdater &U); +}; + +/// Adaptor that maps from a loop nest to its loops. 
+template +class LoopNestToLoopPassAdaptor + : public PassInfoMixin> { +public: + explicit LoopNestToLoopPassAdaptor(LoopPassT Pass) : Pass(std::move(Pass)) {} + + PreservedAnalyses run(LoopNest &LN, LoopNestAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LNPMUpdater &U) { + PassInstrumentation PI = AM.getResult(LN, AR); + PreservedAnalyses PA = PreservedAnalyses::all(); + + // Get the loop analysis manager from the loop nest analysis manager. No + // need to set up proxy here since currently the latter is simply a wrapper + // around the former. + LoopAnalysisManager &LAM = AM.getLoopAnalysisManager(); + + SmallPriorityWorklist Worklist; + LPMUpdater Updater(Worklist, LAM); + appendLoopNestToWorklist(LN.getOutermostLoop(), Worklist); + + assert(!Worklist.empty() && + "Worklist should be non-empty since we're running on a LoopNest"); + + // Save a copy of the root loop and its name here in case they are + // invalidated later. + const Loop *Root = &LN.getOutermostLoop(); + const std::string LoopNestName = std::string(LN.getName()); + + do { + Loop *L = Worklist.pop_back_val(); + Updater.CurrentL = L; + Updater.SkipCurrentLoop = false; + +#ifndef NDEBUG + // Save a parent loop pointer for asserts. + Updater.ParentL = L->getParentLoop(); + + // Verify the loop structure and LCSSA form. + L->verifyLoop(); + assert(L->isRecursivelyLCSSAForm(AR.DT, AR.LI) && + "Loops must remain in LCSSA form!"); +#endif + + // Check the PassInstrumentation's BeforePass callbacks. + if (!PI.runBeforePass(Pass, *L)) + continue; + + PreservedAnalyses PassPA; + { + TimeTraceScope TimeScope(Pass.name()); + PassPA = Pass.run(*L, LAM, AR, Updater); + } + + // Do not pass deleted Loop into the instrumentation. + if (Updater.skipCurrentLoop()) + PI.runAfterPassInvalidated(Pass); + else + PI.runAfterPass(Pass, *L); + + if (!Updater.SkipCurrentLoop) + // Invalidate the loop analysis results here. + LAM.invalidate(*L, PassPA); + + PA.intersect(std::move(PassPA)); + } while (!Worklist.empty()); + + // Since the loops are processed in post-order, at this point CurrentL in + // Updater should point to the root loop. If the root loop is marked as + // deleted, we should also delete the loop nest from the function. + assert(Updater.CurrentL == Root && "CurrentL should point to the root loop " + "after traversing the loop nest."); + if (Updater.skipCurrentLoop()) + U.markLoopNestAsDeleted(LN, LoopNestName); + + // We don't have to explicitly mark the loop standard analysis results as + // preserved here since this will eventually be handled by the \c + // FunctionToLoopNestPassAdaptor. + PA.preserveSet>(); + // FIXME: We should check whether the loop nest structure is preserved or + // not. + return PA; + } + +private: + LoopPassT Pass; +}; + +/// A function to deduce a loop pass type and wrap it in the templated +/// adaptor.
+template +LoopNestToLoopPassAdaptor +createLoopNestToLoopPassAdaptor(LoopPassT Pass) { + return LoopNestToLoopPassAdaptor(std::move(Pass)); +} + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_LOOPNESTPASSMANAGER_H diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h --- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -104,6 +104,7 @@ LoopStandardAnalysisResults &, LPMUpdater &>; template class FunctionToLoopPassAdaptor; +template class LoopNestToLoopPassAdaptor; /// This class provides an interface for updating the loop pass manager based /// on mutations to the loop nest. @@ -199,6 +200,7 @@ private: template friend class llvm::FunctionToLoopPassAdaptor; + template friend class llvm::LoopNestToLoopPassAdaptor; /// The \c FunctionToLoopPassAdaptor's worklist of loops to process. SmallPriorityWorklist &Worklist; @@ -220,6 +222,48 @@ : Worklist(Worklist), LAM(LAM) {} }; +namespace detail { + +/// Helper function for preserving the standard analyses on loops. +inline void preserveLoopStandardAnalysisResults(PreservedAnalyses &PA, + bool UseMemorySSA) { + PA.preserve(); + PA.preserve(); + PA.preserve(); + if (UseMemorySSA) + PA.preserve(); +} + +/// Helper function for preserving AA category. +inline void preserveAACategory(PreservedAnalyses &PA) { + // FIXME: What we really want to do here is preserve an AA category, but + // that concept doesn't exist yet. + PA.preserve(); + PA.preserve(); + PA.preserve(); + PA.preserve(); +} + +/// Helper function for getting the analysis results needed by loop and loop +/// nest passes. +inline LoopStandardAnalysisResults +getLoopStandardAnalysisResults(Function &F, FunctionAnalysisManager &AM, + bool UseMemorySSA) { + MemorySSA *MSSA = + UseMemorySSA ? (&AM.getResult(F).getMSSA()) : nullptr; + LoopStandardAnalysisResults LAR = {AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + MSSA}; + return LAR; +} + +} // namespace detail + /// Adaptor that maps from a function to its loops. /// /// Designed to allow composition of a LoopPass(Manager) and a @@ -262,18 +306,9 @@ if (LI.empty()) return PA; - // Get the analysis results needed by loop passes. - MemorySSA *MSSA = UseMemorySSA - ? (&AM.getResult(F).getMSSA()) - : nullptr; - LoopStandardAnalysisResults LAR = {AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - MSSA}; + // Get the analysis results needed by loop nest passes. + LoopStandardAnalysisResults LAR = + detail::getLoopStandardAnalysisResults(F, AM, UseMemorySSA); // Setup the loop analysis manager from its proxy. It is important that // this is only done when there are loops to process and we have built the @@ -352,17 +387,8 @@ PA.preserveSet>(); PA.preserve(); // We also preserve the set of standard analyses. - PA.preserve(); - PA.preserve(); - PA.preserve(); - if (UseMemorySSA) - PA.preserve(); - // FIXME: What we really want to do here is preserve an AA category, but - // that concept doesn't exist yet. 
- PA.preserve(); - PA.preserve(); - PA.preserve(); - PA.preserve(); + detail::preserveLoopStandardAnalysisResults(PA, UseMemorySSA); + detail::preserveAACategory(PA); return PA; } diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -427,6 +427,12 @@ /// FIXME: Consider changing the order in LoopInfo. void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist &); +/// Utility that implements appending of all loops in a loop nest (rooted at \p +/// Root) onto a worklist. Since appendLoopsToWorklist(Loop &) only pushes +/// subloops, the root loop will be pushed into the worklist first in this +/// function. +void appendLoopNestToWorklist(Loop &Root, SmallPriorityWorklist &); + /// Recursively clone the specified loop and all of its children, /// mapping the blocks with the specified map. Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, diff --git a/llvm/lib/Analysis/CGSCCPassManager.cpp b/llvm/lib/Analysis/CGSCCPassManager.cpp --- a/llvm/lib/Analysis/CGSCCPassManager.cpp +++ b/llvm/lib/Analysis/CGSCCPassManager.cpp @@ -78,9 +78,6 @@ if (!PI.runBeforePass(*Pass, *C)) continue; - if (DebugLogging) - dbgs() << "Running pass: " << Pass->name() << " on " << *C << "\n"; - PreservedAnalyses PassPA; { TimeTraceScope TimeScope(Pass->name()); diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -1,4 +1,5 @@ if (DEFINED LLVM_HAVE_TF_AOT OR DEFINED LLVM_HAVE_TF_API) + include_directories(/usr/include/tensorflow) if (DEFINED LLVM_HAVE_TF_AOT) include(TensorFlowCompile) tfcompile(models/inliner serve action InlinerSizeModel llvm::InlinerSizeModel) @@ -75,6 +76,7 @@ LoopAnalysisManager.cpp LoopCacheAnalysis.cpp LoopNestAnalysis.cpp + LoopNestAnalysisManager.cpp LoopUnrollAnalyzer.cpp LoopInfo.cpp LoopPass.cpp diff --git a/llvm/lib/Analysis/LoopNestAnalysis.cpp b/llvm/lib/Analysis/LoopNestAnalysis.cpp --- a/llvm/lib/Analysis/LoopNestAnalysis.cpp +++ b/llvm/lib/Analysis/LoopNestAnalysis.cpp @@ -206,6 +206,15 @@ return CurrentDepth; } +void LoopNest::reconstructInplace(ScalarEvolution &SE) { + assert(!Loops.empty() && "Loop nest should contain the root loop."); + Loop *Root = Loops[0]; + MaxPerfectDepth = getMaxPerfectDepth(*Root, SE); + Loops.clear(); + for (Loop *L : breadth_first(Root)) + Loops.push_back(L); +} + static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop, ScalarEvolution &SE) { // The inner loop must be the only outer loop's child. @@ -282,6 +291,13 @@ return OS; } +AnalysisKey LoopNestAnalysis::Key; + +LoopNest LoopNestAnalysis::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR) { + return LoopNest(L, AR.SE); +} + //===----------------------------------------------------------------------===// // LoopNestPrinterPass implementation // diff --git a/llvm/lib/Analysis/LoopNestAnalysisManager.cpp b/llvm/lib/Analysis/LoopNestAnalysisManager.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Analysis/LoopNestAnalysisManager.cpp @@ -0,0 +1,114 @@ +//===- LoopNestAnalysisManager.cpp - LoopNest analysis management ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopNestAnalysisManager.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/PassManagerImpl.h" + +using namespace llvm; + +namespace llvm { + +template class AnalysisManager; +template class InnerAnalysisManagerProxy; +template class InnerAnalysisManagerProxy; +template class OuterAnalysisManagerProxy; + +bool LoopNestAnalysisManagerFunctionProxy::Result::invalidate( + Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv) { + // If literally everything is preserved, we're done. + if (PA.areAllPreserved()) + return false; // This is still a valid proxy. + + const std::vector &Loops = LI->getTopLevelLoops(); + + auto PAC = PA.getChecker(); + bool InvalidateMemorySSAAnalysis = false; + if (MSSAUsed) + InvalidateMemorySSAAnalysis = Inv.invalidate(F, PA); + if (!PAC.preserved() && !PAC.preservedSet>()) { + if (Inv.invalidate(F, PA) || + Inv.invalidate(F, PA) || + Inv.invalidate(F, PA) || + Inv.invalidate(F, PA) || + Inv.invalidate(F, PA) || + InvalidateMemorySSAAnalysis) { + // Note that the LoopInfo may be stale at this point, however the loop + // objects themselves remain the only viable keys that could be in the + // analysis manager's cache. So we just walk the keys and forcibly clear + // those results. Note that the order doesn't matter here as this will + // just directly destroy the results without calling methods on them. + // + // Though we're dealing with loop nests here, the analysis results can + // still be cleared via the root loops. + for (Loop *L : Loops) + InnerAM->clear(*L, ""); + InnerAM = nullptr; + return true; + } + } + + // Directly check if the relevant set is preserved. + bool AreLoopNestAnalysesPreserved = + PA.allAnalysesInSetPreserved>(); + + for (Loop *L : Loops) { + Optional LoopNestPA; + + // Check to see whether the preserved set needs to be pruned based on + // function-level analysis invalidation that triggers deferred invalidation + // registered with the outer analysis manager proxy for this loop nest. + if (auto *OuterProxy = + InnerAM->getCachedResult( + *L)) { + for (const auto &OuterInvalidationPair : + OuterProxy->getOuterInvalidations()) { + AnalysisKey *OuterAnalysisID = OuterInvalidationPair.first; + const auto &InnerAnalysisIDs = OuterInvalidationPair.second; + if (Inv.invalidate(OuterAnalysisID, F, PA)) { + if (!LoopNestPA) + LoopNestPA = PA; + for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs) + LoopNestPA->abandon(InnerAnalysisID); + } + } + } + + // Check if we needed a custom PA set, and if so we'll need to run the + // inner invalidation. + if (LoopNestPA) { + InnerAM->invalidate(*L, *LoopNestPA); + continue; + } + + // Otherwise we only need to do invalidation if the original PA set didn't + // preserve all loop nest analyses. + if (!AreLoopNestAnalysesPreserved) + InnerAM->invalidate(*L, PA); + } + + // Return false to indicate that this result is still a valid proxy. 
+ return false; +} + +template <> +LoopNestAnalysisManagerFunctionProxy::Result +LoopNestAnalysisManagerFunctionProxy::run(Function &F, + FunctionAnalysisManager &AM) { + return Result(*InnerAM, AM.getResult(F)); +} + +} // namespace llvm diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -1239,7 +1239,8 @@ Value *NewValueInsert = insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV); Value *StoreSuccess = - TLI->emitStoreConditional(Builder, NewValueInsert, Addr, MemOpOrder); + TLI->emitStoreConditional(Builder, NewValueInsert, PMV.AlignedAddr, + MemOpOrder); StoreSuccess = Builder.CreateICmpEQ( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB; diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -2027,8 +2027,8 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { - assert(!TM.getFunctionSections() && !TM.getDataSections() && - "XCOFF unique sections not yet implemented."); + assert(!TM.getDataSections() && - "XCOFF unique data sections not yet implemented."); // Common symbols go into a csect with matching name which will get mapped // into the .bss section. @@ -2057,8 +2057,13 @@ Kind, /* BeginSymbolName */ nullptr); } - if (Kind.isText()) + if (Kind.isText()) { + if (TM.getFunctionSections()) { + return cast(getFunctionEntryPointSymbol(GO, TM)) + ->getRepresentedCsect(); + } return TextSection; + } if (Kind.isData() || Kind.isReadOnlyWithRel()) // TODO: We may put this under option control, because user may want to @@ -2161,6 +2166,22 @@ SmallString<128> NameStr; NameStr.push_back('.'); getNameWithPrefix(NameStr, Func, TM); + + // When -function-sections is enabled, it's not necessary to emit + // the function entry point label anymore. We will use the function entry + // point csect instead. For function declarations, it's okay to continue + // using label semantics because undefined symbols get treated as csects with + // the XTY_ER property anyway.
+ if (TM.getFunctionSections() && !Func->isDeclaration() && + isa(Func)) { + XCOFF::StorageClass SC = + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(Func); + return cast(getContext().getXCOFFSection( + NameStr, XCOFF::XMC_PR, XCOFF::XTY_SD, SC, + SectionKind::getText())) + ->getQualNameSymbol(); + } + return getContext().getOrCreateSymbol(NameStr); } diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -394,9 +394,10 @@ } IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel( - const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, - Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) { + const LocationDescription &Loc, InsertPointTy OuterAllocaIP, + BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, + FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, + omp::ProcBindKind ProcBind, bool IsCancellable) { if (!updateToLocation(Loc)) return Loc.IP; @@ -429,7 +430,9 @@ // we want to delete at the end. SmallVector ToBeDeleted; - Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI()); + // Change the location to the outer alloca insertion point to create and + // initialize the allocas we pass into the parallel region. + Builder.restoreIP(OuterAllocaIP); AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr"); @@ -481,9 +484,9 @@ // Generate the privatization allocas in the block that will become the entry // of the outlined function. - InsertPointTy AllocaIP(PRegEntryBB, - PRegEntryBB->getTerminator()->getIterator()); - Builder.restoreIP(AllocaIP); + Builder.SetInsertPoint(PRegEntryBB->getTerminator()); + InsertPointTy InnerAllocaIP = Builder.saveIP(); + AllocaInst *PrivTIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr.local"); Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid"); @@ -512,7 +515,7 @@ // Let the caller create the body. 
assert(BodyGenCB && "Expected body generation callback!"); InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin()); - BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB); + BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB); LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n"); @@ -671,7 +674,7 @@ ReplacementValue = PrivTID; } else { Builder.restoreIP( - PrivCB(AllocaIP, Builder.saveIP(), V, ReplacementValue)); + PrivCB(InnerAllocaIP, Builder.saveIP(), V, ReplacementValue)); assert(ReplacementValue && "Expected copy/create callback to set replacement value!"); if (ReplacementValue == &V) @@ -686,6 +689,10 @@ LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n"); PrivHelper(*Input); } + LLVM_DEBUG({ + for (Value *Output : Outputs) + LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n"); + }); assert(Outputs.empty() && "OpenMP outlining should not produce live-out values!"); diff --git a/llvm/lib/IR/PassInstrumentation.cpp b/llvm/lib/IR/PassInstrumentation.cpp --- a/llvm/lib/IR/PassInstrumentation.cpp +++ b/llvm/lib/IR/PassInstrumentation.cpp @@ -12,10 +12,19 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/PassInstrumentation.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/IR/PassManager.h" namespace llvm { AnalysisKey PassInstrumentationAnalysis::Key; +bool isSpecialPass(StringRef PassID, const std::vector &Specials) { + size_t Pos = PassID.find('<'); + if (Pos == StringRef::npos) + return false; + StringRef Prefix = PassID.substr(0, Pos); + return any_of(Specials, [Prefix](StringRef S) { return Prefix.endswith(S); }); +} + } // namespace llvm diff --git a/llvm/lib/IR/PassTimingInfo.cpp b/llvm/lib/IR/PassTimingInfo.cpp --- a/llvm/lib/IR/PassTimingInfo.cpp +++ b/llvm/lib/IR/PassTimingInfo.cpp @@ -231,17 +231,9 @@ MyTimer->stopTimer(); } -static bool matchPassManager(StringRef PassID) { - size_t prefix_pos = PassID.find('<'); - if (prefix_pos == StringRef::npos) - return false; - StringRef Prefix = PassID.substr(0, prefix_pos); - return Prefix.endswith("PassManager") || Prefix.endswith("PassAdaptor") || - Prefix.endswith("AnalysisManagerProxy"); -} - void TimePassesHandler::runBeforePass(StringRef PassID) { - if (matchPassManager(PassID)) + if (isSpecialPass(PassID, + {"PassManager", "PassAdaptor", "AnalysisManagerProxy"})) return; startTimer(PassID); @@ -251,7 +243,8 @@ } void TimePassesHandler::runAfterPass(StringRef PassID) { - if (matchPassManager(PassID)) + if (isSpecialPass(PassID, + {"PassManager", "PassAdaptor", "AnalysisManagerProxy"})) return; stopTimer(PassID); diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -16,6 +16,7 @@ #include "llvm/LTO/LTOBackend.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/LoopNestAnalysisManager.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -209,6 +210,7 @@ RegisterPassPlugins(Conf.PassPlugins, PB); LoopAnalysisManager LAM(Conf.DebugPassManager); + LoopNestAnalysisManager LNAM(LAM); FunctionAnalysisManager FAM(Conf.DebugPassManager); CGSCCAnalysisManager CGAM(Conf.DebugPassManager); ModuleAnalysisManager MAM(Conf.DebugPassManager); @@ -221,7 +223,8 @@ PB.registerCGSCCAnalyses(CGAM); PB.registerFunctionAnalyses(FAM); PB.registerLoopAnalyses(LAM); - PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + 
PB.registerLoopNestAnalyses(LNAM); + PB.crossRegisterProxies(LAM, LNAM, FAM, CGAM, MAM); ModulePassManager MPM(Conf.DebugPassManager); // FIXME (davide): verify the input. @@ -271,6 +274,7 @@ RegisterPassPlugins(Conf.PassPlugins, PB); LoopAnalysisManager LAM; + LoopNestAnalysisManager LNAM(LAM); FunctionAnalysisManager FAM; CGSCCAnalysisManager CGAM; ModuleAnalysisManager MAM; @@ -283,7 +287,8 @@ PB.registerCGSCCAnalyses(CGAM); PB.registerFunctionAnalyses(FAM); PB.registerLoopAnalyses(LAM); - PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + PB.registerLoopNestAnalyses(LNAM); + PB.crossRegisterProxies(LAM, LNAM, FAM, CGAM, MAM); ModulePassManager MPM; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -143,6 +143,7 @@ #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" #include "llvm/Transforms/Scalar/LoopInstSimplify.h" #include "llvm/Transforms/Scalar/LoopLoadElimination.h" +#include "llvm/Transforms/Scalar/LoopNestPassManager.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Scalar/LoopPredication.h" #include "llvm/Transforms/Scalar/LoopRotation.h" @@ -386,9 +387,32 @@ static StringRef name() { return "NoOpLoopAnalysis"; } }; +/// No-op loop nest pass which does nothing. +struct NoOpLoopNestPass : PassInfoMixin { + PreservedAnalyses run(LoopNest &LN, LoopNestAnalysisManager &, + LoopStandardAnalysisResults &, LNPMUpdater &) { + return PreservedAnalyses::all(); + } + static StringRef name() { return "NoOpLoopNestPass"; } +}; + +/// No-op loop nest analysis. +class NoOpLoopNestAnalysis : public AnalysisInfoMixin { + friend AnalysisInfoMixin; + static AnalysisKey Key; + +public: + struct Result {}; + Result run(Loop &, LoopAnalysisManager &, LoopStandardAnalysisResults &) { + return Result(); + } + static StringRef name() { return "NoOpLoopNestAnalysis"; } +}; + AnalysisKey NoOpModuleAnalysis::Key; AnalysisKey NoOpCGSCCAnalysis::Key; AnalysisKey NoOpFunctionAnalysis::Key; +AnalysisKey NoOpLoopNestAnalysis::Key; AnalysisKey NoOpLoopAnalysis::Key; } // namespace @@ -435,6 +459,15 @@ C(LAM); } +void PassBuilder::registerLoopNestAnalyses(LoopNestAnalysisManager &LNAM) { +#define LOOP_NEST_ANALYSIS(NAME, CREATE_PASS) \ + LNAM.registerPass([&] { return CREATE_PASS; }); +#include "PassRegistry.def" + + for (auto &C : LoopNestAnalysisRegistrationCallbacks) + C(LNAM); +} + // TODO: Investigate the cost/benefit of tail call elimination on debugging. FunctionPassManager PassBuilder::buildO1FunctionSimplificationPipeline( OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) { @@ -2006,6 +2039,30 @@ return callbacksAcceptPassName(Name, Callbacks); } +template +static bool isLoopNestPassName(StringRef Name, CallbacksT &Callbacks) { + // Explicitly handle pass manager names. + if (Name == "loop-nest" || Name == "loop-nest-mssa") + return true; + + // Explicitly handle custom-parsed pass names. 
+ if (parseRepeatPassName(Name)) + return true; + +#define LOOP_NEST_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) \ + return true; +#define LOOP_NEST_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) \ + return true; +#define LOOP_NEST_ANALYSIS(NAME, CREATE_PASS) \ + if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \ + return true; +#include "PassRegistry.def" + + return callbacksAcceptPassName(Name, Callbacks); +} + template static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names. @@ -2398,6 +2455,22 @@ FPM.addPass(std::move(NestedFPM)); return Error::success(); } + if (Name == "loop-nest" || Name == "loop-nest-mssa") { + LoopNestPassManager LNPM(DebugLogging); + // Because the LoopStandardAnalysisResults can only be constructed at + // FunctionToLoopNestPassAdaptor but not at LoopNestToLoopPassAdaptor, + // UseMemorySSA should depends on the loop passes as well. + // Memory SSA is needed true if either the loop nest explicitly requires + // it or at least one of the loop passes wrapped inside the loop nest pass + // requires it. + bool UseMemorySSA = (Name == "loop-nest-mssa"); + if (auto Err = parseLoopNestPassPipeline( + LNPM, InnerPipeline, UseMemorySSA, VerifyEachPass, DebugLogging)) + return Err; + FPM.addPass(createFunctionToLoopNestPassAdaptor( + std::move(LNPM), UseMemorySSA, DebugLogging)); + return Error::success(); + } if (Name == "loop" || Name == "loop-mssa") { LoopPassManager LPM(DebugLogging); if (auto Err = parseLoopPassPipeline(LPM, InnerPipeline, VerifyEachPass, @@ -2483,6 +2556,94 @@ inconvertibleErrorCode()); } +Error PassBuilder::parseLoopNestPass(LoopNestPassManager &LNPM, + const PipelineElement &E, + bool &UseMemorySSA, bool VerifyEachPass, + bool DebugLogging) { + StringRef Name = E.Name; + const auto &InnerPipeline = E.InnerPipeline; + + // First handle complex passes like the pass managers which carry pipelines. + if (!InnerPipeline.empty()) { + if (Name == "loop-nest") { + LoopNestPassManager NestedLNPM(DebugLogging); + if (auto Err = + parseLoopNestPassPipeline(NestedLNPM, InnerPipeline, UseMemorySSA, + VerifyEachPass, DebugLogging)) + return Err; + // Add the nested pass manager with the appropriate adaptor. + LNPM.addPass(std::move(NestedLNPM)); + return Error::success(); + } + // Loop passes can be wrapped in a loop nest pass via \c + // LoopNestToLoopPassAdaptor. + if (Name == "loop" || Name == "loop-mssa") { + LoopPassManager LPM(DebugLogging); + if (auto Err = parseLoopPassPipeline(LPM, InnerPipeline, VerifyEachPass, + DebugLogging)) + return Err; + // If the loop pass requires MemorySSA, the loop nest pass does as well. + UseMemorySSA |= (Name == "loop-mssa"); + LNPM.addPass(createLoopNestToLoopPassAdaptor(std::move(LPM))); + return Error::success(); + } + if (auto Count = parseRepeatPassName(Name)) { + LoopNestPassManager NestedLNPM(DebugLogging); + if (auto Err = + parseLoopNestPassPipeline(NestedLNPM, InnerPipeline, UseMemorySSA, + VerifyEachPass, DebugLogging)) + return Err; + LNPM.addPass(createRepeatedPass(*Count, std::move(NestedLNPM))); + return Error::success(); + } + + for (auto &C : LoopNestPipelineParsingCallbacks) + if (C(Name, LNPM, InnerPipeline)) + return Error::success(); + + // Normal passes can't have pipelines. + return make_error( + formatv("invalid use of '{0}' pass as loop pipeline", Name).str(), + inconvertibleErrorCode()); + } + +// Now expand the basic registered passes from the .inc file. 
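[Editorial aside, not part of the patch] To make the UseMemorySSA plumbing above concrete, this is how a few pipeline strings would be handled; the inner pass names are placeholders, only the manager names come from this patch:

    function(loop-nest-mssa(<passes>))        -> adaptor is created with UseMemorySSA = true up front
    function(loop-nest(loop(<passes>)))       -> UseMemorySSA stays false
    function(loop-nest(loop-mssa(<passes>)))  -> the inner loop-mssa pipeline flips UseMemorySSA to true
                                                 for the whole FunctionToLoopNestPassAdaptor

The flag has to be settled at the function-to-loop-nest boundary because that is the only place where LoopStandardAnalysisResults, and with it MemorySSA, can be assembled.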
+#define LOOP_NEST_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + LNPM.addPass(CREATE_PASS); \ + return Error::success(); \ + } +#define LOOP_NEST_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) { \ + auto Params = parsePassParameters(PARSER, Name, NAME); \ + if (!Params) \ + return Params.takeError(); \ + LNPM.addPass(CREATE_PASS(Params.get())); \ + return Error::success(); \ + } +#define LOOP_NEST_ANALYSIS(NAME, CREATE_PASS) \ + if (Name == "require<" NAME ">") { \ + LNPM.addPass(RequireAnalysisPass< \ + std::remove_reference::type, LoopNest, \ + LoopNestAnalysisManager, LoopStandardAnalysisResults &, \ + LNPMUpdater &>()); \ + return Error::success(); \ + } \ + if (Name == "invalidate<" NAME ">") { \ + LNPM.addPass(InvalidateAnalysisPass< \ + std::remove_reference::type>()); \ + return Error::success(); \ + } +#include "PassRegistry.def" + + for (auto &C : LoopNestPipelineParsingCallbacks) + if (C(Name, LNPM, InnerPipeline)) + return Error::success(); + return make_error( + formatv("unknown loop nest pass '{0}'", Name).str(), + inconvertibleErrorCode()); +} + Error PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E, bool VerifyEachPass, bool DebugLogging) { StringRef Name = E.Name; @@ -2575,6 +2736,20 @@ return false; } +Error PassBuilder::parseLoopNestPassPipeline(LoopNestPassManager &LNPM, + ArrayRef Pipeline, + bool &UseMemorySSA, + bool VerifyEachPass, + bool DebugLogging) { + for (const auto &Element : Pipeline) { + if (auto Err = parseLoopNestPass(LNPM, Element, UseMemorySSA, + VerifyEachPass, DebugLogging)) + return Err; + // FIXME: No verifier support for LoopNest passes! + } + return Error::success(); +} + Error PassBuilder::parseLoopPassPipeline(LoopPassManager &LPM, ArrayRef Pipeline, bool VerifyEachPass, @@ -2614,6 +2789,7 @@ } void PassBuilder::crossRegisterProxies(LoopAnalysisManager &LAM, + LoopNestAnalysisManager &LNAM, FunctionAnalysisManager &FAM, CGSCCAnalysisManager &CGAM, ModuleAnalysisManager &MAM) { @@ -2622,6 +2798,8 @@ CGAM.registerPass([&] { return ModuleAnalysisManagerCGSCCProxy(MAM); }); FAM.registerPass([&] { return CGSCCAnalysisManagerFunctionProxy(CGAM); }); FAM.registerPass([&] { return ModuleAnalysisManagerFunctionProxy(MAM); }); + FAM.registerPass([&] { return LoopNestAnalysisManagerFunctionProxy(LNAM); }); + LNAM.registerPass([&] { return FunctionAnalysisManagerLoopNestProxy(FAM); }); FAM.registerPass([&] { return LoopAnalysisManagerFunctionProxy(LAM); }); LAM.registerPass([&] { return FunctionAnalysisManagerLoopProxy(FAM); }); } @@ -2661,6 +2839,9 @@ } else if (isFunctionPassName(FirstName, FunctionPipelineParsingCallbacks)) { Pipeline = {{"function", std::move(*Pipeline)}}; + } else if (isLoopNestPassName(FirstName, + LoopNestPipelineParsingCallbacks)) { + Pipeline = {{"function", {{"loop-nest", std::move(*Pipeline)}}}}; } else if (isLoopPassName(FirstName, LoopPipelineParsingCallbacks)) { Pipeline = {{"function", {{"loop", std::move(*Pipeline)}}}}; } else { @@ -2788,6 +2969,9 @@ #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ if (PassName == NAME) \ return true; +#define LOOP_NEST_ANALYSIS(NAME, CREATE_PASS) \ + if (PassName == NAME) \ + return true; #define CGSSC_ANALYSIS(NAME, CREATE_PASS) \ if (PassName == NAME) \ return true; diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -321,6 +321,7 @@ LOOP_ANALYSIS("access-info", LoopAccessAnalysis()) LOOP_ANALYSIS("ddg", 
DDGAnalysis()) LOOP_ANALYSIS("iv-users", IVUsersAnalysis()) +LOOP_ANALYSIS("loop-nest", LoopNestAnalysis()) LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) #undef LOOP_ANALYSIS @@ -359,3 +360,22 @@ }, parseLoopUnswitchOptions) #undef LOOP_PASS_WITH_PARAMS + +#ifndef LOOP_NEST_ANALYSIS +#define LOOP_NEST_ANALYSIS(NAME, CREATE_PASS) +#endif +LOOP_NEST_ANALYSIS("no-op-loop-nest", NoOpLoopNestAnalysis()) +LOOP_NEST_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) +#undef LOOP_NEST_ANALYSIS + +#ifndef LOOP_NEST_PASS +#define LOOP_NEST_PASS(NAME, CREATE_PASS) +#endif +LOOP_NEST_PASS("no-op-loop-nest", NoOpLoopNestPass()) +LOOP_NEST_PASS("print", PrintLoopNestPass()) +#undef LOOP_NEST_PASS + +#ifndef LOOP_NEST_PASS_WITH_PARAMS +#define LOOP_NEST_PASS_WITH_PARAMS(NAME, CREATE_PASS) +#endif +#undef LOOP_NEST_PASS_WITH_PARAMS diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" +#include using namespace llvm; @@ -35,6 +36,13 @@ cl::desc("Enable skipping optional passes optnone functions " "under new pass manager")); +// FIXME: Change `-debug-pass-manager` from boolean to enum type. Similar to +// `-debug-pass` in legacy PM. +static cl::opt + DebugPMVerbose("debug-pass-manager-verbose", cl::Hidden, cl::init(false), + cl::desc("Print all pass management debugging information. " + "`-debug-pass-manager` must also be specified")); + namespace { /// Extracting Module out of \p IR unit. Also fills a textual description @@ -78,14 +86,25 @@ llvm_unreachable("Unknown IR unit"); } -void printIR(const Function *F, StringRef Banner, - StringRef Extra = StringRef()) { +void printIR(const Function *F, StringRef Banner, StringRef Extra = StringRef(), + bool Brief = false) { + if (Brief) { + dbgs() << F->getName() << '\n'; + return; + } + if (!llvm::isFunctionInPrintList(F->getName())) return; dbgs() << Banner << Extra << "\n" << static_cast(*F); } -void printIR(const Module *M, StringRef Banner, StringRef Extra = StringRef()) { +void printIR(const Module *M, StringRef Banner, StringRef Extra = StringRef(), + bool Brief = false) { + if (Brief) { + dbgs() << M->getName() << '\n'; + return; + } + if (llvm::isFunctionInPrintList("*") || llvm::forcePrintModuleIR()) { dbgs() << Banner << Extra << "\n"; M->print(dbgs(), nullptr, false); @@ -97,7 +116,12 @@ } void printIR(const LazyCallGraph::SCC *C, StringRef Banner, - StringRef Extra = StringRef()) { + StringRef Extra = StringRef(), bool Brief = false) { + if (Brief) { + dbgs() << *C << '\n'; + return; + } + bool BannerPrinted = false; for (const LazyCallGraph::Node &N : *C) { const Function &F = N.getFunction(); @@ -110,7 +134,13 @@ } } } -void printIR(const Loop *L, StringRef Banner) { + +void printIR(const Loop *L, StringRef Banner, bool Brief = false) { + if (Brief) { + dbgs() << *L; + return; + } + const Function *F = L->getHeader()->getParent(); if (!llvm::isFunctionInPrintList(F->getName())) return; @@ -119,7 +149,8 @@ /// Generic IR-printing helper that unpacks a pointer to IRUnit wrapped into /// llvm::Any and does actual print job. 
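[Editorial sketch, not part of the patch] For orientation, this is roughly how a client would wire the new loop-nest analysis manager in next to the existing ones and drive a textual loop-nest pipeline. The setup mirrors the BackendUtil/LTOBackend changes above; "no-op-loop-nest" is the test-only pass registered in PassRegistry.def just above, and the function name and error handling here are placeholders:

    #include "llvm/Analysis/CGSCCPassManager.h"
    #include "llvm/Analysis/LoopAnalysisManager.h"
    #include "llvm/Analysis/LoopNestAnalysisManager.h"   // added by this patch
    #include "llvm/IR/PassManager.h"
    #include "llvm/Passes/PassBuilder.h"
    #include "llvm/Support/Error.h"
    using namespace llvm;

    static void runLoopNestPipeline(Module &M) {
      PassBuilder PB;
      LoopAnalysisManager LAM;
      LoopNestAnalysisManager LNAM(LAM);
      FunctionAnalysisManager FAM;
      CGSCCAnalysisManager CGAM;
      ModuleAnalysisManager MAM;

      PB.registerModuleAnalyses(MAM);
      PB.registerCGSCCAnalyses(CGAM);
      PB.registerFunctionAnalyses(FAM);
      PB.registerLoopAnalyses(LAM);
      PB.registerLoopNestAnalyses(LNAM);                  // new in this patch
      PB.crossRegisterProxies(LAM, LNAM, FAM, CGAM, MAM); // new signature

      ModulePassManager MPM;
      // A loop-nest pipeline nested inside a function pipeline, as parsed by
      // the parsePassPipeline changes above.
      cantFail(PB.parsePassPipeline(MPM, "function(loop-nest(no-op-loop-nest))"));
      MPM.run(M, MAM);
    }

A bare top-level name such as -passes=no-op-loop-nest would be wrapped into the same function(loop-nest(...)) shape automatically by the isLoopNestPassName branch added to the top-level parser above.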
-void unwrapAndPrint(Any IR, StringRef Banner, bool ForceModule = false) { +void unwrapAndPrint(Any IR, StringRef Banner, bool ForceModule = false, + bool Brief = false) { if (ForceModule) { if (auto UnwrappedModule = unwrapModule(IR)) printIR(UnwrappedModule->first, Banner, UnwrappedModule->second); @@ -129,14 +160,14 @@ if (any_isa(IR)) { const Module *M = any_cast(IR); assert(M && "module should be valid for printing"); - printIR(M, Banner); + printIR(M, Banner, "", Brief); return; } if (any_isa(IR)) { const Function *F = any_cast(IR); assert(F && "function should be valid for printing"); - printIR(F, Banner); + printIR(F, Banner, "", Brief); return; } @@ -144,14 +175,14 @@ const LazyCallGraph::SCC *C = any_cast(IR); assert(C && "scc should be valid for printing"); std::string Extra = std::string(formatv(" (scc: {0})", C->getName())); - printIR(C, Banner, Extra); + printIR(C, Banner, Extra, Brief); return; } if (any_isa(IR)) { const Loop *L = any_cast(IR); assert(L && "Loop should be valid for printing"); - printIR(L, Banner); + printIR(L, Banner, Brief); return; } llvm_unreachable("Unknown wrapped IR type"); @@ -274,9 +305,34 @@ return true; } +void PrintPassInstrumentation::registerCallbacks( + PassInstrumentationCallbacks &PIC) { + if (!DebugLogging) + return; + + std::vector SpecialPasses = {"PassManager"}; + if (!DebugPMVerbose) + SpecialPasses.emplace_back("PassAdaptor"); + + PIC.registerBeforeNonSkippedPassCallback( + [SpecialPasses](StringRef PassID, Any IR) { + if (isSpecialPass(PassID, SpecialPasses)) + return; + + dbgs() << "Running pass: " << PassID << " on "; + unwrapAndPrint(IR, "", false, true); + }); + + PIC.registerBeforeAnalysisCallback([](StringRef PassID, Any IR) { + dbgs() << "Running analysis: " << PassID << " on "; + unwrapAndPrint(IR, "", false, true); + }); +} + void StandardInstrumentations::registerCallbacks( PassInstrumentationCallbacks &PIC) { PrintIR.registerCallbacks(PIC); + PrintPass.registerCallbacks(PIC); TimePasses.registerCallbacks(PIC); OptNone.registerCallbacks(PIC); } diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1937,6 +1937,15 @@ let Inst{4-0} = Rd; } +class ClearAuth data, string asm> + : I<(outs GPR64:$Rd), (ins GPR64:$Rn), asm, "\t$Rd", "$Rd = $Rn", []>, Sched<[]> { + bits<5> Rd; + let Inst{31-11} = 0b110110101100000101000; + let Inst{10} = data; + let Inst{9-5} = 0b11111; + let Inst{4-0} = Rd; +} + // Base class for the Armv8.4-A 8 and 16-bit flag manipulation instructions class BaseFlagManipulation : I<(outs), iops, asm, ops, "", []>, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -994,8 +994,8 @@ defm PAC : SignAuth<0b000, 0b010, "pac">; defm AUT : SignAuth<0b001, 0b011, "aut">; - def XPACI : SignAuthZero<0b100, 0b00, "xpaci">; - def XPACD : SignAuthZero<0b100, 0b01, "xpacd">; + def XPACI : ClearAuth<0, "xpaci">; + def XPACD : ClearAuth<1, "xpacd">; def PACGA : SignAuthTwoOperand<0b1100, "pacga", null_frag>; // Combined Instructions diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -474,65 +474,29 @@ ArrayRef BaseOps2, unsigned NumLoads, unsigned NumBytes) const { + // If the mem 
ops (to be clustered) do not have the same base ptr, then they + // should not be clustered assert(!BaseOps1.empty() && !BaseOps2.empty()); const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent(); const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent(); - if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2)) return false; - const MachineOperand *FirstDst = nullptr; - const MachineOperand *SecondDst = nullptr; - - if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) || - (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) || - (isMIMG(FirstLdSt) && isMIMG(SecondLdSt)) || - (isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) { - const unsigned MaxGlobalLoadCluster = 7; - if (NumLoads > MaxGlobalLoadCluster) - return false; - - FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata); - if (!FirstDst) - FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst); - SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata); - if (!SecondDst) - SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst); - } else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) { - FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst); - SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst); - } else if (isDS(FirstLdSt) && isDS(SecondLdSt)) { - FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst); - SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst); - } - - if (!FirstDst || !SecondDst) - return false; - - // Try to limit clustering based on the total number of bytes loaded - // rather than the number of instructions. This is done to help reduce - // register pressure. The method used is somewhat inexact, though, - // because it assumes that all loads in the cluster will load the - // same number of bytes as FirstLdSt. - - // The unit of this value is bytes. - // FIXME: This needs finer tuning. - unsigned LoadClusterThreshold = 16; - - const MachineRegisterInfo &MRI = - FirstLdSt.getParent()->getParent()->getRegInfo(); - - const Register Reg = FirstDst->getReg(); - - const TargetRegisterClass *DstRC = Register::isVirtualRegister(Reg) - ? MRI.getRegClass(Reg) - : RI.getPhysRegClass(Reg); - - // FIXME: NumLoads should not be subtracted 1. This is to match behavior - // of clusterNeighboringMemOps which was previosly passing cluster length - // less 1. LoadClusterThreshold should be tuned instead. - return ((NumLoads - 1) * (RI.getRegSizeInBits(*DstRC) / 8)) <= - LoadClusterThreshold; + // In order to avoid regester pressure, on an average, the number of DWORDS + // loaded together by all clustered mem ops should not exceed 8. This is an + // empirical value based on certain observations and performance related + // experiments. + // The good thing about this heuristic is - it avoids clustering of too many + // sub-word loads, and also avoids clustering of wide loads. Below is the + // brief summary of how the heuristic behaves for various `LoadSize`. + // (1) 1 <= LoadSize <= 4: cluster at max 8 mem ops + // (2) 5 <= LoadSize <= 8: cluster at max 4 mem ops + // (3) 9 <= LoadSize <= 12: cluster at max 2 mem ops + // (4) 13 <= LoadSize <= 16: cluster at max 2 mem ops + // (5) LoadSize >= 17: do not cluster + const unsigned LoadSize = NumBytes / NumLoads; + const unsigned NumDWORDs = ((LoadSize + 3) / 4) * NumLoads; + return NumDWORDs <= 8; } // FIXME: This behaves strangely. 
If, for example, you have 32 load + stores, diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1779,7 +1779,11 @@ } void PPCAIXAsmPrinter::emitFunctionEntryLabel() { - PPCAsmPrinter::emitFunctionEntryLabel(); + // It's not necessary to emit the label when we have individual + // function in its own csect. + if (!TM.getFunctionSections()) + PPCAsmPrinter::emitFunctionEntryLabel(); + // Emit aliasing label for function entry point label. llvm::for_each( GOAliasMap[&MF->getFunction()], [this](const GlobalAlias *Alias) { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySortRegion.cpp @@ -5,9 +5,14 @@ using namespace llvm; using namespace WebAssembly; -template <> bool ConcreteSortRegion::isLoop() const { +namespace llvm { +namespace WebAssembly { +template <> +bool ConcreteSortRegion::isLoop() const { return true; } +} // end namespace WebAssembly +} // end namespace llvm const SortRegion *SortRegionInfo::getRegionFor(const MachineBasicBlock *MBB) { const auto *ML = MLI.getLoopFor(MBB); diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -937,6 +937,7 @@ SMLoc End, unsigned Size, StringRef Identifier, const InlineAsmIdentifierInfo &Info); + bool parseDirectiveArch(); bool parseDirectiveEven(SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); @@ -3993,6 +3994,8 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { MCAsmParser &Parser = getParser(); StringRef IDVal = DirectiveID.getIdentifier(); + if (IDVal.startswith(".arch")) + return parseDirectiveArch(); if (IDVal.startswith(".code")) return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); else if (IDVal.startswith(".att_syntax")) { @@ -4047,6 +4050,12 @@ return true; } +bool X86AsmParser::parseDirectiveArch() { + // Ignore .arch for now. 
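[Editorial aside, not part of the patch] Stepping back to the SIInstrInfo::shouldClusterMemOps change above: the new cap is easiest to see with a few worked values. A hedged restatement; fitsClusterBudget is a made-up name for illustration, the real code computes this inline:

    // Cluster only while the whole group fits in 8 DWORDs, with each load
    // rounded up to a whole DWORD.
    static bool fitsClusterBudget(unsigned NumLoads, unsigned NumBytes) {
      const unsigned LoadSize = NumBytes / NumLoads;              // average bytes per load
      const unsigned NumDWORDs = ((LoadSize + 3) / 4) * NumLoads; // round up, then scale
      return NumDWORDs <= 8;
    }
    // 4 loads x 4 bytes -> 4 DWORDs  -> clustered
    // 4 loads x 8 bytes -> 8 DWORDs  -> clustered
    // 5 loads x 8 bytes -> 10 DWORDs -> rejected, matching case (2) in the
    //                                   comment above (LoadSize 5-8 caps at 4 ops)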
+ getParser().parseStringToEndOfStatement(); + return false; +} + /// parseDirectiveEven /// ::= .even bool X86AsmParser::parseDirectiveEven(SMLoc L) { diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ValueTracking.h" diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -13,13 +13,16 @@ #include "llvm/Transforms/IPO/Attributor.h" +#include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumeBundleQueries.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -261,6 +261,10 @@ // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= extern cl::opt ViewBlockFreqFuncName; +static cl::opt + PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, + cl::desc("Use the old CFG function hashing")); + // Return a string describing the branch condition that can be // used in static branch probability heuristics: static std::string getBranchCondString(Instruction *TI) { @@ -620,7 +624,8 @@ } // end anonymous namespace // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index -// value of each BB in the CFG. The higher 32 bits record the number of edges. +// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers +// of selects, indirect calls, mem ops and edges. template void FuncPGOInstrumentation::computeCFGHash() { std::vector Indexes; @@ -639,12 +644,31 @@ } JC.update(Indexes); - // Hash format for context sensitive profile. Reserve 4 bits for other - // information. - FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | - (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | - //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | - (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); + JamCRC JCH; + if (PGOOldCFGHashing) { + // Hash format for context sensitive profile. Reserve 4 bits for other + // information. + FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | + (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | + //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | + (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); + } else { + // The higher 32 bits. 
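[Editorial sketch, not part of the patch] The 64-bit hash produced by the code just below has the following layout; this is a restatement of what the code does, not new behaviour:

    // bits 63..60 : cleared by the 0x0FFFFFFFFFFFFFFF mask, reserved (the
    //               IsCS handling right after it uses this space)
    // bits 59..28 : CRC32 over {#selects, #indirect-call sites, #mem-op
    //               sites, #edges}, i.e. JCH.getCRC() << 28
    // bits 31..0  : CRC32 over the per-BB indexes, i.e. JC.getCRC(); the two
    //               fields overlap in bits 31..28 because they are combined
    //               with '+' rather than '|'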
+ auto updateJCH = [&JCH](uint64_t Num) { + uint8_t Data[8]; + support::endian::write64le(Data, Num); + JCH.update(Data); + }; + updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); + updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); + updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); + updateJCH((uint64_t)MST.AllEdges.size()); + + // Hash format for context sensitive profile. Reserve 4 bits for other + // information. + FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); + } + // Reserve bit 60-63 for other information purpose. FunctionHash &= 0x0FFFFFFFFFFFFFFF; if (IsCS) @@ -653,8 +677,12 @@ << " CRC = " << JC.getCRC() << ", Selects = " << SIVisitor.getNumOfSelectInsts() << ", Edges = " << MST.AllEdges.size() << ", ICSites = " - << ValueSites[IPVK_IndirectCallTarget].size() - << ", Hash = " << FunctionHash << "\n";); + << ValueSites[IPVK_IndirectCallTarget].size()); + if (!PGOOldCFGHashing) { + LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() + << ", High32 CRC = " << JCH.getCRC()); + } + LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); } // Check if we can safely rename this Comdat function. diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -33,6 +33,7 @@ LoopInstSimplify.cpp LoopInterchange.cpp LoopLoadElimination.cpp + LoopNestPassManager.cpp LoopPassManager.cpp LoopPredication.cpp LoopRerollPass.cpp diff --git a/llvm/lib/Transforms/Scalar/LoopNestPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopNestPassManager.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Transforms/Scalar/LoopNestPassManager.cpp @@ -0,0 +1,109 @@ +//===- LoopNestPassManager.cpp - Loop nest pass management ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/LoopNestPassManager.h" + +namespace llvm { + +template <> +PreservedAnalyses +PassManager::run(LoopNest &LN, LoopNestAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LNPMUpdater &U) { + PreservedAnalyses PA = PreservedAnalyses::all(); + + // Request PassInstrumentation from analysis manager, will use it to run + // instrumenting callbacks for the passes later. + PassInstrumentation PI = AM.getResult(LN, AR); + + if (DebugLogging) + dbgs() << "Starting LoopNest pass manager run.\n"; + + for (unsigned I = 0, E = Passes.size(); I != E; ++I) { + auto *Pass = Passes[I].get(); + + // Check the PassInstrumentation's BeforePass callbacks before running the + // pass, skip its execution completely if asked to (callback returns + // false). 
+ if (!PI.runBeforePass(*Pass, LN)) + continue; + + if (DebugLogging) + dbgs() << "Running pass: " << Pass->name() << " on " << LN.getName() + << "\n"; + + PreservedAnalyses PassPA; + { + TimeTraceScope TimeScope(Pass->name(), LN.getName()); + PassPA = Pass->run(LN, AM, AR, U); + } + + // Do not pass deleted LoopNest into the instrumentation + if (U.skipCurrentLoopNest()) + PI.runAfterPassInvalidated(*Pass); + else + PI.runAfterPass(*Pass, LN); + + if (U.skipCurrentLoopNest()) { + PA.intersect(std::move(PassPA)); + break; + } + + // We shouldn't allow invalidating LoopNestAnalysis in AM since otherwise + // LN will be dangling. Currently the loop nest passes cannot explicitly + // update the LoopNest structure, so we must first check whether + // LoopNestAnalysis is preserved, and mark it as preserved + // regardlessly afterward. If the analysis is not preserved in the first + // place, we would have to manually reconstruct the LoopNest. + // FIXME: This is quite inefficient. Consider reimplementing LoopNest to + // allow dynamic modifications by the loop nest passes to avoid + // reconstructing it every time. + bool IsLoopNestPreserved = + PassPA.getChecker().preserved(); + + // No need to invalidate other loop nest analyses since they are running on + // Loop and hence can be updated dynamically. + PassPA.preserve(); + AM.invalidate(LN, PassPA); + + if (!IsLoopNestPreserved) + // The LoopNest structure had been altered, reconstruct it here. + LN.reconstructInplace(AR.SE); + PA.intersect(std::move(PassPA)); + } + + // Invalidation for the current loop nest should be handled above, and other + // loop nest analysis results shouldn't be impacted by runs over this loop + // nest. Therefore, the remaining analysis results in the AnalysisManager are + // preserved. We mark this with a set so that we don't need to inspect each + // one individually. + PA.preserveSet>(); + + if (DebugLogging) + dbgs() << "Finished LoopNest pass manager run.\n"; + + return PA; +} + +template class PassManager; + +PrintLoopNestPass::PrintLoopNestPass() : OS(dbgs()) {} +PrintLoopNestPass::PrintLoopNestPass(raw_ostream &OS, const std::string &Banner) + : OS(OS), Banner(Banner) {} + +PreservedAnalyses PrintLoopNestPass::run(LoopNest &LN, + LoopNestAnalysisManager &, + LoopStandardAnalysisResults &, + LNPMUpdater &) { + OS << LN << "\n"; + return PreservedAnalyses::all(); +} + +} // namespace llvm diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp --- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp @@ -39,9 +39,6 @@ if (!PI.runBeforePass(*Pass, L)) continue; - if (DebugLogging) - dbgs() << "Running pass: " << Pass->name() << " on " << L; - PreservedAnalyses PassPA; { TimeTraceScope TimeScope(Pass->name(), L.getName()); diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -108,14 +108,15 @@ /// insert a phi-node, otherwise LCSSA will be broken. /// The function is just a helper function for llvm::UnrollLoop that returns /// true if this situation occurs, indicating that LCSSA needs to be fixed. 
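[Editorial sketch, not part of the patch] Returning to the new LoopNestPassManager above: a client pass only has to match the run() signature used by NoOpLoopNestPass and PrintLoopNestPass. A minimal hedged example; CountLoopsPass and its body are hypothetical, only the signature and header paths are taken from this patch:

    #include "llvm/Analysis/LoopAnalysisManager.h"
    #include "llvm/Analysis/LoopNestAnalysis.h"
    #include "llvm/IR/PassManager.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Transforms/Scalar/LoopNestPassManager.h" // new header in this patch
    using namespace llvm;

    struct CountLoopsPass : PassInfoMixin<CountLoopsPass> {
      PreservedAnalyses run(LoopNest &LN, LoopNestAnalysisManager &AM,
                            LoopStandardAnalysisResults &AR, LNPMUpdater &U) {
        // Purely observational: report the nest and leave all analyses intact.
        errs() << LN.getName() << ": " << LN.getNumLoops() << " loops\n";
        return PreservedAnalyses::all();
      }
    };

Such a pass would be added with LNPM.addPass(CountLoopsPass()) or wrapped into a function pipeline via createFunctionToLoopNestPassAdaptor, exactly as the pipeline-parsing code above does for the registered passes.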
-static bool needToInsertPhisForLCSSA(Loop *L, std::vector Blocks, +static bool needToInsertPhisForLCSSA(Loop *L, + const std::vector &Blocks, LoopInfo *LI) { for (BasicBlock *BB : Blocks) { if (LI->getLoopFor(BB) == L) continue; for (Instruction &I : *BB) { for (Use &U : I.operands()) { - if (auto Def = dyn_cast(U)) { + if (const auto *Def = dyn_cast(U)) { Loop *DefLoop = LI->getLoopFor(Def->getParent()); if (!DefLoop) continue; diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1537,6 +1537,12 @@ appendReversedLoopsToWorklist(LI, Worklist); } +void llvm::appendLoopNestToWorklist( + Loop &Root, SmallPriorityWorklist &Worklist) { + Worklist.insert(&Root); + appendLoopsToWorklist(Root, Worklist); +} + Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM) { Loop &New = *LI->AllocateLoop(); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7459,7 +7459,7 @@ // Finally, if tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the end of the latch. - if (CM.foldTailByMasking()) { + if (CM.foldTailByMasking() && !Legal->getReductionVars().empty()) { Builder.setInsertPoint(VPBB); auto *Cond = RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), Plan); for (auto &Reduction : Legal->getReductionVars()) { diff --git a/llvm/test/Analysis/ConstantFolding/abs.ll b/llvm/test/Analysis/ConstantFolding/abs.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/abs.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -constprop -S | FileCheck %s + +declare i8 @llvm.abs.i8(i8, i1) +declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1) + +define i8 @undef_val_min_poison() { +; CHECK-LABEL: @undef_val_min_poison( +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.abs.i8(i8 undef, i1 true) +; CHECK-NEXT: ret i8 [[R]] +; + %r = call i8 @llvm.abs.i8(i8 undef, i1 true) + ret i8 %r +} + +define i8 @undef_val_min_not_poison() { +; CHECK-LABEL: @undef_val_min_not_poison( +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.abs.i8(i8 undef, i1 false) +; CHECK-NEXT: ret i8 [[R]] +; + %r = call i8 @llvm.abs.i8(i8 undef, i1 false) + ret i8 %r +} + +define i8 @min_val_min_poison() { +; CHECK-LABEL: @min_val_min_poison( +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.abs.i8(i8 -128, i1 true) +; CHECK-NEXT: ret i8 [[R]] +; + %r = call i8 @llvm.abs.i8(i8 -128, i1 true) + ret i8 %r +} + +define i8 @min_val_min_not_poison() { +; CHECK-LABEL: @min_val_min_not_poison( +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.abs.i8(i8 -128, i1 false) +; CHECK-NEXT: ret i8 [[R]] +; + %r = call i8 @llvm.abs.i8(i8 -128, i1 false) + ret i8 %r +} + +define <8 x i8> @vec_const() { +; CHECK-LABEL: @vec_const( +; CHECK-NEXT: [[R:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> , i1 true) +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %r = call <8 x i8> @llvm.abs.v8i8(<8 x i8> , i1 1) + ret <8 x i8> %r +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll @@ -950,22 +950,22 @@ define amdgpu_kernel void 
@simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0, ; GFX6-LABEL: simplify_bfe_u32_multi_use_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd -; GFX6-NEXT: s_mov_b32 s6, -1 -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_load_dword s8, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7] -; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_and_b32 s8, s8, 63 -; GFX6-NEXT: s_bfe_u32 s9, s8, 0x20002 -; GFX6-NEXT: v_mov_b32_e32 v1, s9 -; GFX6-NEXT: v_mov_b32_e32 v0, s8 -; GFX6-NEXT: buffer_store_dword v1, off, s[4:7], 0 -; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 -; GFX6-NEXT: s_endpgm +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_and_b32 s0, s0, 63 +; GFX6-NEXT: s_bfe_u32 s1, s0, 0x20002 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: buffer_store_dword v1, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; GFX6-NEXT: s_endpgm i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 { %src = load i32, i32 addrspace(1)* %in, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll @@ -17,57 +17,56 @@ ; ; GFX9-NOUNALIGNED-LABEL: v_load_constant_v3i32_align1: ; GFX9-NOUNALIGNED: ; %bb.0: -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NOUNALIGNED-NEXT: v_add_co_u32_e32 v2, vcc, 11, v0 -; GFX9-NOUNALIGNED-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v4, v[2:3], off offset:-6 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v5, v[2:3], off offset:-5 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v6, v[2:3], off offset:-4 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v7, v[2:3], off offset:-3 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v8, v[2:3], off offset:-2 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v9, v[2:3], off offset:-1 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v10, v[2:3], off -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v0, v[0:1], off -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v1, v[2:3], off offset:-10 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v11, v[2:3], off offset:-9 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v12, v[2:3], off offset:-8 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v2, v[2:3], off offset:-7 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v3, 0xff -; GFX9-NOUNALIGNED-NEXT: s_movk_i32 s4, 0xff -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v13, 8 -; GFX9-NOUNALIGNED-NEXT: s_mov_b32 s5, 8 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(11) -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v4, v13, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v5, v5, v3 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v6, v6, v3 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(7) -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v8, v13, v8 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v9, v9, v3 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v10, v10, v3 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v1, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v11, s4, v11 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v12, s4, v12 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v2, v2, v3, v4 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v0, s4, v1 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v11 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v11, 24, v12 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 16, v5 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 24, v6 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v3, v7, v3, v8 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v9 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 24, v10 -; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v0, v0, v1, v11 -; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v1, v2, v4, v5 -; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v2, v3, v6, v7 -; GFX9-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NOUNALIGNED-NEXT: v_add_co_u32_e32 v2, vcc, 11, v0 +; GFX9-NOUNALIGNED-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v0, v[0:1], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v1, v[2:3], off offset:-10 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v4, v[2:3], off offset:-9 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v5, v[2:3], off offset:-8 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v6, v[2:3], off offset:-7 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v7, v[2:3], off offset:-6 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v8, v[2:3], off offset:-5 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v9, v[2:3], off offset:-4 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v10, v[2:3], off offset:-3 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v11, v[2:3], off offset:-2 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v12, v[2:3], off offset:-1 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v2, v[2:3], off +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v3, 0xff +; GFX9-NOUNALIGNED-NEXT: s_movk_i32 s4, 0xff +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v13, 8 +; GFX9-NOUNALIGNED-NEXT: s_mov_b32 s5, 8 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10) +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v1, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9) +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v4, s4, v4 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(8) +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v5, s4, v5 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v0, s4, v1 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6) +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v7, v13, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v8, v8, v3 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v9, v9, v3 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v4 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v11, v13, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; 
GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v12, v12, v3 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v2, v2, v3 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 24, v5 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v5, v6, v3, v7 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v8 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 24, v9 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v3, v10, v3, v11 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 16, v12 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v0, v0, v1, v4 +; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v1, v5, v6, v7 +; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v2, v3, v8, v2 +; GFX9-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-UNALIGNED-LABEL: v_load_constant_v3i32_align1: ; GFX7-UNALIGNED: ; %bb.0: @@ -156,31 +155,30 @@ ; ; GFX9-NOUNALIGNED-LABEL: v_load_constant_v3i32_align2: ; GFX9-NOUNALIGNED: ; %bb.0: -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NOUNALIGNED-NEXT: v_add_co_u32_e32 v2, vcc, 10, v0 -; GFX9-NOUNALIGNED-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v4, v[2:3], off -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v0, v[0:1], off -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v1, v[2:3], off offset:-8 -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v5, v[2:3], off offset:-6 -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v6, v[2:3], off offset:-4 -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v2, v[2:3], off offset:-2 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v3, 0xffff -; GFX9-NOUNALIGNED-NEXT: s_mov_b32 s4, 0xffff -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v4, v4, v3 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v6, v6, v3 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v0, s4, v1 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v1, v5, v3, v6 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v2, v2, v3, v4 -; GFX9-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NOUNALIGNED-NEXT: v_add_co_u32_e32 v2, vcc, 10, v0 +; GFX9-NOUNALIGNED-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v0, v[0:1], off +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v1, v[2:3], off offset:-8 +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v4, v[2:3], off offset:-6 +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v5, v[2:3], off offset:-4 +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v6, v[2:3], off offset:-2 +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v2, v[2:3], off +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v3, 0xffff +; GFX9-NOUNALIGNED-NEXT: s_mov_b32 s4, 0xffff +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v5, v5, v3 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v2, v2, v3 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 
v0, v0, s4, v1 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v1, v4, v3, v5 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v2, v6, v3, v2 +; GFX9-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-UNALIGNED-LABEL: v_load_constant_v3i32_align2: ; GFX7-UNALIGNED: ; %bb.0: @@ -194,35 +192,36 @@ ; ; GFX7-NOUNALIGNED-LABEL: v_load_constant_v3i32_align2: ; GFX7-NOUNALIGNED: ; %bb.0: -; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s6, 0 -; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s7, 0xf000 -; GFX7-NOUNALIGNED-NEXT: s_mov_b64 s[4:5], 0 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:10 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:2 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64 offset:4 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:6 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:8 -; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s4, 0xffff -; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s4, v3 -; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v3, s4, v4 -; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v4, s4, v5 -; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v5, s4, v6 -; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v6, s4, v0 -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v0, s4, v2 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 16, v5 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v1, v2 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v4, v3 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v6, v5 -; GFX7-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] +; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s6, 0 +; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NOUNALIGNED-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 offset:2 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:4 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64 offset:6 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:8 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:10 +; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s4, 0xffff +; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s4, v2 +; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v2, s4, v3 +; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v3, s4, v4 +; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v4, s4, v5 +; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v5, s4, v6 +; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v0 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v1, v2 +; GFX7-NOUNALIGNED-NEXT: 
v_or_b32_e32 v1, v3, v4 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v5, v6 +; GFX7-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 2 ret <3 x i32> %load } @@ -399,97 +398,101 @@ ; ; GFX9-NOUNALIGNED-LABEL: s_load_constant_v3i32_align1: ; GFX9-NOUNALIGNED: ; %bb.0: -; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 1 -; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 2 -; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v5, s3 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v4, s2 -; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 3 -; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v7, s3 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v6, s2 -; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 4 -; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v9, s3 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v8, s2 -; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 5 -; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v11, s3 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v10, s2 -; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 6 -; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v13, s3 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v12, s2 -; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 7 -; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v15, s3 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v14, s2 -; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 8 -; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v17, s3 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v16, s2 -; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 9 -; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v19, s3 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v18, s2 -; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 10 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NOUNALIGNED-NEXT: s_add_u32 s0, s0, 11 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v20, v[10:11], off -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v21, v[12:13], off -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v14, v[14:15], off -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v15, v[16:17], off -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v16, v[18:19], off -; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s1, s1, 0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v11, s3 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v13, s1 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v10, s2 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v12, s0 -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v10, v[10:11], off -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v11, v[12:13], off -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v0, v[0:1], off -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v1, v[2:3], off -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v2, v[4:5], off -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v3, v[6:7], off -; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v4, v[8:9], off -; GFX9-NOUNALIGNED-NEXT: s_mov_b32 s1, 8 -; GFX9-NOUNALIGNED-NEXT: s_movk_i32 s0, 0xff -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v5, 0xff -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v6, 8 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v1, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v0, s0, v1 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s0, v2 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v2, s0, v3 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v0, v0, v1, v2 -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, v21, v5 -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v2, v14, v5 -; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v0, v6, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v4, v5, v0 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v1, v0, v1, v2 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v0, v6, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v2, v11, v5 -; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, v10, v5 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v15, v5, v0 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v2, v0, v1, v2 -; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 -; GFX9-NOUNALIGNED-NEXT: ; return to shader part epilog +; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 1 +; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 2 +; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v5, s3 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v4, s2 +; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 3 +; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v7, s3 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v6, s2 +; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 4 +; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v9, s3 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v8, s2 +; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 5 +; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v11, s3 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v10, s2 +; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 6 +; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v13, s3 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v12, s2 +; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 7 +; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v15, s3 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v14, s2 +; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 8 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v16, v[0:1], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v17, v[2:3], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v18, v[4:5], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v19, v[6:7], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v8, v[8:9], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v9, v[10:11], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v10, v[12:13], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v11, v[14:15], off +; 
GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 9 +; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 10 +; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 +; GFX9-NOUNALIGNED-NEXT: s_add_u32 s0, s0, 11 +; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s1, s1, 0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v5, s3 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v7, s1 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v4, s2 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v6, s0 +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v12, v[0:1], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v2, v[2:3], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v3, v[4:5], off +; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v4, v[6:7], off +; GFX9-NOUNALIGNED-NEXT: s_movk_i32 s0, 0xff +; GFX9-NOUNALIGNED-NEXT: s_mov_b32 s1, 8 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v5, 0xff +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v6, 8 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10) +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v0, s1, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9) +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s0, v18 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(8) +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v7, s0, v19 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v16, s0, v0 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 24, v7 +; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v0, v0, v1, v7 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, v10, v5 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v7, v11, v5 +; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v0, v6, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v8, v5, v0 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 24, v7 +; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v1, v0, v1, v7 +; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_sdwa v0, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, v3, v5 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v2, v4, v5 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v12, v5, v0 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v2, v0, v1, v2 +; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 +; GFX9-NOUNALIGNED-NEXT: ; return to shader part epilog ; ; GFX7-UNALIGNED-LABEL: s_load_constant_v3i32_align1: ; GFX7-UNALIGNED: ; %bb.0: @@ -585,52 +588,52 @@ ; ; GFX9-NOUNALIGNED-LABEL: s_load_constant_v3i32_align2: ; GFX9-NOUNALIGNED: ; %bb.0: -; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 2 -; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 4 -; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v5, s3 -; 
GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v4, s2 -; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 6 -; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v7, s3 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v6, s2 -; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 8 -; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NOUNALIGNED-NEXT: s_add_u32 s0, s0, 10 -; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s1, s1, 0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v11, s1 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v9, s3 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v10, s0 -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v8, s2 -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v10, v[10:11], off -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v0, v[0:1], off -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v1, v[2:3], off -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v2, v[4:5], off -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v3, v[6:7], off -; GFX9-NOUNALIGNED-NEXT: global_load_ushort v4, v[8:9], off -; GFX9-NOUNALIGNED-NEXT: s_mov_b32 s0, 0xffff -; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v5, 0xffff -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s0, v1 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v0, s0, v1 -; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v0, v3, v5 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v1, v2, v5, v0 -; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v0, v10, v5 -; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) -; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v2, v4, v5, v0 -; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 -; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 -; GFX9-NOUNALIGNED-NEXT: ; return to shader part epilog +; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 2 +; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 4 +; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v5, s3 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v4, s2 +; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 6 +; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v7, s3 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v6, s2 +; GFX9-NOUNALIGNED-NEXT: s_add_u32 s2, s0, 8 +; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s3, s1, 0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NOUNALIGNED-NEXT: s_add_u32 s0, s0, 10 +; GFX9-NOUNALIGNED-NEXT: s_addc_u32 s1, s1, 0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v9, s3 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v11, s1 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v10, s0 +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v8, s2 +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v0, v[0:1], off +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v1, v[2:3], off +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v2, v[4:5], off +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v3, v[6:7], off +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v4, v[8:9], off +; GFX9-NOUNALIGNED-NEXT: global_load_ushort v5, v[10:11], off +; GFX9-NOUNALIGNED-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v6, 0xffff +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) +; 
GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s0, v1 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v0, v0, s0, v1 +; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v0, v3, v6 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v1, v2, v6, v0 +; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) +; GFX9-NOUNALIGNED-NEXT: v_and_b32_e32 v0, v5, v6 +; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NOUNALIGNED-NEXT: v_and_or_b32 v2, v4, v6, v0 +; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 +; GFX9-NOUNALIGNED-NEXT: ; return to shader part epilog ; ; GFX7-UNALIGNED-LABEL: s_load_constant_v3i32_align2: ; GFX7-UNALIGNED: ; %bb.0: @@ -648,37 +651,35 @@ ; ; GFX7-NOUNALIGNED-LABEL: s_load_constant_v3i32_align2: ; GFX7-NOUNALIGNED: ; %bb.0: -; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s2, -1 -; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:10 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v1, off, s[0:3], 0 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v2, off, s[0:3], 0 offset:2 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v3, off, s[0:3], 0 offset:4 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v4, off, s[0:3], 0 offset:6 -; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v5, off, s[0:3], 0 offset:8 -; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s0, 0xffff -; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v0, s0, v0 -; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s0, v1 -; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v2, s0, v2 -; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v3, s0, v3 -; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v4, s0, v4 -; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) -; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v5, s0, v5 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v0 -; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v1, v2 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v3, v4 -; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v5, v6 -; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 -; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 -; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 -; GFX7-NOUNALIGNED-NEXT: ; return to shader part epilog +; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s2, -1 +; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v0, off, s[0:3], 0 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:2 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v2, off, s[0:3], 0 offset:4 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v3, off, s[0:3], 0 offset:6 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v4, off, s[0:3], 0 offset:8 +; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v5, off, s[0:3], 0 offset:10 +; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s0, 0xffff +; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v0, s0, v0 +; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v1, s0, v1 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v3, 
s0, v3 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v2, s0, v2 +; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v5, s0, v5 +; GFX7-NOUNALIGNED-NEXT: v_and_b32_e32 v4, s0, v4 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v2, v3 +; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v4, v5 +; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 +; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 +; GFX7-NOUNALIGNED-NEXT: ; return to shader part epilog %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 2 ret <3 x i32> %load } diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll --- a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll @@ -23,8 +23,6 @@ ret void } -declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #0 -; Function Attrs: nounwind writeonly declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32 immarg) #1 ; Function Attrs: nounwind readnone speculatable diff --git a/llvm/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll b/llvm/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll --- a/llvm/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll @@ -3,7 +3,7 @@ ; GCN-LABEL: {{^}}amdhsa_trap_num_sgprs ; TRAP-HANDLER-ENABLE: NumSgprs: 61 -; TRAP-HANDLER-DISABLE: NumSgprs: 79 +; TRAP-HANDLER-DISABLE: NumSgprs: 77 define amdgpu_kernel void @amdhsa_trap_num_sgprs( i32 addrspace(1)* %out0, i32 %in0, i32 addrspace(1)* %out1, i32 %in1, diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll --- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll @@ -840,14 +840,14 @@ ; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 10 ; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 11 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12 +; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 +; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 +; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15 ; GCN: buffer_store_dword [[REG8]], {{.*$}} ; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4 ; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8 ; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12 ; GCN: buffer_store_dword [[REG12]], {{.*}} offset:16 -; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 -; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 -; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15 ; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20 ; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:28 @@ -874,14 +874,14 @@ ; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 0x41200000 ; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 0x41300000 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000 +; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000 +; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000 +; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000 ; GCN: buffer_store_dword [[REG8]], {{.*$}} ; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4 ; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8 ; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12 ; GCN: buffer_store_dword [[REG12]], {{.*}} offset:16 -; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000 -; GCN: v_mov_b32_e32 
[[REG14:v[0-9]+]], 0x41600000 -; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000 ; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20 ; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:28 diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll --- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll +++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll @@ -177,13 +177,13 @@ ; SI-NOSDWA: v_or_b32_e32 [[VAL2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} ; SI-NOSDWA: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL1]] ; SI-NOSDWA: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL2]] -; SI-SDWA: v_or_b32_sdwa ; SI-SDWA: v_or_b32_e32 +; SI-SDWA: v_or_b32_sdwa ; SI-SDWA: v_or_b32_e32 -; SI-SDWA: v_or_b32_e32 [[VAL1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} -; SI-SDWA: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL1]] ; SI-SDWA: v_or_b32_sdwa +; SI-SDWA: v_or_b32_e32 [[VAL1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} ; SI-SDWA: v_or_b32_e32 [[VAL2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} +; SI-SDWA: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL1]] ; SI-SDWA: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL2]] ; SI: v_cmp_eq_u32_e32 vcc, 0 ; SI: v_cmp_ne_u64_e32 vcc, 0 diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -636,81 +636,81 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind { ; SI-LABEL: load_v4i8_to_v4f32_2_uses: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s6, 0 -; SI-NEXT: s_mov_b32 s7, s3 -; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_movk_i32 s8, 0xff -; SI-NEXT: s_mov_b32 s6, s2 -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v4 -; SI-NEXT: v_lshrrev_b32_e32 v6, 24, v4 -; SI-NEXT: v_and_b32_e32 v7, 0xff00, v4 -; SI-NEXT: v_cvt_f32_ubyte3_e32 v3, v4 -; SI-NEXT: v_cvt_f32_ubyte2_e32 v2, v4 -; SI-NEXT: v_cvt_f32_ubyte1_e32 v1, v4 -; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v4 -; SI-NEXT: v_add_i32_e32 v4, vcc, 9, v4 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_and_b32_e32 v0, s8, v4 -; SI-NEXT: v_add_i32_e32 v2, vcc, 9, v5 -; SI-NEXT: v_or_b32_e32 v0, v7, v0 -; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v6 -; SI-NEXT: v_and_b32_e32 v2, s8, v2 -; SI-NEXT: v_add_i32_e32 v0, vcc, 0x900, v0 -; SI-NEXT: v_or_b32_e32 v1, v1, v2 -; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; SI-NEXT: v_or_b32_e32 v0, v1, v0 -; SI-NEXT: v_add_i32_e32 v0, vcc, 0x9000000, v0 -; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 -; SI-NEXT: s_endpgm +; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xb +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: s_mov_b32 s3, s11 +; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_load_dword v4, v[0:1], s[0:3], 0 addr64 +; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_movk_i32 s0, 0xff +; 
SI-NEXT: s_mov_b32 s6, s10 +; SI-NEXT: s_mov_b32 s7, s11 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v4 +; SI-NEXT: v_lshrrev_b32_e32 v6, 24, v4 +; SI-NEXT: v_and_b32_e32 v7, 0xff00, v4 +; SI-NEXT: v_cvt_f32_ubyte3_e32 v3, v4 +; SI-NEXT: v_cvt_f32_ubyte2_e32 v2, v4 +; SI-NEXT: v_cvt_f32_ubyte1_e32 v1, v4 +; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v4 +; SI-NEXT: v_add_i32_e32 v4, vcc, 9, v4 +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_and_b32_e32 v0, s0, v4 +; SI-NEXT: v_add_i32_e32 v2, vcc, 9, v5 +; SI-NEXT: v_or_b32_e32 v0, v7, v0 +; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v6 +; SI-NEXT: v_and_b32_e32 v2, s0, v2 +; SI-NEXT: v_add_i32_e32 v0, vcc, 0x900, v0 +; SI-NEXT: v_or_b32_e32 v1, v1, v2 +; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; SI-NEXT: v_or_b32_e32 v0, v1, v0 +; SI-NEXT: v_add_i32_e32 v0, vcc, 0x9000000, v0 +; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; SI-NEXT: s_endpgm ; ; VI-LABEL: load_v4i8_to_v4f32_2_uses: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 -; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: v_mov_b32_e32 v5, 9 -; VI-NEXT: s_movk_i32 s8, 0x900 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 -; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v4, v[0:1] -; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_mov_b32 s6, s2 -; VI-NEXT: s_mov_b32 s7, s3 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshrrev_b32_e32 v6, 24, v4 -; VI-NEXT: v_cvt_f32_ubyte3_e32 v3, v4 -; VI-NEXT: v_cvt_f32_ubyte2_e32 v2, v4 -; VI-NEXT: v_cvt_f32_ubyte1_e32 v1, v4 -; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v4 -; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 -; VI-NEXT: v_and_b32_e32 v7, 0xffffff00, v4 -; VI-NEXT: v_add_u16_e32 v8, 9, v4 -; VI-NEXT: v_add_u16_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v6 -; VI-NEXT: v_or_b32_sdwa v0, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; VI-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; VI-NEXT: v_mov_b32_e32 v2, s8 -; VI-NEXT: v_add_u16_e32 v0, s8, v0 -; VI-NEXT: v_add_u16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NEXT: v_or_b32_e32 v0, v0, v1 -; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 -; VI-NEXT: s_endpgm +; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x2c +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: v_mov_b32_e32 v5, 9 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: flat_load_dword v4, v[0:1] +; VI-NEXT: s_mov_b32 s6, s10 +; VI-NEXT: s_mov_b32 s7, s11 +; VI-NEXT: s_movk_i32 s0, 0x900 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_lshrrev_b32_e32 v6, 24, v4 +; VI-NEXT: v_cvt_f32_ubyte3_e32 v3, v4 +; VI-NEXT: v_cvt_f32_ubyte2_e32 v2, v4 +; VI-NEXT: v_cvt_f32_ubyte1_e32 v1, v4 +; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v4 +; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; VI-NEXT: v_and_b32_e32 v7, 0xffffff00, 
v4 +; VI-NEXT: v_add_u16_e32 v8, 9, v4 +; VI-NEXT: v_add_u16_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v6 +; VI-NEXT: v_or_b32_sdwa v0, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; VI-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: v_add_u16_e32 v0, s0, v0 +; VI-NEXT: v_add_u16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT: v_or_b32_e32 v0, v0, v1 +; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; VI-NEXT: s_endpgm %tid.x = call i32 @llvm.amdgcn.workitem.id.x() %in.ptr = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x %load = load <4 x i8>, <4 x i8> addrspace(1)* %in.ptr, align 4 @@ -725,41 +725,42 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind { ; SI-LABEL: load_v7i8_to_v7f32: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s2, 0 -; SI-NEXT: s_mov_b32 s3, s7 -; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: buffer_load_ubyte v2, v[0:1], s[0:3], 0 addr64 -; SI-NEXT: buffer_load_ubyte v3, v[0:1], s[0:3], 0 addr64 offset:1 -; SI-NEXT: buffer_load_ubyte v6, v[0:1], s[0:3], 0 addr64 offset:2 -; SI-NEXT: buffer_load_ubyte v4, v[0:1], s[0:3], 0 addr64 offset:3 -; SI-NEXT: buffer_load_ubyte v7, v[0:1], s[0:3], 0 addr64 offset:4 -; SI-NEXT: buffer_load_ubyte v5, v[0:1], s[0:3], 0 addr64 offset:5 -; SI-NEXT: buffer_load_ubyte v8, v[0:1], s[0:3], 0 addr64 offset:6 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(6) -; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v2 -; SI-NEXT: s_waitcnt vmcnt(5) -; SI-NEXT: v_cvt_f32_ubyte2_e32 v1, v3 -; SI-NEXT: s_waitcnt vmcnt(3) -; SI-NEXT: v_lshlrev_b32_e32 v9, 8, v4 -; SI-NEXT: v_or_b32_e32 v2, v9, v6 -; SI-NEXT: s_waitcnt vmcnt(2) -; SI-NEXT: v_cvt_f32_ubyte0_e32 v4, v7 -; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_cvt_f32_ubyte0_e32 v7, v8 -; SI-NEXT: v_cvt_f32_ubyte3_e32 v3, v2 -; SI-NEXT: v_cvt_f32_ubyte2_e32 v5, v5 -; SI-NEXT: v_cvt_f32_ubyte2_e32 v2, v2 -; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:24 -; SI-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0 offset:16 -; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 -; SI-NEXT: s_endpgm +; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_load_ubyte v2, v[0:1], s[0:3], 0 addr64 +; SI-NEXT: buffer_load_ubyte v3, v[0:1], s[0:3], 0 addr64 offset:1 +; SI-NEXT: buffer_load_ubyte v6, v[0:1], s[0:3], 0 addr64 offset:2 +; SI-NEXT: buffer_load_ubyte v4, v[0:1], s[0:3], 0 addr64 offset:3 +; SI-NEXT: buffer_load_ubyte v7, v[0:1], s[0:3], 0 addr64 offset:4 +; SI-NEXT: buffer_load_ubyte v5, v[0:1], s[0:3], 0 addr64 offset:5 +; SI-NEXT: buffer_load_ubyte v8, v[0:1], s[0:3], 0 addr64 offset:6 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt vmcnt(6) +; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v2 +; SI-NEXT: s_waitcnt vmcnt(5) +; SI-NEXT: v_cvt_f32_ubyte2_e32 v1, v3 +; SI-NEXT: s_waitcnt 
vmcnt(3) +; SI-NEXT: v_lshlrev_b32_e32 v9, 8, v4 +; SI-NEXT: v_or_b32_e32 v3, v9, v6 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_cvt_f32_ubyte2_e32 v5, v5 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_ubyte0_e32 v2, v8 +; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:24 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; SI-NEXT: v_cvt_f32_ubyte0_e32 v4, v7 +; SI-NEXT: v_cvt_f32_ubyte3_e32 v3, v2 +; SI-NEXT: v_cvt_f32_ubyte2_e32 v2, v2 +; SI-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0 offset:16 +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; SI-NEXT: s_endpgm ; ; VI-LABEL: load_v7i8_to_v7f32: ; VI: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/fix-wwm-vgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/fix-wwm-vgpr-copy.ll --- a/llvm/test/CodeGen/AMDGPU/fix-wwm-vgpr-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/fix-wwm-vgpr-copy.ll @@ -1,11 +1,5 @@ ; RUN: llc -mtriple=amdgcn--amdpal -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0 -declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0 -declare i32 @llvm.amdgcn.wwm.i32(i32) #1 -declare void @llvm.amdgcn.tbuffer.store.f32(float, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #2 -declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #2 - define amdgpu_hs void @foo(i32 inreg %arg, <4 x i32> inreg %buffer) { entry: br label %work @@ -19,7 +13,7 @@ br i1 %tmp607, label %bb49, label %bb54 bb49: - tail call void @llvm.amdgcn.tbuffer.store.f32(float 1.000000e+00, <4 x i32> %buffer, i32 0, i32 1, i32 1, i32 4, i32 4, i32 7, i1 true, i1 false) #7 + call void @llvm.amdgcn.raw.tbuffer.store.f32(float 1.0, <4 x i32> %buffer, i32 4, i32 1, i32 116, i32 1) ret void bb54: @@ -42,6 +36,10 @@ br i1 %tmp34, label %bb602, label %bb42 } -attributes #0 = { convergent nounwind readnone } -attributes #1 = { nounwind readnone speculatable } -attributes #2 = { nounwind writeonly } +declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0 +declare i32 @llvm.amdgcn.wwm.i32(i32) #1 +declare void @llvm.amdgcn.raw.tbuffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg, i32 immarg) #2 + +attributes #0 = { convergent nounwind readnone willreturn } +attributes #1 = { convergent nounwind readnone speculatable willreturn } +attributes #2 = { nounwind willreturn writeonly } diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll --- a/llvm/test/CodeGen/AMDGPU/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/fshr.ll @@ -1210,171 +1210,167 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2) { ; SI-LABEL: v_fshr_v2i24: ; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 -; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 -; SI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 -; SI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 -; SI-NEXT: buffer_load_dword v5, off, s[0:3], s32 -; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:4 -; SI-NEXT: s_mov_b32 s4, 0xffffff -; SI-NEXT: s_mov_b32 s5, 0xaaaaaaab -; SI-NEXT: v_add_i32_e32 v7, vcc, 3, v0 -; SI-NEXT: v_add_i32_e32 v8, vcc, 4, v0 -; SI-NEXT: v_add_i32_e32 v9, vcc, 5, v0 -; SI-NEXT: v_add_i32_e32 v10, vcc, 2, v0 -; SI-NEXT: s_waitcnt vmcnt(5) -; SI-NEXT: v_and_b32_e32 v14, s4, v1 -; SI-NEXT: s_waitcnt vmcnt(4) -; SI-NEXT: v_and_b32_e32 v2, s4, v2 -; SI-NEXT: v_mul_hi_u32 v12, v2, s5 -; SI-NEXT: s_waitcnt vmcnt(3) -; SI-NEXT: 
v_and_b32_e32 v3, s4, v3 -; SI-NEXT: v_mul_hi_u32 v13, v3, s5 -; SI-NEXT: s_waitcnt vmcnt(2) -; SI-NEXT: v_and_b32_e32 v11, s4, v4 -; SI-NEXT: v_lshrrev_b32_e32 v12, 4, v12 -; SI-NEXT: v_mul_lo_u32 v12, v12, 24 -; SI-NEXT: v_lshrrev_b32_e32 v13, 4, v13 -; SI-NEXT: v_mul_lo_u32 v13, v13, 24 -; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v12 -; SI-NEXT: v_lshr_b32_e32 v12, v14, v2 -; SI-NEXT: v_sub_i32_e32 v3, vcc, v3, v13 -; SI-NEXT: v_sub_i32_e32 v13, vcc, 24, v2 -; SI-NEXT: v_sub_i32_e32 v14, vcc, 24, v3 -; SI-NEXT: v_and_b32_e32 v13, s4, v13 -; SI-NEXT: s_waitcnt vmcnt(1) -; SI-NEXT: v_lshl_b32_e32 v5, v5, v13 -; SI-NEXT: v_and_b32_e32 v14, 0xffffff, v14 -; SI-NEXT: v_lshr_b32_e32 v11, v11, v3 -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_lshl_b32_e32 v6, v6, v14 -; SI-NEXT: v_or_b32_e32 v5, v5, v12 -; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; SI-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc -; SI-NEXT: v_or_b32_e32 v6, v6, v11 -; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; SI-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc -; SI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen -; SI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen -; SI-NEXT: v_lshrrev_b32_e32 v0, 8, v2 -; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; SI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen -; SI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen -; SI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; SI-NEXT: s_setpc_b64 s[30:31] +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 +; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 +; SI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 +; SI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 +; SI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:12 +; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 +; SI-NEXT: s_mov_b32 s4, 0xffffff +; SI-NEXT: s_mov_b32 s5, 0xaaaaaaab +; SI-NEXT: v_add_i32_e32 v7, vcc, 3, v0 +; SI-NEXT: v_add_i32_e32 v8, vcc, 4, v0 +; SI-NEXT: v_add_i32_e32 v9, vcc, 5, v0 +; SI-NEXT: v_add_i32_e32 v10, vcc, 2, v0 +; SI-NEXT: s_waitcnt vmcnt(5) +; SI-NEXT: v_and_b32_e32 v14, s4, v1 +; SI-NEXT: s_waitcnt vmcnt(4) +; SI-NEXT: v_and_b32_e32 v2, s4, v2 +; SI-NEXT: v_mul_hi_u32 v12, v2, s5 +; SI-NEXT: s_waitcnt vmcnt(3) +; SI-NEXT: v_and_b32_e32 v3, s4, v3 +; SI-NEXT: v_mul_hi_u32 v13, v3, s5 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_and_b32_e32 v11, s4, v5 +; SI-NEXT: v_lshrrev_b32_e32 v12, 4, v12 +; SI-NEXT: v_mul_lo_u32 v12, v12, 24 +; SI-NEXT: v_lshrrev_b32_e32 v13, 4, v13 +; SI-NEXT: v_mul_lo_u32 v13, v13, 24 +; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v12 +; SI-NEXT: v_lshr_b32_e32 v12, v14, v2 +; SI-NEXT: v_sub_i32_e32 v3, vcc, v3, v13 +; SI-NEXT: v_sub_i32_e32 v13, vcc, 24, v2 +; SI-NEXT: v_sub_i32_e32 v14, vcc, 24, v3 +; SI-NEXT: v_and_b32_e32 v13, s4, v13 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_lshl_b32_e32 v6, v6, v13 +; SI-NEXT: v_and_b32_e32 v14, 0xffffff, v14 +; SI-NEXT: v_lshr_b32_e32 v11, v11, v3 +; SI-NEXT: v_lshl_b32_e32 v4, v4, v14 +; SI-NEXT: v_or_b32_e32 v6, v6, v12 +; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; SI-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc +; SI-NEXT: v_or_b32_e32 v4, v4, v11 +; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; SI-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc +; SI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen +; SI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen +; SI-NEXT: v_lshrrev_b32_e32 v0, 8, v2 +; SI-NEXT: 
s_waitcnt expcnt(1) +; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; SI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen +; SI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen +; SI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_fshr_v2i24: ; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 -; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 -; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 -; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:4 -; VI-NEXT: s_mov_b32 s4, 0xffffff -; VI-NEXT: s_mov_b32 s5, 0xaaaaaaab -; VI-NEXT: v_add_u32_e32 v7, vcc, 3, v0 -; VI-NEXT: v_add_u32_e32 v8, vcc, 4, v0 -; VI-NEXT: v_add_u32_e32 v9, vcc, 5, v0 -; VI-NEXT: v_add_u32_e32 v10, vcc, 2, v0 -; VI-NEXT: s_waitcnt vmcnt(5) -; VI-NEXT: v_and_b32_e32 v14, s4, v1 -; VI-NEXT: s_waitcnt vmcnt(4) -; VI-NEXT: v_and_b32_e32 v2, s4, v2 -; VI-NEXT: v_mul_hi_u32 v12, v2, s5 -; VI-NEXT: s_waitcnt vmcnt(3) -; VI-NEXT: v_and_b32_e32 v3, s4, v3 -; VI-NEXT: v_mul_hi_u32 v13, v3, s5 -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_and_b32_e32 v11, s4, v4 -; VI-NEXT: v_lshrrev_b32_e32 v12, 4, v12 -; VI-NEXT: v_mul_lo_u32 v12, v12, 24 -; VI-NEXT: v_lshrrev_b32_e32 v13, 4, v13 -; VI-NEXT: v_mul_lo_u32 v13, v13, 24 -; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v12 -; VI-NEXT: v_lshrrev_b32_e32 v12, v2, v14 -; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v13 -; VI-NEXT: v_sub_u32_e32 v13, vcc, 24, v2 -; VI-NEXT: v_sub_u32_e32 v14, vcc, 24, v3 -; VI-NEXT: v_and_b32_e32 v13, s4, v13 -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b32_e32 v5, v13, v5 -; VI-NEXT: v_and_b32_e32 v14, 0xffffff, v14 -; VI-NEXT: v_lshrrev_b32_e32 v11, v3, v11 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshlrev_b32_e32 v6, v14, v6 -; VI-NEXT: v_or_b32_e32 v5, v5, v12 -; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; VI-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc -; VI-NEXT: v_or_b32_e32 v6, v6, v11 -; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; VI-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc -; VI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen -; VI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen -; VI-NEXT: v_lshrrev_b32_e32 v0, 8, v2 -; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; VI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen -; VI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen -; VI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_setpc_b64 s[30:31] +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 +; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 +; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 +; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 +; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:12 +; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 +; VI-NEXT: s_mov_b32 s4, 0xffffff +; VI-NEXT: s_mov_b32 s5, 0xaaaaaaab +; VI-NEXT: v_add_u32_e32 v7, vcc, 3, v0 +; VI-NEXT: v_add_u32_e32 v8, vcc, 4, v0 +; VI-NEXT: v_add_u32_e32 v9, vcc, 5, v0 +; VI-NEXT: v_add_u32_e32 v10, vcc, 2, v0 +; VI-NEXT: s_waitcnt vmcnt(5) +; VI-NEXT: v_and_b32_e32 v14, s4, v1 +; VI-NEXT: s_waitcnt vmcnt(4) +; VI-NEXT: v_and_b32_e32 v2, s4, v2 +; VI-NEXT: 
v_mul_hi_u32 v12, v2, s5 +; VI-NEXT: s_waitcnt vmcnt(3) +; VI-NEXT: v_and_b32_e32 v3, s4, v3 +; VI-NEXT: v_mul_hi_u32 v13, v3, s5 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: v_and_b32_e32 v11, s4, v5 +; VI-NEXT: v_lshrrev_b32_e32 v12, 4, v12 +; VI-NEXT: v_mul_lo_u32 v12, v12, 24 +; VI-NEXT: v_lshrrev_b32_e32 v13, 4, v13 +; VI-NEXT: v_mul_lo_u32 v13, v13, 24 +; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v12 +; VI-NEXT: v_lshrrev_b32_e32 v12, v2, v14 +; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v13 +; VI-NEXT: v_sub_u32_e32 v13, vcc, 24, v2 +; VI-NEXT: v_sub_u32_e32 v14, vcc, 24, v3 +; VI-NEXT: v_and_b32_e32 v13, s4, v13 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_lshlrev_b32_e32 v6, v13, v6 +; VI-NEXT: v_and_b32_e32 v14, 0xffffff, v14 +; VI-NEXT: v_lshrrev_b32_e32 v11, v3, v11 +; VI-NEXT: v_lshlrev_b32_e32 v4, v14, v4 +; VI-NEXT: v_or_b32_e32 v6, v6, v12 +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; VI-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc +; VI-NEXT: v_or_b32_e32 v4, v4, v11 +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; VI-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc +; VI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen +; VI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen +; VI-NEXT: v_lshrrev_b32_e32 v0, 8, v2 +; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; VI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen +; VI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen +; VI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_fshr_v2i24: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 -; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 -; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 -; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:4 -; GFX9-NEXT: s_mov_b32 s4, 0xffffff -; GFX9-NEXT: s_mov_b32 s5, 0xaaaaaaab -; GFX9-NEXT: s_waitcnt vmcnt(5) -; GFX9-NEXT: v_and_b32_e32 v10, s4, v1 -; GFX9-NEXT: s_waitcnt vmcnt(4) -; GFX9-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX9-NEXT: v_mul_hi_u32 v6, v2, s5 -; GFX9-NEXT: s_waitcnt vmcnt(3) -; GFX9-NEXT: v_and_b32_e32 v3, s4, v3 -; GFX9-NEXT: v_mul_hi_u32 v7, v3, s5 -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_and_b32_e32 v9, s4, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v6 -; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 4, v7 -; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX9-NEXT: v_sub_u32_e32 v2, v2, v6 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, v2, v10 -; GFX9-NEXT: v_sub_u32_e32 v3, v3, v7 -; GFX9-NEXT: v_sub_u32_e32 v7, 24, v2 -; GFX9-NEXT: v_sub_u32_e32 v10, 24, v3 -; GFX9-NEXT: v_and_b32_e32 v7, s4, v7 -; GFX9-NEXT: v_lshrrev_b32_e32 v9, v3, v9 -; GFX9-NEXT: v_and_b32_e32 v10, 0xffffff, v10 -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshl_or_b32 v5, v5, v7, v6 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshl_or_b32 v6, v8, v10, v9 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc -; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v2 -; GFX9-NEXT: buffer_store_byte_d16_hi v2, v0, s[0:3], 0 offen offset:5 -; GFX9-NEXT: buffer_store_byte v3, v0, s[0:3], 0 offen offset:4 -; GFX9-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:3 -; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], 0 
offen offset:2 -; GFX9-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 +; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:20 +; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:4 +; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 +; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 +; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:8 +; GFX9-NEXT: s_mov_b32 s4, 0xffffff +; GFX9-NEXT: s_mov_b32 s5, 0xaaaaaaab +; GFX9-NEXT: s_waitcnt vmcnt(5) +; GFX9-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX9-NEXT: v_mul_hi_u32 v6, v1, s5 +; GFX9-NEXT: s_waitcnt vmcnt(4) +; GFX9-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX9-NEXT: v_mul_hi_u32 v7, v2, s5 +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: v_and_b32_e32 v9, s4, v4 +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v6 +; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 4, v7 +; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v10, s4, v8 +; GFX9-NEXT: v_sub_u32_e32 v1, v1, v6 +; GFX9-NEXT: v_lshrrev_b32_e32 v6, v1, v10 +; GFX9-NEXT: v_sub_u32_e32 v2, v2, v7 +; GFX9-NEXT: v_sub_u32_e32 v7, 24, v1 +; GFX9-NEXT: v_sub_u32_e32 v10, 24, v2 +; GFX9-NEXT: v_and_b32_e32 v7, s4, v7 +; GFX9-NEXT: v_lshrrev_b32_e32 v9, v2, v9 +; GFX9-NEXT: v_and_b32_e32 v10, 0xffffff, v10 +; GFX9-NEXT: v_lshl_or_b32 v5, v5, v7, v6 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v8, vcc +; GFX9-NEXT: v_lshl_or_b32 v3, v3, v10, v9 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v2 +; GFX9-NEXT: buffer_store_byte_d16_hi v2, v0, s[0:3], 0 offen offset:5 +; GFX9-NEXT: buffer_store_byte v3, v0, s[0:3], 0 offen offset:4 +; GFX9-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:3 +; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], 0 offen offset:2 +; GFX9-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_fshr_v2i24: ; R600: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -273,8 +273,8 @@ ; MOVREL: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}} ; MOVREL: v_movreld_b32_e32 v0, 5 -; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}} -; IDXMODE: s_set_gpr_idx_on [[ADD_IDX]], gpr_idx(DST) +; IDXMODE: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, 0xfffffe00{{$}} +; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, gpr_idx(DST) ; IDXMODE-NEXT: v_mov_b32_e32 v0, 5 ; IDXMODE-NEXT: s_set_gpr_idx_off define amdgpu_kernel void @insert_neg_offset_sgpr_loadreg(i32 addrspace(1)* %in, <16 x i32> addrspace(1)* %out, <16 x i32> %vec, i32 %offset) { diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll --- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll +++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll @@ -855,10 +855,10 @@ ; multiple. 
; FUNC-LABEL: {{^}}packed_struct_argument_alignment: ; HSA-GFX9: kernarg_segment_byte_size = 28 -; HSA-GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:17 -; HSA-GFX9: global_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:13 ; HSA-GFX9: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 ; HSA-GFX9: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x4 +; HSA-GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:17 +; HSA-GFX9: global_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:13 define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) { %val0 = extractvalue <{i32, i64}> %arg0, 0 %val1 = extractvalue <{i32, i64}> %arg0, 1 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll @@ -237,157 +237,157 @@ define amdgpu_kernel void @round_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) #0 { ; SI-LABEL: round_v4f64: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx8 s[8:15], s[0:1], 0x11 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_movk_i32 s18, 0xfc01 -; SI-NEXT: s_mov_b32 s3, 0xfffff -; SI-NEXT: s_mov_b32 s2, s6 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_bfe_u32 s0, s11, 0xb0014 -; SI-NEXT: s_add_i32 s19, s0, s18 -; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s19 -; SI-NEXT: s_brev_b32 s20, 1 -; SI-NEXT: s_andn2_b64 s[16:17], s[10:11], s[0:1] -; SI-NEXT: s_and_b32 s0, s11, s20 -; SI-NEXT: v_mov_b32_e32 v1, s0 -; SI-NEXT: v_mov_b32_e32 v0, s17 -; SI-NEXT: v_cmp_lt_i32_e64 vcc, s19, 0 -; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; SI-NEXT: v_mov_b32_e32 v1, s11 -; SI-NEXT: v_cmp_gt_i32_e64 s[0:1], s19, 51 -; SI-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[0:1] -; SI-NEXT: v_mov_b32_e32 v0, s16 -; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; SI-NEXT: v_mov_b32_e32 v2, s10 -; SI-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; SI-NEXT: v_add_f64 v[2:3], s[10:11], -v[0:1] -; SI-NEXT: s_bfe_u32 s0, s9, 0xb0014 -; SI-NEXT: s_add_i32 s17, s0, s18 -; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[2:3]|, 0.5 -; SI-NEXT: s_brev_b32 s16, -2 -; SI-NEXT: v_mov_b32_e32 v12, 0x3ff00000 -; SI-NEXT: v_mov_b32_e32 v4, s11 -; SI-NEXT: v_bfi_b32 v4, s16, v12, v4 -; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s17 -; SI-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc -; SI-NEXT: v_mov_b32_e32 v2, 0 -; SI-NEXT: s_andn2_b64 s[10:11], s[8:9], s[0:1] -; SI-NEXT: s_and_b32 s0, s9, s20 -; SI-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] -; SI-NEXT: v_mov_b32_e32 v1, s0 -; SI-NEXT: v_mov_b32_e32 v0, s11 -; SI-NEXT: v_cmp_lt_i32_e64 vcc, s17, 0 -; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; SI-NEXT: v_mov_b32_e32 v1, s9 -; SI-NEXT: v_cmp_gt_i32_e64 s[0:1], s17, 51 -; SI-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[0:1] -; SI-NEXT: v_mov_b32_e32 v0, s10 -; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; SI-NEXT: v_mov_b32_e32 v4, s8 -; SI-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1] -; SI-NEXT: v_add_f64 v[4:5], s[8:9], -v[0:1] -; SI-NEXT: s_bfe_u32 s0, s15, 0xb0014 -; SI-NEXT: s_add_i32 s10, s0, s18 -; SI-NEXT: v_mov_b32_e32 v6, s9 -; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s10 -; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 -; SI-NEXT: s_andn2_b64 s[8:9], s[14:15], s[0:1] -; SI-NEXT: v_bfi_b32 v6, s16, v12, v6 -; SI-NEXT: s_and_b32 s0, s15, s20 -; SI-NEXT: v_cndmask_b32_e32 v9, 0, v6, vcc -; SI-NEXT: v_mov_b32_e32 v5, s0 -; SI-NEXT: v_mov_b32_e32 v4, s9 -; SI-NEXT: 
v_cmp_lt_i32_e64 vcc, s10, 0 -; SI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc -; SI-NEXT: v_mov_b32_e32 v5, s15 -; SI-NEXT: v_cmp_gt_i32_e64 s[0:1], s10, 51 -; SI-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[0:1] -; SI-NEXT: v_mov_b32_e32 v4, s8 -; SI-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc -; SI-NEXT: v_mov_b32_e32 v6, s14 -; SI-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[0:1] -; SI-NEXT: v_add_f64 v[6:7], s[14:15], -v[4:5] -; SI-NEXT: s_bfe_u32 s0, s13, 0xb0014 -; SI-NEXT: v_mov_b32_e32 v10, s15 -; SI-NEXT: s_add_i32 s8, s0, s18 -; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[6:7]|, 0.5 -; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s8 -; SI-NEXT: v_bfi_b32 v10, s16, v12, v10 -; SI-NEXT: v_cndmask_b32_e32 v7, 0, v10, vcc -; SI-NEXT: v_mov_b32_e32 v6, 0 -; SI-NEXT: s_andn2_b64 s[2:3], s[12:13], s[0:1] -; SI-NEXT: s_and_b32 s0, s13, s20 -; SI-NEXT: v_add_f64 v[6:7], v[4:5], v[6:7] -; SI-NEXT: v_mov_b32_e32 v5, s0 -; SI-NEXT: v_mov_b32_e32 v4, s3 -; SI-NEXT: v_cmp_lt_i32_e64 vcc, s8, 0 -; SI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc -; SI-NEXT: v_mov_b32_e32 v5, s13 -; SI-NEXT: v_cmp_gt_i32_e64 s[0:1], s8, 51 -; SI-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[0:1] -; SI-NEXT: v_mov_b32_e32 v4, s2 -; SI-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc -; SI-NEXT: v_mov_b32_e32 v10, s12 -; SI-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[0:1] -; SI-NEXT: v_add_f64 v[10:11], s[12:13], -v[4:5] -; SI-NEXT: v_mov_b32_e32 v13, s13 -; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[10:11]|, 0.5 -; SI-NEXT: v_bfi_b32 v12, s16, v12, v13 -; SI-NEXT: v_cndmask_b32_e32 v11, 0, v12, vcc -; SI-NEXT: v_mov_b32_e32 v10, 0 -; SI-NEXT: v_mov_b32_e32 v8, 0 -; SI-NEXT: v_add_f64 v[4:5], v[4:5], v[10:11] -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: v_add_f64 v[0:1], v[0:1], v[8:9] -; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 -; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 -; SI-NEXT: s_endpgm +; SI-NEXT: s_load_dwordx8 s[8:15], s[0:1], 0x11 +; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_movk_i32 s18, 0xfc01 +; SI-NEXT: s_mov_b32 s3, 0xfffff +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_bfe_u32 s0, s11, 0xb0014 +; SI-NEXT: s_add_i32 s19, s0, s18 +; SI-NEXT: s_mov_b32 s2, s6 +; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s19 +; SI-NEXT: s_brev_b32 s20, 1 +; SI-NEXT: s_andn2_b64 s[16:17], s[10:11], s[0:1] +; SI-NEXT: s_and_b32 s0, s11, s20 +; SI-NEXT: v_mov_b32_e32 v1, s0 +; SI-NEXT: v_mov_b32_e32 v0, s17 +; SI-NEXT: v_cmp_lt_i32_e64 vcc, s19, 0 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: v_mov_b32_e32 v1, s11 +; SI-NEXT: v_cmp_gt_i32_e64 s[0:1], s19, 51 +; SI-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[0:1] +; SI-NEXT: v_mov_b32_e32 v0, s16 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-NEXT: v_mov_b32_e32 v2, s10 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; SI-NEXT: v_add_f64 v[2:3], s[10:11], -v[0:1] +; SI-NEXT: s_bfe_u32 s0, s9, 0xb0014 +; SI-NEXT: s_add_i32 s17, s0, s18 +; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[2:3]|, 0.5 +; SI-NEXT: s_brev_b32 s16, -2 +; SI-NEXT: v_mov_b32_e32 v12, 0x3ff00000 +; SI-NEXT: v_mov_b32_e32 v4, s11 +; SI-NEXT: v_bfi_b32 v4, s16, v12, v4 +; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s17 +; SI-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc +; SI-NEXT: v_mov_b32_e32 v2, 0 +; SI-NEXT: s_andn2_b64 s[10:11], s[8:9], s[0:1] +; SI-NEXT: s_and_b32 s0, s9, s20 +; SI-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] +; SI-NEXT: v_mov_b32_e32 v1, s0 +; SI-NEXT: v_mov_b32_e32 v0, s11 +; SI-NEXT: v_cmp_lt_i32_e64 vcc, s17, 0 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: v_mov_b32_e32 v1, s9 +; SI-NEXT: 
v_cmp_gt_i32_e64 s[0:1], s17, 51 +; SI-NEXT: v_cndmask_b32_e64 v1, v0, v1, s[0:1] +; SI-NEXT: v_mov_b32_e32 v0, s10 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-NEXT: v_mov_b32_e32 v4, s8 +; SI-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1] +; SI-NEXT: v_add_f64 v[4:5], s[8:9], -v[0:1] +; SI-NEXT: s_bfe_u32 s0, s15, 0xb0014 +; SI-NEXT: s_add_i32 s10, s0, s18 +; SI-NEXT: v_mov_b32_e32 v6, s9 +; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s10 +; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 +; SI-NEXT: s_andn2_b64 s[8:9], s[14:15], s[0:1] +; SI-NEXT: v_bfi_b32 v6, s16, v12, v6 +; SI-NEXT: s_and_b32 s0, s15, s20 +; SI-NEXT: v_cndmask_b32_e32 v9, 0, v6, vcc +; SI-NEXT: v_mov_b32_e32 v5, s0 +; SI-NEXT: v_mov_b32_e32 v4, s9 +; SI-NEXT: v_cmp_lt_i32_e64 vcc, s10, 0 +; SI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; SI-NEXT: v_mov_b32_e32 v5, s15 +; SI-NEXT: v_cmp_gt_i32_e64 s[0:1], s10, 51 +; SI-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[0:1] +; SI-NEXT: v_mov_b32_e32 v4, s8 +; SI-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; SI-NEXT: v_mov_b32_e32 v6, s14 +; SI-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[0:1] +; SI-NEXT: v_add_f64 v[6:7], s[14:15], -v[4:5] +; SI-NEXT: s_bfe_u32 s0, s13, 0xb0014 +; SI-NEXT: v_mov_b32_e32 v10, s15 +; SI-NEXT: s_add_i32 s8, s0, s18 +; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[6:7]|, 0.5 +; SI-NEXT: s_lshr_b64 s[0:1], s[2:3], s8 +; SI-NEXT: v_bfi_b32 v10, s16, v12, v10 +; SI-NEXT: v_cndmask_b32_e32 v7, 0, v10, vcc +; SI-NEXT: v_mov_b32_e32 v6, 0 +; SI-NEXT: s_andn2_b64 s[2:3], s[12:13], s[0:1] +; SI-NEXT: s_and_b32 s0, s13, s20 +; SI-NEXT: v_add_f64 v[6:7], v[4:5], v[6:7] +; SI-NEXT: v_mov_b32_e32 v5, s0 +; SI-NEXT: v_mov_b32_e32 v4, s3 +; SI-NEXT: v_cmp_lt_i32_e64 vcc, s8, 0 +; SI-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; SI-NEXT: v_mov_b32_e32 v5, s13 +; SI-NEXT: v_cmp_gt_i32_e64 s[0:1], s8, 51 +; SI-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[0:1] +; SI-NEXT: v_mov_b32_e32 v4, s2 +; SI-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; SI-NEXT: v_mov_b32_e32 v10, s12 +; SI-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[0:1] +; SI-NEXT: v_add_f64 v[10:11], s[12:13], -v[4:5] +; SI-NEXT: v_mov_b32_e32 v13, s13 +; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[10:11]|, 0.5 +; SI-NEXT: v_bfi_b32 v12, s16, v12, v13 +; SI-NEXT: v_cndmask_b32_e32 v11, 0, v12, vcc +; SI-NEXT: v_mov_b32_e32 v10, 0 +; SI-NEXT: v_mov_b32_e32 v8, 0 +; SI-NEXT: v_add_f64 v[4:5], v[4:5], v[10:11] +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: v_add_f64 v[0:1], v[0:1], v[8:9] +; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; SI-NEXT: s_endpgm ; ; CI-LABEL: round_v4f64: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; CI-NEXT: s_load_dwordx8 s[8:15], s[0:1], 0x11 -; CI-NEXT: s_brev_b32 s2, -2 -; CI-NEXT: v_mov_b32_e32 v12, 0x3ff00000 -; CI-NEXT: s_mov_b32 s7, 0xf000 -; CI-NEXT: s_mov_b32 s6, -1 -; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_trunc_f64_e32 v[0:1], s[10:11] -; CI-NEXT: v_mov_b32_e32 v4, s11 -; CI-NEXT: v_add_f64 v[2:3], s[10:11], -v[0:1] -; CI-NEXT: v_bfi_b32 v4, s2, v12, v4 -; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[2:3]|, 0.5 -; CI-NEXT: v_trunc_f64_e32 v[8:9], s[8:9] -; CI-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc -; CI-NEXT: v_mov_b32_e32 v2, 0 -; CI-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] -; CI-NEXT: v_add_f64 v[0:1], s[8:9], -v[8:9] -; CI-NEXT: v_mov_b32_e32 v4, s9 -; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[0:1]|, 0.5 -; CI-NEXT: v_bfi_b32 v4, s2, v12, v4 -; CI-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; CI-NEXT: v_trunc_f64_e32 v[4:5], s[14:15] -; CI-NEXT: v_mov_b32_e32 
v10, s15 -; CI-NEXT: v_add_f64 v[6:7], s[14:15], -v[4:5] -; CI-NEXT: v_bfi_b32 v10, s2, v12, v10 -; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[6:7]|, 0.5 -; CI-NEXT: v_mov_b32_e32 v6, 0 -; CI-NEXT: v_cndmask_b32_e32 v7, 0, v10, vcc -; CI-NEXT: v_trunc_f64_e32 v[10:11], s[12:13] -; CI-NEXT: v_add_f64 v[6:7], v[4:5], v[6:7] -; CI-NEXT: v_add_f64 v[4:5], s[12:13], -v[10:11] -; CI-NEXT: v_mov_b32_e32 v13, s13 -; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 -; CI-NEXT: v_bfi_b32 v12, s2, v12, v13 -; CI-NEXT: v_cndmask_b32_e32 v5, 0, v12, vcc -; CI-NEXT: v_mov_b32_e32 v4, 0 -; CI-NEXT: v_mov_b32_e32 v0, 0 -; CI-NEXT: v_add_f64 v[4:5], v[10:11], v[4:5] -; CI-NEXT: v_add_f64 v[0:1], v[8:9], v[0:1] -; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 -; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 -; CI-NEXT: s_endpgm +; CI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x11 +; CI-NEXT: s_brev_b32 s12, -2 +; CI-NEXT: v_mov_b32_e32 v12, 0x3ff00000 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_mov_b32 s3, 0xf000 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: v_trunc_f64_e32 v[0:1], s[6:7] +; CI-NEXT: v_mov_b32_e32 v4, s7 +; CI-NEXT: v_add_f64 v[2:3], s[6:7], -v[0:1] +; CI-NEXT: v_bfi_b32 v4, s12, v12, v4 +; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[2:3]|, 0.5 +; CI-NEXT: v_trunc_f64_e32 v[8:9], s[4:5] +; CI-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc +; CI-NEXT: v_mov_b32_e32 v2, 0 +; CI-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] +; CI-NEXT: v_add_f64 v[0:1], s[4:5], -v[8:9] +; CI-NEXT: v_mov_b32_e32 v4, s5 +; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[0:1]|, 0.5 +; CI-NEXT: v_bfi_b32 v4, s12, v12, v4 +; CI-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc +; CI-NEXT: v_trunc_f64_e32 v[4:5], s[10:11] +; CI-NEXT: v_mov_b32_e32 v10, s11 +; CI-NEXT: v_add_f64 v[6:7], s[10:11], -v[4:5] +; CI-NEXT: v_bfi_b32 v10, s12, v12, v10 +; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[6:7]|, 0.5 +; CI-NEXT: v_mov_b32_e32 v6, 0 +; CI-NEXT: v_cndmask_b32_e32 v7, 0, v10, vcc +; CI-NEXT: v_trunc_f64_e32 v[10:11], s[8:9] +; CI-NEXT: v_add_f64 v[6:7], v[4:5], v[6:7] +; CI-NEXT: v_add_f64 v[4:5], s[8:9], -v[10:11] +; CI-NEXT: v_mov_b32_e32 v13, s9 +; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 +; CI-NEXT: v_bfi_b32 v12, s12, v12, v13 +; CI-NEXT: v_cndmask_b32_e32 v5, 0, v12, vcc +; CI-NEXT: v_mov_b32_e32 v4, 0 +; CI-NEXT: v_mov_b32_e32 v0, 0 +; CI-NEXT: v_add_f64 v[4:5], v[10:11], v[4:5] +; CI-NEXT: s_mov_b32 s2, -1 +; CI-NEXT: v_add_f64 v[0:1], v[8:9], v[0:1] +; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 +; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; CI-NEXT: s_endpgm %result = call <4 x double> @llvm.round.v4f64(<4 x double> %in) #1 store <4 x double> %result, <4 x double> addrspace(1)* %out ret void @@ -600,82 +600,82 @@ ; ; CI-LABEL: round_v8f64: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; CI-NEXT: s_load_dwordx16 s[8:23], s[0:1], 0x19 -; CI-NEXT: s_brev_b32 s2, -2 -; CI-NEXT: v_mov_b32_e32 v16, 0x3ff00000 -; CI-NEXT: s_mov_b32 s7, 0xf000 -; CI-NEXT: s_mov_b32 s6, -1 -; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: v_trunc_f64_e32 v[0:1], s[10:11] -; CI-NEXT: v_mov_b32_e32 v4, s11 -; CI-NEXT: v_add_f64 v[2:3], s[10:11], -v[0:1] -; CI-NEXT: v_bfi_b32 v4, s2, v16, v4 -; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[2:3]|, 0.5 -; CI-NEXT: v_mov_b32_e32 v2, 0 -; CI-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc -; CI-NEXT: v_trunc_f64_e32 v[4:5], s[8:9] -; CI-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] -; CI-NEXT: v_add_f64 v[0:1], s[8:9], -v[4:5] -; CI-NEXT: v_mov_b32_e32 v6, s9 -; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[0:1]|, 0.5 -; 
CI-NEXT: v_bfi_b32 v6, s2, v16, v6 -; CI-NEXT: v_cndmask_b32_e32 v1, 0, v6, vcc -; CI-NEXT: v_trunc_f64_e32 v[6:7], s[14:15] -; CI-NEXT: v_mov_b32_e32 v0, 0 -; CI-NEXT: v_add_f64 v[0:1], v[4:5], v[0:1] -; CI-NEXT: v_add_f64 v[4:5], s[14:15], -v[6:7] -; CI-NEXT: v_mov_b32_e32 v8, s15 -; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 -; CI-NEXT: v_bfi_b32 v8, s2, v16, v8 -; CI-NEXT: v_cndmask_b32_e32 v5, 0, v8, vcc -; CI-NEXT: v_trunc_f64_e32 v[8:9], s[12:13] -; CI-NEXT: v_mov_b32_e32 v4, 0 -; CI-NEXT: v_add_f64 v[6:7], v[6:7], v[4:5] -; CI-NEXT: v_add_f64 v[4:5], s[12:13], -v[8:9] -; CI-NEXT: v_mov_b32_e32 v10, s13 -; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 -; CI-NEXT: v_bfi_b32 v10, s2, v16, v10 -; CI-NEXT: v_cndmask_b32_e32 v5, 0, v10, vcc -; CI-NEXT: v_mov_b32_e32 v4, 0 -; CI-NEXT: v_add_f64 v[4:5], v[8:9], v[4:5] -; CI-NEXT: v_mov_b32_e32 v8, s19 -; CI-NEXT: v_bfi_b32 v18, s2, v16, v8 -; CI-NEXT: v_trunc_f64_e32 v[8:9], s[20:21] -; CI-NEXT: v_trunc_f64_e32 v[10:11], s[22:23] -; CI-NEXT: v_add_f64 v[14:15], s[20:21], -v[8:9] -; CI-NEXT: v_mov_b32_e32 v19, s23 -; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[14:15]|, 0.5 -; CI-NEXT: v_add_f64 v[14:15], s[22:23], -v[10:11] -; CI-NEXT: v_mov_b32_e32 v17, s21 -; CI-NEXT: v_cmp_ge_f64_e64 s[0:1], |v[14:15]|, 0.5 -; CI-NEXT: v_bfi_b32 v19, s2, v16, v19 -; CI-NEXT: v_trunc_f64_e32 v[12:13], s[16:17] -; CI-NEXT: v_bfi_b32 v17, s2, v16, v17 -; CI-NEXT: v_cndmask_b32_e64 v15, 0, v19, s[0:1] -; CI-NEXT: v_mov_b32_e32 v14, 0 -; CI-NEXT: v_add_f64 v[10:11], v[10:11], v[14:15] -; CI-NEXT: v_cndmask_b32_e32 v15, 0, v17, vcc -; CI-NEXT: v_mov_b32_e32 v14, 0 -; CI-NEXT: v_mov_b32_e32 v17, s17 -; CI-NEXT: v_add_f64 v[8:9], v[8:9], v[14:15] -; CI-NEXT: v_add_f64 v[14:15], s[16:17], -v[12:13] -; CI-NEXT: v_bfi_b32 v19, s2, v16, v17 -; CI-NEXT: v_trunc_f64_e32 v[16:17], s[18:19] -; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[14:15]|, 0.5 -; CI-NEXT: v_add_f64 v[14:15], s[18:19], -v[16:17] -; CI-NEXT: v_cmp_ge_f64_e64 s[0:1], |v[14:15]|, 0.5 -; CI-NEXT: v_mov_b32_e32 v14, 0 -; CI-NEXT: v_cndmask_b32_e64 v15, 0, v18, s[0:1] -; CI-NEXT: v_add_f64 v[14:15], v[16:17], v[14:15] -; CI-NEXT: v_cndmask_b32_e32 v17, 0, v19, vcc -; CI-NEXT: v_mov_b32_e32 v16, 0 -; CI-NEXT: v_add_f64 v[12:13], v[12:13], v[16:17] -; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:48 -; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 offset:32 -; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 -; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 -; CI-NEXT: s_endpgm +; CI-NEXT: s_load_dwordx16 s[8:23], s[0:1], 0x19 +; CI-NEXT: s_brev_b32 s2, -2 +; CI-NEXT: v_mov_b32_e32 v16, 0x3ff00000 +; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; CI-NEXT: s_mov_b32 s7, 0xf000 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: v_trunc_f64_e32 v[0:1], s[10:11] +; CI-NEXT: v_mov_b32_e32 v4, s11 +; CI-NEXT: v_add_f64 v[2:3], s[10:11], -v[0:1] +; CI-NEXT: v_bfi_b32 v4, s2, v16, v4 +; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[2:3]|, 0.5 +; CI-NEXT: v_mov_b32_e32 v2, 0 +; CI-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc +; CI-NEXT: v_trunc_f64_e32 v[4:5], s[8:9] +; CI-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] +; CI-NEXT: v_add_f64 v[0:1], s[8:9], -v[4:5] +; CI-NEXT: v_mov_b32_e32 v6, s9 +; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[0:1]|, 0.5 +; CI-NEXT: v_bfi_b32 v6, s2, v16, v6 +; CI-NEXT: v_cndmask_b32_e32 v1, 0, v6, vcc +; CI-NEXT: v_trunc_f64_e32 v[6:7], s[14:15] +; CI-NEXT: v_mov_b32_e32 v0, 0 +; CI-NEXT: v_add_f64 v[0:1], v[4:5], v[0:1] +; CI-NEXT: v_add_f64 v[4:5], s[14:15], -v[6:7] +; CI-NEXT: v_mov_b32_e32 v8, 
s15 +; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 +; CI-NEXT: v_bfi_b32 v8, s2, v16, v8 +; CI-NEXT: v_cndmask_b32_e32 v5, 0, v8, vcc +; CI-NEXT: v_trunc_f64_e32 v[8:9], s[12:13] +; CI-NEXT: v_mov_b32_e32 v4, 0 +; CI-NEXT: v_add_f64 v[6:7], v[6:7], v[4:5] +; CI-NEXT: v_add_f64 v[4:5], s[12:13], -v[8:9] +; CI-NEXT: v_mov_b32_e32 v10, s13 +; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 +; CI-NEXT: v_bfi_b32 v10, s2, v16, v10 +; CI-NEXT: v_cndmask_b32_e32 v5, 0, v10, vcc +; CI-NEXT: v_mov_b32_e32 v4, 0 +; CI-NEXT: v_add_f64 v[4:5], v[8:9], v[4:5] +; CI-NEXT: v_mov_b32_e32 v8, s19 +; CI-NEXT: v_bfi_b32 v18, s2, v16, v8 +; CI-NEXT: v_trunc_f64_e32 v[8:9], s[20:21] +; CI-NEXT: v_trunc_f64_e32 v[10:11], s[22:23] +; CI-NEXT: v_add_f64 v[14:15], s[20:21], -v[8:9] +; CI-NEXT: v_mov_b32_e32 v19, s23 +; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[14:15]|, 0.5 +; CI-NEXT: v_add_f64 v[14:15], s[22:23], -v[10:11] +; CI-NEXT: v_mov_b32_e32 v17, s21 +; CI-NEXT: v_cmp_ge_f64_e64 s[0:1], |v[14:15]|, 0.5 +; CI-NEXT: v_bfi_b32 v19, s2, v16, v19 +; CI-NEXT: v_trunc_f64_e32 v[12:13], s[16:17] +; CI-NEXT: v_bfi_b32 v17, s2, v16, v17 +; CI-NEXT: v_cndmask_b32_e64 v15, 0, v19, s[0:1] +; CI-NEXT: v_mov_b32_e32 v14, 0 +; CI-NEXT: v_add_f64 v[10:11], v[10:11], v[14:15] +; CI-NEXT: v_cndmask_b32_e32 v15, 0, v17, vcc +; CI-NEXT: v_mov_b32_e32 v14, 0 +; CI-NEXT: v_mov_b32_e32 v17, s17 +; CI-NEXT: v_add_f64 v[8:9], v[8:9], v[14:15] +; CI-NEXT: v_add_f64 v[14:15], s[16:17], -v[12:13] +; CI-NEXT: v_bfi_b32 v19, s2, v16, v17 +; CI-NEXT: v_trunc_f64_e32 v[16:17], s[18:19] +; CI-NEXT: v_cmp_ge_f64_e64 vcc, |v[14:15]|, 0.5 +; CI-NEXT: v_add_f64 v[14:15], s[18:19], -v[16:17] +; CI-NEXT: s_mov_b32 s6, -1 +; CI-NEXT: v_cmp_ge_f64_e64 s[0:1], |v[14:15]|, 0.5 +; CI-NEXT: v_mov_b32_e32 v14, 0 +; CI-NEXT: v_cndmask_b32_e64 v15, 0, v18, s[0:1] +; CI-NEXT: v_add_f64 v[14:15], v[16:17], v[14:15] +; CI-NEXT: v_cndmask_b32_e32 v17, 0, v19, vcc +; CI-NEXT: v_mov_b32_e32 v16, 0 +; CI-NEXT: v_add_f64 v[12:13], v[12:13], v[16:17] +; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:48 +; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 offset:32 +; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 +; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; CI-NEXT: s_endpgm %result = call <8 x double> @llvm.round.v8f64(<8 x double> %in) #1 store <8 x double> %result, <8 x double> addrspace(1)* %out ret void diff --git a/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll b/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll --- a/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll @@ -27,11 +27,10 @@ %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp7, i32 1 %tmp10 = insertelement <4 x i32> %tmp9, i32 undef, i32 2 %tmp11 = insertelement <4 x i32> %tmp10, i32 undef, i32 3 - call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %tmp11, <4 x i32> undef, i32 undef, i32 0, i32 %arg, i32 0, i32 14, i32 4, i1 1, i1 1) + call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %tmp11, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 78, i32 3) #2 ret void } -; Function Attrs: nounwind -declare void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0 +declare void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0 -attributes #0 = { nounwind } +attributes #0 = { nounwind willreturn writeonly } diff --git a/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll b/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll --- 
a/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll +++ b/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll @@ -11,13 +11,14 @@ store i32 %v, i32 addrspace(3)* %p0 - call void @llvm.amdgcn.tbuffer.store.i32(i32 %v, <4 x i32> undef, i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i1 1, i1 0) + call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 %v, <4 x i32> undef, i32 0, i32 0, i32 68, i32 1) %w = load i32, i32 addrspace(3)* %p0 store i32 %w, i32 addrspace(3)* %p1 ret void } -declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0 +declare void @llvm.amdgcn.raw.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg, i32 immarg) #1 attributes #0 = { nounwind } +attributes #1 = { nounwind willreturn writeonly } diff --git a/llvm/test/CodeGen/AMDGPU/merge-stores.ll b/llvm/test/CodeGen/AMDGPU/merge-stores.ll --- a/llvm/test/CodeGen/AMDGPU/merge-stores.ll +++ b/llvm/test/CodeGen/AMDGPU/merge-stores.ll @@ -566,7 +566,6 @@ ; GCN-LABEL: {{^}}merge_global_store_7_constants_i32: ; GCN: buffer_store_dwordx4 ; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dword v ; CI: buffer_store_dwordx3 define amdgpu_kernel void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) { store i32 34, i32 addrspace(1)* %out, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/mubuf.ll b/llvm/test/CodeGen/AMDGPU/mubuf.ll --- a/llvm/test/CodeGen/AMDGPU/mubuf.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf.ll @@ -1,7 +1,5 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s -declare i32 @llvm.amdgcn.workitem.id.x() readnone - ;;;==========================================================================;;; ;;; MUBUF LOAD TESTS ;;;==========================================================================;;; @@ -60,10 +58,10 @@ %tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(4)* %0, i32 0, i32 0 %tmp1 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp0 %tmp2 = shl i32 %6, 2 - %tmp3 = call i32 @llvm.amdgcn.raw.buffer.load.dword.i32(<4 x i32> %tmp1, i32 %tmp2, i32 64, i32 1) + %tmp3 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %tmp1, i32 %tmp2, i32 64, i32 1) %tmp4 = add i32 %6, 16 %tmp1.4xi32 = bitcast <4 x i32> %tmp1 to <4 x i32> - call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> %tmp1.4xi32, i32 0, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i1 1, i1 1) + call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 %tmp3, <4 x i32> %tmp1.4xi32, i32 %tmp4, i32 %4, i32 68, i32 3) ret void } @@ -79,10 +77,10 @@ %tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(4)* %0, i32 0, i32 0 %tmp1 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp0 %tmp2 = shl i32 %6, 2 - %tmp3 = call i32 @llvm.amdgcn.raw.buffer.load.dword.i32(<4 x i32> %tmp1, i32 %tmp2, i32 65, i32 1) + %tmp3 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %tmp1, i32 %tmp2, i32 65, i32 1) %tmp4 = add i32 %6, 16 %tmp1.4xi32 = bitcast <4 x i32> %tmp1 to <4 x i32> - call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> %tmp1.4xi32, i32 0, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i1 1, i1 1) + call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 %tmp3, <4 x i32> %tmp1.4xi32, i32 %tmp4, i32 %4, i32 68, i32 3) ret void } @@ -136,14 +134,14 @@ ; CHECK-LABEL: {{^}}store_sgpr_ptr: ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 -define amdgpu_kernel void @store_sgpr_ptr(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @store_sgpr_ptr(i32 addrspace(1)* %out) { store i32 99, i32 addrspace(1)* %out, align 4 ret 
void } ; CHECK-LABEL: {{^}}store_sgpr_ptr_offset: ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:40 -define amdgpu_kernel void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) { %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 10 store i32 99, i32 addrspace(1)* %out.gep, align 4 ret void @@ -152,7 +150,7 @@ ; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset: ; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000 ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]] -define amdgpu_kernel void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) { %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768 store i32 99, i32 addrspace(1)* %out.gep, align 4 ret void @@ -161,7 +159,7 @@ ; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset_atomic: ; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000 ; CHECK: buffer_atomic_add v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]] -define amdgpu_kernel void @store_sgpr_ptr_large_offset_atomic(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @store_sgpr_ptr_large_offset_atomic(i32 addrspace(1)* %out) { %gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 5 seq_cst ret void @@ -169,14 +167,20 @@ ; CHECK-LABEL: {{^}}store_vgpr_ptr: ; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 -define amdgpu_kernel void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @store_vgpr_ptr(i32 addrspace(1)* %out) { %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid store i32 99, i32 addrspace(1)* %out.gep, align 4 ret void } -declare i32 @llvm.amdgcn.raw.buffer.load.dword.i32(<4 x i32>, i32, i32, i32) #0 -declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) + +declare i32 @llvm.amdgcn.workitem.id.x() #1 +declare void @llvm.amdgcn.raw.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg, i32 immarg) #2 +declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32 immarg) #3 attributes #0 = { nounwind readonly } +attributes #1 = { nounwind readnone speculatable willreturn } +attributes #2 = { nounwind willreturn writeonly } +attributes #3 = { nounwind readonly willreturn } +attributes #4 = { readnone } diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll --- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll @@ -17,9 +17,9 @@ ; ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], 
v[{{[0-9]+:[0-9]+}}], off offset:2048 @@ -86,14 +86,14 @@ ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} @@ -300,9 +300,9 @@ ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; ; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX10: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 @@ -456,10 +456,10 @@ ; ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; diff --git a/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll b/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll --- a/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll @@ -25,31 +25,31 @@ %array_vector10 = insertelement <4 x float> %array_vector9, float 0.000000e+00, i32 2 %array_vector11 = insertelement <4 x float> %array_vector10, float undef, i32 3 %tmp3 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> undef, i32 undef, i32 4864, i32 0) - call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 36, i32 4, i32 4, i1 1, i1 1) + call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 %tmp3, <4 x i32> undef, i32 36, i32 %arg, i32 
68, i32 3) %bc = bitcast <4 x float> %array_vector3 to <4 x i32> %tmp4 = extractelement <4 x i32> %bc, i32 undef - call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp4, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 48, i32 4, i32 4, i1 1, i1 1) + call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 %tmp4, <4 x i32> undef, i32 48, i32 %arg, i32 68, i32 3) %bc49 = bitcast <4 x float> %array_vector11 to <4 x i32> %tmp5 = extractelement <4 x i32> %bc49, i32 undef - call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp5, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 72, i32 4, i32 4, i1 1, i1 1) + call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 %tmp5, <4 x i32> undef, i32 72, i32 %arg, i32 68, i32 3) %array_vector21 = insertelement <4 x float> , float %tmp, i32 1 %array_vector22 = insertelement <4 x float> %array_vector21, float undef, i32 2 %array_vector23 = insertelement <4 x float> %array_vector22, float undef, i32 3 - call void @llvm.amdgcn.tbuffer.store.i32(i32 undef, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 28, i32 4, i32 4, i1 1, i1 1) + call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 undef, <4 x i32> undef, i32 28, i32 %arg, i32 68, i32 3) %bc52 = bitcast <4 x float> %array_vector23 to <4 x i32> %tmp6 = extractelement <4 x i32> %bc52, i32 undef - call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp6, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 64, i32 4, i32 4, i1 1, i1 1) - call void @llvm.amdgcn.tbuffer.store.i32(i32 undef, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 20, i32 4, i32 4, i1 1, i1 1) - call void @llvm.amdgcn.tbuffer.store.i32(i32 undef, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 56, i32 4, i32 4, i1 1, i1 1) - call void @llvm.amdgcn.tbuffer.store.i32(i32 undef, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 92, i32 4, i32 4, i1 1, i1 1) + call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 %tmp6, <4 x i32> undef, i32 64, i32 %arg, i32 68, i32 3) + call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 undef, <4 x i32> undef, i32 20, i32 %arg, i32 68, i32 3) + call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 undef, <4 x i32> undef, i32 56, i32 %arg, i32 68, i32 3) + call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 undef, <4 x i32> undef, i32 92, i32 %arg, i32 68, i32 3) ret void } -declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1 -declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32) #2 -declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #3 +declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg) #1 +declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32 immarg) #2 +declare void @llvm.amdgcn.raw.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg, i32 immarg) #3 attributes #0 = { nounwind "target-cpu"="tonga" } -attributes #1 = { nounwind readnone } -attributes #2 = { nounwind readonly } -attributes #3 = { nounwind } +attributes #1 = { nounwind readnone willreturn } +attributes #2 = { nounwind readonly willreturn } +attributes #3 = { nounwind willreturn writeonly } diff --git a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll --- a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll +++ b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll @@ -1,12 +1,6 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s ; RUN: llc 
-amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi -amdgpu-enable-global-sgpr-addr < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s -declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) -declare void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) -declare void @llvm.amdgcn.s.barrier() #1 -declare i32 @llvm.amdgcn.workitem.id.x() #2 - - @stored_lds_ptr = addrspace(3) global i32 addrspace(3)* undef, align 4 @stored_constant_ptr = addrspace(3) global i32 addrspace(4)* undef, align 8 @stored_global_ptr = addrspace(3) global i32 addrspace(1)* undef, align 8 @@ -296,30 +290,33 @@ ret void } -; XGCN-LABEL: {{^}}reorder_local_load_tbuffer_store_local_load: -; XCI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}}, 0x4 -; XCI: TBUFFER_STORE_FORMAT -; XCI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}}, 0x8 -; define amdgpu_vs void @reorder_local_load_tbuffer_store_local_load(i32 addrspace(1)* %out, i32 %a1, i32 %vaddr) #0 { -; %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4 +; GCN-LABEL: {{^}}reorder_local_load_tbuffer_store_local_load: +; GCN: tbuffer_store_format +; GCN: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:1 offset1:2 +define amdgpu_vs void @reorder_local_load_tbuffer_store_local_load(i32 addrspace(1)* %out, i32 %a1, i32 %vaddr) #0 { + %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4 -; %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1 -; %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2 + %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1 + %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2 -; %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4 + %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4 -; %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0 -; call void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32> %vdata, <4 x i32> undef, -; i32 %vaddr, i32 0, i32 0, i32 32, i32 14, i32 4, i1 1, i1 1) + %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0 + %vaddr.add = add i32 %vaddr, 32 + call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %vdata, <4 x i32> undef, i32 %vaddr.add, i32 0, i32 0, i32 228, i32 3) -; %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4 + %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4 -; %add = add nsw i32 %tmp1, %tmp2 + %add = add nsw i32 %tmp1, %tmp2 + store i32 %add, i32 addrspace(1)* %out, align 4 + ret void +} -; store i32 %add, i32 addrspace(1)* %out, align 4 -; ret void -; } +declare void @llvm.amdgcn.s.barrier() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #2 +declare void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #3 attributes #0 = { nounwind } -attributes #1 = { nounwind convergent } -attributes #2 = { nounwind readnone } +attributes #1 = { convergent nounwind willreturn } +attributes #2 = { nounwind readnone speculatable willreturn } +attributes #3 = { nounwind willreturn writeonly } diff --git a/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll b/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll --- a/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll +++ b/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll @@ -6,14 +6,14 @@ define void @local_store_i56(i56 addrspace(3)* %ptr, i56 %arg) #0 { ; CIVI-LABEL: local_store_i56: ; CIVI: ; %bb.0: -; CIVI-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) -; CIVI-NEXT: s_mov_b32 m0, -1 -; CIVI-NEXT: ds_write_b16 v0, v2 offset:4 -; CIVI-NEXT: ds_write_b32 v0, v1 -; CIVI-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; CIVI-NEXT: ds_write_b8 v0, v1 offset:6 -; CIVI-NEXT: s_waitcnt lgkmcnt(0) -; CIVI-NEXT: s_setpc_b64 s[30:31] +; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CIVI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; CIVI-NEXT: s_mov_b32 m0, -1 +; CIVI-NEXT: ds_write_b8 v0, v3 offset:6 +; CIVI-NEXT: ds_write_b16 v0, v2 offset:4 +; CIVI-NEXT: ds_write_b32 v0, v1 +; CIVI-NEXT: s_waitcnt lgkmcnt(0) +; CIVI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: local_store_i56: ; GFX9: ; %bb.0: @@ -30,70 +30,70 @@ define amdgpu_kernel void @local_store_i55(i55 addrspace(3)* %ptr, i55 %arg) #0 { ; HAWAII-LABEL: local_store_i55: ; HAWAII: ; %bb.0: -; HAWAII-NEXT: s_or_b32 s0, s4, 14 -; HAWAII-NEXT: v_mov_b32_e32 v0, s0 -; HAWAII-NEXT: v_mov_b32_e32 v1, s5 -; HAWAII-NEXT: flat_load_ubyte v0, v[0:1] -; HAWAII-NEXT: s_load_dword s0, s[4:5], 0x0 -; HAWAII-NEXT: s_load_dword s1, s[4:5], 0x2 -; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x3 -; HAWAII-NEXT: s_mov_b32 m0, -1 -; HAWAII-NEXT: s_waitcnt lgkmcnt(0) -; HAWAII-NEXT: v_mov_b32_e32 v1, s0 -; HAWAII-NEXT: v_mov_b32_e32 v3, s1 -; HAWAII-NEXT: v_mov_b32_e32 v2, s2 -; HAWAII-NEXT: ds_write_b16 v1, v2 offset:4 -; HAWAII-NEXT: s_waitcnt vmcnt(0) -; HAWAII-NEXT: v_and_b32_e32 v0, 0x7f, v0 -; HAWAII-NEXT: ds_write_b8 v1, v0 offset:6 -; HAWAII-NEXT: ds_write_b32 v1, v3 -; HAWAII-NEXT: s_endpgm +; HAWAII-NEXT: s_or_b32 s0, s4, 14 +; HAWAII-NEXT: v_mov_b32_e32 v0, s0 +; HAWAII-NEXT: v_mov_b32_e32 v1, s5 +; HAWAII-NEXT: flat_load_ubyte v0, v[0:1] +; HAWAII-NEXT: s_load_dword s0, s[4:5], 0x0 +; HAWAII-NEXT: s_load_dword s1, s[4:5], 0x2 +; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x3 +; HAWAII-NEXT: s_mov_b32 m0, -1 +; HAWAII-NEXT: s_waitcnt lgkmcnt(0) +; HAWAII-NEXT: v_mov_b32_e32 v1, s0 +; HAWAII-NEXT: v_mov_b32_e32 v2, s1 +; HAWAII-NEXT: v_mov_b32_e32 v3, s2 +; HAWAII-NEXT: s_waitcnt vmcnt(0) +; HAWAII-NEXT: v_and_b32_e32 v0, 0x7f, v0 +; HAWAII-NEXT: ds_write_b8 v1, v0 offset:6 +; HAWAII-NEXT: ds_write_b16 v1, v3 offset:4 +; HAWAII-NEXT: ds_write_b32 v1, v2 +; HAWAII-NEXT: s_endpgm ; ; FIJI-LABEL: local_store_i55: ; FIJI: ; %bb.0: -; FIJI-NEXT: s_or_b32 s0, s4, 14 -; FIJI-NEXT: v_mov_b32_e32 v0, s0 -; FIJI-NEXT: v_mov_b32_e32 v1, s5 -; FIJI-NEXT: flat_load_ubyte v0, v[0:1] -; FIJI-NEXT: s_load_dword s0, s[4:5], 0x0 -; FIJI-NEXT: s_load_dword s1, s[4:5], 0x8 -; FIJI-NEXT: s_load_dword s2, s[4:5], 0xc -; FIJI-NEXT: s_mov_b32 m0, -1 -; FIJI-NEXT: s_waitcnt lgkmcnt(0) -; FIJI-NEXT: v_mov_b32_e32 v1, s0 -; FIJI-NEXT: v_mov_b32_e32 v3, s1 -; FIJI-NEXT: s_and_b32 s3, s2, 0xffff -; FIJI-NEXT: v_mov_b32_e32 v2, s2 -; FIJI-NEXT: ds_write_b16 v1, v2 offset:4 -; FIJI-NEXT: s_waitcnt vmcnt(0) -; FIJI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; FIJI-NEXT: v_or_b32_e32 v0, s3, v0 -; FIJI-NEXT: v_bfe_u32 v0, v0, 16, 7 -; FIJI-NEXT: ds_write_b8 v1, v0 offset:6 -; FIJI-NEXT: ds_write_b32 v1, v3 -; FIJI-NEXT: s_endpgm +; FIJI-NEXT: s_or_b32 s0, s4, 14 +; FIJI-NEXT: v_mov_b32_e32 v0, s0 +; FIJI-NEXT: v_mov_b32_e32 v1, s5 +; FIJI-NEXT: flat_load_ubyte v0, v[0:1] +; FIJI-NEXT: s_load_dword s0, s[4:5], 0x0 +; FIJI-NEXT: s_load_dword s1, s[4:5], 0x8 +; FIJI-NEXT: s_load_dword s2, s[4:5], 0xc +; FIJI-NEXT: s_mov_b32 m0, -1 +; FIJI-NEXT: s_waitcnt lgkmcnt(0) +; FIJI-NEXT: v_mov_b32_e32 v1, s0 +; FIJI-NEXT: v_mov_b32_e32 v3, s1 +; FIJI-NEXT: s_and_b32 s3, s2, 0xffff +; FIJI-NEXT: v_mov_b32_e32 v2, s2 +; FIJI-NEXT: s_waitcnt vmcnt(0) +; 
FIJI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; FIJI-NEXT: v_or_b32_e32 v0, s3, v0 +; FIJI-NEXT: v_bfe_u32 v0, v0, 16, 7 +; FIJI-NEXT: ds_write_b8 v1, v0 offset:6 +; FIJI-NEXT: ds_write_b16 v1, v2 offset:4 +; FIJI-NEXT: ds_write_b32 v1, v3 +; FIJI-NEXT: s_endpgm ; ; GFX9-LABEL: local_store_i55: ; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: global_load_ubyte_d16_hi v2, v[0:1], off offset:14 -; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX9-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX9-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: s_and_b32 s3, s2, 0xffff -; GFX9-NEXT: ds_write_b16 v0, v1 offset:4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_e32 v1, s3, v2 -; GFX9-NEXT: v_and_b32_e32 v1, 0x7fffff, v1 -; GFX9-NEXT: ds_write_b8_d16_hi v0, v1 offset:6 -; GFX9-NEXT: ds_write_b32 v0, v3 -; GFX9-NEXT: s_endpgm +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: global_load_ubyte_d16_hi v2, v[0:1], off offset:14 +; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX9-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX9-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: s_and_b32 s3, s2, 0xffff +; GFX9-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_or_b32_e32 v2, s3, v2 +; GFX9-NEXT: v_and_b32_e32 v2, 0x7fffff, v2 +; GFX9-NEXT: ds_write_b8_d16_hi v0, v2 offset:6 +; GFX9-NEXT: ds_write_b16 v0, v1 offset:4 +; GFX9-NEXT: ds_write_b32 v0, v3 +; GFX9-NEXT: s_endpgm store i55 %arg, i55 addrspace(3)* %ptr, align 8 ret void } @@ -101,31 +101,31 @@ define amdgpu_kernel void @local_store_i48(i48 addrspace(3)* %ptr, i48 %arg) #0 { ; HAWAII-LABEL: local_store_i48: ; HAWAII: ; %bb.0: -; HAWAII-NEXT: s_load_dword s0, s[4:5], 0x0 -; HAWAII-NEXT: s_load_dword s1, s[4:5], 0x2 -; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x3 -; HAWAII-NEXT: s_mov_b32 m0, -1 -; HAWAII-NEXT: s_waitcnt lgkmcnt(0) -; HAWAII-NEXT: v_mov_b32_e32 v0, s0 -; HAWAII-NEXT: v_mov_b32_e32 v1, s2 -; HAWAII-NEXT: ds_write_b16 v0, v1 offset:4 -; HAWAII-NEXT: v_mov_b32_e32 v1, s1 -; HAWAII-NEXT: ds_write_b32 v0, v1 -; HAWAII-NEXT: s_endpgm +; HAWAII-NEXT: s_load_dword s0, s[4:5], 0x0 +; HAWAII-NEXT: s_load_dword s1, s[4:5], 0x2 +; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x3 +; HAWAII-NEXT: s_mov_b32 m0, -1 +; HAWAII-NEXT: s_waitcnt lgkmcnt(0) +; HAWAII-NEXT: v_mov_b32_e32 v0, s0 +; HAWAII-NEXT: v_mov_b32_e32 v2, s1 +; HAWAII-NEXT: v_mov_b32_e32 v1, s2 +; HAWAII-NEXT: ds_write_b16 v0, v1 offset:4 +; HAWAII-NEXT: ds_write_b32 v0, v2 +; HAWAII-NEXT: s_endpgm ; ; FIJI-LABEL: local_store_i48: ; FIJI: ; %bb.0: -; FIJI-NEXT: s_load_dword s0, s[4:5], 0x0 -; FIJI-NEXT: s_load_dword s1, s[4:5], 0x8 -; FIJI-NEXT: s_load_dword s2, s[4:5], 0xc -; FIJI-NEXT: s_mov_b32 m0, -1 -; FIJI-NEXT: s_waitcnt lgkmcnt(0) -; FIJI-NEXT: v_mov_b32_e32 v0, s0 -; FIJI-NEXT: v_mov_b32_e32 v1, s2 -; FIJI-NEXT: ds_write_b16 v0, v1 offset:4 -; FIJI-NEXT: v_mov_b32_e32 v1, s1 -; FIJI-NEXT: ds_write_b32 v0, v1 -; FIJI-NEXT: s_endpgm +; FIJI-NEXT: s_load_dword s0, s[4:5], 0x0 +; FIJI-NEXT: s_load_dword s1, s[4:5], 0x8 +; FIJI-NEXT: s_load_dword s2, s[4:5], 0xc +; FIJI-NEXT: s_mov_b32 m0, -1 +; FIJI-NEXT: s_waitcnt lgkmcnt(0) +; FIJI-NEXT: v_mov_b32_e32 v0, s0 +; FIJI-NEXT: 
v_mov_b32_e32 v2, s1 +; FIJI-NEXT: v_mov_b32_e32 v1, s2 +; FIJI-NEXT: ds_write_b16 v0, v1 offset:4 +; FIJI-NEXT: ds_write_b32 v0, v2 +; FIJI-NEXT: s_endpgm ; ; GFX9-LABEL: local_store_i48: ; GFX9: ; %bb.0: @@ -146,35 +146,35 @@ define amdgpu_kernel void @local_store_i65(i65 addrspace(3)* %ptr, i65 %arg) #0 { ; HAWAII-LABEL: local_store_i65: ; HAWAII: ; %bb.0: -; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x0 -; HAWAII-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 -; HAWAII-NEXT: s_load_dword s3, s[4:5], 0x4 -; HAWAII-NEXT: s_mov_b32 m0, -1 -; HAWAII-NEXT: s_waitcnt lgkmcnt(0) -; HAWAII-NEXT: v_mov_b32_e32 v2, s2 -; HAWAII-NEXT: s_and_b32 s3, s3, 1 -; HAWAII-NEXT: v_mov_b32_e32 v0, s3 -; HAWAII-NEXT: ds_write_b8 v2, v0 offset:8 -; HAWAII-NEXT: v_mov_b32_e32 v0, s0 -; HAWAII-NEXT: v_mov_b32_e32 v1, s1 -; HAWAII-NEXT: ds_write_b64 v2, v[0:1] -; HAWAII-NEXT: s_endpgm +; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x0 +; HAWAII-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; HAWAII-NEXT: s_load_dword s3, s[4:5], 0x4 +; HAWAII-NEXT: s_mov_b32 m0, -1 +; HAWAII-NEXT: s_waitcnt lgkmcnt(0) +; HAWAII-NEXT: v_mov_b32_e32 v2, s2 +; HAWAII-NEXT: v_mov_b32_e32 v0, s0 +; HAWAII-NEXT: s_and_b32 s3, s3, 1 +; HAWAII-NEXT: v_mov_b32_e32 v3, s3 +; HAWAII-NEXT: v_mov_b32_e32 v1, s1 +; HAWAII-NEXT: ds_write_b8 v2, v3 offset:8 +; HAWAII-NEXT: ds_write_b64 v2, v[0:1] +; HAWAII-NEXT: s_endpgm ; ; FIJI-LABEL: local_store_i65: ; FIJI: ; %bb.0: -; FIJI-NEXT: s_load_dword s2, s[4:5], 0x0 -; FIJI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; FIJI-NEXT: s_load_dword s3, s[4:5], 0x10 -; FIJI-NEXT: s_mov_b32 m0, -1 -; FIJI-NEXT: s_waitcnt lgkmcnt(0) -; FIJI-NEXT: v_mov_b32_e32 v2, s2 -; FIJI-NEXT: s_and_b32 s3, s3, 1 -; FIJI-NEXT: v_mov_b32_e32 v0, s3 -; FIJI-NEXT: ds_write_b8 v2, v0 offset:8 -; FIJI-NEXT: v_mov_b32_e32 v0, s0 -; FIJI-NEXT: v_mov_b32_e32 v1, s1 -; FIJI-NEXT: ds_write_b64 v2, v[0:1] -; FIJI-NEXT: s_endpgm +; FIJI-NEXT: s_load_dword s2, s[4:5], 0x0 +; FIJI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; FIJI-NEXT: s_load_dword s3, s[4:5], 0x10 +; FIJI-NEXT: s_mov_b32 m0, -1 +; FIJI-NEXT: s_waitcnt lgkmcnt(0) +; FIJI-NEXT: v_mov_b32_e32 v2, s2 +; FIJI-NEXT: v_mov_b32_e32 v0, s0 +; FIJI-NEXT: s_and_b32 s3, s3, 1 +; FIJI-NEXT: v_mov_b32_e32 v3, s3 +; FIJI-NEXT: v_mov_b32_e32 v1, s1 +; FIJI-NEXT: ds_write_b8 v2, v3 offset:8 +; FIJI-NEXT: ds_write_b64 v2, v[0:1] +; FIJI-NEXT: s_endpgm ; ; GFX9-LABEL: local_store_i65: ; GFX9: ; %bb.0: @@ -218,22 +218,22 @@ define void @local_store_i17(i17 addrspace(3)* %ptr, i17 %arg) #0 { ; CIVI-LABEL: local_store_i17: ; CIVI: ; %bb.0: -; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CIVI-NEXT: s_mov_b32 m0, -1 -; CIVI-NEXT: ds_write_b16 v0, v1 -; CIVI-NEXT: v_bfe_u32 v1, v1, 16, 1 -; CIVI-NEXT: ds_write_b8 v0, v1 offset:2 -; CIVI-NEXT: s_waitcnt lgkmcnt(0) -; CIVI-NEXT: s_setpc_b64 s[30:31] +; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CIVI-NEXT: s_mov_b32 m0, -1 +; CIVI-NEXT: v_bfe_u32 v2, v1, 16, 1 +; CIVI-NEXT: ds_write_b16 v0, v1 +; CIVI-NEXT: ds_write_b8 v0, v2 offset:2 +; CIVI-NEXT: s_waitcnt lgkmcnt(0) +; CIVI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: local_store_i17: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ds_write_b16 v0, v1 -; GFX9-NEXT: v_and_b32_e32 v1, 0x1ffff, v1 -; GFX9-NEXT: ds_write_b8_d16_hi v0, v1 offset:2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v2, 0x1ffff, v1 +; GFX9-NEXT: ds_write_b16 v0, v1 +; GFX9-NEXT: 
ds_write_b8_d16_hi v0, v2 offset:2 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] store i17 %arg, i17 addrspace(3)* %ptr, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/token-factor-inline-limit-test.ll b/llvm/test/CodeGen/AMDGPU/token-factor-inline-limit-test.ll --- a/llvm/test/CodeGen/AMDGPU/token-factor-inline-limit-test.ll +++ b/llvm/test/CodeGen/AMDGPU/token-factor-inline-limit-test.ll @@ -9,14 +9,14 @@ ; GCN-TFILD: v_mov_b32_e32 [[REG10:v[0-9]+]], 10 ; GCN-TFILD: v_mov_b32_e32 [[REG11:v[0-9]+]], 11 ; GCN-TFILD: v_mov_b32_e32 [[REG12:v[0-9]+]], 12 +; GCN-TFILD: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 +; GCN-TFILD: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 +; GCN-TFILD: v_mov_b32_e32 [[REG15:v[0-9]+]], 15 ; GCN-TFILD: buffer_store_dword [[REG8]], {{.*$}} ; GCN-TFILD: buffer_store_dword [[REG9]], {{.*}} offset:4 ; GCN-TFILD: buffer_store_dword [[REG10]], {{.*}} offset:8 ; GCN-TFILD: buffer_store_dword [[REG11]], {{.*}} offset:12 ; GCN-TFILD: buffer_store_dword [[REG12]], {{.*}} offset:16 -; GCN-TFILD: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 -; GCN-TFILD: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 -; GCN-TFILD: v_mov_b32_e32 [[REG15:v[0-9]+]], 15 ; GCN-TFILD: buffer_store_dword [[REG13]], {{.*}} offset:20 ; GCN-TFILD: buffer_store_dword [[REG14]], {{.*}} offset:24 ; GCN-TFILD: buffer_store_dword [[REG15]], {{.*}} offset:28 @@ -24,14 +24,14 @@ ; GCN-TFIL7: v_mov_b32_e32 [[REG15:v[0-9]+]], 15 ; GCN-TFIL7: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 ; GCN-TFIL7: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 -; GCN-TFIL7: buffer_store_dword [[REG15]], {{.*}} offset:28 -; GCN-TFIL7: buffer_store_dword [[REG14]], {{.*}} offset:24 -; GCN-TFIL7: buffer_store_dword [[REG13]], {{.*}} offset:20 ; GCN-TFIL7: v_mov_b32_e32 [[REG12:v[0-9]+]], 12 ; GCN-TFIL7: v_mov_b32_e32 [[REG11:v[0-9]+]], 11 ; GCN-TFIL7: v_mov_b32_e32 [[REG10:v[0-9]+]], 10 ; GCN-TFIL7: v_mov_b32_e32 [[REG9:v[0-9]+]], 9 ; GCN-TFIL7: v_mov_b32_e32 [[REG8:v[0-9]+]], 8 +; GCN-TFIL7: buffer_store_dword [[REG15]], {{.*}} offset:28 +; GCN-TFIL7: buffer_store_dword [[REG14]], {{.*}} offset:24 +; GCN-TFIL7: buffer_store_dword [[REG13]], {{.*}} offset:20 ; GCN-TFIL7: buffer_store_dword [[REG12]], {{.*}} offset:16 ; GCN-TFIL7: buffer_store_dword [[REG11]], {{.*}} offset:12 ; GCN-TFIL7: buffer_store_dword [[REG10]], {{.*}} offset:8 diff --git a/llvm/test/CodeGen/AMDGPU/udivrem.ll b/llvm/test/CodeGen/AMDGPU/udivrem.ll --- a/llvm/test/CodeGen/AMDGPU/udivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/udivrem.ll @@ -39,6 +39,7 @@ ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x26 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; GFX6-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x13 +; GFX6-NEXT: s_load_dword s0, s[0:1], 0x1d ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_mov_b32 s10, s6 @@ -50,14 +51,12 @@ ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: v_mul_lo_u32 v1, s2, v0 -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x1d ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX6-NEXT: v_mul_hi_u32 v0, s0, v0 ; GFX6-NEXT: v_mul_lo_u32 v1, v0, s3 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 -; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s2, v1 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s0, v1 ; GFX6-NEXT: v_cmp_le_u32_e64 s[0:1], s3, v1 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s3, v1 diff --git a/llvm/test/CodeGen/Hexagon/atomic-store-byte.ll 
b/llvm/test/CodeGen/Hexagon/atomic-store-byte.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/atomic-store-byte.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=hexagon < %s | FileCheck %s
+
+; Test that the address for a store conditional of a byte is aligned
+; correctly to use the memw_locked instruction.
+
+; CHECK: [[REG:(r[0-9]+)]] = and(r{{[0-9]+}},#-4)
+; CHECK: = memw_locked([[REG]])
+; CHECK: memw_locked([[REG]],p{{[0-4]}}) =
+
+@foo.a00 = internal global i8 0, align 1
+
+; Function Attrs: nofree norecurse nounwind
+define dso_local void @foo() local_unnamed_addr #0 {
+entry:
+  %0 = cmpxchg volatile i8* @foo.a00, i8 0, i8 1 seq_cst seq_cst
+  ret void
+}
+
diff --git a/test/CodeGen/PowerPC/aix-complex.ll b/llvm/test/CodeGen/PowerPC/aix-complex.ll
rename from test/CodeGen/PowerPC/aix-complex.ll
rename to llvm/test/CodeGen/PowerPC/aix-complex.ll
diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-funcsect.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-funcsect.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-funcsect.ll
@@ -0,0 +1,95 @@
+; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr4 \
+; RUN: -mattr=-altivec -function-sections < %s | \
+; RUN: FileCheck --check-prefix=ASM %s
+; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr4 \
+; RUN: -mattr=-altivec -function-sections < %s | \
+; RUN: FileCheck --check-prefix=ASM %s
+
+@alias_foo = alias void (...), bitcast (void ()* @foo to void (...)*)
+
+define void @foo() {
+entry:
+  ret void
+}
+
+define hidden void @hidden_foo() {
+entry:
+  ret void
+}
+
+define void @bar() {
+entry:
+  call void @foo()
+  call void @static_overalign_foo()
+  call void bitcast (void (...)* @alias_foo to void ()*)()
+  call void bitcast (void (...)* @extern_foo to void ()*)()
+  call void @hidden_foo()
+  ret void
+}
+
+declare void @extern_foo(...)
+
+define internal void @static_overalign_foo() align 64 {
+entry:
+  ret void
+}
+
+; ASM: .csect .foo[PR],2
+; ASM-NEXT: .globl foo[DS] # -- Begin function foo
+; ASM-NEXT: .globl .foo[PR]
+; ASM-NEXT: .align 4
+; ASM-NEXT: .csect foo[DS]
+; ASM-NEXT: alias_foo: # @foo
+; ASM-NEXT: .vbyte {{[0-9]+}}, .foo[PR]
+; ASM-NEXT: .vbyte {{[0-9]+}}, TOC[TC0]
+; ASM-NEXT: .vbyte {{[0-9]+}}, 0
+; ASM-NEXT: .csect .foo[PR],2
+; ASM-NEXT: .alias_foo:
+; ASM-NEXT: # %bb.0: # %entry
+; ASM-NEXT: blr
+; ASM: .csect .hidden_foo[PR],2
+; ASM-NEXT: .globl hidden_foo[DS],hidden # -- Begin function hidden_foo
+; ASM-NEXT: .globl .hidden_foo[PR],hidden
+; ASM-NEXT: .align 4
+; ASM-NEXT: .csect hidden_foo[DS]
+; ASM-NEXT: .vbyte {{[0-9]+}}, .hidden_foo[PR] # @hidden_foo
+; ASM-NEXT: .vbyte {{[0-9]+}}, TOC[TC0]
+; ASM-NEXT: .vbyte {{[0-9]+}}, 0
+; ASM-NEXT: .csect .hidden_foo[PR]
+; ASM-NEXT: # %bb.0: # %entry
+; ASM-NEXT: blr
+; ASM: .csect .bar[PR],2
+; ASM-NEXT: .globl bar[DS] # -- Begin function bar
+; ASM-NEXT: .globl .bar[PR]
+; ASM-NEXT: .align 4
+; ASM-NEXT: .csect bar[DS]
+; ASM-NEXT: .vbyte {{[0-9]+}}, .bar[PR] # @bar
+; ASM-NEXT: .vbyte {{[0-9]+}}, TOC[TC0]
+; ASM-NEXT: .vbyte {{[0-9]+}}, 0
+; ASM-NEXT: .csect .bar[PR],2
+; ASM-NEXT: # %bb.0: # %entry
+; ASM: bl .foo[PR]
+; ASM-NEXT: nop
+; ASM-NEXT: bl .static_overalign_foo[PR]
+; ASM-NEXT: nop
+; ASM-NEXT: bl .alias_foo
+; ASM-NEXT: nop
+; ASM-NEXT: bl .extern_foo
+; ASM-NEXT: nop
+; ASM-NEXT: bl .hidden_foo[PR]
+; ASM-NEXT: nop
+; ASM: .csect .static_overalign_foo[PR],6
+; ASM-NEXT: .lglobl static_overalign_foo[DS] # -- Begin function static_overalign_foo
+; ASM-NEXT: .lglobl .static_overalign_foo[PR]
+; ASM-NEXT: .align 6
+; ASM-NEXT: .csect static_overalign_foo[DS]
+; ASM-NEXT: .vbyte {{[0-9]+}}, .static_overalign_foo[PR] # @static_overalign_foo
+; ASM-NEXT: .vbyte {{[0-9]+}}, TOC[TC0]
+; ASM-NEXT: .vbyte {{[0-9]+}}, 0
+; ASM-NEXT: .csect .static_overalign_foo[PR],6
+; ASM-NEXT: # %bb.0: # %entry
+; ASM-NEXT: blr
+; ASM: .extern .extern_foo
+; ASM-NEXT: .extern extern_foo[DS]
+; ASM-NEXT: .globl alias_foo
+; ASM-NEXT: .globl .alias_foo
diff --git a/llvm/test/MC/X86/directive-arch.s b/llvm/test/MC/X86/directive-arch.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/X86/directive-arch.s
@@ -0,0 +1,10 @@
+## We currently parse but ignore .arch directives.
+# RUN: llvm-mc -triple=x86_64 %s | FileCheck /dev/null --implicit-check-not=.arch + +.arch i286 +.arch generic32 + +.arch .avx512vl +.arch .noavx512bw +.arch .nop +.arch .sse4.2 diff --git a/llvm/test/Other/loop-pm-invalidation.ll b/llvm/test/Other/loop-pm-invalidation.ll --- a/llvm/test/Other/loop-pm-invalidation.ll +++ b/llvm/test/Other/loop-pm-invalidation.ll @@ -18,7 +18,7 @@ ; RUN: | FileCheck %s --check-prefix=CHECK-SCEV-INV-AFTER-DELETE define void @no_loops() { -; CHECK-LOOP-INV-LABEL: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on no_loops +; CHECK-LOOP-INV: Starting {{.*}}Function pass manager run ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Function pass manager run ; CHECK-LOOP-INV-NEXT: Running pass: LoopSimplifyPass ; CHECK-LOOP-INV-NEXT: Running analysis: LoopAnalysis @@ -29,7 +29,6 @@ ; CHECK-LOOP-INV-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}LoopAnalysis ; CHECK-LOOP-INV-NEXT: Invalidating all non-preserved analyses ; CHECK-LOOP-INV-NEXT: Invalidating analysis: LoopAnalysis -; CHECK-LOOP-INV-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on no_loops ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Function pass manager run ; CHECK-LOOP-INV-NEXT: Running pass: LoopSimplifyPass ; CHECK-LOOP-INV-NEXT: Running analysis: LoopAnalysis @@ -37,7 +36,7 @@ ; CHECK-LOOP-INV-NEXT: Finished {{.*}}Function pass manager run ; CHECK-LOOP-INV-NEXT: Finished {{.*}}Function pass manager run. ; -; CHECK-SCEV-INV-LABEL: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on no_loops +; CHECK-SCEV-INV: Starting {{.*}}Function pass manager run ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Function pass manager run ; CHECK-SCEV-INV-NEXT: Running pass: LoopSimplifyPass ; CHECK-SCEV-INV-NEXT: Running analysis: LoopAnalysis @@ -47,7 +46,6 @@ ; CHECK-SCEV-INV-NEXT: Finished {{.*}}Function pass manager run ; CHECK-SCEV-INV-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ScalarEvolutionAnalysis ; CHECK-SCEV-INV-NEXT: Invalidating all non-preserved analyses -; CHECK-SCEV-INV-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on no_loops ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Function pass manager run ; CHECK-SCEV-INV-NEXT: Running pass: LoopSimplifyPass ; CHECK-SCEV-INV-NEXT: Running pass: LCSSAPass @@ -59,7 +57,7 @@ } define void @one_loop(i1* %ptr) { -; CHECK-LOOP-INV-LABEL: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on one_loop +; CHECK-LOOP-INV: Starting {{.*}}Function pass manager run ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Function pass manager run ; CHECK-LOOP-INV-NEXT: Running pass: LoopSimplifyPass ; CHECK-LOOP-INV-NEXT: Running analysis: LoopAnalysis @@ -73,7 +71,6 @@ ; CHECK-LOOP-INV-NEXT: Running analysis: TargetIRAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Loop pass manager run. -; CHECK-LOOP-INV-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass ; CHECK-LOOP-INV-NEXT: Finished {{.*}}Loop pass manager run. 
; CHECK-LOOP-INV-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}LoopAnalysis @@ -82,7 +79,6 @@ ; CHECK-LOOP-INV-NEXT: Invalidating analysis: LoopAnalysis ; CHECK-LOOP-INV-NEXT: Invalidating analysis: ScalarEvolutionAnalysis ; CHECK-LOOP-INV-NEXT: Invalidating analysis: InnerAnalysisManagerProxy<{{.*}}Loop -; CHECK-LOOP-INV-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on one_loop ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Function pass manager run ; CHECK-LOOP-INV-NEXT: Running pass: LoopSimplifyPass ; CHECK-LOOP-INV-NEXT: Running analysis: LoopAnalysis @@ -91,12 +87,11 @@ ; CHECK-LOOP-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Loop pass manager run. -; CHECK-LOOP-INV-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass ; CHECK-LOOP-INV-NEXT: Finished {{.*}}Loop pass manager run. ; CHECK-LOOP-INV-NEXT: Finished {{.*}}Function pass manager run. ; -; CHECK-SCEV-INV-LABEL: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on one_loop +; CHECK-SCEV-INV: Starting {{.*}}Function pass manager run ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Function pass manager run ; CHECK-SCEV-INV-NEXT: Running pass: LoopSimplifyPass ; CHECK-SCEV-INV-NEXT: Running analysis: LoopAnalysis @@ -110,7 +105,6 @@ ; CHECK-SCEV-INV-NEXT: Running analysis: TargetIRAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Loop pass manager run. -; CHECK-SCEV-INV-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass ; CHECK-SCEV-INV-NEXT: Finished {{.*}}Loop pass manager run. ; CHECK-SCEV-INV-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ScalarEvolutionAnalysis @@ -118,7 +112,6 @@ ; CHECK-SCEV-INV-NEXT: Clearing all analysis results for: ; CHECK-SCEV-INV-NEXT: Invalidating analysis: ScalarEvolutionAnalysis ; CHECK-SCEV-INV-NEXT: Invalidating analysis: InnerAnalysisManagerProxy<{{.*}}Loop -; CHECK-SCEV-INV-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on one_loop ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Function pass manager run ; CHECK-SCEV-INV-NEXT: Running pass: LoopSimplifyPass ; CHECK-SCEV-INV-NEXT: Running pass: LCSSAPass @@ -126,7 +119,6 @@ ; CHECK-SCEV-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Loop pass manager run. -; CHECK-SCEV-INV-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass ; CHECK-SCEV-INV-NEXT: Finished {{.*}}Loop pass manager run. ; CHECK-SCEV-INV-NEXT: Finished {{.*}}Function pass manager run. @@ -143,7 +135,7 @@ } define void @nested_loops(i1* %ptr) { -; CHECK-LOOP-INV-LABEL: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on nested_loops +; CHECK-LOOP-INV: Starting {{.*}}Function pass manager run ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Function pass manager run ; CHECK-LOOP-INV-NEXT: Running pass: LoopSimplifyPass ; CHECK-LOOP-INV-NEXT: Running analysis: LoopAnalysis @@ -157,11 +149,9 @@ ; CHECK-LOOP-INV-NEXT: Running analysis: TargetIRAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Loop pass manager run. 
-; CHECK-LOOP-INV-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass ; CHECK-LOOP-INV-NEXT: Finished {{.*}}Loop pass manager run. ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Loop pass manager run. -; CHECK-LOOP-INV-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass ; CHECK-LOOP-INV: Finished {{.*}}Loop pass manager run. ; CHECK-LOOP-INV-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}LoopAnalysis @@ -171,7 +161,6 @@ ; CHECK-LOOP-INV-NEXT: Invalidating analysis: LoopAnalysis ; CHECK-LOOP-INV-NEXT: Invalidating analysis: ScalarEvolutionAnalysis ; CHECK-LOOP-INV-NEXT: Invalidating analysis: InnerAnalysisManagerProxy<{{.*}}Loop -; CHECK-LOOP-INV-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on nested_loops ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Function pass manager run ; CHECK-LOOP-INV-NEXT: Running pass: LoopSimplifyPass ; CHECK-LOOP-INV-NEXT: Running analysis: LoopAnalysis @@ -180,16 +169,14 @@ ; CHECK-LOOP-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Loop pass manager run. -; CHECK-LOOP-INV-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass ; CHECK-LOOP-INV-NEXT: Finished {{.*}}Loop pass manager run. ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Loop pass manager run. -; CHECK-LOOP-INV-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass ; CHECK-LOOP-INV: Finished {{.*}}Loop pass manager run. ; CHECK-LOOP-INV-NEXT: Finished {{.*}}Function pass manager run. ; -; CHECK-SCEV-INV-LABEL: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on nested_loops +; CHECK-SCEV-INV: Starting {{.*}}Function pass manager run ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Function pass manager run ; CHECK-SCEV-INV-NEXT: Running pass: LoopSimplifyPass ; CHECK-SCEV-INV-NEXT: Running analysis: LoopAnalysis @@ -203,11 +190,9 @@ ; CHECK-SCEV-INV-NEXT: Running analysis: TargetIRAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Loop pass manager run. -; CHECK-SCEV-INV-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass ; CHECK-SCEV-INV-NEXT: Finished {{.*}}Loop pass manager run. ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Loop pass manager run. -; CHECK-SCEV-INV-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass ; CHECK-SCEV-INV: Finished {{.*}}Loop pass manager run. ; CHECK-SCEV-INV-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ScalarEvolutionAnalysis @@ -216,7 +201,6 @@ ; CHECK-SCEV-INV-NEXT: Clearing all analysis results for: ; CHECK-SCEV-INV-NEXT: Invalidating analysis: ScalarEvolutionAnalysis ; CHECK-SCEV-INV-NEXT: Invalidating analysis: InnerAnalysisManagerProxy<{{.*}}Loop -; CHECK-SCEV-INV-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on nested_loops ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Function pass manager run ; CHECK-SCEV-INV-NEXT: Running pass: LoopSimplifyPass ; CHECK-SCEV-INV-NEXT: Running pass: LCSSAPass @@ -224,11 +208,9 @@ ; CHECK-SCEV-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Loop pass manager run. 
-; CHECK-SCEV-INV-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass ; CHECK-SCEV-INV-NEXT: Finished {{.*}}Loop pass manager run. ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Loop pass manager run. -; CHECK-SCEV-INV-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass ; CHECK-SCEV-INV: Finished {{.*}}Loop pass manager run. ; CHECK-SCEV-INV-NEXT: Finished {{.*}}Function pass manager run. @@ -252,7 +234,7 @@ } define void @dead_loop() { -; CHECK-LOOP-INV-LABEL: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on dead_loop +; CHECK-LOOP-INV: Starting {{.*}}Function pass manager run ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Function pass manager run ; CHECK-LOOP-INV-NEXT: Running pass: LoopSimplifyPass ; CHECK-LOOP-INV-NEXT: Running analysis: LoopAnalysis @@ -266,7 +248,6 @@ ; CHECK-LOOP-INV-NEXT: Running analysis: TargetIRAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Loop pass manager run. -; CHECK-LOOP-INV-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass ; CHECK-LOOP-INV-NEXT: Finished {{.*}}Loop pass manager run. ; CHECK-LOOP-INV-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}LoopAnalysis @@ -275,7 +256,6 @@ ; CHECK-LOOP-INV-NEXT: Invalidating analysis: LoopAnalysis ; CHECK-LOOP-INV-NEXT: Invalidating analysis: ScalarEvolutionAnalysis ; CHECK-LOOP-INV-NEXT: Invalidating analysis: InnerAnalysisManagerProxy<{{.*}}Loop -; CHECK-LOOP-INV-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on dead_loop ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Function pass manager run ; CHECK-LOOP-INV-NEXT: Running pass: LoopSimplifyPass ; CHECK-LOOP-INV-NEXT: Running analysis: LoopAnalysis @@ -284,12 +264,11 @@ ; CHECK-LOOP-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Loop pass manager run. -; CHECK-LOOP-INV-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass ; CHECK-LOOP-INV-NEXT: Finished {{.*}}Loop pass manager run. ; CHECK-LOOP-INV-NEXT: Finished {{.*}}Function pass manager run. ; -; CHECK-SCEV-INV-LABEL: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on dead_loop +; CHECK-SCEV-INV: Starting {{.*}}Function pass manager run ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Function pass manager run ; CHECK-SCEV-INV-NEXT: Running pass: LoopSimplifyPass ; CHECK-SCEV-INV-NEXT: Running analysis: LoopAnalysis @@ -303,7 +282,6 @@ ; CHECK-SCEV-INV-NEXT: Running analysis: TargetIRAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Loop pass manager run. -; CHECK-SCEV-INV-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass ; CHECK-SCEV-INV-NEXT: Finished {{.*}}Loop pass manager run. 
; CHECK-SCEV-INV-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ScalarEvolutionAnalysis @@ -311,7 +289,6 @@ ; CHECK-SCEV-INV-NEXT: Clearing all analysis results for: ; CHECK-SCEV-INV-NEXT: Invalidating analysis: ScalarEvolutionAnalysis ; CHECK-SCEV-INV-NEXT: Invalidating analysis: InnerAnalysisManagerProxy<{{.*}}Loop -; CHECK-SCEV-INV-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on dead_loop ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Function pass manager run ; CHECK-SCEV-INV-NEXT: Running pass: LoopSimplifyPass ; CHECK-SCEV-INV-NEXT: Running pass: LCSSAPass @@ -319,14 +296,11 @@ ; CHECK-SCEV-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Loop pass manager run. -; CHECK-SCEV-INV-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass ; CHECK-SCEV-INV-NEXT: Finished {{.*}}Loop pass manager run. ; CHECK-SCEV-INV-NEXT: Finished {{.*}}Function pass manager run. ; -; CHECK-SCEV-INV-AFTER-DELETE-LABEL: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on dead_loop -; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Starting {{.*}}Function pass manager run -; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running pass: LoopSimplifyPass +; CHECK-SCEV-INV-AFTER-DELETE-LABEL: Running pass: LoopSimplifyPass on dead_loop ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: LoopAnalysis ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: DominatorTreeAnalysis ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: AssumptionAnalysis @@ -338,7 +312,6 @@ ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: TargetIRAnalysis ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Starting {{.*}}Loop pass manager run. 
-; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running pass: NoOpLoopPass ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running pass: LoopDeletionPass ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Clearing all analysis results for: @@ -348,7 +321,6 @@ ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Invalidating all non-preserved analyses ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Invalidating analysis: ScalarEvolutionAnalysis ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Invalidating analysis: InnerAnalysisManagerProxy<{{.*}}Loop -; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on dead_loop ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Starting {{.*}}Function pass manager run ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running pass: LoopSimplifyPass ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running pass: LCSSAPass diff --git a/llvm/test/Other/new-pass-manager.ll b/llvm/test/Other/new-pass-manager.ll --- a/llvm/test/Other/new-pass-manager.ll +++ b/llvm/test/Other/new-pass-manager.ll @@ -19,14 +19,11 @@ ; RUN: -passes='cgscc(no-op-cgscc)' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-CGSCC-PASS ; CHECK-CGSCC-PASS: Starting llvm::Module pass manager run -; CHECK-CGSCC-PASS-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor ; CHECK-CGSCC-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*(CGSCCAnalysisManager|AnalysisManager<.*LazyCallGraph::SCC.*>).*}},{{.*}}Module> ; CHECK-CGSCC-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*(FunctionAnalysisManager|AnalysisManager<.*Function.*>).*}},{{.*}}Module> ; CHECK-CGSCC-PASS-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-CGSCC-PASS-NEXT: Running analysis: TargetLibraryAnalysis -; CHECK-CGSCC-PASS-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-CGSCC-PASS-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy -; CHECK-CGSCC-PASS-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-CGSCC-PASS-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}> ; CHECK-CGSCC-PASS-NEXT: Starting CGSCC pass manager run ; CHECK-CGSCC-PASS-NEXT: Running pass: NoOpCGSCCPass @@ -40,9 +37,7 @@ ; RUN: -passes='function(no-op-function)' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-FUNCTION-PASS ; CHECK-FUNCTION-PASS: Starting llvm::Module pass manager run -; CHECK-FUNCTION-PASS-NEXT: Running pass: ModuleToFunctionPassAdaptor ; CHECK-FUNCTION-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}> -; CHECK-FUNCTION-PASS-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-FUNCTION-PASS-NEXT: Starting llvm::Function pass manager run ; CHECK-FUNCTION-PASS-NEXT: Running pass: NoOpFunctionPass ; CHECK-FUNCTION-PASS-NEXT: Finished llvm::Function pass manager run @@ -71,7 +66,6 @@ ; RUN: | FileCheck %s --check-prefix=CHECK-FUNCTION-PRINT ; CHECK-FUNCTION-PRINT: Starting llvm::Module pass manager run ; CHECK-FUNCTION-PRINT: Running pass: VerifierPass -; CHECK-FUNCTION-PRINT: Running pass: ModuleToFunctionPassAdaptor ; CHECK-FUNCTION-PRINT: Running analysis: InnerAnalysisManagerProxy<{{.*}}> ; CHECK-FUNCTION-PRINT: Starting llvm::Function pass manager run ; CHECK-FUNCTION-PRINT: Running pass: PrintFunctionPass @@ -406,14 +400,11 @@ ; RUN: -passes='cgscc(repeat<3>(no-op-cgscc))' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-REPEAT-CGSCC-PASS ; CHECK-REPEAT-CGSCC-PASS: Starting llvm::Module pass manager run -; CHECK-REPEAT-CGSCC-PASS-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor ; CHECK-REPEAT-CGSCC-PASS-NEXT: Running analysis: 
InnerAnalysisManagerProxy<{{.*(CGSCCAnalysisManager|AnalysisManager<.*LazyCallGraph::SCC.*>).*}},{{.*}}Module> ; CHECK-REPEAT-CGSCC-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*(FunctionAnalysisManager|AnalysisManager<.*Function.*>).*}},{{.*}}Module> ; CHECK-REPEAT-CGSCC-PASS-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-REPEAT-CGSCC-PASS-NEXT: Running analysis: TargetLibraryAnalysis -; CHECK-REPEAT-CGSCC-PASS-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-REPEAT-CGSCC-PASS-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy -; CHECK-REPEAT-CGSCC-PASS-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-REPEAT-CGSCC-PASS-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}> ; CHECK-REPEAT-CGSCC-PASS-NEXT: Starting CGSCC pass manager run ; CHECK-REPEAT-CGSCC-PASS-NEXT: Running pass: RepeatedPass @@ -433,9 +424,7 @@ ; RUN: -passes='function(repeat<3>(no-op-function))' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-REPEAT-FUNCTION-PASS ; CHECK-REPEAT-FUNCTION-PASS: Starting llvm::Module pass manager run -; CHECK-REPEAT-FUNCTION-PASS-NEXT: Running pass: ModuleToFunctionPassAdaptor ; CHECK-REPEAT-FUNCTION-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}> -; CHECK-REPEAT-FUNCTION-PASS-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-REPEAT-FUNCTION-PASS-NEXT: Starting llvm::Function pass manager run ; CHECK-REPEAT-FUNCTION-PASS-NEXT: Running pass: RepeatedPass ; CHECK-REPEAT-FUNCTION-PASS-NEXT: Starting llvm::Function pass manager run @@ -454,11 +443,8 @@ ; RUN: -passes='loop(repeat<3>(no-op-loop))' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-REPEAT-LOOP-PASS ; CHECK-REPEAT-LOOP-PASS: Starting llvm::Module pass manager run -; CHECK-REPEAT-LOOP-PASS-NEXT: Running pass: ModuleToFunctionPassAdaptor ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}> -; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-REPEAT-LOOP-PASS-NEXT: Starting llvm::Function pass manager run -; CHECK-REPEAT-LOOP-PASS-NEXT: Running pass: FunctionToLoopPassAdaptor ; CHECK-REPEAT-LOOP-PASS-NEXT: Starting llvm::Function pass manager run ; CHECK-REPEAT-LOOP-PASS-NEXT: Running pass: LoopSimplify ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: LoopAnalysis @@ -473,7 +459,6 @@ ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: TargetIRAnalysis ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}> ; CHECK-REPEAT-LOOP-PASS-NEXT: Starting Loop pass manager run -; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-REPEAT-LOOP-PASS-NEXT: Running pass: RepeatedPass ; CHECK-REPEAT-LOOP-PASS-NEXT: Starting Loop pass manager run ; CHECK-REPEAT-LOOP-PASS-NEXT: Running pass: NoOpLoopPass diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -88,20 +88,14 @@ ; RUN: --check-prefix=%llvmcheckext \ ; RUN: --check-prefix=CHECK-EP-OPTIMIZER-LAST --check-prefix=CHECK-O23SZ -; CHECK-O: Running analysis: PassInstrumentationAnalysis -; CHECK-O-NEXT: Starting llvm::Module pass manager run. -; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Starting llvm::Module pass manager run. 
; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass ; CHECK-EP-PIPELINE-START-NEXT: Running pass: NoOpModulePass -; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Starting llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis @@ -116,9 +110,8 @@ ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass> +; CHECK-O-NEXT: Running pass: PromotePass ; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis @@ -135,19 +128,17 @@ ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis -; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}> +; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass ; CHECK-O-NEXT: Starting CGSCC pass manager run. ; CHECK-O-NEXT: Running pass: InlinerPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass ; CHECK-O2-NEXT: Running pass: OpenMPOptPass on (foo) ; CHECK-O3-NEXT: Running pass: OpenMPOptPass on (foo) -; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass @@ -167,7 +158,6 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopStandardAnalysisResults{{.*}}> ; CHECK-O-NEXT: Starting llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running analysis: LoopAnalysis @@ -176,7 +166,6 @@ ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Starting Loop pass manager run. -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass ; CHECK-O-NEXT: Running pass: LoopRotatePass @@ -185,7 +174,6 @@ ; CHECK-O-NEXT: Finished Loop pass manager run. 
; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopStandardAnalysisResults{{.*}}> ; CHECK-O-NEXT: Starting llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass @@ -214,11 +202,11 @@ ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass -; CHECK-O23SZ-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> ; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run. ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass ; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run. +; CHECK-O23SZ-NEXT: Running pass: LICMPass ; CHECK-EP-SCALAR-LATE-NEXT: Running pass: NoOpFunctionPass ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis @@ -230,24 +218,22 @@ ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished llvm::Module pass manager run. ; CHECK-O-NEXT: Finished llvm::Module pass manager run. -; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O2-LTO-NOT: Running pass: EliminateAvailableExternallyPass ; CHECK-O: Running pass: ReversePostOrderFunctionAttrsPass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: Float2IntPass ; CHECK-O-NEXT: Running pass: LowerConstantIntrinsicsPass on foo ; CHECK-EP-VECTORIZER-START-NEXT: Running pass: NoOpFunctionPass ; CHECK-EXT: Running pass: {{.*}}::Bye on foo -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopRotatePass ; CHECK-O-NEXT: Starting llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. +; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LoopDistributePass ; CHECK-O-NEXT: Running pass: InjectTLIMappings ; CHECK-O-NEXT: Running pass: LoopVectorizePass @@ -266,11 +252,11 @@ ; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass ; CHECK-O-NEXT: Starting llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. 
+; CHECK-O-NEXT: Running pass: LICMPass ; CHECK-O-NEXT: Running pass: AlignmentFromAssumptionsPass ; CHECK-O-NEXT: Running pass: LoopSinkPass ; CHECK-O-NEXT: Running pass: InstSimplifyPass diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -23,19 +23,13 @@ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 \ ; RUN: --check-prefix=CHECK-O3 --check-prefix=CHECK-EP-Peephole -; CHECK-O: Running analysis: PassInstrumentationAnalysis -; CHECK-O-NEXT: Starting llvm::Module pass manager run. -; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module +; CHECK-O: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass ; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Module ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis -; CHECK-O1-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PostOrderFunctionAttrsPass> -; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> -; CHECK-O2-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O2-NEXT: Starting llvm::Function pass manager run. ; CHECK-O2-NEXT: Running pass: CallSiteSplittingPass on foo ; CHECK-O2-NEXT: Running analysis: TargetLibraryAnalysis on foo @@ -48,14 +42,12 @@ ; CHECK-O2-NEXT: Running pass: IPSCCPPass ; CHECK-O2-NEXT: Running analysis: AssumptionAnalysis on foo ; CHECK-O2-NEXT: Running pass: CalledValuePropagationPass -; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PostOrderFunctionAttrsPass> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis -; CHECK-O1-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}> +; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: AAManager ; CHECK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis @@ -63,10 +55,9 @@ ; CHECK-O-NEXT: Running pass: WholeProgramDevirtPass ; CHECK-O1-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O2-NEXT: Running pass: GlobalOptPass -; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass> +; CHECK-O2-NEXT: Running pass: PromotePass ; CHECK-O2-NEXT: Running pass: ConstantMergePass ; CHECK-O2-NEXT: Running pass: DeadArgumentEliminationPass -; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O2-NEXT: Starting llvm::Function pass manager run. ; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass ; CHECK-O2-NEXT: Running pass: InstCombinePass @@ -76,14 +67,12 @@ ; CHECK-O2-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O2-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O2-NEXT: Starting llvm::Module pass manager run. -; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O2-NEXT: Starting CGSCC pass manager run. 
; CHECK-O2-NEXT: Running pass: InlinerPass ; CHECK-O2-NEXT: Finished CGSCC pass manager run. ; CHECK-O2-NEXT: Finished llvm::Module pass manager run. ; CHECK-O2-NEXT: Running pass: GlobalOptPass ; CHECK-O2-NEXT: Running pass: GlobalDCEPass -; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O2-NEXT: Starting llvm::Function pass manager run. ; CHECK-O2-NEXT: Running pass: InstCombinePass ; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass @@ -92,15 +81,25 @@ ; CHECK-O2-NEXT: Running pass: SROA on foo ; CHECK-O2-NEXT: Running pass: TailCallElimPass on foo ; CHECK-O2-NEXT: Finished llvm::Function pass manager run. -; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PostOrderFunctionAttrsPass> -; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> +; CHECK-O2-NEXT: Running pass: PostOrderFunctionAttrsPass +; CHECK-O2-NEXT: Running pass: GVN on foo ; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis ; CHECK-O2-NEXT: Running analysis: PhiValuesAnalysis +; CHECK-O2-NEXT: Running pass: MemCpyOptPass on foo +; CHECK-O2-NEXT: Running pass: DSEPass on foo +; CHECK-O2-NEXT: Running pass: InstCombinePass on foo +; CHECK-O2-NEXT: Running pass: SimplifyCFGPass on foo +; CHECK-O2-NEXT: Running pass: SCCPPass on foo +; CHECK-O2-NEXT: Running pass: InstCombinePass on foo +; CHECK-O2-NEXT: Running pass: BDCEPass on foo ; CHECK-O2-NEXT: Running analysis: DemandedBitsAnalysis +; CHECK-O2-NEXT: Running pass: InstCombinePass +; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass +; CHECK-O2-NEXT: Running pass: JumpThreadingPass ; CHECK-O2-NEXT: Running pass: CrossDSOCFIPass ; CHECK-O2-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: LowerTypeTestsPass -; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}SimplifyCFGPass> +; CHECK-O2-NEXT: Running pass: SimplifyCFGPass ; CHECK-O2-NEXT: Running pass: EliminateAvailableExternallyPass ; CHECK-O2-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Finished llvm::Module pass manager run. 
diff --git a/llvm/test/Other/new-pm-pgo.ll b/llvm/test/Other/new-pm-pgo.ll --- a/llvm/test/Other/new-pm-pgo.ll +++ b/llvm/test/Other/new-pm-pgo.ll @@ -18,8 +18,8 @@ ; USE: Running pass: PGOMemOPSizeOpt ; USE_POST_LINK: Running pass: PGOIndirectCallPromotion ; USE_POST_LINK: Running pass: PGOMemOPSizeOpt -; SAMPLE_USE_O: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}> -; SAMPLE_USE_PRE_LINK: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}> +; SAMPLE_USE_O: Running pass: AddDiscriminatorsPass +; SAMPLE_USE_PRE_LINK: Running pass: AddDiscriminatorsPass ; SAMPLE_USE: Running pass: SimplifyCFGPass ; SAMPLE_USE: Running pass: SROA ; SAMPLE_USE: Running pass: EarlyCSEPass @@ -30,7 +30,7 @@ ; SAMPLE_USE_POST_LINK-NOT: Running pass: GlobalOptPass ; SAMPLE_USE_POST_LINK: Running pass: PGOIndirectCallPromotion ; SAMPLE_USE_PRE_LINK-NOT: Running pass: PGOIndirectCallPromotion -; SAMPLE_GEN: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}> +; SAMPLE_GEN: Running pass: AddDiscriminatorsPass ; SPLIT: Running pass: HotColdSplittingPass define void @foo() { diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -47,27 +47,20 @@ ; RUN: -passes='thinlto' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O2 ; -; CHECK-O: Running analysis: PassInstrumentationAnalysis -; CHECK-O-NEXT: Starting llvm::Module pass manager run. -; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass ; CHECK-EP-PIPELINE-START-NEXT: Running pass: NoOpModulePass -; CHECK-DIS-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}> ; CHECK-DIS-NEXT: Running analysis: InnerAnalysisManagerProxy -; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-DIS-NEXT: Running pass: AddDiscriminatorsPass ; CHECK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-POSTLINK-O-NEXT: Running pass: PGOIndirectCallPromotion ; CHECK-POSTLINK-O-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-POSTLINK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-POSTLINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis -; CHECK-POSTLINK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass ; CHECK-PRELINK-O-NODIS-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> -; CHECK-PRELINK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Starting llvm::Function pass manager run. 
; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis @@ -83,9 +76,8 @@ ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass> +; CHECK-O-NEXT: Running pass: PromotePass ; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-PRELINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis @@ -101,19 +93,17 @@ ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-PRELINK-O-NEXT: Running analysis: ProfileSummaryAnalysis -; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}> +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy +; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass ; CHECK-O-NEXT: Starting CGSCC pass manager run. ; CHECK-O-NEXT: Running pass: InlinerPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass ; CHECK-O2-NEXT: Running pass: OpenMPOptPass on (foo) ; CHECK-O3-NEXT: Running pass: OpenMPOptPass on (foo) -; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass @@ -132,7 +122,6 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopStandardAnalysisResults{{.*}}> ; CHECK-O-NEXT: Starting llvm::Function pass manager run ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running analysis: LoopAnalysis @@ -141,7 +130,6 @@ ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Starting Loop pass manager run. -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass ; CHECK-O-NEXT: Running pass: LoopRotatePass @@ -150,7 +138,6 @@ ; CHECK-O-NEXT: Finished Loop pass manager run. 
; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopStandardAnalysisResults{{.*}}> ; CHECK-O-NEXT: Starting llvm::Function pass manager run ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass @@ -188,11 +175,11 @@ ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass -; CHECK-O23SZ-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> ; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass ; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run +; CHECK-O23SZ-NEXT: Running pass: LICMPass on Loop at depth 1 containing: %loop ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass @@ -202,23 +189,21 @@ ; CHECK-O-NEXT: Finished llvm::Module pass manager run. ; CHECK-O-NEXT: Finished llvm::Module pass manager run. ; CHECK-PRELINK-O-NEXT: Running pass: GlobalOptPass -; CHECK-POSTLINK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-POSTLINK-O-NEXT: Starting llvm::Module pass manager run. ; CHECK-POSTLINK-O-NEXT: Running pass: GlobalOptPass ; CHECK-POSTLINK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-POSTLINK-O-NEXT: Running pass: EliminateAvailableExternallyPass ; CHECK-POSTLINK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass ; CHECK-POSTLINK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA -; CHECK-POSTLINK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-POSTLINK-O-NEXT: Starting llvm::Function pass manager run. 
; CHECK-POSTLINK-O-NEXT: Running pass: Float2IntPass ; CHECK-POSTLINK-O-NEXT: Running pass: LowerConstantIntrinsicsPass ; CHECK-EXT: Running pass: {{.*}}::Bye -; CHECK-POSTLINK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopRotatePass ; CHECK-POSTLINK-O-NEXT: Starting llvm::Function pass manager run ; CHECK-POSTLINK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-POSTLINK-O-NEXT: Running pass: LCSSAPass ; CHECK-POSTLINK-O-NEXT: Finished llvm::Function pass manager run +; CHECK-POSTLINK-O-NEXT: Running pass: LoopRotatePass ; CHECK-POSTLINK-O-NEXT: Running pass: LoopDistributePass ; CHECK-POSTLINK-O-NEXT: Running pass: InjectTLIMappings ; CHECK-POSTLINK-O-NEXT: Running pass: LoopVectorizePass @@ -237,11 +222,11 @@ ; CHECK-POSTLINK-O-NEXT: Running pass: WarnMissedTransformationsPass ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass ; CHECK-POSTLINK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis -; CHECK-POSTLINK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass ; CHECK-POSTLINK-O-NEXT: Starting llvm::Function pass manager run ; CHECK-POSTLINK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-POSTLINK-O-NEXT: Running pass: LCSSAPass ; CHECK-POSTLINK-O-NEXT: Finished llvm::Function pass manager run +; CHECK-POSTLINK-O-NEXT: Running pass: LICMPass ; CHECK-POSTLINK-O-NEXT: Running pass: AlignmentFromAssumptionsPass ; CHECK-POSTLINK-O-NEXT: Running pass: LoopSinkPass ; CHECK-POSTLINK-O-NEXT: Running pass: InstSimplifyPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -20,23 +20,17 @@ ; RUN: -passes='thinlto' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext --dump-input=fail ; -; CHECK-O: Running analysis: PassInstrumentationAnalysis -; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. -; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass ; CHECK-EP-PIPELINE-START-NEXT: Running pass: NoOpModulePass -; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. 
; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis @@ -52,9 +46,8 @@ ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass> +; CHECK-O-NEXT: Running pass: PromotePass ; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running analysis: AAManager @@ -74,19 +67,17 @@ ; CHECK-O-NEXT: Running analysis: GlobalsAA ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis -; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}> +; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass ; CHECK-O-NEXT: Starting CGSCC pass manager run. ; CHECK-O-NEXT: Running pass: InlinerPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass ; CHECK-O2-NEXT: Running pass: OpenMPOptPass ; CHECK-O3-NEXT: Running pass: OpenMPOptPass -; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass @@ -105,7 +96,6 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopStandardAnalysisResults{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass @@ -113,7 +103,6 @@ ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Starting Loop pass manager run. -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass ; CHECK-O-NEXT: Running pass: LoopRotatePass @@ -122,7 +111,6 @@ ; CHECK-O-NEXT: Finished Loop pass manager run. 
; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopStandardAnalysisResults{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass @@ -160,11 +148,11 @@ ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass -; CHECK-O23SZ-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> ; CHECK-O23SZ-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass ; CHECK-O23SZ-NEXT: Finished {{.*}}Function pass manager run +; CHECK-O23SZ-NEXT: Running pass: LICMPass ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass @@ -172,23 +160,21 @@ ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. -; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: EliminateAvailableExternallyPass ; CHECK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. ; CHECK-O-NEXT: Running pass: Float2IntPass ; CHECK-O-NEXT: Running pass: LowerConstantIntrinsicsPass ; CHECK-EXT: Running pass: {{.*}}::Bye -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopRotatePass -; CHECK-O-NEXT: Starting {{.*}}Function pass manager run -; CHECK-O-NEXT: Running pass: LoopSimplifyPass -; CHECK-O-NEXT: Running pass: LCSSAPass -; CHECK-O-NEXT: Finished {{.*}}Function pass manager run +; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. +; CHECK-O-NEXT: Running pass: LoopSimplifyPass on foo +; CHECK-O-NEXT: Running pass: LCSSAPass on foo +; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. 
+; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LoopDistributePass ; CHECK-O-NEXT: Running pass: InjectTLIMappings ; CHECK-O-NEXT: Running pass: LoopVectorizePass @@ -205,11 +191,11 @@ ; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run +; CHECK-O-NEXT: Running pass: LICMPass ; CHECK-O-NEXT: Running pass: AlignmentFromAssumptionsPass ; CHECK-O-NEXT: Running pass: LoopSinkPass ; CHECK-O-NEXT: Running pass: InstSimplifyPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -25,20 +25,14 @@ ; RUN: -passes='thinlto' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext --dump-input=fail ; -; CHECK-O: Running analysis: PassInstrumentationAnalysis -; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. -; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass ; CHECK-EP-PIPELINE-START-NEXT: Running pass: NoOpModulePass -; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis @@ -63,9 +57,8 @@ ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass> +; CHECK-O-NEXT: Running pass: PromotePass ; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. 
; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo @@ -82,19 +75,17 @@ ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA ; CHECK-O-NEXT: Running analysis: GlobalsAA ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis -; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis -; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}> +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy +; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass ; CHECK-O-NEXT: Starting CGSCC pass manager run. ; CHECK-O-NEXT: Running pass: InlinerPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass ; CHECK-O2-NEXT: Running pass: OpenMPOptPass ; CHECK-O3-NEXT: Running pass: OpenMPOptPass -; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass @@ -113,7 +104,6 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopStandardAnalysisResults{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass @@ -121,7 +111,6 @@ ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Starting Loop pass manager run. -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass ; CHECK-O-NEXT: Running pass: LoopRotatePass @@ -130,7 +119,6 @@ ; CHECK-O-NEXT: Finished Loop pass manager run. ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopStandardAnalysisResults{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass @@ -168,11 +156,11 @@ ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass -; CHECK-O23SZ-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> ; CHECK-O23SZ-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass ; CHECK-O23SZ-NEXT: Finished {{.*}}Function pass manager run +; CHECK-O23SZ-NEXT: Running pass: LICMPass ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass @@ -183,23 +171,21 @@ ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. 
-; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: EliminateAvailableExternallyPass ; CHECK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. ; CHECK-O-NEXT: Running pass: Float2IntPass ; CHECK-O-NEXT: Running pass: LowerConstantIntrinsicsPass ; CHECK-EXT: Running pass: {{.*}}::Bye -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopRotatePass ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run +; CHECK-O-NEXT: Running pass: LoopRotatePass ; CHECK-O-NEXT: Running pass: LoopDistributePass ; CHECK-O-NEXT: Running pass: InjectTLIMappings ; CHECK-O-NEXT: Running pass: LoopVectorizePass @@ -216,11 +202,11 @@ ; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run +; CHECK-O-NEXT: Running pass: LICMPass ; CHECK-O-NEXT: Running pass: AlignmentFromAssumptionsPass ; CHECK-O-NEXT: Running pass: LoopSinkPass ; CHECK-O-NEXT: Running pass: InstSimplifyPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -27,20 +27,14 @@ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK-DIS,CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-O123 --dump-input=fail ; -; CHECK-O: Running analysis: PassInstrumentationAnalysis -; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. -; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass ; CHECK-EP-PIPELINE-START-NEXT: Running pass: NoOpModulePass -; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. 
; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis @@ -55,9 +49,8 @@ ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass> +; CHECK-O-NEXT: Running pass: PromotePass ; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis @@ -68,15 +61,16 @@ ; CHECK-O123-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O123-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O123-NEXT: Starting {{.*}}Module pass manager run. -; CHECK-O123-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PassManager<{{.*}}LazyCallGraph::SCC ; CHECK-O123-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O123-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O123-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy on (foo) -; CHECK-O123-NEXT: Running analysis: PassInstrumentationAnalysis on (foo) ; CHECK-O123-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O123-NEXT: Starting CGSCC pass manager run. ; CHECK-O123-NEXT: Running pass: InlinerPass on (foo) -; CHECK-O123-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> +; CHECK-O123-NEXT: Running pass: SROA on foo +; CHECK-O123-NEXT: Running pass: EarlyCSEPass on foo +; CHECK-O123-NEXT: Running pass: SimplifyCFGPass on foo +; CHECK-O123-NEXT: Running pass: InstCombinePass on foo ; CHECK-O123-NEXT: Finished CGSCC pass manager run. ; CHECK-O123-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O123-NEXT: Running pass: GlobalDCEPass @@ -96,7 +90,6 @@ ; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion on ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis on foo -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis on foo ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-Os-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-Oz-NEXT: Running analysis: InlineAdvisorAnalysis @@ -105,13 +98,12 @@ ; CHECK-O-NEXT: Running analysis: GlobalsAA ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis -; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis on foo ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}> +; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass ; CHECK-O-NEXT: Starting CGSCC pass manager run. ; CHECK-O-NEXT: Running pass: InlinerPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass @@ -120,7 +112,6 @@ ; CHECK-O3-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O2-NEXT: Running pass: OpenMPOptPass ; CHECK-O3-NEXT: Running pass: OpenMPOptPass -; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. 
; CHECK-O-NEXT: Running pass: SROA ; These next two can appear in any order since they are accessed as parameters @@ -156,7 +147,6 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopStandardAnalysisResults{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass @@ -164,7 +154,6 @@ ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Starting Loop pass manager run. -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass ; CHECK-O-NEXT: Running pass: LoopRotatePass @@ -173,7 +162,6 @@ ; CHECK-O-NEXT: Finished Loop pass manager run. ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopStandardAnalysisResults{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass @@ -211,11 +199,11 @@ ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass -; CHECK-O23SZ-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> ; CHECK-O23SZ-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass ; CHECK-O23SZ-NEXT: Finished {{.*}}Function pass manager run +; CHECK-O23SZ-NEXT: Running pass: LICMPass ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass @@ -228,7 +216,6 @@ ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis on bar -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis on bar ; CHECK-EXT: Running pass: {{.*}}::Bye ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: NameAnonGlobalPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -25,21 +25,15 @@ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK-DIS,CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-O123 --dump-input=fail ; -; CHECK-O: Running analysis: PassInstrumentationAnalysis -; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. -; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> +; CHECK-O: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. 
; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy +; CHECK-O-NEXT: Running pass: AddDiscriminatorsPass ; CHECK-EP-PIPELINE-START-NEXT: Running pass: NoOpModulePass -; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Module pass manager run. ; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis @@ -62,9 +56,8 @@ ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass> +; CHECK-O-NEXT: Running pass: PromotePass ; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass -; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo @@ -81,19 +74,17 @@ ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA ; CHECK-O-NEXT: Running analysis: GlobalsAA ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis -; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy -; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}> +; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass ; CHECK-O-NEXT: Starting CGSCC pass manager run. ; CHECK-O-NEXT: Running pass: InlinerPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass ; CHECK-O2-NEXT: Running pass: OpenMPOptPass ; CHECK-O3-NEXT: Running pass: OpenMPOptPass -; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass @@ -112,7 +103,6 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopStandardAnalysisResults{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass @@ -120,7 +110,6 @@ ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Starting Loop pass manager run. 
-; CHECK-O-NEXT: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass ; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass ; CHECK-O-NEXT: Running pass: LoopRotatePass @@ -129,7 +118,6 @@ ; CHECK-O-NEXT: Finished Loop pass manager run. ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopStandardAnalysisResults{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O-NEXT: Running pass: LoopSimplifyPass ; CHECK-O-NEXT: Running pass: LCSSAPass @@ -166,11 +154,11 @@ ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass -; CHECK-O23SZ-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> ; CHECK-O23SZ-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass ; CHECK-O23SZ-NEXT: Finished {{.*}}Function pass manager run +; CHECK-O23SZ-NEXT: Running pass: LICMPass ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass diff --git a/llvm/test/Other/pass-pipeline-parsing.ll b/llvm/test/Other/pass-pipeline-parsing.ll --- a/llvm/test/Other/pass-pipeline-parsing.ll +++ b/llvm/test/Other/pass-pipeline-parsing.ll @@ -20,7 +20,6 @@ ; RUN: -passes=no-op-function,no-op-function %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-TWO-NOOP-FP ; CHECK-TWO-NOOP-FP: Starting llvm::Module pass manager run -; CHECK-TWO-NOOP-FP: Running pass: ModuleToFunctionPassAdaptor ; CHECK-TWO-NOOP-FP: Starting llvm::Function pass manager run ; CHECK-TWO-NOOP-FP: Running pass: NoOpFunctionPass ; CHECK-TWO-NOOP-FP: Running pass: NoOpFunctionPass @@ -31,7 +30,6 @@ ; RUN: -passes='function(no-op-function,no-op-function)' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-NESTED-TWO-NOOP-FP ; CHECK-NESTED-TWO-NOOP-FP: Starting llvm::Module pass manager run -; CHECK-NESTED-TWO-NOOP-FP: Running pass: ModuleToFunctionPassAdaptor ; CHECK-NESTED-TWO-NOOP-FP: Starting llvm::Function pass manager run ; CHECK-NESTED-TWO-NOOP-FP: Running pass: NoOpFunctionPass ; CHECK-NESTED-TWO-NOOP-FP: Running pass: NoOpFunctionPass @@ -43,7 +41,6 @@ ; RUN: | FileCheck %s --check-prefix=CHECK-MIXED-FP-AND-MP ; CHECK-MIXED-FP-AND-MP: Starting llvm::Module pass manager run ; CHECK-MIXED-FP-AND-MP: Running pass: NoOpModulePass -; CHECK-MIXED-FP-AND-MP: Running pass: ModuleToFunctionPassAdaptor ; CHECK-MIXED-FP-AND-MP: Starting llvm::Function pass manager run ; CHECK-MIXED-FP-AND-MP: Running pass: NoOpFunctionPass ; CHECK-MIXED-FP-AND-MP: Running pass: NoOpFunctionPass @@ -105,7 +102,6 @@ ; RUN: -passes=no-op-cgscc,no-op-cgscc %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-TWO-NOOP-CG ; CHECK-TWO-NOOP-CG: Starting llvm::Module pass manager run -; CHECK-TWO-NOOP-CG: Running pass: ModuleToPostOrderCGSCCPassAdaptor ; CHECK-TWO-NOOP-CG: Starting CGSCC pass manager run ; CHECK-TWO-NOOP-CG: Running pass: NoOpCGSCCPass ; CHECK-TWO-NOOP-CG: Running pass: NoOpCGSCCPass @@ -117,20 +113,16 @@ ; RUN: | FileCheck %s --check-prefix=CHECK-NESTED-MP-CG-FP ; CHECK-NESTED-MP-CG-FP: Starting llvm::Module pass manager run ; CHECK-NESTED-MP-CG-FP: Starting llvm::Module pass manager run -; CHECK-NESTED-MP-CG-FP: Running pass: ModuleToFunctionPassAdaptor ; CHECK-NESTED-MP-CG-FP: Starting llvm::Function pass manager run ; 
CHECK-NESTED-MP-CG-FP: Running pass: NoOpFunctionPass ; CHECK-NESTED-MP-CG-FP: Finished llvm::Function pass manager run -; CHECK-NESTED-MP-CG-FP: Running pass: ModuleToPostOrderCGSCCPassAdaptor ; CHECK-NESTED-MP-CG-FP: Starting CGSCC pass manager run ; CHECK-NESTED-MP-CG-FP: Running pass: NoOpCGSCCPass -; CHECK-NESTED-MP-CG-FP: Running pass: CGSCCToFunctionPassAdaptor ; CHECK-NESTED-MP-CG-FP: Starting llvm::Function pass manager run ; CHECK-NESTED-MP-CG-FP: Running pass: NoOpFunctionPass ; CHECK-NESTED-MP-CG-FP: Finished llvm::Function pass manager run ; CHECK-NESTED-MP-CG-FP: Running pass: NoOpCGSCCPass ; CHECK-NESTED-MP-CG-FP: Finished CGSCC pass manager run -; CHECK-NESTED-MP-CG-FP: Running pass: ModuleToFunctionPassAdaptor ; CHECK-NESTED-MP-CG-FP: Starting llvm::Function pass manager run ; CHECK-NESTED-MP-CG-FP: Running pass: NoOpFunctionPass ; CHECK-NESTED-MP-CG-FP: Finished llvm::Function pass manager run @@ -141,9 +133,7 @@ ; RUN: -passes='no-op-loop,no-op-loop' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-TWO-NOOP-LOOP ; CHECK-TWO-NOOP-LOOP: Starting llvm::Module pass manager run -; CHECK-TWO-NOOP-LOOP: Running pass: ModuleToFunctionPassAdaptor ; CHECK-TWO-NOOP-LOOP: Starting llvm::Function pass manager run -; CHECK-TWO-NOOP-LOOP: Running pass: FunctionToLoopPassAdaptor ; CHECK-TWO-NOOP-LOOP: Starting Loop pass manager run ; CHECK-TWO-NOOP-LOOP: Running pass: NoOpLoopPass ; CHECK-TWO-NOOP-LOOP: Running pass: NoOpLoopPass @@ -164,31 +154,37 @@ ; RUN: -passes='no-op-loop' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-NESTED-FP-LP ; CHECK-NESTED-FP-LP: Starting llvm::Module pass manager run -; CHECK-NESTED-FP-LP: Running pass: ModuleToFunctionPassAdaptor ; CHECK-NESTED-FP-LP: Starting llvm::Function pass manager run -; CHECK-NESTED-FP-LP: Running pass: FunctionToLoopPassAdaptor ; CHECK-NESTED-FP-LP: Starting Loop pass manager run ; CHECK-NESTED-FP-LP: Running pass: NoOpLoopPass ; CHECK-NESTED-FP-LP: Finished Loop pass manager run ; CHECK-NESTED-FP-LP: Finished llvm::Function pass manager run ; CHECK-NESTED-FP-LP: Finished llvm::Module pass manager run -; RUN: opt -disable-output -debug-pass-manager \ +; RUN: opt -disable-output -debug-pass-manager -debug-pass-manager-verbose \ ; RUN: -passes='module(no-op-function,no-op-loop,no-op-cgscc,cgscc(no-op-function,no-op-loop),function(no-op-loop))' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-ADAPTORS ; CHECK-ADAPTORS: Starting llvm::Module pass manager run ; CHECK-ADAPTORS: Starting llvm::Module pass manager run ; CHECK-ADAPTORS: Running pass: ModuleToFunctionPassAdaptor<{{.*}}NoOpFunctionPass> +; CHECK-ADAPTORS: Running pass: NoOpFunctionPass ; CHECK-ADAPTORS: Running pass: ModuleToFunctionPassAdaptor<{{.*}}FunctionToLoopPassAdaptor<{{.*}}NoOpLoopPass>{{.*}}> +; CHECK-ADAPTORS: Running pass: FunctionToLoopPassAdaptor<{{.*}}NoOpLoopPass> +; CHECK-ADAPTORS: Running pass: NoOpLoopPass on Loop at depth 1 containing: %loop ; CHECK-ADAPTORS: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}NoOpCGSCCPass> +; CHECK-ADAPTORS: Running pass: NoOpCGSCCPass ; CHECK-ADAPTORS: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-ADAPTORS: Starting CGSCC pass manager run ; CHECK-ADAPTORS: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}NoOpFunctionPass> +; CHECK-ADAPTORS: Running pass: NoOpFunctionPass ; CHECK-ADAPTORS: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}FunctionToLoopPassAdaptor<{{.*}}NoOpLoopPass>{{.*}}> +; CHECK-ADAPTORS: Running pass: FunctionToLoopPassAdaptor<{{.*}}NoOpLoopPass> +; 
CHECK-ADAPTORS: Running pass: NoOpLoopPass on Loop at depth 1 containing: %loop ; CHECK-ADAPTORS: Finished CGSCC pass manager run ; CHECK-ADAPTORS: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-ADAPTORS: Starting llvm::Function pass manager run ; CHECK-ADAPTORS: Running pass: FunctionToLoopPassAdaptor<{{.*}}NoOpLoopPass> +; CHECK-ADAPTORS: Running pass: NoOpLoopPass on Loop at depth 1 containing: %loop ; CHECK-ADAPTORS: Finished llvm::Function pass manager run ; CHECK-ADAPTORS: Finished llvm::Module pass manager run ; CHECK-ADAPTORS: Finished llvm::Module pass manager run @@ -197,9 +193,8 @@ ; RUN: -passes='cgscc(print)' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-PRINT-IN-CGSCC ; CHECK-PRINT-IN-CGSCC: Starting llvm::Module pass manager run -; CHECK-PRINT-IN-CGSCC: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> ; CHECK-PRINT-IN-CGSCC: Starting CGSCC pass manager run -; CHECK-PRINT-IN-CGSCC: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PrintFunctionPass> +; CHECK-PRINT-IN-CGSCC: Running pass: PrintFunctionPass ; CHECK-PRINT-IN-CGSCC: Finished CGSCC pass manager run ; CHECK-PRINT-IN-CGSCC: Running pass: VerifierPass ; CHECK-PRINT-IN-CGSCC: Finished llvm::Module pass manager run diff --git a/llvm/test/Transforms/LICM/assume.ll b/llvm/test/Transforms/LICM/assume.ll --- a/llvm/test/Transforms/LICM/assume.ll +++ b/llvm/test/Transforms/LICM/assume.ll @@ -1,5 +1,6 @@ ; RUN: opt -licm -basic-aa < %s -S | FileCheck %s ; RUN: opt -aa-pipeline=basic-aa -passes='require,require,require,require,loop(licm)' < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require,require,require,require,loop-nest(loop(licm))' < %s -S | FileCheck %s define void @f_0(i1 %p) nounwind ssp { ; CHECK-LABEL: @f_0( diff --git a/llvm/test/Transforms/LoopDeletion/invalidation.ll b/llvm/test/Transforms/LoopDeletion/invalidation.ll --- a/llvm/test/Transforms/LoopDeletion/invalidation.ll +++ b/llvm/test/Transforms/LoopDeletion/invalidation.ll @@ -4,8 +4,12 @@ ; ; RUN: opt < %s -passes='require,no-op-loop,require' -S \ ; RUN: | FileCheck %s --check-prefixes=CHECK,BEFORE +; RUN: opt < %s -passes='loop-nest(loop(require,no-op-loop,require))' -S \ +; RUN: | FileCheck %s --check-prefixes=CHECK,BEFORE ; RUN: opt < %s -passes='require,loop-deletion,require' -S \ ; RUN: | FileCheck %s --check-prefixes=CHECK,AFTER +; RUN: opt < %s -passes='loop-nest(loop(require,loop-deletion,require))' -S \ +; RUN: | FileCheck %s --check-prefixes=CHECK,AFTER define void @foo(i64 %n, i64 %m) nounwind { diff --git a/llvm/test/Transforms/LoopDeletion/multiple-exit-conditions.ll b/llvm/test/Transforms/LoopDeletion/multiple-exit-conditions.ll --- a/llvm/test/Transforms/LoopDeletion/multiple-exit-conditions.ll +++ b/llvm/test/Transforms/LoopDeletion/multiple-exit-conditions.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -loop-deletion -S | FileCheck %s ; RUN: opt < %s -passes='loop(loop-deletion)' -S | FileCheck %s +; RUN: opt < %s -passes='loop-nest(loop(loop-deletion))' -S | FileCheck %s ; ScalarEvolution can prove the loop iteration is finite, even though ; it can't represent the exact trip count as an expression. 
That's diff --git a/llvm/test/Transforms/LoopNest/print.ll b/llvm/test/Transforms/LoopNest/print.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopNest/print.ll @@ -0,0 +1,84 @@ +; RUN: opt -S -passes='loop-nest(print)' < %s 2>&1 >/dev/null | FileCheck %s + +; CHECK: IsPerfect=true, Depth=1, OutermostLoop: for.cond, Loops: ( for.cond ) +define i32 @f1(i32 %n) #0 { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %res.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %add = add nsw i32 %res.0, %i.0 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %res.0.lcssa = phi i32 [ %res.0, %for.cond ] + ret i32 %res.0.lcssa +} + +; CHECK: IsPerfect=false, Depth=2, OutermostLoop: for.cond, Loops: ( for.cond for.cond1 for.cond5 ) +define i32 @f4(i32 %n) #0 { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc12, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc13, %for.inc12 ] + %res.0 = phi i32 [ 0, %entry ], [ %res.2.lcssa, %for.inc12 ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.end14 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %res.1 = phi i32 [ %res.0, %for.body ], [ %add, %for.inc ] + %cmp2 = icmp slt i32 %j.0, %n + br i1 %cmp2, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %add = add nsw i32 %res.1, %i.0 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + %res.1.lcssa = phi i32 [ %res.1, %for.cond1 ] + br label %for.cond5 + +for.cond5: ; preds = %for.inc9, %for.end + %res.2 = phi i32 [ %res.1.lcssa, %for.end ], [ %add8, %for.inc9 ] + %j4.0 = phi i32 [ 0, %for.end ], [ %inc10, %for.inc9 ] + %cmp6 = icmp slt i32 %j4.0, %n + br i1 %cmp6, label %for.body7, label %for.end11 + +for.body7: ; preds = %for.cond5 + %add8 = add nsw i32 %res.2, %j4.0 + br label %for.inc9 + +for.inc9: ; preds = %for.body7 + %inc10 = add nsw i32 %j4.0, 1 + br label %for.cond5 + +for.end11: ; preds = %for.cond5 + %res.2.lcssa = phi i32 [ %res.2, %for.cond5 ] + br label %for.inc12 + +for.inc12: ; preds = %for.end11 + %inc13 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end14: ; preds = %for.cond + %res.0.lcssa = phi i32 [ %res.0, %for.cond ] + ret i32 %res.0.lcssa +} diff --git a/llvm/test/Transforms/LoopRotate/basic.ll b/llvm/test/Transforms/LoopRotate/basic.ll --- a/llvm/test/Transforms/LoopRotate/basic.ll +++ b/llvm/test/Transforms/LoopRotate/basic.ll @@ -2,6 +2,8 @@ ; RUN: opt -S -loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s ; RUN: opt -S -passes='require,require,loop(rotate)' < %s | FileCheck %s ; RUN: opt -S -passes='require,require,loop-mssa(rotate)' -verify-memoryssa < %s | FileCheck %s +; RUN: opt -S -passes='require,require,loop-nest(loop(rotate))' < %s | FileCheck %s +; RUN: opt -S -passes='require,require,loop-nest(loop-mssa(rotate))' -verify-memoryssa < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" diff --git a/llvm/test/Transforms/LoopRotate/freeze-crash.ll 
b/llvm/test/Transforms/LoopRotate/freeze-crash.ll --- a/llvm/test/Transforms/LoopRotate/freeze-crash.ll +++ b/llvm/test/Transforms/LoopRotate/freeze-crash.ll @@ -1,5 +1,6 @@ ; RUN: opt -loop-rotate -disable-output %s ; RUN: opt -passes=rotate -disable-output %s +; RUN: opt -passes='loop-nest(loop(rotate))' -disable-output %s ; Make sure we don't crash on this test. define void @foo(i32* %arg) { diff --git a/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll b/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll --- a/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll +++ b/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S < %s -loop-rotate -loop-rotate-multi=true | FileCheck %s ; RUN: opt -S < %s -passes='loop(rotate)' -loop-rotate-multi=true | FileCheck %s +; RUN: opt -S < %s -passes='loop-nest(loop(rotate))' -loop-rotate-multi=true | FileCheck %s ; Test loop rotation with multiple exits, some of them - deoptimizing. ; We should end up with a latch which exit is non-deoptimizing, so we should rotate diff --git a/llvm/test/Transforms/LoopRotate/pr35210.ll b/llvm/test/Transforms/LoopRotate/pr35210.ll --- a/llvm/test/Transforms/LoopRotate/pr35210.ll +++ b/llvm/test/Transforms/LoopRotate/pr35210.ll @@ -1,5 +1,7 @@ ;RUN: opt %s -passes='adce,loop(rotate),adce' -S -debug-pass-manager -debug-only=loop-rotate 2>&1 | FileCheck %s ;RUN: opt %s -passes='adce,loop-mssa(rotate),adce' -S -debug-pass-manager -debug-only=loop-rotate -verify-memoryssa 2>&1 | FileCheck %s --check-prefix=MSSA +;RUN: opt %s -passes='adce,loop-nest(loop(rotate)),adce' -S -debug-pass-manager -debug-only=loop-rotate 2>&1 | FileCheck %s --check-prefix=LN +;RUN: opt %s -passes='adce,loop-nest-mssa(loop-mssa(rotate)),adce' -S -debug-pass-manager -debug-only=loop-rotate -verify-memoryssa 2>&1 | FileCheck %s --check-prefix=LNMSSA ;REQUIRES: asserts ; This test is to make sure we invalidate the post dominator pass after loop rotate simplifies the loop latch. @@ -8,7 +10,6 @@ ; CHECK: Starting llvm::Function pass manager run. ; CHECK-NEXT: Running pass: ADCEPass on f ; CHECK-NEXT: Running analysis: PostDominatorTreeAnalysis on f -; CHECK-NEXT: Running pass: FunctionToLoopPassAdaptor{{.*}} on f ; CHECK-NEXT: Starting llvm::Function pass manager run. ; CHECK-NEXT: Running pass: LoopSimplifyPass on f ; CHECK-NEXT: Running analysis: LoopAnalysis on f @@ -22,7 +23,6 @@ ; CHECK-NEXT: Running analysis: TargetIRAnalysis on f ; CHECK-NEXT: Running analysis: InnerAnalysisManagerProxy{{.*}} on f ; CHECK-NEXT: Starting Loop pass manager run. -; CHECK-NEXT: Running analysis: PassInstrumentationAnalysis on bb ; CHECK-NEXT: Running pass: LoopRotatePass on Loop at depth 1 containing: %bb
,%bb4 ; CHECK-NEXT: Folding loop latch bb4 into bb ; CHECK-NEXT: Invalidating all non-preserved analyses for: bb @@ -33,10 +33,42 @@ ; CHECK-NEXT: Running analysis: PostDominatorTreeAnalysis on f ; CHECK-NEXT: Finished llvm::Function pass manager run. +; LN: Starting llvm::Function pass manager run. +; LN-NEXT: Running pass: ADCEPass on f +; LN-NEXT: Running analysis: PostDominatorTreeAnalysis on f +; LN-NEXT: Running pass: FunctionToLoopNestPassAdaptor{{.*}} on f +; LN-NEXT: Starting llvm::Function pass manager run. +; LN-NEXT: Running pass: LoopSimplifyPass on f +; LN-NEXT: Running analysis: LoopAnalysis on f +; LN-NEXT: Running analysis: DominatorTreeAnalysis on f +; LN-NEXT: Running analysis: AssumptionAnalysis on f +; LN-NEXT: Running pass: LCSSAPass on f +; LN-NEXT: Finished llvm::Function pass manager run. +; LN-NEXT: Running analysis: AAManager on f +; LN-NEXT: Running analysis: TargetLibraryAnalysis on f +; LN-NEXT: Running analysis: ScalarEvolutionAnalysis on f +; LN-NEXT: Running analysis: TargetIRAnalysis on f +; LN-NEXT: Running analysis: InnerAnalysisManagerProxy{{.*}} on f +; LN-NEXT: Running analysis: LoopNestAnalysis on bb +; LN-NEXT: Running analysis: PassInstrumentationAnalysis on bb +; LN-NEXT: Starting LoopNest pass manager run. +; LN-NEXT: Running pass: LoopNestToLoopPassAdaptor{{.*}} on bb +; LN-NEXT: Starting Loop pass manager run. +; LN-NEXT: Running pass: LoopRotatePass on Loop at depth 1 containing: %bb
,%bb4 +; LN-NEXT: Folding loop latch bb4 into bb +; LN-NEXT: Invalidating all non-preserved analyses for: bb +; LN-NEXT: Invalidating analysis: LoopNestAnalysis on bb +; LN-NEXT: Finished Loop pass manager run. +; LN-NEXT: Finished LoopNest pass manager run. +; LN-NEXT: Invalidating all non-preserved analyses for: f +; LN-NEXT: Invalidating analysis: PostDominatorTreeAnalysis on f +; LN-NEXT: Running pass: ADCEPass on f +; LN-NEXT: Running analysis: PostDominatorTreeAnalysis on f +; LN-NEXT: Finished llvm::Function pass manager run. + ; MSSA: Starting llvm::Function pass manager run. ; MSSA-NEXT: Running pass: ADCEPass on f ; MSSA-NEXT: Running analysis: PostDominatorTreeAnalysis on f -; MSSA-NEXT: Running pass: FunctionToLoopPassAdaptor{{.*}} on f ; MSSA-NEXT: Starting llvm::Function pass manager run. ; MSSA-NEXT: Running pass: LoopSimplifyPass on f ; MSSA-NEXT: Running analysis: LoopAnalysis on f @@ -51,7 +83,6 @@ ; MSSA-NEXT: Running analysis: TargetIRAnalysis on f ; MSSA-NEXT: Running analysis: InnerAnalysisManagerProxy{{.*}} on f ; MSSA-NEXT: Starting Loop pass manager run. -; MSSA-NEXT: Running analysis: PassInstrumentationAnalysis on bb ; MSSA-NEXT: Running pass: LoopRotatePass on Loop at depth 1 containing: %bb
,%bb4 ; MSSA-NEXT: Folding loop latch bb4 into bb ; MSSA-NEXT: Invalidating all non-preserved analyses for: bb @@ -62,6 +93,40 @@ ; MSSA-NEXT: Running analysis: PostDominatorTreeAnalysis on f ; MSSA-NEXT: Finished llvm::Function pass manager run. +; LNMSSA: Starting llvm::Function pass manager run. +; LNMSSA-NEXT: Running pass: ADCEPass on f +; LNMSSA-NEXT: Running analysis: PostDominatorTreeAnalysis on f +; LNMSSA-NEXT: Running pass: FunctionToLoopNestPassAdaptor{{.*}} on f +; LNMSSA-NEXT: Starting llvm::Function pass manager run. +; LNMSSA-NEXT: Running pass: LoopSimplifyPass on f +; LNMSSA-NEXT: Running analysis: LoopAnalysis on f +; LNMSSA-NEXT: Running analysis: DominatorTreeAnalysis on f +; LNMSSA-NEXT: Running analysis: AssumptionAnalysis on f +; LNMSSA-NEXT: Running pass: LCSSAPass on f +; LNMSSA-NEXT: Finished llvm::Function pass manager run. +; LNMSSA-NEXT: Running analysis: MemorySSAAnalysis on f +; LNMSSA-NEXT: Running analysis: AAManager on f +; LNMSSA-NEXT: Running analysis: TargetLibraryAnalysis on f +; LNMSSA-NEXT: Running analysis: ScalarEvolutionAnalysis on f +; LNMSSA-NEXT: Running analysis: TargetIRAnalysis on f +; LNMSSA-NEXT: Running analysis: InnerAnalysisManagerProxy{{.*}} on f +; LNMSSA-NEXT: Running analysis: LoopNestAnalysis on bb +; LNMSSA-NEXT: Running analysis: PassInstrumentationAnalysis on bb +; LNMSSA-NEXT: Starting LoopNest pass manager run. +; LNMSSA-NEXT: Running pass: LoopNestToLoopPassAdaptor{{.*}} on bb +; LNMSSA-NEXT: Starting Loop pass manager run. +; LNMSSA-NEXT: Running pass: LoopRotatePass on Loop at depth 1 containing: %bb
,%bb4 +; LNMSSA-NEXT: Folding loop latch bb4 into bb +; LNMSSA-NEXT: Invalidating all non-preserved analyses for: bb +; LNMSSA-NEXT: Invalidating analysis: LoopNestAnalysis on bb +; LNMSSA-NEXT: Finished Loop pass manager run. +; LNMSSA-NEXT: Finished LoopNest pass manager run. +; LNMSSA-NEXT: Invalidating all non-preserved analyses for: f +; LNMSSA-NEXT: Invalidating analysis: PostDominatorTreeAnalysis on f +; LNMSSA-NEXT: Running pass: ADCEPass on f +; LNMSSA-NEXT: Running analysis: PostDominatorTreeAnalysis on f +; LNMSSA-NEXT: Finished llvm::Function pass manager run. + ; CHECK-LABEL: define i8 @f() { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label %bb @@ -78,6 +143,22 @@ ; CHECK: declare void @raise_exception() #0 ; CHECK: attributes #0 = { noreturn } +; LN-LABEL: define i8 @f() { +; LN-NEXT: entry: +; LN-NEXT: br label %bb +; LN: bb: ; preds = %bb, %entry +; LN-NEXT: %mode.0 = phi i8 [ 0, %entry ], [ %indvar.next, %bb ] +; LN-NEXT: %tmp5 = icmp eq i8 %mode.0, 1 +; LN-NEXT: %indvar.next = add i8 %mode.0, 1 +; LN-NEXT: br i1 %tmp5, label %bb5, label %bb +; LN: bb5: ; preds = %bb +; LN-NEXT: tail call void @raise_exception() #0 +; LN-NEXT: unreachable +; LN-NEXT: } +; LN: ; Function Attrs: noreturn +; LN: declare void @raise_exception() #0 +; LN: attributes #0 = { noreturn } + ; MSSA-LABEL: define i8 @f() { ; MSSA-NEXT: entry: ; MSSA-NEXT: br label %bb @@ -94,6 +175,22 @@ ; MSSA: declare void @raise_exception() #0 ; MSSA: attributes #0 = { noreturn } +; LNMSSA-LABEL: define i8 @f() { +; LNMSSA-NEXT: entry: +; LNMSSA-NEXT: br label %bb +; LNMSSA: bb: ; preds = %bb, %entry +; LNMSSA-NEXT: %mode.0 = phi i8 [ 0, %entry ], [ %indvar.next, %bb ] +; LNMSSA-NEXT: %tmp5 = icmp eq i8 %mode.0, 1 +; LNMSSA-NEXT: %indvar.next = add i8 %mode.0, 1 +; LNMSSA-NEXT: br i1 %tmp5, label %bb5, label %bb +; LNMSSA: bb5: ; preds = %bb +; LNMSSA-NEXT: tail call void @raise_exception() #0 +; LNMSSA-NEXT: unreachable +; LNMSSA-NEXT: } +; LNMSSA: ; Function Attrs: noreturn +; LNMSSA: declare void @raise_exception() #0 +; LNMSSA: attributes #0 = { noreturn } + define i8 @f() { entry: br label %bb diff --git a/llvm/test/Transforms/LoopUnroll/revisit.ll b/llvm/test/Transforms/LoopUnroll/revisit.ll --- a/llvm/test/Transforms/LoopUnroll/revisit.ll +++ b/llvm/test/Transforms/LoopUnroll/revisit.ll @@ -15,7 +15,7 @@ ; Basic test is fully unrolled and we revisit the post-unroll new sibling ; loops, including the ones that used to be child loops. define void @full_unroll(i1* %ptr) { -; CHECK-LABEL: FunctionToLoopPassAdaptor{{.*}} on full_unroll +; CHECK-LABEL: OptimizationRemarkEmitterAnalysis on full_unroll ; CHECK-NOT: LoopFullUnrollPass entry: @@ -81,7 +81,7 @@ ; duplicating child loops without changing their structure and so they aren't by ; default visited, but will be visited with a special parameter. define void @partial_unroll(i32 %count, i1* %ptr) { -; CHECK-LABEL: FunctionToLoopPassAdaptor{{.*}} on partial_unroll +; CHECK-LABEL: OptimizationRemarkEmitterAnalysis on partial_unroll ; CHECK-NOT: LoopFullUnrollPass entry: diff --git a/llvm/test/Transforms/LoopUnroll/unroll-loop-invalidation.ll b/llvm/test/Transforms/LoopUnroll/unroll-loop-invalidation.ll --- a/llvm/test/Transforms/LoopUnroll/unroll-loop-invalidation.ll +++ b/llvm/test/Transforms/LoopUnroll/unroll-loop-invalidation.ll @@ -6,20 +6,19 @@ ; RUN: opt -S -passes='loop(require),loop-unroll,loop(print-access-info)' -debug-pass-manager < %s 2>&1 | FileCheck %s ; ; CHECK: Starting llvm::Function pass manager run. 
-; CHECK: Running pass: FunctionToLoopPassAdaptor ; CHECK: Running analysis: LoopAnalysis ; CHECK: Running analysis: InnerAnalysisManagerProxy< ; CHECK: Starting Loop pass manager run. ; CHECK: Running pass: RequireAnalysisPass<{{.*}}LoopAccessAnalysis -; CHECK: Running analysis: LoopAccessAnalysis on inner1.header +; CHECK: Running analysis: LoopAccessAnalysis on Loop at depth 2 containing: %inner1.header ; CHECK: Finished Loop pass manager run. ; CHECK: Starting Loop pass manager run. ; CHECK: Running pass: RequireAnalysisPass<{{.*}}LoopAccessAnalysis -; CHECK: Running analysis: LoopAccessAnalysis on inner2.header +; CHECK: Running analysis: LoopAccessAnalysis on Loop at depth 2 containing: %inner2.header ; CHECK: Finished Loop pass manager run. ; CHECK: Starting Loop pass manager run. ; CHECK: Running pass: RequireAnalysisPass<{{.*}}LoopAccessAnalysis -; CHECK: Running analysis: LoopAccessAnalysis on outer.header +; CHECK: Running analysis: LoopAccessAnalysis on Loop at depth 1 containing: %outer.header ; CHECK: Finished Loop pass manager run. ; CHECK: Running pass: LoopUnrollPass ; CHECK: Clearing all analysis results for: inner2.header @@ -29,16 +28,15 @@ ; CHECK: Invalidating analysis: LoopAccessAnalysis on inner1.header ; CHECK: Invalidating all non-preserved analyses for: inner1.header.1 ; CHECK-NOT: Invalidating analysis: LoopAccessAnalysis on inner1.header.1 -; CHECK: Running pass: FunctionToLoopPassAdaptor ; CHECK: Starting Loop pass manager run. ; CHECK: Running pass: LoopAccessInfoPrinterPass -; CHECK: Running analysis: LoopAccessAnalysis on inner1.header +; CHECK: Running analysis: LoopAccessAnalysis on Loop at depth 1 containing: %inner1.header ; CHECK: Loop access info in function 'test': ; CHECK: inner1.header: ; CHECK: Finished Loop pass manager run. ; CHECK: Starting Loop pass manager run. ; CHECK: Running pass: LoopAccessInfoPrinterPass -; CHECK: Running analysis: LoopAccessAnalysis on inner1.header.1 +; CHECK: Running analysis: LoopAccessAnalysis on Loop at depth 1 containing: %inner1.header.1 ; CHECK: Loop access info in function 'test': ; CHECK: inner1.header.1: ; CHECK: Finished Loop pass manager run. 
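
For context, the loop-nest RUN lines and the FunctionToLoopNestPassAdaptor/LoopNestToLoopPassAdaptor output checked above drive passes that operate on a whole LoopNest unit. A minimal sketch of that pass shape follows; the class name NoOpLoopNestPass and the exact headers are assumptions, and the run() signature simply mirrors the MockPassHandle<LoopNest> specialization added to PassBuilderCallbacksTest.cpp later in this patch (it is not an upstream API beyond this patch).

#include "llvm/Analysis/LoopAnalysisManager.h"     // LoopStandardAnalysisResults
#include "llvm/Analysis/LoopNestAnalysis.h"        // LoopNest
#include "llvm/Analysis/LoopNestAnalysisManager.h" // added by this patch; assumed to declare LNPMUpdater too
#include "llvm/IR/PassManager.h"

namespace {
// Hypothetical no-op loop-nest pass, shown only to illustrate the unit the
// new adaptors wrap.
class NoOpLoopNestPass : public llvm::PassInfoMixin<NoOpLoopNestPass> {
public:
  llvm::PreservedAnalyses run(llvm::LoopNest &LN,
                              llvm::LoopNestAnalysisManager &AM,
                              llvm::LoopStandardAnalysisResults &AR,
                              llvm::LNPMUpdater &U) {
    // A real pass would inspect or transform the whole nest via LN; this one
    // changes nothing, so every analysis stays valid.
    return llvm::PreservedAnalyses::all();
  }
};
} // namespace
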
diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll --- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll @@ -39,8 +39,7 @@ ; CHECK-NEXT: "loop:\n" + ; CHECK-NEXT: "WIDEN-INDUCTION %iv = phi 0, %iv.next\l" + ; CHECK-NEXT: "WIDEN\l"" %cond0 = icmp %iv, 13\l" + -; CHECK-NEXT: "WIDEN-SELECT%s = select %cond0, 10, 20\l" + -; CHECK-NEXT: "EMIT vp<%1> = icmp ule ir<%iv> vp<%0>\l" +; CHECK-NEXT: "WIDEN-SELECT%s = select %cond0, 10, 20\l" ; CHECK-NEXT: ] define void @test() { entry: diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll --- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll @@ -10,7 +10,7 @@ ; CHECK-REMARKS-NOT: remark: {{.*}} vectorized loop define void @VF1-VPlanExe() { -; CHECK-LABEL: @VF1-VPlanExe +; CHECK-LABEL: @VF1-VPlanExe( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -21,13 +21,9 @@ ; CHECK-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[INDUCTION2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[INDUCTION3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i64 [[INDUCTION]], 14 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[INDUCTION1]], 14 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 [[INDUCTION2]], 14 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i64 [[INDUCTION3]], 14 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -55,7 +51,7 @@ } define void @VF1-VPWidenCanonicalIVRecipeExe(double* %ptr1) { -; CHECK-LABEL: @VF1-VPWidenCanonicalIVRecipeExe +; CHECK-LABEL: @VF1-VPWidenCanonicalIVRecipeExe( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds double, double* [[PTR1:%.*]], i64 15 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -72,17 +68,9 @@ ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP3]] -; CHECK-NEXT: [[VEC_IV:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[VEC_IV4:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[VEC_IV5:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IV6:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ule i64 [[VEC_IV]], 14 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ule i64 [[VEC_IV4]], 14 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ule i64 [[VEC_IV5]], 14 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ule i64 [[VEC_IV6]], 14 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !3 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop 
!3 ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/PR41279.proftext b/llvm/test/Transforms/PGOProfile/Inputs/PR41279.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/PR41279.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/PR41279.proftext @@ -1,6 +1,6 @@ :ir foo -60927483247 +1096621588030135663 4 3 2 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/PR41279_2.proftext b/llvm/test/Transforms/PGOProfile/Inputs/PR41279_2.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/PR41279_2.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/PR41279_2.proftext @@ -1,6 +1,6 @@ :ir f -62077759478 +1096621589180411894 2 3 2 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/branch1.proftext b/llvm/test/Transforms/PGOProfile/Inputs/branch1.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/branch1.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/branch1.proftext @@ -1,7 +1,7 @@ # :ir is the flag to indicate this is IR level profile. :ir test_br_1 -25571299074 +784007059655560962 2 3 2 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/branch1_large_count.proftext b/llvm/test/Transforms/PGOProfile/Inputs/branch1_large_count.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/branch1_large_count.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/branch1_large_count.proftext @@ -1,7 +1,7 @@ # :ir is the flag to indicate this is IR level profile. :ir test_br_1 -25571299074 +784007059655560962 2 12884901888 8589934592 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/branch2.proftext b/llvm/test/Transforms/PGOProfile/Inputs/branch2.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/branch2.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/branch2.proftext @@ -1,7 +1,7 @@ # :ir is the flag to indicate this is IR level profile. :ir test_br_2 -29667547796 +146835647075900052 2 1 1 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/branch2_entry.proftext b/llvm/test/Transforms/PGOProfile/Inputs/branch2_entry.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/branch2_entry.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/branch2_entry.proftext @@ -2,7 +2,7 @@ :ir :entry_first test_br_2 -29667547796 +146835647075900052 2 2 1 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/criticaledge.proftext b/llvm/test/Transforms/PGOProfile/Inputs/criticaledge.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/criticaledge.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/criticaledge.proftext @@ -1,7 +1,7 @@ # :ir is the flag to indicate this is IR level profile. 
:ir test_criticalEdge -82323253069 +93478046750287693 8 2 1 @@ -13,7 +13,7 @@ 1 :bar -12884901887 +742261418966908927 1 7 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/criticaledge_entry.proftext b/llvm/test/Transforms/PGOProfile/Inputs/criticaledge_entry.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/criticaledge_entry.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/criticaledge_entry.proftext @@ -2,7 +2,7 @@ :ir :entry_first test_criticalEdge -82323253069 +93478046750287693 8 7 2 @@ -14,7 +14,7 @@ 1 :bar -12884901887 +742261418966908927 1 7 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/cspgo.proftext b/llvm/test/Transforms/PGOProfile/Inputs/cspgo.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/cspgo.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/cspgo.proftext @@ -54,7 +54,7 @@ foo # Func Hash: -1152921640672869708 +1456607294772657484 # Num Counters: 10 # Counter Values: @@ -71,7 +71,7 @@ foo # Func Hash: -29212902728 +146835646621254984 # Num Counters: 2 # Counter Values: @@ -80,7 +80,7 @@ bar # Func Hash: -1152921569533132113 +1440408129826749777 # Num Counters: 5 # Counter Values: @@ -92,7 +92,7 @@ bar # Func Hash: -56228292833 +567185239050791137 # Num Counters: 4 # Counter Values: @@ -103,7 +103,7 @@ main # Func Hash: -1152921517491748863 +1895182923573755903 # Num Counters: 1 # Counter Values: @@ -111,7 +111,7 @@ main # Func Hash: -12884901887 +742261418966908927 # Num Counters: 1 # Counter Values: @@ -135,7 +135,7 @@ goo # Func Hash: -1152921517491748863 +1895182923573755903 # Num Counters: 1 # Counter Values: @@ -143,7 +143,7 @@ goo # Func Hash: -12884901887 +742261418966908927 # Num Counters: 1 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/diag_no_value_sites.proftext b/llvm/test/Transforms/PGOProfile/Inputs/diag_no_value_sites.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/diag_no_value_sites.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/diag_no_value_sites.proftext @@ -1,6 +1,6 @@ # :ir is the flag to indicate this is IR level profile. 
:ir foo -12884901887 +48277136972185599 1 1 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/fix_entry_count.proftext b/llvm/test/Transforms/PGOProfile/Inputs/fix_entry_count.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/fix_entry_count.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/fix_entry_count.proftext @@ -1,7 +1,7 @@ :ir :entry_first test_simple_for -34137660316 +1063705162469825436 2 0 96 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/func_entry.proftext b/llvm/test/Transforms/PGOProfile/Inputs/func_entry.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/func_entry.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/func_entry.proftext @@ -3,7 +3,7 @@ :entry_first hot # Func Hash: -12884901887 +742261418966908927 # Num Counters: 1 # Counter Values: @@ -11,7 +11,7 @@ cold # Func Hash: -12884901887 +742261418966908927 # Num Counters: 1 # Counter Values: @@ -19,7 +19,7 @@ med # Func Hash: -12884901887 +742261418966908927 # Num Counters: 1 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/indirect_call.proftext b/llvm/test/Transforms/PGOProfile/Inputs/indirect_call.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/indirect_call.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/indirect_call.proftext @@ -1,7 +1,7 @@ :ir bar # Func Hash: -281487861612543 +170957022131388415 # Num Counters: 1 # Counter Values: @@ -19,7 +19,7 @@ func1 # Func Hash: -12884901887 +742261418966908927 # Num Counters: 1 # Counter Values: @@ -27,7 +27,7 @@ func2 # Func Hash: -12884901887 +742261418966908927 # Num Counters: 1 # Counter Values: @@ -35,7 +35,7 @@ func3 # Func Hash: -12884901887 +742261418966908927 # Num Counters: 1 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/indirectbr.proftext b/llvm/test/Transforms/PGOProfile/Inputs/indirectbr.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/indirectbr.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/indirectbr.proftext @@ -2,7 +2,7 @@ :ir foo # Func Hash: -47485104005 +844982796158316421 # Num Counters: 4 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/indirectbr_entry.proftext b/llvm/test/Transforms/PGOProfile/Inputs/indirectbr_entry.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/indirectbr_entry.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/indirectbr_entry.proftext @@ -3,7 +3,7 @@ :entry_first foo # Func Hash: -47485104005 +844982796158316421 # Num Counters: 4 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/irreducible.proftext b/llvm/test/Transforms/PGOProfile/Inputs/irreducible.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/irreducible.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/irreducible.proftext @@ -1,7 +1,7 @@ :ir _Z11irreducibleii # Func Hash: -64451410787 +287486624745028451 # Num Counters: 6 # Counter Values: @@ -14,7 +14,7 @@ _Z11irreduciblePh # Func Hash: -104649601521 +331779889035882993 # Num Counters: 9 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/irreducible_entry.proftext b/llvm/test/Transforms/PGOProfile/Inputs/irreducible_entry.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/irreducible_entry.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/irreducible_entry.proftext @@ -2,7 +2,7 @@ :entry_first _Z11irreducibleii # Func Hash: -64451410787 +287486624745028451 # Num Counters: 6 # Counter Values: @@ -15,7 +15,7 @@ _Z11irreduciblePh # Func Hash: -104649601521 +331779889035882993 # Num Counters: 9 # Counter Values: diff --git 
a/llvm/test/Transforms/PGOProfile/Inputs/landingpad.proftext b/llvm/test/Transforms/PGOProfile/Inputs/landingpad.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/landingpad.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/landingpad.proftext @@ -1,7 +1,7 @@ # :ir is the flag to indicate this is IR level profile. :ir foo -59130013419 +567185241952511723 4 3 1 @@ -9,7 +9,7 @@ 0 bar -24868915205 +784007058953177093 2 3 2 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/landingpad_entry.proftext b/llvm/test/Transforms/PGOProfile/Inputs/landingpad_entry.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/landingpad_entry.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/landingpad_entry.proftext @@ -2,7 +2,7 @@ :ir :entry_first foo -59130013419 +567185241952511723 4 5 1 @@ -10,7 +10,7 @@ 0 bar -24868915205 +784007058953177093 2 3 2 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/large_count_remarks.proftext b/llvm/test/Transforms/PGOProfile/Inputs/large_count_remarks.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/large_count_remarks.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/large_count_remarks.proftext @@ -1,7 +1,7 @@ # :ir is the flag to indicate this is IR level profile. :ir test -25571299074 +784007059655560962 2 40000000000 20000000000 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/loop1.proftext b/llvm/test/Transforms/PGOProfile/Inputs/loop1.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/loop1.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/loop1.proftext @@ -1,7 +1,7 @@ # :ir is the flag to indicate this is IR level profile. :ir test_simple_for -34137660316 +1063705162469825436 2 96 4 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/loop1_entry.proftext b/llvm/test/Transforms/PGOProfile/Inputs/loop1_entry.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/loop1_entry.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/loop1_entry.proftext @@ -2,7 +2,7 @@ :ir :entry_first test_simple_for -34137660316 +1063705162469825436 2 4 96 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/loop2.proftext b/llvm/test/Transforms/PGOProfile/Inputs/loop2.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/loop2.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/loop2.proftext @@ -1,7 +1,7 @@ # :ir is the flag to indicate this is IR level profile. 
:ir test_nested_for -53929068288 +798733566382720768 3 33 10 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/loop2_entry.proftext b/llvm/test/Transforms/PGOProfile/Inputs/loop2_entry.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/loop2_entry.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/loop2_entry.proftext @@ -2,7 +2,7 @@ :ir :entry_first test_nested_for -53929068288 +798733566382720768 3 6 33 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memop_size_annotation.proftext b/llvm/test/Transforms/PGOProfile/Inputs/memop_size_annotation.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/memop_size_annotation.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/memop_size_annotation.proftext @@ -2,7 +2,7 @@ :ir foo # Func Hash: -53929068288 +687116424982578944 # Num Counters: 3 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/misexpect-branch-correct.proftext b/llvm/test/Transforms/PGOProfile/Inputs/misexpect-branch-correct.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/misexpect-branch-correct.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/misexpect-branch-correct.proftext @@ -2,7 +2,7 @@ :ir bar # Func Hash: -29667547796 +146835647075900052 # Num Counters: 2 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/misexpect-branch.proftext b/llvm/test/Transforms/PGOProfile/Inputs/misexpect-branch.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/misexpect-branch.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/misexpect-branch.proftext @@ -2,7 +2,7 @@ :ir bar # Func Hash: -29667547796 +146835647075900052 # Num Counters: 2 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/misexpect-branch_entry.proftext b/llvm/test/Transforms/PGOProfile/Inputs/misexpect-branch_entry.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/misexpect-branch_entry.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/misexpect-branch_entry.proftext @@ -3,7 +3,7 @@ :entry_first bar # Func Hash: -29667547796 +146835647075900052 # Num Counters: 2 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/misexpect-switch-correct.proftext b/llvm/test/Transforms/PGOProfile/Inputs/misexpect-switch-correct.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/misexpect-switch-correct.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/misexpect-switch-correct.proftext @@ -2,7 +2,7 @@ :ir main # Func Hash: -74054140268 +391331300939170156 # Num Counters: 7 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/misexpect-switch-correct_entry.proftext b/llvm/test/Transforms/PGOProfile/Inputs/misexpect-switch-correct_entry.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/misexpect-switch-correct_entry.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/misexpect-switch-correct_entry.proftext @@ -3,7 +3,7 @@ :entry_first main # Func Hash: -74054140268 +391331300939170156 # Num Counters: 7 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/misexpect-switch.proftext b/llvm/test/Transforms/PGOProfile/Inputs/misexpect-switch.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/misexpect-switch.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/misexpect-switch.proftext @@ -2,7 +2,7 @@ :ir main # Func Hash: -74054140268 +391331300939170156 # Num Counters: 7 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/misexpect-switch_entry.proftext b/llvm/test/Transforms/PGOProfile/Inputs/misexpect-switch_entry.proftext --- 
a/llvm/test/Transforms/PGOProfile/Inputs/misexpect-switch_entry.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/misexpect-switch_entry.proftext @@ -3,7 +3,7 @@ :entry_first main # Func Hash: -74054140268 +391331300939170156 # Num Counters: 7 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/multiple_hash_profile.proftext b/llvm/test/Transforms/PGOProfile/Inputs/multiple_hash_profile.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/multiple_hash_profile.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/multiple_hash_profile.proftext @@ -2,13 +2,23 @@ :ir _Z3fooi # Func Hash: -72057606922829823 +382993475055910911 # Num Counters: 2 # Counter Values: 18 12 +# For -pgo-instr-old-cfg-hashing=true +_Z3fooi +# Func Hash: +72057606922829823 +# Num Counters: +2 +# Counter Values: +18 +6 + _Z3fooi # Func Hash: 12884901887 @@ -17,6 +27,16 @@ # Counter Values: 0 +_Z3bari +# Func Hash: +382993475055910911 +# Num Counters: +2 +# Counter Values: +0 +0 + +# For -pgo-instr-old-cfg-hashing=true _Z3bari # Func Hash: 72057606922829823 @@ -28,9 +48,17 @@ _Z4m2f1v # Func Hash: -12884901887 +742261418966908927 # Num Counters: 1 # Counter Values: 1 +# For -pgo-instr-old-cfg-hashing=true +_Z4m2f1v +# Func Hash: +12884901887 +# Num Counters: +1 +# Counter Values: +1 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/noreturncall.proftext b/llvm/test/Transforms/PGOProfile/Inputs/noreturncall.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/noreturncall.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/noreturncall.proftext @@ -2,7 +2,7 @@ :ir foo # Func Hash: -36496524737 +238984482720105921 # Num Counters: 3 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/remap.proftext b/llvm/test/Transforms/PGOProfile/Inputs/remap.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/remap.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/remap.proftext @@ -1,7 +1,7 @@ # :ir is the flag to indicate this is IR level profile. :ir _ZN3foo3barERKN1N1XINS_4quuxEEE -25571299074 +784007059655560962 2 3 2 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/select1.proftext b/llvm/test/Transforms/PGOProfile/Inputs/select1.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/select1.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/select1.proftext @@ -1,7 +1,7 @@ :ir :entry_first test_br_2 -72057623705475732 +942389667449461396 3 5 1 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/select2.proftext b/llvm/test/Transforms/PGOProfile/Inputs/select2.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/select2.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/select2.proftext @@ -3,7 +3,7 @@ :entry_first foo # Func Hash: -72057628175588252 +134732432632142748 # Num Counters: 3 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/Inputs/suppl-profile.proftext b/llvm/test/Transforms/PGOProfile/Inputs/suppl-profile.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/suppl-profile.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/suppl-profile.proftext @@ -1,7 +1,7 @@ # :ir is the flag to indicate this is IR level profile. :ir test_simple_for -34137660316 +1063705162469825436 2 0 0 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/switch.proftext b/llvm/test/Transforms/PGOProfile/Inputs/switch.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/switch.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/switch.proftext @@ -1,7 +1,7 @@ # :ir is the flag to indicate this is IR level profile. 
:ir test_switch -46200943743 +536873293052540031 4 0 5 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/switch_entry.proftext b/llvm/test/Transforms/PGOProfile/Inputs/switch_entry.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/switch_entry.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/switch_entry.proftext @@ -2,7 +2,7 @@ :ir :entry_first test_switch -46200943743 +536873293052540031 4 10 5 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/thinlto_cs.proftext b/llvm/test/Transforms/PGOProfile/Inputs/thinlto_cs.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/thinlto_cs.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/thinlto_cs.proftext @@ -10,7 +10,7 @@ foo # Func Hash: -29212902728 +1720106746050921044 # Num Counters: 2 # Counter Values: @@ -19,7 +19,7 @@ bar # Func Hash: -1152921534274394772 +1299757151682747028 # Num Counters: 2 # Counter Values: @@ -45,7 +45,7 @@ main # Func Hash: -12884901887 +1895182923573755903 # Num Counters: 1 # Counter Values: @@ -53,7 +53,7 @@ cspgo.c:foo # Func Hash: -1152921563228422740 +1720106746050921044 # Num Counters: 4 # Counter Values: diff --git a/llvm/test/Transforms/PGOProfile/multiple_hash_profile.ll b/llvm/test/Transforms/PGOProfile/multiple_hash_profile.ll --- a/llvm/test/Transforms/PGOProfile/multiple_hash_profile.ll +++ b/llvm/test/Transforms/PGOProfile/multiple_hash_profile.ll @@ -1,6 +1,8 @@ ; RUN: llvm-profdata merge %S/Inputs/multiple_hash_profile.proftext -o %t.profdata ; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -pgo-instr-old-cfg-hashing=true -S | FileCheck -check-prefix=CHECKOLDHASH %s ; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -pgo-instr-old-cfg-hashing=true -S | FileCheck -check-prefix=CHECKOLDHASH %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -29,6 +31,9 @@ ; CHECK: %mul.i = select i1 %cmp.i, i32 1, i32 %i ; CHECK-SAME: !prof ![[BW:[0-9]+]] ; CHECK: ![[BW]] = !{!"branch_weights", i32 12, i32 6} +; CHECKOLDHASH: %mul.i = select i1 %cmp.i, i32 1, i32 %i +; CHECKOLDHASH-SAME: !prof ![[BW:[0-9]+]] +; CHECKOLDHASH: ![[BW]] = !{!"branch_weights", i32 6, i32 12} %retval.0.i = mul nsw i32 %mul.i, %i ret i32 %retval.0.i } diff --git a/llvm/test/Transforms/SCCP/ipsccp-preserve-analysis.ll b/llvm/test/Transforms/SCCP/ipsccp-preserve-analysis.ll --- a/llvm/test/Transforms/SCCP/ipsccp-preserve-analysis.ll +++ b/llvm/test/Transforms/SCCP/ipsccp-preserve-analysis.ll @@ -19,7 +19,6 @@ ; NEW-PM-NEXT: Invalidating all non-preserved analyses for: ; NEW-PM-NEXT: Invalidating all non-preserved analyses for: f1 ; NEW-PM-NEXT: Invalidating all non-preserved analyses for: f2 -; NEW-PM-NEXT: Running pass: ModuleToFunctionPassAdaptor ; NEW-PM-NOT: Running analysis: ; IR-LABEL: @f1 diff --git a/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp b/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp --- a/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp +++ b/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp @@ -136,6 +136,7 @@ PassBuilder PB(TM.get()); LoopAnalysisManager LAM; + LoopNestAnalysisManager LNAM(LAM); FunctionAnalysisManager FAM; CGSCCAnalysisManager CGAM; ModulePassManager MPM; @@ -146,7 +147,8 @@ PB.registerCGSCCAnalyses(CGAM); PB.registerFunctionAnalyses(FAM); PB.registerLoopAnalyses(LAM); - 
PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + PB.registerLoopNestAnalyses(LNAM); + PB.crossRegisterProxies(LAM, LNAM, FAM, CGAM, MAM); auto Err = PB.parsePassPipeline(MPM, PassPipeline, false, false); assert(!Err && "Should have been checked during fuzzer initialization"); diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/LoopNestAnalysisManager.h" #include "llvm/Bitcode/BitcodeWriterPass.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Dominators.h" @@ -348,6 +349,7 @@ } LoopAnalysisManager LAM(DebugPM); + LoopNestAnalysisManager LNAM(LAM); FunctionAnalysisManager FAM(DebugPM); CGSCCAnalysisManager CGAM(DebugPM); ModuleAnalysisManager MAM(DebugPM); @@ -360,7 +362,8 @@ PB.registerCGSCCAnalyses(CGAM); PB.registerFunctionAnalyses(FAM); PB.registerLoopAnalyses(LAM); - PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + PB.registerLoopNestAnalyses(LNAM); + PB.crossRegisterProxies(LAM, LNAM, FAM, CGAM, MAM); ModulePassManager MPM(DebugPM); if (VK > VK_NoVerifier) diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -6,13 +6,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/IR/Verifier.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "gtest/gtest.h" @@ -360,9 +361,11 @@ auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; + IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), + F->getEntryBlock().getFirstInsertionPt()); IRBuilder<>::InsertPoint AfterIP = - OMPBuilder.CreateParallel(Loc, BodyGenCB, PrivCB, FiniCB, nullptr, - nullptr, OMP_PROC_BIND_default, false); + OMPBuilder.CreateParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false); EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 1U); EXPECT_EQ(NumFinalizationPoints, 1U); @@ -400,6 +403,205 @@ EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin()); } +TEST_F(OpenMPIRBuilderTest, ParallelNested) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + unsigned NumInnerBodiesGenerated = 0; + unsigned NumOuterBodiesGenerated = 0; + unsigned NumFinalizationPoints = 0; + + auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumInnerBodiesGenerated; + }; + + auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { + // Trivial copy (=firstprivate). 
+ Builder.restoreIP(AllocaIP); + Type *VTy = VPtr.getType()->getPointerElementType(); + Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload"); + ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy"); + Builder.restoreIP(CodeGenIP); + Builder.CreateStore(V, ReplacementValue); + return CodeGenIP; + }; + + auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; + + auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumOuterBodiesGenerated; + Builder.restoreIP(CodeGenIP); + BasicBlock *CGBB = CodeGenIP.getBlock(); + BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint()); + CGBB->getTerminator()->eraseFromParent(); + ; + + IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel( + InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB, + FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); + + Builder.restoreIP(AfterIP); + Builder.CreateBr(NewBB); + }; + + IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), + F->getEntryBlock().getFirstInsertionPt()); + IRBuilder<>::InsertPoint AfterIP = + OMPBuilder.CreateParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false); + + EXPECT_EQ(NumInnerBodiesGenerated, 1U); + EXPECT_EQ(NumOuterBodiesGenerated, 1U); + EXPECT_EQ(NumFinalizationPoints, 2U); + + Builder.restoreIP(AfterIP); + Builder.CreateRetVoid(); + + OMPBuilder.finalize(); + + EXPECT_EQ(M->size(), 5U); + for (Function &OutlinedFn : *M) { + if (F == &OutlinedFn || OutlinedFn.isDeclaration()) + continue; + EXPECT_FALSE(verifyModule(*M, &errs())); + EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind)); + EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse)); + EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias)); + EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias)); + + EXPECT_TRUE(OutlinedFn.hasInternalLinkage()); + EXPECT_EQ(OutlinedFn.arg_size(), 2U); + + EXPECT_EQ(OutlinedFn.getNumUses(), 1U); + User *Usr = OutlinedFn.user_back(); + ASSERT_TRUE(isa(Usr)); + CallInst *ForkCI = dyn_cast(Usr->user_back()); + ASSERT_NE(ForkCI, nullptr); + + EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); + EXPECT_EQ(ForkCI->getNumArgOperands(), 3U); + EXPECT_TRUE(isa(ForkCI->getArgOperand(0))); + EXPECT_EQ(ForkCI->getArgOperand(1), + ConstantInt::get(Type::getInt32Ty(Ctx), 0U)); + EXPECT_EQ(ForkCI->getArgOperand(2), Usr); + } +} + +TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + unsigned NumInnerBodiesGenerated = 0; + unsigned NumOuterBodiesGenerated = 0; + unsigned NumFinalizationPoints = 0; + + auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumInnerBodiesGenerated; + }; + + auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { + // Trivial copy (=firstprivate). 
+ Builder.restoreIP(AllocaIP); + Type *VTy = VPtr.getType()->getPointerElementType(); + Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload"); + ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy"); + Builder.restoreIP(CodeGenIP); + Builder.CreateStore(V, ReplacementValue); + return CodeGenIP; + }; + + auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; + + auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationIP) { + ++NumOuterBodiesGenerated; + Builder.restoreIP(CodeGenIP); + BasicBlock *CGBB = CodeGenIP.getBlock(); + BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint()); + BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt()); + CGBB->getTerminator()->eraseFromParent(); + ; + NewBB1->getTerminator()->eraseFromParent(); + ; + + IRBuilder<>::InsertPoint AfterIP1 = OMPBuilder.CreateParallel( + InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB, + FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); + + Builder.restoreIP(AfterIP1); + Builder.CreateBr(NewBB1); + + IRBuilder<>::InsertPoint AfterIP2 = OMPBuilder.CreateParallel( + InsertPointTy(NewBB1, NewBB1->end()), AllocaIP, InnerBodyGenCB, PrivCB, + FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); + + Builder.restoreIP(AfterIP2); + Builder.CreateBr(NewBB2); + }; + + IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), + F->getEntryBlock().getFirstInsertionPt()); + IRBuilder<>::InsertPoint AfterIP = + OMPBuilder.CreateParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false); + + EXPECT_EQ(NumInnerBodiesGenerated, 2U); + EXPECT_EQ(NumOuterBodiesGenerated, 1U); + EXPECT_EQ(NumFinalizationPoints, 3U); + + Builder.restoreIP(AfterIP); + Builder.CreateRetVoid(); + + OMPBuilder.finalize(); + + EXPECT_EQ(M->size(), 6U); + for (Function &OutlinedFn : *M) { + if (F == &OutlinedFn || OutlinedFn.isDeclaration()) + continue; + EXPECT_FALSE(verifyModule(*M, &errs())); + EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind)); + EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse)); + EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias)); + EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias)); + + EXPECT_TRUE(OutlinedFn.hasInternalLinkage()); + EXPECT_EQ(OutlinedFn.arg_size(), 2U); + + unsigned NumAllocas = 0; + for (Instruction &I : instructions(OutlinedFn)) + NumAllocas += isa(I); + EXPECT_EQ(NumAllocas, 1U); + + EXPECT_EQ(OutlinedFn.getNumUses(), 1U); + User *Usr = OutlinedFn.user_back(); + ASSERT_TRUE(isa(Usr)); + CallInst *ForkCI = dyn_cast(Usr->user_back()); + ASSERT_NE(ForkCI, nullptr); + + EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); + EXPECT_EQ(ForkCI->getNumArgOperands(), 3U); + EXPECT_TRUE(isa(ForkCI->getArgOperand(0))); + EXPECT_EQ(ForkCI->getArgOperand(1), + ConstantInt::get(Type::getInt32Ty(Ctx), 0U)); + EXPECT_EQ(ForkCI->getArgOperand(2), Usr); + } +} + TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); @@ -460,9 +662,12 @@ // No destructors. 
}; - IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel( - Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()), - nullptr, OMP_PROC_BIND_default, false); + IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), + F->getEntryBlock().getFirstInsertionPt()); + IRBuilder<>::InsertPoint AfterIP = + OMPBuilder.CreateParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, + Builder.CreateIsNotNull(F->arg_begin()), + nullptr, OMP_PROC_BIND_default, false); EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 1U); @@ -585,9 +790,12 @@ {Builder.getInt32(NumFinalizationPoints)}); }; - IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel( - Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()), - nullptr, OMP_PROC_BIND_default, true); + IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), + F->getEntryBlock().getFirstInsertionPt()); + IRBuilder<>::InsertPoint AfterIP = + OMPBuilder.CreateParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, + Builder.CreateIsNotNull(F->arg_begin()), + nullptr, OMP_PROC_BIND_default, true); EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 0U); diff --git a/llvm/unittests/IR/PassBuilderCallbacksTest.cpp b/llvm/unittests/IR/PassBuilderCallbacksTest.cpp --- a/llvm/unittests/IR/PassBuilderCallbacksTest.cpp +++ b/llvm/unittests/IR/PassBuilderCallbacksTest.cpp @@ -174,6 +174,24 @@ MockPassHandle() { setDefaults(); } }; +template <> +struct MockPassHandle + : MockPassHandleBase, LoopNest, + LoopNestAnalysisManager, LoopStandardAnalysisResults &, + LNPMUpdater &> { + MOCK_METHOD4(run, + PreservedAnalyses(LoopNest &, LoopNestAnalysisManager &, + LoopStandardAnalysisResults &, LNPMUpdater &)); + + static void invalidateLoopNest(LoopNest &LN, LoopNestAnalysisManager &, + LoopStandardAnalysisResults &, + LNPMUpdater &Updater) { + Updater.markLoopNestAsDeleted(LN, LN.getName()); + } + + MockPassHandle() { setDefaults(); } +}; + template <> struct MockPassHandle : MockPassHandleBase, Function> { @@ -226,6 +244,20 @@ MockAnalysisHandle() { this->setDefaults(); } }; +template <> +struct MockAnalysisHandle + : MockAnalysisHandleBase, Loop, + LoopAnalysisManager, + LoopStandardAnalysisResults &> { + MOCK_METHOD3_T(run, typename Analysis::Result(Loop &, LoopAnalysisManager &, + LoopStandardAnalysisResults &)); + + MOCK_METHOD3_T(invalidate, bool(Loop &, const PreservedAnalyses &, + LoopAnalysisManager::Invalidator &)); + + MockAnalysisHandle() { this->setDefaults(); } +}; + template <> struct MockAnalysisHandle : MockAnalysisHandleBase, Function> { @@ -282,6 +314,8 @@ return any_cast(WrappedIR)->getName().str(); if (any_isa(WrappedIR)) return any_cast(WrappedIR)->getName().str(); + if (any_isa(WrappedIR)) + return any_cast(WrappedIR)->getName().str(); if (any_isa(WrappedIR)) return any_cast(WrappedIR)->getName().str(); if (any_isa(WrappedIR)) @@ -395,6 +429,7 @@ PassBuilder PB; ModulePassManager PM; LoopAnalysisManager LAM; + LoopNestAnalysisManager LNAM; FunctionAnalysisManager FAM; CGSCCAnalysisManager CGAM; ModuleAnalysisManager AM; @@ -426,7 +461,7 @@ "}\n")), CallbacksHandle(), PB(nullptr, PipelineTuningOptions(), None, &CallbacksHandle.Callbacks), - PM(true), LAM(true), FAM(true), CGAM(true), AM(true) { + PM(true), LAM(true), LNAM(LAM), FAM(true), CGAM(true), AM(true) { EXPECT_TRUE(&CallbacksHandle.Callbacks == PB.getPassInstrumentationCallbacks()); @@ -469,13 +504,15 @@ PB.registerCGSCCAnalyses(CGAM); PB.registerFunctionAnalyses(FAM); PB.registerLoopAnalyses(LAM); - PB.crossRegisterProxies(LAM, FAM, CGAM, 
AM); + PB.registerLoopNestAnalyses(LNAM); + PB.crossRegisterProxies(LAM, LNAM, FAM, CGAM, AM); } }; using ModuleCallbacksTest = PassBuilderCallbacksTest; using CGSCCCallbacksTest = PassBuilderCallbacksTest; using FunctionCallbacksTest = PassBuilderCallbacksTest; +using LoopNestCallbacksTest = PassBuilderCallbacksTest; using LoopCallbacksTest = PassBuilderCallbacksTest; /// Test parsing of the name of our mock pass for all IRUnits. @@ -731,6 +768,144 @@ PM.run(*M, AM); } +TEST_F(LoopNestCallbacksTest, Passes) { + EXPECT_CALL(AnalysisHandle, run(HasName("loop"), _, _)); + EXPECT_CALL(PassHandle, run(HasName("loop"), _, _, _)) + .WillOnce(WithArgs<0, 1, 2>(Invoke(getAnalysisResult))); + StringRef PipelineText = "test-transform"; + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + << "Pipeline was: " << PipelineText; + PM.run(*M, AM); +} + +TEST_F(LoopNestCallbacksTest, InstrumentedPasses) { + CallbacksHandle.registerPassInstrumentation(); + // Non-mock instrumentation not specifically mentioned below can be ignored. + CallbacksHandle.ignoreNonMockPassInstrumentation(""); + CallbacksHandle.ignoreNonMockPassInstrumentation("foo"); + CallbacksHandle.ignoreNonMockPassInstrumentation("loop"); + + EXPECT_CALL(AnalysisHandle, run(HasName("loop"), _, _)); + EXPECT_CALL(PassHandle, run(HasName("loop"), _, _, _)) + .WillOnce(WithArgs<0, 1, 2>(Invoke(getAnalysisResult))); + + // PassInstrumentation calls should happen in-sequence, in the same order + // as passes/analyses are scheduled. + ::testing::Sequence PISequence; + EXPECT_CALL(CallbacksHandle, + runBeforePass(HasNameRegex("MockPassHandle"), HasName("loop"))) + .InSequence(PISequence); + EXPECT_CALL( + CallbacksHandle, + runBeforeNonSkippedPass(HasNameRegex("MockPassHandle"), HasName("loop"))) + .InSequence(PISequence); + EXPECT_CALL( + CallbacksHandle, + runBeforeAnalysis(HasNameRegex("MockAnalysisHandle"), HasName("loop"))) + .InSequence(PISequence); + EXPECT_CALL( + CallbacksHandle, + runAfterAnalysis(HasNameRegex("MockAnalysisHandle"), HasName("loop"))) + .InSequence(PISequence); + EXPECT_CALL(CallbacksHandle, + runAfterPass(HasNameRegex("MockPassHandle"), HasName("loop"))) + .InSequence(PISequence); + + // Our mock pass does not invalidate IR. + EXPECT_CALL(CallbacksHandle, + runAfterPassInvalidated(HasNameRegex("MockPassHandle"))) + .Times(0); + + StringRef PipelineText = "test-transform"; + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) + << "Pipeline was: " << PipelineText; + PM.run(*M, AM); +} + +TEST_F(LoopNestCallbacksTest, InstrumentedInvalidatingPasses) { + CallbacksHandle.registerPassInstrumentation(); + // Non-mock instrumentation not specifically mentioned below can be ignored. + CallbacksHandle.ignoreNonMockPassInstrumentation(""); + CallbacksHandle.ignoreNonMockPassInstrumentation("foo"); + CallbacksHandle.ignoreNonMockPassInstrumentation("loop"); + + EXPECT_CALL(AnalysisHandle, run(HasName("loop"), _, _)); + EXPECT_CALL(PassHandle, run(HasName("loop"), _, _, _)) + .WillOnce( + DoAll(WithArgs<0, 1, 2, 3>(Invoke(PassHandle.invalidateLoopNest)), + WithArgs<0, 1, 2>(Invoke(getAnalysisResult)))); + + // PassInstrumentation calls should happen in-sequence, in the same order + // as passes/analyses are scheduled. 
+  ::testing::Sequence PISequence;
+  EXPECT_CALL(CallbacksHandle,
+              runBeforePass(HasNameRegex("MockPassHandle"), HasName("loop")))
+      .InSequence(PISequence);
+  EXPECT_CALL(
+      CallbacksHandle,
+      runBeforeNonSkippedPass(HasNameRegex("MockPassHandle"), HasName("loop")))
+      .InSequence(PISequence);
+  EXPECT_CALL(
+      CallbacksHandle,
+      runBeforeAnalysis(HasNameRegex("MockAnalysisHandle"), HasName("loop")))
+      .InSequence(PISequence);
+  EXPECT_CALL(
+      CallbacksHandle,
+      runAfterAnalysis(HasNameRegex("MockAnalysisHandle"), HasName("loop")))
+      .InSequence(PISequence);
+  EXPECT_CALL(CallbacksHandle,
+              runAfterPassInvalidated(HasNameRegex("MockPassHandle")))
+      .InSequence(PISequence);
+  EXPECT_CALL(CallbacksHandle,
+              runAfterPassInvalidated(HasNameRegex("^PassManager")))
+      .InSequence(PISequence);
+
+  // Our mock pass invalidates IR, thus normal runAfterPass is never called.
+  EXPECT_CALL(CallbacksHandle,
+              runAfterPass(HasNameRegex("MockPassHandle"), HasName("loop")))
+      .Times(0);
+
+  StringRef PipelineText = "test-transform";
+  ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded())
+      << "Pipeline was: " << PipelineText;
+  PM.run(*M, AM);
+}
+
+TEST_F(LoopNestCallbacksTest, InstrumentedSkippedPasses) {
+  CallbacksHandle.registerPassInstrumentation();
+  // Non-mock instrumentation run here can safely be ignored.
+  CallbacksHandle.ignoreNonMockPassInstrumentation("<string>");
+  CallbacksHandle.ignoreNonMockPassInstrumentation("foo");
+  CallbacksHandle.ignoreNonMockPassInstrumentation("loop");
+
+  // Skip the pass by returning false.
+  EXPECT_CALL(CallbacksHandle,
+              runBeforePass(HasNameRegex("MockPassHandle"), HasName("loop")))
+      .WillOnce(Return(false));
+
+  EXPECT_CALL(AnalysisHandle, run(HasName("loop"), _, _)).Times(0);
+  EXPECT_CALL(PassHandle, run(HasName("loop"), _, _, _)).Times(0);
+
+  // As the pass is skipped there is no afterPass, beforeAnalysis/afterAnalysis
+  // as well.
+  EXPECT_CALL(CallbacksHandle, runAfterPass(HasNameRegex("MockPassHandle"), _))
+      .Times(0);
+  EXPECT_CALL(CallbacksHandle,
+              runAfterPassInvalidated(HasNameRegex("MockPassHandle")))
+      .Times(0);
+  EXPECT_CALL(CallbacksHandle,
+              runBeforeAnalysis(HasNameRegex("MockAnalysisHandle"), _))
+      .Times(0);
+  EXPECT_CALL(CallbacksHandle,
+              runAfterAnalysis(HasNameRegex("MockAnalysisHandle"), _))
+      .Times(0);
+
+  StringRef PipelineText = "test-transform";
+  ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded())
+      << "Pipeline was: " << PipelineText;
+  PM.run(*M, AM);
+}
+
 TEST_F(LoopCallbacksTest, Passes) {
   EXPECT_CALL(AnalysisHandle, run(HasName("loop"), _, _));
   EXPECT_CALL(PassHandle, run(HasName("loop"), _, _, _))
diff --git a/llvm/unittests/Transforms/IPO/AttributorTestBase.h b/llvm/unittests/Transforms/IPO/AttributorTestBase.h
--- a/llvm/unittests/Transforms/IPO/AttributorTestBase.h
+++ b/llvm/unittests/Transforms/IPO/AttributorTestBase.h
@@ -18,6 +18,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/SourceMgr.h"
 #include "llvm/Testing/Support/Error.h"
 #include "llvm/Transforms/IPO/Attributor.h"
 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
diff --git a/llvm/unittests/Transforms/Scalar/LICMTest.cpp b/llvm/unittests/Transforms/Scalar/LICMTest.cpp
--- a/llvm/unittests/Transforms/Scalar/LICMTest.cpp
+++ b/llvm/unittests/Transforms/Scalar/LICMTest.cpp
@@ -22,6 +22,7 @@
   ModulePassManager MPM;
   PassBuilder PB;
   LoopAnalysisManager LAM;
+  LoopNestAnalysisManager LNAM(LAM);
   FunctionAnalysisManager FAM;
   CGSCCAnalysisManager CGAM;
   ModuleAnalysisManager MAM;
@@ -30,7 +31,8 @@
   PB.registerCGSCCAnalyses(CGAM);
   PB.registerFunctionAnalyses(FAM);
   PB.registerLoopAnalyses(LAM);
-  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
+  PB.registerLoopNestAnalyses(LNAM);
+  PB.crossRegisterProxies(LAM, LNAM, FAM, CGAM, MAM);

   StringRef PipelineStr = "require<opt-remark-emit>,loop(licm)";
   ASSERT_THAT_ERROR(PB.parsePassPipeline(MPM, PipelineStr), Succeeded());
diff --git a/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp b/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp
--- a/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp
+++ b/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp
@@ -186,8 +186,8 @@
   // Allocate stack memory.
   auto loc = op.getLoc();
   Value rank = rewriter.create<RankOp>(loc, arg);
-  Type i64Ty = rewriter.getI64Type();
-  Type memTy = MemRefType::get({ShapedType::kDynamicSize}, i64Ty);
+  Type indexTy = rewriter.getIndexType();
+  Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy);
   Value mem = rewriter.create<AllocaOp>(loc, memTy, ValueRange{rank});

   // Copy shape extents to stack-allocated memory.
@@ -197,15 +197,12 @@
       loc, zero, rank, one, llvm::None,
       [&](OpBuilder &b, Location loc, Value iv, ValueRange args) {
         Value dim = rewriter.create<DimOp>(loc, arg, iv);
-        Value dimInt = rewriter.create<IndexCastOp>(loc, dim, i64Ty);
-        rewriter.create<StoreOp>(loc, dimInt, mem, ValueRange{iv});
+        rewriter.create<StoreOp>(loc, dim, mem, ValueRange{iv});
         rewriter.create<scf::YieldOp>(loc);
       });

   // Load extents to tensor value.
-  Value extentTensorInt = rewriter.create<TensorLoadOp>(loc, mem);
-  rewriter.replaceOpWithNewOp<IndexCastOp>(op.getOperation(), extentTensorInt,
-                                           op.getType());
+  rewriter.replaceOpWithNewOp<TensorLoadOp>(op.getOperation(), mem);
   return success();
 }

diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -456,8 +456,12 @@
   llvm::Value *ifCond = nullptr;
   llvm::Value *numThreads = nullptr;
   bool isCancellable = false;
+  // TODO: Determine the actual alloca insertion point, e.g., the function
+  // entry or the alloca insertion point as provided by the body callback
+  // above.
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP(builder.saveIP());
   builder.restoreIP(ompBuilder->CreateParallel(
-      builder, bodyGenCB, privCB, finiCB, ifCond, numThreads,
+      builder, allocaIP, bodyGenCB, privCB, finiCB, ifCond, numThreads,
       llvm::omp::OMP_PROC_BIND_default, isCancellable));
   return success();
 }
diff --git a/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir b/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir
--- a/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir
+++ b/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir
@@ -40,16 +40,14 @@
 // CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>)
 func @shape_of_unranked(%arg : tensor<*xf32>) {
   // CHECK: %[[RANK:.*]] = rank %[[ARG]] : tensor<*xf32>
-  // CHECK: %[[SHAPE_MEM:.*]] = alloca(%[[RANK]]) : memref<?xi64>
+  // CHECK: %[[SHAPE_MEM:.*]] = alloca(%[[RANK]]) : memref<?xindex>
   // CHECK: %[[C0:.*]] = constant 0 : index
   // CHECK: %[[C1:.*]] = constant 1 : index
   // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] {
   // CHECK: %[[DIM:.*]] = dim %[[ARG]], %[[I]] : tensor<*xf32>
-  // CHECK: %[[DIM_INT:.*]] = index_cast %[[DIM]] : index to i64
-  // CHECK: store %[[DIM_INT]], %[[SHAPE_MEM]][%[[I]]] : memref<?xi64>
+  // CHECK: store %[[DIM]], %[[SHAPE_MEM]][%[[I]]] : memref<?xindex>
   // CHECK: }
-  // CHECK: %[[SHAPE_INT:.*]] = tensor_load %[[SHAPE_MEM]] : memref<?xi64>
-  // CHECK: %[[SHAPE:.*]] = index_cast %[[SHAPE_INT]] : tensor<?xi64> to tensor<?xindex>
+  // CHECK: %[[SHAPE:.*]] = tensor_load %[[SHAPE_MEM]] : memref<?xindex>
   %shape = shape.shape_of %arg : tensor<*xf32> -> tensor<?xindex>
   return
 }
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -880,14 +880,9 @@
         return OFFLOAD_FAIL;
       }
     }
-  } else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
-    TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
-                                        false, IsHostPtr);
-    TgtBaseOffset = 0; // no offset for ptrs.
-    DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
-       "object " DPxMOD "\n",
-       DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase), DPxPTR(HstPtrBase));
   } else {
+    if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
+      HstPtrBase = *reinterpret_cast<void **>(HstPtrBase);
     TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I], IsLast,
                                         false, IsHostPtr);
     TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
diff --git a/openmp/libomptarget/test/env/base_ptr_ref_count.c b/openmp/libomptarget/test/env/base_ptr_ref_count.c
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/test/env/base_ptr_ref_count.c
@@ -0,0 +1,51 @@
+// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-aarch64-unknown-linux-gnu 2>&1 | %fcheck-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64-ibm-linux-gnu 2>&1 | %fcheck-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64le-ibm-linux-gnu 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-x86_64-pc-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-x86_64-pc-linux-gnu 2>&1 | %fcheck-x86_64-pc-linux-gnu
+// RUN: %libomptarget-compile-nvptx64-nvidia-cuda && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda
+// REQUIRES: libomptarget-debug
+
+#include <stdio.h>
+
+int *allocate(size_t n) {
+  int *ptr = malloc(sizeof(int) * n);
+#pragma omp target enter data map(to : ptr[:n])
+  return ptr;
+}
+
+void deallocate(int *ptr, size_t n) {
+#pragma omp target exit data map(delete : ptr[:n])
+  free(ptr);
+}
+
+#pragma omp declare target
+int *cnt;
+void foo() {
+  ++(*cnt);
+}
+#pragma omp end declare target
+
+int main(void) {
+  int *A = allocate(10);
+  int *V = allocate(10);
+  deallocate(A, 10);
+  deallocate(V, 10);
+// CHECK-NOT: RefCount=2
+  cnt = malloc(sizeof(int));
+  *cnt = 0;
+#pragma omp target map(cnt[:1])
+  foo();
+  printf("Cnt = %d.\n", *cnt);
+// CHECK: Cnt = 1.
+  *cnt = 0;
+#pragma omp target data map(cnt[:1])
+#pragma omp target
+  foo();
+  printf("Cnt = %d.\n", *cnt);
+// CHECK: Cnt = 1.
+  free(cnt);
+
+  return 0;
+}
+
+
diff --git a/polly/unittests/ScopPassManager/PassManagerTest.cpp b/polly/unittests/ScopPassManager/PassManagerTest.cpp
--- a/polly/unittests/ScopPassManager/PassManagerTest.cpp
+++ b/polly/unittests/ScopPassManager/PassManagerTest.cpp
@@ -16,6 +16,7 @@
 protected:
   ModuleAnalysisManager MAM;
   FunctionAnalysisManager FAM;
   LoopAnalysisManager LAM;
+  LoopNestAnalysisManager LNAM;
   CGSCCAnalysisManager CGAM;
   ScopAnalysisManager SAM;
@@ -26,13 +27,14 @@
   ScopPassRegistry(const ScopPassRegistry &) = delete;
   ScopPassRegistry &operator=(ScopPassRegistry &&) = delete;
   ScopPassRegistry &operator=(const ScopPassRegistry &) = delete;
-  ScopPassRegistry() {
+  ScopPassRegistry() : LNAM(LAM) {
     PassBuilder PB;
     AM = PB.buildDefaultAAPipeline();
     PB.registerModuleAnalyses(MAM);
     PB.registerFunctionAnalyses(FAM);
     PB.registerLoopAnalyses(LAM);
+    PB.registerLoopNestAnalyses(LNAM);
    PB.registerCGSCCAnalyses(CGAM);

     FAM.registerPass([] { return ScopAnalysis(); });
@@ -43,7 +45,7 @@
     // SAM.registerPass([] { return DependenceAnalysis(); });
     SAM.registerPass([this] { return FunctionAnalysisManagerScopProxy(FAM); });

-    PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
+    PB.crossRegisterProxies(LAM, LNAM, FAM, CGAM, MAM);
   }
 };