Index: llvm/include/llvm/Transforms/Utils/Cloning.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/Cloning.h
+++ llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -17,6 +17,7 @@
 #ifndef LLVM_TRANSFORMS_UTILS_CLONING_H
 #define LLVM_TRANSFORMS_UTILS_CLONING_H
 
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -268,6 +269,29 @@
     Function *Callee, int64_t entryDelta,
     const ValueMap<const Value *, WeakTrackingVH> *VMap = nullptr);
 
+/// Utility for cloning !noalias and !alias.scope metadata. When a code region
+/// using scoped alias metadata is cloned, the aliasing relationships may not
+/// hold between the two clones, in which case it is necessary to clone the
+/// metadata using this utility. This comes up with inlining and unrolling.
+class ScopedAliasMetadataCloner {
+  using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>;
+  SetVector<const MDNode *> MD;
+  MetadataMap Map;
+  void addRecursiveMetadataUses();
+
+public:
+  ScopedAliasMetadataCloner(ArrayRef<BasicBlock *> Blocks);
+  ScopedAliasMetadataCloner(const Function *F);
+
+  /// Create a new clone of the scoped alias metadata, which will be used by
+  /// subsequent remap() calls.
+  void clone();
+
+  /// Remap instructions in the given VMap from the original to the cloned
+  /// metadata.
+  void remap(ValueToValueMapTy &VMap);
+};
+
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_UTILS_CLONING_H
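For orientation: the intended usage pattern, exactly as exercised by the InlineFunction.cpp and LoopUnroll.cpp changes below, is a three-step sequence (the `Region` name here is illustrative; any ArrayRef of blocks or a whole Function works):

    // Gather all !alias.scope/!noalias metadata reachable from the region,
    // including the (possibly cyclic) scope domain chains.
    ScopedAliasMetadataCloner Cloner(Region);
    // Produce one fresh, self-contained copy of that metadata.
    Cloner.clone();
    // Retarget the cloned instructions (the values of VMap) to the copy.
    Cloner.remap(VMap);

Calling clone() again discards the previous copy, which is what lets the unroller produce distinct scopes per iteration.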
Index: llvm/lib/Transforms/Utils/CloneFunction.cpp
===================================================================
--- llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -884,3 +884,90 @@
 
   return NewBB;
 }
+
+ScopedAliasMetadataCloner::ScopedAliasMetadataCloner(
+    ArrayRef<BasicBlock *> Blocks) {
+  for (BasicBlock *BB : Blocks) {
+    for (const Instruction &I : *BB) {
+      if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
+        MD.insert(M);
+      if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
+        MD.insert(M);
+    }
+  }
+  addRecursiveMetadataUses();
+}
+
+ScopedAliasMetadataCloner::ScopedAliasMetadataCloner(const Function *F) {
+  for (const BasicBlock &BB : *F) {
+    for (const Instruction &I : BB) {
+      if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
+        MD.insert(M);
+      if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
+        MD.insert(M);
+    }
+  }
+  addRecursiveMetadataUses();
+}
+
+void ScopedAliasMetadataCloner::addRecursiveMetadataUses() {
+  SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
+  while (!Queue.empty()) {
+    const MDNode *M = cast<MDNode>(Queue.pop_back_val());
+    for (const Metadata *Op : M->operands())
+      if (const MDNode *OpMD = dyn_cast<MDNode>(Op))
+        if (MD.insert(OpMD))
+          Queue.push_back(OpMD);
+  }
+}
+
+void ScopedAliasMetadataCloner::clone() {
+  // Discard a previous clone that may exist.
+  Map.clear();
+
+  SmallVector<TempMDTuple, 16> DummyNodes;
+  for (const MDNode *I : MD) {
+    DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), None));
+    Map[I].reset(DummyNodes.back().get());
+  }
+
+  // Create new metadata nodes to replace the dummy nodes, replacing old
+  // metadata references with either a dummy node or an already-created new
+  // node.
+  SmallVector<Metadata *, 4> NewOps;
+  for (const MDNode *I : MD) {
+    for (const Metadata *Op : I->operands()) {
+      if (const MDNode *M = dyn_cast<MDNode>(Op))
+        NewOps.push_back(Map[M]);
+      else
+        NewOps.push_back(const_cast<Metadata *>(Op));
+    }
+
+    MDNode *NewM = MDNode::get(I->getContext(), NewOps);
+    MDTuple *TempM = cast<MDTuple>(Map[I]);
+    assert(TempM->isTemporary() && "Expected temporary node");
+
+    TempM->replaceAllUsesWith(NewM);
+    NewOps.clear();
+  }
+}
+
+void ScopedAliasMetadataCloner::remap(ValueToValueMapTy &VMap) {
+  if (Map.empty())
+    return; // Nothing to do.
+
+  for (auto Entry : VMap) {
+    if (!Entry.second)
+      continue;
+
+    Instruction *I = dyn_cast<Instruction>(Entry.second);
+    if (!I)
+      continue;
+
+    if (MDNode *M = I->getMetadata(LLVMContext::MD_alias_scope))
+      I->setMetadata(LLVMContext::MD_alias_scope, Map[M]);
+
+    if (MDNode *M = I->getMetadata(LLVMContext::MD_noalias))
+      I->setMetadata(LLVMContext::MD_noalias, Map[M]);
+  }
+}
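Why the dummy-node dance in clone(): scope metadata is self-referential (e.g. `!0 = distinct !{!0}` in the new test below), so the replacement nodes cannot be built in a single pass. A minimal sketch of the two-phase idea for one self-referential scope, assuming only an available LLVMContext (the helper name is invented for illustration):

    static MDNode *cloneSelfRefScope(LLVMContext &Ctx) {
      // Phase 1: a temporary placeholder that can be referenced before the
      // final node exists.
      TempMDTuple Dummy = MDTuple::getTemporary(Ctx, None);
      // Phase 2: build the real node through the placeholder, then forward
      // all placeholder uses to the real node, closing the cycle.
      Metadata *Ops[] = {Dummy.get()};
      MDNode *NewScope = MDNode::get(Ctx, Ops);
      Dummy->replaceAllUsesWith(NewScope);
      return NewScope;
    }

The class above stores the results in TrackingMDNodeRefs rather than raw pointers, so any re-uniquing triggered by replaceAllUsesWith is followed automatically.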
Index: llvm/lib/Transforms/Utils/InlineFunction.cpp
===================================================================
--- llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -828,81 +828,9 @@
 /// call-site-specific control dependencies).
 static void CloneAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap) {
   const Function *CalledFunc = CB.getCalledFunction();
-  SetVector<const MDNode *> MD;
-
-  // Note: We could only clone the metadata if it is already used in the
-  // caller. I'm omitting that check here because it might confuse
-  // inter-procedural alias analysis passes. We can revisit this if it becomes
-  // an efficiency or overhead problem.
-
-  for (const BasicBlock &I : *CalledFunc)
-    for (const Instruction &J : I) {
-      if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope))
-        MD.insert(M);
-      if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias))
-        MD.insert(M);
-    }
-
-  if (MD.empty())
-    return;
-
-  // Walk the existing metadata, adding the complete (perhaps cyclic) chain to
-  // the set.
-  SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
-  while (!Queue.empty()) {
-    const MDNode *M = cast<MDNode>(Queue.pop_back_val());
-    for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i)
-      if (const MDNode *M1 = dyn_cast<MDNode>(M->getOperand(i)))
-        if (MD.insert(M1))
-          Queue.push_back(M1);
-  }
-
-  // Now we have a complete set of all metadata in the chains used to specify
-  // the noalias scopes and the lists of those scopes.
-  SmallVector<TempMDTuple, 16> DummyNodes;
-  DenseMap<const MDNode *, TrackingMDNodeRef> MDMap;
-  for (const MDNode *I : MD) {
-    DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None));
-    MDMap[I].reset(DummyNodes.back().get());
-  }
-
-  // Create new metadata nodes to replace the dummy nodes, replacing old
-  // metadata references with either a dummy node or an already-created new
-  // node.
-  for (const MDNode *I : MD) {
-    SmallVector<Metadata *, 4> NewOps;
-    for (unsigned i = 0, ie = I->getNumOperands(); i != ie; ++i) {
-      const Metadata *V = I->getOperand(i);
-      if (const MDNode *M = dyn_cast<MDNode>(V))
-        NewOps.push_back(MDMap[M]);
-      else
-        NewOps.push_back(const_cast<Metadata *>(V));
-    }
-
-    MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps);
-    MDTuple *TempM = cast<MDTuple>(MDMap[I]);
-    assert(TempM->isTemporary() && "Expected temporary node");
-
-    TempM->replaceAllUsesWith(NewM);
-  }
-
-  // Now replace the metadata in the new inlined instructions with the
-  // repacements from the map.
-  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
-       VMI != VMIE; ++VMI) {
-    if (!VMI->second)
-      continue;
-
-    Instruction *NI = dyn_cast<Instruction>(VMI->second);
-    if (!NI)
-      continue;
-
-    if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope))
-      NI->setMetadata(LLVMContext::MD_alias_scope, MDMap[M]);
-
-    if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias))
-      NI->setMetadata(LLVMContext::MD_noalias, MDMap[M]);
-  }
+  ScopedAliasMetadataCloner Cloner(CalledFunc);
+  Cloner.clone();
+  Cloner.remap(VMap);
 }
 
 /// If the inlined function has noalias arguments,
Index: llvm/lib/Transforms/Utils/LoopUnroll.cpp
===================================================================
--- llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -563,6 +563,13 @@
 
   std::vector<BasicBlock *> UnrolledLoopBlocks = L->getBlocks();
 
+  // Scoped noalias metadata in the loop might only apply within one iteration,
+  // in which case we need to use independent noalias metadata for each
+  // unrolled iteration. We currently have no way to distinguish whether the
+  // metadata applies only within one iteration or across iterations, so we
+  // conservatively assume the former here.
+  ScopedAliasMetadataCloner AliasMetadataCloner(UnrolledLoopBlocks);
+
   // Loop Unrolling might create new loops. While we do preserve LoopInfo, we
   // might break loop-simplified form for these loops (as they, e.g., would
   // share the same exit blocks). We'll keep track of loops for which we can
@@ -590,6 +597,9 @@
     SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
     NewLoops[L] = L;
 
+    // Use new scoped noalias metadata for each iteration.
+    AliasMetadataCloner.clone();
+
     for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
       ValueToValueMapTy VMap;
      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
@@ -667,6 +677,8 @@
               New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
         }
       }
+
+      AliasMetadataCloner.remap(VMap);
     }
 
     // Remap all instructions in the most recent iteration
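The unroller integration boils down to one clone() per unrolled iteration and one remap() per cloned block's value map. A condensed view of the control flow added above, with the surrounding unrolling machinery elided (`Count` and `OrigLoopBlocks` are illustrative stand-ins for the real trip count and block list):

    ScopedAliasMetadataCloner AliasMetadataCloner(UnrolledLoopBlocks);
    for (unsigned It = 1; It != Count; ++It) {
      AliasMetadataCloner.clone();       // fresh scopes for this iteration
      for (BasicBlock *BB : OrigLoopBlocks) {
        ValueToValueMapTy VMap;
        BasicBlock *New = CloneBasicBlock(BB, VMap, "." + Twine(It));
        // ... insert New, record mappings, update analyses ...
        AliasMetadataCloner.remap(VMap); // retarget this block's clones
      }
    }

This is what the new LoopUnroll/noalias.ll test below checks: after unrolling by 4, each iteration carries its own !alias.scope/!noalias domain.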
Index: llvm/test/Transforms/LoopUnroll/noalias.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopUnroll/noalias.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -loop-unroll -unroll-count=4 < %s | FileCheck %s
+
+define void @pr39282(i32* %addr1, i32* %addr2) {
+; CHECK-LABEL: @pr39282(
+; CHECK-NEXT:  start:
+; CHECK-NEXT:    br label [[BODY:%.*]]
+; CHECK:       body:
+; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[ADDR1:%.*]], align 4, !alias.scope !0
+; CHECK-NEXT:    store i32 [[X]], i32* [[ADDR2:%.*]], align 4, !noalias !0
+; CHECK-NEXT:    [[ADDR1I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1
+; CHECK-NEXT:    [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1
+; CHECK-NEXT:    [[X_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !3
+; CHECK-NEXT:    store i32 [[X_1]], i32* [[ADDR2I_1]], align 4, !noalias !3
+; CHECK-NEXT:    [[X_2:%.*]] = load i32, i32* [[ADDR1]], align 4, !alias.scope !6
+; CHECK-NEXT:    store i32 [[X_2]], i32* [[ADDR2]], align 4, !noalias !6
+; CHECK-NEXT:    [[ADDR1I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1
+; CHECK-NEXT:    [[ADDR2I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1
+; CHECK-NEXT:    [[X_3:%.*]] = load i32, i32* [[ADDR1I_3]], align 4, !alias.scope !9
+; CHECK-NEXT:    store i32 [[X_3]], i32* [[ADDR2I_3]], align 4, !noalias !9
+; CHECK-NEXT:    ret void
+;
+start:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %start ], [ %i2, %body ]
+  %j = and i32 %i, 1
+  %addr1i = getelementptr inbounds i32, i32* %addr1, i32 %j
+  %addr2i = getelementptr inbounds i32, i32* %addr2, i32 %j
+
+  %x = load i32, i32* %addr1i, !alias.scope !2
+  store i32 %x, i32* %addr2i, !noalias !2
+
+  %i2 = add i32 %i, 1
+  %cmp = icmp slt i32 %i2, 4
+  br i1 %cmp, label %body, label %end
+
+end:
+  ret void
+}
+
+!0 = distinct !{!0}
+!1 = distinct !{!1, !0}
+!2 = !{!1}
+
+; CHECK: !0 = !{!1}
+; CHECK: !1 = distinct !{!1, !2}
+; CHECK: !2 = distinct !{!2}
+; CHECK: !3 = !{!4}
+; CHECK: !4 = distinct !{!4, !5}
+; CHECK: !5 = distinct !{!5}
+; CHECK: !6 = !{!7}
+; CHECK: !7 = distinct !{!7, !8}
+; CHECK: !8 = distinct !{!8}
+; CHECK: !9 = !{!10}
+; CHECK: !10 = distinct !{!10, !11}
+; CHECK: !11 = distinct !{!11}
Index: llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
+++ llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
@@ -49,43 +49,43 @@
 ; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[VECTOR_PH_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = bitcast double* [[TMP8]] to <4 x double>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP9]], align 8, !tbaa !3, !alias.scope !7
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP9]], align 8, [[TBAA3:!tbaa !.*]], !alias.scope !7
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul fast <4 x double> [[WIDE_LOAD]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>*
-; CHECK-NEXT:    store <4 x double> [[TMP10]], <4 x double>* [[TMP12]], align 8, !tbaa !3, !alias.scope !10, !noalias !7
+; CHECK-NEXT:    store <4 x double> [[TMP10]], <4 x double>* [[TMP12]], align 8, [[TBAA3]], !alias.scope !10, !noalias !7
 ; CHECK-NEXT:    [[INDEX_NEXT:%.*]] = or i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDEX_NEXT]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = bitcast double* [[TMP13]] to <4 x double>*
-; CHECK-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x double>, <4 x double>* [[TMP14]], align 8, !tbaa !3, !alias.scope !7
+; CHECK-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x double>, <4 x double>* [[TMP14]], align 8, [[TBAA3]], !alias.scope !12
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul fast <4 x double> [[WIDE_LOAD_1]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDEX_NEXT]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = bitcast double* [[TMP16]] to <4 x double>*
-; CHECK-NEXT:    store <4 x double> [[TMP15]], <4 x double>* [[TMP17]], align 8, !tbaa !3, !alias.scope !10, !noalias !7
+; CHECK-NEXT:    store <4 x double> [[TMP15]], <4 x double>* [[TMP17]], align 8, [[TBAA3]], !alias.scope !15, !noalias !12
 ; CHECK-NEXT:    [[INDEX_NEXT_1:%.*]] = or i64 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDEX_NEXT_1]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = bitcast double* [[TMP18]] to <4 x double>*
-; CHECK-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x double>, <4 x double>* [[TMP19]], align 8, !tbaa !3, !alias.scope !7
+; CHECK-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x double>, <4 x double>* [[TMP19]], align 8, [[TBAA3]], !alias.scope !17
 ; CHECK-NEXT:    [[TMP20:%.*]] = fmul fast <4 x double> [[WIDE_LOAD_2]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDEX_NEXT_1]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = bitcast double* [[TMP21]] to <4 x double>*
-; CHECK-NEXT:    store <4 x double> [[TMP20]], <4 x double>* [[TMP22]], align 8, !tbaa !3, !alias.scope !10, !noalias !7
+; CHECK-NEXT:    store <4 x double> [[TMP20]], <4 x double>* [[TMP22]], align 8, [[TBAA3]], !alias.scope !20, !noalias !17
 ; CHECK-NEXT:    [[INDEX_NEXT_2:%.*]] = or i64 [[INDEX]], 12
 ; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDEX_NEXT_2]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = bitcast double* [[TMP23]] to <4 x double>*
-; CHECK-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x double>, <4 x double>* [[TMP24]], align 8, !tbaa !3, !alias.scope !7
+; CHECK-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x double>, <4 x double>* [[TMP24]], align 8, [[TBAA3]], !alias.scope !22
 ; CHECK-NEXT:    [[TMP25:%.*]] = fmul fast <4 x double> [[WIDE_LOAD_3]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDEX_NEXT_2]]
 ; CHECK-NEXT:    [[TMP27:%.*]] = bitcast double* [[TMP26]] to <4 x double>*
-; CHECK-NEXT:    store <4 x double> [[TMP25]], <4 x double>* [[TMP27]], align 8, !tbaa !3, !alias.scope !10, !noalias !7
+; CHECK-NEXT:    store <4 x double> [[TMP25]], <4 x double>* [[TMP27]], align 8, [[TBAA3]], !alias.scope !25, !noalias !22
 ; CHECK-NEXT:    [[INDEX_NEXT_3]] = add i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[NITER_NSUB_3]] = add i64 [[NITER]], -4
 ; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NSUB_3]], 0
-; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[MIDDLE_BLOCK_UNR_LCSSA]], label [[VECTOR_BODY]], !llvm.loop !12
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[MIDDLE_BLOCK_UNR_LCSSA]], label [[VECTOR_BODY]], [[LOOP27:!llvm.loop !.*]]
 ; CHECK:       middle.block.unr-lcssa:
 ; CHECK-NEXT:    [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT_3]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]]
+; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]]
 ; CHECK:       vector.body.epil.preheader:
 ; CHECK-NEXT:    [[TMP28:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY_EPIL:%.*]]
@@ -94,15 +94,15 @@
 ; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[VECTOR_BODY_EPIL_PREHEADER]] ], [ [[EPIL_ITER_SUB:%.*]], [[VECTOR_BODY_EPIL]] ]
 ; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDEX_EPIL]]
 ; CHECK-NEXT:    [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>*
-; CHECK-NEXT:    [[WIDE_LOAD_EPIL:%.*]] = load <4 x double>, <4 x double>* [[TMP30]], align 8, !tbaa !3, !alias.scope !7
+; CHECK-NEXT:    [[WIDE_LOAD_EPIL:%.*]] = load <4 x double>, <4 x double>* [[TMP30]], align 8, [[TBAA3]], !alias.scope !7
 ; CHECK-NEXT:    [[TMP31:%.*]] = fmul fast <4 x double> [[WIDE_LOAD_EPIL]], [[TMP28]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDEX_EPIL]]
 ; CHECK-NEXT:    [[TMP33:%.*]] = bitcast double* [[TMP32]] to <4 x double>*
-; CHECK-NEXT:    store <4 x double> [[TMP31]], <4 x double>* [[TMP33]], align 8, !tbaa !3, !alias.scope !10, !noalias !7
+; CHECK-NEXT:    store <4 x double> [[TMP31]], <4 x double>* [[TMP33]], align 8, [[TBAA3]], !alias.scope !10, !noalias !7
 ; CHECK-NEXT:    [[INDEX_NEXT_EPIL]] = add i64 [[INDEX_EPIL]], 4
 ; CHECK-NEXT:    [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1
-; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0
-; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY_EPIL]], !llvm.loop !14
+; CHECK-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY_EPIL]], [[LOOP29:!llvm.loop !.*]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]]
@@ -111,8 +111,8 @@
 ; CHECK-NEXT:    [[TMP34:%.*]] = xor i64 [[INDVARS_IV_PH]], -1
 ; CHECK-NEXT:    [[TMP35:%.*]] = add nsw i64 [[TMP34]], [[WIDE_TRIP_COUNT]]
 ; CHECK-NEXT:    [[XTRAITER8:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 3
-; CHECK-NEXT:    [[LCMP_MOD9:%.*]] = icmp eq i64 [[XTRAITER8]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD9]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]], label [[FOR_BODY_PROL_PREHEADER:%.*]]
+; CHECK-NEXT:    [[LCMP_MOD9_NOT:%.*]] = icmp eq i64 [[XTRAITER8]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD9_NOT]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]], label [[FOR_BODY_PROL_PREHEADER:%.*]]
 ; CHECK:       for.body.prol.preheader:
 ; CHECK-NEXT:    [[TMP36:%.*]] = fdiv fast double 1.000000e+00, [[A]]
 ; CHECK-NEXT:    br label [[FOR_BODY_PROL:%.*]]
@@ -120,14 +120,14 @@
 ; CHECK-NEXT:    [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ]
 ; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ], [ [[XTRAITER8]], [[FOR_BODY_PROL_PREHEADER]] ]
 ; CHECK-NEXT:    [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV_PROL]]
-; CHECK-NEXT:    [[T0_PROL:%.*]] = load double, double* [[ARRAYIDX_PROL]], align 8, !tbaa !3
+; CHECK-NEXT:    [[T0_PROL:%.*]] = load double, double* [[ARRAYIDX_PROL]], align 8, [[TBAA3]]
 ; CHECK-NEXT:    [[TMP37:%.*]] = fmul fast double [[T0_PROL]], [[TMP36]]
 ; CHECK-NEXT:    [[ARRAYIDX2_PROL:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV_PROL]]
-; CHECK-NEXT:    store double [[TMP37]], double* [[ARRAYIDX2_PROL]], align 8, !tbaa !3
+; CHECK-NEXT:    store double [[TMP37]], double* [[ARRAYIDX2_PROL]], align 8, [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1
 ; CHECK-NEXT:    [[PROL_ITER_SUB]] = add i64 [[PROL_ITER]], -1
-; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp eq i64 [[PROL_ITER_SUB]], 0
-; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL_LOOPEXIT]], label [[FOR_BODY_PROL]], !llvm.loop !16
+; CHECK-NEXT:    [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_SUB]], 0
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP_NOT]], label [[FOR_BODY_PROL_LOOPEXIT]], label [[FOR_BODY_PROL]], [[LOOP31:!llvm.loop !.*]]
 ; CHECK:       for.body.prol.loopexit:
 ; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ]
 ; CHECK-NEXT:    [[TMP38:%.*]] = icmp ult i64 [[TMP35]], 3
@@ -141,31 +141,31 @@
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[T0:%.*]] = load double, double* [[ARRAYIDX]], align 8, !tbaa !3
+; CHECK-NEXT:    [[T0:%.*]] = load double, double* [[ARRAYIDX]], align 8, [[TBAA3]]
 ; CHECK-NEXT:    [[TMP43:%.*]] = fmul fast double [[T0]], [[TMP39]]
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store double [[TMP43]], double* [[ARRAYIDX2]], align 8, !tbaa !3
+; CHECK-NEXT:    store double [[TMP43]], double* [[ARRAYIDX2]], align 8, [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT:    [[T0_1:%.*]] = load double, double* [[ARRAYIDX_1]], align 8, !tbaa !3
+; CHECK-NEXT:    [[T0_1:%.*]] = load double, double* [[ARRAYIDX_1]], align 8, [[TBAA3]]
 ; CHECK-NEXT:    [[TMP44:%.*]] = fmul fast double [[T0_1]], [[TMP40]]
 ; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT:    store double [[TMP44]], double* [[ARRAYIDX2_1]], align 8, !tbaa !3
+; CHECK-NEXT:    store double [[TMP44]], double* [[ARRAYIDX2_1]], align 8, [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV_NEXT_1]]
-; CHECK-NEXT:    [[T0_2:%.*]] = load double, double* [[ARRAYIDX_2]], align 8, !tbaa !3
+; CHECK-NEXT:    [[T0_2:%.*]] = load double, double* [[ARRAYIDX_2]], align 8, [[TBAA3]]
 ; CHECK-NEXT:    [[TMP45:%.*]] = fmul fast double [[T0_2]], [[TMP41]]
 ; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV_NEXT_1]]
-; CHECK-NEXT:    store double [[TMP45]], double* [[ARRAYIDX2_2]], align 8, !tbaa !3
+; CHECK-NEXT:    store double [[TMP45]], double* [[ARRAYIDX2_2]], align 8, [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT:    [[T0_3:%.*]] = load double, double* [[ARRAYIDX_3]], align 8, !tbaa !3
+; CHECK-NEXT:    [[T0_3:%.*]] = load double, double* [[ARRAYIDX_3]], align 8, [[TBAA3]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = fmul fast double [[T0_3]], [[TMP42]]
 ; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT:    store double [[TMP46]], double* [[ARRAYIDX2_3]], align 8, !tbaa !3
+; CHECK-NEXT:    store double [[TMP46]], double* [[ARRAYIDX2_3]], align 8, [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
-; CHECK-NEXT:    [[EXITCOND_3:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_3]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT:    br i1 [[EXITCOND_3]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !17
+; CHECK-NEXT:    [[EXITCOND_NOT_3:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_3]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT_3]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP32:!llvm.loop !.*]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
 ;
Index: llvm/test/Transforms/PhaseOrdering/pr39282.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/pr39282.ll
+++ llvm/test/Transforms/PhaseOrdering/pr39282.ll
@@ -20,11 +20,15 @@
 ; CHECK-LABEL: @pr39282(
 ; CHECK-NEXT:  start:
 ; CHECK-NEXT:    [[X_I:%.*]] = load i32, i32* [[ADDR1:%.*]], align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT:    store i32 [[X_I]], i32* [[ADDR2:%.*]], align 4, !alias.scope !3, !noalias !0
 ; CHECK-NEXT:    [[ADDR1I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i64 1
-; CHECK-NEXT:    [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2:%.*]], i64 1
-; CHECK-NEXT:    [[X_I_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !0, !noalias !3
-; CHECK-NEXT:    store i32 [[X_I]], i32* [[ADDR2]], align 4, !alias.scope !3, !noalias !0
-; CHECK-NEXT:    store i32 [[X_I_1]], i32* [[ADDR2I_1]], align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT:    [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i64 1
+; CHECK-NEXT:    [[X_I_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !5, !noalias !8
+; CHECK-NEXT:    store i32 [[X_I_1]], i32* [[ADDR2I_1]], align 4, !alias.scope !8, !noalias !5
+; CHECK-NEXT:    [[X_I_2:%.*]] = load i32, i32* [[ADDR1]], align 4, !alias.scope !10, !noalias !13
+; CHECK-NEXT:    store i32 [[X_I_2]], i32* [[ADDR2]], align 4, !alias.scope !13, !noalias !10
+; CHECK-NEXT:    [[X_I_3:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !15, !noalias !18
+; CHECK-NEXT:    store i32 [[X_I_3]], i32* [[ADDR2I_1]], align 4, !alias.scope !18, !noalias !15
 ; CHECK-NEXT:    ret void
 ;
 start: