Index: llvm/include/llvm/IR/Value.h
===================================================================
--- llvm/include/llvm/IR/Value.h
+++ llvm/include/llvm/IR/Value.h
@@ -334,6 +334,13 @@
   /// values or constant users.
   void replaceUsesOutsideBlock(Value *V, BasicBlock *BB);
 
+  /// replaceUsesInsideBlock - Go through the uses list for this definition and
+  /// make each use point to "V" instead of "this" when the use is inside the
+  /// block.
+  /// Unlike replaceAllUsesWith this function does not support basic block
+  /// values or constant users.
+  void replaceUsesInsideBlock(Value *V, BasicBlock *BB);
+
   //----------------------------------------------------------------------
   // Methods for handling the chain of uses of this Value.
   //
Index: llvm/lib/IR/Value.cpp
===================================================================
--- llvm/lib/IR/Value.cpp
+++ llvm/lib/IR/Value.cpp
@@ -561,6 +561,23 @@
   });
 }
 
+// Like replaceAllUsesWith except it does not handle constants or basic blocks.
+// This routine leaves uses by instructions outside BB untouched.
+void Value::replaceUsesInsideBlock(Value *New, BasicBlock *BB) {
+  assert(New && "Value::replaceUsesInsideBlock(<null>, BB) is invalid!");
+  assert(!contains(New, this) &&
+         "this->replaceUsesInsideBlock(expr(this), BB) is NOT valid!");
+  assert(New->getType() == getType() &&
+         "replaceUses of value with new value of different type!");
+  assert(BB && "Basic block that may contain a use of 'New' must be defined\n");
+
+  replaceUsesWithIf(New, [BB](Use &U) {
+    auto *I = dyn_cast<Instruction>(U.getUser());
+    // Skip only users that are instructions located outside of BB.
+    return !I || I->getParent() == BB;
+  });
+}
+
 namespace {
 // Various metrics for how much to strip off of pointers.
enum PointerStripKind { Index: llvm/lib/Transforms/Utils/LoopRotationUtils.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -38,6 +38,7 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/NoAliasUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; @@ -407,13 +408,87 @@ break; } - // Remember the local noalias scope declarations in the header. After the - // rotation, they must be duplicated and the scope must be cloned. This - // avoids unwanted interaction across iterations. - SmallVector NoAliasDeclInstructions; - for (Instruction &I : *OrigHeader) - if (auto *Decl = dyn_cast(&I)) - NoAliasDeclInstructions.push_back(Decl); + // check if there are local restrict declarations and how they are used. + // - avoid breaking up mixed usage: + // -- either all usages must be in the OrigHeader, or no usages must be in + // the OrigHeader. + // -- when usages are outside the function, and we decide to continue, break + // the connection with the llvm.noalias.decl, as it will have no impact + // any more. 
+ SmallVector NoAliasScopeDeclInstructions; + SmallVector NoAliasDeclInstructions; + { + SmallVector ProvenanceNoAliasOrNoAliasToDisconnect; + for (Instruction &I : *OrigHeader) { + if (auto *Decl = dyn_cast(&I)) { + NoAliasScopeDeclInstructions.push_back(Decl); + } else if (IntrinsicInst *II = dyn_cast(&I)) { + if (II->getIntrinsicID() == Intrinsic::noalias_decl) { + // Check usage validity: + bool UsedInsideHeader = false; + bool UsedOutsideHeaderInsideLoop = false; + for (User *U : II->users()) { + Instruction *UI = cast(U); + if (UI->getParent() == OrigHeader) { + UsedInsideHeader = true; + } else if (L->contains(UI)) { + UsedOutsideHeaderInsideLoop = true; + } else { + if (PHINode *PUI = dyn_cast(UI)) { + if (PUI->getNumIncomingValues() > 1) { + LLVM_DEBUG( + llvm::dbgs() + << "LoopRotation: NOT rotating - " << *II + << "\n used in PHI node " << *PUI + << ",\n in exit block with multiple entries.\n"); + return false; + } + } + ProvenanceNoAliasOrNoAliasToDisconnect.push_back(UI); + } + } + + if (UsedInsideHeader && UsedOutsideHeaderInsideLoop) { + LLVM_DEBUG(llvm::dbgs() + << "LoopRotation: NOT rotating - " << *II + << " used in header and other parts of the loop.\n" + " Rotation would reduced the llvm.noalias quality " + "too much.\n"); + return false; + } + + NoAliasDeclInstructions.push_back(II); + } + } + } + + // If we get here, we will do the rotate. + // First break the link between any llvm.noalias.decl and its outside-loop + // usage + for (Instruction *I : ProvenanceNoAliasOrNoAliasToDisconnect) { + unsigned OpN; + if (PHINode *PI = dyn_cast(I)) { + // can only happen when in the exit block -> single predecessor ! 
+ assert(PI->getNumIncomingValues() == 1 && + "PHI node should have a single value"); + (void)PI; // Silence not-used warning in Release builds + OpN = 0; + } else { + IntrinsicInst *II = cast(I); + if (II->getIntrinsicID() == Intrinsic::provenance_noalias) { + OpN = Intrinsic::ProvenanceNoAliasNoAliasDeclArg; + } else if (II->getIntrinsicID() == Intrinsic::noalias) { + OpN = Intrinsic::NoAliasNoAliasDeclArg; + } else { + assert(II->getIntrinsicID() == Intrinsic::noalias_copy_guard); + OpN = Intrinsic::NoAliasCopyGuardNoAliasDeclArg; + } + } + + auto *PT = cast(I->getOperand(OpN)->getType()); + I->setOperand(OpN, ConstantPointerNull::get(PT)); + } + } while (I != E) { Instruction *Inst = &*I++; @@ -476,7 +551,9 @@ } } - if (!NoAliasDeclInstructions.empty()) { + if (!NoAliasDeclInstructions.empty() || + !NoAliasScopeDeclInstructions.empty()) { + // There are noalias scope declarations: // (general): // Original: OrigPre { OrigHeader NewHeader ... Latch } @@ -484,6 +561,8 @@ // // with D: llvm.experimental.noalias.scope.decl, // U: !noalias or !alias.scope depending on D + // or with D: llvm.noalias.decl, + // U: llvm.provenance.noalias depending on D // ... { D U1 U2 } can transform into: // (0) : ... { D U1 U2 } // no relevant rotation for this part // (1) : ... D' { U1 U2 D } // D is part of OrigHeader @@ -492,20 +571,41 @@ // We now want to transform: // (1) -> : ... D' { D U1 U2 D'' } // (2) -> : ... D' U1' { D U2 D'' U1'' } - // D: original llvm.experimental.noalias.scope.decl + // D: original llvm.experimental.noalias.scope.decl/llvm.noalias.decl // D', U1': duplicate with replaced scopes // D'', U1'': different duplicate with replaced scopes // This ensures a safe fallback to 'may_alias' introduced by the rotate, // as U1'' and U1' scopes will not be compatible wrt to the local restrict - // Clone the llvm.experimental.noalias.decl again for the NewHeader. + // Clone the llvm.experimental.noalias.decl/llvm.noalias.decl again for + // the NewHeader. 
+ Instruction *NewHeaderInsertionPoint = &(*NewHeader->getFirstNonPHI()); - for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) { + for (NoAliasScopeDeclInst *NAD : NoAliasScopeDeclInstructions) { LLVM_DEBUG(dbgs() << " Cloning llvm.experimental.noalias.scope.decl:" << *NAD << "\n"); Instruction *NewNAD = NAD->clone(); NewNAD->insertBefore(NewHeaderInsertionPoint); } + for (Instruction *NAD : NoAliasDeclInstructions) { + LLVM_DEBUG(llvm::dbgs() + << " Cloning llvm.noalias.decl:" << *NAD << "\n"); + Instruction *NewNAD = NAD->clone(); + //@ FIXME: NewNAD->insertBefore(NewHeaderInsertionPoint); + NewNAD->insertBefore(NAD); + + // remap dependencies in the OrigHeader block to NewNAD + NAD->replaceUsesInsideBlock(NewNAD, OrigHeader); + + // And move the original NAD to the NewHeader + NAD->moveBefore(NewHeaderInsertionPoint); + + // Now forget about the original NAD mapping + auto tmp = + ValueMap[NAD]; // use a local copy to avoid undefined behavior + ValueMap[NewNAD] = tmp; // this could trigger a reallocation. + ValueMap.erase(NAD); + } // Scopes must now be duplicated, once for OrigHeader and once for // OrigPreHeader'. @@ -513,25 +613,30 @@ auto &Context = NewHeader->getContext(); SmallVector NoAliasDeclScopes; - for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) + for (auto *NAD : NoAliasScopeDeclInstructions) NoAliasDeclScopes.push_back(NAD->getScopeList()); + for (auto *NAD : NoAliasDeclInstructions) + NoAliasDeclScopes.push_back( + cast(cast( + NAD->getOperand(Intrinsic::NoAliasDeclScopeArg)) + ->getMetadata())); LLVM_DEBUG(dbgs() << " Updating OrigHeader scopes\n"); cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigHeader}, Context, "h.rot"); LLVM_DEBUG(OrigHeader->dump()); + // FIXME: originally we were updating only a part of the OrigPreHeader + // FIXME: for now, for easyness, we update the complete OrigPreHeader. + // FIXME: the comment below is not true any more ! 
// Keep the compile time impact low by only adapting the inserted block // of instructions in the OrigPreHeader. This might result in slightly // more aliasing between these instructions and those that were already // present, but it will be much faster when the original PreHeader is // large. - LLVM_DEBUG(dbgs() << " Updating part of OrigPreheader scopes\n"); - auto *FirstDecl = - cast(ValueMap[*NoAliasDeclInstructions.begin()]); - auto *LastInst = &OrigPreheader->back(); - cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, FirstDecl, LastInst, - Context, "pre.rot"); + LLVM_DEBUG(dbgs() << " Updating OrigPreheader scopes\n"); + cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigPreheader}, Context, + "pre.rot"); LLVM_DEBUG(OrigPreheader->dump()); LLVM_DEBUG(dbgs() << " Updated NewHeader:\n"); Index: llvm/test/Transforms/LoopRotate/noalias.ll =================================================================== --- llvm/test/Transforms/LoopRotate/noalias.ll +++ llvm/test/Transforms/LoopRotate/noalias.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -loop-rotate < %s | FileCheck %s ; RUN: opt -S -loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s ; RUN: opt -S -passes='require,require,loop(loop-rotate)' < %s | FileCheck %s @@ -10,15 +11,26 @@ define void @test_02(i32* nocapture %_pA) nounwind ssp { ; CHECK-LABEL: @test_02( -; CHECK: entry: -; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !2) -; CHECK: store i32 42, i32* %_pA, align 16, !alias.scope !2 -; CHECK: for.body: -; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !5) -; CHECK: store i32 0, i32* %arrayidx, align 16, !noalias !5 -; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !7) -; CHECK: store i32 42, i32* %_pA, align 16, !alias.scope !7 -; CHECK: for.end: +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAY:%.*]] = alloca [20 x i32], align 16 +; CHECK-NEXT: tail 
call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) +; CHECK-NEXT: store i32 42, i32* [[_PA:%.*]], align 16, !alias.scope !2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x i32], [20 x i32]* [[ARRAY]], i64 0, i64 0 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_01:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) +; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 16, !noalias !5 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_01]], 1 +; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) +; CHECK-NEXT: store i32 42, i32* [[_PA]], align 16, !alias.scope !7 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 100 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: [[ARRAYIDX_LCSSA:%.*]] = phi i32* [ [[ARRAYIDX]], [[FOR_BODY]] ] +; CHECK-NEXT: call void @g(i32* [[ARRAYIDX_LCSSA]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: ret void +; entry: %array = alloca [20 x i32], align 16 @@ -45,12 +57,23 @@ define void @test_03(i32* nocapture %_pA) nounwind ssp { ; CHECK-LABEL: @test_03( -; CHECK: entry: -; CHECK: for.body: -; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !5) -; CHECK: store i32 42, i32* %_pA, align 16, !alias.scope !5 -; CHECK: store i32 0, i32* %arrayidx, align 16, !noalias !5 -; CHECK: for.end: +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAY:%.*]] = alloca [20 x i32], align 16 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x i32], [20 x i32]* [[ARRAY]], i64 0, i64 0 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_01:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META5]]) +; CHECK-NEXT: store i32 42, i32* [[_PA:%.*]], align 16, 
!alias.scope !5 +; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 16, !noalias !5 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_01]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 100 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: [[ARRAYIDX_LCSSA:%.*]] = phi i32* [ [[ARRAYIDX]], [[FOR_BODY]] ] +; CHECK-NEXT: call void @g(i32* [[ARRAYIDX_LCSSA]]) #[[ATTR2]] +; CHECK-NEXT: ret void +; entry: %array = alloca [20 x i32], align 16 @@ -77,16 +100,27 @@ define void @test_04(i32* nocapture %_pA) nounwind ssp { ; CHECK-LABEL: @test_04( -; CHECK: entry: -; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !9) -; CHECK: store i32 42, i32* %_pA, align 16, !alias.scope !9 -; CHECK: for.body: -; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !5) -; CHECK: store i32 0, i32* %arrayidx, align 16, !noalias !5 -; CHECK: store i32 43, i32* %_pA, align 16, !alias.scope !5 -; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !11) -; CHECK: store i32 42, i32* %_pA, align 16, !alias.scope !11 -; CHECK: for.end: +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAY:%.*]] = alloca [20 x i32], align 16 +; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) +; CHECK-NEXT: store i32 42, i32* [[_PA:%.*]], align 16, !alias.scope !9 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x i32], [20 x i32]* [[ARRAY]], i64 0, i64 0 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_01:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META5]]) +; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 16, !noalias !5 +; CHECK-NEXT: store i32 43, i32* [[_PA]], align 16, !alias.scope !5 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_01]], 1 +; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata 
[[META11:![0-9]+]]) +; CHECK-NEXT: store i32 42, i32* [[_PA]], align 16, !alias.scope !11 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 100 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: [[ARRAYIDX_LCSSA:%.*]] = phi i32* [ [[ARRAYIDX]], [[FOR_BODY]] ] +; CHECK-NEXT: call void @g(i32* [[ARRAYIDX_LCSSA]]) #[[ATTR2]] +; CHECK-NEXT: ret void +; entry: %array = alloca [20 x i32], align 16 br label %for.cond @@ -113,17 +147,28 @@ define void @test_05(i32* nocapture %_pA) nounwind ssp { ; CHECK-LABEL: @test_05( -; CHECK: entry: -; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !13) -; CHECK: store i32 42, i32* %_pA, align 16, !alias.scope !13 -; CHECK: for.body: -; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !5) -; CHECK: store i32 0, i32* %arrayidx, align 16, !noalias !5 -; CHECK: store i32 43, i32* %_pA, align 16, !alias.scope !5 -; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !15) -; CHECK: store i32 42, i32* %_pA, align 16, !alias.scope !15 -; CHECK: for.end: -; CHECK: store i32 44, i32* %_pA, align 16, !alias.scope !5 +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAY:%.*]] = alloca [20 x i32], align 16 +; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) +; CHECK-NEXT: store i32 42, i32* [[_PA:%.*]], align 16, !alias.scope !13 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x i32], [20 x i32]* [[ARRAY]], i64 0, i64 0 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_01:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META5]]) +; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 16, !noalias !5 +; CHECK-NEXT: store i32 43, i32* [[_PA]], align 16, !alias.scope !5 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_01]], 1 +; CHECK-NEXT: tail call void 
@llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +; CHECK-NEXT: store i32 42, i32* [[_PA]], align 16, !alias.scope !15 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 100 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: [[ARRAYIDX_LCSSA:%.*]] = phi i32* [ [[ARRAYIDX]], [[FOR_BODY]] ] +; CHECK-NEXT: store i32 44, i32* [[_PA]], align 16, !alias.scope !5 +; CHECK-NEXT: call void @g(i32* [[ARRAYIDX_LCSSA]]) #[[ATTR2]] +; CHECK-NEXT: ret void +; entry: %array = alloca [20 x i32], align 16 Index: llvm/test/Transforms/LoopRotate/noalias2.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopRotate/noalias2.ll @@ -0,0 +1,208 @@ +; RUN: opt -S -loop-rotate < %s | FileCheck %s +; RUN: opt -S -loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s +; RUN: opt -S -passes='require,require,loop(loop-rotate)' < %s | FileCheck %s +; RUN: opt -S -passes='require,require,loop(loop-rotate)' -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @g(i32*) + +define void @test_02(i32* nocapture %_pA) nounwind ssp { +entry: + %array = alloca [20 x i32], align 16 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %prov.p = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %_pA, ptr_provenance i32* %prov.p, align 16 + %cmp = icmp slt i32 %i.0, 100 + %arrayidx = getelementptr inbounds [20 x i32], 
[20 x i32]* %array, i64 0, i64 0 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 0, i32* %arrayidx, align 16 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ] + call void @g(i32* %arrayidx.lcssa) nounwind + ret void +} + +; CHECK-LABEL: @test_02( +; CHECK: entry: +; CHECK: %p.decl1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK: %prov.p2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl1, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK: store i32 42, i32* %_pA, ptr_provenance i32* undef, align 16 +; CHECK: for.body: +; CHECK: %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !9) +; CHECK: %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) +; CHECK: %prov.p = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK: store i32 42, i32* %_pA, ptr_provenance i32* %prov.p, align 16 +; CHECK: for.end: + + +define void @test_03(i32* nocapture %_pA) nounwind ssp { +entry: + %array = alloca [20 x i32], align 16 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, 100 + %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %prov.p = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %_pA, ptr_provenance i32* %prov.p, align 16 + 
store i32 0, i32* %arrayidx, align 16 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ] + call void @g(i32* %arrayidx.lcssa) nounwind + ret void +} +; CHECK-LABEL: @test_03( +; CHECK: entry: +; CHECK: for.body: +; CHECK: %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !9) +; CHECK: %prov.p = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl, i32** null, i32** undef, i32 0, metadata !9), !tbaa !5, !noalias !9 +; CHECK: store i32 42, i32* %_pA, ptr_provenance i32* %prov.p, align 16 +; CHECK: for.end: + +define void @test_04(i32* nocapture %_pA) nounwind ssp { +entry: + %array = alloca [20 x i32], align 16 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %prov.p = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %_pA, ptr_provenance i32* %prov.p, align 16 + %cmp = icmp slt i32 %i.0, 100 + %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 0, i32* %arrayidx, align 16 + store i32 43, i32* %_pA, ptr_provenance i32* %prov.p, align 16 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ] + call void @g(i32* %arrayidx.lcssa) nounwind + ret void +} +; CHECK-LABEL: @test_04( +; CHECK: entry: +; CHECK: %p.decl1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !13) +; CHECK: %prov.p2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl1, i32** null, 
i32** undef, i32 0, metadata !13), !tbaa !5, !noalias !13 +; CHECK: store i32 42, i32* %_pA, ptr_provenance i32* undef, align 16 +; CHECK: for.body: +; CHECK: %prov.p4 = phi i32* [ %prov.p2, %entry ], [ %prov.p, %for.body ] +; CHECK: %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !9) +; CHECK: store i32 43, i32* %_pA, ptr_provenance i32* %prov.p4, align 16 +; CHECK: %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !15) +; CHECK: %prov.p = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !15), !tbaa !5, !noalias !15 +; CHECK: store i32 42, i32* %_pA, ptr_provenance i32* %prov.p, align 16 +; CHECK: for.end: + +define void @test_05(i32* nocapture %_pA) nounwind ssp { +entry: + %array = alloca [20 x i32], align 16 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %prov.p = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %_pA, ptr_provenance i32* %prov.p, align 16 + %cmp = icmp slt i32 %i.0, 100 + %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 0, i32* %arrayidx, align 16 + store i32 43, i32* %_pA, ptr_provenance i32* %prov.p, align 16 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ] + store i32 44, i32* %_pA, ptr_provenance i32* %prov.p, align 16 + call void @g(i32* %arrayidx.lcssa) nounwind + ret void +} +; CHECK-LABEL: @test_05( +; CHECK: entry: +; CHECK: %p.decl1 = tail call i8* 
@llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !17) +; CHECK: %prov.p2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl1, i32** null, i32** undef, i32 0, metadata !17), !tbaa !5, !noalias !17 +; CHECK: store i32 42, i32* %_pA, ptr_provenance i32* undef, align 16 +; CHECK: for.body: +; CHECK: %prov.p4 = phi i32* [ %prov.p2, %entry ], [ %prov.p, %for.body ] +; CHECK: %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !9) +; CHECK: store i32 43, i32* %_pA, ptr_provenance i32* %prov.p4, align 16 +; CHECK: %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !19) +; CHECK: %prov.p = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !19), !tbaa !5, !noalias !19 +; CHECK: store i32 42, i32* %_pA, ptr_provenance i32* %prov.p, align 16 +; CHECK: for.end: +; CHECK: %prov.p.lcssa = phi i32* [ %prov.p, %for.body ] +; CHECK: store i32 44, i32* %_pA, ptr_provenance i32* %prov.p.lcssa, align 16 + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #2 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} 
+!2 = !{!3} +!3 = distinct !{!3, !4, !"test_loop_rotate_XX: pA"} +!4 = distinct !{!4, !"test_loop_rotate_XX"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} + +; CHECK: !0 = !{i32 1, !"wchar_size", i32 4} +; CHECK-NEXT: !1 = !{!"clang"} +; CHECK-NEXT: !2 = !{!3} +; CHECK-NEXT: !3 = distinct !{!3, !4, !"test_loop_rotate_XX: pA:pre.rot"} +; CHECK-NEXT: !4 = distinct !{!4, !"test_loop_rotate_XX"} +; CHECK-NEXT: !5 = !{!6, !6, i64 0, i64 4} +; CHECK-NEXT: !6 = !{!7, i64 4, !"any pointer"} +; CHECK-NEXT: !7 = !{!8, i64 1, !"omnipotent char"} +; CHECK-NEXT: !8 = !{!"Simple C/C++ TBAA"} +; CHECK-NEXT: !9 = !{!10} +; CHECK-NEXT: !10 = distinct !{!10, !4, !"test_loop_rotate_XX: pA"} +; CHECK-NEXT: !11 = !{!12} +; CHECK-NEXT: !12 = distinct !{!12, !4, !"test_loop_rotate_XX: pA:h.rot"} +; CHECK-NEXT: !13 = !{!14} +; CHECK-NEXT: !14 = distinct !{!14, !4, !"test_loop_rotate_XX: pA:pre.rot"} +; CHECK-NEXT: !15 = !{!16} +; CHECK-NEXT: !16 = distinct !{!16, !4, !"test_loop_rotate_XX: pA:h.rot"} +; CHECK-NEXT: !17 = !{!18} +; CHECK-NEXT: !18 = distinct !{!18, !4, !"test_loop_rotate_XX: pA:pre.rot"} +; CHECK-NEXT: !19 = !{!20} +; CHECK-NEXT: !20 = distinct !{!20, !4, !"test_loop_rotate_XX: pA:h.rot"}