Index: include/llvm/Analysis/LoopInfo.h =================================================================== --- include/llvm/Analysis/LoopInfo.h +++ include/llvm/Analysis/LoopInfo.h @@ -1014,6 +1014,26 @@ /// is representing an access group. bool isValidAsAccessGroup(MDNode *AccGroup); +/// Create a new LoopID after the loop has been transformed. +/// +/// This can be used when no follow-up loop attributes are defined +/// (llvm::makeFollowupLoopID returning None) to stop transformations to be +/// applied again. +/// +/// @param Context The LLVMContext in which to create the new LoopID. +/// @param OrigLoopID The original LoopID; can be nullptr if the original +/// loop has no LoopID. +/// @param RemovePrefixes Remove all loop attributes that have these prefixes. +/// Use to remove metadata of the transformation that has +/// been applied. +/// @param AddAttrs Add these loop attributes to the new LoopID. +/// +/// @return A new LoopID that can be applied using Loop::setLoopID(). +llvm::MDNode * +makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, + llvm::ArrayRef RemovePrefixes, + llvm::ArrayRef AddAttrs); + } // End llvm namespace #endif Index: include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h =================================================================== --- include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -98,11 +98,7 @@ OptimizationRemarkEmitter &ORE); /// Mark the loop L as already vectorized by setting the width to 1. - void setAlreadyVectorized() { - IsVectorized.Value = 1; - Hint Hints[] = {IsVectorized}; - writeHintsToMetadata(Hints); - } + void setAlreadyVectorized(); bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const; @@ -151,15 +147,6 @@ /// Checks string hint with one operand and set value if valid. void setHint(StringRef Name, Metadata *Arg); - /// Create a new hint from name / value pair. - MDNode *createHintMetadata(StringRef Name, unsigned V) const; - - /// Matches metadata with hint name. - bool matchesHintMetadataName(MDNode *Node, ArrayRef HintTypes); - - /// Sets current hints into loop metadata, keeping other values intact. - void writeHintsToMetadata(ArrayRef HintTypes); - /// The loop these hints belong to. const Loop *TheLoop; Index: lib/Analysis/LoopInfo.cpp =================================================================== --- lib/Analysis/LoopInfo.cpp +++ lib/Analysis/LoopInfo.cpp @@ -254,35 +254,13 @@ } void Loop::setLoopAlreadyUnrolled() { - MDNode *LoopID = getLoopID(); - // First remove any existing loop unrolling metadata. - SmallVector MDs; - // Reserve first location for self reference to the LoopID metadata node. - MDs.push_back(nullptr); - - if (LoopID) { - for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { - bool IsUnrollMetadata = false; - MDNode *MD = dyn_cast(LoopID->getOperand(i)); - if (MD) { - const MDString *S = dyn_cast(MD->getOperand(0)); - IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); - } - if (!IsUnrollMetadata) - MDs.push_back(LoopID->getOperand(i)); - } - } - - // Add unroll(disable) metadata to disable future unrolling. LLVMContext &Context = getHeader()->getContext(); - SmallVector DisableOperands; - DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); - MDNode *DisableNode = MDNode::get(Context, DisableOperands); - MDs.push_back(DisableNode); - MDNode *NewLoopID = MDNode::get(Context, MDs); - // Set operand 0 to refer to the loop id itself. - NewLoopID->replaceOperandWith(0, NewLoopID); + MDNode *DisableUnrollMD = + MDNode::get(Context, MDString::get(Context, "llvm.loop.unroll.disable")); + MDNode *LoopID = getLoopID(); + MDNode *NewLoopID = makePostTransformationMetadata( + Context, LoopID, {"llvm.loop.unroll."}, {DisableUnrollMD}); setLoopID(NewLoopID); } @@ -760,6 +738,45 @@ return Node->getNumOperands() == 0 && Node->isDistinct(); } +MDNode *llvm::makePostTransformationMetadata(LLVMContext &Context, + MDNode *OrigLoopID, + ArrayRef RemovePrefixes, + ArrayRef AddAttrs) { + // First remove any existing loop metadata related to this transformation. + SmallVector MDs; + + // Reserve first location for self reference to the LoopID metadata node. + MDs.push_back(nullptr); + + // Remove metadata for the transformation that has been applied or that became + // outdated. + if (OrigLoopID) { + for (unsigned i = 1, ie = OrigLoopID->getNumOperands(); i < ie; ++i) { + bool IsVectorMetadata = false; + Metadata *Op = OrigLoopID->getOperand(i); + if (MDNode *MD = dyn_cast(Op)) { + const MDString *S = dyn_cast(MD->getOperand(0)); + if (S) + IsVectorMetadata = + llvm::any_of(RemovePrefixes, [S](StringRef Prefix) -> bool { + return S->getString().startswith(Prefix); + }); + } + if (!IsVectorMetadata) + MDs.push_back(Op); + } + } + + // Add metadata to avoid reapplying a transformation, such as + // llvm.loop.unroll.disable and llvm.loop.isvectorized. + MDs.append(AddAttrs.begin(), AddAttrs.end()); + + MDNode *NewLoopID = MDNode::getDistinct(Context, MDs); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + return NewLoopID; +} + //===----------------------------------------------------------------------===// // LoopInfo implementation // Index: lib/Transforms/Vectorize/LoopVectorizationLegality.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -104,6 +104,25 @@ << "LV: Interleaving disabled by the pass manager\n"); } +void LoopVectorizeHints::setAlreadyVectorized() { + LLVMContext &Context = TheLoop->getHeader()->getContext(); + + MDNode *IsVectorizedMD = + MDNode::get(Context, {MDString::get(Context, "llvm.loop.isvectorized"), + ConstantAsMetadata::get(ConstantInt::get( + Type::getInt32Ty(Context), 1))}); + MDNode *LoopID = TheLoop->getLoopID(); + MDNode *NewLoopID = + makePostTransformationMetadata(Context, LoopID, + {Twine(Prefix(), "vectorize.").str(), + Twine(Prefix(), "interleave.").str()}, + {IsVectorizedMD}); + TheLoop->setLoopID(NewLoopID); + + // Update internal cache. + IsVectorized.Value = 1; +} + bool LoopVectorizeHints::allowVectorization( Function *F, Loop *L, bool VectorizeOnlyWhenForced) const { if (getForce() == LoopVectorizeHints::FK_Disabled) { @@ -231,57 +250,6 @@ } } -MDNode *LoopVectorizeHints::createHintMetadata(StringRef Name, - unsigned V) const { - LLVMContext &Context = TheLoop->getHeader()->getContext(); - Metadata *MDs[] = { - MDString::get(Context, Name), - ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))}; - return MDNode::get(Context, MDs); -} - -bool LoopVectorizeHints::matchesHintMetadataName(MDNode *Node, - ArrayRef HintTypes) { - MDString *Name = dyn_cast(Node->getOperand(0)); - if (!Name) - return false; - - for (auto H : HintTypes) - if (Name->getString().endswith(H.Name)) - return true; - return false; -} - -void LoopVectorizeHints::writeHintsToMetadata(ArrayRef HintTypes) { - if (HintTypes.empty()) - return; - - // Reserve the first element to LoopID (see below). - SmallVector MDs(1); - // If the loop already has metadata, then ignore the existing operands. - MDNode *LoopID = TheLoop->getLoopID(); - if (LoopID) { - for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { - MDNode *Node = cast(LoopID->getOperand(i)); - // If node in update list, ignore old value. - if (!matchesHintMetadataName(Node, HintTypes)) - MDs.push_back(Node); - } - } - - // Now, add the missing hints. - for (auto H : HintTypes) - MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value)); - - // Replace current metadata node with new one. - LLVMContext &Context = TheLoop->getHeader()->getContext(); - MDNode *NewLoopID = MDNode::get(Context, MDs); - // Set operand 0 to refer to the loop id itself. - NewLoopID->replaceOperandWith(0, NewLoopID); - - TheLoop->setLoopID(NewLoopID); -} - bool LoopVectorizationRequirements::doesNotMeet( Function *F, Loop *L, const LoopVectorizeHints &Hints) { const char *PassName = Hints.vectorizeAnalysisPassName(); Index: test/Transforms/LoopVectorize/X86/already-vectorized.ll =================================================================== --- test/Transforms/LoopVectorize/X86/already-vectorized.ll +++ test/Transforms/LoopVectorize/X86/already-vectorized.ll @@ -43,4 +43,3 @@ ; CHECK: [[width]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[scalar]] = distinct !{[[scalar]], [[runtime_unroll:![0-9]+]], [[width]]} ; CHECK: [[runtime_unroll]] = !{!"llvm.loop.unroll.runtime.disable"} - Index: test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll =================================================================== --- test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -31,14 +31,14 @@ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <8 x float>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, <8 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, <8 x float>* [[TMP3]], align 4, !llvm.access.group !0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <8 x float>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x float>, <8 x float>* [[TMP6]], align 4 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x float>, <8 x float>* [[TMP6]], align 4, !llvm.access.group !0 ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <8 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP5]] to <8 x float>* -; CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[TMP8]], align 4 +; CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[TMP8]], align 4, !llvm.access.group !0 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !1 @@ -58,7 +58,7 @@ ; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4, !llvm.access.group !0 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 20 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !5 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !4 ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -104,21 +104,34 @@ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <8 x float>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, <8 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, <8 x float>* [[TMP3]], align 4, !llvm.access.group !6 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <8 x float>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x float>, <8 x float>* [[TMP6]], align 4 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x float>, <8 x float>* [[TMP6]], align 4, !llvm.access.group !6 ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <8 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ule <8 x i64> [[INDUCTION]], ; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP5]] to <8 x float>* -; CHECK-NEXT: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP7]], <8 x float>* [[TMP9]], i32 4, <8 x i1> [[TMP8]]) +; CHECK-NEXT: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP7]], <8 x float>* [[TMP9]], i32 4, <8 x i1> [[TMP8]]), !llvm.access.group !6 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !8 +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !7 ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 24, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !6 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !6 +; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP11]], [[TMP12]] +; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4, !llvm.access.group !6 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 20 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !9 ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -163,17 +176,17 @@ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <8 x float>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, <8 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, <8 x float>* [[TMP3]], align 4, !llvm.access.group !6 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <8 x float>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x float>, <8 x float>* [[TMP6]], align 4 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x float>, <8 x float>* [[TMP6]], align 4, !llvm.access.group !6 ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <8 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP5]] to <8 x float>* -; CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[TMP8]], align 4 +; CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[TMP8]], align 4, !llvm.access.group !6 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !11 +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !10 ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 16, 16 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -183,14 +196,14 @@ ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !7 +; CHECK-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !6 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !7 +; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !6 ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP10]], [[TMP11]] -; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4, !llvm.access.group !7 +; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4, !llvm.access.group !6 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 16 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !12 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !11 ; CHECK: for.end: ; CHECK-NEXT: ret void ; Index: test/Transforms/LoopVectorize/remove_metadata.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/remove_metadata.ll @@ -0,0 +1,32 @@ +; RUN: opt -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S < %s | FileCheck %s +; +; Check that llvm.loop.vectorize.* metadata is removed after vectorization. +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; CHECK-LABEL: @disable_nonforced_enable( +; CHECK: store <2 x i32> +define void @disable_nonforced_enable(i32* nocapture %a, i32 %n) { +entry: + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.body, label %for.end + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = trunc i64 %indvars.iv to i32 + store i32 %0, i32* %arrayidx, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret void +} + +!0 = !{!0, !{!"llvm.loop.vectorize.some_property"}, !{!"llvm.loop.vectorize.enable", i32 1}} + +; CHECK-NOT: llvm.loop.vectorize. +; CHECK: {!"llvm.loop.isvectorized", i32 1} +; CHECK-NOT: llvm.loop.vectorize.