diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -719,7 +719,6 @@ RequiresScalarEpilogue = false; } - /// Check if \p Instr belongs to any interleave group. bool isInterleaved(Instruction *Instr) const { return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end(); @@ -749,6 +748,9 @@ /// cannot be filtered by masking the load/store. void invalidateGroupsRequiringScalarEpilogue(); + /// Returns the number of instructions that belong to an interleave group. + unsigned size() const { return InterleaveGroupMap.size(); } + private: /// A wrapper around ScalarEvolution, used to add runtime SCEV checks. /// Simplifies SCEV expressions in the context of existing SCEV assumptions. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1310,6 +1310,19 @@ /// i.e. either vector version isn't available, or is too expensive. unsigned getVectorCallCost(CallInst *CI, unsigned VF, bool &NeedToScalarize); + /// Invalidates all interleave groups and the decisions based on them. + void invalidateAllInterleaveGroups() { + if (InterleaveInfo.size() > 0) { + // Invalidating interleave groups also requires invalidating all decisions + // based on them, which includes widening decisions and uniform and scalar + // values. 
+ WideningDecisions.clear(); + Uniforms.clear(); + Scalars.clear(); + } + InterleaveInfo.reset(); + } + private: unsigned NumPredStores = 0; @@ -6519,7 +6532,7 @@ dbgs() << "LV: Invalidate all interleaved groups due to fold-tail by masking " "which requires masked-interleaved support.\n"); - CM.InterleaveInfo.reset(); + CM.invalidateAllInterleaveGroups(); } if (UserVF) { diff --git a/llvm/test/Transforms/LoopVectorize/Hexagon/invalidate-cm-after-invalidating-interleavegroups.ll b/llvm/test/Transforms/LoopVectorize/Hexagon/invalidate-cm-after-invalidating-interleavegroups.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/Hexagon/invalidate-cm-after-invalidating-interleavegroups.ll @@ -0,0 +1,55 @@ +; RUN: opt -loop-vectorize -hexagon-autohvx=1 -force-vector-width=64 -prefer-predicate-over-epilog -S %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +; Test for PR45572. + +; Check that interleave groups and decisions based on them are correctly +; invalidated with tail-folding on platforms where masked interleaved accesses +; are disabled. + +; Make sure a vector body has been created, 64 element vectors are used and a block predicate has been computed. 
+; CHECK-LABEL: vector.body: +; CHECK: %induction = add <64 x i32> +; CHECK: icmp ule <64 x i32> %induction + +define void @foo(i32* %arg, i32 %N) #0 { +entry: + %tmp = alloca i8 + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ %iv.next, %loop], [ 0, %entry ] + %idx.mul = mul nuw nsw i32 %iv, 7 + %idx.start = add nuw nsw i32 %idx.mul, 1 + %tmp6 = getelementptr inbounds i32, i32* %arg, i32 %idx.start + %tmp7 = load i32, i32* %tmp6, align 4 + %tmp8 = add nuw nsw i32 %idx.start, 1 + %tmp9 = getelementptr inbounds i32, i32* %arg, i32 %tmp8 + %tmp10 = load i32, i32* %tmp9, align 4 + %tmp11 = add nuw nsw i32 %idx.start, 2 + %tmp12 = getelementptr inbounds i32, i32* %arg, i32 %tmp11 + %tmp13 = load i32, i32* %tmp12, align 4 + %tmp14 = add nuw nsw i32 %idx.start, 3 + %tmp15 = getelementptr inbounds i32, i32* %arg, i32 %tmp14 + %tmp16 = load i32, i32* %tmp15, align 4 + %tmp18 = add nuw nsw i32 %idx.start, 4 + %tmp19 = getelementptr inbounds i32, i32* %arg, i32 %tmp18 + %tmp20 = load i32, i32* %tmp19, align 4 + %tmp21 = add nuw nsw i32 %idx.start, 5 + %tmp22 = getelementptr inbounds i32, i32* %arg, i32 %tmp21 + %tmp23 = load i32, i32* %tmp22, align 4 + %tmp25 = add nuw nsw i32 %idx.start, 6 + %tmp26 = getelementptr inbounds i32, i32* %arg, i32 %tmp25 + %tmp27 = load i32, i32* %tmp26, align 4 + store i8 0, i8* %tmp, align 1 + %iv.next= add nuw nsw i32 %iv, 1 + %exit.cond = icmp eq i32 %iv.next, %N + br i1 %exit.cond, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + +attributes #0 = { "target-features"="+hvx,+hvx-length128b" }