diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -435,22 +435,17 @@ bool canVectorizeOuterLoop(); /// Return true if all of the instructions in the block can be speculatively - /// executed, and record the loads/stores that require masking. If's that - /// guard loads can be ignored under "assume safety" unless \p PreserveGuards - /// is true. This can happen when we introduces guards for which the original - /// "unguarded-loads are safe" assumption does not hold. For example, the - /// vectorizer's fold-tail transformation changes the loop to execute beyond - /// its original trip-count, under a proper guard, which should be preserved. + /// executed, and record the loads/stores that require masking. /// \p SafePtrs is a list of addresses that are known to be legal and we know /// that we can read from them without segfault. /// \p MaskedOp is a list of instructions that have to be transformed into /// calls to the appropriate masked intrinsic when the loop is vectorized. /// \p ConditionalAssumes is a list of assume instructions in predicated /// blocks that must be dropped if the CFG gets flattened. - bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl &SafePtrs, - SmallPtrSetImpl &MaskedOp, - SmallPtrSetImpl &ConditionalAssumes, - bool PreserveGuards = false) const; + bool blockCanBePredicated( + BasicBlock *BB, SmallPtrSetImpl &SafePtrs, + SmallPtrSetImpl &MaskedOp, + SmallPtrSetImpl &ConditionalAssumes) const; /// Updates the vectorization state by adding \p Phi to the inductions list. 
/// This can set \p Phi as the main induction of the loop if \p Phi is a diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -955,10 +955,7 @@ bool LoopVectorizationLegality::blockCanBePredicated( BasicBlock *BB, SmallPtrSetImpl &SafePtrs, SmallPtrSetImpl &MaskedOp, - SmallPtrSetImpl &ConditionalAssumes, - bool PreserveGuards) const { - const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); - + SmallPtrSetImpl &ConditionalAssumes) const { for (Instruction &I : *BB) { // Check that we don't have a constant expression that can trap as operand. for (Value *Operand : I.operands()) { @@ -986,11 +983,7 @@ if (!LI) return false; if (!SafePtrs.count(LI->getPointerOperand())) { - // !llvm.mem.parallel_loop_access implies if-conversion safety. - // Otherwise, record that the load needs (real or emulated) masking - // and let the cost model decide. - if (!IsAnnotatedParallel || PreserveGuards) - MaskedOp.insert(LI); + MaskedOp.insert(LI); continue; } } @@ -1306,8 +1299,7 @@ // do not need predication such as the header block. 
for (BasicBlock *BB : TheLoop->blocks()) { if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp, - TmpConditionalAssumes, - /* MaskAllLoads= */ true)) { + TmpConditionalAssumes)) { LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n"); return false; } diff --git a/llvm/test/Transforms/LoopVectorize/X86/force-ifcvt.ll b/llvm/test/Transforms/LoopVectorize/X86/force-ifcvt.ll deleted file mode 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/force-ifcvt.ll +++ /dev/null @@ -1,42 +0,0 @@ -; RUN: opt -loop-vectorize -S < %s | FileCheck %s -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: norecurse nounwind uwtable -define void @Test(i32* nocapture %res, i32* nocapture readnone %c, i32* nocapture readonly %d, i32* nocapture readonly %p) #0 { -entry: - br label %for.body - -; CHECK-LABEL: @Test -; CHECK: <4 x i32> - -for.body: ; preds = %cond.end, %entry - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cond.end ] - %arrayidx = getelementptr inbounds i32, i32* %p, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4, !llvm.access.group !1 - %cmp1 = icmp eq i32 %0, 0 - %arrayidx3 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv - %1 = load i32, i32* %arrayidx3, align 4, !llvm.access.group !1 - br i1 %cmp1, label %cond.end, label %cond.false - -cond.false: ; preds = %for.body - %arrayidx7 = getelementptr inbounds i32, i32* %d, i64 %indvars.iv - %2 = load i32, i32* %arrayidx7, align 4, !llvm.access.group !1 - %add = add nsw i32 %2, %1 - br label %cond.end - -cond.end: ; preds = %for.body, %cond.false - %cond = phi i32 [ %add, %cond.false ], [ %1, %for.body ] - store i32 %cond, i32* %arrayidx3, align 4, !llvm.access.group !1 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 16 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 - -for.end: ; preds = %cond.end - ret void -} - -attributes #0 = { 
norecurse nounwind uwtable "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" } - -!0 = distinct !{!0, !{!"llvm.loop.parallel_accesses", !1}} -!1 = distinct !{} diff --git a/llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll --- a/llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll @@ -51,7 +51,7 @@ br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !8 } -; Case2: With pragma assume_safety only the store is masked. +; Case2: With pragma assume_safety both the load and the store are masked. ; void assume_safety(int * p, int * q1, int * q2, int guard) { ; #pragma clang loop vectorize(assume_safety) ; for(int ix=0; ix < 1021; ++ix) { @@ -63,7 +63,7 @@ ;CHECK-LABEL: @assume_safety ;CHECK: vector.body: -;CHECK-NOT: @llvm.masked.load +;CHECK: call <8 x i32> @llvm.masked.load ;CHECK: call void @llvm.masked.store ; Function Attrs: norecurse nounwind uwtable