Index: llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h =================================================================== --- llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -135,15 +135,16 @@ /// pass name to force the frontend to print the diagnostic. const char *vectorizeAnalysisPassName() const; - bool allowReordering() const { + bool allowReordering(bool HintsAllowReordering) const { // When enabling loop hints are provided we allow the vectorizer to change // the order of operations that is given by the scalar loop. This is not // enabled by default because can be unsafe or inefficient. For example, // reordering floating-point operations will change the way round-off // error accumulates in the loop. ElementCount EC = getWidth(); - return getForce() == LoopVectorizeHints::FK_Enabled || - EC.getKnownMinValue() > 1; + return HintsAllowReordering && + (getForce() == LoopVectorizeHints::FK_Enabled || + EC.getKnownMinValue() > 1); } bool isPotentiallyUnsafe() const { @@ -195,8 +196,9 @@ Instruction *getExactFPInst() { return ExactFPMathInst; } - bool canVectorizeFPMath(const LoopVectorizeHints &Hints) const { - return !ExactFPMathInst || Hints.allowReordering(); + bool canVectorizeFPMath(const LoopVectorizeHints &Hints, + bool HintsAllowReordering) const { + return !ExactFPMathInst || Hints.allowReordering(HintsAllowReordering); } unsigned getNumRuntimePointerChecks() const { Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -336,6 +336,11 @@ cl::desc("Enable the vectorisation of loops with in-order (strict) " "FP reductions")); +cl::opt<bool> HintsAllowReordering( "hints-allow-reordering", cl::init(true), cl::Hidden, cl::desc("Allow enabling loop hints to reorder FP operations 
" + "during vectorization.")); + static cl::opt PreferPredicatedReductionSelect( "prefer-predicated-reduction-select", cl::init(false), cl::Hidden, cl::desc( @@ -8000,7 +8005,7 @@ NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; bool ThresholdReached = NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; - if ((ThresholdReached && !Hints.allowReordering()) || + if ((ThresholdReached && !Hints.allowReordering(HintsAllowReordering)) || PragmaThresholdReached) { ORE->emit([&]() { return OptimizationRemarkAnalysisAliasing( @@ -9933,7 +9938,7 @@ return false; } - if (!Requirements.canVectorizeFPMath(Hints)) { + if (!Requirements.canVectorizeFPMath(Hints, HintsAllowReordering)) { ORE->emit([&]() { auto *ExactFPMathInst = Requirements.getExactFPInst(); return OptimizationRemarkAnalysisFPCommute(DEBUG_TYPE, "CantReorderFPOps", Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reductions -S | FileCheck %s -check-prefix=CHECK +; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -enable-strict-reductions -hints-allow-reordering=false -S | FileCheck %s -check-prefix=CHECK-NO-REORDER define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) { ; CHECK-LABEL: @fadd_strict @@ -9,6 +10,9 @@ ; CHECK: for.end ; CHECK: %[[PHI:.*]] = phi float [ %[[SCALAR:.*]], %for.body ], [ %[[RDX]], %middle.block ] ; CHECK: ret float %[[PHI]] + +; CHECK-NO-REORDER-LABEL: @fadd_strict +; CHECK-NO-REORDER-NOT: vector.body entry: br label %for.body @@ -42,6 +46,9 @@ ; CHECK: for.end ; CHECK: %[[PHI:.*]] = phi float [ %[[SCALAR:.*]], %for.body ], [ %[[RDX4]], %middle.block ] ; CHECK: ret 
float %[[PHI]] + +; CHECK-NO-REORDER-LABEL: @fadd_strict_unroll +; CHECK-NO-REORDER-NOT: vector.body entry: br label %for.body @@ -83,6 +90,9 @@ ; CHECK: %[[RDX2]] = call float @llvm.vector.reduce.fadd.nxv4f32(float %[[VEC_PHI2]], <vscale x 4 x float> %[[MGATHER2]]) ; CHECK: for.end ; CHECK ret void + +; CHECK-NO-REORDER-LABEL: @fadd_strict_interleave +; CHECK-NO-REORDER-NOT: vector.body entry: %arrayidxa = getelementptr inbounds float, float* %a, i64 1 %a1 = load float, float* %a, align 4 @@ -123,6 +133,9 @@ ; CHECK: for.end ; CHECK: %[[PHI:.*]] = phi float [ 0.000000e+00, %entry ], [ %[[EXIT_PHI]], %for.end.loopexit ] ; CHECK: ret float %[[PHI]] + +; CHECK-NO-REORDER-LABEL: @fadd_invariant +; CHECK-NO-REORDER-NOT: vector.body entry: %arrayidx = getelementptr inbounds float, float* %a, i64 1 %0 = load float, float* %arrayidx, align 4 @@ -169,6 +182,9 @@ ; CHECK: for.end ; CHECK: %[[RDX_PHI:.*]] = phi float [ %[[FADD]], %for.inc ], [ %[[RDX]], %middle.block ] ; CHECK: ret float %[[RDX_PHI]] + +; CHECK-NO-REORDER-LABEL: @fadd_conditional +; CHECK-NO-REORDER-NOT: vector.body entry: br label %for.body @@ -219,6 +235,9 @@ ; CHECK: for.end ; CHECK: %[[RET:.*]] = phi float [ %[[FADD2]], %for.body ], [ %[[RDX]], %middle.block ] ; CHECK: ret float %[[RET]] + +; CHECK-NO-REORDER-LABEL: @fadd_multiple +; CHECK-NO-REORDER-NOT: vector.body entry: br label %for.body Index: llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -enable-strict-reductions -S | FileCheck %s -check-prefix=CHECK +; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -enable-strict-reductions -hints-allow-reordering=false -S | FileCheck %s -check-prefix=CHECK-NO-REORDER define float @fadd_strict(float* noalias nocapture readonly %a, i64 
%n) { ; CHECK-LABEL: @fadd_strict @@ -9,6 +10,9 @@ ; CHECK: for.end ; CHECK: %[[PHI:.*]] = phi float [ %[[SCALAR:.*]], %for.body ], [ %[[RDX]], %middle.block ] ; CHECK: ret float %[[PHI]] + +; CHECK-NO-REORDER-LABEL: @fadd_strict +; CHECK-NO-REORDER-NOT: vector.body entry: br label %for.body @@ -42,6 +46,9 @@ ; CHECK: for.end ; CHECK: %[[PHI:.*]] = phi float [ %[[SCALAR:.*]], %for.body ], [ %[[RDX4]], %middle.block ] ; CHECK: ret float %[[PHI]] + +; CHECK-NO-REORDER-LABEL: @fadd_strict_unroll +; CHECK-NO-REORDER-NOT: vector.body entry: br label %for.body @@ -91,6 +98,9 @@ ; CHECK: for.end ; CHECK: %[[SUM_LCSSA:.*]] = phi float [ %[[FADD_LCSSA]], %for.cond.cleanup ], [ 0.000000e+00, %entry ] ; CHECK: ret float %[[SUM_LCSSA]] + +; CHECK-NO-REORDER-LABEL: @fadd_strict_unroll_last_val +; CHECK-NO-REORDER-NOT: vector.body entry: %cmp = icmp sgt i64 %n, 0 br i1 %cmp, label %for.body, label %for.end @@ -132,6 +142,9 @@ ; CHECK: %[[RDX2]] = call float @llvm.vector.reduce.fadd.v4f32(float %[[VEC_PHI1]], <4 x float> %[[STRIDED2]]) ; CHECK: for.end ; CHECK ret void + +; CHECK-NO-REORDER-LABEL: @fadd_strict_interleave +; CHECK-NO-REORDER-NOT: vector.body entry: %arrayidxa = getelementptr inbounds float, float* %a, i64 1 %a1 = load float, float* %a, align 4 @@ -172,6 +185,9 @@ ; CHECK: for.end ; CHECK: %[[PHI:.*]] = phi float [ 0.000000e+00, %entry ], [ %[[EXIT_PHI]], %for.end.loopexit ] ; CHECK: ret float %[[PHI]] + +; CHECK-NO-REORDER-LABEL: @fadd_invariant +; CHECK-NO-REORDER-NOT: vector.body entry: %arrayidx = getelementptr inbounds float, float* %a, i64 1 %0 = load float, float* %arrayidx, align 4 @@ -223,6 +239,9 @@ ; CHECK: for.end ; CHECK: %[[RDX_PHI:.*]] = phi float [ %[[FADD]], %for.inc ], [ %[[RDX]], %middle.block ] ; CHECK: ret float %[[RDX_PHI]] + +; CHECK-NO-REORDER-LABEL: @fadd_conditional +; CHECK-NO-REORDER-NOT: vector.body entry: br label %for.body @@ -267,6 +286,9 @@ ; CHECK: for.end: ; CHECK: %[[RES_PHI:.*]] = phi float [ %[[FADD:.*]], %for.body ], [ 
%[[RDX]], %middle.block ] ; CHECK: ret float %[[RES_PHI]] + +; CHECK-NO-REORDER-LABEL: @fadd_predicated +; CHECK-NO-REORDER-NOT: vector.body entry: br label %for.body @@ -305,6 +327,9 @@ ; CHECK: for.end ; CHECK: %[[RET:.*]] = phi float [ %[[FADD2]], %for.body ], [ %[[RDX]], %middle.block ] ; CHECK: ret float %[[RET]] + +; CHECK-NO-REORDER-LABEL: @fadd_multiple +; CHECK-NO-REORDER-NOT: vector.body entry: br label %for.body