Index: llvm/lib/Analysis/IVDescriptors.cpp =================================================================== --- llvm/lib/Analysis/IVDescriptors.cpp +++ llvm/lib/Analysis/IVDescriptors.cpp @@ -593,11 +593,14 @@ return isConditionalRdxPattern(Kind, I); LLVM_FALLTHROUGH; case Instruction::FCmp: - case Instruction::ICmp: - if (!isIntMinMaxRecurrenceKind(Kind) && - (!HasFunNoNaNAttr || !isFPMinMaxRecurrenceKind(Kind))) - return InstDesc(false, I); - return isMinMaxSelectCmpPattern(I, Prev); + case Instruction::ICmp: { + if (isFPMinMaxRecurrenceKind(Kind) && + (HasFunNoNaNAttr || I->hasNoNaNs())) + return isMinMaxSelectCmpPattern(I, Prev); + if (isIntMinMaxRecurrenceKind(Kind)) + return isMinMaxSelectCmpPattern(I, Prev); + return InstDesc(false, I); + } } } Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll @@ -275,19 +275,19 @@ ; FMIN (FAST) ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define float @fmin_fast(float* noalias nocapture readonly %a, i64 %n) #0 { +define float @fmin_fast(float* noalias nocapture readonly %a, i64 %n) { ; CHECK-LABEL: @fmin_fast ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load ; CHECK: %[[LOAD2:.*]] = load -; CHECK: %[[FCMP1:.*]] = fcmp olt %[[LOAD1]] -; CHECK: %[[FCMP2:.*]] = fcmp olt %[[LOAD2]] +; CHECK: %[[FCMP1:.*]] = fcmp nnan olt %[[LOAD1]] +; CHECK: %[[FCMP2:.*]] = fcmp nnan olt %[[LOAD2]] ; CHECK: %[[SEL1:.*]] = select %[[FCMP1]], %[[LOAD1]] ; CHECK: %[[SEL2:.*]] = select %[[FCMP2]], %[[LOAD2]] ; CHECK: middle.block: -; CHECK: %[[FCMP:.*]] = fcmp olt %[[SEL1]], %[[SEL2]] -; CHECK-NEXT: %[[SEL:.*]] = select %[[FCMP]], %[[SEL1]], %[[SEL2]] -; CHECK-NEXT: call float @llvm.vector.reduce.fmin.nxv8f32( %[[SEL]]) +; CHECK: %[[FCMP:.*]] = fcmp nnan olt %[[SEL1]], %[[SEL2]] +; CHECK-NEXT: %[[SEL:.*]] = select nnan %[[FCMP]], %[[SEL1]], %[[SEL2]] +; CHECK-NEXT: call nnan float @llvm.vector.reduce.fmin.nxv8f32( %[[SEL]]) entry: %cmp6 = icmp sgt i64 %n, 0 br i1 %cmp6, label %for.body, label %for.end @@ -297,8 +297,8 @@ %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] %arrayidx = getelementptr inbounds float, float* %a, i64 %iv %0 = load float, float* %arrayidx, align 4 - %cmp.i = fcmp olt float %0, %sum.07 - %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07 + %cmp.i = fcmp nnan olt float %0, %sum.07 + %.sroa.speculated = select nnan i1 %cmp.i, float %0, float %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -311,19 +311,19 @@ ; FMAX (FAST) ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define float @fmax_fast(float* noalias nocapture readonly %a, i64 %n) #0 { +define float @fmax_fast(float* noalias nocapture readonly %a, i64 %n) { ; CHECK-LABEL: @fmax_fast ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load ; CHECK: %[[LOAD2:.*]] = load -; CHECK: %[[FCMP1:.*]] = fcmp fast ogt %[[LOAD1]] -; CHECK: %[[FCMP2:.*]] = fcmp fast ogt %[[LOAD2]] +; CHECK: %[[FCMP1:.*]] = fcmp nnan ogt %[[LOAD1]] +; CHECK: %[[FCMP2:.*]] = fcmp nnan ogt %[[LOAD2]] ; CHECK: %[[SEL1:.*]] = select %[[FCMP1]], %[[LOAD1]] ; CHECK: %[[SEL2:.*]] = select %[[FCMP2]], %[[LOAD2]] ; CHECK: middle.block: -; CHECK: %[[FCMP:.*]] = fcmp fast ogt %[[SEL1]], %[[SEL2]] -; CHECK-NEXT: %[[SEL:.*]] = select fast %[[FCMP]], %[[SEL1]], %[[SEL2]] -; CHECK-NEXT: call fast float @llvm.vector.reduce.fmax.nxv8f32( %[[SEL]]) +; CHECK: %[[FCMP:.*]] = fcmp nnan ogt %[[SEL1]], %[[SEL2]] +; CHECK-NEXT: %[[SEL:.*]] = select nnan %[[FCMP]], %[[SEL1]], %[[SEL2]] +; CHECK-NEXT: call nnan float @llvm.vector.reduce.fmax.nxv8f32( %[[SEL]]) entry: %cmp6 = icmp sgt i64 %n, 0 br i1 %cmp6, label %for.body, label %for.end @@ -333,8 +333,8 @@ %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] %arrayidx = getelementptr inbounds float, float* %a, i64 %iv %0 = load float, float* %arrayidx, align 4 - %cmp.i = fcmp fast ogt float %0, %sum.07 - %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07 + %cmp.i = fcmp nnan ogt float %0, %sum.07 + %.sroa.speculated = select nnan i1 %cmp.i, float %0, float %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -433,8 +433,6 @@ ret i32 %sum.0.lcssa } -attributes #0 = { "no-nans-fp-math"="true" } - !0 = distinct !{!0, !1, !2, !3, !4} !1 = !{!"llvm.loop.vectorize.width", i32 8} !2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}