diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
@@ -181,6 +181,111 @@
   ret void
 }

+; @nan: the conditional block runs only when input[i] is NaN (fcmp uno x, x)
+; and executes an fadd carrying the nnan fast-math flag. The expected
+; vector body uses that compare as the mask of the masked load and still
+; has nnan on the widened fadd.
+define void @nan(float* noalias nocapture readonly %input,
+                 float* %output) local_unnamed_addr #0 {
+; CHECK-LABEL: @nan
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[INPUT:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr float, float* [[OUTPUT:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, float* [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP7]], i32 4, <4 x i1> [[TMP4]], <4 x float> poison)
+; CHECK-NEXT:    [[TMP8:%.*]] = fadd nnan <4 x float> [[WIDE_LOAD]], [[WIDE_MASKED_LOAD]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %arrayidx = getelementptr inbounds float, float* %input, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %cmp1 = fcmp uno float %0, %0
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:
+  %arrayidx5 = getelementptr inbounds float, float* %output, i64 %indvars.iv
+  %1 = load float, float* %arrayidx5, align 4
+  %add = fadd nnan float %0, %1
+  store float %add, float* %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 8
+  br i1 %exitcond.not, label %loop.exit, label %for.body
+
+loop.exit:
+  ret void
+}
+
+; @inf: the conditional block runs only when input[i] compares `une`
+; (unordered or not equal) against both +infinity (0x7FF0000000000000) and
+; -infinity (0xFFF0000000000000), and executes an fadd carrying the ninf
+; fast-math flag. The expected vector body ANDs the two compares into the
+; mask of the masked load and still has ninf on the widened fadd.
+define void @inf(float* noalias nocapture readonly %input,
+                 float* %output) local_unnamed_addr #0 {
+; CHECK-LABEL: @inf
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[INPUT:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], <float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000>
+; CHECK-NEXT:    [[TMP5:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000>
+; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i1> [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr float, float* [[OUTPUT:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, float* [[TMP7]], i32 0
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP9]], i32 4, <4 x i1> [[TMP6]], <4 x float> poison)
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd ninf <4 x float> [[WIDE_LOAD]], [[WIDE_MASKED_LOAD]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %arrayidx = getelementptr inbounds float, float* %input, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %plus = fcmp une float %0, 0x7FF0000000000000
+  %minus = fcmp une float %0, 0xFFF0000000000000
+  %not.inf = and i1 %plus, %minus
+  br i1 %not.inf, label %if.then, label %for.inc
+
+if.then:
+  %arrayidx5 = getelementptr inbounds float, float* %output, i64 %indvars.iv
+  %1 = load float, float* %arrayidx5, align 4
+  %add = fadd ninf float %0, %1
+  store float %add, float* %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 8
+  br i1 %exitcond.not, label %loop.exit, label %for.body
+
+loop.exit:
+  ret void
+}
+
 attributes #0 = { noinline nounwind uwtable "target-features"="+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl" }

 !0 = !{}