Index: llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
@@ -0,0 +1,110 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -slp-vectorizer -S | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+@d = internal unnamed_addr global i32 5, align 4
+
+; The SLP vectorizer replaces the scalar phis of bb25 with vector phis and
+; emits a trunc of one of them. PHI nodes must stay grouped at the top of a
+; basic block, so the trunc has to follow the last phi of bb25 instead of being
+; placed between two phis. The IR verifier stays enabled so that malformed
+; output fails the test.
+;
+; %ext.bb3 and %ext.bb11 are deliberately not named %tmp<N>: unnamed values
+; %<N> get the FileCheck variable TMP<N>, which a %tmp<N> name would clash with.
+define dso_local void @l() local_unnamed_addr {
+; CHECK-LABEL: @l(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP11:%.*]], [[BB25:%.*]] ]
+; CHECK-NEXT:    br i1 undef, label [[BB3:%.*]], label [[BB11:%.*]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[EXT_BB3:%.*]] = zext i1 undef to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i16> [[TMP0]], undef
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i16> [[TMP1]], <i16 8, i16 8>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
+; CHECK-NEXT:    br label [[BB25]]
+; CHECK:       bb11:
+; CHECK-NEXT:    [[EXT_BB11:%.*]] = zext i1 undef to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i16> [[TMP0]], undef
+; CHECK-NEXT:    [[TMP5:%.*]] = sext <2 x i16> [[TMP4]] to <2 x i64>
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ule <2 x i64> undef, [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = zext <2 x i1> [[TMP6]] to <2 x i32>
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult <2 x i32> undef, [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = zext <2 x i1> [[TMP8]] to <2 x i32>
+; CHECK-NEXT:    br label [[BB25]]
+; CHECK:       bb25:
+; CHECK-NEXT:    [[TMP28:%.*]] = phi i32 [ [[EXT_BB11]], [[BB11]] ], [ [[EXT_BB3]], [[BB3]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = phi <2 x i32> [ [[TMP9]], [[BB11]] ], [ [[TMP3]], [[BB3]] ]
+; CHECK-NEXT:    [[TMP11]] = phi <2 x i16> [ [[TMP4]], [[BB11]] ], [ [[TMP1]], [[BB3]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = trunc <2 x i32> [[TMP10]] to <2 x i8>
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i8> [[TMP12]], i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = zext i8 [[TMP13]] to i32
+; CHECK-NEXT:    [[TMP31:%.*]] = and i32 undef, [[TMP14]]
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i8> [[TMP12]], i32 1
+; CHECK-NEXT:    [[TMP16:%.*]] = zext i8 [[TMP15]] to i32
+; CHECK-NEXT:    [[TMP32:%.*]] = and i32 [[TMP31]], [[TMP16]]
+; CHECK-NEXT:    [[TMP33:%.*]] = and i32 [[TMP32]], [[TMP28]]
+; CHECK-NEXT:    br i1 undef, label [[BB34:%.*]], label [[BB1]]
+; CHECK:       bb34:
+; CHECK-NEXT:    [[TMP35:%.*]] = phi i32 [ [[TMP33]], [[BB25]] ]
+; CHECK-NEXT:    br label [[BB36:%.*]]
+; CHECK:       bb36:
+; CHECK-NEXT:    store i32 [[TMP35]], i32* @d, align 4
+; CHECK-NEXT:    ret void
+;
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb25, %bb
+  %tmp = phi i16 [ undef, %bb ], [ %tmp29, %bb25 ]
+  %tmp2 = phi i16 [ undef, %bb ], [ %tmp30, %bb25 ]
+  br i1 undef, label %bb3, label %bb11
+
+bb3:                                              ; preds = %bb1
+  %ext.bb3 = zext i1 undef to i32
+  %tmp5 = xor i16 %tmp2, undef
+  %tmp6 = icmp ugt i16 %tmp5, 8
+  %tmp7 = zext i1 %tmp6 to i32
+  %tmp8 = xor i16 %tmp, undef
+  %tmp9 = icmp ugt i16 %tmp8, 8
+  %tmp10 = zext i1 %tmp9 to i32
+  br label %bb25
+
+bb11:                                             ; preds = %bb1
+  %ext.bb11 = zext i1 undef to i32
+  %tmp13 = xor i16 %tmp2, undef
+  %tmp14 = sext i16 %tmp13 to i64
+  %tmp15 = icmp ule i64 undef, %tmp14
+  %tmp16 = zext i1 %tmp15 to i32
+  %tmp17 = icmp ult i32 undef, %tmp16
+  %tmp18 = zext i1 %tmp17 to i32
+  %tmp19 = xor i16 %tmp, undef
+  %tmp20 = sext i16 %tmp19 to i64
+  %tmp21 = icmp ule i64 undef, %tmp20
+  %tmp22 = zext i1 %tmp21 to i32
+  %tmp23 = icmp ult i32 undef, %tmp22
+  %tmp24 = zext i1 %tmp23 to i32
+  br label %bb25
+
+bb25:                                             ; preds = %bb11, %bb3
+  %tmp26 = phi i32 [ %tmp24, %bb11 ], [ %tmp10, %bb3 ]
+  %tmp27 = phi i32 [ %tmp18, %bb11 ], [ %tmp7, %bb3 ]
+  %tmp28 = phi i32 [ %ext.bb11, %bb11 ], [ %ext.bb3, %bb3 ]
+  %tmp29 = phi i16 [ %tmp19, %bb11 ], [ %tmp8, %bb3 ]
+  %tmp30 = phi i16 [ %tmp13, %bb11 ], [ %tmp5, %bb3 ]
+  %tmp31 = and i32 undef, %tmp26
+  %tmp32 = and i32 %tmp31, %tmp27
+  %tmp33 = and i32 %tmp32, %tmp28
+  br i1 undef, label %bb34, label %bb1
+
+bb34:                                             ; preds = %bb25
+  %tmp35 = phi i32 [ %tmp33, %bb25 ]
+  br label %bb36
+
+bb36:                                             ; preds = %bb34
+  store i32 %tmp35, i32* @d, align 4
+  ret void
+}