diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/compact.ll b/llvm/test/Transforms/LoopVectorize/AArch64/compact.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/compact.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=loop-vectorize -S < %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+; Compaction-style loop: for (i = 0; i < N; ++i) if (comp[i] < a) Out_ref[n++] = B[i]; return n. %Out1 is unused. The assertions below describe the original scalar control flow with no vector blocks.
+; Function Attrs: argmemonly nofree norecurse nosync nounwind uwtable vscale_range(1,16)
+define dso_local i32 @kernel_reference(i32 noundef %N, i32 noundef %a, ptr noalias nocapture noundef readonly %comp, ptr noalias nocapture noundef writeonly %Out_ref, ptr nocapture noundef readonly %B, ptr noalias nocapture noundef readnone %Out1) #0 {
+; CHECK-LABEL: @kernel_reference(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[N_013:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_1:%.*]], [[FOR_INC]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[COMP:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP0]], [[A:%.*]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[N_013]], 1
+; CHECK-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[N_013]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[OUT_REF:%.*]], i64 [[IDXPROM4]]
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[N_1]] = phi i32 [ [[INC]], [[IF_THEN]] ], [ [[N_013]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    [[N_1_LCSSA:%.*]] = phi i32 [ [[N_1]], [[FOR_INC]] ]
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[N_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[N_1_LCSSA]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i32 [[N_0_LCSSA]]
+;
+entry:
+  %cmp11 = icmp sgt i32 %N, 0 ; the loop is skipped entirely unless N > 0 (signed)
+  br i1 %cmp11, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %N to i64 ; trip count widened to match the i64 induction variable
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.inc
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] ; i
+  %n.013 = phi i32 [ 0, %for.body.preheader ], [ %n.1, %for.inc ] ; n: number of elements stored so far
+  %arrayidx = getelementptr inbounds i32, ptr %comp, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4 ; comp[i]
+  %cmp1 = icmp slt i32 %0, %a ; comp[i] < a (signed)
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %arrayidx3 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx3, align 4 ; B[i], loaded only when the compare succeeds
+  %inc = add nsw i32 %n.013, 1 ; n + 1, computed only on the store path
+  %idxprom4 = sext i32 %n.013 to i64
+  %arrayidx5 = getelementptr inbounds i32, ptr %Out_ref, i64 %idxprom4 ; &Out_ref[n], indexed by n rather than by i
+  store i32 %1, ptr %arrayidx5, align 4 ; Out_ref[n] = B[i]
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %n.1 = phi i32 [ %inc, %if.then ], [ %n.013, %for.body ] ; n advances only if this iteration stored
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  %n.0.lcssa = phi i32 [ 0, %entry ], [ %n.1, %for.inc ] ; 0 when the loop is skipped, otherwise the final n
+  ret i32 %n.0.lcssa
+}
+
+attributes #0 = { argmemonly nofree norecurse nosync nounwind uwtable vscale_range(1,16) "target-cpu"="generic" "target-features"="+neon,+sve,+v8.2a"}