diff --git a/llvm/test/Transforms/LoopVectorize/override-short-tc-heuristic.ll b/llvm/test/Transforms/LoopVectorize/override-short-tc-heuristic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/override-short-tc-heuristic.ll @@ -0,0 +1,196 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=loop-vectorize -S | FileCheck %s +; RUN: opt < %s -passes=loop-vectorize -prefer-predicate-over-epilogue=scalar-epilogue -S | FileCheck -check-prefix CHECK-SCALAR %s +; RUN: opt < %s -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -S | FileCheck -check-prefix CHECK-PRED-OR-SCALAR %s +; RUN: opt < %s -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck -check-prefix PRED-OR-DONTVEC %s + +;@src = common global [32 x i8] zeroinitializer, align 1 +;@dst = common global [32 x i8] zeroinitializer, align 1 + +; Simple loop with a small constant trip count. +; TODO: Check that -prefer-predicate-over-epilogue and the "llvm.loop.vectorize.predicate.enable=true" +; hint override the "small trip count" heuristic. 
+define i32 @const_low_trip_count_hint_pred(i8 *%dst, i8 *%src) { +; CHECK-LABEL: @const_low_trip_count_hint_pred( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i32 [[I]] +; CHECK-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST:%.*]], i32 [[I]] +; CHECK-NEXT: [[LDVAL:%.*]] = load i8, i8* [[LDIDX]], align 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LDVAL]], 5 +; CHECK-NEXT: [[VAL:%.*]] = select i1 [[CMP1]], i8 1, i8 2 +; CHECK-NEXT: store i8 [[VAL]], i8* [[STIDX]], align 1 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I]], 8 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret i32 0 +; +; CHECK-SCALAR-LABEL: @const_low_trip_count_hint_pred( +; CHECK-SCALAR-NEXT: entry: +; CHECK-SCALAR-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-SCALAR: for.body: +; CHECK-SCALAR-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-SCALAR-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i32 [[I]] +; CHECK-SCALAR-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST:%.*]], i32 [[I]] +; CHECK-SCALAR-NEXT: [[LDVAL:%.*]] = load i8, i8* [[LDIDX]], align 1 +; CHECK-SCALAR-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LDVAL]], 5 +; CHECK-SCALAR-NEXT: [[VAL:%.*]] = select i1 [[CMP1]], i8 1, i8 2 +; CHECK-SCALAR-NEXT: store i8 [[VAL]], i8* [[STIDX]], align 1 +; CHECK-SCALAR-NEXT: [[INC]] = add nsw i32 [[I]], 1 +; CHECK-SCALAR-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I]], 8 +; CHECK-SCALAR-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-SCALAR: for.end: +; CHECK-SCALAR-NEXT: ret i32 0 +; +; CHECK-PRED-OR-SCALAR-LABEL: @const_low_trip_count_hint_pred( +; CHECK-PRED-OR-SCALAR-NEXT: entry: 
+; CHECK-PRED-OR-SCALAR-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-PRED-OR-SCALAR: for.body: +; CHECK-PRED-OR-SCALAR-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-PRED-OR-SCALAR-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i32 [[I]] +; CHECK-PRED-OR-SCALAR-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST:%.*]], i32 [[I]] +; CHECK-PRED-OR-SCALAR-NEXT: [[LDVAL:%.*]] = load i8, i8* [[LDIDX]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LDVAL]], 5 +; CHECK-PRED-OR-SCALAR-NEXT: [[VAL:%.*]] = select i1 [[CMP1]], i8 1, i8 2 +; CHECK-PRED-OR-SCALAR-NEXT: store i8 [[VAL]], i8* [[STIDX]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: [[INC]] = add nsw i32 [[I]], 1 +; CHECK-PRED-OR-SCALAR-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I]], 8 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-PRED-OR-SCALAR: for.end: +; CHECK-PRED-OR-SCALAR-NEXT: ret i32 0 +; +; PRED-OR-DONTVEC-LABEL: @const_low_trip_count_hint_pred( +; PRED-OR-DONTVEC-NEXT: entry: +; PRED-OR-DONTVEC-NEXT: br label [[FOR_BODY:%.*]] +; PRED-OR-DONTVEC: for.body: +; PRED-OR-DONTVEC-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; PRED-OR-DONTVEC-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i32 [[I]] +; PRED-OR-DONTVEC-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST:%.*]], i32 [[I]] +; PRED-OR-DONTVEC-NEXT: [[LDVAL:%.*]] = load i8, i8* [[LDIDX]], align 1 +; PRED-OR-DONTVEC-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LDVAL]], 5 +; PRED-OR-DONTVEC-NEXT: [[VAL:%.*]] = select i1 [[CMP1]], i8 1, i8 2 +; PRED-OR-DONTVEC-NEXT: store i8 [[VAL]], i8* [[STIDX]], align 1 +; PRED-OR-DONTVEC-NEXT: [[INC]] = add nsw i32 [[I]], 1 +; PRED-OR-DONTVEC-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I]], 8 +; PRED-OR-DONTVEC-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; PRED-OR-DONTVEC: for.end: +; 
PRED-OR-DONTVEC-NEXT: ret i32 0 +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %ldidx = getelementptr i8, i8* %src, i32 %i + %stidx = getelementptr i8, i8* %dst, i32 %i + %ldval = load i8, i8* %ldidx, align 1 + %cmp1 = icmp eq i8 %ldval, 5 + %val = select i1 %cmp1, i8 1, i8 2 + store i8 %val, i8* %stidx, align 1 + %inc = add nsw i32 %i, 1 + %exitcond = icmp slt i32 %i, 8 + br i1 %exitcond, label %for.body, label %for.end, !llvm.loop !1 + +for.end: ; preds = %for.body + ret i32 0 +} + +; Simple loop with a small constant trip count (the body below executes 9 times). +; TODO: Check that -prefer-predicate-over-epilogue and the "llvm.loop.vectorize.predicate.enable=false" +; hint override the "small trip count" heuristic; every check prefix below currently matches the unchanged scalar loop. +define i32 @const_low_trip_count_hint_no_pred(i8 *%dst, i8 *%src) { +; CHECK-LABEL: @const_low_trip_count_hint_no_pred( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i32 [[I]] +; CHECK-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST:%.*]], i32 [[I]] +; CHECK-NEXT: [[LDVAL:%.*]] = load i8, i8* [[LDIDX]], align 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LDVAL]], 5 +; CHECK-NEXT: [[VAL:%.*]] = select i1 [[CMP1]], i8 1, i8 2 +; CHECK-NEXT: store i8 [[VAL]], i8* [[STIDX]], align 1 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I]], 8 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret i32 0 +; +; CHECK-SCALAR-LABEL: @const_low_trip_count_hint_no_pred( +; CHECK-SCALAR-NEXT: entry: +; CHECK-SCALAR-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-SCALAR: for.body: +; CHECK-SCALAR-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-SCALAR-NEXT: [[LDIDX:%.*]] = 
getelementptr i8, i8* [[SRC:%.*]], i32 [[I]] +; CHECK-SCALAR-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST:%.*]], i32 [[I]] +; CHECK-SCALAR-NEXT: [[LDVAL:%.*]] = load i8, i8* [[LDIDX]], align 1 +; CHECK-SCALAR-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LDVAL]], 5 +; CHECK-SCALAR-NEXT: [[VAL:%.*]] = select i1 [[CMP1]], i8 1, i8 2 +; CHECK-SCALAR-NEXT: store i8 [[VAL]], i8* [[STIDX]], align 1 +; CHECK-SCALAR-NEXT: [[INC]] = add nsw i32 [[I]], 1 +; CHECK-SCALAR-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I]], 8 +; CHECK-SCALAR-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-SCALAR: for.end: +; CHECK-SCALAR-NEXT: ret i32 0 +; +; CHECK-PRED-OR-SCALAR-LABEL: @const_low_trip_count_hint_no_pred( +; CHECK-PRED-OR-SCALAR-NEXT: entry: +; CHECK-PRED-OR-SCALAR-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-PRED-OR-SCALAR: for.body: +; CHECK-PRED-OR-SCALAR-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-PRED-OR-SCALAR-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i32 [[I]] +; CHECK-PRED-OR-SCALAR-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST:%.*]], i32 [[I]] +; CHECK-PRED-OR-SCALAR-NEXT: [[LDVAL:%.*]] = load i8, i8* [[LDIDX]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LDVAL]], 5 +; CHECK-PRED-OR-SCALAR-NEXT: [[VAL:%.*]] = select i1 [[CMP1]], i8 1, i8 2 +; CHECK-PRED-OR-SCALAR-NEXT: store i8 [[VAL]], i8* [[STIDX]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: [[INC]] = add nsw i32 [[I]], 1 +; CHECK-PRED-OR-SCALAR-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I]], 8 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-PRED-OR-SCALAR: for.end: +; CHECK-PRED-OR-SCALAR-NEXT: ret i32 0 +; +; PRED-OR-DONTVEC-LABEL: @const_low_trip_count_hint_no_pred( +; PRED-OR-DONTVEC-NEXT: entry: +; PRED-OR-DONTVEC-NEXT: br label [[FOR_BODY:%.*]] +; PRED-OR-DONTVEC: for.body: +; PRED-OR-DONTVEC-NEXT: [[I:%.*]] = phi 
i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; PRED-OR-DONTVEC-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i32 [[I]] +; PRED-OR-DONTVEC-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST:%.*]], i32 [[I]] +; PRED-OR-DONTVEC-NEXT: [[LDVAL:%.*]] = load i8, i8* [[LDIDX]], align 1 +; PRED-OR-DONTVEC-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LDVAL]], 5 +; PRED-OR-DONTVEC-NEXT: [[VAL:%.*]] = select i1 [[CMP1]], i8 1, i8 2 +; PRED-OR-DONTVEC-NEXT: store i8 [[VAL]], i8* [[STIDX]], align 1 +; PRED-OR-DONTVEC-NEXT: [[INC]] = add nsw i32 [[I]], 1 +; PRED-OR-DONTVEC-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I]], 8 +; PRED-OR-DONTVEC-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; PRED-OR-DONTVEC: for.end: +; PRED-OR-DONTVEC-NEXT: ret i32 0 +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %ldidx = getelementptr i8, i8* %src, i32 %i + %stidx = getelementptr i8, i8* %dst, i32 %i + %ldval = load i8, i8* %ldidx, align 1 + %cmp1 = icmp eq i8 %ldval, 5 + %val = select i1 %cmp1, i8 1, i8 2 + store i8 %val, i8* %stidx, align 1 + %inc = add nsw i32 %i, 1 + %exitcond = icmp slt i32 %i, 8 + br i1 %exitcond, label %for.body, label %for.end, !llvm.loop !3 + +for.end: ; preds = %for.body + ret i32 0 +} + +!1 = distinct !{!1, !2, !5} ; loop ID attached to the loop in @const_low_trip_count_hint_pred +!2 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} +!3 = distinct !{!3, !4, !5} ; loop ID attached to the loop in @const_low_trip_count_hint_no_pred +!4 = !{!"llvm.loop.vectorize.predicate.enable", i1 false} +!5 = !{!"llvm.loop.vectorize.width", i32 4} ; shared by both loop IDs above