Changeset View
Changeset View
Standalone View
Standalone View
llvm/test/Transforms/LoopVectorize/X86/reg-usage.ll
; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -mtriple=x86_64-unknown-linux -S 2>&1 | FileCheck %s | ; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -mtriple=x86_64-unknown-linux -S 2>&1 | FileCheck %s | ||||
; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -mtriple=x86_64-unknown-linux -mattr=+avx512f -S 2>&1 | FileCheck %s --check-prefix=AVX512F | ; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -mtriple=x86_64-unknown-linux -mattr=+avx512f -S 2>&1 | FileCheck %s --check-prefix=AVX512F | ||||
; REQUIRES: asserts | ; REQUIRES: asserts | ||||
@a = global [1024 x i8] zeroinitializer, align 16 | @a = global [1024 x i8] zeroinitializer, align 16 | ||||
@b = global [1024 x i8] zeroinitializer, align 16 | @b = global [1024 x i8] zeroinitializer, align 16 | ||||
define i32 @foo() { | define i32 @foo() { | ||||
; This function has a loop with a SAD pattern. Here we check that when VF = 16 the | ; This function has a loop with a SAD pattern. Here we check that when VF = 16 the | ||||
; register usage doesn't exceed 16. | ; register usage doesn't exceed 16. | ||||
; | ; | ||||
; CHECK-LABEL: foo | ; CHECK-LABEL: foo | ||||
; CHECK: LV(REG): VF = 8 | ; CHECK: LV(REG): VF = 8 | ||||
; CHECK-NEXT: LV(REG): Found max usage: 7 | ; CHECK-NEXT: LV(REG): Found max usage: 2 item | ||||
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers | |||||
; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 7 registers | |||||
; CHECK-NEXT: LV(REG): Found invariant usage: 0 item | |||||
; CHECK: LV(REG): VF = 16 | ; CHECK: LV(REG): VF = 16 | ||||
; CHECK-NEXT: LV(REG): Found max usage: 13 | ; CHECK-NEXT: LV(REG): Found max usage: 2 item | ||||
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers | |||||
; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 13 registers | |||||
; CHECK-NEXT: LV(REG): Found invariant usage: 0 item | |||||
entry: | entry: | ||||
br label %for.body | br label %for.body | ||||
for.cond.cleanup: | for.cond.cleanup: | ||||
%add.lcssa = phi i32 [ %add, %for.body ] | %add.lcssa = phi i32 [ %add, %for.body ] | ||||
ret i32 %add.lcssa | ret i32 %add.lcssa | ||||
Show All 17 Lines | |||||
} | } | ||||
define i32 @goo() { | define i32 @goo() { | ||||
; For indvars.iv used in a computation chain only feeding into getelementptr or cmp, | ; For indvars.iv used in a computation chain only feeding into getelementptr or cmp, | ||||
; it will not have a vector version and the vector register usage will not exceed the | ; it will not have a vector version and the vector register usage will not exceed the | ||||
; available vector register number. | ; available vector register number. | ||||
; CHECK-LABEL: goo | ; CHECK-LABEL: goo | ||||
; CHECK: LV(REG): VF = 8 | ; CHECK: LV(REG): VF = 8 | ||||
; CHECK-NEXT: LV(REG): Found max usage: 7 | ; CHECK-NEXT: LV(REG): Found max usage: 2 item | ||||
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers | |||||
; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 7 registers | |||||
; CHECK-NEXT: LV(REG): Found invariant usage: 0 item | |||||
; CHECK: LV(REG): VF = 16 | ; CHECK: LV(REG): VF = 16 | ||||
; CHECK-NEXT: LV(REG): Found max usage: 13 | ; CHECK-NEXT: LV(REG): Found max usage: 2 item | ||||
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers | |||||
; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 13 registers | |||||
; CHECK-NEXT: LV(REG): Found invariant usage: 0 item | |||||
entry: | entry: | ||||
br label %for.body | br label %for.body | ||||
for.cond.cleanup: ; preds = %for.body | for.cond.cleanup: ; preds = %for.body | ||||
%add.lcssa = phi i32 [ %add, %for.body ] | %add.lcssa = phi i32 [ %add, %for.body ] | ||||
ret i32 %add.lcssa | ret i32 %add.lcssa | ||||
for.body: ; preds = %for.body, %entry | for.body: ; preds = %for.body, %entry | ||||
Show All 15 Lines | for.body: ; preds = %for.body, %entry | ||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||||
%exitcond = icmp eq i64 %indvars.iv.next, 1024 | %exitcond = icmp eq i64 %indvars.iv.next, 1024 | ||||
br i1 %exitcond, label %for.cond.cleanup, label %for.body | br i1 %exitcond, label %for.cond.cleanup, label %for.body | ||||
} | } | ||||
define i64 @bar(i64* nocapture %a) { | define i64 @bar(i64* nocapture %a) { | ||||
; CHECK-LABEL: bar | ; CHECK-LABEL: bar | ||||
; CHECK: LV(REG): VF = 2 | ; CHECK: LV(REG): VF = 2 | ||||
; CHECK: LV(REG): Found max usage: 3 | ; CHECK-NEXT: LV(REG): Found max usage: 2 item | ||||
; | ; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 3 registers | ||||
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers | |||||
; CHECK-NEXT: LV(REG): Found invariant usage: 0 item | |||||
entry: | entry: | ||||
br label %for.body | br label %for.body | ||||
for.cond.cleanup: | for.cond.cleanup: | ||||
%add2.lcssa = phi i64 [ %add2, %for.body ] | %add2.lcssa = phi i64 [ %add2, %for.body ] | ||||
ret i64 %add2.lcssa | ret i64 %add2.lcssa | ||||
for.body: | for.body: | ||||
Show All 14 Lines | |||||
@c = external global [0 x i32], align 4 | @c = external global [0 x i32], align 4 | ||||
define void @hoo(i32 %n) { | define void @hoo(i32 %n) { | ||||
; For c[i] = e[d[i]] in the loop, e[d[i]] is not consecutive but its index %tmp can | ; For c[i] = e[d[i]] in the loop, e[d[i]] is not consecutive but its index %tmp can | ||||
; be gathered into a vector. For VF == 16, the vector version of %tmp will be <16 x i64> | ; be gathered into a vector. For VF == 16, the vector version of %tmp will be <16 x i64> | ||||
; so the max usage of AVX512 vector register will be 2. | ; so the max usage of AVX512 vector register will be 2. | ||||
; AVX512F-LABEL: hoo | ; AVX512F-LABEL: hoo | ||||
; AVX512F: LV(REG): VF = 16 | ; AVX512F: LV(REG): VF = 16 | ||||
; AVX512F: LV(REG): Found max usage: 2 | ; AVX512F-NEXT: LV(REG): Found max usage: 2 item | ||||
; | ; AVX512F-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers | ||||
; AVX512F-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers | |||||
; AVX512F-NEXT: LV(REG): Found invariant usage: 0 item | |||||
entry: | entry: | ||||
br label %for.body | br label %for.body | ||||
for.body: ; preds = %for.body, %entry | for.body: ; preds = %for.body, %entry | ||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] | %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] | ||||
%arrayidx = getelementptr inbounds [0 x i64], [0 x i64]* @d, i64 0, i64 %indvars.iv | %arrayidx = getelementptr inbounds [0 x i64], [0 x i64]* @d, i64 0, i64 %indvars.iv | ||||
%tmp = load i64, i64* %arrayidx, align 8 | %tmp = load i64, i64* %arrayidx, align 8 | ||||
%arrayidx1 = getelementptr inbounds [0 x i32], [0 x i32]* @e, i64 0, i64 %tmp | %arrayidx1 = getelementptr inbounds [0 x i32], [0 x i32]* @e, i64 0, i64 %tmp | ||||
Show All 10 Lines |