diff --git a/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll b/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=slp-vectorizer -S -slp-schedule-budget=16 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s +; RUN: opt < %s -passes=slp-vectorizer -S -slp-schedule-budget=16 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix LOBUDGET %s +; RUN: opt < %s -passes=slp-vectorizer -S -slp-schedule-budget=32 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix HIBUDGET %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" @@ -10,44 +11,97 @@ declare void @unknown() define void @test(ptr %a, ptr %b, ptr %c, ptr %d) { -; CHECK-LABEL: @test( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4 -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[B:%.*]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4 -; CHECK-NEXT: store <4 x float> [[TMP4]], ptr [[D:%.*]], align 4 -; CHECK-NEXT: ret void +; LOBUDGET-LABEL: @test( +; LOBUDGET-NEXT: entry: +; LOBUDGET-NEXT: [[L0:%.*]] = load float, ptr [[A:%.*]], align 4 +; LOBUDGET-NEXT: [[A1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1 +; LOBUDGET-NEXT: [[L1:%.*]] = load float, ptr [[A1]], align 4 +; LOBUDGET-NEXT: [[A2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 2 +; LOBUDGET-NEXT: [[L2:%.*]] = load float, ptr [[A2]], align 4 +; LOBUDGET-NEXT: [[A3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 3 +; LOBUDGET-NEXT: [[L3:%.*]] = load float, ptr [[A3]], align 4 +; LOBUDGET-NEXT: [[L00:%.*]] = fadd float [[L0]], [[L0]] +; LOBUDGET-NEXT: [[L10:%.*]] = fadd float [[L1]], [[L1]] +; LOBUDGET-NEXT: [[L20:%.*]] = fadd float [[L2]], [[L2]] +; LOBUDGET-NEXT: [[L30:%.*]] = fadd float [[L3]], [[L3]] +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: call void @unknown() +; LOBUDGET-NEXT: store float [[L00]], ptr [[B:%.*]], align 4 +; LOBUDGET-NEXT: [[B1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 1 +; LOBUDGET-NEXT: store float [[L10]], ptr [[B1]], align 4 +; LOBUDGET-NEXT: [[B2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 2 +; LOBUDGET-NEXT: store float [[L20]], ptr [[B2]], align 4 +; LOBUDGET-NEXT: [[B3:%.*]] = getelementptr inbounds float, ptr [[B]], i64 3 +; LOBUDGET-NEXT: store float [[L30]], ptr [[B3]], align 4 +; LOBUDGET-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4 +; LOBUDGET-NEXT: store <4 x float> [[TMP0]], ptr [[D:%.*]], align 4 +; LOBUDGET-NEXT: ret void +; +; HIBUDGET-LABEL: @test( +; HIBUDGET-NEXT: entry: +; HIBUDGET-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4 +; HIBUDGET-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[TMP0]], [[TMP0]] +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: call void @unknown() +; HIBUDGET-NEXT: store <4 x float> [[TMP1]], ptr [[B:%.*]], align 4 +; HIBUDGET-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4 +; HIBUDGET-NEXT: store <4 x float> [[TMP2]], ptr [[D:%.*]], align 4 +; HIBUDGET-NEXT: ret void ; entry: - ; Don't vectorize these loads. + ; Don't vectorize these loads (with the reduced budget). %l0 = load float, ptr %a %a1 = getelementptr inbounds float, ptr %a, i64 1 %l1 = load float, ptr %a1 @@ -55,6 +109,10 @@ %l2 = load float, ptr %a2 %a3 = getelementptr inbounds float, ptr %a, i64 3 %l3 = load float, ptr %a3 + %l00 = fadd float %l0, %l0 + %l10 = fadd float %l1, %l1 + %l20 = fadd float %l2, %l2 + %l30 = fadd float %l3, %l3 ; some unrelated instructions inbetween to enlarge the scheduling region call void @unknown() @@ -86,14 +144,15 @@ call void @unknown() call void @unknown() - ; Don't vectorize these stores because their operands are too far away. - store float %l0, ptr %b + ; Don't vectorize these stores because their operands are too far away (with + ; the reduced budget). + store float %l00, ptr %b %b1 = getelementptr inbounds float, ptr %b, i64 1 - store float %l1, ptr %b1 + store float %l10, ptr %b1 %b2 = getelementptr inbounds float, ptr %b, i64 2 - store float %l2, ptr %b2 + store float %l20, ptr %b2 %b3 = getelementptr inbounds float, ptr %b, i64 3 - store float %l3, ptr %b3 + store float %l30, ptr %b3 ; But still vectorize the following instructions, because even if the budget ; is exceeded there is a minimum region size.