diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/load-used-by-two-stores.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/load-used-by-two-stores.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/load-used-by-two-stores.ll
@@ -0,0 +1,219 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -S < %s -mtriple=aarch64-- | FileCheck %s
+
+; This test checks that we can vectorize the simple pattern of a load with more
+; than one store user:
+;    L0        L1
+;   /  \      /  \
+;  S0  S'0   S1  S'1
+
+define void @load_used_by_two_stores_double(double *%ptr) {
+; CHECK-LABEL: @load_used_by_two_stores_double(
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 1
+; CHECK-NEXT:    [[LD0:%.*]] = load double, double* [[GEP0]], align 8
+; CHECK-NEXT:    [[LD1:%.*]] = load double, double* [[GEP1]], align 8
+; CHECK-NEXT:    store double [[LD0]], double* [[GEP0]], align 8
+; CHECK-NEXT:    store double [[LD1]], double* [[GEP1]], align 8
+; CHECK-NEXT:    store double [[LD0]], double* [[GEP0]], align 8
+; CHECK-NEXT:    store double [[LD1]], double* [[GEP1]], align 8
+; CHECK-NEXT:    ret void
+;
+  %gep0 = getelementptr inbounds double, double* %ptr, i64 0
+  %gep1 = getelementptr inbounds double, double* %ptr, i64 1
+  %ld0 = load double, double *%gep0, align 8
+  %ld1 = load double, double *%gep1, align 8
+
+  store double %ld0, double *%gep0, align 8
+  store double %ld1, double *%gep1, align 8
+
+  store double %ld0, double *%gep0, align 8
+  store double %ld1, double *%gep1, align 8
+  ret void
+}
+
+define void @load_used_by_two_stores_i32(i32 *%ptr) {
+; CHECK-LABEL: @load_used_by_two_stores_i32(
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 0
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 1
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 2
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 3
+; CHECK-NEXT:    [[LD0:%.*]] = load i32, i32* [[GEP0]], align 8
+; CHECK-NEXT:    [[LD1:%.*]] = load i32, i32* [[GEP1]], align 8
+; CHECK-NEXT:    [[LD2:%.*]] = load i32, i32* [[GEP2]], align 8
+; CHECK-NEXT:    [[LD3:%.*]] = load i32, i32* [[GEP3]], align 8
+; CHECK-NEXT:    store i32 [[LD0]], i32* [[GEP0]], align 8
+; CHECK-NEXT:    store i32 [[LD1]], i32* [[GEP1]], align 8
+; CHECK-NEXT:    store i32 [[LD2]], i32* [[GEP2]], align 8
+; CHECK-NEXT:    store i32 [[LD3]], i32* [[GEP3]], align 8
+; CHECK-NEXT:    store i32 [[LD0]], i32* [[GEP0]], align 8
+; CHECK-NEXT:    store i32 [[LD1]], i32* [[GEP1]], align 8
+; CHECK-NEXT:    store i32 [[LD2]], i32* [[GEP2]], align 8
+; CHECK-NEXT:    store i32 [[LD3]], i32* [[GEP3]], align 8
+; CHECK-NEXT:    ret void
+;
+  %gep0 = getelementptr inbounds i32, i32* %ptr, i64 0
+  %gep1 = getelementptr inbounds i32, i32* %ptr, i64 1
+  %gep2 = getelementptr inbounds i32, i32* %ptr, i64 2
+  %gep3 = getelementptr inbounds i32, i32* %ptr, i64 3
+
+  %ld0 = load i32, i32 *%gep0, align 8
+  %ld1 = load i32, i32 *%gep1, align 8
+  %ld2 = load i32, i32 *%gep2, align 8
+  %ld3 = load i32, i32 *%gep3, align 8
+
+
+  store i32 %ld0, i32 *%gep0, align 8
+  store i32 %ld1, i32 *%gep1, align 8
+  store i32 %ld2, i32 *%gep2, align 8
+  store i32 %ld3, i32 *%gep3, align 8
+
+
+  store i32 %ld0, i32 *%gep0, align 8
+  store i32 %ld1, i32 *%gep1, align 8
+  store i32 %ld2, i32 *%gep2, align 8
+  store i32 %ld3, i32 *%gep3, align 8
+
+  ret void
+}
+
+define void @load_used_by_two_stores_i8(i8 *%ptr) {
+; CHECK-LABEL: @load_used_by_two_stores_i8(
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 0
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 1
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 2
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 3
+; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 4
+; CHECK-NEXT:    [[GEP5:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 5
+; CHECK-NEXT:    [[GEP6:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 6
+; CHECK-NEXT:    [[GEP7:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 7
+; CHECK-NEXT:    [[GEP8:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 8
+; CHECK-NEXT:    [[GEP9:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 9
+; CHECK-NEXT:    [[GEP10:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 10
+; CHECK-NEXT:    [[GEP11:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 11
+; CHECK-NEXT:    [[GEP12:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 12
+; CHECK-NEXT:    [[GEP13:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 13
+; CHECK-NEXT:    [[GEP14:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 14
+; CHECK-NEXT:    [[GEP15:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 15
+; CHECK-NEXT:    [[LD0:%.*]] = load i8, i8* [[GEP0]], align 8
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, i8* [[GEP1]], align 8
+; CHECK-NEXT:    [[LD2:%.*]] = load i8, i8* [[GEP2]], align 8
+; CHECK-NEXT:    [[LD3:%.*]] = load i8, i8* [[GEP3]], align 8
+; CHECK-NEXT:    [[LD4:%.*]] = load i8, i8* [[GEP4]], align 8
+; CHECK-NEXT:    [[LD5:%.*]] = load i8, i8* [[GEP5]], align 8
+; CHECK-NEXT:    [[LD6:%.*]] = load i8, i8* [[GEP6]], align 8
+; CHECK-NEXT:    [[LD7:%.*]] = load i8, i8* [[GEP7]], align 8
+; CHECK-NEXT:    [[LD8:%.*]] = load i8, i8* [[GEP8]], align 8
+; CHECK-NEXT:    [[LD9:%.*]] = load i8, i8* [[GEP9]], align 8
+; CHECK-NEXT:    [[LD10:%.*]] = load i8, i8* [[GEP10]], align 8
+; CHECK-NEXT:    [[LD11:%.*]] = load i8, i8* [[GEP11]], align 8
+; CHECK-NEXT:    [[LD12:%.*]] = load i8, i8* [[GEP12]], align 8
+; CHECK-NEXT:    [[LD13:%.*]] = load i8, i8* [[GEP13]], align 8
+; CHECK-NEXT:    [[LD14:%.*]] = load i8, i8* [[GEP14]], align 8
+; CHECK-NEXT:    [[LD15:%.*]] = load i8, i8* [[GEP15]], align 8
+; CHECK-NEXT:    store i8 [[LD0]], i8* [[GEP0]], align 8
+; CHECK-NEXT:    store i8 [[LD1]], i8* [[GEP1]], align 8
+; CHECK-NEXT:    store i8 [[LD2]], i8* [[GEP2]], align 8
+; CHECK-NEXT:    store i8 [[LD3]], i8* [[GEP3]], align 8
+; CHECK-NEXT:    store i8 [[LD4]], i8* [[GEP4]], align 8
+; CHECK-NEXT:    store i8 [[LD5]], i8* [[GEP5]], align 8
+; CHECK-NEXT:    store i8 [[LD6]], i8* [[GEP6]], align 8
+; CHECK-NEXT:    store i8 [[LD7]], i8* [[GEP7]], align 8
+; CHECK-NEXT:    store i8 [[LD8]], i8* [[GEP8]], align 8
+; CHECK-NEXT:    store i8 [[LD9]], i8* [[GEP9]], align 8
+; CHECK-NEXT:    store i8 [[LD10]], i8* [[GEP10]], align 8
+; CHECK-NEXT:    store i8 [[LD11]], i8* [[GEP11]], align 8
+; CHECK-NEXT:    store i8 [[LD12]], i8* [[GEP12]], align 8
+; CHECK-NEXT:    store i8 [[LD13]], i8* [[GEP13]], align 8
+; CHECK-NEXT:    store i8 [[LD14]], i8* [[GEP14]], align 8
+; CHECK-NEXT:    store i8 [[LD15]], i8* [[GEP15]], align 8
+; CHECK-NEXT:    store i8 [[LD0]], i8* [[GEP0]], align 8
+; CHECK-NEXT:    store i8 [[LD1]], i8* [[GEP1]], align 8
+; CHECK-NEXT:    store i8 [[LD2]], i8* [[GEP2]], align 8
+; CHECK-NEXT:    store i8 [[LD3]], i8* [[GEP3]], align 8
+; CHECK-NEXT:    store i8 [[LD4]], i8* [[GEP4]], align 8
+; CHECK-NEXT:    store i8 [[LD5]], i8* [[GEP5]], align 8
+; CHECK-NEXT:    store i8 [[LD6]], i8* [[GEP6]], align 8
+; CHECK-NEXT:    store i8 [[LD7]], i8* [[GEP7]], align 8
+; CHECK-NEXT:    store i8 [[LD8]], i8* [[GEP8]], align 8
+; CHECK-NEXT:    store i8 [[LD9]], i8* [[GEP9]], align 8
+; CHECK-NEXT:    store i8 [[LD10]], i8* [[GEP10]], align 8
+; CHECK-NEXT:    store i8 [[LD11]], i8* [[GEP11]], align 8
+; CHECK-NEXT:    store i8 [[LD12]], i8* [[GEP12]], align 8
+; CHECK-NEXT:    store i8 [[LD13]], i8* [[GEP13]], align 8
+; CHECK-NEXT:    store i8 [[LD14]], i8* [[GEP14]], align 8
+; CHECK-NEXT:    store i8 [[LD15]], i8* [[GEP15]], align 8
+; CHECK-NEXT:    ret void
+;
+  %gep0 = getelementptr inbounds i8, i8* %ptr, i64 0
+  %gep1 = getelementptr inbounds i8, i8* %ptr, i64 1
+  %gep2 = getelementptr inbounds i8, i8* %ptr, i64 2
+  %gep3 = getelementptr inbounds i8, i8* %ptr, i64 3
+  %gep4 = getelementptr inbounds i8, i8* %ptr, i64 4
+  %gep5 = getelementptr inbounds i8, i8* %ptr, i64 5
+  %gep6 = getelementptr inbounds i8, i8* %ptr, i64 6
+  %gep7 = getelementptr inbounds i8, i8* %ptr, i64 7
+  %gep8 = getelementptr inbounds i8, i8* %ptr, i64 8
+  %gep9 = getelementptr inbounds i8, i8* %ptr, i64 9
+  %gep10 = getelementptr inbounds i8, i8* %ptr, i64 10
+  %gep11 = getelementptr inbounds i8, i8* %ptr, i64 11
+  %gep12 = getelementptr inbounds i8, i8* %ptr, i64 12
+  %gep13 = getelementptr inbounds i8, i8* %ptr, i64 13
+  %gep14 = getelementptr inbounds i8, i8* %ptr, i64 14
+  %gep15 = getelementptr inbounds i8, i8* %ptr, i64 15
+
+  %ld0 = load i8, i8 *%gep0, align 8
+  %ld1 = load i8, i8 *%gep1, align 8
+  %ld2 = load i8, i8 *%gep2, align 8
+  %ld3 = load i8, i8 *%gep3, align 8
+  %ld4 = load i8, i8 *%gep4, align 8
+  %ld5 = load i8, i8 *%gep5, align 8
+  %ld6 = load i8, i8 *%gep6, align 8
+  %ld7 = load i8, i8 *%gep7, align 8
+  %ld8 = load i8, i8 *%gep8, align 8
+  %ld9 = load i8, i8 *%gep9, align 8
+  %ld10 = load i8, i8 *%gep10, align 8
+  %ld11 = load i8, i8 *%gep11, align 8
+  %ld12 = load i8, i8 *%gep12, align 8
+  %ld13 = load i8, i8 *%gep13, align 8
+  %ld14 = load i8, i8 *%gep14, align 8
+  %ld15 = load i8, i8 *%gep15, align 8
+
+
+  store i8 %ld0, i8 *%gep0, align 8
+  store i8 %ld1, i8 *%gep1, align 8
+  store i8 %ld2, i8 *%gep2, align 8
+  store i8 %ld3, i8 *%gep3, align 8
+  store i8 %ld4, i8 *%gep4, align 8
+  store i8 %ld5, i8 *%gep5, align 8
+  store i8 %ld6, i8 *%gep6, align 8
+  store i8 %ld7, i8 *%gep7, align 8
+  store i8 %ld8, i8 *%gep8, align 8
+  store i8 %ld9, i8 *%gep9, align 8
+  store i8 %ld10, i8 *%gep10, align 8
+  store i8 %ld11, i8 *%gep11, align 8
+  store i8 %ld12, i8 *%gep12, align 8
+  store i8 %ld13, i8 *%gep13, align 8
+  store i8 %ld14, i8 *%gep14, align 8
+  store i8 %ld15, i8 *%gep15, align 8
+
+
+  store i8 %ld0, i8 *%gep0, align 8
+  store i8 %ld1, i8 *%gep1, align 8
+  store i8 %ld2, i8 *%gep2, align 8
+  store i8 %ld3, i8 *%gep3, align 8
+  store i8 %ld4, i8 *%gep4, align 8
+  store i8 %ld5, i8 *%gep5, align 8
+  store i8 %ld6, i8 *%gep6, align 8
+  store i8 %ld7, i8 *%gep7, align 8
+  store i8 %ld8, i8 *%gep8, align 8
+  store i8 %ld9, i8 *%gep9, align 8
+  store i8 %ld10, i8 *%gep10, align 8
+  store i8 %ld11, i8 *%gep11, align 8
+  store i8 %ld12, i8 *%gep12, align 8
+  store i8 %ld13, i8 *%gep13, align 8
+  store i8 %ld14, i8 *%gep14, align 8
+  store i8 %ld15, i8 *%gep15, align 8
+
+  ret void
+}