diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-add-i64.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-add-i64.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-add-i64.ll
@@ -0,0 +1,480 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=slp-vectorizer %s --mtriple=aarch64-- | FileCheck %s
+
+; This test checks whether load-zext-add and load-add reduction patterns
+; get vectorized by SLP on AArch64. Each function sums 2, 4, 8 or 16
+; consecutive elements loaded from %ptr into a single i64 through a linear
+; chain of `add nuw nsw` instructions.
+;
+; The CHECK lines below record the scalar input unchanged, i.e. as written
+; none of these reductions is vectorized.
+
+; Sum of 2 consecutive i8 loads, each zero-extended to i64.
+define i64 @red_zext_ld_2xi64(ptr %ptr) {
+; CHECK-LABEL: @red_zext_ld_2xi64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LD0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[LD0]] to i64
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[ZEXT_1:%.*]] = zext i8 [[LD1]] to i64
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i64 [[ZEXT]], [[ZEXT_1]]
+; CHECK-NEXT:    ret i64 [[ADD_1]]
+;
+entry:
+  %ld0 = load i8, ptr %ptr
+  %zext = zext i8 %ld0 to i64
+  %gep = getelementptr inbounds i8, ptr %ptr, i64 1
+  %ld1 = load i8, ptr %gep
+  %zext.1 = zext i8 %ld1 to i64
+  %add.1 = add nuw nsw i64 %zext, %zext.1
+  ret i64 %add.1
+}
+
+; Sum of 4 consecutive i8 loads, each zero-extended to i64.
+define i64 @red_zext_ld_4xi64(ptr %ptr) {
+; CHECK-LABEL: @red_zext_ld_4xi64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LD0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[LD0]] to i64
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[ZEXT_1:%.*]] = zext i8 [[LD1]] to i64
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i64 [[ZEXT]], [[ZEXT_1]]
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
+; CHECK-NEXT:    [[LD2:%.*]] = load i8, ptr [[GEP_1]], align 1
+; CHECK-NEXT:    [[ZEXT_2:%.*]] = zext i8 [[LD2]] to i64
+; CHECK-NEXT:    [[ADD_2:%.*]] = add nuw nsw i64 [[ADD_1]], [[ZEXT_2]]
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
+; CHECK-NEXT:    [[LD3:%.*]] = load i8, ptr [[GEP_2]], align 1
+; CHECK-NEXT:    [[ZEXT_3:%.*]] = zext i8 [[LD3]] to i64
+; CHECK-NEXT:    [[ADD_3:%.*]] = add nuw nsw i64 [[ADD_2]], [[ZEXT_3]]
+; CHECK-NEXT:    ret i64 [[ADD_3]]
+;
+entry:
+  %ld0 = load i8, ptr %ptr
+  %zext = zext i8 %ld0 to i64
+  %gep = getelementptr inbounds i8, ptr %ptr, i64 1
+  %ld1 = load i8, ptr %gep
+  %zext.1 = zext i8 %ld1 to i64
+  %add.1 = add nuw nsw i64 %zext, %zext.1
+  %gep.1 = getelementptr inbounds i8, ptr %ptr, i64 2
+  %ld2 = load i8, ptr %gep.1
+  %zext.2 = zext i8 %ld2 to i64
+  %add.2 = add nuw nsw i64 %add.1, %zext.2
+  %gep.2 = getelementptr inbounds i8, ptr %ptr, i64 3
+  %ld3 = load i8, ptr %gep.2
+  %zext.3 = zext i8 %ld3 to i64
+  %add.3 = add nuw nsw i64 %add.2, %zext.3
+  ret i64 %add.3
+}
+
+; Sum of 8 consecutive i8 loads, each zero-extended to i64.
+define i64 @red_zext_ld_8xi64(ptr %ptr) {
+; CHECK-LABEL: @red_zext_ld_8xi64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LD0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[LD0]] to i64
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[ZEXT_1:%.*]] = zext i8 [[LD1]] to i64
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i64 [[ZEXT]], [[ZEXT_1]]
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
+; CHECK-NEXT:    [[LD2:%.*]] = load i8, ptr [[GEP_1]], align 1
+; CHECK-NEXT:    [[ZEXT_2:%.*]] = zext i8 [[LD2]] to i64
+; CHECK-NEXT:    [[ADD_2:%.*]] = add nuw nsw i64 [[ADD_1]], [[ZEXT_2]]
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
+; CHECK-NEXT:    [[LD3:%.*]] = load i8, ptr [[GEP_2]], align 1
+; CHECK-NEXT:    [[ZEXT_3:%.*]] = zext i8 [[LD3]] to i64
+; CHECK-NEXT:    [[ADD_3:%.*]] = add nuw nsw i64 [[ADD_2]], [[ZEXT_3]]
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4
+; CHECK-NEXT:    [[LD4:%.*]] = load i8, ptr [[GEP_3]], align 1
+; CHECK-NEXT:    [[ZEXT_4:%.*]] = zext i8 [[LD4]] to i64
+; CHECK-NEXT:    [[ADD_4:%.*]] = add nuw nsw i64 [[ADD_3]], [[ZEXT_4]]
+; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 5
+; CHECK-NEXT:    [[LD5:%.*]] = load i8, ptr [[GEP_4]], align 1
+; CHECK-NEXT:    [[ZEXT_5:%.*]] = zext i8 [[LD5]] to i64
+; CHECK-NEXT:    [[ADD_5:%.*]] = add nuw nsw i64 [[ADD_4]], [[ZEXT_5]]
+; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 6
+; CHECK-NEXT:    [[LD6:%.*]] = load i8, ptr [[GEP_5]], align 1
+; CHECK-NEXT:    [[ZEXT_6:%.*]] = zext i8 [[LD6]] to i64
+; CHECK-NEXT:    [[ADD_6:%.*]] = add nuw nsw i64 [[ADD_5]], [[ZEXT_6]]
+; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 7
+; CHECK-NEXT:    [[LD7:%.*]] = load i8, ptr [[GEP_6]], align 1
+; CHECK-NEXT:    [[ZEXT_7:%.*]] = zext i8 [[LD7]] to i64
+; CHECK-NEXT:    [[ADD_7:%.*]] = add nuw nsw i64 [[ADD_6]], [[ZEXT_7]]
+; CHECK-NEXT:    ret i64 [[ADD_7]]
+;
+entry:
+  %ld0 = load i8, ptr %ptr
+  %zext = zext i8 %ld0 to i64
+  %gep = getelementptr inbounds i8, ptr %ptr, i64 1
+  %ld1 = load i8, ptr %gep
+  %zext.1 = zext i8 %ld1 to i64
+  %add.1 = add nuw nsw i64 %zext, %zext.1
+  %gep.1 = getelementptr inbounds i8, ptr %ptr, i64 2
+  %ld2 = load i8, ptr %gep.1
+  %zext.2 = zext i8 %ld2 to i64
+  %add.2 = add nuw nsw i64 %add.1, %zext.2
+  %gep.2 = getelementptr inbounds i8, ptr %ptr, i64 3
+  %ld3 = load i8, ptr %gep.2
+  %zext.3 = zext i8 %ld3 to i64
+  %add.3 = add nuw nsw i64 %add.2, %zext.3
+  %gep.3 = getelementptr inbounds i8, ptr %ptr, i64 4
+  %ld4 = load i8, ptr %gep.3
+  %zext.4 = zext i8 %ld4 to i64
+  %add.4 = add nuw nsw i64 %add.3, %zext.4
+  %gep.4 = getelementptr inbounds i8, ptr %ptr, i64 5
+  %ld5 = load i8, ptr %gep.4
+  %zext.5 = zext i8 %ld5 to i64
+  %add.5 = add nuw nsw i64 %add.4, %zext.5
+  %gep.5 = getelementptr inbounds i8, ptr %ptr, i64 6
+  %ld6 = load i8, ptr %gep.5
+  %zext.6 = zext i8 %ld6 to i64
+  %add.6 = add nuw nsw i64 %add.5, %zext.6
+  %gep.6 = getelementptr inbounds i8, ptr %ptr, i64 7
+  %ld7 = load i8, ptr %gep.6
+  %zext.7 = zext i8 %ld7 to i64
+  %add.7 = add nuw nsw i64 %add.6, %zext.7
+  ret i64 %add.7
+}
+
+; Sum of 16 consecutive i8 loads, each zero-extended to i64.
+define i64 @red_zext_ld_16xi64(ptr %ptr) {
+; CHECK-LABEL: @red_zext_ld_16xi64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LD0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[LD0]] to i64
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[ZEXT_1:%.*]] = zext i8 [[LD1]] to i64
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i64 [[ZEXT]], [[ZEXT_1]]
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
+; CHECK-NEXT:    [[LD2:%.*]] = load i8, ptr [[GEP_1]], align 1
+; CHECK-NEXT:    [[ZEXT_2:%.*]] = zext i8 [[LD2]] to i64
+; CHECK-NEXT:    [[ADD_2:%.*]] = add nuw nsw i64 [[ADD_1]], [[ZEXT_2]]
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
+; CHECK-NEXT:    [[LD3:%.*]] = load i8, ptr [[GEP_2]], align 1
+; CHECK-NEXT:    [[ZEXT_3:%.*]] = zext i8 [[LD3]] to i64
+; CHECK-NEXT:    [[ADD_3:%.*]] = add nuw nsw i64 [[ADD_2]], [[ZEXT_3]]
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4
+; CHECK-NEXT:    [[LD4:%.*]] = load i8, ptr [[GEP_3]], align 1
+; CHECK-NEXT:    [[ZEXT_4:%.*]] = zext i8 [[LD4]] to i64
+; CHECK-NEXT:    [[ADD_4:%.*]] = add nuw nsw i64 [[ADD_3]], [[ZEXT_4]]
+; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 5
+; CHECK-NEXT:    [[LD5:%.*]] = load i8, ptr [[GEP_4]], align 1
+; CHECK-NEXT:    [[ZEXT_5:%.*]] = zext i8 [[LD5]] to i64
+; CHECK-NEXT:    [[ADD_5:%.*]] = add nuw nsw i64 [[ADD_4]], [[ZEXT_5]]
+; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 6
+; CHECK-NEXT:    [[LD6:%.*]] = load i8, ptr [[GEP_5]], align 1
+; CHECK-NEXT:    [[ZEXT_6:%.*]] = zext i8 [[LD6]] to i64
+; CHECK-NEXT:    [[ADD_6:%.*]] = add nuw nsw i64 [[ADD_5]], [[ZEXT_6]]
+; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 7
+; CHECK-NEXT:    [[LD7:%.*]] = load i8, ptr [[GEP_6]], align 1
+; CHECK-NEXT:    [[ZEXT_7:%.*]] = zext i8 [[LD7]] to i64
+; CHECK-NEXT:    [[ADD_7:%.*]] = add nuw nsw i64 [[ADD_6]], [[ZEXT_7]]
+; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 8
+; CHECK-NEXT:    [[LD8:%.*]] = load i8, ptr [[GEP_7]], align 1
+; CHECK-NEXT:    [[ZEXT_8:%.*]] = zext i8 [[LD8]] to i64
+; CHECK-NEXT:    [[ADD_8:%.*]] = add nuw nsw i64 [[ADD_7]], [[ZEXT_8]]
+; CHECK-NEXT:    [[GEP_8:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 9
+; CHECK-NEXT:    [[LD9:%.*]] = load i8, ptr [[GEP_8]], align 1
+; CHECK-NEXT:    [[ZEXT_9:%.*]] = zext i8 [[LD9]] to i64
+; CHECK-NEXT:    [[ADD_9:%.*]] = add nuw nsw i64 [[ADD_8]], [[ZEXT_9]]
+; CHECK-NEXT:    [[GEP_9:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 10
+; CHECK-NEXT:    [[LD10:%.*]] = load i8, ptr [[GEP_9]], align 1
+; CHECK-NEXT:    [[ZEXT_10:%.*]] = zext i8 [[LD10]] to i64
+; CHECK-NEXT:    [[ADD_10:%.*]] = add nuw nsw i64 [[ADD_9]], [[ZEXT_10]]
+; CHECK-NEXT:    [[GEP_10:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 11
+; CHECK-NEXT:    [[LD11:%.*]] = load i8, ptr [[GEP_10]], align 1
+; CHECK-NEXT:    [[ZEXT_11:%.*]] = zext i8 [[LD11]] to i64
+; CHECK-NEXT:    [[ADD_11:%.*]] = add nuw nsw i64 [[ADD_10]], [[ZEXT_11]]
+; CHECK-NEXT:    [[GEP_11:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 12
+; CHECK-NEXT:    [[LD12:%.*]] = load i8, ptr [[GEP_11]], align 1
+; CHECK-NEXT:    [[ZEXT_12:%.*]] = zext i8 [[LD12]] to i64
+; CHECK-NEXT:    [[ADD_12:%.*]] = add nuw nsw i64 [[ADD_11]], [[ZEXT_12]]
+; CHECK-NEXT:    [[GEP_12:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 13
+; CHECK-NEXT:    [[LD13:%.*]] = load i8, ptr [[GEP_12]], align 1
+; CHECK-NEXT:    [[ZEXT_13:%.*]] = zext i8 [[LD13]] to i64
+; CHECK-NEXT:    [[ADD_13:%.*]] = add nuw nsw i64 [[ADD_12]], [[ZEXT_13]]
+; CHECK-NEXT:    [[GEP_13:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 14
+; CHECK-NEXT:    [[LD14:%.*]] = load i8, ptr [[GEP_13]], align 1
+; CHECK-NEXT:    [[ZEXT_14:%.*]] = zext i8 [[LD14]] to i64
+; CHECK-NEXT:    [[ADD_14:%.*]] = add nuw nsw i64 [[ADD_13]], [[ZEXT_14]]
+; CHECK-NEXT:    [[GEP_14:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 15
+; CHECK-NEXT:    [[LD15:%.*]] = load i8, ptr [[GEP_14]], align 1
+; CHECK-NEXT:    [[ZEXT_15:%.*]] = zext i8 [[LD15]] to i64
+; CHECK-NEXT:    [[ADD_15:%.*]] = add nuw nsw i64 [[ADD_14]], [[ZEXT_15]]
+; CHECK-NEXT:    ret i64 [[ADD_15]]
+;
+entry:
+  %ld0 = load i8, ptr %ptr
+  %zext = zext i8 %ld0 to i64
+  %gep = getelementptr inbounds i8, ptr %ptr, i64 1
+  %ld1 = load i8, ptr %gep
+  %zext.1 = zext i8 %ld1 to i64
+  %add.1 = add nuw nsw i64 %zext, %zext.1
+  %gep.1 = getelementptr inbounds i8, ptr %ptr, i64 2
+  %ld2 = load i8, ptr %gep.1
+  %zext.2 = zext i8 %ld2 to i64
+  %add.2 = add nuw nsw i64 %add.1, %zext.2
+  %gep.2 = getelementptr inbounds i8, ptr %ptr, i64 3
+  %ld3 = load i8, ptr %gep.2
+  %zext.3 = zext i8 %ld3 to i64
+  %add.3 = add nuw nsw i64 %add.2, %zext.3
+  %gep.3 = getelementptr inbounds i8, ptr %ptr, i64 4
+  %ld4 = load i8, ptr %gep.3
+  %zext.4 = zext i8 %ld4 to i64
+  %add.4 = add nuw nsw i64 %add.3, %zext.4
+  %gep.4 = getelementptr inbounds i8, ptr %ptr, i64 5
+  %ld5 = load i8, ptr %gep.4
+  %zext.5 = zext i8 %ld5 to i64
+  %add.5 = add nuw nsw i64 %add.4, %zext.5
+  %gep.5 = getelementptr inbounds i8, ptr %ptr, i64 6
+  %ld6 = load i8, ptr %gep.5
+  %zext.6 = zext i8 %ld6 to i64
+  %add.6 = add nuw nsw i64 %add.5, %zext.6
+  %gep.6 = getelementptr inbounds i8, ptr %ptr, i64 7
+  %ld7 = load i8, ptr %gep.6
+  %zext.7 = zext i8 %ld7 to i64
+  %add.7 = add nuw nsw i64 %add.6, %zext.7
+  %gep.7 = getelementptr inbounds i8, ptr %ptr, i64 8
+  %ld8 = load i8, ptr %gep.7
+  %zext.8 = zext i8 %ld8 to i64
+  %add.8 = add nuw nsw i64 %add.7, %zext.8
+  %gep.8 = getelementptr inbounds i8, ptr %ptr, i64 9
+  %ld9 = load i8, ptr %gep.8
+  %zext.9 = zext i8 %ld9 to i64
+  %add.9 = add nuw nsw i64 %add.8, %zext.9
+  %gep.9 = getelementptr inbounds i8, ptr %ptr, i64 10
+  %ld10 = load i8, ptr %gep.9
+  %zext.10 = zext i8 %ld10 to i64
+  %add.10 = add nuw nsw i64 %add.9, %zext.10
+  %gep.10 = getelementptr inbounds i8, ptr %ptr, i64 11
+  %ld11 = load i8, ptr %gep.10
+  %zext.11 = zext i8 %ld11 to i64
+  %add.11 = add nuw nsw i64 %add.10, %zext.11
+  %gep.11 = getelementptr inbounds i8, ptr %ptr, i64 12
+  %ld12 = load i8, ptr %gep.11
+  %zext.12 = zext i8 %ld12 to i64
+  %add.12 = add nuw nsw i64 %add.11, %zext.12
+  %gep.12 = getelementptr inbounds i8, ptr %ptr, i64 13
+  %ld13 = load i8, ptr %gep.12
+  %zext.13 = zext i8 %ld13 to i64
+  %add.13 = add nuw nsw i64 %add.12, %zext.13
+  %gep.13 = getelementptr inbounds i8, ptr %ptr, i64 14
+  %ld14 = load i8, ptr %gep.13
+  %zext.14 = zext i8 %ld14 to i64
+  %add.14 = add nuw nsw i64 %add.13, %zext.14
+  %gep.14 = getelementptr inbounds i8, ptr %ptr, i64 15
+  %ld15 = load i8, ptr %gep.14
+  %zext.15 = zext i8 %ld15 to i64
+  %add.15 = add nuw nsw i64 %add.14, %zext.15
+  ret i64 %add.15
+}
+
+
+
+; Sum of 2 consecutive i64 loads.
+define i64 @red_ld_2xi64(ptr %ptr) {
+; CHECK-LABEL: @red_ld_2xi64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LD0:%.*]] = load i64, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 1
+; CHECK-NEXT:    [[LD1:%.*]] = load i64, ptr [[GEP]], align 4
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i64 [[LD0]], [[LD1]]
+; CHECK-NEXT:    ret i64 [[ADD_1]]
+;
+entry:
+  %ld0 = load i64, ptr %ptr
+  %gep = getelementptr inbounds i64, ptr %ptr, i64 1
+  %ld1 = load i64, ptr %gep
+  %add.1 = add nuw nsw i64 %ld0, %ld1
+  ret i64 %add.1
+}
+
+; Sum of 4 consecutive i64 loads.
+define i64 @red_ld_4xi64(ptr %ptr) {
+; CHECK-LABEL: @red_ld_4xi64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LD0:%.*]] = load i64, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 1
+; CHECK-NEXT:    [[LD1:%.*]] = load i64, ptr [[GEP]], align 4
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i64 [[LD0]], [[LD1]]
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 2
+; CHECK-NEXT:    [[LD2:%.*]] = load i64, ptr [[GEP_1]], align 4
+; CHECK-NEXT:    [[ADD_2:%.*]] = add nuw nsw i64 [[ADD_1]], [[LD2]]
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 3
+; CHECK-NEXT:    [[LD3:%.*]] = load i64, ptr [[GEP_2]], align 4
+; CHECK-NEXT:    [[ADD_3:%.*]] = add nuw nsw i64 [[ADD_2]], [[LD3]]
+; CHECK-NEXT:    ret i64 [[ADD_3]]
+;
+entry:
+  %ld0 = load i64, ptr %ptr
+  %gep = getelementptr inbounds i64, ptr %ptr, i64 1
+  %ld1 = load i64, ptr %gep
+  %add.1 = add nuw nsw i64 %ld0, %ld1
+  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 2
+  %ld2 = load i64, ptr %gep.1
+  %add.2 = add nuw nsw i64 %add.1, %ld2
+  %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 3
+  %ld3 = load i64, ptr %gep.2
+  %add.3 = add nuw nsw i64 %add.2, %ld3
+  ret i64 %add.3
+}
+
+; Sum of 8 consecutive i64 loads.
+define i64 @red_ld_8xi64(ptr %ptr) {
+; CHECK-LABEL: @red_ld_8xi64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LD0:%.*]] = load i64, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 1
+; CHECK-NEXT:    [[LD1:%.*]] = load i64, ptr [[GEP]], align 4
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i64 [[LD0]], [[LD1]]
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 2
+; CHECK-NEXT:    [[LD2:%.*]] = load i64, ptr [[GEP_1]], align 4
+; CHECK-NEXT:    [[ADD_2:%.*]] = add nuw nsw i64 [[ADD_1]], [[LD2]]
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 3
+; CHECK-NEXT:    [[LD3:%.*]] = load i64, ptr [[GEP_2]], align 4
+; CHECK-NEXT:    [[ADD_3:%.*]] = add nuw nsw i64 [[ADD_2]], [[LD3]]
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 4
+; CHECK-NEXT:    [[LD4:%.*]] = load i64, ptr [[GEP_3]], align 4
+; CHECK-NEXT:    [[ADD_4:%.*]] = add nuw nsw i64 [[ADD_3]], [[LD4]]
+; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 5
+; CHECK-NEXT:    [[LD5:%.*]] = load i64, ptr [[GEP_4]], align 4
+; CHECK-NEXT:    [[ADD_5:%.*]] = add nuw nsw i64 [[ADD_4]], [[LD5]]
+; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 6
+; CHECK-NEXT:    [[LD6:%.*]] = load i64, ptr [[GEP_5]], align 4
+; CHECK-NEXT:    [[ADD_6:%.*]] = add nuw nsw i64 [[ADD_5]], [[LD6]]
+; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 7
+; CHECK-NEXT:    [[LD7:%.*]] = load i64, ptr [[GEP_6]], align 4
+; CHECK-NEXT:    [[ADD_7:%.*]] = add nuw nsw i64 [[ADD_6]], [[LD7]]
+; CHECK-NEXT:    ret i64 [[ADD_7]]
+;
+entry:
+  %ld0 = load i64, ptr %ptr
+  %gep = getelementptr inbounds i64, ptr %ptr, i64 1
+  %ld1 = load i64, ptr %gep
+  %add.1 = add nuw nsw i64 %ld0, %ld1
+  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 2
+  %ld2 = load i64, ptr %gep.1
+  %add.2 = add nuw nsw i64 %add.1, %ld2
+  %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 3
+  %ld3 = load i64, ptr %gep.2
+  %add.3 = add nuw nsw i64 %add.2, %ld3
+  %gep.3 = getelementptr inbounds i64, ptr %ptr, i64 4
+  %ld4 = load i64, ptr %gep.3
+  %add.4 = add nuw nsw i64 %add.3, %ld4
+  %gep.4 = getelementptr inbounds i64, ptr %ptr, i64 5
+  %ld5 = load i64, ptr %gep.4
+  %add.5 = add nuw nsw i64 %add.4, %ld5
+  %gep.5 = getelementptr inbounds i64, ptr %ptr, i64 6
+  %ld6 = load i64, ptr %gep.5
+  %add.6 = add nuw nsw i64 %add.5, %ld6
+  %gep.6 = getelementptr inbounds i64, ptr %ptr, i64 7
+  %ld7 = load i64, ptr %gep.6
+  %add.7 = add nuw nsw i64 %add.6, %ld7
+  ret i64 %add.7
+}
+
+; Sum of 16 consecutive i64 loads.
+define i64 @red_ld_16xi64(ptr %ptr) {
+; CHECK-LABEL: @red_ld_16xi64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LD0:%.*]] = load i64, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 1
+; CHECK-NEXT:    [[LD1:%.*]] = load i64, ptr [[GEP]], align 4
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i64 [[LD0]], [[LD1]]
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 2
+; CHECK-NEXT:    [[LD2:%.*]] = load i64, ptr [[GEP_1]], align 4
+; CHECK-NEXT:    [[ADD_2:%.*]] = add nuw nsw i64 [[ADD_1]], [[LD2]]
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 3
+; CHECK-NEXT:    [[LD3:%.*]] = load i64, ptr [[GEP_2]], align 4
+; CHECK-NEXT:    [[ADD_3:%.*]] = add nuw nsw i64 [[ADD_2]], [[LD3]]
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 4
+; CHECK-NEXT:    [[LD4:%.*]] = load i64, ptr [[GEP_3]], align 4
+; CHECK-NEXT:    [[ADD_4:%.*]] = add nuw nsw i64 [[ADD_3]], [[LD4]]
+; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 5
+; CHECK-NEXT:    [[LD5:%.*]] = load i64, ptr [[GEP_4]], align 4
+; CHECK-NEXT:    [[ADD_5:%.*]] = add nuw nsw i64 [[ADD_4]], [[LD5]]
+; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 6
+; CHECK-NEXT:    [[LD6:%.*]] = load i64, ptr [[GEP_5]], align 4
+; CHECK-NEXT:    [[ADD_6:%.*]] = add nuw nsw i64 [[ADD_5]], [[LD6]]
+; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 7
+; CHECK-NEXT:    [[LD7:%.*]] = load i64, ptr [[GEP_6]], align 4
+; CHECK-NEXT:    [[ADD_7:%.*]] = add nuw nsw i64 [[ADD_6]], [[LD7]]
+; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 8
+; CHECK-NEXT:    [[LD8:%.*]] = load i64, ptr [[GEP_7]], align 4
+; CHECK-NEXT:    [[ADD_8:%.*]] = add nuw nsw i64 [[ADD_7]], [[LD8]]
+; CHECK-NEXT:    [[GEP_8:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 9
+; CHECK-NEXT:    [[LD9:%.*]] = load i64, ptr [[GEP_8]], align 4
+; CHECK-NEXT:    [[ADD_9:%.*]] = add nuw nsw i64 [[ADD_8]], [[LD9]]
+; CHECK-NEXT:    [[GEP_9:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 10
+; CHECK-NEXT:    [[LD10:%.*]] = load i64, ptr [[GEP_9]], align 4
+; CHECK-NEXT:    [[ADD_10:%.*]] = add nuw nsw i64 [[ADD_9]], [[LD10]]
+; CHECK-NEXT:    [[GEP_10:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 11
+; CHECK-NEXT:    [[LD11:%.*]] = load i64, ptr [[GEP_10]], align 4
+; CHECK-NEXT:    [[ADD_11:%.*]] = add nuw nsw i64 [[ADD_10]], [[LD11]]
+; CHECK-NEXT:    [[GEP_11:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 12
+; CHECK-NEXT:    [[LD12:%.*]] = load i64, ptr [[GEP_11]], align 4
+; CHECK-NEXT:    [[ADD_12:%.*]] = add nuw nsw i64 [[ADD_11]], [[LD12]]
+; CHECK-NEXT:    [[GEP_12:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 13
+; CHECK-NEXT:    [[LD13:%.*]] = load i64, ptr [[GEP_12]], align 4
+; CHECK-NEXT:    [[ADD_13:%.*]] = add nuw nsw i64 [[ADD_12]], [[LD13]]
+; CHECK-NEXT:    [[GEP_13:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 14
+; CHECK-NEXT:    [[LD14:%.*]] = load i64, ptr [[GEP_13]], align 4
+; CHECK-NEXT:    [[ADD_14:%.*]] = add nuw nsw i64 [[ADD_13]], [[LD14]]
+; CHECK-NEXT:    [[GEP_14:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 15
+; CHECK-NEXT:    [[LD15:%.*]] = load i64, ptr [[GEP_14]], align 4
+; CHECK-NEXT:    [[ADD_15:%.*]] = add nuw nsw i64 [[ADD_14]], [[LD15]]
+; CHECK-NEXT:    ret i64 [[ADD_15]]
+;
+entry:
+  %ld0 = load i64, ptr %ptr
+  %gep = getelementptr inbounds i64, ptr %ptr, i64 1
+  %ld1 = load i64, ptr %gep
+  %add.1 = add nuw nsw i64 %ld0, %ld1
+  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 2
+  %ld2 = load i64, ptr %gep.1
+  %add.2 = add nuw nsw i64 %add.1, %ld2
+  %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 3
+  %ld3 = load i64, ptr %gep.2
+  %add.3 = add nuw nsw i64 %add.2, %ld3
+  %gep.3 = getelementptr inbounds i64, ptr %ptr, i64 4
+  %ld4 = load i64, ptr %gep.3
+  %add.4 = add nuw nsw i64 %add.3, %ld4
+  %gep.4 = getelementptr inbounds i64, ptr %ptr, i64 5
+  %ld5 = load i64, ptr %gep.4
+  %add.5 = add nuw nsw i64 %add.4, %ld5
+  %gep.5 = getelementptr inbounds i64, ptr %ptr, i64 6
+  %ld6 = load i64, ptr %gep.5
+  %add.6 = add nuw nsw i64 %add.5, %ld6
+  %gep.6 = getelementptr inbounds i64, ptr %ptr, i64 7
+  %ld7 = load i64, ptr %gep.6
+  %add.7 = add nuw nsw i64 %add.6, %ld7
+  %gep.7 = getelementptr inbounds i64, ptr %ptr, i64 8
+  %ld8 = load i64, ptr %gep.7
+  %add.8 = add nuw nsw i64 %add.7, %ld8
+  %gep.8 = getelementptr inbounds i64, ptr %ptr, i64 9
+  %ld9 = load i64, ptr %gep.8
+  %add.9 = add nuw nsw i64 %add.8, %ld9
+  %gep.9 = getelementptr inbounds i64, ptr %ptr, i64 10
+  %ld10 = load i64, ptr %gep.9
+  %add.10 = add nuw nsw i64 %add.9, %ld10
+  %gep.10 = getelementptr inbounds i64, ptr %ptr, i64 11
+  %ld11 = load i64, ptr %gep.10
+  %add.11 = add nuw nsw i64 %add.10, %ld11
+  %gep.11 = getelementptr inbounds i64, ptr %ptr, i64 12
+  %ld12 = load i64, ptr %gep.11
+  %add.12 = add nuw nsw i64 %add.11, %ld12
+  %gep.12 = getelementptr inbounds i64, ptr %ptr, i64 13
+  %ld13 = load i64, ptr %gep.12
+  %add.13 = add nuw nsw i64 %add.12, %ld13
+  %gep.13 = getelementptr inbounds i64, ptr %ptr, i64 14
+  %ld14 = load i64, ptr %gep.13
+  %add.14 = add nuw nsw i64 %add.13, %ld14
+  %gep.14 = getelementptr inbounds i64, ptr %ptr, i64 15
+  %ld15 = load i64, ptr %gep.14
+  %add.15 = add nuw nsw i64 %add.14, %ld15
+  ret i64 %add.15
+}