Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -153,7 +153,8 @@
 bool AArch64TTIImpl::shouldMaximizeVectorBandwidth(
     TargetTransformInfo::RegisterKind K) const {
   assert(K != TargetTransformInfo::RGK_Scalar);
-  return K == TargetTransformInfo::RGK_FixedWidthVector;
+  return (K == TargetTransformInfo::RGK_FixedWidthVector &&
+          !ST->forceStreamingCompatibleSVE());
 }
 
 /// Calculate the cost of materializing a 64-bit value. This helper
Index: llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; REQUIRES: asserts
+; RUN: opt < %s -passes=loop-vectorize -debug-only=loop-vectorize -force-streaming-compatible-sve -mattr=+sve -scalable-vectorization=off -aarch64-sve-vector-bits-min=128 -S 2>&1 | FileCheck %s --check-prefix=SC_SVE
+; RUN: opt < %s -passes=loop-vectorize -debug-only=loop-vectorize -mattr=+sve -scalable-vectorization=off -aarch64-sve-vector-bits-min=128 -S -disable-output 2>&1 | FileCheck %s --check-prefix=NO_SC_SVE
+; SC_SVE: LV: Selecting VF: 2.
+; NO_SC_SVE: LV: Selecting VF: 8.
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-none-linux-gnu"
+
+define void @fxpAutoCorrelation() {
+; SC_SVE-LABEL: @fxpAutoCorrelation(
+; SC_SVE-NEXT: entry:
+; SC_SVE-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; SC_SVE: vector.ph:
+; SC_SVE-NEXT: br label [[VECTOR_BODY:%.*]]
+; SC_SVE: vector.body:
+; SC_SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SC_SVE-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
+; SC_SVE-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
+; SC_SVE-NEXT: [[TMP0]] = or <2 x i32> zeroinitializer, [[VEC_PHI]]
+; SC_SVE-NEXT: [[TMP1]] = or <2 x i32> zeroinitializer, [[VEC_PHI1]]
+; SC_SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; SC_SVE-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
+; SC_SVE-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; SC_SVE: middle.block:
+; SC_SVE-NEXT: [[BIN_RDX:%.*]] = or <2 x i32> [[TMP1]], [[TMP0]]
+; SC_SVE-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[BIN_RDX]])
+; SC_SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0
+; SC_SVE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; SC_SVE: scalar.ph:
+; SC_SVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; SC_SVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
+; SC_SVE-NEXT: br label [[FOR_BODY6:%.*]]
+; SC_SVE: for.body6:
+; SC_SVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY6]] ]
+; SC_SVE-NEXT: [[ACCUMULATOR_032:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD12:%.*]], [[FOR_BODY6]] ]
+; SC_SVE-NEXT: [[TMP4:%.*]] = load i16, ptr null, align 2
+; SC_SVE-NEXT: [[CONV10:%.*]] = sext i16 0 to i32
+; SC_SVE-NEXT: [[MUL:%.*]] = mul i32 [[CONV10]], 0
+; SC_SVE-NEXT: [[ADD12]] = or i32 0, [[ACCUMULATOR_032]]
+; SC_SVE-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; SC_SVE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 0
+; SC_SVE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY6]], !llvm.loop [[LOOP3:![0-9]+]]
+; SC_SVE: for.end:
+; SC_SVE-NEXT: [[ADD12_LCSSA:%.*]] = phi i32 [ [[ADD12]], [[FOR_BODY6]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
+; SC_SVE-NEXT: [[TMP5:%.*]] = lshr i32 [[ADD12_LCSSA]], 0
+; SC_SVE-NEXT: ret void
+;
+entry:
+  br label %for.body6
+
+for.body6:                                        ; preds = %for.body6, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body6 ]
+  %Accumulator.032 = phi i32 [ 0, %entry ], [ %add12, %for.body6 ]
+  %0 = load i16, ptr null, align 2
+  %conv10 = sext i16 0 to i32
+  %mul = mul i32 %conv10, 0
+  %add12 = or i32 0, %Accumulator.032
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 0
+  br i1 %exitcond.not, label %for.end, label %for.body6
+
+for.end:                                          ; preds = %for.body6
+  %1 = lshr i32 %add12, 0
+  ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; NO_SC_SVE: {{.*}}