Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7361,6 +7361,7 @@ bool Changed = false; SmallVector Incoming; SmallPtrSet VisitedInstrs; + unsigned MaxVecRegSize = R.getMaxVecRegSize(); bool HaveVectorizedPhiNodes = true; while (HaveVectorizedPhiNodes) { @@ -7387,8 +7388,18 @@ // Look for the next elements with the same type. SmallVector::iterator SameTypeIt = IncIt; + Type *EltTy = (*IncIt)->getType(); + unsigned EltSize = EltTy->isSized() ? DL->getTypeSizeInBits(EltTy) + : MaxVecRegSize; + unsigned MaxNumElts = MaxVecRegSize / EltSize; + if (MaxNumElts < 2) { + ++IncIt; + continue; + } + while (SameTypeIt != E && - (*SameTypeIt)->getType() == (*IncIt)->getType()) { + (*SameTypeIt)->getType() == EltTy && + (SameTypeIt - IncIt) < MaxNumElts) { VisitedInstrs.insert(*SameTypeIt); ++SameTypeIt; } Index: llvm/test/Transforms/SLPVectorizer/X86/remark_unsupported.ll =================================================================== --- llvm/test/Transforms/SLPVectorizer/X86/remark_unsupported.ll +++ llvm/test/Transforms/SLPVectorizer/X86/remark_unsupported.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s +; RUN: opt -S -mtriple=x86_64-pc-linux-gnu -mcpu=generic -slp-vectorizer --slp-max-reg-size=256 -pass-remarks-output=%t < %s | FileCheck %s ; RUN: FileCheck --input-file=%t --check-prefix=YAML %s ; This type is not supported by SLP Index: llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll @@ -0,0 +1,135 @@ +; RUN: opt -slp-vectorizer -S -slp-max-reg-size=32 < %s | FileCheck 
-check-prefixes=FUNC,MAX32 %s
+; RUN: opt -slp-vectorizer -S -slp-max-reg-size=128 < %s | FileCheck -check-prefixes=FUNC,MAX128 %s
+; RUN: opt -slp-vectorizer -S -slp-max-reg-size=1024 < %s | FileCheck -check-prefixes=FUNC,MAX1024 %s
+; Each invocation above sets a different -slp-max-reg-size; the directives below pin the PHI type expected for it.
+; FUNC-LABEL: @phi_float32()
+; MAX32-COUNT-32: phi float
+; MAX128-COUNT-8: phi <4 x float>
+; MAX1024-COUNT-2: phi <32 x float>
+; NOTE(review): the count directives used to lack the dash before the number, so presumably never matched; confirm the counts against real opt output.
+define void @phi_float32() {
+entry:
+  br i1 undef, label %.thread3941, label %bb
+
+bb:                                               ; preds = %entry
+  br label %.thread3941
+
+.thread3941:                                      ; preds = %bb, %entry
+  %i = fpext half undef to float
+  %i1 = fmul contract float %i, undef
+  %i2 = fadd contract float 0.000000e+00, %i1
+  %i3 = fpext half undef to float
+  %i4 = fmul contract float %i3, undef
+  %i5 = fadd contract float 0.000000e+00, %i4
+  %i6 = fpext half undef to float
+  %i7 = fmul contract float %i6, undef
+  %i8 = fadd contract float 0.000000e+00, %i7
+  %i9 = fpext half undef to float
+  %i10 = fmul contract float %i9, undef
+  %i11 = fadd contract float 0.000000e+00, %i10
+  %i12 = fmul contract float %i, undef
+  %i13 = fadd contract float 0.000000e+00, %i12
+  %i14 = fmul contract float %i3, undef
+  %i15 = fadd contract float 0.000000e+00, %i14
+  %i16 = fmul contract float %i6, undef
+  %i17 = fadd contract float 0.000000e+00, %i16
+  %i18 = fmul contract float %i9, undef
+  %i19 = fadd contract float 0.000000e+00, %i18
+  %i20 = fmul contract float %i, undef
+  %i21 = fadd contract float 0.000000e+00, %i20
+  %i22 = fmul contract float %i3, undef
+  %i23 = fadd contract float 0.000000e+00, %i22
+  %i24 = fmul contract float %i6, undef
+  %i25 = fadd contract float 0.000000e+00, %i24
+  %i26 = fmul contract float %i9, undef
+  %i27 = fadd contract float 0.000000e+00, %i26
+  %i28 = fmul contract float %i, undef
+  %i29 = fadd contract float 0.000000e+00, %i28
+  %i30 = fmul contract float %i3, undef
+  %i31 = fadd contract float 0.000000e+00, %i30
+  %i32 = fmul contract float %i6, undef
+  %i33 = fadd contract float 0.000000e+00, %i32
+  %i34 = fmul contract float %i9, undef
+  %i35 = fadd contract float 0.000000e+00, %i34
+  %i36 = fmul contract float %i, undef
+  %i37 = fadd contract float 0.000000e+00, %i36
+  %i38 = fmul contract float %i3, undef
+  %i39 = fadd contract float 0.000000e+00, %i38
+  %i40 = fmul contract float %i6, undef
+  %i41 = fadd contract float 0.000000e+00, %i40
+  %i42 = fmul contract float %i9, undef
+  %i43 = fadd contract float 0.000000e+00, %i42
+  %i44 = fmul contract float %i, undef
+  %i45 = fadd contract float 0.000000e+00, %i44
+  %i46 = fmul contract float %i3, undef
+  %i47 = fadd contract float 0.000000e+00, %i46
+  %i48 = fmul contract float %i6, undef
+  %i49 = fadd contract float 0.000000e+00, %i48
+  %i50 = fmul contract float %i9, undef
+  %i51 = fadd contract float 0.000000e+00, %i50
+  %i52 = fmul contract float %i, undef
+  %i53 = fadd contract float 0.000000e+00, %i52
+  %i54 = fmul contract float %i3, undef
+  %i55 = fadd contract float 0.000000e+00, %i54
+  %i56 = fmul contract float %i6, undef
+  %i57 = fadd contract float 0.000000e+00, %i56
+  %i58 = fmul contract float %i9, undef
+  %i59 = fadd contract float 0.000000e+00, %i58
+  %i60 = fmul contract float %i, undef
+  %i61 = fadd contract float 0.000000e+00, %i60
+  %i62 = fmul contract float %i3, undef
+  %i63 = fadd contract float 0.000000e+00, %i62
+  %i64 = fmul contract float %i6, undef
+  %i65 = fadd contract float 0.000000e+00, %i64
+  %i66 = fmul contract float %i9, undef
+  %i67 = fadd contract float 0.000000e+00, %i66
+  switch i32 undef, label %bb70 [
+    i32 0, label %bb71
+    i32 1, label %bb68
+    i32 2, label %bb69
+  ]
+
+bb68:                                             ; preds = %.thread3941
+  br label %bb71
+
+bb69:                                             ; preds = %.thread3941
+  br label %bb71
+
+bb70:                                             ; preds = %.thread3941
+  br label %bb71
+
+bb71:                                             ; preds = %bb70, %bb69, %bb68, %.thread3941
+  %.sroa.436.3 = phi float [ %i19, %bb68 ], [ %i19, %bb69 ], [ undef, %bb70 ], [ %i19, %.thread3941 ]
+  %.sroa.374.3 = phi float [ %i17, %bb68 ], [ undef, %bb69 ], [ %i17, %bb70 ], [ %i17, %.thread3941 ]
+  %.sroa.312.3 = phi float [ %i15, %bb68 ], [ undef, %bb69 ], [ undef, %bb70 ], [ undef, %.thread3941 ]
+  %.sroa.250.3 = phi float [ %i13, %bb68 ], [ %i13, %bb69 ], [ %i13, %bb70 ], [ undef, %.thread3941 ]
+  %.sroa.188.3 = phi float [ %i11, %bb68 ], [ %i11, %bb69 ], [ undef, %bb70 ], [ %i11, %.thread3941 ]
+  %.sroa.126.3 = phi float [ %i8, %bb68 ], [ undef, %bb69 ], [ %i8, %bb70 ], [ %i8, %.thread3941 ]
+  %.sroa.64.3 = phi float [ %i5, %bb68 ], [ undef, %bb69 ], [ undef, %bb70 ], [ undef, %.thread3941 ]
+  %.sroa.02268.3 = phi float [ %i2, %bb68 ], [ %i2, %bb69 ], [ %i2, %bb70 ], [ undef, %.thread3941 ]
+  %.sroa.498.3 = phi float [ %i21, %bb68 ], [ %i21, %bb69 ], [ %i21, %bb70 ], [ undef, %.thread3941 ]
+  %.sroa.560.3 = phi float [ %i23, %bb68 ], [ undef, %bb69 ], [ undef, %bb70 ], [ undef, %.thread3941 ]
+  %.sroa.622.3 = phi float [ %i25, %bb68 ], [ undef, %bb69 ], [ %i25, %bb70 ], [ %i25, %.thread3941 ]
+  %.sroa.684.3 = phi float [ %i27, %bb68 ], [ %i27, %bb69 ], [ undef, %bb70 ], [ %i27, %.thread3941 ]
+  %.sroa.746.3 = phi float [ %i29, %bb68 ], [ %i29, %bb69 ], [ %i29, %bb70 ], [ undef, %.thread3941 ]
+  %.sroa.808.3 = phi float [ %i31, %bb68 ], [ undef, %bb69 ], [ undef, %bb70 ], [ undef, %.thread3941 ]
+  %.sroa.870.3 = phi float [ %i33, %bb68 ], [ undef, %bb69 ], [ %i33, %bb70 ], [ %i33, %.thread3941 ]
+  %.sroa.932.3 = phi float [ %i35, %bb68 ], [ %i35, %bb69 ], [ undef, %bb70 ], [ %i35, %.thread3941 ]
+  %.sroa.994.3 = phi float [ %i37, %bb68 ], [ %i37, %bb69 ], [ %i37, %bb70 ], [ undef, %.thread3941 ]
+  %.sroa.1056.3 = phi float [ %i39, %bb68 ], [ undef, %bb69 ], [ undef, %bb70 ], [ undef, %.thread3941 ]
+  %.sroa.1118.3 = phi float [ %i41, %bb68 ], [ undef, %bb69 ], [ %i41, %bb70 ], [ %i41, %.thread3941 ]
+  %.sroa.1180.3 = phi float [ %i43, %bb68 ], [ %i43, %bb69 ], [ undef, %bb70 ], [ %i43, %.thread3941 ]
+  %.sroa.1242.3 = phi float [ %i45, %bb68 ], [ %i45, %bb69 ], [ %i45, %bb70 ], [ undef, %.thread3941 ]
+  %.sroa.1304.3 = phi float [ %i47, %bb68 ], [ undef, %bb69 ], [ undef, %bb70 ], [ undef, %.thread3941 ]
+  %.sroa.1366.3 = phi float [ %i49, %bb68 ], [ undef, %bb69 ], [ %i49, %bb70 ], [ %i49, %.thread3941 ]
+  %.sroa.1428.3 = phi float [ %i51, %bb68 ], [ %i51, %bb69 ], [ undef, %bb70 ], [ %i51, %.thread3941 ]
+  %.sroa.1490.3 = phi float [ %i53, %bb68 ], [ %i53, %bb69 ], [ %i53, %bb70 ], [ undef, %.thread3941 ]
+  %.sroa.1552.3 = phi float [ %i55, %bb68 ], [ undef, %bb69 ], [ undef, %bb70 ], [ undef, %.thread3941 ]
+  %.sroa.1614.3 = phi float [ %i57, %bb68 ], [ undef, %bb69 ], [ %i57, %bb70 ], [ %i57, %.thread3941 ]
+  %.sroa.1676.3 = phi float [ %i59, %bb68 ], [ %i59, %bb69 ], [ undef, %bb70 ], [ %i59, %.thread3941 ]
+  %.sroa.1738.3 = phi float [ %i61, %bb68 ], [ %i61, %bb69 ], [ %i61, %bb70 ], [ undef, %.thread3941 ]
+  %.sroa.1800.3 = phi float [ %i63, %bb68 ], [ undef, %bb69 ], [ undef, %bb70 ], [ undef, %.thread3941 ]
+  %.sroa.1862.3 = phi float [ %i65, %bb68 ], [ undef, %bb69 ], [ %i65, %bb70 ], [ %i65, %.thread3941 ]
+  %.sroa.1924.3 = phi float [ %i67, %bb68 ], [ %i67, %bb69 ], [ undef, %bb70 ], [ %i67, %.thread3941 ]
+  ret void
+}