diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -1926,6 +1926,18 @@
          "' does not apply to function return values",
          V);
 
+  unsigned MaxParameterWidth = 0;
+  auto GetMaxParameterWidth = [&MaxParameterWidth](Type *Ty) {
+    if (Ty->isVectorTy()) {
+      if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
+        unsigned Size = VT->getPrimitiveSizeInBits().getFixedSize();
+        if (Size > MaxParameterWidth)
+          MaxParameterWidth = Size;
+      }
+    }
+  };
+  GetMaxParameterWidth(FT->getReturnType());
+
   verifyParameterAttrs(RetAttrs, FT->getReturnType(), V);
 
   // Verify parameter attributes.
@@ -1943,6 +1955,7 @@
     }
 
     verifyParameterAttrs(ArgAttrs, Ty, V);
+    GetMaxParameterWidth(Ty);
 
     if (ArgAttrs.hasAttribute(Attribute::Nest)) {
       Assert(!SawNest, "More than one parameter has attribute nest!", V);
@@ -2084,6 +2097,15 @@
       CheckFailed("invalid value for 'frame-pointer' attribute: " + FP, V);
   }
 
+  if (Attrs.hasFnAttr("min-legal-vector-width")) {
+    StringRef Val =
+        Attrs.getFnAttr("min-legal-vector-width").getValueAsString();
+    unsigned Width;
+    if (Val.getAsInteger(0, Width) || Width < MaxParameterWidth)
+      CheckFailed(
+          "invalid value for 'min-legal-vector-width' attribute: " + Val, V);
+  }
+
   checkUnsignedBaseTenFuncAttr(Attrs, "patchable-function-prefix", V);
   checkUnsignedBaseTenFuncAttr(Attrs, "patchable-function-entry", V);
   checkUnsignedBaseTenFuncAttr(Attrs, "warn-stack-size", V);
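The new check only inspects the function signature: GetMaxParameterWidth is applied to the return type and to each parameter type, so "min-legal-vector-width" must both parse as an integer and be at least as large as the widest fixed-width vector in the signature. As a minimal sketch (not part of the patch; function names are hypothetical), IR along these lines would now be rejected or accepted by the verifier:

; <16 x i32> is 512 bits wide, so an attribute value of 256 now fails with:
;   invalid value for 'min-legal-vector-width' attribute: 256
define void @too_narrow(<16 x i32> %v) "min-legal-vector-width"="256" {
  ret void
}

; Raising the attribute to cover the widest signature type keeps the IR valid.
define void @wide_enough(<16 x i32> %v) "min-legal-vector-width"="512" {
  ret void
}

The test updates below bring the existing tests in line with this rule.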
diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
--- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
@@ -2026,17 +2026,23 @@
   ret void
 }
 
-define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width"="256" "prefer-vector-width"="256" nounwind {
+@res = external global <16 x i32>, align 64
+
+define void @pr52561(<8 x i32> %a1, <8 x i32> %a2, <8 x i32> %b1, <8 x i32> %b2) "min-legal-vector-width"="256" "prefer-vector-width"="256" nounwind {
 ; X64-LABEL: pr52561:
 ; X64:       # %bb.0:
 ; X64-NEXT:    vpbroadcastd {{.*#+}} ymm4 = [112,112,112,112,112,112,112,112]
+; X64-NEXT:    vpaddd %ymm4, %ymm3, %ymm3
+; X64-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
 ; X64-NEXT:    vpaddd %ymm4, %ymm2, %ymm2
 ; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
-; X64-NEXT:    vpaddd %ymm4, %ymm3, %ymm2
-; X64-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
-; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
 ; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; X64-NEXT:    vmovsh %xmm0, %xmm2, %xmm0
+; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; X64-NEXT:    movq res@GOTPCREL(%rip), %rax
+; X64-NEXT:    vmovdqa %ymm1, 32(%rax)
+; X64-NEXT:    vmovaps %ymm0, (%rax)
+; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: pr52561:
@@ -2047,17 +2053,23 @@
 ; X86-NEXT:    subl $32, %esp
 ; X86-NEXT:    vpaddd 8(%ebp), %ymm1, %ymm1
 ; X86-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [112,112,112,112,112,112,112,112]
+; X86-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
 ; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm2
 ; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
-; X86-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
-; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
 ; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; X86-NEXT:    vmovsh %xmm0, %xmm2, %xmm0
+; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
+; X86-NEXT:    vmovdqa %ymm1, res+32
+; X86-NEXT:    vmovaps %ymm0, res
 ; X86-NEXT:    movl %ebp, %esp
 ; X86-NEXT:    popl %ebp
+; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
+  %a = shufflevector <8 x i32> %a1, <8 x i32> %a2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %b = shufflevector <8 x i32> %b1, <8 x i32> %b2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %1 = add <16 x i32> %a, <i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112>
   %2 = add <16 x i32> %1, %b
   %3 = and <16 x i32> %2,
-  ret <16 x i32> %3
+  store <16 x i32> %3, <16 x i32>* @res, align 64
+  ret void
 }
diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
--- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
@@ -769,23 +769,26 @@
 }
 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
 
-define <16 x i32> @trunc_v16i64_v16i32(<16 x i64>* %x) nounwind "min-legal-vector-width"="256" {
+@res = external global <16 x i32>, align 64
+
+define void @trunc_v16i64_v16i32(<16 x i64>* %x) nounwind "min-legal-vector-width"="256" {
 ; CHECK-LABEL: trunc_v16i64_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovdqa (%rdi), %ymm0
 ; CHECK-NEXT:    vmovdqa 32(%rdi), %ymm1
 ; CHECK-NEXT:    vmovdqa 64(%rdi), %ymm2
 ; CHECK-NEXT:    vmovdqa 96(%rdi), %ymm3
-; CHECK-NEXT:    vpmovqd %ymm0, %xmm0
-; CHECK-NEXT:    vpmovqd %ymm1, %xmm1
-; CHECK-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; CHECK-NEXT:    vpmovqd %ymm2, %xmm1
-; CHECK-NEXT:    vpmovqd %ymm3, %xmm2
-; CHECK-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
+; CHECK-NEXT:    movq res@GOTPCREL(%rip), %rax
+; CHECK-NEXT:    vpmovqd %ymm3, 48(%rax)
+; CHECK-NEXT:    vpmovqd %ymm2, 32(%rax)
+; CHECK-NEXT:    vpmovqd %ymm1, 16(%rax)
+; CHECK-NEXT:    vpmovqd %ymm0, (%rax)
+; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %a = load <16 x i64>, <16 x i64>* %x
   %b = trunc <16 x i64> %a to <16 x i32>
-  ret <16 x i32> %b
+  store <16 x i32> %b, <16 x i32>* @res, align 64
+  ret void
 }
 
 define <16 x i8> @trunc_v16i64_v16i8(<16 x i64>* %x) nounwind "min-legal-vector-width"="256" {
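Both CodeGen tests are updated with the same idea: the 512-bit value is kept out of the function signature, either by splitting it across <8 x i32> arguments or by storing the result to a global instead of returning it. Wide values inside the function body remain fine, since the verifier only measures parameter and return types. A minimal sketch of the pattern (names hypothetical, not from the patch):

@out = external global <16 x i32>, align 64

define void @wide_body_ok(<16 x i64>* %p) "min-legal-vector-width"="256" {
  %v = load <16 x i64>, <16 x i64>* %p              ; 1024-bit value in the body: not checked
  %t = trunc <16 x i64> %v to <16 x i32>
  store <16 x i32> %t, <16 x i32>* @out, align 64   ; result leaves via a store, not a return
  ret void
}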
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/horiz-math-inseltpoison.ll b/llvm/test/Transforms/PhaseOrdering/X86/horiz-math-inseltpoison.ll
--- a/llvm/test/Transforms/PhaseOrdering/X86/horiz-math-inseltpoison.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/horiz-math-inseltpoison.ll
@@ -62,7 +62,7 @@
   ret <4 x float> %shuffle
 }
 
-define <8 x float> @hadd_reverse_v8f32(<8 x float> %a, <8 x float> %b) #0 {
+define <8 x float> @hadd_reverse_v8f32(<8 x float> %a, <8 x float> %b) #1 {
 ; CHECK-LABEL: @hadd_reverse_v8f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32>
@@ -106,7 +106,7 @@
   ret <8 x float> %vecinit30
 }
 
-define <8 x float> @reverse_hadd_v8f32(<8 x float> %a, <8 x float> %b) #0 {
+define <8 x float> @reverse_hadd_v8f32(<8 x float> %a, <8 x float> %b) #1 {
 ; CHECK-LABEL: @reverse_hadd_v8f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32>
@@ -151,3 +151,4 @@
 }
 
 attributes #0 = { "min-legal-vector-width"="128" "target-cpu"="btver2" "target-features"="+avx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3" }
+attributes #1 = { "min-legal-vector-width"="256" "target-cpu"="btver2" "target-features"="+avx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3" }
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/horiz-math.ll b/llvm/test/Transforms/PhaseOrdering/X86/horiz-math.ll
--- a/llvm/test/Transforms/PhaseOrdering/X86/horiz-math.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/horiz-math.ll
@@ -62,7 +62,7 @@
   ret <4 x float> %shuffle
 }
 
-define <8 x float> @hadd_reverse_v8f32(<8 x float> %a, <8 x float> %b) #0 {
+define <8 x float> @hadd_reverse_v8f32(<8 x float> %a, <8 x float> %b) #1 {
 ; CHECK-LABEL: @hadd_reverse_v8f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32>
@@ -106,7 +106,7 @@
   ret <8 x float> %vecinit30
 }
 
-define <8 x float> @reverse_hadd_v8f32(<8 x float> %a, <8 x float> %b) #0 {
+define <8 x float> @reverse_hadd_v8f32(<8 x float> %a, <8 x float> %b) #1 {
 ; CHECK-LABEL: @reverse_hadd_v8f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32>
@@ -151,3 +151,4 @@
 }
 
 attributes #0 = { "min-legal-vector-width"="128" "target-cpu"="btver2" "target-features"="+avx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3" }
+attributes #1 = { "min-legal-vector-width"="256" "target-cpu"="btver2" "target-features"="+avx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3" }
diff --git a/llvm/test/Transforms/SampleProfile/merge-function-attributes.ll b/llvm/test/Transforms/SampleProfile/merge-function-attributes.ll
--- a/llvm/test/Transforms/SampleProfile/merge-function-attributes.ll
+++ b/llvm/test/Transforms/SampleProfile/merge-function-attributes.ll
@@ -4,12 +4,15 @@
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-grtev4-linux-gnu"
 
+@res = external global <8 x double>, align 64
+
 ; Verify that yyy is inlined into xxx with the function attibutes properly merged.
-; CHECK: define <8 x double> @xxx(){{.*}} #[[ATTRNO:[0-9]+]]
+; CHECK: define void @xxx(){{.*}} #[[ATTRNO:[0-9]+]]
 ; CHECK-NEXT: call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512
-define <8 x double> @xxx() #0 !dbg !5 {
+define void @xxx() #0 !dbg !5 {
   %x = call <8 x double> @yyy(), !dbg !7
-  ret <8 x double> %x
+  store <8 x double> %x, <8 x double>* @res, align 64
+  ret void
 }
 
 define available_externally <8 x double> @yyy() #1 !dbg !8 {
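The remaining updates follow from the same rule. The PhaseOrdering tests pass and return <8 x float> (256 bits), so keeping attribute group #0 with "min-legal-vector-width"="128" would now trip the verifier; the new group #1 raises the value to 256 and leaves the target features untouched. Likewise, @xxx in the SampleProfile test returned <8 x double> (512 bits); storing the result through @res keeps its signature narrow regardless of the attribute value it starts with before merging (its attribute groups are outside the hunk shown). A hypothetical reduction of the PhaseOrdering case (not part of the patch):

; 256-bit parameter and return with a 128-bit attribute: now a verifier error.
define <8 x float> @needs_256(<8 x float> %a) "min-legal-vector-width"="128" {
  ret <8 x float> %a
}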