diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -34378,7 +34378,7 @@ return DAG.getBitcast(RootVT, V1); } - bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool OptForSize = DAG.shouldOptForSize(); unsigned RootSizeInBits = RootVT.getSizeInBits(); unsigned NumRootElts = RootVT.getVectorNumElements(); unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts; @@ -39218,7 +39218,7 @@ } // Only use (F)HADD opcodes if they aren't microcoded or minimizes codesize. - bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool OptForSize = DAG.shouldOptForSize(); if (!Subtarget.hasFastHorizontalOps() && !OptForSize) return SDValue(); diff --git a/llvm/test/CodeGen/X86/phaddsub-extract.ll b/llvm/test/CodeGen/X86/phaddsub-extract.ll --- a/llvm/test/CodeGen/X86/phaddsub-extract.ll +++ b/llvm/test/CodeGen/X86/phaddsub-extract.ll @@ -2094,6 +2094,28 @@ ret i32 %x230 } +define i32 @hadd32_4_pgso(<4 x i32> %x225) !prof !14 { +; SSE3-LABEL: hadd32_4_pgso: +; SSE3: # %bb.0: +; SSE3-NEXT: phaddd %xmm0, %xmm0 +; SSE3-NEXT: phaddd %xmm0, %xmm0 +; SSE3-NEXT: movd %xmm0, %eax +; SSE3-NEXT: retq +; +; AVX-LABEL: hadd32_4_pgso: +; AVX: # %bb.0: +; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: retq + %x226 = shufflevector <4 x i32> %x225, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> + %x227 = add <4 x i32> %x225, %x226 + %x228 = shufflevector <4 x i32> %x227, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %x229 = add <4 x i32> %x227, %x228 + %x230 = extractelement <4 x i32> %x229, i32 0 + ret i32 %x230 +} + define i32 @hadd32_8_optsize(<8 x i32> %x225) optsize { ; SSE3-LABEL: hadd32_8_optsize: ; SSE3: # %bb.0: @@ -2141,3 +2163,20 @@ %x230 = extractelement <16 x i32> %x229, i32 0 ret i32 %x230 } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", 
!1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 0} diff --git a/llvm/test/CodeGen/X86/splat-for-size.ll b/llvm/test/CodeGen/X86/splat-for-size.ll --- a/llvm/test/CodeGen/X86/splat-for-size.ll +++ b/llvm/test/CodeGen/X86/splat-for-size.ll @@ -417,6 +417,33 @@ ret <8 x i64> %shuffle } +define <8 x i64> @pr23259_pgso() !prof !14 { +; AVX-LABEL: pr23259_pgso: +; AVX: # %bb.0: # %entry +; AVX-NEXT: movl $1, %eax +; AVX-NEXT: vmovq %rax, %xmm0 +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1 +; AVX-NEXT: vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1] +; AVX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; AVX-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1] +; AVX-NEXT: retq +; +; AVX2-LABEL: pr23259_pgso: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vmovdqa {{.*}}(%rip), %ymm0 +; AVX2-NEXT: movl $1, %eax +; AVX2-NEXT: vmovq %rax, %xmm1 +; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,1,1] +; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1] +; AVX2-NEXT: retq +entry: + %0 = load <4 x i64>, <4 x i64>* bitcast (<3 x i64>* @A to <4 x i64>*), align 32 + %1 = shufflevector <4 x i64> %0, <4 x i64> undef, <3 x i32> <i32 undef, i32 1, i32 2> + %shuffle = shufflevector <3 x i64> <i64 undef, i64 undef, i64 1>, <3 x i64> %1, <8 x i32> <i32 5, i32 0, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2> + ret <8 x i64> %shuffle +} + attributes #0 = { optsize } attributes #1 = { minsize }