Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -814,6 +814,9 @@ bool mergeStoresAfterLegalization() const override { return true; } + bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, + const SelectionDAG &DAG) const override; + bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -4611,6 +4611,20 @@ return Subtarget.hasLZCNT(); } +bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT, + const SelectionDAG &DAG) const { + // Do not merge to float value size (128 bits) if the noimplicitfloat + // attribute is set. + bool NoFloat = DAG.getMachineFunction().getFunction()->hasFnAttribute( + Attribute::NoImplicitFloat); + + if (NoFloat) { + unsigned MaxIntSize = Subtarget.is64Bit() ? 
64 : 32; + return (MemVT.getSizeInBits() <= MaxIntSize); + } + return true; +} + bool X86TargetLowering::isCtlzFast() const { return Subtarget.hasFastLZCNT(); } Index: test/CodeGen/X86/pr34421.ll =================================================================== --- test/CodeGen/X86/pr34421.ll +++ test/CodeGen/X86/pr34421.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-apple-macosx10.13.0 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.13.0 | FileCheck %s --check-prefix=X64 + +; Function Attrs: noimplicitfloat noredzone nounwind +define void @thread_selfcounts() local_unnamed_addr #0 { +; X86-LABEL: thread_selfcounts: +; X86: ## BB#0: ## %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: subl $56, %esp +; X86-NEXT: movl -56(%ebp), %eax +; X86-NEXT: movl -52(%ebp), %ecx +; X86-NEXT: movl %ecx, -20(%ebp) +; X86-NEXT: movl %eax, -24(%ebp) +; X86-NEXT: movl -48(%ebp), %eax +; X86-NEXT: movl -44(%ebp), %ecx +; X86-NEXT: movl %ecx, -12(%ebp) +; X86-NEXT: movl %eax, -16(%ebp) +; X86-NEXT: ## -- End function +; +; X64-LABEL: thread_selfcounts: +; X64: ## BB#0: ## %entry +; X64-NEXT: pushq %rbp +; X64-NEXT: movq %rsp, %rbp +; X64-NEXT: subq $48, %rsp +; X64-NEXT: movq -48(%rbp), %rax +; X64-NEXT: movq -40(%rbp), %rcx +; X64-NEXT: movq %rax, -16(%rbp) +; X64-NEXT: movq %rcx, -8(%rbp) +; X64-NEXT: ## -- End function +entry: + %counts = alloca [2 x i64], align 16 + %thread_counts = alloca [3 x i64], align 16 + %arraydecay = getelementptr inbounds [3 x i64], [3 x i64]* %thread_counts, i64 0, i64 0 + %0 = load i64, i64* %arraydecay, align 16 + %arrayidx3 = getelementptr inbounds [2 x i64], [2 x i64]* %counts, i64 0, i64 0 + store i64 %0, i64* %arrayidx3, align 16 + %arrayidx6 = getelementptr inbounds [3 x i64], [3 x i64]* %thread_counts, i64 0, i64 1 + %1 = load i64, i64* %arrayidx6, align 8 + %arrayidx10 = getelementptr inbounds [2 x i64], 
[2 x i64]* %counts, i64 0, i64 1 + store i64 %1, i64* %arrayidx10, align 8 + unreachable +} + +attributes #0 = { noimplicitfloat noredzone nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }