Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5397,12 +5397,10 @@ // If the new VT cannot cover all of the remaining bits, then consider // issuing a (or a pair of) unaligned and overlapping load / store. - // FIXME: Only does this for 64-bit or more since we don't have proper - // cost model for unaligned load / store. bool Fast; - if (NumMemOps && AllowOverlap && - VTSize >= 8 && NewVTSize < Size && - TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && Fast) + if (NumMemOps && AllowOverlap && NewVTSize < Size && + TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && + Fast) VTSize = Size; else { VT = NewVT; Index: lib/Target/ARM/ARMLegalizerInfo.cpp =================================================================== --- lib/Target/ARM/ARMLegalizerInfo.cpp +++ lib/Target/ARM/ARMLegalizerInfo.cpp @@ -75,6 +75,13 @@ const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); + if (ST.isThumb()) { + // FIXME: merge with the code for non-Thumb. + computeTables(); + verify(*ST.getInstrInfo()); + return; + } + getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0}); getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0}); Index: test/CodeGen/AArch64/arm64-memcpy-inline.ll =================================================================== --- test/CodeGen/AArch64/arm64-memcpy-inline.ll +++ test/CodeGen/AArch64/arm64-memcpy-inline.ll @@ -16,10 +16,8 @@ define i32 @t0() { entry: ; CHECK-LABEL: t0: -; CHECK: ldrb [[REG0:w[0-9]+]], [x[[BASEREG:[0-9]+]], #10] -; CHECK: strb [[REG0]], [x[[BASEREG2:[0-9]+]], #10] -; CHECK: ldrh [[REG1:w[0-9]+]], [x[[BASEREG]], #8] -; CHECK: strh [[REG1]], [x[[BASEREG2]], #8] +; CHECK: ldur [[REG0:w[0-9]+]], [x[[BASEREG:[0-9]+]], #7] +; CHECK: stur [[REG0]], [x[[BASEREG2:[0-9]+]], #7] ; CHECK: ldr [[REG2:x[0-9]+]], ; CHECK: str [[REG2]], call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i1 false) @@ -74,9 +72,9 @@ define void @t5(i8* nocapture %C) nounwind { entry: ; CHECK-LABEL: t5: -; CHECK: strb wzr, [x0, #6] -; CHECK: mov [[REG7:w[0-9]+]], #21587 -; CHECK: strh [[REG7]], [x0, #4] +; CHECK: mov [[REG7:w[0-9]+]], #21337 +; CHECK: movk [[REG7]], +; CHECK: stur [[REG7]], [x0, #3] ; CHECK: mov [[REG8:w[0-9]+]], ; CHECK: movk [[REG8]], ; CHECK: str [[REG8]], [x0] Index: test/CodeGen/PowerPC/jaggedstructs.ll =================================================================== --- test/CodeGen/PowerPC/jaggedstructs.ll +++ test/CodeGen/PowerPC/jaggedstructs.ll @@ -34,11 +34,9 @@ ; CHECK-DAG: lwz {{[0-9]+}}, 178(1) ; CHECK-DAG: sth {{[0-9]+}}, 70(1) ; CHECK-DAG: stw {{[0-9]+}}, 66(1) -; CHECK-DAG: lbz {{[0-9]+}}, 191(1) -; CHECK-DAG: lhz {{[0-9]+}}, 189(1) +; CHECK-DAG: lwz {{[0-9]+}}, 188(1) ; CHECK-DAG: lwz {{[0-9]+}}, 185(1) -; CHECK-DAG: stb {{[0-9]+}}, 79(1) -; CHECK-DAG: sth {{[0-9]+}}, 77(1) +; CHECK-DAG: stw {{[0-9]+}}, 76(1) ; CHECK-DAG: stw {{[0-9]+}}, 73(1) ; CHECK-DAG: ld 6, 72(1) ; CHECK-DAG: ld 5, 64(1) Index: test/CodeGen/PowerPC/structsinmem.ll =================================================================== --- test/CodeGen/PowerPC/structsinmem.ll +++ test/CodeGen/PowerPC/structsinmem.ll @@ -157,8 +157,7 @@ ; CHECK: stw {{[0-9]+}}, 147(1) ; CHECK: sth {{[0-9]+}}, 158(1) ; CHECK: stw {{[0-9]+}}, 154(1) -; CHECK: stb {{[0-9]+}}, 167(1) -; CHECK: sth {{[0-9]+}}, 165(1) +; CHECK: stw {{[0-9]+}}, 164(1) ; CHECK: stw {{[0-9]+}}, 161(1) } Index: test/CodeGen/PowerPC/structsinregs.ll =================================================================== --- test/CodeGen/PowerPC/structsinregs.ll +++ test/CodeGen/PowerPC/structsinregs.ll @@ -148,8 +148,7 @@ ; CHECK: stw {{[0-9]+}}, 83(1) ; CHECK: sth {{[0-9]+}}, 94(1) ; CHECK: stw {{[0-9]+}}, 90(1) -; CHECK: stb {{[0-9]+}}, 103(1) -; CHECK: sth {{[0-9]+}}, 101(1) +; CHECK: stw {{[0-9]+}}, 100(1) ; CHECK: stw {{[0-9]+}}, 97(1) ; CHECK: ld 9, 96(1) ; CHECK: ld 8, 88(1) Index: test/CodeGen/X86/memcpy-from-string.ll =================================================================== --- test/CodeGen/X86/memcpy-from-string.ll +++ test/CodeGen/X86/memcpy-from-string.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -asm-verbose=false | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s --check-prefix=X86 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -12,11 +13,12 @@ ; Memcpy lowering should emit stores of immediates containing string data from ; the correct offsets. -; CHECK-LABEL: foo: -; CHECK: movb $0, 6(%rdi) -; CHECK: movw $15212, 4(%rdi) -; CHECK: movl $1802117222, (%rdi) define void @foo(i8* %tmp2) { +; X86-LABEL: foo: +; X86: # %bb.0: +; X86-NEXT: movl $3894379, 3(%rdi) # imm = 0x3B6C6B +; X86-NEXT: movl $1802117222, (%rdi) # imm = 0x6B6A2066 +; X86-NEXT: retq call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @0, i64 0, i64 3), i64 7, i1 false) ret void } Index: test/CodeGen/X86/memset-2.ll =================================================================== --- test/CodeGen/X86/memset-2.ll +++ test/CodeGen/X86/memset-2.ll @@ -10,6 +10,7 @@ ; CHECK-NEXT: pushl $0 ; CHECK-NEXT: calll _memset ; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: ud2 entry: call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 188, i1 false) unreachable @@ -22,6 +23,7 @@ ; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $76, {{[0-9]+}}(%esp) ; CHECK-NEXT: calll _memset +; CHECK-NEXT: ud2 entry: call void @llvm.memset.p0i8.i32(i8* undef, i8 %c, i32 76, i1 false) unreachable @@ -49,11 +51,10 @@ ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: imull $16843009, %ecx, %ecx ## imm = 0x1010101 +; CHECK-NEXT: movl %ecx, 11(%eax) ; CHECK-NEXT: movl %ecx, 8(%eax) ; CHECK-NEXT: movl %ecx, 4(%eax) ; CHECK-NEXT: movl %ecx, (%eax) -; CHECK-NEXT: movw %cx, 12(%eax) -; CHECK-NEXT: movb %cl, 14(%eax) ; CHECK-NEXT: retl entry: tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i1 false) Index: test/CodeGen/X86/memset-zero.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/memset-zero.ll @@ -0,0 +1,344 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-linux | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown-linux -mcpu=core2 | FileCheck %s --check-prefix=CORE2 +; RUN: llc < %s -mtriple=x86_64-unknown-linux -mcpu=nehalem | FileCheck %s --check-prefix=NEHALEM + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind + +define void @memset_0(i8* %a) nounwind { +; X86-LABEL: memset_0: +; X86: # %bb.0: # %entry +; X86-NEXT: retl +; +; CORE2-LABEL: memset_0: +; CORE2: # %bb.0: # %entry +; CORE2-NEXT: retq +; +; NEHALEM-LABEL: memset_0: +; NEHALEM: # %bb.0: # %entry +; NEHALEM-NEXT: retq +entry: + call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 0, i1 false) + ret void +} + +define void @memset_4(i8* %a) nounwind { +; X86-LABEL: memset_4: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl $0, (%eax) +; X86-NEXT: retl +; +; CORE2-LABEL: memset_4: +; CORE2: # %bb.0: # %entry +; CORE2-NEXT: movl $0, (%rdi) +; CORE2-NEXT: retq +; +; NEHALEM-LABEL: memset_4: +; NEHALEM: # %bb.0: # %entry +; NEHALEM-NEXT: movl $0, (%rdi) +; NEHALEM-NEXT: retq +entry: + call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 4, i1 false) + ret void +} + +define void @memset_5(i8* %a) nounwind { +; X86-LABEL: memset_5: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb $0, 4(%eax) +; X86-NEXT: movl $0, (%eax) +; X86-NEXT: retl +; +; CORE2-LABEL: memset_5: +; CORE2: # %bb.0: # %entry +; CORE2-NEXT: movb $0, 4(%rdi) +; CORE2-NEXT: movl $0, (%rdi) +; CORE2-NEXT: retq +; +; NEHALEM-LABEL: memset_5: +; NEHALEM: # %bb.0: # %entry +; NEHALEM-NEXT: movb $0, 4(%rdi) +; NEHALEM-NEXT: movl $0, (%rdi) +; NEHALEM-NEXT: retq +entry: + call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 5, i1 false) + ret void +} + +define void @memset_7(i8* %a) nounwind { +; X86-LABEL: memset_7: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl $0, 3(%eax) +; X86-NEXT: movl $0, (%eax) +; X86-NEXT: retl +; +; CORE2-LABEL: memset_7: +; CORE2: # %bb.0: # %entry +; CORE2-NEXT: movl $0, 3(%rdi) +; CORE2-NEXT: movl $0, (%rdi) +; CORE2-NEXT: retq +; +; NEHALEM-LABEL: memset_7: +; NEHALEM: # %bb.0: # %entry +; NEHALEM-NEXT: movl $0, 3(%rdi) +; NEHALEM-NEXT: movl $0, (%rdi) +; NEHALEM-NEXT: retq +entry: + call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 7, i1 false) + ret void +} + +define void @memset_8(i8* %a) nounwind { +; X86-LABEL: memset_8: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl $0, 4(%eax) +; X86-NEXT: movl $0, (%eax) +; X86-NEXT: retl +; +; CORE2-LABEL: memset_8: +; CORE2: # %bb.0: # %entry +; CORE2-NEXT: movq $0, (%rdi) +; CORE2-NEXT: retq +; +; NEHALEM-LABEL: memset_8: +; NEHALEM: # %bb.0: # %entry +; NEHALEM-NEXT: movq $0, (%rdi) +; NEHALEM-NEXT: retq +entry: + call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 8, i1 false) + ret void +} + +define void @memset_11(i8* %a) nounwind { +; X86-LABEL: memset_11: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl $0, 7(%eax) +; X86-NEXT: movl $0, 4(%eax) +; X86-NEXT: movl $0, (%eax) +; X86-NEXT: retl +; +; CORE2-LABEL: memset_11: +; CORE2: # %bb.0: # %entry +; CORE2-NEXT: movl $0, 7(%rdi) +; CORE2-NEXT: movq $0, (%rdi) +; CORE2-NEXT: retq +; +; NEHALEM-LABEL: memset_11: +; NEHALEM: # %bb.0: # %entry +; NEHALEM-NEXT: movl $0, 7(%rdi) +; NEHALEM-NEXT: movq $0, (%rdi) +; NEHALEM-NEXT: retq +entry: + call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 11, i1 false) + ret void +} + +define void @memset_13(i8* %a) nounwind { +; X86-LABEL: memset_13: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb $0, 12(%eax) +; X86-NEXT: movl $0, 8(%eax) +; X86-NEXT: movl $0, 4(%eax) +; X86-NEXT: movl $0, (%eax) +; X86-NEXT: retl +; +; CORE2-LABEL: memset_13: +; CORE2: # %bb.0: # %entry +; CORE2-NEXT: movq $0, 5(%rdi) +; CORE2-NEXT: movq $0, (%rdi) +; CORE2-NEXT: retq +; +; NEHALEM-LABEL: memset_13: +; NEHALEM: # %bb.0: # %entry +; NEHALEM-NEXT: movq $0, 5(%rdi) +; NEHALEM-NEXT: movq $0, (%rdi) +; NEHALEM-NEXT: retq +entry: + call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 13, i1 false) + ret void +} + +define void @memset_15(i8* %a) nounwind { +; X86-LABEL: memset_15: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl $0, 11(%eax) +; X86-NEXT: movl $0, 8(%eax) +; X86-NEXT: movl $0, 4(%eax) +; X86-NEXT: movl $0, (%eax) +; X86-NEXT: retl +; +; CORE2-LABEL: memset_15: +; CORE2: # %bb.0: # %entry +; CORE2-NEXT: movq $0, 7(%rdi) +; CORE2-NEXT: movq $0, (%rdi) +; CORE2-NEXT: retq +; +; NEHALEM-LABEL: memset_15: +; NEHALEM: # %bb.0: # %entry +; NEHALEM-NEXT: movq $0, 7(%rdi) +; NEHALEM-NEXT: movq $0, (%rdi) +; NEHALEM-NEXT: retq +entry: + call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 15, i1 false) + ret void +} + +define void @memset_16(i8* %a) nounwind { +; X86-LABEL: memset_16: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl $0, 12(%eax) +; X86-NEXT: movl $0, 8(%eax) +; X86-NEXT: movl $0, 4(%eax) +; X86-NEXT: movl $0, (%eax) +; X86-NEXT: retl +; +; CORE2-LABEL: memset_16: +; CORE2: # %bb.0: # %entry +; CORE2-NEXT: movq $0, 8(%rdi) +; CORE2-NEXT: movq $0, (%rdi) +; CORE2-NEXT: retq +; +; NEHALEM-LABEL: memset_16: +; NEHALEM: # %bb.0: # %entry +; NEHALEM-NEXT: xorps %xmm0, %xmm0 +; NEHALEM-NEXT: movups %xmm0, (%rdi) +; NEHALEM-NEXT: retq +entry: + call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 16, i1 false) + ret void +} + +define void @memset_17(i8* %a) nounwind { +; X86-LABEL: memset_17: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb $0, 16(%eax) +; X86-NEXT: movl $0, 12(%eax) +; X86-NEXT: movl $0, 8(%eax) +; X86-NEXT: movl $0, 4(%eax) +; X86-NEXT: movl $0, (%eax) +; X86-NEXT: retl +; +; CORE2-LABEL: memset_17: +; CORE2: # %bb.0: # %entry +; CORE2-NEXT: movb $0, 16(%rdi) +; CORE2-NEXT: movq $0, 8(%rdi) +; CORE2-NEXT: movq $0, (%rdi) +; CORE2-NEXT: retq +; +; NEHALEM-LABEL: memset_17: +; NEHALEM: # %bb.0: # %entry +; NEHALEM-NEXT: xorps %xmm0, %xmm0 +; NEHALEM-NEXT: movups %xmm0, (%rdi) +; NEHALEM-NEXT: movb $0, 16(%rdi) +; NEHALEM-NEXT: retq +entry: + call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 17, i1 false) + ret void +} + +define void @memset_19(i8* %a) nounwind { +; X86-LABEL: memset_19: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl $0, 15(%eax) +; X86-NEXT: movl $0, 12(%eax) +; X86-NEXT: movl $0, 8(%eax) +; X86-NEXT: movl $0, 4(%eax) +; X86-NEXT: movl $0, (%eax) +; X86-NEXT: retl +; +; CORE2-LABEL: memset_19: +; CORE2: # %bb.0: # %entry +; CORE2-NEXT: movl $0, 15(%rdi) +; CORE2-NEXT: movq $0, 8(%rdi) +; CORE2-NEXT: movq $0, (%rdi) +; CORE2-NEXT: retq +; +; NEHALEM-LABEL: memset_19: +; NEHALEM: # %bb.0: # %entry +; NEHALEM-NEXT: xorps %xmm0, %xmm0 +; NEHALEM-NEXT: movups %xmm0, (%rdi) +; NEHALEM-NEXT: movl $0, 15(%rdi) +; NEHALEM-NEXT: retq +entry: + call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 19, i1 false) + ret void +} + +define void @memset_31(i8* %a) nounwind { +; X86-LABEL: memset_31: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl $0, 27(%eax) +; X86-NEXT: movl $0, 24(%eax) +; X86-NEXT: movl $0, 20(%eax) +; X86-NEXT: movl $0, 16(%eax) +; X86-NEXT: movl $0, 12(%eax) +; X86-NEXT: movl $0, 8(%eax) +; X86-NEXT: movl $0, 4(%eax) +; X86-NEXT: movl $0, (%eax) +; X86-NEXT: retl +; +; CORE2-LABEL: memset_31: +; CORE2: # %bb.0: # %entry +; CORE2-NEXT: movq $0, 23(%rdi) +; CORE2-NEXT: movq $0, 16(%rdi) +; CORE2-NEXT: movq $0, 8(%rdi) +; CORE2-NEXT: movq $0, (%rdi) +; CORE2-NEXT: retq +; +; NEHALEM-LABEL: memset_31: +; NEHALEM: # %bb.0: # %entry +; NEHALEM-NEXT: xorps %xmm0, %xmm0 +; NEHALEM-NEXT: movups %xmm0, 15(%rdi) +; NEHALEM-NEXT: movups %xmm0, (%rdi) +; NEHALEM-NEXT: retq +entry: + call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 31, i1 false) + ret void +} + +define void @memset_35(i8* %a) nounwind { +; X86-LABEL: memset_35: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl $0, 31(%eax) +; X86-NEXT: movl $0, 28(%eax) +; X86-NEXT: movl $0, 24(%eax) +; X86-NEXT: movl $0, 20(%eax) +; X86-NEXT: movl $0, 16(%eax) +; X86-NEXT: movl $0, 12(%eax) +; X86-NEXT: movl $0, 8(%eax) +; X86-NEXT: movl $0, 4(%eax) +; X86-NEXT: movl $0, (%eax) +; X86-NEXT: retl +; +; CORE2-LABEL: memset_35: +; CORE2: # %bb.0: # %entry +; CORE2-NEXT: movl $0, 31(%rdi) +; CORE2-NEXT: movq $0, 24(%rdi) +; CORE2-NEXT: movq $0, 16(%rdi) +; CORE2-NEXT: movq $0, 8(%rdi) +; CORE2-NEXT: movq $0, (%rdi) +; CORE2-NEXT: retq +; +; NEHALEM-LABEL: memset_35: +; NEHALEM: # %bb.0: # %entry +; NEHALEM-NEXT: xorps %xmm0, %xmm0 +; NEHALEM-NEXT: movups %xmm0, 16(%rdi) +; NEHALEM-NEXT: movups %xmm0, (%rdi) +; NEHALEM-NEXT: movl $0, 31(%rdi) +; NEHALEM-NEXT: retq +entry: + call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 35, i1 false) + ret void +} Index: test/CodeGen/X86/unaligned-load.ll =================================================================== --- test/CodeGen/X86/unaligned-load.ll +++ test/CodeGen/X86/unaligned-load.ll @@ -1,11 +1,55 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck -check-prefix=I386 %s -; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck -check-prefix=CORE2 %s -; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=corei7 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck -check-prefix=COREI7 %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=I386 %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=CORE2 %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=corei7 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=COREI7 %s @.str1 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 8 @.str3 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, 2'ND STRING\00", align 8 +; This can be improved; see PR39952. + define void @func() nounwind ssp { +; I386-LABEL: func: +; I386: ## %bb.0: ## %entry +; I386-NEXT: subl $32, %esp +; I386-NEXT: .p2align 4, 0x90 +; I386-NEXT: LBB0_1: ## %bb +; I386-NEXT: ## =>This Inner Loop Header: Depth=1 +; I386-NEXT: movl $4673097, {{[0-9]+}}(%esp) ## imm = 0x474E49 +; I386-NEXT: movl $1230132307, {{[0-9]+}}(%esp) ## imm = 0x49525453 +; I386-NEXT: movl $541347367, {{[0-9]+}}(%esp) ## imm = 0x20444E27 +; I386-NEXT: movl $840969293, {{[0-9]+}}(%esp) ## imm = 0x32202C4D +; I386-NEXT: movl $1095911247, {{[0-9]+}}(%esp) ## imm = 0x4152474F +; I386-NEXT: movl $1380982853, {{[0-9]+}}(%esp) ## imm = 0x52502045 +; I386-NEXT: movl $1313821779, {{[0-9]+}}(%esp) ## imm = 0x4E4F5453 +; I386-NEXT: movl $1498564676, (%esp) ## imm = 0x59524844 +; I386-NEXT: jmp LBB0_1 +; +; CORE2-LABEL: func: +; CORE2: ## %bb.0: ## %entry +; CORE2-NEXT: movabsq $20070800167293728, %rax ## imm = 0x474E4952545320 +; CORE2-NEXT: movabsq $2325069237881678925, %rcx ## imm = 0x20444E2732202C4D +; CORE2-NEXT: movabsq $4706902966564560965, %rdx ## imm = 0x4152474F52502045 +; CORE2-NEXT: movabsq $5642821575076104260, %rsi ## imm = 0x4E4F545359524844 +; CORE2-NEXT: .p2align 4, 0x90 +; CORE2-NEXT: LBB0_1: ## %bb +; CORE2-NEXT: ## =>This Inner Loop Header: Depth=1 +; CORE2-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; CORE2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; CORE2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; CORE2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) +; CORE2-NEXT: jmp LBB0_1 +; +; COREI7-LABEL: func: +; COREI7: ## %bb.0: ## %entry +; COREI7-NEXT: movups _.str3+{{.*}}(%rip), %xmm0 +; COREI7-NEXT: movups {{.*}}(%rip), %xmm1 +; COREI7-NEXT: .p2align 4, 0x90 +; COREI7-NEXT: LBB0_1: ## %bb +; COREI7-NEXT: ## =>This Inner Loop Header: Depth=1 +; COREI7-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) +; COREI7-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; COREI7-NEXT: jmp LBB0_1 entry: %String2Loc = alloca [31 x i8], align 1 br label %bb @@ -19,19 +63,57 @@ ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind - -; I386: calll {{_?}}memcpy +define void @func_aligned() nounwind ssp { +; I386-LABEL: func_aligned: +; I386: ## %bb.0: ## %entry +; I386-NEXT: subl $44, %esp +; I386-NEXT: movaps {{.*#+}} xmm0 = [1498564676,1313821779,1380982853,1095911247] +; I386-NEXT: .p2align 4, 0x90 +; I386-NEXT: LBB1_1: ## %bb +; I386-NEXT: ## =>This Inner Loop Header: Depth=1 +; I386-NEXT: movaps %xmm0, (%esp) +; I386-NEXT: movl $4673097, {{[0-9]+}}(%esp) ## imm = 0x474E49 +; I386-NEXT: movl $1230132307, {{[0-9]+}}(%esp) ## imm = 0x49525453 +; I386-NEXT: movl $541347367, {{[0-9]+}}(%esp) ## imm = 0x20444E27 +; I386-NEXT: movl $840969293, {{[0-9]+}}(%esp) ## imm = 0x32202C4D +; I386-NEXT: jmp LBB1_1 +; +; CORE2-LABEL: func_aligned: +; CORE2: ## %bb.0: ## %entry +; CORE2-NEXT: movabsq $20070800167293728, %rax ## imm = 0x474E4952545320 +; CORE2-NEXT: movabsq $2325069237881678925, %rcx ## imm = 0x20444E2732202C4D +; CORE2-NEXT: movabsq $4706902966564560965, %rdx ## imm = 0x4152474F52502045 +; CORE2-NEXT: movabsq $5642821575076104260, %rsi ## imm = 0x4E4F545359524844 +; CORE2-NEXT: .p2align 4, 0x90 +; CORE2-NEXT: LBB1_1: ## %bb +; CORE2-NEXT: ## =>This Inner Loop Header: Depth=1 +; CORE2-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; CORE2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; CORE2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; CORE2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) +; CORE2-NEXT: jmp LBB1_1 +; +; COREI7-LABEL: func_aligned: +; COREI7: ## %bb.0: ## %entry +; COREI7-NEXT: movups _.str3+{{.*}}(%rip), %xmm0 +; COREI7-NEXT: movups {{.*}}(%rip), %xmm1 +; COREI7-NEXT: .p2align 4, 0x90 +; COREI7-NEXT: LBB1_1: ## %bb +; COREI7-NEXT: ## =>This Inner Loop Header: Depth=1 +; COREI7-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) +; COREI7-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; COREI7-NEXT: jmp LBB1_1 +entry: + %String2Loc = alloca [31 x i8], align 16 + br label %bb -; CORE2: movabsq -; CORE2: movabsq -; CORE2: movabsq +bb: ; preds = %bb, %entry + %String2Loc9 = getelementptr inbounds [31 x i8], [31 x i8]* %String2Loc, i64 0, i64 0 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str3, i64 0, i64 0), i64 31, i1 false) + br label %bb -; COREI7: movups _.str3 +return: ; No predecessors! + ret void +} -; CORE2: .section -; CORE2: .p2align 3 -; CORE2-NEXT: _.str1: -; CORE2-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING" -; CORE2: .p2align 3 -; CORE2-NEXT: _.str3: +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind