diff --git a/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll b/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll --- a/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll +++ b/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll @@ -1,11 +1,35 @@ -; RUN: llc < %s -mtriple=i686-apple-darwin8 -relocation-model=static > %t -; RUN: grep "movl _last" %t | count 1 -; RUN: grep "cmpl.*_last" %t | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-apple-darwin8 -relocation-model=static | FileCheck %s @block = external global i8* ; [#uses=1] @last = external global i32 ; [#uses=3] define i1 @loadAndRLEsource_no_exit_2E_1_label_2E_0(i32 %tmp.21.reload, i32 %tmp.8) { +; CHECK-LABEL: loadAndRLEsource_no_exit_2E_1_label_2E_0: +; CHECK: ## %bb.0: ## %newFuncRoot +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl _last, %edx +; CHECK-NEXT: leal 1(%edx), %esi +; CHECK-NEXT: movl %esi, _last +; CHECK-NEXT: movl _block, %esi +; CHECK-NEXT: movb %al, 1(%esi,%edx) +; CHECK-NEXT: cmpl %ecx, _last +; CHECK-NEXT: jge LBB0_3 +; CHECK-NEXT: ## %bb.1: ## %label.0 +; CHECK-NEXT: cmpl $257, %eax ## imm = 0x101 +; CHECK-NEXT: je LBB0_3 +; CHECK-NEXT: ## %bb.2: ## %label.0.no_exit.1_crit_edge.exitStub +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl +; CHECK-NEXT: LBB0_3: ## %codeRepl5.exitStub +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl newFuncRoot: br label %label.0 label.0.no_exit.1_crit_edge.exitStub: ; preds = %label.0 diff --git a/llvm/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll b/llvm/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll --- a/llvm/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll +++ b/llvm/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll @@ -1,13 +1,22 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py ; Coalescing from R32 to a subset R32_. Once another register coalescer bug is ; fixed, the movb should go away as well. -; RUN: llc < %s -mtriple=i686-- -relocation-model=static | \ -; RUN: grep movl +; RUN: llc < %s -mtriple=i686-- -relocation-model=static | FileCheck %s @B = external global i32 ; [#uses=2] @C = external global i16* ; [#uses=2] define void @test(i32 %A) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: andb $16, %cl +; CHECK-NEXT: shll %cl, B +; CHECK-NEXT: shrl $3, %eax +; CHECK-NEXT: addl %eax, C +; CHECK-NEXT: retl %A.upgrd.1 = trunc i32 %A to i8 ; [#uses=1] %tmp2 = load i32, i32* @B ; [#uses=1] %tmp3 = and i8 %A.upgrd.1, 16 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll b/llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll --- a/llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll +++ b/llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll @@ -1,10 +1,24 @@ -; RUN: llc < %s -mtriple=i686-- -relocation-model=static | not grep "subl.*%esp" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -relocation-model=static | FileCheck %s @A = external global i16* ; [#uses=1] @B = external global i32 ; [#uses=1] @C = external global i32 ; [#uses=2] define void @test() { +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: movl A, %eax +; CHECK-NEXT: movzwl 2(%eax), %eax +; CHECK-NEXT: movb B, %cl +; CHECK-NEXT: movl C, %edx +; CHECK-NEXT: andb $16, %cl +; CHECK-NEXT: shll %cl, %edx +; CHECK-NEXT: xorb $16, %cl +; CHECK-NEXT: shrl %cl, %eax +; CHECK-NEXT: orl %edx, %eax +; CHECK-NEXT: movl %eax, C +; CHECK-NEXT: retl %tmp = load i16*, i16** @A ; [#uses=1] %tmp1 = getelementptr i16, i16* %tmp, i32 1 ; [#uses=1] %tmp.upgrd.1 = load i16, i16* %tmp1 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll 
b/llvm/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll --- a/llvm/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll +++ b/llvm/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -mtriple=i686-- | grep -- 4294967240 +; RUN: llc < %s -mtriple=i686-- | FileCheck %s ; PR853 +; CHECK: 4294967240 @X = global i32* inttoptr (i64 -56 to i32*) ; [#uses=0] diff --git a/llvm/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll b/llvm/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll --- a/llvm/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll +++ b/llvm/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll @@ -1,7 +1,20 @@ -; RUN: llc < %s -mtriple=i686-- -mcpu=i386 | \ -; RUN: not grep "movl %eax, %edx" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mcpu=i386 | FileCheck %s define i32 @foo(i32 %t, i32 %C) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: decl %eax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %cond_true +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: incl %eax +; CHECK-NEXT: cmpl $40, %ecx +; CHECK-NEXT: jl .LBB0_1 +; CHECK-NEXT: # %bb.2: # %bb12 +; CHECK-NEXT: retl entry: br label %cond_true diff --git a/llvm/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll b/llvm/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll --- a/llvm/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll +++ b/llvm/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll @@ -1,9 +1,23 @@ -; RUN: llc < %s -mtriple=i686-- | grep shrl +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s ; Bug in FindModifiedNodeSlot cause tmp14 load to become a zextload and shr 31 ; is then optimized away. 
@tree_code_type = external global [0 x i32] ; <[0 x i32]*> [#uses=1] define void @copy_if_shared_r() { +; CHECK-LABEL: copy_if_shared_r: +; CHECK: # %bb.0: +; CHECK-NEXT: movl 0, %eax +; CHECK-NEXT: movzbl %al, %ecx +; CHECK-NEXT: movl tree_code_type(,%ecx,4), %ecx +; CHECK-NEXT: decl %ecx +; CHECK-NEXT: cmpl $2, %ecx +; CHECK-NEXT: ja .LBB0_2 +; CHECK-NEXT: # %bb.1: # %cond_true +; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: .LBB0_2: # %cond_true17 +; CHECK-NEXT: retl %tmp = load i32, i32* null ; [#uses=1] %tmp56 = and i32 %tmp, 255 ; [#uses=1] %gep.upgrd.1 = zext i32 %tmp56 to i64 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll --- a/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll +++ b/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-- > %t -; RUN: not grep ",%rsp)" %t +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s ; PR1103 target datalayout = "e-p:64:64" @@ -7,6 +7,251 @@ define void @foo(i32* %a0, i32* %a1, i32* %a2, i32* %a3, i32* %a4, i32* %a5) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %b +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: movslq (%rdi), %rax +; CHECK-NEXT: movslq (%rsi), %r8 +; CHECK-NEXT: movslq (%rdx), %r10 +; CHECK-NEXT: movl (%rcx), %edi +; CHECK-NEXT: movslq (%r9), %rcx +; CHECK-NEXT: movq %rsp, %rdx +; CHECK-NEXT: subl %eax, %r8d +; CHECK-NEXT: movslq %r8d, %rsi +; CHECK-NEXT: js .LBB0_1 +; CHECK-NEXT: # %bb.11: # %b63 +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: js .LBB0_14 +; CHECK-NEXT: # %bb.12: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_13: # %a25b +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: 
testb %al, %al +; CHECK-NEXT: je .LBB0_13 +; CHECK-NEXT: .LBB0_14: # %b85 +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: # %bb.15: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_16: # %a25b140 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je .LBB0_16 +; CHECK-NEXT: .LBB0_1: # %a29b +; CHECK-NEXT: cmpl %r10d, %edi +; CHECK-NEXT: js .LBB0_10 +; CHECK-NEXT: # %bb.2: # %b158 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: movb $1, %r10b +; CHECK-NEXT: jmp .LBB0_3 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_9: # %b1606 +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: je .LBB0_10 +; CHECK-NEXT: .LBB0_3: # %a29b173 +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB0_37 Depth 2 +; CHECK-NEXT: # Child Loop BB0_19 Depth 2 +; CHECK-NEXT: # Child Loop BB0_20 Depth 2 +; CHECK-NEXT: # Child Loop BB0_21 Depth 3 +; CHECK-NEXT: # Child Loop BB0_23 Depth 2 +; CHECK-NEXT: # Child Loop BB0_24 Depth 3 +; CHECK-NEXT: # Child Loop BB0_26 Depth 2 +; CHECK-NEXT: # Child Loop BB0_38 Depth 3 +; CHECK-NEXT: # Child Loop BB0_29 Depth 3 +; CHECK-NEXT: # Child Loop BB0_30 Depth 2 +; CHECK-NEXT: # Child Loop BB0_39 Depth 3 +; CHECK-NEXT: # Child Loop BB0_33 Depth 3 +; CHECK-NEXT: # Child Loop BB0_34 Depth 2 +; CHECK-NEXT: # Child Loop BB0_36 Depth 2 +; CHECK-NEXT: testl %r8d, %r8d +; CHECK-NEXT: js .LBB0_4 +; CHECK-NEXT: # %bb.17: # %b179 +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: js .LBB0_18 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_37: # %a30b +; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: je .LBB0_37 +; CHECK-NEXT: .LBB0_18: # %b188 +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: 
testb %r10b, %r10b +; CHECK-NEXT: jne .LBB0_4 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_19: # %a30b294 +; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: je .LBB0_19 +; CHECK-NEXT: .LBB0_4: # %a33b +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: orl %r8d, %eax +; CHECK-NEXT: movl %eax, %r9d +; CHECK-NEXT: shrl $31, %r9d +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: jns .LBB0_20 +; CHECK-NEXT: .LBB0_5: # %a50b +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: movl %r8d, %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: movl %eax, %r11d +; CHECK-NEXT: shrl $31, %r11d +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: jns .LBB0_26 +; CHECK-NEXT: .LBB0_6: # %a57b +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: testb %r9b, %r9b +; CHECK-NEXT: je .LBB0_30 +; CHECK-NEXT: .LBB0_7: # %a66b +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: testb %r11b, %r11b +; CHECK-NEXT: jne .LBB0_8 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_34: # %a74b +; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: jne .LBB0_34 +; CHECK-NEXT: # %bb.35: # %b1582 +; CHECK-NEXT: # in Loop: Header=BB0_34 Depth=2 +; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: jne .LBB0_34 +; CHECK-NEXT: .LBB0_8: # %a93b +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: testl %r8d, %r8d +; CHECK-NEXT: js .LBB0_9 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_36: # %a97b +; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: addss %xmm0, %xmm1 +; CHECK-NEXT: addss %xmm0, %xmm1 +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: addss %xmm0, %xmm2 +; CHECK-NEXT: addss %xmm1, %xmm2 +; CHECK-NEXT: 
movss %xmm2, {{.*}}(%rip) +; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: jne .LBB0_36 +; CHECK-NEXT: jmp .LBB0_9 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_22: # %b463 +; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2 +; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: je .LBB0_23 +; CHECK-NEXT: .LBB0_20: # %b341 +; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: # => This Loop Header: Depth=2 +; CHECK-NEXT: # Child Loop BB0_21 Depth 3 +; CHECK-NEXT: testq %rcx, %rcx +; CHECK-NEXT: js .LBB0_22 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_21: # %a35b +; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: # Parent Loop BB0_20 Depth=2 +; CHECK-NEXT: # => This Inner Loop Header: Depth=3 +; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: je .LBB0_21 +; CHECK-NEXT: jmp .LBB0_22 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_28: # %b1016 +; CHECK-NEXT: # in Loop: Header=BB0_26 Depth=2 +; CHECK-NEXT: testq %rcx, %rcx +; CHECK-NEXT: jle .LBB0_6 +; CHECK-NEXT: .LBB0_26: # %b858 +; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: # => This Loop Header: Depth=2 +; CHECK-NEXT: # Child Loop BB0_38 Depth 3 +; CHECK-NEXT: # Child Loop BB0_29 Depth 3 +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: js .LBB0_27 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_38: # %a53b +; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: # Parent Loop BB0_26 Depth=2 +; CHECK-NEXT: # => This Inner Loop Header: Depth=3 +; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: je .LBB0_38 +; CHECK-NEXT: .LBB0_27: # %b879 +; CHECK-NEXT: # in Loop: Header=BB0_26 Depth=2 +; CHECK-NEXT: testb %r10b, %r10b +; CHECK-NEXT: jne .LBB0_28 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_29: # %a53b1019 +; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: # Parent Loop BB0_26 Depth=2 +; CHECK-NEXT: # => This Inner Loop Header: Depth=3 +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: jle .LBB0_29 +; CHECK-NEXT: jmp .LBB0_28 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_32: 
# %b1263 +; CHECK-NEXT: # in Loop: Header=BB0_30 Depth=2 +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: jle .LBB0_7 +; CHECK-NEXT: .LBB0_30: # %b1117 +; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: # => This Loop Header: Depth=2 +; CHECK-NEXT: # Child Loop BB0_39 Depth 3 +; CHECK-NEXT: # Child Loop BB0_33 Depth 3 +; CHECK-NEXT: testq %rcx, %rcx +; CHECK-NEXT: js .LBB0_31 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_39: # %a63b +; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: # Parent Loop BB0_30 Depth=2 +; CHECK-NEXT: # => This Inner Loop Header: Depth=3 +; CHECK-NEXT: testq %rcx, %rcx +; CHECK-NEXT: jle .LBB0_39 +; CHECK-NEXT: .LBB0_31: # %b1139 +; CHECK-NEXT: # in Loop: Header=BB0_30 Depth=2 +; CHECK-NEXT: testq %rcx, %rcx +; CHECK-NEXT: jle .LBB0_32 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_33: # %a63b1266 +; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: # Parent Loop BB0_30 Depth=2 +; CHECK-NEXT: # => This Inner Loop Header: Depth=3 +; CHECK-NEXT: testq %rcx, %rcx +; CHECK-NEXT: jle .LBB0_33 +; CHECK-NEXT: jmp .LBB0_32 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_25: # %b712 +; CHECK-NEXT: # in Loop: Header=BB0_23 Depth=2 +; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: je .LBB0_5 +; CHECK-NEXT: .LBB0_23: # %b535 +; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: # => This Loop Header: Depth=2 +; CHECK-NEXT: # Child Loop BB0_24 Depth 3 +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: js .LBB0_25 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_24: # %a45b +; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 +; CHECK-NEXT: # Parent Loop BB0_23 Depth=2 +; CHECK-NEXT: # => This Inner Loop Header: Depth=3 +; CHECK-NEXT: testb %dil, %dil +; CHECK-NEXT: je .LBB0_24 +; CHECK-NEXT: jmp .LBB0_25 +; CHECK-NEXT: .LBB0_10: # %a109b +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq b: %r = load i32, i32* %a0 %r2 = load i32, i32* %a1 diff --git 
a/llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll b/llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll --- a/llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll +++ b/llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll @@ -1,6 +1,6 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -enable-tail-merge=0 | FileCheck %s ; PR 1200 -; RUN: llc < %s -enable-tail-merge=0 | not grep jmp - ; ModuleID = '' target datalayout = "e-p:32:32" target triple = "i686-apple-darwin8" @@ -27,6 +27,81 @@ declare i32 @fprintf(%struct.FILE*, i8*, ...) define i16 @main_bb_2E_i9_2E_i_2E_i932_2E_ce(%struct.list* %l_addr.01.0.i2.i.i929, %struct.operator** %tmp66.i62.i.out) { +; CHECK-LABEL: main_bb_2E_i9_2E_i_2E_i932_2E_ce: +; CHECK: ## %bb.0: ## %newFuncRoot +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: subl $20, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset %esi, -12 +; CHECK-NEXT: .cfi_offset %edi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl (%eax), %edi +; CHECK-NEXT: movl 8(%edi), %eax +; CHECK-NEXT: movl L_outfile$non_lazy_ptr, %ecx +; CHECK-NEXT: movl (%ecx), %ecx +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl L_str1$non_lazy_ptr, %eax +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %ecx, (%esp) +; CHECK-NEXT: calll _fprintf +; CHECK-NEXT: movl 20(%edi), %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: jle LBB0_6 +; CHECK-NEXT: ## %bb.1: ## %NodeBlock4 +; CHECK-NEXT: cmpl $2, %eax +; CHECK-NEXT: jge LBB0_2 +; CHECK-NEXT: ## %bb.4: ## %LeafBlock2 +; CHECK-NEXT: cmpl $1, %eax +; CHECK-NEXT: jne LBB0_3 +; CHECK-NEXT: ## %bb.5: ## %bb20.i.i937.exitStub +; CHECK-NEXT: movl %edi, (%esi) +; CHECK-NEXT: movw $3, %ax +; CHECK-NEXT: addl $20, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: retl +; CHECK-NEXT: LBB0_6: ## 
%NodeBlock +; CHECK-NEXT: js LBB0_9 +; CHECK-NEXT: ## %bb.7: ## %LeafBlock1 +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: jne LBB0_3 +; CHECK-NEXT: ## %bb.8: ## %bb12.i.i935.exitStub +; CHECK-NEXT: movl %edi, (%esi) +; CHECK-NEXT: movw $2, %ax +; CHECK-NEXT: addl $20, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: retl +; CHECK-NEXT: LBB0_2: ## %LeafBlock3 +; CHECK-NEXT: jne LBB0_3 +; CHECK-NEXT: ## %bb.11: ## %bb28.i.i938.exitStub +; CHECK-NEXT: movl %edi, (%esi) +; CHECK-NEXT: movw $4, %ax +; CHECK-NEXT: addl $20, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: retl +; CHECK-NEXT: LBB0_9: ## %LeafBlock +; CHECK-NEXT: cmpl $-1, %eax +; CHECK-NEXT: je LBB0_10 +; CHECK-NEXT: LBB0_3: ## %NewDefault +; CHECK-NEXT: movl %edi, (%esi) +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: addl $20, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: retl +; CHECK-NEXT: LBB0_10: ## %bb.i14.i.exitStub +; CHECK-NEXT: movl %edi, (%esi) +; CHECK-NEXT: movw $1, %ax +; CHECK-NEXT: addl $20, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: retl newFuncRoot: br label %bb.i9.i.i932.ce diff --git a/llvm/test/CodeGen/X86/2007-03-01-SpillerCrash.ll b/llvm/test/CodeGen/X86/2007-03-01-SpillerCrash.ll --- a/llvm/test/CodeGen/X86/2007-03-01-SpillerCrash.ll +++ b/llvm/test/CodeGen/X86/2007-03-01-SpillerCrash.ll @@ -1,7 +1,12 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -mattr=+sse2 -; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -mattr=+sse2 | not grep movhlps +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -mattr=+sse2 | FileCheck %s define void @test() nounwind { +; CHECK-LABEL: test: +; CHECK: ## %bb.0: ## %test.exit +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: ud2 test.exit: fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:0 [#uses=4] load <4 x float>, <4 x float>* null ; <<4 x 
float>>:1 [#uses=1] diff --git a/llvm/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll b/llvm/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll --- a/llvm/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll +++ b/llvm/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll @@ -1,8 +1,15 @@ -; RUN: llc < %s -no-integrated-as | grep "mov %gs:72, %eax" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -no-integrated-as | FileCheck %s target datalayout = "e-p:32:32" target triple = "i686-apple-darwin9" define void @test() { +; CHECK-LABEL: test: +; CHECK: ## %bb.0: +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: mov %gs:72, %eax +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: retl %tmp1 = tail call i32* asm sideeffect "mov %gs:${1:P}, $0", "=r,i,~{dirflag},~{fpsr},~{flags}"( i32 72 ) ; <%struct._pthread*> [#uses=1] ret void } diff --git a/llvm/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll b/llvm/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll --- a/llvm/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll +++ b/llvm/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll @@ -1,10 +1,17 @@ -; RUN: llc < %s | not grep "bsrl.*10" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s ; PR1356 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" target triple = "i686-apple-darwin8" define i32 @main() { +; CHECK-LABEL: main: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: bsrl LCPI0_0, %eax +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: retl entry: %tmp4 = tail call i32 asm "bsrl $1, $0", "=r,ro,~{dirflag},~{fpsr},~{flags},~{cc}"( i32 10 ) ; [#uses=1] ret i32 %tmp4 diff --git a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll --- a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll +++ 
b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep "movb %ah, %r" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, [4 x i8], i64 } %struct.PyBoolScalarObject = type { i64, %struct._typeobject*, i8 } @@ -19,6 +20,175 @@ @.str5 = external constant [14 x i8] ; <[14 x i8]*> [#uses=1] define %struct.PyObject* @ubyte_divmod(%struct.PyObject* %a, %struct.PyObject* %b) { +; CHECK-LABEL: ubyte_divmod: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset %rbx, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsi, %r14 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi +; CHECK-NEXT: callq __ubyte_convert_to_ctype +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: js LBB0_4 +; CHECK-NEXT: ## %bb.1: ## %cond_next.i +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi +; CHECK-NEXT: movq %r14, %rdi +; CHECK-NEXT: callq __ubyte_convert_to_ctype +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: sarl $31, %ecx +; CHECK-NEXT: andl %eax, %ecx +; CHECK-NEXT: cmpl $-2, %ecx +; CHECK-NEXT: je LBB0_8 +; CHECK-NEXT: ## %bb.2: ## %cond_next.i +; CHECK-NEXT: cmpl $-1, %ecx +; CHECK-NEXT: jne LBB0_6 +; CHECK-NEXT: LBB0_3: ## %bb4 +; CHECK-NEXT: movq _PyArray_API@{{.*}}(%rip), %rax +; 
CHECK-NEXT: movq (%rax), %rax +; CHECK-NEXT: movq 16(%rax), %rax +; CHECK-NEXT: jmp LBB0_10 +; CHECK-NEXT: LBB0_4: ## %_ubyte_convert2_to_ctypes.exit +; CHECK-NEXT: cmpl $-2, %eax +; CHECK-NEXT: je LBB0_8 +; CHECK-NEXT: ## %bb.5: ## %_ubyte_convert2_to_ctypes.exit +; CHECK-NEXT: cmpl $-1, %eax +; CHECK-NEXT: je LBB0_3 +; CHECK-NEXT: LBB0_6: ## %bb35 +; CHECK-NEXT: movq _PyUFunc_API@{{.*}}(%rip), %rbp +; CHECK-NEXT: movq (%rbp), %rax +; CHECK-NEXT: callq *216(%rax) +; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %dl +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: je LBB0_11 +; CHECK-NEXT: ## %bb.7: ## %cond_false.i +; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %bl +; CHECK-NEXT: movzbl %bl, %ecx +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: divb %dl +; CHECK-NEXT: movl %eax, %r14d +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: jne LBB0_12 +; CHECK-NEXT: jmp LBB0_14 +; CHECK-NEXT: LBB0_8: ## %bb17 +; CHECK-NEXT: callq _PyErr_Occurred +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: ## %bb.9: ## %cond_next +; CHECK-NEXT: movq _PyArray_API@{{.*}}(%rip), %rax +; CHECK-NEXT: movq (%rax), %rax +; CHECK-NEXT: movq 80(%rax), %rax +; CHECK-NEXT: LBB0_10: ## %bb4 +; CHECK-NEXT: movq 96(%rax), %rax +; CHECK-NEXT: movq %rbx, %rdi +; CHECK-NEXT: movq %r14, %rsi +; CHECK-NEXT: callq *40(%rax) +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_11: ## %cond_true.i +; CHECK-NEXT: movl $4, %edi +; CHECK-NEXT: callq _feraiseexcept +; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %dl +; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %bl +; CHECK-NEXT: xorl %r14d, %r14d +; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: je LBB0_14 +; CHECK-NEXT: LBB0_12: ## %cond_false.i +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: je LBB0_14 +; CHECK-NEXT: ## %bb.13: ## %cond_next17.i +; CHECK-NEXT: movzbl %bl, %eax +; CHECK-NEXT: divb %dl +; CHECK-NEXT: movzbl %ah, %eax +; CHECK-NEXT: movl %eax, %r15d +; CHECK-NEXT: jmp LBB0_18 +; CHECK-NEXT: LBB0_14: ## %cond_true.i200 +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: jne LBB0_17 +; 
CHECK-NEXT: ## %bb.16: ## %cond_true14.i +; CHECK-NEXT: movl $4, %edi +; CHECK-NEXT: callq _feraiseexcept +; CHECK-NEXT: LBB0_17: ## %ubyte_ctype_remainder.exit +; CHECK-NEXT: xorl %r15d, %r15d +; CHECK-NEXT: LBB0_18: ## %ubyte_ctype_remainder.exit +; CHECK-NEXT: movq (%rbp), %rax +; CHECK-NEXT: callq *224(%rax) +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: je LBB0_21 +; CHECK-NEXT: ## %bb.19: ## %cond_true61 +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: movq (%rbp), %rax +; CHECK-NEXT: movq _.str5@{{.*}}(%rip), %rdi +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; CHECK-NEXT: callq *200(%rax) +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: js LBB0_27 +; CHECK-NEXT: ## %bb.20: ## %cond_next73 +; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq (%rbp), %rax +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %edi +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; CHECK-NEXT: movl %ebx, %edx +; CHECK-NEXT: callq *232(%rax) +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: LBB0_21: ## %cond_next89 +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: callq _PyTuple_New +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: je LBB0_27 +; CHECK-NEXT: ## %bb.22: ## %cond_next97 +; CHECK-NEXT: movq %rax, %rbx +; CHECK-NEXT: movq _PyArray_API@{{.*}}(%rip), %rbp +; CHECK-NEXT: movq (%rbp), %rax +; CHECK-NEXT: movq 200(%rax), %rdi +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: callq *304(%rdi) +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: ## %bb.23: ## %cond_next135 +; CHECK-NEXT: movb %r14b, 16(%rax) +; CHECK-NEXT: movq %rax, 24(%rbx) +; CHECK-NEXT: movq (%rbp), %rax +; CHECK-NEXT: movq 200(%rax), %rdi +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: callq *304(%rdi) +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: ## %bb.24: ## %cond_next182 +; CHECK-NEXT: movb %r15b, 16(%rax) +; CHECK-NEXT: movq %rax, 
32(%rbx) +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_25: ## %cond_true113 +; CHECK-NEXT: decq (%rbx) +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: ## %bb.26: ## %cond_true126 +; CHECK-NEXT: movq 8(%rbx), %rax +; CHECK-NEXT: movq %rbx, %rdi +; CHECK-NEXT: callq *48(%rax) +; CHECK-NEXT: LBB0_27: ## %UnifiedReturnBlock +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: LBB0_28: ## %UnifiedReturnBlock +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq entry: %arg1 = alloca i8, align 1 ; [#uses=3] %arg2 = alloca i8, align 1 ; [#uses=3] diff --git a/llvm/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll b/llvm/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll --- a/llvm/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll +++ b/llvm/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll @@ -1,8 +1,15 @@ -; RUN: llc < %s -mtriple=i686-- | grep "movsbl" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s @X = global i32 0 ; [#uses=1] define i32 @_Z3fooi(i32 %x) { +; CHECK-LABEL: _Z3fooi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, X +; CHECK-NEXT: movsbl %al, %eax +; CHECK-NEXT: retl entry: store i32 %x, i32* @X, align 4 %retval67 = trunc i32 %x to i8 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll b/llvm/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll --- a/llvm/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll +++ b/llvm/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll @@ -1,9 +1,26 @@ -; RUN: llc < %s -mtriple=i686-- | not grep pushf +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s %struct.gl_texture_image = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8* } %struct.gl_texture_object = type { i32, i32, i32, float, [4 x 
i32], i32, i32, i32, i32, i32, float, [11 x %struct.gl_texture_image*], [1024 x i8], i32, i32, i32, i8, i8*, i8, void (%struct.gl_texture_object*, i32, float*, float*, float*, float*, i8*, i8*, i8*, i8*)*, %struct.gl_texture_object* } define fastcc void @sample_3d_linear(%struct.gl_texture_object* %tObj, %struct.gl_texture_image* %img, float %s, float %t, float %r, i8* %red, i8* %green, i8* %blue, i8* %alpha) { +; CHECK-LABEL: sample_3d_linear: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: movl 0, %esi +; CHECK-NEXT: pushl $0 +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: calll floorf +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: addl $4, %esp +; CHECK-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK-NEXT: cmpl $10497, %esi # imm = 0x2901 +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: %tmp15 = load i32, i32* null, align 4 ; [#uses=1] %tmp16 = icmp eq i32 %tmp15, 10497 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll b/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll --- a/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll +++ b/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll @@ -1,6 +1,29 @@ -; RUN: llc < %s -mtriple=i686-- | not grep movb +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s define signext i16 @f(i32* %bp, i32* %ss) { +; CHECK-LABEL: f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %cond_next127 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl (%eax), %edx +; CHECK-NEXT: movl (%ecx), %esi +; CHECK-NEXT: andl $15, %edx +; CHECK-NEXT: andl $15, %esi +; 
CHECK-NEXT: addl %esi, (%ecx) +; CHECK-NEXT: cmpl $63, %edx +; CHECK-NEXT: jb .LBB0_1 +; CHECK-NEXT: # %bb.2: # %UnifiedReturnBlock +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: br label %cond_next127 diff --git a/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll b/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll --- a/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll +++ b/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll @@ -1,6 +1,36 @@ -; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | grep addss | not grep esp +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | FileCheck %s define fastcc void @fht(float* %fz, i16 signext %n) { +; CHECK-LABEL: fht: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: xorps %xmm2, %xmm2 +; CHECK-NEXT: subss %xmm1, %xmm2 +; CHECK-NEXT: movaps %xmm1, %xmm3 +; CHECK-NEXT: mulss %xmm0, %xmm3 +; CHECK-NEXT: addss %xmm1, %xmm3 +; CHECK-NEXT: movaps %xmm1, %xmm4 +; CHECK-NEXT: subss %xmm3, %xmm4 +; CHECK-NEXT: addss %xmm1, %xmm3 +; CHECK-NEXT: xorps %xmm5, %xmm5 +; CHECK-NEXT: subss %xmm2, %xmm5 +; CHECK-NEXT: addss %xmm0, %xmm2 +; CHECK-NEXT: mulss %xmm0, %xmm3 +; CHECK-NEXT: mulss %xmm0, %xmm5 +; CHECK-NEXT: addss %xmm3, %xmm5 +; CHECK-NEXT: addss %xmm0, %xmm5 +; CHECK-NEXT: movss %xmm5, 0 +; CHECK-NEXT: movss %xmm1, (%ecx) +; CHECK-NEXT: addss %xmm0, %xmm1 +; CHECK-NEXT: movss %xmm1, 0 +; CHECK-NEXT: mulss %xmm0, %xmm2 +; CHECK-NEXT: mulss %xmm0, %xmm4 +; CHECK-NEXT: addss %xmm2, %xmm4 +; CHECK-NEXT: addss %xmm0, %xmm4 +; CHECK-NEXT: movss %xmm4, (%ecx) +; CHECK-NEXT: retl entry: br i1 true, label %bb171.preheader, label %bb431 diff --git a/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll b/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll --- a/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll +++ 
b/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll @@ -1,6 +1,22 @@ -; RUN: llc < %s -mtriple=i686-- -mcpu=corei7 | grep sarl | not grep esp +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mcpu=corei7 | FileCheck %s define signext i16 @t(i16* %qmatrix, i16* %dct, i16* %acBaseTable, i16* %acExtTable, i16 signext %acBaseRes, i16 signext %acMaskRes, i16 signext %acExtRes, i32* %bitptr, i32* %source, i32 %markerPrefix, i8** %byteptr, i32 %scale, i32 %round, i32 %bits) { +; CHECK-LABEL: t: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %cond_next127 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: sarl %cl, %edx +; CHECK-NEXT: incl %edx +; CHECK-NEXT: cmpl $63, %edx +; CHECK-NEXT: jb .LBB0_1 +; CHECK-NEXT: # %bb.2: # %UnifiedReturnBlock +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl entry: br label %cond_next127 diff --git a/llvm/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll b/llvm/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll --- a/llvm/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll +++ b/llvm/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll @@ -1,6 +1,13 @@ -; RUN: llc < %s -mtriple=i686-- | grep mov | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s define signext i16 @t() { +; CHECK-LABEL: t: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movswl 0, %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl entry: %tmp180 = load i16, i16* null, align 2 ; [#uses=3] %tmp180181 = sext i16 %tmp180 to i32 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/2007-11-06-InstrSched.ll b/llvm/test/CodeGen/X86/2007-11-06-InstrSched.ll --- a/llvm/test/CodeGen/X86/2007-11-06-InstrSched.ll +++ b/llvm/test/CodeGen/X86/2007-11-06-InstrSched.ll @@ -1,6 
+1,38 @@ -; RUN: llc < %s -mtriple=i686-- -mcpu=generic -mattr=+sse2 | not grep lea +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mcpu=generic -mattr=+sse2 | FileCheck %s define float @foo(i32* %x, float* %y, i32 %c) nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: # %bb.2: # %bb18.preheader +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_3: # %bb18 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: cvtsi2ssl (%edx,%esi,4), %xmm1 +; CHECK-NEXT: mulss (%ecx,%esi,4), %xmm1 +; CHECK-NEXT: addss %xmm1, %xmm0 +; CHECK-NEXT: incl %esi +; CHECK-NEXT: cmpl %eax, %esi +; CHECK-NEXT: jb .LBB0_3 +; CHECK-NEXT: jmp .LBB0_4 +; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: .LBB0_4: # %bb23 +; CHECK-NEXT: movss %xmm0, (%esp) +; CHECK-NEXT: flds (%esp) +; CHECK-NEXT: addl $4, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl entry: %tmp2132 = icmp eq i32 %c, 0 ; [#uses=1] br i1 %tmp2132, label %bb23, label %bb18 diff --git a/llvm/test/CodeGen/X86/2007-11-07-MulBy4.ll b/llvm/test/CodeGen/X86/2007-11-07-MulBy4.ll --- a/llvm/test/CodeGen/X86/2007-11-07-MulBy4.ll +++ b/llvm/test/CodeGen/X86/2007-11-07-MulBy4.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=i686-- | not grep imul +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s %struct.eebb = type { %struct.eebb*, i16* } %struct.hf = type { %struct.hf*, i16*, i8*, i32, i32, %struct.eebb*, i32, i32, i8*, i8*, i8*, i8*, i16*, i8*, i16*, %struct.ri, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, [30 x i32], %struct.eebb, i32, i8* } @@ -6,6 +7,19 @@ %struct.ri = type { %struct.ri*, i32, i8*, i16*, i32*, i32 } define fastcc i32 @foo(i16* %eptr, i8* %ecode, %struct.foo_data* %md, i32 %ims) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $0, 0 +; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: # %bb.1: # %cond_next79 +; CHECK-NEXT: cmpl $0, 0 +; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: # %bb.2: # %cond_next130 +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: .LBB0_3: # %cond_true89 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl entry: %tmp36 = load i32, i32* null, align 4 ; [#uses=1] %tmp37 = icmp ult i32 0, %tmp36 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll --- a/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll +++ b/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll @@ -1,10 +1,34 @@ -; RUN: llc < %s -mtriple=i686-- -mcpu=generic | grep "(%esp)" | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mcpu=generic | FileCheck %s ; PR1872 %struct.c34007g__designated___XUB = type { i32, i32, i32, i32 } %struct.c34007g__pkg__parent = type { i32*, %struct.c34007g__designated___XUB* } define void @_ada_c34007g() { +; CHECK-LABEL: _ada_c34007g: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: .cfi_def_cfa_register %ebp +; CHECK-NEXT: andl $-8, %esp +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: movl (%esp), %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: orl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: jne .LBB0_3 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: .LBB0_3: # %bb5507 +; CHECK-NEXT: movl %ebp, %esp +; 
CHECK-NEXT: popl %ebp +; CHECK-NEXT: .cfi_def_cfa %esp, 4 +; CHECK-NEXT: retl entry: %x8 = alloca %struct.c34007g__pkg__parent, align 8 ; <%struct.c34007g__pkg__parent*> [#uses=2] %tmp1272 = getelementptr %struct.c34007g__pkg__parent, %struct.c34007g__pkg__parent* %x8, i32 0, i32 0 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll b/llvm/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll --- a/llvm/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll +++ b/llvm/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll @@ -1,8 +1,51 @@ -; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 -mtriple=i686-- -mattr=+mmx | grep esi +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 -mtriple=i686-- -mattr=+mmx | FileCheck %s ; PR2082 ; Local register allocator was refusing to use ESI, EDI, and EBP so it ran out of ; registers. define void @transpose4x4(i8* %dst, i8* %src, i32 %dst_stride, i32 %src_stride) { +; CHECK-LABEL: transpose4x4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %esi, -16 +; CHECK-NEXT: .cfi_offset %edi, -12 +; CHECK-NEXT: .cfi_offset %ebx, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: leal (%ecx,%ecx,2), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: leal (%edi,%edi,2), %ebx +; CHECK-NEXT: #APP +; CHECK-NEXT: movd (%esi), %mm0 +; CHECK-NEXT: movd (%esi,%edi), %mm1 +; CHECK-NEXT: movd (%esi,%edi,2), %mm2 +; CHECK-NEXT: movd (%esi,%ebx), %mm3 +; CHECK-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] +; CHECK-NEXT: punpcklbw %mm3, %mm2 # mm2 = mm2[0],mm3[0],mm2[1],mm3[1],mm2[2],mm3[2],mm2[3],mm3[3] +; CHECK-NEXT: 
movq %mm0, %mm1 +; CHECK-NEXT: punpcklwd %mm2, %mm0 # mm0 = mm0[0],mm2[0],mm0[1],mm2[1] +; CHECK-NEXT: punpckhwd %mm2, %mm1 # mm1 = mm1[2],mm2[2],mm1[3],mm2[3] +; CHECK-NEXT: movd %mm0, (%eax) +; CHECK-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1] +; CHECK-NEXT: movd %mm0, (%eax,%ecx) +; CHECK-NEXT: movd %mm1, (%eax,%ecx,2) +; CHECK-NEXT: punpckhdq %mm1, %mm1 # mm1 = mm1[1,1] +; CHECK-NEXT: movd %mm1, (%eax,%edx) +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: popl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: %dst_addr = alloca i8* ; [#uses=5] %src_addr = alloca i8* ; [#uses=5] @@ -47,7 +90,7 @@ %tmp32 = load i8*, i8** %src_addr, align 4 ; [#uses=1] %tmp33 = getelementptr i8, i8* %tmp32, i32 %tmp31 ; [#uses=1] %tmp3334 = bitcast i8* %tmp33 to i32* ; [#uses=1] - call void asm sideeffect "movd $4, %mm0 \0A\09movd $5, %mm1 \0A\09movd $6, %mm2 \0A\09movd $7, %mm3 \0A\09punpcklbw %mm1, %mm0 \0A\09punpcklbw %mm3, %mm2 \0A\09movq %mm0, %mm1 \0A\09punpcklwd %mm2, %mm0 \0A\09punpckhwd %mm2, %mm1 \0A\09movd %mm0, $0 \0A\09punpckhdq %mm0, %mm0 \0A\09movd %mm0, $1 \0A\09movd %mm1, $2 \0A\09punpckhdq %mm1, %mm1 \0A\09movd %mm1, $3 \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( i32* %tmp12, i32* %tmp56, i32* %tmp1011, i32* %tmp1617, i32* %tmp1920, i32* %tmp2324, i32* %tmp2829, i32* %tmp3334 ) nounwind + call void asm sideeffect "movd $4, %mm0 \0A\09movd $5, %mm1 \0A\09movd $6, %mm2 \0A\09movd $7, %mm3 \0A\09punpcklbw %mm1, %mm0 \0A\09punpcklbw %mm3, %mm2 \0A\09movq %mm0, %mm1 \0A\09punpcklwd %mm2, %mm0 \0A\09punpckhwd %mm2, %mm1 \0A\09movd %mm0, $0 \0A\09punpckhdq %mm0, %mm0 \0A\09movd %mm0, $1 \0A\09movd %mm1, $2 \0A\09punpckhdq %mm1, %mm1 \0A\09movd %mm1, $3 \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( i32* %tmp12, i32* %tmp56, i32* %tmp1011, i32* %tmp1617, i32* %tmp1920, i32* 
%tmp2324, i32* %tmp2829, i32* %tmp3334 ) nounwind br label %return return: ; preds = %entry diff --git a/llvm/test/CodeGen/X86/2008-03-07-APIntBug.ll b/llvm/test/CodeGen/X86/2008-03-07-APIntBug.ll --- a/llvm/test/CodeGen/X86/2008-03-07-APIntBug.ll +++ b/llvm/test/CodeGen/X86/2008-03-07-APIntBug.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=i686-- -mcpu=i386 | not grep 255 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mcpu=i386 | FileCheck %s %struct.CONSTRAINT = type { i32, i32, i32, i32 } %struct.FIRST_UNION = type { %struct.anon } @@ -13,6 +14,16 @@ %struct.word_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, [4 x i8] } define void @InsertSym_bb1163(%struct.rec** %s) { +; CHECK-LABEL: InsertSym_bb1163: +; CHECK: # %bb.0: # %newFuncRoot +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl (%eax), %ecx +; CHECK-NEXT: movl 28(%ecx), %ecx +; CHECK-NEXT: orb $-128, 26(%ecx) +; CHECK-NEXT: orb $2, 25(%ecx) +; CHECK-NEXT: movl (%eax), %eax +; CHECK-NEXT: orb $1, 25(%eax) +; CHECK-NEXT: retl newFuncRoot: br label %bb1163 bb1233.exitStub: ; preds = %bb1163 diff --git a/llvm/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll b/llvm/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll --- a/llvm/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll +++ b/llvm/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -frame-pointer=all | grep add | grep 12 | not grep non_lazy_ptr +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -frame-pointer=all | FileCheck %s ; Don't fold re-materialized load into a two address instruction %"struct.Smarts::Runnable" = type { i32 (...)**, i32 } @@ -14,10 +15,41 @@ @_ZTVSt15basic_stringbufIcSt11char_traitsIcESaIcEE = external constant [16 x i32 (...)*] ; <[16 x 
i32 (...)*]*> [#uses=1] define void @_GLOBAL__I__ZN5Pooma5pinfoE() nounwind { +; CHECK-LABEL: _GLOBAL__I__ZN5Pooma5pinfoE: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: calll L0$pb +; CHECK-NEXT: L0$pb: +; CHECK-NEXT: popl %edi +; CHECK-NEXT: movl L__ZTVSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE$non_lazy_ptr-L0$pb(%edi), %ebx +; CHECK-NEXT: leal 32(%ebx), %eax +; CHECK-NEXT: movl %eax, 0 +; CHECK-NEXT: movl $180, (%esp) +; CHECK-NEXT: calll __Znwm +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: movl $0, (%esp) +; CHECK-NEXT: calll __ZNSt8ios_baseC2Ev +; CHECK-NEXT: movl $0, 0 +; CHECK-NEXT: addl $12, %ebx +; CHECK-NEXT: movl %ebx, (%esi) +; CHECK-NEXT: movl L__ZTVSt15basic_streambufIcSt11char_traitsIcEE$non_lazy_ptr-L0$pb(%edi), %eax +; CHECK-NEXT: addl $8, %eax +; CHECK-NEXT: movl %eax, 0 +; CHECK-NEXT: movl $0, (%esp) +; CHECK-NEXT: calll __ZNSt6localeC1Ev +; CHECK-NEXT: movl L__ZTVSt15basic_stringbufIcSt11char_traitsIcESaIcEE$non_lazy_ptr-L0$pb(%edi), %eax +; CHECK-NEXT: addl $8, %eax +; CHECK-NEXT: movl %eax, 0 +; CHECK-NEXT: ud2 entry: store i32 (...)** getelementptr ([10 x i32 (...)*], [10 x i32 (...)*]* @_ZTVSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE, i32 0, i32 8), i32 (...)*** null, align 4 %tmp96.i.i142.i = call i8* @_Znwm( i32 180 ) nounwind ; [#uses=2] - call void @_ZNSt8ios_baseC2Ev( %"struct.std::ios_base"* null ) nounwind + call void @_ZNSt8ios_baseC2Ev( %"struct.std::ios_base"* null ) nounwind store i32 (...)** getelementptr ([4 x i32 (...)*], [4 x i32 (...)*]* @_ZTVSt9basic_iosIcSt11char_traitsIcEE, i32 0, i32 2), i32 (...)*** null, align 4 store i32 (...)** null, i32 (...)*** null, align 4 %ctg2242.i.i163.i = getelementptr i8, i8* %tmp96.i.i142.i, i32 0 ; [#uses=1] @@ -28,7 +60,7 @@ %tmp159.i.i167.i = bitcast i8* %tmp96.i.i142.i to i32 (...)*** ; [#uses=1] store i32 (...)** 
getelementptr ([10 x i32 (...)*], [10 x i32 (...)*]* @_ZTVSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE, i32 0, i32 3), i32 (...)*** %tmp159.i.i167.i, align 4 store i32 (...)** getelementptr ([16 x i32 (...)*], [16 x i32 (...)*]* @_ZTVSt15basic_streambufIcSt11char_traitsIcEE, i32 0, i32 2), i32 (...)*** null, align 4 - call void @_ZNSt6localeC1Ev( %"struct.std::locale"* null ) nounwind + call void @_ZNSt6localeC1Ev( %"struct.std::locale"* null ) nounwind store i32 (...)** getelementptr ([16 x i32 (...)*], [16 x i32 (...)*]* @_ZTVSt15basic_stringbufIcSt11char_traitsIcESaIcEE, i32 0, i32 2), i32 (...)*** null, align 4 unreachable } @@ -37,4 +69,4 @@ declare void @_ZNSt8ios_baseC2Ev(%"struct.std::ios_base"*) -declare void @_ZNSt6localeC1Ev(%"struct.std::locale"*) nounwind +declare void @_ZNSt6localeC1Ev(%"struct.std::locale"*) nounwind diff --git a/llvm/test/CodeGen/X86/2008-04-09-BranchFolding.ll b/llvm/test/CodeGen/X86/2008-04-09-BranchFolding.ll --- a/llvm/test/CodeGen/X86/2008-04-09-BranchFolding.ll +++ b/llvm/test/CodeGen/X86/2008-04-09-BranchFolding.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=i686-- | not grep jmp +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s %struct..0anon = type { i32 } %struct.binding_level = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.binding_level*, i8, i8, i8, i8, i8, i32, %struct.tree_node* } @@ -8,6 +9,19 @@ %struct.tree_node = type { %struct.tree_decl } define fastcc %struct.tree_node* @pushdecl(%struct.tree_node* %x) nounwind { +; CHECK-LABEL: pushdecl: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: # %bb.2: # %bb17.i +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_1: # %bb160 +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: 
retl entry: %tmp3.i40 = icmp eq %struct.binding_level* null, null ; [#uses=2] br label %bb140 @@ -39,10 +53,10 @@ bb273.i: ; preds = %bb226.i ret %struct.tree_node* null bb260: ; preds = %bb226.i - tail call void (i8*, i32, ...) @pedwarn_with_file_and_line( i8* %file.0, i32 %line.0, i8* null ) nounwind + tail call void (i8*, i32, ...) @pedwarn_with_file_and_line( i8* %file.0, i32 %line.0, i8* null ) nounwind ret %struct.tree_node* null bb344: ; preds = %bb174 ret %struct.tree_node* null } -declare void @pedwarn_with_file_and_line(i8*, i32, ...) nounwind +declare void @pedwarn_with_file_and_line(i8*, i32, ...) nounwind diff --git a/llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll b/llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll --- a/llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll +++ b/llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -disable-cgp-branch-opts | grep movw | not grep ", %e" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-apple-darwin -disable-cgp-branch-opts | FileCheck %s %struct.DBC_t = type { i32, i8*, i16, %struct.DBC_t*, i8*, i8*, i8*, i8*, i8*, %struct.DBC_t*, i32, i32, i32, i32, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i16, i16, i32*, i8, i16, %struct.DRVOPT*, i16 } %struct.DRVOPT = type { i16, i32, i8, %struct.DRVOPT* } @@ -7,6 +8,89 @@ @iodbcdm_global_lock = external global %struct.pthread_mutex_t ; <%struct.pthread_mutex_t*> [#uses=1] define i16 @SQLDriversW(i8* %henv, i16 zeroext %fDir, i32* %szDrvDesc, i16 signext %cbDrvDescMax, i16* %pcbDrvDesc, i32* %szDrvAttr, i16 signext %cbDrvAttrMax, i16* %pcbDrvAttr) nounwind { +; CHECK-LABEL: SQLDriversW: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je LBB0_1 +; CHECK-NEXT: ## 
%bb.3: ## %bb28 +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movw $-2, %si +; CHECK-NEXT: jne LBB0_6 +; CHECK-NEXT: ## %bb.4: ## %bb37 +; CHECK-NEXT: movw $0, 40(%edi) +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: leal (,%ecx,4), %eax +; CHECK-NEXT: leal (,%ebx,4), %ecx +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: movzwl %bp, %edx +; CHECK-NEXT: cwtl +; CHECK-NEXT: movswl %cx, %ecx +; CHECK-NEXT: pushl $87 +; CHECK-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: pushl $0 +; CHECK-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-NEXT: pushl %ecx +; CHECK-NEXT: pushl $0 +; CHECK-NEXT: pushl %edx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: calll _SQLDrivers_Internal +; CHECK-NEXT: addl $48, %esp +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je LBB0_1 +; CHECK-NEXT: ## %bb.5: +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: LBB0_6: ## %done +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je LBB0_7 +; CHECK-NEXT: ## %bb.8: ## %bb167 +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: movl L_iodbcdm_global_lock$non_lazy_ptr, %eax +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: calll _pthread_mutex_unlock +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: jmp LBB0_2 +; CHECK-NEXT: LBB0_7: ## %bb150 +; CHECK-NEXT: movswl %si, %eax +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: movswl %cx, %ecx +; CHECK-NEXT: movswl %bx, %edx +; CHECK-NEXT: movzwl %bp, %esi +; CHECK-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-NEXT: pushl %ecx +; CHECK-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-NEXT: pushl %edx +; CHECK-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: pushl $1 +; CHECK-NEXT: calll 
_trace_SQLDriversW +; CHECK-NEXT: addl $48, %esp +; CHECK-NEXT: LBB0_1: ## %bb +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: LBB0_2: ## %bb +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl entry: %tmp12 = bitcast i8* %henv to %struct.GENV_t* ; <%struct.GENV_t*> [#uses=1] br i1 true, label %bb28, label %bb @@ -32,7 +116,7 @@ br i1 false, label %bb167, label %bb150 bb150: ; preds = %done %tmp157158 = sext i16 %retcode.0 to i32 ; [#uses=1] - tail call void @trace_SQLDriversW( i32 1, i32 %tmp157158, i8* %henv, i16 zeroext %fDir, i32* %szDrvDesc, i16 signext %cbDrvDescMax, i16* %pcbDrvDesc, i32* %szDrvAttr, i16 signext %cbDrvAttrMax, i16* %pcbDrvAttr ) nounwind + tail call void @trace_SQLDriversW( i32 1, i32 %tmp157158, i8* %henv, i16 zeroext %fDir, i32* %szDrvDesc, i16 signext %cbDrvDescMax, i16* %pcbDrvDesc, i32* %szDrvAttr, i16 signext %cbDrvAttrMax, i16* %pcbDrvAttr ) nounwind ret i16 0 bb167: ; preds = %done %tmp168 = tail call i32 @pthread_mutex_unlock( %struct.pthread_mutex_t* @iodbcdm_global_lock ) nounwind ; [#uses=0] @@ -41,6 +125,6 @@ declare i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*) -declare i16 @SQLDrivers_Internal(i8*, i16 zeroext , i8*, i16 signext , i16*, i8*, i16 signext , i16*, i8 zeroext ) nounwind +declare i16 @SQLDrivers_Internal(i8*, i16 zeroext , i8*, i16 signext , i16*, i8*, i16 signext , i16*, i8 zeroext ) nounwind declare void @trace_SQLDriversW(i32, i32, i8*, i16 zeroext , i32*, i16 signext , i16*, i32*, i16 signext , i16*) diff --git a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll --- a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll +++ b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin | grep xorl | grep "%e" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s 
-mtriple=i386-apple-darwin | FileCheck %s ; Make sure xorl operands are 32-bit registers. %struct.tm = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* } @@ -14,6 +15,179 @@ @.str89 = external constant [5 x i32] ; <[5 x i32]*> [#uses=1] define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(%struct.wxString* noalias sret %agg.result, %struct.wxDateTime* %this, i32* %format, %"struct.wxDateTime::TimeZone"* %tz, i1 %foo) personality i32 (...)* @__gxx_personality_v0 { +; CHECK-LABEL: _ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: subl $28, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset %esi, -20 +; CHECK-NEXT: .cfi_offset %edi, -16 +; CHECK-NEXT: .cfi_offset %ebx, -12 +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl +; CHECK-NEXT: testb $1, %bl +; CHECK-NEXT: je LBB0_27 +; CHECK-NEXT: ## %bb.1: ## %bb116.i +; CHECK-NEXT: je LBB0_27 +; CHECK-NEXT: ## %bb.2: ## %bb52.i.i +; CHECK-NEXT: testb $1, %bl +; CHECK-NEXT: je LBB0_27 +; CHECK-NEXT: ## %bb.3: ## %bb142.i +; CHECK-NEXT: je LBB0_27 +; CHECK-NEXT: ## %bb.4: +; CHECK-NEXT: movl L_.str89$non_lazy_ptr, %edi +; CHECK-NEXT: movb $1, %bh +; CHECK-NEXT: movl $274877907, %ebp ## imm = 0x10624DD3 +; CHECK-NEXT: jmp LBB0_5 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_23: ## %bb7806 +; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: Ltmp16: +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, (%esp) +; CHECK-NEXT: calll __ZN12wxStringBase6appendEmw +; CHECK-NEXT: Ltmp17: +; CHECK-NEXT: LBB0_5: ## %bb3261 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; 
CHECK-NEXT: cmpl $37, 0 +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: ## %bb.6: ## %bb3306 +; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: Ltmp0: +; CHECK-NEXT: movl %edi, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, (%esp) +; CHECK-NEXT: calll __ZN12wxStringBaseaSEPKw +; CHECK-NEXT: Ltmp1: +; CHECK-NEXT: ## %bb.7: ## %bb3314 +; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: movl 0, %eax +; CHECK-NEXT: cmpl $121, %eax +; CHECK-NEXT: ja LBB0_27 +; CHECK-NEXT: ## %bb.8: ## %bb3314 +; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: jmpl *LJTI0_0(,%eax,4) +; CHECK-NEXT: LBB0_10: ## %bb5809 +; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: ## %bb.11: ## %bb5809 +; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: testb %bh, %bh +; CHECK-NEXT: je LBB0_27 +; CHECK-NEXT: ## %bb.12: ## %bb91.i8504 +; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: testb $1, %bl +; CHECK-NEXT: je LBB0_14 +; CHECK-NEXT: ## %bb.13: ## %bb155.i8541 +; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: Ltmp4: +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, (%esp) +; CHECK-NEXT: calll _gmtime_r +; CHECK-NEXT: Ltmp5: +; CHECK-NEXT: LBB0_14: ## %bb182.i8560 +; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: testb $1, %bl +; CHECK-NEXT: je LBB0_15 +; CHECK-NEXT: ## %bb.17: ## %bb278.i8617 +; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: je LBB0_19 +; CHECK-NEXT: ## %bb.18: ## %bb440.i8663 +; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: Ltmp6: +; CHECK-NEXT: movl L_.str4$non_lazy_ptr, %eax +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl L_.str33$non_lazy_ptr, %eax +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl L__ZZNK10wxDateTime5GetTmERKNS_8TimeZoneEE12__FUNCTION__$non_lazy_ptr, %eax +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl 
L_.str$non_lazy_ptr, %eax +; CHECK-NEXT: movl %eax, (%esp) +; CHECK-NEXT: movl $1717, {{[0-9]+}}(%esp) ## imm = 0x6B5 +; CHECK-NEXT: calll __Z10wxOnAssertPKwiPKcS0_S0_ +; CHECK-NEXT: Ltmp7: +; CHECK-NEXT: LBB0_19: ## %bb448.i8694 +; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: jmp LBB0_20 +; CHECK-NEXT: LBB0_15: ## %bb187.i8591 +; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: ## %bb.16: ## %bb265.i8606 +; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: imull %ebp +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: shrl $6, %edx +; CHECK-NEXT: addl %eax, %edx +; CHECK-NEXT: imull $1000, %edx, %eax ## imm = 0x3E8 +; CHECK-NEXT: negl %eax +; CHECK-NEXT: LBB0_20: ## %invcont5814 +; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: movzwl %ax, %eax +; CHECK-NEXT: Ltmp8: +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, (%esp) +; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: Ltmp9: +; CHECK-NEXT: ## %bb.21: ## %invcont5831 +; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: Ltmp10: +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, (%esp) +; CHECK-NEXT: calll __ZN12wxStringBase10ConcatSelfEmPKwm +; CHECK-NEXT: Ltmp11: +; CHECK-NEXT: jmp LBB0_5 +; CHECK-NEXT: LBB0_22: ## %bb5968 +; CHECK-NEXT: Ltmp2: +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, (%esp) +; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: Ltmp3: +; CHECK-NEXT: jmp LBB0_27 +; CHECK-NEXT: LBB0_9: ## %bb5657 +; CHECK-NEXT: Ltmp13: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl 
{{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, (%esp) +; CHECK-NEXT: calll __ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE +; CHECK-NEXT: Ltmp14: +; CHECK-NEXT: LBB0_27: ## %bb115.critedge.i +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: addl $28, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl $4 +; CHECK-NEXT: LBB0_25: ## %lpad.loopexit.split-lp +; CHECK-NEXT: Ltmp15: +; CHECK-NEXT: jmp LBB0_27 +; CHECK-NEXT: LBB0_26: ## %lpad8185 +; CHECK-NEXT: Ltmp12: +; CHECK-NEXT: jmp LBB0_27 +; CHECK-NEXT: LBB0_24: ## %lpad.loopexit +; CHECK-NEXT: Ltmp18: +; CHECK-NEXT: jmp LBB0_27 +; CHECK-NEXT: Lfunc_end0: entry: br i1 %foo, label %bb116.i, label %bb115.critedge.i bb115.critedge.i: ; preds = %entry @@ -75,7 +249,7 @@ bb5484: ; preds = %bb3314 ret void bb5657: ; preds = %bb3314 - %tmp5661 = invoke zeroext i16 @_ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE( %struct.wxDateTime* %this, %"struct.wxDateTime::TimeZone"* %tz ) + %tmp5661 = invoke zeroext i16 @_ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE( %struct.wxDateTime* %this, %"struct.wxDateTime::TimeZone"* %tz ) to label %invcont5660 unwind label %lpad ; [#uses=0] invcont5660: ; preds = %bb5657 ret void @@ -120,7 +294,7 @@ invoke void (%struct.wxString*, i32*, ...) @_ZN8wxString6FormatEPKwz( %struct.wxString* noalias sret null, i32* null, i32 %tmp58165817 ) to label %invcont5831 unwind label %lpad invcont5831: ; preds = %invcont5814 - %tmp5862 = invoke zeroext i8 @_ZN12wxStringBase10ConcatSelfEmPKwm( %struct.wxStringBase* null, i32 0, i32* null, i32 0 ) + %tmp5862 = invoke zeroext i8 @_ZN12wxStringBase10ConcatSelfEmPKwm( %struct.wxStringBase* null, i32 0, i32* null, i32 0 ) to label %bb7834 unwind label %lpad8185 ; [#uses=0] bb5968: ; preds = %bb3314 invoke void (%struct.wxString*, i32*, ...) 
@_ZN8wxString6FormatEPKwz( %struct.wxString* noalias sret null, i32* null, i32 0 ) @@ -162,11 +336,11 @@ declare void @_Z10wxOnAssertPKwiPKcS0_S0_(i32*, i32, i8*, i32*, i32*) -declare zeroext i8 @_ZN12wxStringBase10ConcatSelfEmPKwm(%struct.wxStringBase*, i32, i32*, i32) +declare zeroext i8 @_ZN12wxStringBase10ConcatSelfEmPKwm(%struct.wxStringBase*, i32, i32*, i32) declare %struct.tm* @gmtime_r(i32*, %struct.tm*) -declare zeroext i16 @_ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE(%struct.wxDateTime*, %"struct.wxDateTime::TimeZone"*) +declare zeroext i16 @_ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE(%struct.wxDateTime*, %"struct.wxDateTime::TimeZone"*) declare %struct.wxStringBase* @_ZN12wxStringBase6appendEmw(%struct.wxStringBase*, i32, i32) diff --git a/llvm/test/CodeGen/X86/2008-04-24-MemCpyBug.ll b/llvm/test/CodeGen/X86/2008-04-24-MemCpyBug.ll --- a/llvm/test/CodeGen/X86/2008-04-24-MemCpyBug.ll +++ b/llvm/test/CodeGen/X86/2008-04-24-MemCpyBug.ll @@ -1,12 +1,31 @@ -; RUN: llc < %s -mtriple=i686-- | not grep 120 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s ; Don't accidentally add the offset twice for trailing bytes. 
%struct.S63 = type { [63 x i8] } @g1s63 = external global %struct.S63 ; <%struct.S63*> [#uses=1] -declare void @test63(%struct.S63* byval align 4 ) nounwind +declare void @test63(%struct.S63* byval align 4 ) nounwind define void @testit63_entry_2E_ce() nounwind { - tail call void @test63( %struct.S63* byval align 4 @g1s63 ) nounwind +; CHECK-LABEL: testit63_entry_2E_ce: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $64, %esp +; CHECK-NEXT: movl $15, %ecx +; CHECK-NEXT: movl %esp, %edi +; CHECK-NEXT: movl $g1s63, %esi +; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) +; CHECK-NEXT: movb g1s63+62, %al +; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp) +; CHECK-NEXT: movzwl g1s63+60, %eax +; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) +; CHECK-NEXT: calll test63 +; CHECK-NEXT: addl $64, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: retl + tail call void @test63( %struct.S63* byval align 4 @g1s63 ) nounwind ret void } diff --git a/llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll --- a/llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll +++ b/llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll @@ -1,7 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl > %t -; RUN: not grep "r[abcd]x" %t -; RUN: not grep "r[ds]i" %t -; RUN: not grep "r[bs]p" %t +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s %struct.BITMAP = type { i16, i16, i32, i32, i32, i32, i32, i32, i8*, i8* } %struct.BltData = type { float, float, float, float } @@ -11,6 +9,58 @@ %struct.BltSh = type { i8, i8, i8, i8, float, float*, float*, float*, float*, i32, i32, float*, float*, float* } define void @t(%struct.BltDepth* %depth, %struct.BltOp* %bop, i32 %mode) nounwind { +; CHECK-LABEL: t: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: cmpl $18, %edx +; CHECK-NEXT: je LBB0_3 +; CHECK-NEXT: ## %bb.1: ## %entry +; 
CHECK-NEXT: cmpl $1, %edx +; CHECK-NEXT: jne LBB0_10 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_2: ## %bb2898.us +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp LBB0_2 +; CHECK-NEXT: LBB0_3: ## %bb13086.preheader +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_4: ## %bb13088 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne LBB0_5 +; CHECK-NEXT: ## %bb.6: ## %bb13101 +; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: jmp LBB0_7 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_5: ## in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: movl $65535, %ecx ## imm = 0xFFFF +; CHECK-NEXT: LBB0_7: ## %bb13107 +; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: shll $16, %edx +; CHECK-NEXT: subl %ecx, %edx +; CHECK-NEXT: incl %edx +; CHECK-NEXT: shrl $16, %edx +; CHECK-NEXT: subl %edx, %ecx +; CHECK-NEXT: testw %cx, %cx +; CHECK-NEXT: je LBB0_4 +; CHECK-NEXT: ## %bb.8: ## %bb13236 +; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: cmpw $-1, %cx +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne LBB0_4 +; CHECK-NEXT: ## %bb.9: ## %bb13572 +; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: movzwl %cx, %ecx +; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: shll $16, %edx +; CHECK-NEXT: subl %ecx, %edx +; CHECK-NEXT: incl %edx +; CHECK-NEXT: shrl $16, %edx +; CHECK-NEXT: movw %dx, 0 +; CHECK-NEXT: jmp LBB0_4 +; CHECK-NEXT: LBB0_10: ## %return +; CHECK-NEXT: retq entry: switch i32 %mode, label %return [ i32 1, label %bb2898.us diff --git a/llvm/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll b/llvm/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll --- a/llvm/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll +++ b/llvm/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll @@ -1,9 +1,25 @@ -; RUN: llc < %s -enable-unsafe-fp-math -mtriple=i686-- | grep jp +; NOTE: Assertions 
have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -enable-unsafe-fp-math -mtriple=i686-- | FileCheck %s ; rdar://5902801 declare void @test2() define i32 @test(double %p) nounwind { +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK-NEXT: fucomp %st(0) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jp .LBB0_1 +; CHECK-NEXT: # %bb.2: # %UnifiedReturnBlock +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_1: # %bb +; CHECK-NEXT: calll test2 +; CHECK-NEXT: movl $17, %eax +; CHECK-NEXT: retl %tmp5 = fcmp uno double %p, 0.000000e+00 br i1 %tmp5, label %bb, label %UnifiedReturnBlock bb: diff --git a/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll b/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll --- a/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll +++ b/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s | grep abort | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s ; Calls to abort should all be merged ; ModuleID = '5898899.c' @@ -7,6 +8,53 @@ %struct.BoundaryAlignment = type { [3 x i8], i8, i16, i16, i8, [2 x i8] } define void @passing2(i64 %str.0, i64 %str.1, i16 signext %s, i32 %j, i8 signext %c, i16 signext %t, i16 signext %u, i8 signext %d) nounwind optsize { +; CHECK-LABEL: passing2: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %ah, {{[0-9]+}}(%rsp) +; CHECK-NEXT: shrq $16, %rsi +; CHECK-NEXT: movb %sil, {{[0-9]+}}(%rsp) +; CHECK-NEXT: shrq $24, %rax +; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %dx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %r8b, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %r9w, {{[0-9]+}}(%rsp) +; 
CHECK-NEXT: shll $14, %edi +; CHECK-NEXT: sarl $23, %edi +; CHECK-NEXT: cmpl %ecx, %edi +; CHECK-NEXT: jne LBB0_6 +; CHECK-NEXT: ## %bb.1: ## %bb27 +; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %al +; CHECK-NEXT: cmpb {{[0-9]+}}(%rsp), %al +; CHECK-NEXT: jne LBB0_6 +; CHECK-NEXT: ## %bb.2: ## %bb35 +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: shll $7, %eax +; CHECK-NEXT: cwtl +; CHECK-NEXT: shrl $7, %eax +; CHECK-NEXT: cmpw {{[0-9]+}}(%rsp), %ax +; CHECK-NEXT: jne LBB0_6 +; CHECK-NEXT: ## %bb.3: ## %bb51 +; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: shll $7, %eax +; CHECK-NEXT: cwtl +; CHECK-NEXT: shrl $7, %eax +; CHECK-NEXT: cmpw {{[0-9]+}}(%rsp), %ax +; CHECK-NEXT: jne LBB0_6 +; CHECK-NEXT: ## %bb.4: ## %bb67 +; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %al +; CHECK-NEXT: cmpb {{[0-9]+}}(%rsp), %al +; CHECK-NEXT: jne LBB0_6 +; CHECK-NEXT: ## %bb.5: ## %bb75 +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: retq +; CHECK-NEXT: LBB0_6: ## %bb +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: callq _abort entry: %str_addr = alloca %struct.BoundaryAlignment ; <%struct.BoundaryAlignment*> [#uses=7] %s_addr = alloca i16 ; [#uses=1] @@ -64,7 +112,7 @@ br i1 %toBool, label %bb, label %bb27 bb: ; preds = %entry - call void (...) @abort( ) noreturn nounwind + call void (...) @abort( ) noreturn nounwind unreachable bb27: ; preds = %entry @@ -77,7 +125,7 @@ br i1 %toBool33, label %bb34, label %bb35 bb34: ; preds = %bb27 - call void (...) @abort( ) noreturn nounwind + call void (...) @abort( ) noreturn nounwind unreachable bb35: ; preds = %bb27 @@ -98,7 +146,7 @@ br i1 %toBool49, label %bb50, label %bb51 bb50: ; preds = %bb35 - call void (...) @abort( ) noreturn nounwind + call void (...) @abort( ) noreturn nounwind unreachable bb51: ; preds = %bb35 @@ -119,7 +167,7 @@ br i1 %toBool65, label %bb66, label %bb67 bb66: ; preds = %bb51 - call void (...) @abort( ) noreturn nounwind + call void (...) 
@abort( ) noreturn nounwind unreachable bb67: ; preds = %bb51 @@ -132,7 +180,7 @@ br i1 %toBool73, label %bb74, label %bb75 bb74: ; preds = %bb67 - call void (...) @abort( ) noreturn nounwind + call void (...) @abort( ) noreturn nounwind unreachable bb75: ; preds = %bb67 @@ -142,4 +190,4 @@ ret void } -declare void @abort(...) noreturn nounwind +declare void @abort(...) noreturn nounwind diff --git a/llvm/test/CodeGen/X86/2008-05-21-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-05-21-CoalescerBug.ll --- a/llvm/test/CodeGen/X86/2008-05-21-CoalescerBug.ll +++ b/llvm/test/CodeGen/X86/2008-05-21-CoalescerBug.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=i686-- -O0 -fast-isel=false -optimize-regalloc -regalloc=basic | grep mov | count 5 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -O0 -fast-isel=false -optimize-regalloc -regalloc=basic | FileCheck %s ; PR2343 %llvm.dbg.anchor.type = type { i32, i32 } @@ -70,6 +71,33 @@ @llvm.used = appending global [1 x i8*] [ i8* bitcast (%struct.edge_def* (%struct.edge_def*, %struct.basic_block_def*)* @tree_redirect_edge_and_branch to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] define %struct.edge_def* @tree_redirect_edge_and_branch(%struct.edge_def* %e1, %struct.basic_block_def* %dest2) nounwind { +; CHECK-LABEL: tree_redirect_edge_and_branch: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: movb $1, %cl +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: # implicit-def: $esi +; CHECK-NEXT: jmp .LBB0_2 +; CHECK-NEXT: .LBB0_1: # %bb483 +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: movl 0, %edx +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: jmp .LBB0_2 +; CHECK-NEXT: .LBB0_2: # %bb497 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: jmp .LBB0_3 +; CHECK-NEXT: .LBB0_3: # %bb502 +; CHECK-NEXT: testb 
$1, %cl +; CHECK-NEXT: je .LBB0_5 +; CHECK-NEXT: jmp .LBB0_4 +; CHECK-NEXT: .LBB0_4: # %bb507 +; CHECK-NEXT: movl $0, (%esi) +; CHECK-NEXT: jmp .LBB0_5 +; CHECK-NEXT: .LBB0_5: # %bb841 entry: br label %bb497 diff --git a/llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll --- a/llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll +++ b/llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movsd | count 5 -; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movl | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | FileCheck %s @atomic = global double 0.000000e+00 ; [#uses=1] @atomic2 = global double 0.000000e+00 ; [#uses=1] @@ -7,6 +7,20 @@ @ioport = global i32 0 ; [#uses=2] define i16 @f(i64 %x, double %y) { +; CHECK-LABEL: f: +; CHECK: # %bb.0: +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: movsd %xmm1, atomic +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: movsd %xmm1, atomic2 +; CHECK-NEXT: movsd %xmm0, anything +; CHECK-NEXT: movl ioport, %ecx +; CHECK-NEXT: movl ioport, %eax +; CHECK-NEXT: shrl $16, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: retl %b = bitcast i64 %x to double ; [#uses=1] store volatile double %b, double* @atomic ; one processor operation only store volatile double 0.000000e+00, double* @atomic2 ; one processor operation only diff --git a/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll b/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll --- a/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll +++ b/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll @@ -1,6 +1,21 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin | grep mov | count 4 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; 
RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s define i16 @test(i16* %tmp179) nounwind { +; CHECK-LABEL: test: +; CHECK: ## %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzwl (%eax), %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: andl $64512, %ecx ## imm = 0xFC00 +; CHECK-NEXT: cmpl $32768, %ecx ## imm = 0x8000 +; CHECK-NEXT: jne LBB0_2 +; CHECK-NEXT: ## %bb.1: ## %bb189 +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax +; CHECK-NEXT: retl +; CHECK-NEXT: LBB0_2: ## %bb288 +; CHECK-NEXT: movw $32, %ax +; CHECK-NEXT: retl %tmp180 = load i16, i16* %tmp179, align 2 ; [#uses=2] %tmp184 = and i16 %tmp180, -1024 ; [#uses=1] %tmp186 = icmp eq i16 %tmp184, -32768 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/2008-06-25-VecISelBug.ll b/llvm/test/CodeGen/X86/2008-06-25-VecISelBug.ll --- a/llvm/test/CodeGen/X86/2008-06-25-VecISelBug.ll +++ b/llvm/test/CodeGen/X86/2008-06-25-VecISelBug.ll @@ -1,6 +1,11 @@ -; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | not grep pslldq +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | FileCheck %s define void @t() nounwind { +; CHECK-LABEL: t: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,0.0E+0,1.0E+0] +; CHECK-NEXT: movaps %xmm0, 0 entry: %tmp1 = shufflevector <4 x float> zeroinitializer, <4 x float> < float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > %tmp2 = insertelement <4 x float> %tmp1, float 1.000000e+00, i32 3 diff --git a/llvm/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll b/llvm/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll --- a/llvm/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll +++ b/llvm/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll @@ -1,6 +1,8 @@ -; RUN: llc < %s | grep ax +; RUN: llc < %s | FileCheck %s ; PR2024 +; CHECK: .init.text,"ax" + target datalayout = 
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" target triple = "i386-pc-linux-gnu" diff --git a/llvm/test/CodeGen/X86/2008-07-11-SHLBy1.ll b/llvm/test/CodeGen/X86/2008-07-11-SHLBy1.ll --- a/llvm/test/CodeGen/X86/2008-07-11-SHLBy1.ll +++ b/llvm/test/CodeGen/X86/2008-07-11-SHLBy1.ll @@ -1,5 +1,13 @@ -; RUN: llc < %s -mtriple=x86_64-- -o - | not grep shr +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s + define i128 @sl(i128 %x) { +; CHECK-LABEL: sl: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rsi, %rdx +; CHECK-NEXT: shldq $1, %rdi, %rdx +; CHECK-NEXT: leaq (%rdi,%rdi), %rax +; CHECK-NEXT: retq %t = shl i128 %x, 1 ret i128 %t } diff --git a/llvm/test/CodeGen/X86/2008-08-06-CmpStride.ll b/llvm/test/CodeGen/X86/2008-08-06-CmpStride.ll --- a/llvm/test/CodeGen/X86/2008-08-06-CmpStride.ll +++ b/llvm/test/CodeGen/X86/2008-08-06-CmpStride.ll @@ -1,10 +1,29 @@ -; RUN: llc -mtriple=x86_64-- < %s -o - | grep "cmpl \$[1], %" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s @.str = internal constant [4 x i8] c"%d\0A\00" declare i32 @printf(i8* noalias , ...) 
nounwind define i32 @main() nounwind { +; CHECK-LABEL: main: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl $10271, %ebx # imm = 0x281F +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %forbody +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl $.str, %edi +; CHECK-NEXT: movl %ebx, %esi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: callq printf +; CHECK-NEXT: addl $-10, %ebx +; CHECK-NEXT: cmpl $1, %ebx +; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: # %bb.2: # %afterfor +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq entry: br label %forbody diff --git a/llvm/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll b/llvm/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll --- a/llvm/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll +++ b/llvm/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll @@ -1,6 +1,13 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep xorl +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s define i32 @foo(<4 x float> %a, <4 x float> %b) nounwind { +; CHECK-LABEL: foo: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: setae %al +; CHECK-NEXT: retq entry: tail call i32 @llvm.x86.sse.ucomige.ss( <4 x float> %a, <4 x float> %b ) nounwind readnone ret i32 %0 diff --git a/llvm/test/CodeGen/X86/2008-09-25-sseregparm-1.ll b/llvm/test/CodeGen/X86/2008-09-25-sseregparm-1.ll --- a/llvm/test/CodeGen/X86/2008-09-25-sseregparm-1.ll +++ b/llvm/test/CodeGen/X86/2008-09-25-sseregparm-1.ll @@ -1,19 +1,35 @@ -; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | grep movs | count 2 -; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | grep fld | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | FileCheck %s ; check 'inreg' attribute for sse_regparm define inreg double @foo1() 
nounwind { +; CHECK-LABEL: foo1: +; CHECK: # %bb.0: +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: retl ret double 1.0 } define inreg float @foo2() nounwind { +; CHECK-LABEL: foo2: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: retl ret float 1.0 } define double @bar() nounwind { +; CHECK-LABEL: bar: +; CHECK: # %bb.0: +; CHECK-NEXT: fld1 +; CHECK-NEXT: retl ret double 1.0 } define float @bar2() nounwind { +; CHECK-LABEL: bar2: +; CHECK: # %bb.0: +; CHECK-NEXT: fld1 +; CHECK-NEXT: retl ret float 1.0 } diff --git a/llvm/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll b/llvm/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll --- a/llvm/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll +++ b/llvm/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll @@ -1,12 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mattr=-sse2,-sse3,-sse | FileCheck %s + ; ModuleID = 'nan.bc' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-f80:32:32-v64:64:64-v128:128:128-a0:0:64" target triple = "i686-apple-darwin8" -; RUN: llc < %s -mattr=-sse2,-sse3,-sse | grep fldl ; This NaN should be shortened to a double (not a float). 
declare x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %f) define i32 @main() { +; CHECK-LABEL: main: +; CHECK: ## %bb.0: ## %entry_nan.main +; CHECK-NEXT: subl $28, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: fldl LCPI0_0 +; CHECK-NEXT: fstpt (%esp) +; CHECK-NEXT: calll __D3nan5printFeZv +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: addl $28, %esp +; CHECK-NEXT: retl entry_nan.main: call x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 0xK7FFFC001234000000800) ret i32 0 diff --git a/llvm/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll b/llvm/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll --- a/llvm/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll +++ b/llvm/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll @@ -1,7 +1,8 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; ModuleID = 'nan.bc' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-f80:32:32-v64:64:64-v128:128:128-a0:0:64" target triple = "i686-apple-darwin8" -; RUN: llc < %s -mattr=-sse2,-sse3,-sse | grep fldt | count 3 +; RUN: llc < %s -mattr=-sse2,-sse3,-sse | FileCheck %s ; it is not safe to shorten any of these NaNs. 
declare x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %f) @@ -9,6 +10,25 @@ @_D3nan4rvale = global x86_fp80 0xK7FFF8001234000000000 ; [#uses=1] define i32 @main() { +; CHECK-LABEL: main: +; CHECK: ## %bb.0: ## %entry_nan.main +; CHECK-NEXT: subl $28, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: fldt __D3nan4rvale +; CHECK-NEXT: fstpt (%esp) +; CHECK-NEXT: calll __D3nan5printFeZv +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: fldt LCPI0_0 +; CHECK-NEXT: fstpt (%esp) +; CHECK-NEXT: calll __D3nan5printFeZv +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: fldt LCPI0_1 +; CHECK-NEXT: fstpt (%esp) +; CHECK-NEXT: calll __D3nan5printFeZv +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: addl $28, %esp +; CHECK-NEXT: retl entry_nan.main: %tmp = load x86_fp80, x86_fp80* @_D3nan4rvale ; [#uses=1] call x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %tmp) diff --git a/llvm/test/CodeGen/X86/2008-10-24-FlippedCompare.ll b/llvm/test/CodeGen/X86/2008-10-24-FlippedCompare.ll --- a/llvm/test/CodeGen/X86/2008-10-24-FlippedCompare.ll +++ b/llvm/test/CodeGen/X86/2008-10-24-FlippedCompare.ll @@ -1,6 +1,18 @@ -; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 -o - | not grep "ucomiss[^,]*esp" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | FileCheck %s define void @f(float %wt) { +; CHECK-LABEL: f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl $44 +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: calll g +; CHECK-NEXT: addl $4, %esp +; CHECK-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: retl entry: %0 = fcmp ogt float %wt, 0.000000e+00 ; [#uses=1] %1 = tail call i32 @g(i32 44) ; [#uses=3] diff --git a/llvm/test/CodeGen/X86/2008-11-29-ULT-Sign.ll b/llvm/test/CodeGen/X86/2008-11-29-ULT-Sign.ll --- a/llvm/test/CodeGen/X86/2008-11-29-ULT-Sign.ll +++ 
b/llvm/test/CodeGen/X86/2008-11-29-ULT-Sign.ll @@ -1,8 +1,19 @@ -; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep "jns" | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" target triple = "i686-pc-linux-gnu" define i32 @a(i32 %x) nounwind { +; CHECK-LABEL: a: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: jns .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: calll b +; CHECK-NEXT: .LBB0_2: # %if.end +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: retl entry: %cmp = icmp ult i32 %x, -2147483648 ; [#uses=1] br i1 %cmp, label %if.end, label %if.then diff --git a/llvm/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll b/llvm/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll --- a/llvm/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll +++ b/llvm/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin | not grep lea +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s ; The inner loop should use [reg] addressing, not [reg+reg] addressing. 
; rdar://6403965 @@ -6,6 +7,26 @@ target triple = "i386-apple-darwin9.5" define i8* @test(i8* %Q, i32* %L) nounwind { +; CHECK-LABEL: test: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: jmp LBB0_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_1: ## %bb +; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: incl %eax +; CHECK-NEXT: LBB0_2: ## %bb1 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movzbl (%eax), %ecx +; CHECK-NEXT: cmpb $12, %cl +; CHECK-NEXT: je LBB0_1 +; CHECK-NEXT: ## %bb.3: ## %bb1 +; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: cmpb $42, %cl +; CHECK-NEXT: je LBB0_1 +; CHECK-NEXT: ## %bb.4: ## %bb3 +; CHECK-NEXT: movb $4, 2(%eax) +; CHECK-NEXT: retl entry: br label %bb1 diff --git a/llvm/test/CodeGen/X86/2008-12-02-dagcombine-1.ll b/llvm/test/CodeGen/X86/2008-12-02-dagcombine-1.ll --- a/llvm/test/CodeGen/X86/2008-12-02-dagcombine-1.ll +++ b/llvm/test/CodeGen/X86/2008-12-02-dagcombine-1.ll @@ -1,8 +1,16 @@ -; RUN: llc < %s | grep "(%esp)" | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s + target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9.5" ; a - a should be found and removed, leaving refs to only L and P define i8* @test(i8* %a, i8* %L, i8* %P) nounwind { +; CHECK-LABEL: test: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl $-2, %eax +; CHECK-NEXT: retl entry: %0 = ptrtoint i8* %a to i32 %1 = sub i32 -2, %0 diff --git a/llvm/test/CodeGen/X86/2008-12-02-dagcombine-2.ll b/llvm/test/CodeGen/X86/2008-12-02-dagcombine-2.ll --- a/llvm/test/CodeGen/X86/2008-12-02-dagcombine-2.ll +++ b/llvm/test/CodeGen/X86/2008-12-02-dagcombine-2.ll @@ -1,8 +1,14 @@ -; RUN: llc < %s | grep "(%esp)" | 
count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9.5" ; a - a should be found and removed, leaving refs to only L and P define i8* @test(i8* %a, i8* %L, i8* %P) nounwind { +; CHECK-LABEL: test: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: retl entry: %0 = ptrtoint i8* %a to i32 %1 = ptrtoint i8* %P to i32 diff --git a/llvm/test/CodeGen/X86/2008-12-02-dagcombine-3.ll b/llvm/test/CodeGen/X86/2008-12-02-dagcombine-3.ll --- a/llvm/test/CodeGen/X86/2008-12-02-dagcombine-3.ll +++ b/llvm/test/CodeGen/X86/2008-12-02-dagcombine-3.ll @@ -1,9 +1,18 @@ -; RUN: llc < %s | grep add | count 2 -; RUN: llc < %s | grep sub | grep -v subsections | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s + target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9.5" ; this should be rearranged to have two +s and one - define i32 @test(i8* %a, i8* %L, i8* %P) nounwind { +; CHECK-LABEL: test: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: addl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: subl %ecx, %eax +; CHECK-NEXT: addl $-2, %eax +; CHECK-NEXT: retl entry: %0 = ptrtoint i8* %P to i32 %1 = sub i32 -2, %0 diff --git a/llvm/test/CodeGen/X86/2008-12-16-dagcombine-4.ll b/llvm/test/CodeGen/X86/2008-12-16-dagcombine-4.ll --- a/llvm/test/CodeGen/X86/2008-12-16-dagcombine-4.ll +++ b/llvm/test/CodeGen/X86/2008-12-16-dagcombine-4.ll @@ -1,8 +1,14 @@ -; RUN: llc < %s | grep "(%esp)" | count 2 +; NOTE: Assertions 
have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9.5" ; a - a should be found and removed, leaving refs to only L and P define i32 @test(i32 %a, i32 %L, i32 %P) nounwind { +; CHECK-LABEL: test: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: retl entry: %0 = sub i32 %a, %L %1 = add i32 %P, %0 diff --git a/llvm/test/CodeGen/X86/2008-12-22-dagcombine-5.ll b/llvm/test/CodeGen/X86/2008-12-22-dagcombine-5.ll --- a/llvm/test/CodeGen/X86/2008-12-22-dagcombine-5.ll +++ b/llvm/test/CodeGen/X86/2008-12-22-dagcombine-5.ll @@ -1,8 +1,14 @@ -; RUN: llc < %s | grep "(%esp)" | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9.5" ; -(-a) - a should be found and removed, leaving refs to only L and P define i32 @test(i32 %a, i32 %L, i32 %P) nounwind { +; CHECK-LABEL: test: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: retl entry: %0 = sub i32 %L, %a %1 = sub i32 %P, %0 diff --git a/llvm/test/CodeGen/X86/2008-12-23-crazy-address.ll b/llvm/test/CodeGen/X86/2008-12-23-crazy-address.ll --- a/llvm/test/CodeGen/X86/2008-12-23-crazy-address.ll +++ b/llvm/test/CodeGen/X86/2008-12-23-crazy-address.ll @@ -1,8 +1,22 @@ -; RUN: llc < %s -mtriple=i686-- -relocation-model=static | grep "lea.*X.*esp" | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -relocation-model=static | FileCheck %s @X = external 
global [0 x i32] define void @foo() nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: calll frob +; CHECK-NEXT: addl $4, %esp +; CHECK-NEXT: leal X(%esp), %eax +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: calll borf +; CHECK-NEXT: addl $4, %esp +; CHECK-NEXT: popl %eax +; CHECK-NEXT: retl entry: %Y = alloca i32 call void @frob(i32* %Y) nounwind @@ -14,6 +28,21 @@ } define void @bar(i32 %i) nounwind { +; CHECK-LABEL: bar: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $40, %esp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: calll frob +; CHECK-NEXT: addl $4, %esp +; CHECK-NEXT: leal X(%esp,%esi,4), %eax +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: calll borf +; CHECK-NEXT: addl $44, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl entry: %Y = alloca [10 x i32] %0 = getelementptr [10 x i32], [10 x i32]* %Y, i32 0, i32 0 diff --git a/llvm/test/CodeGen/X86/2008-12-23-dagcombine-6.ll b/llvm/test/CodeGen/X86/2008-12-23-dagcombine-6.ll --- a/llvm/test/CodeGen/X86/2008-12-23-dagcombine-6.ll +++ b/llvm/test/CodeGen/X86/2008-12-23-dagcombine-6.ll @@ -1,8 +1,14 @@ -; RUN: llc < %s | grep "(%esp)" | count 4 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9.5" ; a - a should be found and removed, leaving refs to only L and P define i32 @test(i32 %a, i32 %L, i32 %P) nounwind { +; CHECK-LABEL: test: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: retl entry: %0 = add i32 %a, %L %1 = add i32 %P, %0 @@ -13,6 +19,11 @@ ret i32 %2 } define i32 @test2(i32 %a, i32 %L, i32 %P) 
nounwind { +; CHECK-LABEL: test2: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: retl entry: %0 = add i32 %L, %a %1 = add i32 %P, %0 diff --git a/llvm/test/CodeGen/X86/2009-01-31-BigShift.ll b/llvm/test/CodeGen/X86/2009-01-31-BigShift.ll --- a/llvm/test/CodeGen/X86/2009-01-31-BigShift.ll +++ b/llvm/test/CodeGen/X86/2009-01-31-BigShift.ll @@ -1,7 +1,22 @@ -; RUN: llc < %s -mtriple=i686-- | not grep and +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s ; PR3401 define void @x(i288 %i) nounwind { +; CHECK-LABEL: x: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK-NEXT: calll add +; CHECK-NEXT: addl $36, %esp +; CHECK-NEXT: retl call void @add(i288 %i) ret void } diff --git a/llvm/test/CodeGen/X86/2009-01-31-BigShift2.ll b/llvm/test/CodeGen/X86/2009-01-31-BigShift2.ll --- a/llvm/test/CodeGen/X86/2009-01-31-BigShift2.ll +++ b/llvm/test/CodeGen/X86/2009-01-31-BigShift2.ll @@ -1,7 +1,17 @@ -; RUN: llc < %s -mtriple=i686-- | grep "mov.*56" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s ; PR3449 define void @test(<8 x double>* %P, i64* %Q) nounwind { +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl 56(%ecx), %edx +; CHECK-NEXT: movl 60(%ecx), %ecx +; CHECK-NEXT: movl %ecx, 4(%eax) +; CHECK-NEXT: movl %edx, (%eax) +; CHECK-NEXT: retl %A = load <8 x double>, <8 x double>* %P ; <<8 x double>> [#uses=1] %B = bitcast <8 x double> 
%A to i512 ; [#uses=1] %C = lshr i512 %B, 448 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll b/llvm/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll --- a/llvm/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll +++ b/llvm/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll @@ -1,6 +1,9 @@ -; RUN: llc < %s | grep weak | count 3 +; RUN: llc < %s | FileCheck %s ; PR3629 +; CHECK: weak +; CHECK-NEXT: weak +; CHECK-NEXT: weak target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "x86_64-unknown-freebsd7.1" module asm ".ident\09\22$FreeBSD$\22" diff --git a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll --- a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll +++ b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll @@ -1,8 +1,6 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; REQUIRES: asserts -; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7 -relocation-model=static -o /dev/null -stats -info-output-file - > %t -; RUN: not grep spill %t -; RUN: not grep "%rsp" %t -; RUN: not grep "%rbp" %t +; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7 -relocation-model=static | FileCheck %s ; The register-pressure scheduler should be able to schedule this in a ; way that does not require spills. 
@@ -10,6 +8,229 @@ @X = external global i64 ; [#uses=25] define fastcc i64 @foo() nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq {{.*}}(%rip), %r8 +; CHECK-NEXT: movq {{.*}}(%rip), %r10 +; CHECK-NEXT: movq {{.*}}(%rip), %r9 +; CHECK-NEXT: movq {{.*}}(%rip), %r12 +; CHECK-NEXT: movq {{.*}}(%rip), %r15 +; CHECK-NEXT: movq {{.*}}(%rip), %r14 +; CHECK-NEXT: movq {{.*}}(%rip), %r11 +; CHECK-NEXT: movq {{.*}}(%rip), %rdx +; CHECK-NEXT: addq %r15, %rdx +; CHECK-NEXT: movq {{.*}}(%rip), %rsi +; CHECK-NEXT: bswapq %rsi +; CHECK-NEXT: leaq (%r11,%r14), %rbx +; CHECK-NEXT: addq %r15, %rbx +; CHECK-NEXT: addq %rdx, %rbx +; CHECK-NEXT: addq %rsi, %rbx +; CHECK-NEXT: leaq (%r9,%r10), %rsi +; CHECK-NEXT: leaq (%rsi,%r8), %rdx +; CHECK-NEXT: addq %rsi, %rdx +; CHECK-NEXT: movq {{.*}}(%rip), %rdi +; CHECK-NEXT: addq %rbx, %r12 +; CHECK-NEXT: addq %r8, %rdx +; CHECK-NEXT: bswapq %rdi +; CHECK-NEXT: addq %rbx, %rdx +; CHECK-NEXT: leaq (%r15,%r14), %rsi +; CHECK-NEXT: addq %r12, %rsi +; CHECK-NEXT: addq %r11, %rdi +; CHECK-NEXT: addq %rsi, %rdi +; CHECK-NEXT: leaq (%r10,%r8), %rbx +; CHECK-NEXT: leaq (%rdx,%rbx), %rsi +; CHECK-NEXT: addq %rbx, %rsi +; CHECK-NEXT: movq {{.*}}(%rip), %rbx +; CHECK-NEXT: addq %r12, %rdi +; CHECK-NEXT: addq %rdi, %r9 +; CHECK-NEXT: addq %rdx, %rsi +; CHECK-NEXT: addq %rdi, %rsi +; CHECK-NEXT: bswapq %rbx +; CHECK-NEXT: leaq (%r12,%r15), %rdi +; CHECK-NEXT: addq %r9, %rdi +; CHECK-NEXT: addq %r14, %rbx +; CHECK-NEXT: addq %rdi, %rbx +; CHECK-NEXT: leaq (%rdx,%r8), %rax +; CHECK-NEXT: leaq (%rsi,%rax), %rdi +; CHECK-NEXT: addq %rax, %rdi +; CHECK-NEXT: movq {{.*}}(%rip), %rcx +; CHECK-NEXT: addq %r9, %rbx +; CHECK-NEXT: addq %rbx, %r10 +; CHECK-NEXT: addq %rsi, %rdi +; CHECK-NEXT: bswapq %rcx +; CHECK-NEXT: addq %rbx, %rdi +; CHECK-NEXT: leaq (%r9,%r12), %rax +; CHECK-NEXT: addq %r10, %rax +; CHECK-NEXT: addq %r15, %rcx +; 
CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: leaq (%rsi,%rdx), %rbx +; CHECK-NEXT: leaq (%rdi,%rbx), %r11 +; CHECK-NEXT: addq %rbx, %r11 +; CHECK-NEXT: movq {{.*}}(%rip), %rbx +; CHECK-NEXT: addq %r10, %rcx +; CHECK-NEXT: addq %rcx, %r8 +; CHECK-NEXT: addq %rdi, %r11 +; CHECK-NEXT: addq %rcx, %r11 +; CHECK-NEXT: bswapq %rbx +; CHECK-NEXT: leaq (%r10,%r9), %rcx +; CHECK-NEXT: addq %r8, %rcx +; CHECK-NEXT: addq %r12, %rbx +; CHECK-NEXT: addq %rcx, %rbx +; CHECK-NEXT: leaq (%rdi,%rsi), %rax +; CHECK-NEXT: leaq (%r11,%rax), %r14 +; CHECK-NEXT: addq %rax, %r14 +; CHECK-NEXT: movq {{.*}}(%rip), %rax +; CHECK-NEXT: addq %r8, %rbx +; CHECK-NEXT: addq %rbx, %rdx +; CHECK-NEXT: addq %r11, %r14 +; CHECK-NEXT: bswapq %rax +; CHECK-NEXT: addq %rbx, %r14 +; CHECK-NEXT: leaq (%r8,%r10), %rbx +; CHECK-NEXT: addq %rdx, %rbx +; CHECK-NEXT: addq %r9, %rax +; CHECK-NEXT: addq %rbx, %rax +; CHECK-NEXT: leaq (%r11,%rdi), %rbx +; CHECK-NEXT: leaq (%r14,%rbx), %r9 +; CHECK-NEXT: addq %rbx, %r9 +; CHECK-NEXT: movq {{.*}}(%rip), %rbx +; CHECK-NEXT: addq %rdx, %rax +; CHECK-NEXT: addq %rax, %rsi +; CHECK-NEXT: addq %r14, %r9 +; CHECK-NEXT: addq %rax, %r9 +; CHECK-NEXT: bswapq %rbx +; CHECK-NEXT: leaq (%rdx,%r8), %rax +; CHECK-NEXT: addq %rsi, %rax +; CHECK-NEXT: addq %r10, %rbx +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: leaq (%r14,%r11), %rax +; CHECK-NEXT: leaq (%r9,%rax), %r10 +; CHECK-NEXT: addq %rax, %r10 +; CHECK-NEXT: movq {{.*}}(%rip), %rax +; CHECK-NEXT: addq %rsi, %rbx +; CHECK-NEXT: addq %rbx, %rdi +; CHECK-NEXT: addq %r9, %r10 +; CHECK-NEXT: bswapq %rax +; CHECK-NEXT: addq %rbx, %r10 +; CHECK-NEXT: leaq (%rsi,%rdx), %rbx +; CHECK-NEXT: addq %rdi, %rbx +; CHECK-NEXT: addq %r8, %rax +; CHECK-NEXT: addq %rbx, %rax +; CHECK-NEXT: leaq (%r9,%r14), %rbx +; CHECK-NEXT: leaq (%r10,%rbx), %r8 +; CHECK-NEXT: addq %rbx, %r8 +; CHECK-NEXT: movq {{.*}}(%rip), %rbx +; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: addq %rax, %r11 +; CHECK-NEXT: addq %r10, %r8 +; CHECK-NEXT: addq %rax, %r8 +; 
CHECK-NEXT: bswapq %rbx +; CHECK-NEXT: leaq (%rdi,%rsi), %rax +; CHECK-NEXT: addq %r11, %rax +; CHECK-NEXT: addq %rdx, %rbx +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: leaq (%r10,%r9), %rax +; CHECK-NEXT: leaq (%r8,%rax), %r15 +; CHECK-NEXT: addq %rax, %r15 +; CHECK-NEXT: movq {{.*}}(%rip), %rax +; CHECK-NEXT: addq %r11, %rbx +; CHECK-NEXT: addq %rbx, %r14 +; CHECK-NEXT: addq %r8, %r15 +; CHECK-NEXT: bswapq %rax +; CHECK-NEXT: addq %rbx, %r15 +; CHECK-NEXT: leaq (%r11,%rdi), %rbx +; CHECK-NEXT: addq %r14, %rbx +; CHECK-NEXT: addq %rsi, %rax +; CHECK-NEXT: addq %rbx, %rax +; CHECK-NEXT: leaq (%r8,%r10), %rbx +; CHECK-NEXT: leaq (%r15,%rbx), %rsi +; CHECK-NEXT: addq %rbx, %rsi +; CHECK-NEXT: movq {{.*}}(%rip), %rbx +; CHECK-NEXT: addq %r14, %rax +; CHECK-NEXT: addq %rax, %r9 +; CHECK-NEXT: addq %r15, %rsi +; CHECK-NEXT: addq %rax, %rsi +; CHECK-NEXT: bswapq %rbx +; CHECK-NEXT: leaq (%r14,%r11), %rax +; CHECK-NEXT: addq %r9, %rax +; CHECK-NEXT: addq %rdi, %rbx +; CHECK-NEXT: addq %rax, %rbx +; CHECK-NEXT: leaq (%r15,%r8), %rax +; CHECK-NEXT: leaq (%rsi,%rax), %r12 +; CHECK-NEXT: addq %rax, %r12 +; CHECK-NEXT: movq {{.*}}(%rip), %rcx +; CHECK-NEXT: addq %r9, %rbx +; CHECK-NEXT: addq %rbx, %r10 +; CHECK-NEXT: addq %rsi, %r12 +; CHECK-NEXT: bswapq %rcx +; CHECK-NEXT: addq %rbx, %r12 +; CHECK-NEXT: leaq (%r9,%r14), %rax +; CHECK-NEXT: addq %r10, %rax +; CHECK-NEXT: addq %r11, %rcx +; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: leaq (%rsi,%r15), %rbx +; CHECK-NEXT: leaq (%r12,%rbx), %rax +; CHECK-NEXT: addq %rbx, %rax +; CHECK-NEXT: movq {{.*}}(%rip), %rbx +; CHECK-NEXT: addq %r10, %rcx +; CHECK-NEXT: addq %rcx, %r8 +; CHECK-NEXT: addq %r12, %rax +; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: bswapq %rbx +; CHECK-NEXT: leaq (%r10,%r9), %rcx +; CHECK-NEXT: addq %r8, %rcx +; CHECK-NEXT: addq %r14, %rbx +; CHECK-NEXT: addq %rcx, %rbx +; CHECK-NEXT: leaq (%r12,%rsi), %rdx +; CHECK-NEXT: leaq (%rax,%rdx), %rcx +; CHECK-NEXT: addq %rdx, %rcx +; CHECK-NEXT: movq {{.*}}(%rip), 
%rdx +; CHECK-NEXT: addq %r8, %rbx +; CHECK-NEXT: addq %rbx, %r15 +; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: bswapq %rdx +; CHECK-NEXT: addq %rbx, %rcx +; CHECK-NEXT: leaq (%r8,%r10), %rbx +; CHECK-NEXT: addq %r15, %rbx +; CHECK-NEXT: addq %r9, %rdx +; CHECK-NEXT: addq %rbx, %rdx +; CHECK-NEXT: leaq (%rax,%r12), %r9 +; CHECK-NEXT: leaq (%rcx,%r9), %rbx +; CHECK-NEXT: addq %r9, %rbx +; CHECK-NEXT: addq %r15, %rdx +; CHECK-NEXT: addq %rdx, %rsi +; CHECK-NEXT: addq %rcx, %rbx +; CHECK-NEXT: addq %rdx, %rbx +; CHECK-NEXT: movq {{.*}}(%rip), %rdx +; CHECK-NEXT: bswapq %rdx +; CHECK-NEXT: addq %r10, %rdx +; CHECK-NEXT: leaq (%r15,%r8), %rdi +; CHECK-NEXT: addq %rsi, %rdi +; CHECK-NEXT: addq %rdi, %rdx +; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: leaq (%rbx,%rcx), %rdi +; CHECK-NEXT: addq %rcx, %rdi +; CHECK-NEXT: addq %rbx, %rdi +; CHECK-NEXT: addq %rsi, %rdx +; CHECK-NEXT: addq %rdx, %r12 +; CHECK-NEXT: addq %rdx, %rdi +; CHECK-NEXT: addq %r15, %rsi +; CHECK-NEXT: movq {{.*}}(%rip), %rax +; CHECK-NEXT: bswapq %rax +; CHECK-NEXT: movq %rax, {{.*}}(%rip) +; CHECK-NEXT: addq %r8, %rax +; CHECK-NEXT: addq %r12, %rsi +; CHECK-NEXT: addq %rsi, %rax +; CHECK-NEXT: addq %r12, %rax +; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: retq %tmp = load volatile i64, i64* @X ; [#uses=7] %tmp1 = load volatile i64, i64* @X ; [#uses=5] %tmp2 = load volatile i64, i64* @X ; [#uses=3] diff --git a/llvm/test/CodeGen/X86/2009-04-12-picrel.ll b/llvm/test/CodeGen/X86/2009-04-12-picrel.ll --- a/llvm/test/CodeGen/X86/2009-04-12-picrel.ll +++ b/llvm/test/CodeGen/X86/2009-04-12-picrel.ll @@ -1,10 +1,15 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -code-model=small > %t -; RUN: grep leaq %t | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -relocation-model=static 
-code-model=small | FileCheck %s @dst = external global [131072 x i32] @ptr = external global i32* define void @off01(i64 %i) nounwind { +; CHECK-LABEL: off01: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: leaq dst+64(,%rdi,4), %rax +; CHECK-NEXT: movq %rax, {{.*}}(%rip) +; CHECK-NEXT: retq entry: %.sum = add i64 %i, 16 %0 = getelementptr [131072 x i32], [131072 x i32]* @dst, i64 0, i64 %.sum diff --git a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll --- a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll +++ b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll @@ -1,10 +1,63 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -O0 -regalloc=fast | not grep sil +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-apple-darwin -O0 -regalloc=fast | FileCheck %s ; rdar://6787136 %struct.X = type { i8, [32 x i8] } @llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @z to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] define i32 @z() nounwind ssp { +; CHECK-LABEL: z: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $144, %esp +; CHECK-NEXT: movl L___stack_chk_guard$non_lazy_ptr, %eax +; CHECK-NEXT: movl (%eax), %eax +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movb $48, {{[0-9]+}}(%esp) +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movb %cl, {{[0-9]+}}(%esp) +; CHECK-NEXT: movb $15, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: movl $8, %edx +; CHECK-NEXT: leal {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: addl $36, %ecx +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl +; CHECK-NEXT: movb %bl, 32(%eax) +; CHECK-NEXT: movb %bl, 68(%eax) +; CHECK-NEXT: calll _f +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: ## %bb.1: ## %return +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl L___stack_chk_guard$non_lazy_ptr, %ecx +; CHECK-NEXT: movl (%ecx), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: cmpl %edx, %ecx +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: jne LBB0_3 +; CHECK-NEXT: ## %bb.2: ## %SP_return +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; CHECK-NEXT: addl $144, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: retl +; CHECK-NEXT: LBB0_3: ## %CallStackCheckFailBlk +; CHECK-NEXT: calll ___stack_chk_fail +; CHECK-NEXT: ud2 entry: %retval = alloca i32 ; [#uses=2] %xxx = alloca %struct.X ; <%struct.X*> [#uses=6] diff --git a/llvm/test/CodeGen/X86/2009-04-25-CoalescerBug.ll b/llvm/test/CodeGen/X86/2009-04-25-CoalescerBug.ll --- a/llvm/test/CodeGen/X86/2009-04-25-CoalescerBug.ll +++ b/llvm/test/CodeGen/X86/2009-04-25-CoalescerBug.ll @@ -1,7 +1,19 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep mov | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s ; rdar://6806252 define i64 @test(i32* %tmp13) nounwind { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %while.cond +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: # %bb.2: # %while.end +; CHECK-NEXT: shrl %eax +; 
CHECK-NEXT: retq entry: br label %while.cond diff --git a/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll b/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll --- a/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll +++ b/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll @@ -1,8 +1,28 @@ -; RUN: llc < %s | grep "movl.*%ebx, 8(%esi)" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9.0" define void @cpuid(i32* %data) nounwind { +; CHECK-LABEL: cpuid: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl 20(%esi), %eax +; CHECK-NEXT: movl 24(%esi), %ebx +; CHECK-NEXT: movl 28(%esi), %ecx +; CHECK-NEXT: movl 32(%esi), %edx +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: cpuid +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: movl %ebx, 8(%esi) +; CHECK-NEXT: movl %ecx, 12(%esi) +; CHECK-NEXT: movl %edx, 16(%esi) +; CHECK-NEXT: movl %eax, 4(%esi) +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: retl entry: %arrayidx = getelementptr i32, i32* %data, i32 1 ; [#uses=1] %arrayidx2 = getelementptr i32, i32* %data, i32 2 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/2009-05-30-ISelBug.ll b/llvm/test/CodeGen/X86/2009-05-30-ISelBug.ll --- a/llvm/test/CodeGen/X86/2009-05-30-ISelBug.ll +++ b/llvm/test/CodeGen/X86/2009-05-30-ISelBug.ll @@ -1,6 +1,25 @@ -; RUN: llc < %s -mtriple=x86_64-- | not grep "movzbl %[abcd]h," +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s define void @BZ2_bzDecompress_bb5_2E_outer_bb35_2E_i_bb54_2E_i(i32*, i32 %c_nblock_used.2.i, i32 %.reload51, i32* %.out, i32* %.out1, i32* %.out2, i32* %.out3) nounwind { +; 
CHECK-LABEL: BZ2_bzDecompress_bb5_2E_outer_bb35_2E_i_bb54_2E_i: +; CHECK: # %bb.0: # %newFuncRoot +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; CHECK-NEXT: movl %edx, %edx +; CHECK-NEXT: movl (%rdi,%rdx,4), %edx +; CHECK-NEXT: movzbl %dl, %eax +; CHECK-NEXT: shrq $6, %rdx +; CHECK-NEXT: andl $67108860, %edx # imm = 0x3FFFFFC +; CHECK-NEXT: addl $4, %eax +; CHECK-NEXT: movl (%rdi,%rdx), %edx +; CHECK-NEXT: movzbl %dl, %edi +; CHECK-NEXT: shrl $8, %edx +; CHECK-NEXT: addl $5, %esi +; CHECK-NEXT: movl %eax, (%rcx) +; CHECK-NEXT: movl %edi, (%r8) +; CHECK-NEXT: movl %edx, (%r9) +; CHECK-NEXT: movl %esi, (%r10) +; CHECK-NEXT: retq newFuncRoot: br label %bb54.i diff --git a/llvm/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll b/llvm/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll --- a/llvm/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll +++ b/llvm/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll @@ -1,13 +1,24 @@ -; RUN: llc < %s -tailcallopt -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep fstpt -; RUN: llc < %s -tailcallopt -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep xmm +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -tailcallopt -mattr=+sse2 -mtriple=x86_64-apple-darwin | FileCheck %s ; Check that x86-64 tail calls support x86_fp80 and v2f32 types. (Tail call ; calling convention out of sync with standard c calling convention on x86_64) ; Bug 4278. 
-declare fastcc double @tailcallee(x86_fp80, <2 x float>) - +declare fastcc double @tailcallee(x86_fp80, <2 x float>) + define fastcc double @tailcall() { +; CHECK-LABEL: tailcall: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; CHECK-NEXT: fld1 +; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp) +; CHECK-NEXT: movaps {{.*#+}} xmm0 = <1.0E+0,1.0E+0,u,u> +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: jmp _tailcallee ## TAILCALL entry: %tmp = fpext float 1.000000e+00 to x86_fp80 %tmp2 = tail call fastcc double @tailcallee( x86_fp80 %tmp, <2 x float> ) diff --git a/llvm/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll b/llvm/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll --- a/llvm/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll +++ b/llvm/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll @@ -1,10 +1,25 @@ -; RUN: llc < %s -mtriple=i686-- -tailcallopt | not grep TAILCALL +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -tailcallopt | FileCheck %s ; Bug 4396. This tail call can NOT be optimized. 
declare fastcc i8* @_D3gcx2GC12mallocNoSyncMFmkZPv() nounwind define fastcc i8* @_D3gcx2GC12callocNoSyncMFmkZPv() nounwind { +; CHECK-LABEL: _D3gcx2GC12callocNoSyncMFmkZPv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: calll _D3gcx2GC12mallocNoSyncMFmkZPv +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: pushl $0 +; CHECK-NEXT: pushl $2 +; CHECK-NEXT: pushl $0 +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: calll memset +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl entry: %tmp6 = tail call fastcc i8* @_D3gcx2GC12mallocNoSyncMFmkZPv() ; [#uses=2] %tmp9 = tail call i8* @memset(i8* %tmp6, i32 0, i64 2) ; [#uses=0] diff --git a/llvm/test/CodeGen/X86/2009-08-12-badswitch.ll b/llvm/test/CodeGen/X86/2009-08-12-badswitch.ll --- a/llvm/test/CodeGen/X86/2009-08-12-badswitch.ll +++ b/llvm/test/CodeGen/X86/2009-08-12-badswitch.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s | grep LJT +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-apple-darwin10" @@ -30,6 +31,153 @@ declare void @f26() nounwind readnone define internal fastcc i32 @foo(i64 %bar) nounwind ssp { +; CHECK-LABEL: foo: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: addq $-2, %rdi +; CHECK-NEXT: cmpq $25, %rdi +; CHECK-NEXT: ja LBB0_2 +; CHECK-NEXT: ## %bb.1: ## %bb49 +; CHECK-NEXT: leaq {{.*}}(%rip), %rax +; CHECK-NEXT: movslq (%rax,%rdi,4), %rcx +; CHECK-NEXT: addq %rax, %rcx +; CHECK-NEXT: jmpq *%rcx +; CHECK-NEXT: LBB0_3: ## %RRETURN_6 +; CHECK-NEXT: callq _f2 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_2: ## %RETURN +; CHECK-NEXT: callq _f1 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_4: ## %RRETURN_7 +; CHECK-NEXT: callq _f3 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_5: ## %RRETURN_14 
+; CHECK-NEXT: callq _f4 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_6: ## %RRETURN_15 +; CHECK-NEXT: callq _f5 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_7: ## %RRETURN_16 +; CHECK-NEXT: callq _f6 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_8: ## %RRETURN_17 +; CHECK-NEXT: callq _f7 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_9: ## %RRETURN_18 +; CHECK-NEXT: callq _f8 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_10: ## %RRETURN_19 +; CHECK-NEXT: callq _f9 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_11: ## %RRETURN_20 +; CHECK-NEXT: callq _f10 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_12: ## %RRETURN_21 +; CHECK-NEXT: callq _f11 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_13: ## %RRETURN_22 +; CHECK-NEXT: callq _f12 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_14: ## %RRETURN_24 +; CHECK-NEXT: callq _f13 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_15: ## %RRETURN_26 +; CHECK-NEXT: callq _f14 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_16: ## %RRETURN_27 +; CHECK-NEXT: callq _f15 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_17: ## %RRETURN_28 +; CHECK-NEXT: callq _f16 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_18: ## %RRETURN_29 +; CHECK-NEXT: callq _f17 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_19: ## %RRETURN_30 +; CHECK-NEXT: callq _f18 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_20: ## %RRETURN_31 +; CHECK-NEXT: callq _f19 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_21: ## %RRETURN_38 +; CHECK-NEXT: callq _f20 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_22: ## %RRETURN_40 +; CHECK-NEXT: callq _f21 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_23: ## %RRETURN_42 +; CHECK-NEXT: callq _f22 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_24: ## %RRETURN_44 +; CHECK-NEXT: callq _f23 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_25: ## %RRETURN_48 +; CHECK-NEXT: callq _f24 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_26: ## %RRETURN_52 +; CHECK-NEXT: callq _f25 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_27: 
## %RRETURN_1 +; CHECK-NEXT: callq _f26 +; CHECK-NEXT: LBB0_28: ## %EXIT +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq +; CHECK-NEXT: .p2align 2, 0x90 +; CHECK-NEXT: .data_region jt32 +; CHECK-NEXT: .set L0_0_set_3, LBB0_3-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_4, LBB0_4-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_5, LBB0_5-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_6, LBB0_6-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_7, LBB0_7-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_8, LBB0_8-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_9, LBB0_9-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_10, LBB0_10-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_11, LBB0_11-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_12, LBB0_12-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_13, LBB0_13-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_14, LBB0_14-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_15, LBB0_15-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_16, LBB0_16-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_17, LBB0_17-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_18, LBB0_18-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_19, LBB0_19-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_20, LBB0_20-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_21, LBB0_21-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_22, LBB0_22-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_23, LBB0_23-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_24, LBB0_24-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_25, LBB0_25-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_26, LBB0_26-LJTI0_0 +; CHECK-NEXT: .set L0_0_set_27, LBB0_27-LJTI0_0 +; CHECK-NEXT: LJTI0_0: +; CHECK-NEXT: .long L0_0_set_3 +; CHECK-NEXT: .long L0_0_set_3 +; CHECK-NEXT: .long L0_0_set_4 +; CHECK-NEXT: .long L0_0_set_5 +; CHECK-NEXT: .long L0_0_set_6 +; CHECK-NEXT: .long L0_0_set_7 +; CHECK-NEXT: .long L0_0_set_8 +; CHECK-NEXT: .long L0_0_set_9 +; CHECK-NEXT: .long L0_0_set_10 +; CHECK-NEXT: .long L0_0_set_11 +; CHECK-NEXT: .long L0_0_set_12 +; CHECK-NEXT: .long L0_0_set_13 +; CHECK-NEXT: .long L0_0_set_14 +; CHECK-NEXT: .long L0_0_set_15 +; CHECK-NEXT: .long L0_0_set_16 +; CHECK-NEXT: .long L0_0_set_17 +; CHECK-NEXT: .long 
L0_0_set_18 +; CHECK-NEXT: .long L0_0_set_19 +; CHECK-NEXT: .long L0_0_set_20 +; CHECK-NEXT: .long L0_0_set_21 +; CHECK-NEXT: .long L0_0_set_22 +; CHECK-NEXT: .long L0_0_set_23 +; CHECK-NEXT: .long L0_0_set_24 +; CHECK-NEXT: .long L0_0_set_25 +; CHECK-NEXT: .long L0_0_set_26 +; CHECK-NEXT: .long L0_0_set_27 +; CHECK-NEXT: .end_data_region entry: br label %bb49 diff --git a/llvm/test/CodeGen/X86/20090313-signext.ll b/llvm/test/CodeGen/X86/20090313-signext.ll --- a/llvm/test/CodeGen/X86/20090313-signext.ll +++ b/llvm/test/CodeGen/X86/20090313-signext.ll @@ -1,11 +1,19 @@ -; RUN: llc < %s -mtriple=x86_64-- -relocation-model=pic > %t -; RUN: grep "movswl %ax, %edi" %t -; RUN: grep "movw (%rax), %ax" %t -; XFAIL: * +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- -relocation-model=pic | FileCheck %s @x = common global i16 0 define signext i16 @f() nounwind { +; CHECK-LABEL: f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq h@PLT +; CHECK-NEXT: movswl %ax, %edi +; CHECK-NEXT: callq g@PLT +; CHECK-NEXT: movq x@{{.*}}(%rip), %rax +; CHECK-NEXT: movzwl (%rax), %eax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq entry: %0 = tail call signext i16 @h() nounwind %1 = sext i16 %0 to i32 diff --git a/llvm/test/CodeGen/X86/SwitchLowering.ll b/llvm/test/CodeGen/X86/SwitchLowering.ll --- a/llvm/test/CodeGen/X86/SwitchLowering.ll +++ b/llvm/test/CodeGen/X86/SwitchLowering.ll @@ -1,7 +1,43 @@ -; RUN: llc < %s -mtriple=i686-- | grep cmp | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s ; PR964 define i8* @FindChar(i8* %CurPtr) { +; CHECK-LABEL: FindChar: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset %esi, -12 +; CHECK-NEXT: .cfi_offset %edi, -8 +; CHECK-NEXT: xorl %edi, %edi +; 
CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %bb +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movzbl (%esi,%edi), %eax +; CHECK-NEXT: incl %edi +; CHECK-NEXT: cmpb $120, %al +; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: # %bb.2: # %bb +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %bb7 +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: calll foo +; CHECK-NEXT: addl $4, %esp +; CHECK-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK-NEXT: addl %edi, %esi +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: br label %bb diff --git a/llvm/test/CodeGen/X86/addr-label-difference.ll b/llvm/test/CodeGen/X86/addr-label-difference.ll --- a/llvm/test/CodeGen/X86/addr-label-difference.ll +++ b/llvm/test/CodeGen/X86/addr-label-difference.ll @@ -1,5 +1,8 @@ -; RUN: llc %s -o - | grep "__TEXT,__const" +; RUN: llc < %s | FileCheck %s ; PR5929 + +; CHECK: .section __TEXT,__const + target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" target triple = "i386-apple-darwin10.0" diff --git a/llvm/test/CodeGen/X86/aligned-comm.ll b/llvm/test/CodeGen/X86/aligned-comm.ll --- a/llvm/test/CodeGen/X86/aligned-comm.ll +++ b/llvm/test/CodeGen/X86/aligned-comm.ll @@ -1,6 +1,11 @@ -; RUN: llc < %s -mtriple=i686-- -; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep "array,16512,7" -; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep "array,16512,7" +; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefix=i686 +; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s --check-prefix=DARWIN +; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s 
--check-prefix=DARWIN +; RUN: llc < %s -mtriple=i386-apple-darwin8 | FileCheck %s --check-prefix=DARWIN8 + +; i686: array,16512 +; DARWIN8: array,16512 +; DARWIN: array,16512,7 ; Darwin 9+ should get alignment on common symbols. @array = common global [4128 x i32] zeroinitializer, align 128 diff --git a/llvm/test/CodeGen/X86/arg-cast.ll b/llvm/test/CodeGen/X86/arg-cast.ll --- a/llvm/test/CodeGen/X86/arg-cast.ll +++ b/llvm/test/CodeGen/X86/arg-cast.ll @@ -1,13 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; This should compile to movl $2147483647, %eax + andl only. -; RUN: llc < %s | grep andl -; RUN: llc < %s | not grep movsd -; RUN: llc < %s | grep esp | not grep add +; RUN: llc < %s | FileCheck %s ; rdar://5736574 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin8" define i32 @foo(double %x) nounwind { +; CHECK-LABEL: foo: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movl $2147483647, %eax ## imm = 0x7FFFFFFF +; CHECK-NEXT: andl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: retl entry: %x15 = bitcast double %x to i64 ; [#uses=1] %tmp713 = lshr i64 %x15, 32 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/bitcast2.ll b/llvm/test/CodeGen/X86/bitcast2.ll --- a/llvm/test/CodeGen/X86/bitcast2.ll +++ b/llvm/test/CodeGen/X86/bitcast2.ll @@ -1,12 +1,20 @@ -; RUN: llc < %s -mtriple=x86_64-- -mattr=-avx | grep movq | count 2 -; RUN: llc < %s -mtriple=x86_64-- -mattr=-avx | not grep rsp +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- -mattr=-avx | FileCheck %s define i64 @test1(double %A) { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %xmm0, %rax +; CHECK-NEXT: retq %B = bitcast double %A to i64 ret i64 %B } define double @test2(i64 %A) { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %xmm0 +; CHECK-NEXT: retq %B = 
bitcast i64 %A to double ret double %B } diff --git a/llvm/test/CodeGen/X86/break-anti-dependencies.ll b/llvm/test/CodeGen/X86/break-anti-dependencies.ll --- a/llvm/test/CodeGen/X86/break-anti-dependencies.ll +++ b/llvm/test/CodeGen/X86/break-anti-dependencies.ll @@ -1,14 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; Without list-burr scheduling we may not see the difference in codegen here. ; Use a subtarget that has post-RA scheduling enabled because the anti-dependency ; breaker requires liveness information to be kept. -; RUN: llc < %s -mtriple=x86_64-- -mcpu=atom -enable-misched=false -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t -; RUN: grep "%xmm0" %t | count 14 -; RUN: not grep "%xmm1" %t -; RUN: llc < %s -mtriple=x86_64-- -mcpu=atom -post-RA-scheduler -break-anti-dependencies=critical > %t -; RUN: grep "%xmm0" %t | count 7 -; RUN: grep "%xmm1" %t | count 7 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=atom -enable-misched=false -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none | FileCheck %s --check-prefix=none +; RUN: llc < %s -mtriple=x86_64-- -mcpu=atom -post-RA-scheduler -break-anti-dependencies=critical | FileCheck %s --check-prefix=critical define void @goo(double* %r, double* %p, double* %q) nounwind { +; none-LABEL: goo: +; none: # %bb.0: # %entry +; none-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; none-NEXT: addsd {{.*}}(%rip), %xmm0 +; none-NEXT: mulsd {{.*}}(%rip), %xmm0 +; none-NEXT: addsd {{.*}}(%rip), %xmm0 +; none-NEXT: mulsd {{.*}}(%rip), %xmm0 +; none-NEXT: addsd {{.*}}(%rip), %xmm0 +; none-NEXT: cvttsd2si %xmm0, %eax +; none-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; none-NEXT: addsd {{.*}}(%rip), %xmm0 +; none-NEXT: mulsd {{.*}}(%rip), %xmm0 +; none-NEXT: addsd {{.*}}(%rip), %xmm0 +; none-NEXT: mulsd {{.*}}(%rip), %xmm0 +; none-NEXT: addsd {{.*}}(%rip), %xmm0 +; none-NEXT: cvttsd2si %xmm0, %ecx +; none-NEXT: cmpl %eax, %ecx +; none-NEXT: jge 
.LBB0_2 +; none-NEXT: # %bb.1: # %bb +; none-NEXT: movabsq $4621425052621576602, %rax # imm = 0x402299999999999A +; none-NEXT: movq %rax, (%rdx) +; none-NEXT: .LBB0_2: # %return +; none-NEXT: retq +; +; critical-LABEL: goo: +; critical: # %bb.0: # %entry +; critical-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; critical-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; critical-NEXT: addsd {{.*}}(%rip), %xmm0 +; critical-NEXT: addsd {{.*}}(%rip), %xmm1 +; critical-NEXT: mulsd {{.*}}(%rip), %xmm0 +; critical-NEXT: mulsd {{.*}}(%rip), %xmm1 +; critical-NEXT: addsd {{.*}}(%rip), %xmm0 +; critical-NEXT: addsd {{.*}}(%rip), %xmm1 +; critical-NEXT: mulsd {{.*}}(%rip), %xmm0 +; critical-NEXT: mulsd {{.*}}(%rip), %xmm1 +; critical-NEXT: addsd {{.*}}(%rip), %xmm0 +; critical-NEXT: addsd {{.*}}(%rip), %xmm1 +; critical-NEXT: cvttsd2si %xmm0, %eax +; critical-NEXT: cvttsd2si %xmm1, %ecx +; critical-NEXT: cmpl %ecx, %eax +; critical-NEXT: jge .LBB0_2 +; critical-NEXT: # %bb.1: # %bb +; critical-NEXT: movabsq $4621425052621576602, %rax # imm = 0x402299999999999A +; critical-NEXT: movq %rax, (%rdx) +; critical-NEXT: .LBB0_2: # %return +; critical-NEXT: retq entry: %0 = load double, double* %p, align 8 %1 = fadd double %0, 1.100000e+00 diff --git a/llvm/test/CodeGen/X86/byval6.ll b/llvm/test/CodeGen/X86/byval6.ll --- a/llvm/test/CodeGen/X86/byval6.ll +++ b/llvm/test/CodeGen/X86/byval6.ll @@ -1,11 +1,50 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mcpu=generic -mtriple=i686-- | grep add | not grep 16 +; RUN: llc < %s -mcpu=generic -mtriple=i686-- | FileCheck %s %struct.W = type { x86_fp80, x86_fp80 } @B = global %struct.W { x86_fp80 0xK4001A000000000000000, x86_fp80 0xK4001C000000000000000 }, align 32 @.cpx = internal constant %struct.W { x86_fp80 0xK4001E000000000000000, x86_fp80 0xK40028000000000000000 } define i32 @main() nounwind { +; CHECK-LABEL: main: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi 
+; CHECK-NEXT: pushl %esi +; CHECK-NEXT: movl .cpx+20, %eax +; CHECK-NEXT: movl .cpx+16, %ecx +; CHECK-NEXT: movl .cpx+12, %edx +; CHECK-NEXT: movl .cpx+8, %esi +; CHECK-NEXT: movl .cpx+4, %edi +; CHECK-NEXT: movl .cpx, %ebx +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: pushl %ecx +; CHECK-NEXT: pushl %edx +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl $3 +; CHECK-NEXT: calll bar +; CHECK-NEXT: addl $28, %esp +; CHECK-NEXT: movl B+20, %eax +; CHECK-NEXT: movl B+16, %ecx +; CHECK-NEXT: movl B+12, %edx +; CHECK-NEXT: movl B+8, %esi +; CHECK-NEXT: movl B+4, %edi +; CHECK-NEXT: movl B, %ebx +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: pushl %ecx +; CHECK-NEXT: pushl %edx +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl $3 +; CHECK-NEXT: calll baz +; CHECK-NEXT: addl $28, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: retl entry: tail call void (i32, ...) @bar( i32 3, %struct.W* byval @.cpx ) nounwind tail call void (i32, ...) 
@baz( i32 3, %struct.W* byval @B ) nounwind diff --git a/llvm/test/CodeGen/X86/coalesce-esp.ll b/llvm/test/CodeGen/X86/coalesce-esp.ll --- a/llvm/test/CodeGen/X86/coalesce-esp.ll +++ b/llvm/test/CodeGen/X86/coalesce-esp.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s | grep "movl %esp, %ebp" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s ; PR4572 ; Don't coalesce with %esp if it would end up putting %esp in @@ -11,6 +12,33 @@ %"struct.std::valarray" = type { i32, i32* } define void @_ZSt17__gslice_to_indexjRKSt8valarrayIjES2_RS0_(i32 %__o, %"struct.std::valarray"* nocapture %__l, %"struct.std::valarray"* nocapture %__s, %"struct.std::valarray"* nocapture %__i) nounwind { +; CHECK-LABEL: _ZSt17__gslice_to_indexjRKSt8valarrayIjES2_RS0_: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: # %bb.5: # %return +; CHECK-NEXT: movl %ebp, %esp +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_1: # %bb4.preheader +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: jmp .LBB0_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_4: # %bb7.backedge +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: addl $-4, %edx +; CHECK-NEXT: .LBB0_2: # %bb4 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: jne .LBB0_4 +; CHECK-NEXT: # %bb.3: # %bb5 +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: movl $0, (%eax,%edx) +; CHECK-NEXT: jmp .LBB0_4 entry: %0 = alloca i32, i32 undef, align 4 ; [#uses=1] br i1 undef, label %return, label %bb4 diff --git a/llvm/test/CodeGen/X86/coalescer-commute1.ll b/llvm/test/CodeGen/X86/coalescer-commute1.ll --- a/llvm/test/CodeGen/X86/coalescer-commute1.ll +++ b/llvm/test/CodeGen/X86/coalescer-commute1.ll @@ -1,10 +1,30 @@ -; RUN: llc < %s -mtriple=i686-apple-darwin 
-mcpu=corei7-avx -mattr=+sse2 | not grep movaps +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=corei7-avx -mattr=+sse2 | FileCheck %s ; PR1877 @NNTOT = weak global i32 0 ; [#uses=1] @G = weak global float 0.000000e+00 ; [#uses=1] define void @runcont(i32* %source) nounwind { +; CHECK-LABEL: runcont: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl L_NNTOT$non_lazy_ptr, %ecx +; CHECK-NEXT: movl (%ecx), %ecx +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_1: ## %bb +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vcvtsi2ssl (%eax,%edx,4), %xmm2, %xmm1 +; CHECK-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: incl %edx +; CHECK-NEXT: cmpl %edx, %ecx +; CHECK-NEXT: jne LBB0_1 +; CHECK-NEXT: ## %bb.2: ## %bb13 +; CHECK-NEXT: movl L_G$non_lazy_ptr, %eax +; CHECK-NEXT: vmovss %xmm0, (%eax) +; CHECK-NEXT: retl entry: %tmp10 = load i32, i32* @NNTOT, align 4 ; [#uses=1] br label %bb diff --git a/llvm/test/CodeGen/X86/coalescer-commute3.ll b/llvm/test/CodeGen/X86/coalescer-commute3.ll --- a/llvm/test/CodeGen/X86/coalescer-commute3.ll +++ b/llvm/test/CodeGen/X86/coalescer-commute3.ll @@ -1,8 +1,35 @@ -; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -no-x86-call-frame-opt | grep mov | count 6 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -no-x86-call-frame-opt | FileCheck %s %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } define i32 @perimeter(%struct.quad_struct* %tree, i32 %size) nounwind { +; CHECK-LABEL: perimeter: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: testl 
%eax, %eax +; CHECK-NEXT: je LBB0_3 +; CHECK-NEXT: ## %bb.1: ## %entry +; CHECK-NEXT: cmpl $2, %eax +; CHECK-NEXT: jne LBB0_3 +; CHECK-NEXT: ## %bb.2: ## %bb +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, (%esp) +; CHECK-NEXT: calll _perimeter +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, (%esp) +; CHECK-NEXT: calll _perimeter +; CHECK-NEXT: addl %esi, %eax +; CHECK-NEXT: jmp LBB0_4 +; CHECK-NEXT: LBB0_3: ## %UnifiedReturnBlock +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: LBB0_4: ## %UnifiedReturnBlock +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl entry: switch i32 %size, label %UnifiedReturnBlock [ i32 2, label %bb diff --git a/llvm/test/CodeGen/X86/coalescer-commute4.ll b/llvm/test/CodeGen/X86/coalescer-commute4.ll --- a/llvm/test/CodeGen/X86/coalescer-commute4.ll +++ b/llvm/test/CodeGen/X86/coalescer-commute4.ll @@ -1,7 +1,39 @@ -; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s ; PR1501 define float @foo(i32* %x, float* %y, i32 %c) nounwind { +; CHECK-LABEL: foo: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: je LBB0_1 +; CHECK-NEXT: ## %bb.2: ## %bb.preheader +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_3: ## %bb +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: cvtsi2ssl (%edx,%esi,4), %xmm1 +; CHECK-NEXT: mulss (%ecx,%esi,4), %xmm1 +; CHECK-NEXT: addss %xmm1, %xmm0 +; CHECK-NEXT: incl %esi +; CHECK-NEXT: cmpl %eax, %esi +; CHECK-NEXT: jb LBB0_3 +; CHECK-NEXT: jmp 
LBB0_4 +; CHECK-NEXT: LBB0_1: +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: LBB0_4: ## %bb23 +; CHECK-NEXT: movss %xmm0, (%esp) +; CHECK-NEXT: flds (%esp) +; CHECK-NEXT: addl $4, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl entry: %tmp2132 = icmp eq i32 %c, 0 ; [#uses=2] br i1 %tmp2132, label %bb23, label %bb.preheader diff --git a/llvm/test/CodeGen/X86/coalescer-commute5.ll b/llvm/test/CodeGen/X86/coalescer-commute5.ll --- a/llvm/test/CodeGen/X86/coalescer-commute5.ll +++ b/llvm/test/CodeGen/X86/coalescer-commute5.ll @@ -1,6 +1,11 @@ -; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s define i32 @t() { +; CHECK-LABEL: t: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl entry: br i1 true, label %bb1664, label %bb1656 bb1656: ; preds = %entry @@ -18,4 +23,4 @@ ret i32 0 } -declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone +declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone diff --git a/llvm/test/CodeGen/X86/coalescer-remat.ll b/llvm/test/CodeGen/X86/coalescer-remat.ll --- a/llvm/test/CodeGen/X86/coalescer-remat.ll +++ b/llvm/test/CodeGen/X86/coalescer-remat.ll @@ -1,9 +1,23 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep xor | count 3 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s @val = internal global i64 0 @"\01LC" = internal constant [7 x i8] c"0x%lx\0A\00" define i32 @main() nounwind { +; CHECK-LABEL: main: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) +; CHECK-NEXT: leaq {{.*}}(%rip), %rdi +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: callq 
_printf +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq entry: %t0 = cmpxchg i64* @val, i64 0, i64 1 monotonic monotonic %0 = extractvalue { i64, i1 } %t0, 0 diff --git a/llvm/test/CodeGen/X86/compare-add.ll b/llvm/test/CodeGen/X86/compare-add.ll --- a/llvm/test/CodeGen/X86/compare-add.ll +++ b/llvm/test/CodeGen/X86/compare-add.ll @@ -1,6 +1,12 @@ -; RUN: llc < %s -mtriple=i686-- | not grep add +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s define i1 @X(i32 %X) { +; CHECK-LABEL: X: +; CHECK: # %bb.0: +; CHECK-NEXT: cmpl $12331, {{[0-9]+}}(%esp) # imm = 0x302B +; CHECK-NEXT: setne %al +; CHECK-NEXT: retl %Y = add i32 %X, 14 ; [#uses=1] %Z = icmp ne i32 %Y, 12345 ; [#uses=1] ret i1 %Z diff --git a/llvm/test/CodeGen/X86/compare_folding.ll b/llvm/test/CodeGen/X86/compare_folding.ll --- a/llvm/test/CodeGen/X86/compare_folding.ll +++ b/llvm/test/CodeGen/X86/compare_folding.ll @@ -1,10 +1,14 @@ -; RUN: llc < %s -mtriple=i686-- -mcpu=yonah | \ -; RUN: grep movsd | count 1 -; RUN: llc < %s -mtriple=i686-- -mcpu=yonah | \ -; RUN: grep ucomisd -declare i1 @llvm.isunordered.f64(double, double) +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mcpu=yonah | FileCheck %s +declare i1 @llvm.isunordered.f64(double, double) define i1 @test1(double %X, double %Y) { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: ucomisd {{[0-9]+}}(%esp), %xmm0 +; CHECK-NEXT: setp %al +; CHECK-NEXT: retl %COM = fcmp uno double %X, %Y ; [#uses=1] ret i1 %COM } diff --git a/llvm/test/CodeGen/X86/cstring.ll b/llvm/test/CodeGen/X86/cstring.ll --- a/llvm/test/CodeGen/X86/cstring.ll +++ b/llvm/test/CodeGen/X86/cstring.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin | not grep comm +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s ; rdar://6479858 +; CHECK-NOT: 
comm @str1 = internal constant [1 x i8] zeroinitializer diff --git a/llvm/test/CodeGen/X86/empty-struct-return-type.ll b/llvm/test/CodeGen/X86/empty-struct-return-type.ll --- a/llvm/test/CodeGen/X86/empty-struct-return-type.ll +++ b/llvm/test/CodeGen/X86/empty-struct-return-type.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s | grep call +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s ; PR4688 ; Return types can be empty structs, which can be awkward. @@ -7,6 +8,14 @@ target triple = "x86_64-unknown-linux-gnu" define void @_ZN15QtSharedPointer22internalSafetyCheckAddEPVKv(i8* %ptr) { +; CHECK-LABEL: _ZN15QtSharedPointer22internalSafetyCheckAddEPVKv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq _ZNK5QHashIPv15QHashDummyValueE5valueERKS0_ +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq entry: %0 = call { } @_ZNK5QHashIPv15QHashDummyValueE5valueERKS0_(i8** undef) ; <{ }> [#uses=0] ret void diff --git a/llvm/test/CodeGen/X86/extend.ll b/llvm/test/CodeGen/X86/extend.ll --- a/llvm/test/CodeGen/X86/extend.ll +++ b/llvm/test/CodeGen/X86/extend.ll @@ -1,16 +1,26 @@ -; RUN: llc < %s -mtriple=i686-- -x86-asm-syntax=intel | grep movzx | count 1 -; RUN: llc < %s -mtriple=i686-- -x86-asm-syntax=intel | grep movsx | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -x86-asm-syntax=intel | FileCheck %s @G1 = internal global i8 0 ; [#uses=1] @G2 = internal global i8 0 ; [#uses=1] define i16 @test1() { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: +; CHECK-NEXT: movzx eax, byte ptr [G1] +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: ret %tmp.0 = load i8, i8* @G1 ; [#uses=1] %tmp.3 = zext i8 %tmp.0 to i16 ; [#uses=1] ret i16 %tmp.3 } define i16 @test2() { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: +; CHECK-NEXT: movsx eax, byte ptr [G2] +; 
CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: ret %tmp.0 = load i8, i8* @G2 ; [#uses=1] %tmp.3 = sext i8 %tmp.0 to i16 ; [#uses=1] ret i16 %tmp.3 diff --git a/llvm/test/CodeGen/X86/extmul128.ll b/llvm/test/CodeGen/X86/extmul128.ll --- a/llvm/test/CodeGen/X86/extmul128.ll +++ b/llvm/test/CodeGen/X86/extmul128.ll @@ -1,12 +1,23 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep mul | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s define i128 @i64_sext_i128(i64 %a, i64 %b) { +; CHECK-LABEL: i64_sext_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: imulq %rsi +; CHECK-NEXT: retq %aa = sext i64 %a to i128 %bb = sext i64 %b to i128 %cc = mul i128 %aa, %bb ret i128 %cc } define i128 @i64_zext_i128(i64 %a, i64 %b) { +; CHECK-LABEL: i64_zext_i128: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: mulq %rsi +; CHECK-NEXT: retq %aa = zext i64 %a to i128 %bb = zext i64 %b to i128 %cc = mul i128 %aa, %bb diff --git a/llvm/test/CodeGen/X86/extmul64.ll b/llvm/test/CodeGen/X86/extmul64.ll --- a/llvm/test/CodeGen/X86/extmul64.ll +++ b/llvm/test/CodeGen/X86/extmul64.ll @@ -1,12 +1,23 @@ -; RUN: llc < %s -mtriple=i686-- | grep mul | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s define i64 @i32_sext_i64(i32 %a, i32 %b) { +; CHECK-LABEL: i32_sext_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: imull {{[0-9]+}}(%esp) +; CHECK-NEXT: retl %aa = sext i32 %a to i64 %bb = sext i32 %b to i64 %cc = mul i64 %aa, %bb ret i64 %cc } define i64 @i32_zext_i64(i32 %a, i32 %b) { +; CHECK-LABEL: i32_zext_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: mull {{[0-9]+}}(%esp) +; CHECK-NEXT: retl %aa = zext i32 %a to i64 %bb = zext i32 %b to i64 %cc = mul i64 %aa, %bb diff --git 
a/llvm/test/CodeGen/X86/extractps.ll b/llvm/test/CodeGen/X86/extractps.ll --- a/llvm/test/CodeGen/X86/extractps.ll +++ b/llvm/test/CodeGen/X86/extractps.ll @@ -1,12 +1,16 @@ -; RUN: llc < %s -mtriple=i686-- -mcpu=penryn > %t -; RUN: not grep movd %t -; RUN: grep "movss %xmm" %t | count 1 -; RUN: grep "extractps \$1, %xmm0, " %t | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mcpu=penryn | FileCheck %s ; PR2647 @0 = external global float, align 16 ; :0 [#uses=2] -define internal void @""() nounwind { +define internal void @a() nounwind { +; CHECK-LABEL: a: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: rsqrtss %xmm0, %xmm0 +; CHECK-NEXT: movss %xmm0, __unnamed_1 +; CHECK-NEXT: retl load float, float* @0, align 16 ; :1 [#uses=1] insertelement <4 x float> undef, float %1, i32 0 ; <<4 x float>>:2 [#uses=1] call <4 x float> @llvm.x86.sse.rsqrt.ss( <4 x float> %2 ) ; <<4 x float>>:3 [#uses=1] @@ -14,7 +18,14 @@ store float %4, float* @0, align 16 ret void } -define internal void @""() nounwind { +define internal void @b() nounwind { +; CHECK-LABEL: b: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] +; CHECK-NEXT: rsqrtss %xmm0, %xmm0 +; CHECK-NEXT: extractps $1, %xmm0, __unnamed_1 +; CHECK-NEXT: retl load float, float* @0, align 16 ; :1 [#uses=1] insertelement <4 x float> undef, float %1, i32 1 ; <<4 x float>>:2 [#uses=1] call <4 x float> @llvm.x86.sse.rsqrt.ss( <4 x float> %2 ) ; <<4 x float>>:3 [#uses=1] diff --git a/llvm/test/CodeGen/X86/field-extract-use-trunc.ll b/llvm/test/CodeGen/X86/field-extract-use-trunc.ll --- a/llvm/test/CodeGen/X86/field-extract-use-trunc.ll +++ b/llvm/test/CodeGen/X86/field-extract-use-trunc.ll @@ -1,38 +1,107 @@ -; RUN: llc < %s -mtriple=i686-- | grep sar | count 1 -; RUN: llc < %s -mtriple=x86_64-- | not grep sar +; NOTE: Assertions have 
been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefix=i686 +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=x86_64 define i32 @test(i32 %f12) nounwind { - %tmp7.25 = lshr i32 %f12, 16 +; i686-LABEL: test: +; i686: # %bb.0: +; i686-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; i686-NEXT: retl +; +; x86_64-LABEL: test: +; x86_64: # %bb.0: +; x86_64-NEXT: shrl $16, %edi +; x86_64-NEXT: movsbl %dil, %eax +; x86_64-NEXT: retq + %tmp7.25 = lshr i32 %f12, 16 %tmp7.26 = trunc i32 %tmp7.25 to i8 %tmp78.2 = sext i8 %tmp7.26 to i32 ret i32 %tmp78.2 } define i32 @test2(i32 %f12) nounwind { +; i686-LABEL: test2: +; i686: # %bb.0: +; i686-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; i686-NEXT: retl +; +; x86_64-LABEL: test2: +; x86_64: # %bb.0: +; x86_64-NEXT: shrl $16, %edi +; x86_64-NEXT: movsbl %dil, %eax +; x86_64-NEXT: retq %f11 = shl i32 %f12, 8 %tmp7.25 = ashr i32 %f11, 24 ret i32 %tmp7.25 } define i32 @test3(i32 %f12) nounwind { +; i686-LABEL: test3: +; i686: # %bb.0: +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: shrl $11, %eax +; i686-NEXT: movsbl %al, %eax +; i686-NEXT: retl +; +; x86_64-LABEL: test3: +; x86_64: # %bb.0: +; x86_64-NEXT: shrl $11, %edi +; x86_64-NEXT: movsbl %dil, %eax +; x86_64-NEXT: retq %f11 = shl i32 %f12, 13 %tmp7.25 = ashr i32 %f11, 24 ret i32 %tmp7.25 } define i64 @test4(i64 %f12) nounwind { +; i686-LABEL: test4: +; i686: # %bb.0: +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl %eax, %edx +; i686-NEXT: sarl $31, %edx +; i686-NEXT: retl +; +; x86_64-LABEL: test4: +; x86_64: # %bb.0: +; x86_64-NEXT: movslq %edi, %rax +; x86_64-NEXT: retq %f11 = shl i64 %f12, 32 %tmp7.25 = ashr i64 %f11, 32 ret i64 %tmp7.25 } define i16 @test5(i16 %f12) nounwind { +; i686-LABEL: test5: +; i686: # %bb.0: +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: shrl $6, %eax +; i686-NEXT: movsbl %al, %eax +; i686-NEXT: # kill: def $ax killed $ax killed $eax +; i686-NEXT: retl +; +; 
x86_64-LABEL: test5: +; x86_64: # %bb.0: +; x86_64-NEXT: shrl $6, %edi +; x86_64-NEXT: movsbl %dil, %eax +; x86_64-NEXT: # kill: def $ax killed $ax killed $eax +; x86_64-NEXT: retq %f11 = shl i16 %f12, 2 %tmp7.25 = ashr i16 %f11, 8 ret i16 %tmp7.25 } define i16 @test6(i16 %f12) nounwind { +; i686-LABEL: test6: +; i686: # %bb.0: +; i686-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; i686-NEXT: # kill: def $ax killed $ax killed $eax +; i686-NEXT: retl +; +; x86_64-LABEL: test6: +; x86_64: # %bb.0: +; x86_64-NEXT: movsbl %dil, %eax +; x86_64-NEXT: # kill: def $ax killed $ax killed $eax +; x86_64-NEXT: retq %f11 = shl i16 %f12, 8 %tmp7.25 = ashr i16 %f11, 8 ret i16 %tmp7.25 diff --git a/llvm/test/CodeGen/X86/fildll.ll b/llvm/test/CodeGen/X86/fildll.ll --- a/llvm/test/CodeGen/X86/fildll.ll +++ b/llvm/test/CodeGen/X86/fildll.ll @@ -1,11 +1,50 @@ -; RUN: llc < %s -mtriple=i686-- -x86-asm-syntax=att -mattr=-sse2 | grep fildll | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -x86-asm-syntax=att -mattr=-sse2 | FileCheck %s define fastcc double @sint64_to_fp(i64 %X) { +; CHECK-LABEL: sint64_to_fp: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: .cfi_def_cfa_register %ebp +; CHECK-NEXT: andl $-8, %esp +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: movl %edx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %ecx, (%esp) +; CHECK-NEXT: fildll (%esp) +; CHECK-NEXT: movl %ebp, %esp +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: .cfi_def_cfa %esp, 4 +; CHECK-NEXT: retl %R = sitofp i64 %X to double ; [#uses=1] ret double %R } define fastcc double @uint64_to_fp(i64 %X) { +; CHECK-LABEL: uint64_to_fp: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: .cfi_def_cfa_register %ebp +; CHECK-NEXT: andl $-8, %esp +; 
CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: movl %edx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %ecx, (%esp) +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testl %edx, %edx +; CHECK-NEXT: setns %al +; CHECK-NEXT: fildll (%esp) +; CHECK-NEXT: fadds {{\.LCPI.*}}(,%eax,4) +; CHECK-NEXT: fstpl {{[0-9]+}}(%esp) +; CHECK-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %ebp, %esp +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: .cfi_def_cfa %esp, 4 +; CHECK-NEXT: retl %R = uitofp i64 %X to double ; [#uses=1] ret double %R } diff --git a/llvm/test/CodeGen/X86/fold-call-2.ll b/llvm/test/CodeGen/X86/fold-call-2.ll --- a/llvm/test/CodeGen/X86/fold-call-2.ll +++ b/llvm/test/CodeGen/X86/fold-call-2.ll @@ -1,8 +1,17 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin | grep mov | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s @f = external global void ()* ; [#uses=1] define i32 @main() nounwind { +; CHECK-LABEL: main: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: movl L_f$non_lazy_ptr, %eax +; CHECK-NEXT: calll *(%eax) +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: retl entry: load void ()*, void ()** @f, align 8 ; :0 [#uses=1] tail call void %0( ) nounwind diff --git a/llvm/test/CodeGen/X86/fold-call-3.ll b/llvm/test/CodeGen/X86/fold-call-3.ll --- a/llvm/test/CodeGen/X86/fold-call-3.ll +++ b/llvm/test/CodeGen/X86/fold-call-3.ll @@ -1,7 +1,8 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep call | grep 560 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s ; rdar://6522427 ; This command line used to crash due to dangling nodes left after PreprocessISelDAG -; RUN: llc < %s -mtriple=x86_64-apple-darwin -pre-RA-sched=linearize +; RUN: llc < %s -mtriple=x86_64-apple-darwin -pre-RA-sched=linearize | FileCheck %s --check-prefix=pre-RA 
%"struct.clang::Action" = type { %"struct.clang::ActionBase" } %"struct.clang::ActionBase" = type { i32 (...)** } @@ -10,6 +11,63 @@ @llvm.used = appending global [1 x i8*] [ i8* bitcast (void (i8*, %"struct.clang::Action"*)* @_Z25RawPointerPerformanceTestPvRN5clang6ActionE to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] define void @_Z25RawPointerPerformanceTestPvRN5clang6ActionE(i8* %Val, %"struct.clang::Action"* %Actions) nounwind { +; CHECK-LABEL: _Z25RawPointerPerformanceTestPvRN5clang6ActionE: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: cmpl $0, {{.*}}(%rip) +; CHECK-NEXT: je LBB0_3 +; CHECK-NEXT: ## %bb.1: ## %bb.nph +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_2: ## %bb +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq (%rbx), %rcx +; CHECK-NEXT: movq %rbx, %rdi +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: callq *560(%rcx) +; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: incl %ebp +; CHECK-NEXT: cmpl {{.*}}(%rip), %ebp +; CHECK-NEXT: jb LBB0_2 +; CHECK-NEXT: LBB0_3: ## %return +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq +; +; pre-RA-LABEL: _Z25RawPointerPerformanceTestPvRN5clang6ActionE: +; pre-RA: ## %bb.0: ## %entry +; pre-RA-NEXT: pushq %rbp +; pre-RA-NEXT: pushq %rbx +; pre-RA-NEXT: subq $24, %rsp +; pre-RA-NEXT: cmpl $0, {{.*}}(%rip) +; pre-RA-NEXT: je LBB0_3 +; pre-RA-NEXT: ## %bb.1: ## %bb.nph +; pre-RA-NEXT: movq %rsi, %rbx +; pre-RA-NEXT: movq %rdi, %rax +; pre-RA-NEXT: xorl %ebp, %ebp +; pre-RA-NEXT: .p2align 4, 0x90 +; pre-RA-NEXT: LBB0_2: ## %bb +; pre-RA-NEXT: ## =>This Inner Loop Header: Depth=1 +; pre-RA-NEXT: movq (%rbx), %rcx +; pre-RA-NEXT: movq %rbx, %rdi +; pre-RA-NEXT: movq %rax, %rsi +; pre-RA-NEXT: callq *560(%rcx) 
+; pre-RA-NEXT: incl %ebp +; pre-RA-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; pre-RA-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; pre-RA-NEXT: cmpl {{.*}}(%rip), %ebp +; pre-RA-NEXT: jb LBB0_2 +; pre-RA-NEXT: LBB0_3: ## %return +; pre-RA-NEXT: addq $24, %rsp +; pre-RA-NEXT: popq %rbx +; pre-RA-NEXT: popq %rbp +; pre-RA-NEXT: retq entry: %0 = alloca %"struct.clang::ActionBase::ActionResult<0u>", align 8 ; <%"struct.clang::ActionBase::ActionResult<0u>"*> [#uses=3] %1 = load i32, i32* @NumTrials, align 4 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/fold-sext-trunc.ll b/llvm/test/CodeGen/X86/fold-sext-trunc.ll --- a/llvm/test/CodeGen/X86/fold-sext-trunc.ll +++ b/llvm/test/CodeGen/X86/fold-sext-trunc.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep movslq | count 1 +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s ; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -stop-after livedebugvalues -o - | FileCheck %s -check-prefix=MIR ; PR4050 @@ -9,6 +9,7 @@ declare void @func_28(i64, i64) +; CHECK: movslq g_10+4(%rip), %rdi define void @int322(i32 %foo) !dbg !5 { entry: %val = load i64, i64* getelementptr (%0, %0* bitcast (%struct.S1* @g_10 to %0*), i32 0, i32 0), !dbg !16 diff --git a/llvm/test/CodeGen/X86/fp-stack-2results.ll b/llvm/test/CodeGen/X86/fp-stack-2results.ll --- a/llvm/test/CodeGen/X86/fp-stack-2results.ll +++ b/llvm/test/CodeGen/X86/fp-stack-2results.ll @@ -1,11 +1,18 @@ -; RUN: llc < %s -mtriple=i686-- | grep fldz -; RUN: llc < %s -mtriple=x86_64-- | grep fld1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=ALL,i686 +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s -check-prefixes=ALL,x86_64 + %0 = type { x86_fp80, x86_fp80 } ; This is basically this code on x86-64: ; _Complex long double test() { return 1.0; } define %0 @test() { +; ALL-LABEL: test: +; ALL: # %bb.0: +; ALL-NEXT: fldz +; ALL-NEXT: fld1 +; ALL-NEXT: ret{{[l|q]}} %A = fpext double 1.0 
to x86_fp80 %B = fpext double 0.0 to x86_fp80 %mrv = insertvalue %0 undef, x86_fp80 %A, 0 @@ -19,6 +26,11 @@ ; fld %st(0) ; ret define %0 @test2() { +; ALL-LABEL: test2: +; ALL: # %bb.0: +; ALL-NEXT: fld1 +; ALL-NEXT: fld %st(0) +; ALL-NEXT: ret{{[l|q]}} %A = fpext double 1.0 to x86_fp80 %mrv = insertvalue %0 undef, x86_fp80 %A, 0 %mrv1 = insertvalue %0 %mrv, x86_fp80 %A, 1 @@ -27,6 +39,47 @@ ; Uses both values. define void @call1(x86_fp80 *%P1, x86_fp80 *%P2) { +; i686-LABEL: call1: +; i686: # %bb.0: +; i686-NEXT: pushl %edi +; i686-NEXT: .cfi_def_cfa_offset 8 +; i686-NEXT: pushl %esi +; i686-NEXT: .cfi_def_cfa_offset 12 +; i686-NEXT: .cfi_offset %esi, -12 +; i686-NEXT: .cfi_offset %edi, -8 +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: calll test +; i686-NEXT: fstpt (%edi) +; i686-NEXT: fstpt (%esi) +; i686-NEXT: popl %esi +; i686-NEXT: .cfi_def_cfa_offset 8 +; i686-NEXT: popl %edi +; i686-NEXT: .cfi_def_cfa_offset 4 +; i686-NEXT: retl +; +; x86_64-LABEL: call1: +; x86_64: # %bb.0: +; x86_64-NEXT: pushq %r14 +; x86_64-NEXT: .cfi_def_cfa_offset 16 +; x86_64-NEXT: pushq %rbx +; x86_64-NEXT: .cfi_def_cfa_offset 24 +; x86_64-NEXT: pushq %rax +; x86_64-NEXT: .cfi_def_cfa_offset 32 +; x86_64-NEXT: .cfi_offset %rbx, -24 +; x86_64-NEXT: .cfi_offset %r14, -16 +; x86_64-NEXT: movq %rsi, %r14 +; x86_64-NEXT: movq %rdi, %rbx +; x86_64-NEXT: callq test +; x86_64-NEXT: fstpt (%rbx) +; x86_64-NEXT: fstpt (%r14) +; x86_64-NEXT: addq $8, %rsp +; x86_64-NEXT: .cfi_def_cfa_offset 24 +; x86_64-NEXT: popq %rbx +; x86_64-NEXT: .cfi_def_cfa_offset 16 +; x86_64-NEXT: popq %r14 +; x86_64-NEXT: .cfi_def_cfa_offset 8 +; x86_64-NEXT: retq %a = call %0 @test() %b = extractvalue %0 %a, 0 store x86_fp80 %b, x86_fp80* %P1 @@ -38,6 +91,49 @@ ; Uses both values, requires fxch define void @call2(x86_fp80 *%P1, x86_fp80 *%P2) { +; i686-LABEL: call2: +; i686: # %bb.0: +; i686-NEXT: pushl %edi +; i686-NEXT: .cfi_def_cfa_offset 8 +; i686-NEXT: pushl 
%esi +; i686-NEXT: .cfi_def_cfa_offset 12 +; i686-NEXT: .cfi_offset %esi, -12 +; i686-NEXT: .cfi_offset %edi, -8 +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: calll test +; i686-NEXT: fxch %st(1) +; i686-NEXT: fstpt (%edi) +; i686-NEXT: fstpt (%esi) +; i686-NEXT: popl %esi +; i686-NEXT: .cfi_def_cfa_offset 8 +; i686-NEXT: popl %edi +; i686-NEXT: .cfi_def_cfa_offset 4 +; i686-NEXT: retl +; +; x86_64-LABEL: call2: +; x86_64: # %bb.0: +; x86_64-NEXT: pushq %r14 +; x86_64-NEXT: .cfi_def_cfa_offset 16 +; x86_64-NEXT: pushq %rbx +; x86_64-NEXT: .cfi_def_cfa_offset 24 +; x86_64-NEXT: pushq %rax +; x86_64-NEXT: .cfi_def_cfa_offset 32 +; x86_64-NEXT: .cfi_offset %rbx, -24 +; x86_64-NEXT: .cfi_offset %r14, -16 +; x86_64-NEXT: movq %rsi, %r14 +; x86_64-NEXT: movq %rdi, %rbx +; x86_64-NEXT: callq test +; x86_64-NEXT: fxch %st(1) +; x86_64-NEXT: fstpt (%rbx) +; x86_64-NEXT: fstpt (%r14) +; x86_64-NEXT: addq $8, %rsp +; x86_64-NEXT: .cfi_def_cfa_offset 24 +; x86_64-NEXT: popq %rbx +; x86_64-NEXT: .cfi_def_cfa_offset 16 +; x86_64-NEXT: popq %r14 +; x86_64-NEXT: .cfi_def_cfa_offset 8 +; x86_64-NEXT: retq %a = call %0 @test() %b = extractvalue %0 %a, 1 store x86_fp80 %b, x86_fp80* %P1 @@ -49,6 +145,31 @@ ; Uses ST(0), ST(1) is dead but must be popped. 
define void @call3(x86_fp80 *%P1, x86_fp80 *%P2) { +; i686-LABEL: call3: +; i686: # %bb.0: +; i686-NEXT: pushl %esi +; i686-NEXT: .cfi_def_cfa_offset 8 +; i686-NEXT: .cfi_offset %esi, -8 +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: calll test +; i686-NEXT: fstp %st(1) +; i686-NEXT: fstpt (%esi) +; i686-NEXT: popl %esi +; i686-NEXT: .cfi_def_cfa_offset 4 +; i686-NEXT: retl +; +; x86_64-LABEL: call3: +; x86_64: # %bb.0: +; x86_64-NEXT: pushq %rbx +; x86_64-NEXT: .cfi_def_cfa_offset 16 +; x86_64-NEXT: .cfi_offset %rbx, -16 +; x86_64-NEXT: movq %rdi, %rbx +; x86_64-NEXT: callq test +; x86_64-NEXT: fstp %st(1) +; x86_64-NEXT: fstpt (%rbx) +; x86_64-NEXT: popq %rbx +; x86_64-NEXT: .cfi_def_cfa_offset 8 +; x86_64-NEXT: retq %a = call %0 @test() %b = extractvalue %0 %a, 0 store x86_fp80 %b, x86_fp80* %P1 @@ -57,6 +178,31 @@ ; Uses ST(1), ST(0) is dead and must be popped. define void @call4(x86_fp80 *%P1, x86_fp80 *%P2) { +; i686-LABEL: call4: +; i686: # %bb.0: +; i686-NEXT: pushl %esi +; i686-NEXT: .cfi_def_cfa_offset 8 +; i686-NEXT: .cfi_offset %esi, -8 +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: calll test +; i686-NEXT: fstp %st(0) +; i686-NEXT: fstpt (%esi) +; i686-NEXT: popl %esi +; i686-NEXT: .cfi_def_cfa_offset 4 +; i686-NEXT: retl +; +; x86_64-LABEL: call4: +; x86_64: # %bb.0: +; x86_64-NEXT: pushq %rbx +; x86_64-NEXT: .cfi_def_cfa_offset 16 +; x86_64-NEXT: .cfi_offset %rbx, -16 +; x86_64-NEXT: movq %rsi, %rbx +; x86_64-NEXT: callq test +; x86_64-NEXT: fstp %st(0) +; x86_64-NEXT: fstpt (%rbx) +; x86_64-NEXT: popq %rbx +; x86_64-NEXT: .cfi_def_cfa_offset 8 +; x86_64-NEXT: retq %a = call %0 @test() %c = extractvalue %0 %a, 1 diff --git a/llvm/test/CodeGen/X86/fp-stack-direct-ret.ll b/llvm/test/CodeGen/X86/fp-stack-direct-ret.ll --- a/llvm/test/CodeGen/X86/fp-stack-direct-ret.ll +++ b/llvm/test/CodeGen/X86/fp-stack-direct-ret.ll @@ -1,9 +1,13 @@ -; RUN: llc < %s -mtriple=i686-- | not grep fstp -; RUN: llc < %s -mtriple=i686-- -mcpu=yonah | not 
grep movsd +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=ALL,GENERIC +; RUN: llc < %s -mtriple=i686-- -mcpu=yonah | FileCheck %s --check-prefixes=ALL,YONAH declare double @foo() define double @bar() { +; ALL-LABEL: bar: +; ALL: # %bb.0: # %entry +; ALL-NEXT: jmp foo # TAILCALL entry: %tmp5 = tail call double @foo() ret double %tmp5 diff --git a/llvm/test/CodeGen/X86/fp-stack-ret-conv.ll b/llvm/test/CodeGen/X86/fp-stack-ret-conv.ll --- a/llvm/test/CodeGen/X86/fp-stack-ret-conv.ll +++ b/llvm/test/CodeGen/X86/fp-stack-ret-conv.ll @@ -1,11 +1,26 @@ -; RUN: llc < %s -mcpu=yonah | grep cvtss2sd -; RUN: llc < %s -mcpu=yonah | grep fstps -; RUN: llc < %s -mcpu=yonah | not grep cvtsd2ss +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mcpu=yonah | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" target triple = "i686-apple-darwin8" define void @test(double *%b) { +; CHECK-LABEL: test: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: calll _foo +; CHECK-NEXT: fstps {{[0-9]+}}(%esp) +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: cvtss2sd %xmm0, %xmm0 +; CHECK-NEXT: movsd %xmm0, (%esi) +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl entry: %tmp13 = tail call double @foo() %tmp1314 = fptrunc double %tmp13 to float ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/fp-stack-ret-store.ll b/llvm/test/CodeGen/X86/fp-stack-ret-store.ll --- a/llvm/test/CodeGen/X86/fp-stack-ret-store.ll +++ b/llvm/test/CodeGen/X86/fp-stack-ret-store.ll @@ -1,4 +1,6 @@ -; RUN: llc < %s -mcpu=yonah | not grep movss +; 
NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mcpu=yonah | FileCheck %s + target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin8" @@ -6,6 +8,19 @@ ; go through a stack slot to get there. define void @bar(double* %P) { +; CHECK-LABEL: bar: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: calll _foo +; CHECK-NEXT: fstpl (%esi) +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl entry: %tmp = tail call double (...) @foo( ) ; [#uses=1] store double %tmp, double* %P, align 8 @@ -15,6 +30,19 @@ declare double @foo(...) define void @bar2(float* %P) { +; CHECK-LABEL: bar2: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: calll _foo2 +; CHECK-NEXT: fstps (%esi) +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl entry: %tmp = tail call double (...) @foo2( ) ; [#uses=1] %tmp1 = fptrunc double %tmp to float ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/fp-stack-retcopy.ll b/llvm/test/CodeGen/X86/fp-stack-retcopy.ll --- a/llvm/test/CodeGen/X86/fp-stack-retcopy.ll +++ b/llvm/test/CodeGen/X86/fp-stack-retcopy.ll @@ -1,10 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; This should not copy the result of foo into an xmm register. 
-; RUN: llc < %s -mcpu=yonah -mtriple=i686-apple-darwin9 | not grep xmm +; RUN: llc < %s -mcpu=yonah -mtriple=i686-apple-darwin9 | FileCheck %s ; rdar://5689903 declare double @foo() define double @carg({ double, double }* byval %z) nounwind { +; CHECK-LABEL: carg: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: jmp _foo ## TAILCALL entry: %tmp5 = tail call double @foo() nounwind ; [#uses=1] ret double %tmp5 diff --git a/llvm/test/CodeGen/X86/fp-stack-set-st1.ll b/llvm/test/CodeGen/X86/fp-stack-set-st1.ll --- a/llvm/test/CodeGen/X86/fp-stack-set-st1.ll +++ b/llvm/test/CodeGen/X86/fp-stack-set-st1.ll @@ -1,6 +1,23 @@ -; RUN: llc < %s -mtriple=i686-- | grep fxch | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s define i32 @main() nounwind { +; CHECK-LABEL: main: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fldl {{\.LCPI.*}} +; CHECK-NEXT: fldl {{\.LCPI.*}} +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: #APP +; CHECK-NEXT: fmul %st(1), %st +; CHECK-NEXT: fst %st(1) +; CHECK-NEXT: frndint +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fsub %st(1), %st +; CHECK-NEXT: f2xm1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fstp %st(0) entry: %asmtmp = tail call { double, double } asm sideeffect "fmul\09%st(1),%st\0A\09fst\09%st(1)\0A\09frndint\0A\09fxch %st(1)\0A\09fsub\09%st(1),%st\0A\09f2xm1\0A\09", "={st},={st(1)},0,1,~{dirflag},~{fpsr},~{flags}"(double 0x4030FEFBD582097D, double 4.620000e+01) nounwind ; <{ double, double }> [#uses=0] unreachable diff --git a/llvm/test/CodeGen/X86/fp2sint.ll b/llvm/test/CodeGen/X86/fp2sint.ll --- a/llvm/test/CodeGen/X86/fp2sint.ll +++ b/llvm/test/CodeGen/X86/fp2sint.ll @@ -1,8 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ;; LowerFP_TO_SINT should not create a stack object if it's not needed. 
-; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | not grep add +; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | FileCheck %s define i32 @main(i32 %argc, i8** %argv) { +; CHECK-LABEL: main: +; CHECK: # %bb.0: # %cond_false.i.i.i +; CHECK-NEXT: cvttsd2si 0, %eax +; CHECK-NEXT: movl %eax, 0 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl cond_false.i.i.i: ; preds = %bb.i5 %tmp35.i = load double, double* null, align 8 ; [#uses=1] %tmp3536.i = fptosi double %tmp35.i to i32 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/fp_load_fold.ll b/llvm/test/CodeGen/X86/fp_load_fold.ll --- a/llvm/test/CodeGen/X86/fp_load_fold.ll +++ b/llvm/test/CodeGen/X86/fp_load_fold.ll @@ -1,39 +1,75 @@ -; RUN: llc < %s -mtriple=i686-- -x86-asm-syntax=intel | \ -; RUN: grep -i ST | not grep "fadd\|fsub\|fdiv\|fmul" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -x86-asm-syntax=intel | FileCheck %s ; Test that the load of the memory location is folded into the operation. 
define double @test_add(double %X, double* %P) { +; CHECK-LABEL: test_add: +; CHECK: # %bb.0: +; CHECK-NEXT: mov eax, dword ptr [esp + 12] +; CHECK-NEXT: fld qword ptr [esp + 4] +; CHECK-NEXT: fadd qword ptr [eax] +; CHECK-NEXT: ret %Y = load double, double* %P ; [#uses=1] %R = fadd double %X, %Y ; [#uses=1] ret double %R } define double @test_mul(double %X, double* %P) { +; CHECK-LABEL: test_mul: +; CHECK: # %bb.0: +; CHECK-NEXT: mov eax, dword ptr [esp + 12] +; CHECK-NEXT: fld qword ptr [esp + 4] +; CHECK-NEXT: fmul qword ptr [eax] +; CHECK-NEXT: ret %Y = load double, double* %P ; [#uses=1] %R = fmul double %X, %Y ; [#uses=1] ret double %R } define double @test_sub(double %X, double* %P) { +; CHECK-LABEL: test_sub: +; CHECK: # %bb.0: +; CHECK-NEXT: mov eax, dword ptr [esp + 12] +; CHECK-NEXT: fld qword ptr [esp + 4] +; CHECK-NEXT: fsub qword ptr [eax] +; CHECK-NEXT: ret %Y = load double, double* %P ; [#uses=1] %R = fsub double %X, %Y ; [#uses=1] ret double %R } define double @test_subr(double %X, double* %P) { +; CHECK-LABEL: test_subr: +; CHECK: # %bb.0: +; CHECK-NEXT: mov eax, dword ptr [esp + 12] +; CHECK-NEXT: fld qword ptr [eax] +; CHECK-NEXT: fsub qword ptr [esp + 4] +; CHECK-NEXT: ret %Y = load double, double* %P ; [#uses=1] %R = fsub double %Y, %X ; [#uses=1] ret double %R } define double @test_div(double %X, double* %P) { +; CHECK-LABEL: test_div: +; CHECK: # %bb.0: +; CHECK-NEXT: mov eax, dword ptr [esp + 12] +; CHECK-NEXT: fld qword ptr [esp + 4] +; CHECK-NEXT: fdiv qword ptr [eax] +; CHECK-NEXT: ret %Y = load double, double* %P ; [#uses=1] %R = fdiv double %X, %Y ; [#uses=1] ret double %R } define double @test_divr(double %X, double* %P) { +; CHECK-LABEL: test_divr: +; CHECK: # %bb.0: +; CHECK-NEXT: mov eax, dword ptr [esp + 12] +; CHECK-NEXT: fld qword ptr [eax] +; CHECK-NEXT: fdiv qword ptr [esp + 4] +; CHECK-NEXT: ret %Y = load double, double* %P ; [#uses=1] %R = fdiv double %Y, %X ; [#uses=1] ret double %R diff --git 
a/llvm/test/CodeGen/X86/fsxor-alignment.ll b/llvm/test/CodeGen/X86/fsxor-alignment.ll --- a/llvm/test/CodeGen/X86/fsxor-alignment.ll +++ b/llvm/test/CodeGen/X86/fsxor-alignment.ll @@ -1,11 +1,23 @@ -; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 -enable-unsafe-fp-math | \ -; RUN: grep -v sp | grep xorps | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 -enable-unsafe-fp-math | FileCheck %s ; Don't fold the incoming stack arguments into the xorps instructions used ; to do floating-point negations, because the arguments aren't vectors ; and aren't vector-aligned. define void @foo(float* %p, float* %q, float %s, float %y) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] +; CHECK-NEXT: xorps %xmm1, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: xorps %xmm1, %xmm2 +; CHECK-NEXT: movss %xmm0, (%ecx) +; CHECK-NEXT: movss %xmm2, (%eax) +; CHECK-NEXT: retl %ss = fsub float -0.0, %s %yy = fsub float -0.0, %y store float %ss, float* %p diff --git a/llvm/test/CodeGen/X86/hidden-vis-2.ll b/llvm/test/CodeGen/X86/hidden-vis-2.ll --- a/llvm/test/CodeGen/X86/hidden-vis-2.ll +++ b/llvm/test/CodeGen/X86/hidden-vis-2.ll @@ -1,9 +1,19 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep mov | count 1 -; RUN: llc < %s -mtriple=x86_64-apple-darwin9 | not grep GOT +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s --check-prefix=i386-darwin9 +; RUN: llc < %s -mtriple=x86_64-apple-darwin9 | FileCheck %s --check-prefix=x86_64-darwin9 @x = weak hidden global i32 0 ; [#uses=1] define i32 @t() nounwind readonly { +; i386-darwin9-LABEL: t: +; i386-darwin9: ## %bb.0: ## %entry +; 
i386-darwin9-NEXT: movl _x, %eax +; i386-darwin9-NEXT: retl +; +; x86_64-darwin9-LABEL: t: +; x86_64-darwin9: ## %bb.0: ## %entry +; x86_64-darwin9-NEXT: movl {{.*}}(%rip), %eax +; x86_64-darwin9-NEXT: retq entry: %0 = load i32, i32* @x, align 4 ; [#uses=1] ret i32 %0 diff --git a/llvm/test/CodeGen/X86/i128-and-beyond.ll b/llvm/test/CodeGen/X86/i128-and-beyond.ll --- a/llvm/test/CodeGen/X86/i128-and-beyond.ll +++ b/llvm/test/CodeGen/X86/i128-and-beyond.ll @@ -1,8 +1,21 @@ -; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep -- -1 | count 14 +; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s ; These static initializers are too big to hand off to assemblers ; as monolithic blobs. +; CHECK: -1 +; CHECK-NEXT: -1 +; CHECK: -1 +; CHECK-NEXT: -1 +; CHECK-NEXT: -1 +; CHECK-NEXT: -1 +; CHECK: -1 +; CHECK-NEXT: -1 +; CHECK-NEXT: -1 +; CHECK-NEXT: -1 +; CHECK-NEXT: -1 +; CHECK-NEXT: -1 +; CHECK-NEXT: -1 @x = global i128 -1 @y = global i256 -1 @z = global i512 -1 diff --git a/llvm/test/CodeGen/X86/i128-immediate.ll b/llvm/test/CodeGen/X86/i128-immediate.ll --- a/llvm/test/CodeGen/X86/i128-immediate.ll +++ b/llvm/test/CodeGen/X86/i128-immediate.ll @@ -1,5 +1,11 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep movq | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s define i128 @__addvti3() { +; CHECK-LABEL: __addvti3: +; CHECK: # %bb.0: +; CHECK-NEXT: movq $-1, %rax +; CHECK-NEXT: movq $-1, %rdx +; CHECK-NEXT: retq ret i128 -1 } diff --git a/llvm/test/CodeGen/X86/illegal-vector-args-return.ll b/llvm/test/CodeGen/X86/illegal-vector-args-return.ll --- a/llvm/test/CodeGen/X86/illegal-vector-args-return.ll +++ b/llvm/test/CodeGen/X86/illegal-vector-args-return.ll @@ -1,16 +1,24 @@ -; RUN: llc < %s -mattr=+sse2 -mcpu=nehalem | grep "mulpd %xmm3, %xmm1" -; RUN: llc < %s -mattr=+sse2 -mcpu=nehalem | grep "mulpd %xmm2, %xmm0" -; RUN: llc < %s -mattr=+sse2 -mcpu=nehalem | grep "addps %xmm3, %xmm1" 
-; RUN: llc < %s -mattr=+sse2 -mcpu=nehalem | grep "addps %xmm2, %xmm0" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mattr=+sse2 -mcpu=nehalem | FileCheck %s target triple = "i686-apple-darwin8" define <4 x double> @foo(<4 x double> %x, <4 x double> %z) { +; CHECK-LABEL: foo: +; CHECK: ## %bb.0: +; CHECK-NEXT: mulpd %xmm2, %xmm0 +; CHECK-NEXT: mulpd %xmm3, %xmm1 +; CHECK-NEXT: retl %y = fmul <4 x double> %x, %z ret <4 x double> %y } define <8 x float> @bar(<8 x float> %x, <8 x float> %z) { +; CHECK-LABEL: bar: +; CHECK: ## %bb.0: +; CHECK-NEXT: addps %xmm2, %xmm0 +; CHECK-NEXT: addps %xmm3, %xmm1 +; CHECK-NEXT: retl %y = fadd <8 x float> %x, %z ret <8 x float> %y } diff --git a/llvm/test/CodeGen/X86/inline-asm-modifier-n.ll b/llvm/test/CodeGen/X86/inline-asm-modifier-n.ll --- a/llvm/test/CodeGen/X86/inline-asm-modifier-n.ll +++ b/llvm/test/CodeGen/X86/inline-asm-modifier-n.ll @@ -1,7 +1,14 @@ -; RUN: llc < %s -mtriple=i686-- -no-integrated-as | grep " 37" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -no-integrated-as | FileCheck %s ; rdar://7008959 define void @bork() nounwind { +; CHECK-LABEL: bork: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: BORK 37 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: retl entry: tail call void asm sideeffect "BORK ${0:n}", "i,~{dirflag},~{fpsr},~{flags}"(i32 -37) nounwind ret void diff --git a/llvm/test/CodeGen/X86/inline-asm-mrv.ll b/llvm/test/CodeGen/X86/inline-asm-mrv.ll --- a/llvm/test/CodeGen/X86/inline-asm-mrv.ll +++ b/llvm/test/CodeGen/X86/inline-asm-mrv.ll @@ -1,13 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -no-integrated-as | FileCheck %s ; PR2094 -; RUN: llc < %s -no-integrated-as | grep movslq -; RUN: llc < %s -no-integrated-as | grep addps -; RUN: llc < %s -no-integrated-as | grep paddd -; RUN: llc < %s -no-integrated-as | not grep 
movq target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-apple-darwin8" define i32 @test1(i8* %v, i8* %blk2, i8* %blk1, i32 %stride, i32 %h) nounwind { +; CHECK-LABEL: test1: +; CHECK: ## %bb.0: +; CHECK-NEXT: movslq %ecx, %rax +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: %eax %rcx %rdx %rax %r8d %rdx %rsi +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: set %eax +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: retq %tmp12 = sext i32 %stride to i64 ; [#uses=1] %mrv = call {i32, i8*, i8*} asm sideeffect "$0 $1 $2 $3 $4 $5 $6", "=r,=r,=r,r,r,r,r"( i64 %tmp12, i32 %h, i8* %blk1, i8* %blk2 ) nounwind @@ -18,6 +26,13 @@ } define <4 x float> @test2() nounwind { +; CHECK-LABEL: test2: +; CHECK: ## %bb.0: +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: set %xmm0, %xmm1 +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: addps %xmm1, %xmm0 +; CHECK-NEXT: retq %mrv = call {<4 x float>, <4 x float>} asm "set $0, $1", "=x,=x"() %a = extractvalue {<4 x float>, <4 x float>} %mrv, 0 %b = extractvalue {<4 x float>, <4 x float>} %mrv, 1 @@ -26,6 +41,13 @@ } define <4 x i32> @test3() nounwind { +; CHECK-LABEL: test3: +; CHECK: ## %bb.0: +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: set %xmm0, %xmm1 +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: paddd %xmm1, %xmm0 +; CHECK-NEXT: retq %mrv = call {<4 x i32>, <4 x i32>} asm "set $0, $1", "=x,=x"() %a = extractvalue {<4 x i32>, <4 x i32>} %mrv, 0 %b = extractvalue {<4 x i32>, <4 x i32>} %mrv, 1 diff --git a/llvm/test/CodeGen/X86/inline-asm-pic.ll b/llvm/test/CodeGen/X86/inline-asm-pic.ll --- a/llvm/test/CodeGen/X86/inline-asm-pic.ll +++ b/llvm/test/CodeGen/X86/inline-asm-pic.ll @@ -1,9 +1,19 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | grep lea -; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | grep call +; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | FileCheck %s @main_q = internal global i8* null ; [#uses=1] define void @func2() nounwind { +; CHECK-LABEL: func2: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: calll L0$pb +; CHECK-NEXT: L0$pb: +; CHECK-NEXT: popl %eax +; CHECK-NEXT: leal _main_q-L0$pb(%eax), %eax +; CHECK-NEXT: ## InlineAsm Start +; CHECK-NEXT: movl %eax, %gs:152 +; CHECK-NEXT: ## InlineAsm End +; CHECK-NEXT: retl entry: tail call void asm "mov $1,%gs:$0", "=*m,ri,~{dirflag},~{fpsr},~{flags}"(i8** inttoptr (i32 152 to i8**), i8* bitcast (i8** @main_q to i8*)) nounwind ret void diff --git a/llvm/test/CodeGen/X86/ins_subreg_coalesce-2.ll b/llvm/test/CodeGen/X86/ins_subreg_coalesce-2.ll --- a/llvm/test/CodeGen/X86/ins_subreg_coalesce-2.ll +++ b/llvm/test/CodeGen/X86/ins_subreg_coalesce-2.ll @@ -1,6 +1,13 @@ -; RUN: llc < %s -mtriple=x86_64-- | not grep movw +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s define i16 @test5(i16 %f12) nounwind { +; CHECK-LABEL: test5: +; CHECK: # %bb.0: +; CHECK-NEXT: shrl $6, %edi +; CHECK-NEXT: movsbl %dil, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: retq %f11 = shl i16 %f12, 2 ; [#uses=1] %tmp7.25 = ashr i16 %f11, 8 ; [#uses=1] ret i16 %tmp7.25 diff --git a/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll b/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll --- a/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll +++ b/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep mov | count 3 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s %struct.COMPOSITE = type { i8, i16, i16 } %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, 
%struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } @@ -18,6 +19,44 @@ %struct.rec = type { %struct.head_type } define void @FontChange(i1 %foo) nounwind { +; CHECK-LABEL: FontChange: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: je .LBB0_9 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %bb366 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: # %bb.2: # %bb428 +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: je .LBB0_9 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_3: # %bb650 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cmpb $0, 0 +; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: # %bb.4: # %bb662 +; CHECK-NEXT: movl 0, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: andl $57344, %ecx # imm = 0xE000 +; CHECK-NEXT: cmpl $8192, %ecx # imm = 0x2000 +; CHECK-NEXT: jne .LBB0_9 +; CHECK-NEXT: # %bb.5: # %bb4884 +; CHECK-NEXT: andl $7168, %eax # imm = 0x1C00 +; CHECK-NEXT: cmpl $1024, %eax # imm = 0x400 +; CHECK-NEXT: jne .LBB0_9 +; CHECK-NEXT: # %bb.6: # %bb4932 +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: jne .LBB0_9 +; CHECK-NEXT: # %bb.7: # %bb4940 +; CHECK-NEXT: movl 0, %eax +; CHECK-NEXT: cmpl $160, %eax +; CHECK-NEXT: je .LBB0_9 +; CHECK-NEXT: # %bb.8: # %bb4940 +; CHECK-NEXT: cmpl $159, %eax +; CHECK-NEXT: .LBB0_9: # %bb4897 +; CHECK-NEXT: retq entry: br i1 %foo, label %bb298, label %bb49 bb49: ; preds = %entry diff --git a/llvm/test/CodeGen/X86/isel-sink2.ll b/llvm/test/CodeGen/X86/isel-sink2.ll --- a/llvm/test/CodeGen/X86/isel-sink2.ll +++ b/llvm/test/CodeGen/X86/isel-sink2.ll @@ -1,8 +1,18 @@ -; RUN: llc < %s -mtriple=i686-- > %t -; RUN: grep "movb.7(%...)" %t -; RUN: not grep leal %t +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s define i8 @test(i32 *%P) nounwind { +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: 
movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: cmpb $0, 4(%eax) +; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: # %bb.2: # %F +; CHECK-NEXT: movb 7(%eax), %al +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_1: # %TB +; CHECK-NEXT: movb $4, %al +; CHECK-NEXT: retl %Q = getelementptr i32, i32* %P, i32 1 %R = bitcast i32* %Q to i8* %S = load i8, i8* %R diff --git a/llvm/test/CodeGen/X86/isnan.ll b/llvm/test/CodeGen/X86/isnan.ll --- a/llvm/test/CodeGen/X86/isnan.ll +++ b/llvm/test/CodeGen/X86/isnan.ll @@ -1,8 +1,18 @@ -; RUN: llc < %s -mtriple=i686-- | not grep call +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s declare i1 @llvm.isunordered.f64(double) define i1 @test_isnan(double %X) { +; CHECK-LABEL: test_isnan: +; CHECK: # %bb.0: +; CHECK-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK-NEXT: fucomp %st(0) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: setp %al +; CHECK-NEXT: retl %R = fcmp uno double %X, %X ; [#uses=1] ret i1 %R } diff --git a/llvm/test/CodeGen/X86/isnan2.ll b/llvm/test/CodeGen/X86/isnan2.ll --- a/llvm/test/CodeGen/X86/isnan2.ll +++ b/llvm/test/CodeGen/X86/isnan2.ll @@ -1,8 +1,16 @@ -; RUN: llc < %s -mtriple=i686-- -mcpu=yonah | not grep pxor +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mcpu=yonah | FileCheck %s ; This should not need to materialize 0.0 to evaluate the condition. 
define i32 @test(double %X) nounwind { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: ucomisd %xmm0, %xmm0 +; CHECK-NEXT: setp %al +; CHECK-NEXT: retl entry: %tmp6 = fcmp uno double %X, 0.000000e+00 ; [#uses=1] %tmp67 = zext i1 %tmp6 to i32 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/ispositive.ll b/llvm/test/CodeGen/X86/ispositive.ll --- a/llvm/test/CodeGen/X86/ispositive.ll +++ b/llvm/test/CodeGen/X86/ispositive.ll @@ -1,6 +1,12 @@ -; RUN: llc < %s -mtriple=i686-- | grep "shrl.*31" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s define i32 @test1(i32 %X) { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: retl entry: icmp slt i32 %X, 0 ; :0 [#uses=1] zext i1 %0 to i32 ; :1 [#uses=1] diff --git a/llvm/test/CodeGen/X86/large-constants.ll b/llvm/test/CodeGen/X86/large-constants.ll --- a/llvm/test/CodeGen/X86/large-constants.ll +++ b/llvm/test/CodeGen/X86/large-constants.ll @@ -1,6 +1,35 @@ -; RUN: llc < %s -mtriple=x86_64-darwin -mcpu=corei7 | grep movabsq | count 3 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-darwin -mcpu=corei7 | FileCheck %s define i64 @constant_hoisting(i64 %o0, i64 %o1, i64 %o2, i64 %o3, i64 %o4, i64 %o5) { +; CHECK-LABEL: constant_hoisting: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movabsq $-281474976710654, %rax ## imm = 0xFFFF000000000002 +; CHECK-NEXT: testq %rax, %rdi +; CHECK-NEXT: jne LBB0_7 +; CHECK-NEXT: ## %bb.1: ## %bb1 +; CHECK-NEXT: testq %rax, %rsi +; CHECK-NEXT: jne LBB0_7 +; CHECK-NEXT: ## %bb.2: ## %bb2 +; CHECK-NEXT: testq %rax, %rdx +; CHECK-NEXT: jne LBB0_7 +; CHECK-NEXT: ## %bb.3: ## %bb3 +; CHECK-NEXT: testq %rax, %rcx +; CHECK-NEXT: jne LBB0_7 +; CHECK-NEXT: ## %bb.4: ## %bb4 +; 
CHECK-NEXT: leaq 1(%rax), %rcx +; CHECK-NEXT: testq %rcx, %r8 +; CHECK-NEXT: jne LBB0_7 +; CHECK-NEXT: ## %bb.5: ## %bb5 +; CHECK-NEXT: addq $2, %rax +; CHECK-NEXT: andq %rax, %r9 +; CHECK-NEXT: je LBB0_6 +; CHECK-NEXT: LBB0_7: ## %fail +; CHECK-NEXT: movq $-1, %rax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB0_6: ## %bb6 +; CHECK-NEXT: movq %r9, %rax +; CHECK-NEXT: retq entry: %l0 = and i64 %o0, -281474976710654 %c0 = icmp ne i64 %l0, 0 @@ -39,6 +68,16 @@ } define void @constant_expressions() { +; CHECK-LABEL: constant_expressions: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movabsq $51250129900, %rax ## imm = 0xBEEBEEBEC +; CHECK-NEXT: movq (%rax), %rcx +; CHECK-NEXT: movq 16(%rax), %rdx +; CHECK-NEXT: addq 8(%rax), %rcx +; CHECK-NEXT: addq 24(%rax), %rdx +; CHECK-NEXT: addq %rcx, %rdx +; CHECK-NEXT: movq %rdx, (%rax) +; CHECK-NEXT: retq entry: %0 = load i64, i64* inttoptr (i64 add (i64 51250129900, i64 0) to i64*) %1 = load i64, i64* inttoptr (i64 add (i64 51250129900, i64 8) to i64*) @@ -53,6 +92,16 @@ define void @constant_expressions2() { +; CHECK-LABEL: constant_expressions2: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movabsq $51250129900, %rax ## imm = 0xBEEBEEBEC +; CHECK-NEXT: movq (%rax), %rcx +; CHECK-NEXT: movq 16(%rax), %rdx +; CHECK-NEXT: addq 8(%rax), %rcx +; CHECK-NEXT: addq 24(%rax), %rdx +; CHECK-NEXT: addq %rcx, %rdx +; CHECK-NEXT: movq %rdx, (%rax) +; CHECK-NEXT: retq entry: %0 = load i64, i64* inttoptr (i64 51250129900 to i64*) %1 = load i64, i64* inttoptr (i64 51250129908 to i64*) diff --git a/llvm/test/CodeGen/X86/lea-recursion.ll b/llvm/test/CodeGen/X86/lea-recursion.ll --- a/llvm/test/CodeGen/X86/lea-recursion.ll +++ b/llvm/test/CodeGen/X86/lea-recursion.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep lea | count 13 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s ; This testcase was written to demonstrate an instruction-selection problem, ; however 
it also happens to expose a limitation in the DAGCombiner's @@ -12,6 +13,37 @@ @g1 = weak global [1000 x i32] zeroinitializer, align 32 ; <[1000 x i32]*> [#uses=7] define void @foo() { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{.*}}(%rip), %eax +; CHECK-NEXT: movl {{.*}}(%rip), %ecx +; CHECK-NEXT: leal (%rax,%rcx), %edx +; CHECK-NEXT: leal 1(%rax,%rcx), %eax +; CHECK-NEXT: movl %eax, g0+{{.*}}(%rip) +; CHECK-NEXT: movl g1+{{.*}}(%rip), %eax +; CHECK-NEXT: leal 1(%rax,%rdx), %ecx +; CHECK-NEXT: leal 2(%rax,%rdx), %eax +; CHECK-NEXT: movl %eax, g0+{{.*}}(%rip) +; CHECK-NEXT: movl g1+{{.*}}(%rip), %eax +; CHECK-NEXT: leal 1(%rax,%rcx), %edx +; CHECK-NEXT: leal 2(%rax,%rcx), %eax +; CHECK-NEXT: movl %eax, g0+{{.*}}(%rip) +; CHECK-NEXT: movl g1+{{.*}}(%rip), %eax +; CHECK-NEXT: leal 1(%rax,%rdx), %ecx +; CHECK-NEXT: leal 2(%rax,%rdx), %eax +; CHECK-NEXT: movl %eax, g0+{{.*}}(%rip) +; CHECK-NEXT: movl g1+{{.*}}(%rip), %eax +; CHECK-NEXT: leal 1(%rax,%rcx), %edx +; CHECK-NEXT: leal 2(%rax,%rcx), %eax +; CHECK-NEXT: movl %eax, g0+{{.*}}(%rip) +; CHECK-NEXT: movl g1+{{.*}}(%rip), %eax +; CHECK-NEXT: leal 1(%rax,%rdx), %ecx +; CHECK-NEXT: leal 2(%rax,%rdx), %eax +; CHECK-NEXT: movl %eax, g0+{{.*}}(%rip) +; CHECK-NEXT: movl g1+{{.*}}(%rip), %eax +; CHECK-NEXT: leal 2(%rax,%rcx), %eax +; CHECK-NEXT: movl %eax, g0+{{.*}}(%rip) +; CHECK-NEXT: retq entry: %tmp4 = load i32, i32* getelementptr ([1000 x i32], [1000 x i32]* @g0, i32 0, i32 0) ; [#uses=1] %tmp8 = load i32, i32* getelementptr ([1000 x i32], [1000 x i32]* @g1, i32 0, i32 0) ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/limited-prec.ll b/llvm/test/CodeGen/X86/limited-prec.ll --- a/llvm/test/CodeGen/X86/limited-prec.ll +++ b/llvm/test/CodeGen/X86/limited-prec.ll @@ -1,11 +1,103 @@ -; RUN: llc < %s -limit-float-precision=6 -mtriple=i686-- | \ -; RUN: not grep exp | not grep log | not grep pow -; RUN: llc < %s -limit-float-precision=12 -mtriple=i686-- | \ -; RUN: not grep exp | not grep log | 
not grep pow -; RUN: llc < %s -limit-float-precision=18 -mtriple=i686-- | \ -; RUN: not grep exp | not grep log | not grep pow +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -limit-float-precision=6 -mtriple=i686-- | FileCheck %s --check-prefixes=ALL,precision6 +; RUN: llc < %s -limit-float-precision=12 -mtriple=i686-- | FileCheck %s --check-prefixes=ALL,precision12 +; RUN: llc < %s -limit-float-precision=18 -mtriple=i686-- | FileCheck %s --check-prefixes=ALL,precision18 define float @f1(float %x) nounwind noinline { +; precision6-LABEL: f1: +; precision6: # %bb.0: # %entry +; precision6-NEXT: subl $20, %esp +; precision6-NEXT: flds {{[0-9]+}}(%esp) +; precision6-NEXT: fmuls {{\.LCPI.*}} +; precision6-NEXT: fnstcw (%esp) +; precision6-NEXT: movzwl (%esp), %eax +; precision6-NEXT: orl $3072, %eax # imm = 0xC00 +; precision6-NEXT: movw %ax, {{[0-9]+}}(%esp) +; precision6-NEXT: fldcw {{[0-9]+}}(%esp) +; precision6-NEXT: fistl {{[0-9]+}}(%esp) +; precision6-NEXT: fldcw (%esp) +; precision6-NEXT: movl {{[0-9]+}}(%esp), %eax +; precision6-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision6-NEXT: fisubl {{[0-9]+}}(%esp) +; precision6-NEXT: fld %st(0) +; precision6-NEXT: fmuls {{\.LCPI.*}} +; precision6-NEXT: fadds {{\.LCPI.*}} +; precision6-NEXT: fmulp %st, %st(1) +; precision6-NEXT: fadds {{\.LCPI.*}} +; precision6-NEXT: fstps {{[0-9]+}}(%esp) +; precision6-NEXT: shll $23, %eax +; precision6-NEXT: addl {{[0-9]+}}(%esp), %eax +; precision6-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision6-NEXT: flds {{[0-9]+}}(%esp) +; precision6-NEXT: addl $20, %esp +; precision6-NEXT: retl +; +; precision12-LABEL: f1: +; precision12: # %bb.0: # %entry +; precision12-NEXT: subl $20, %esp +; precision12-NEXT: flds {{[0-9]+}}(%esp) +; precision12-NEXT: fmuls {{\.LCPI.*}} +; precision12-NEXT: fnstcw (%esp) +; precision12-NEXT: movzwl (%esp), %eax +; precision12-NEXT: orl $3072, %eax # imm = 0xC00 +; precision12-NEXT: movw %ax, {{[0-9]+}}(%esp) +; 
precision12-NEXT: fldcw {{[0-9]+}}(%esp) +; precision12-NEXT: fistl {{[0-9]+}}(%esp) +; precision12-NEXT: fldcw (%esp) +; precision12-NEXT: movl {{[0-9]+}}(%esp), %eax +; precision12-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision12-NEXT: fisubl {{[0-9]+}}(%esp) +; precision12-NEXT: fld %st(0) +; precision12-NEXT: fmuls {{\.LCPI.*}} +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fmul %st(1), %st +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fmulp %st, %st(1) +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fstps {{[0-9]+}}(%esp) +; precision12-NEXT: shll $23, %eax +; precision12-NEXT: addl {{[0-9]+}}(%esp), %eax +; precision12-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision12-NEXT: flds {{[0-9]+}}(%esp) +; precision12-NEXT: addl $20, %esp +; precision12-NEXT: retl +; +; precision18-LABEL: f1: +; precision18: # %bb.0: # %entry +; precision18-NEXT: subl $20, %esp +; precision18-NEXT: flds {{[0-9]+}}(%esp) +; precision18-NEXT: fmuls {{\.LCPI.*}} +; precision18-NEXT: fnstcw (%esp) +; precision18-NEXT: movzwl (%esp), %eax +; precision18-NEXT: orl $3072, %eax # imm = 0xC00 +; precision18-NEXT: movw %ax, {{[0-9]+}}(%esp) +; precision18-NEXT: fldcw {{[0-9]+}}(%esp) +; precision18-NEXT: fistl {{[0-9]+}}(%esp) +; precision18-NEXT: fldcw (%esp) +; precision18-NEXT: movl {{[0-9]+}}(%esp), %eax +; precision18-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision18-NEXT: fisubl {{[0-9]+}}(%esp) +; precision18-NEXT: fld %st(0) +; precision18-NEXT: fmuls {{\.LCPI.*}} +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmulp %st, %st(1) +; precision18-NEXT: fld1 +; precision18-NEXT: faddp %st, %st(1) +; precision18-NEXT: fstps 
{{[0-9]+}}(%esp) +; precision18-NEXT: shll $23, %eax +; precision18-NEXT: addl {{[0-9]+}}(%esp), %eax +; precision18-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision18-NEXT: flds {{[0-9]+}}(%esp) +; precision18-NEXT: addl $20, %esp +; precision18-NEXT: retl entry: %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] %0 = call float @llvm.exp.f32(float %x) ; [#uses=1] @@ -15,6 +107,97 @@ declare float @llvm.exp.f32(float) nounwind readonly define float @f2(float %x) nounwind noinline { +; precision6-LABEL: f2: +; precision6: # %bb.0: # %entry +; precision6-NEXT: subl $20, %esp +; precision6-NEXT: flds {{[0-9]+}}(%esp) +; precision6-NEXT: fnstcw (%esp) +; precision6-NEXT: movzwl (%esp), %eax +; precision6-NEXT: orl $3072, %eax # imm = 0xC00 +; precision6-NEXT: movw %ax, {{[0-9]+}}(%esp) +; precision6-NEXT: fldcw {{[0-9]+}}(%esp) +; precision6-NEXT: fistl {{[0-9]+}}(%esp) +; precision6-NEXT: fldcw (%esp) +; precision6-NEXT: movl {{[0-9]+}}(%esp), %eax +; precision6-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision6-NEXT: fisubl {{[0-9]+}}(%esp) +; precision6-NEXT: fld %st(0) +; precision6-NEXT: fmuls {{\.LCPI.*}} +; precision6-NEXT: fadds {{\.LCPI.*}} +; precision6-NEXT: fmulp %st, %st(1) +; precision6-NEXT: fadds {{\.LCPI.*}} +; precision6-NEXT: fstps {{[0-9]+}}(%esp) +; precision6-NEXT: shll $23, %eax +; precision6-NEXT: addl {{[0-9]+}}(%esp), %eax +; precision6-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision6-NEXT: flds {{[0-9]+}}(%esp) +; precision6-NEXT: addl $20, %esp +; precision6-NEXT: retl +; +; precision12-LABEL: f2: +; precision12: # %bb.0: # %entry +; precision12-NEXT: subl $20, %esp +; precision12-NEXT: flds {{[0-9]+}}(%esp) +; precision12-NEXT: fnstcw (%esp) +; precision12-NEXT: movzwl (%esp), %eax +; precision12-NEXT: orl $3072, %eax # imm = 0xC00 +; precision12-NEXT: movw %ax, {{[0-9]+}}(%esp) +; precision12-NEXT: fldcw {{[0-9]+}}(%esp) +; precision12-NEXT: fistl {{[0-9]+}}(%esp) +; precision12-NEXT: fldcw (%esp) +; precision12-NEXT: movl {{[0-9]+}}(%esp), 
%eax +; precision12-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision12-NEXT: fisubl {{[0-9]+}}(%esp) +; precision12-NEXT: fld %st(0) +; precision12-NEXT: fmuls {{\.LCPI.*}} +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fmul %st(1), %st +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fmulp %st, %st(1) +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fstps {{[0-9]+}}(%esp) +; precision12-NEXT: shll $23, %eax +; precision12-NEXT: addl {{[0-9]+}}(%esp), %eax +; precision12-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision12-NEXT: flds {{[0-9]+}}(%esp) +; precision12-NEXT: addl $20, %esp +; precision12-NEXT: retl +; +; precision18-LABEL: f2: +; precision18: # %bb.0: # %entry +; precision18-NEXT: subl $20, %esp +; precision18-NEXT: flds {{[0-9]+}}(%esp) +; precision18-NEXT: fnstcw (%esp) +; precision18-NEXT: movzwl (%esp), %eax +; precision18-NEXT: orl $3072, %eax # imm = 0xC00 +; precision18-NEXT: movw %ax, {{[0-9]+}}(%esp) +; precision18-NEXT: fldcw {{[0-9]+}}(%esp) +; precision18-NEXT: fistl {{[0-9]+}}(%esp) +; precision18-NEXT: fldcw (%esp) +; precision18-NEXT: movl {{[0-9]+}}(%esp), %eax +; precision18-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision18-NEXT: fisubl {{[0-9]+}}(%esp) +; precision18-NEXT: fld %st(0) +; precision18-NEXT: fmuls {{\.LCPI.*}} +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmulp %st, %st(1) +; precision18-NEXT: fld1 +; precision18-NEXT: faddp %st, %st(1) +; precision18-NEXT: fstps {{[0-9]+}}(%esp) +; precision18-NEXT: shll $23, %eax +; precision18-NEXT: addl {{[0-9]+}}(%esp), %eax +; precision18-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision18-NEXT: flds {{[0-9]+}}(%esp) +; 
precision18-NEXT: addl $20, %esp +; precision18-NEXT: retl entry: %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] %0 = call float @llvm.exp2.f32(float %x) ; [#uses=1] @@ -24,6 +207,100 @@ declare float @llvm.exp2.f32(float) nounwind readonly define float @f3(float %x) nounwind noinline { +; precision6-LABEL: f3: +; precision6: # %bb.0: # %entry +; precision6-NEXT: subl $20, %esp +; precision6-NEXT: flds {{[0-9]+}}(%esp) +; precision6-NEXT: fmuls {{\.LCPI.*}} +; precision6-NEXT: fnstcw (%esp) +; precision6-NEXT: movzwl (%esp), %eax +; precision6-NEXT: orl $3072, %eax # imm = 0xC00 +; precision6-NEXT: movw %ax, {{[0-9]+}}(%esp) +; precision6-NEXT: fldcw {{[0-9]+}}(%esp) +; precision6-NEXT: fistl {{[0-9]+}}(%esp) +; precision6-NEXT: fldcw (%esp) +; precision6-NEXT: movl {{[0-9]+}}(%esp), %eax +; precision6-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision6-NEXT: fisubl {{[0-9]+}}(%esp) +; precision6-NEXT: fld %st(0) +; precision6-NEXT: fmuls {{\.LCPI.*}} +; precision6-NEXT: fadds {{\.LCPI.*}} +; precision6-NEXT: fmulp %st, %st(1) +; precision6-NEXT: fadds {{\.LCPI.*}} +; precision6-NEXT: fstps {{[0-9]+}}(%esp) +; precision6-NEXT: shll $23, %eax +; precision6-NEXT: addl {{[0-9]+}}(%esp), %eax +; precision6-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision6-NEXT: flds {{[0-9]+}}(%esp) +; precision6-NEXT: addl $20, %esp +; precision6-NEXT: retl +; +; precision12-LABEL: f3: +; precision12: # %bb.0: # %entry +; precision12-NEXT: subl $20, %esp +; precision12-NEXT: flds {{[0-9]+}}(%esp) +; precision12-NEXT: fmuls {{\.LCPI.*}} +; precision12-NEXT: fnstcw (%esp) +; precision12-NEXT: movzwl (%esp), %eax +; precision12-NEXT: orl $3072, %eax # imm = 0xC00 +; precision12-NEXT: movw %ax, {{[0-9]+}}(%esp) +; precision12-NEXT: fldcw {{[0-9]+}}(%esp) +; precision12-NEXT: fistl {{[0-9]+}}(%esp) +; precision12-NEXT: fldcw (%esp) +; precision12-NEXT: movl {{[0-9]+}}(%esp), %eax +; precision12-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision12-NEXT: fisubl {{[0-9]+}}(%esp) +; 
precision12-NEXT: fld %st(0) +; precision12-NEXT: fmuls {{\.LCPI.*}} +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fmul %st(1), %st +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fmulp %st, %st(1) +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fstps {{[0-9]+}}(%esp) +; precision12-NEXT: shll $23, %eax +; precision12-NEXT: addl {{[0-9]+}}(%esp), %eax +; precision12-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision12-NEXT: flds {{[0-9]+}}(%esp) +; precision12-NEXT: addl $20, %esp +; precision12-NEXT: retl +; +; precision18-LABEL: f3: +; precision18: # %bb.0: # %entry +; precision18-NEXT: subl $20, %esp +; precision18-NEXT: flds {{[0-9]+}}(%esp) +; precision18-NEXT: fmuls {{\.LCPI.*}} +; precision18-NEXT: fnstcw (%esp) +; precision18-NEXT: movzwl (%esp), %eax +; precision18-NEXT: orl $3072, %eax # imm = 0xC00 +; precision18-NEXT: movw %ax, {{[0-9]+}}(%esp) +; precision18-NEXT: fldcw {{[0-9]+}}(%esp) +; precision18-NEXT: fistl {{[0-9]+}}(%esp) +; precision18-NEXT: fldcw (%esp) +; precision18-NEXT: movl {{[0-9]+}}(%esp), %eax +; precision18-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision18-NEXT: fisubl {{[0-9]+}}(%esp) +; precision18-NEXT: fld %st(0) +; precision18-NEXT: fmuls {{\.LCPI.*}} +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmulp %st, %st(1) +; precision18-NEXT: fld1 +; precision18-NEXT: faddp %st, %st(1) +; precision18-NEXT: fstps {{[0-9]+}}(%esp) +; precision18-NEXT: shll $23, %eax +; precision18-NEXT: addl {{[0-9]+}}(%esp), %eax +; precision18-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision18-NEXT: flds {{[0-9]+}}(%esp) +; precision18-NEXT: addl $20, %esp +; precision18-NEXT: retl entry: %"alloca 
point" = bitcast i32 0 to i32 ; [#uses=0] %0 = call float @llvm.pow.f32(float 1.000000e+01, float %x) ; [#uses=1] @@ -33,6 +310,89 @@ declare float @llvm.pow.f32(float, float) nounwind readonly define float @f4(float %x) nounwind noinline { +; precision6-LABEL: f4: +; precision6: # %bb.0: # %entry +; precision6-NEXT: subl $8, %esp +; precision6-NEXT: movl {{[0-9]+}}(%esp), %eax +; precision6-NEXT: movl %eax, %ecx +; precision6-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF +; precision6-NEXT: orl $1065353216, %ecx # imm = 0x3F800000 +; precision6-NEXT: movl %ecx, (%esp) +; precision6-NEXT: andl $2139095040, %eax # imm = 0x7F800000 +; precision6-NEXT: shrl $23, %eax +; precision6-NEXT: addl $-127, %eax +; precision6-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision6-NEXT: flds (%esp) +; precision6-NEXT: fld %st(0) +; precision6-NEXT: fmuls {{\.LCPI.*}} +; precision6-NEXT: fadds {{\.LCPI.*}} +; precision6-NEXT: fmulp %st, %st(1) +; precision6-NEXT: fadds {{\.LCPI.*}} +; precision6-NEXT: fildl {{[0-9]+}}(%esp) +; precision6-NEXT: fmuls {{\.LCPI.*}} +; precision6-NEXT: faddp %st, %st(1) +; precision6-NEXT: addl $8, %esp +; precision6-NEXT: retl +; +; precision12-LABEL: f4: +; precision12: # %bb.0: # %entry +; precision12-NEXT: subl $8, %esp +; precision12-NEXT: movl {{[0-9]+}}(%esp), %eax +; precision12-NEXT: movl %eax, %ecx +; precision12-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF +; precision12-NEXT: orl $1065353216, %ecx # imm = 0x3F800000 +; precision12-NEXT: movl %ecx, (%esp) +; precision12-NEXT: andl $2139095040, %eax # imm = 0x7F800000 +; precision12-NEXT: shrl $23, %eax +; precision12-NEXT: addl $-127, %eax +; precision12-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision12-NEXT: flds (%esp) +; precision12-NEXT: fld %st(0) +; precision12-NEXT: fmuls {{\.LCPI.*}} +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fmul %st(1), %st +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fmul %st(1), %st +; precision12-NEXT: fadds {{\.LCPI.*}} +; 
precision12-NEXT: fmulp %st, %st(1) +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fildl {{[0-9]+}}(%esp) +; precision12-NEXT: fmuls {{\.LCPI.*}} +; precision12-NEXT: faddp %st, %st(1) +; precision12-NEXT: addl $8, %esp +; precision12-NEXT: retl +; +; precision18-LABEL: f4: +; precision18: # %bb.0: # %entry +; precision18-NEXT: subl $8, %esp +; precision18-NEXT: movl {{[0-9]+}}(%esp), %eax +; precision18-NEXT: movl %eax, %ecx +; precision18-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF +; precision18-NEXT: orl $1065353216, %ecx # imm = 0x3F800000 +; precision18-NEXT: movl %ecx, (%esp) +; precision18-NEXT: andl $2139095040, %eax # imm = 0x7F800000 +; precision18-NEXT: shrl $23, %eax +; precision18-NEXT: addl $-127, %eax +; precision18-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision18-NEXT: flds (%esp) +; precision18-NEXT: fld %st(0) +; precision18-NEXT: fmuls {{\.LCPI.*}} +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmulp %st, %st(1) +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fildl {{[0-9]+}}(%esp) +; precision18-NEXT: fmuls {{\.LCPI.*}} +; precision18-NEXT: faddp %st, %st(1) +; precision18-NEXT: addl $8, %esp +; precision18-NEXT: retl entry: %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] %0 = call float @llvm.log.f32(float %x) ; [#uses=1] @@ -42,6 +402,83 @@ declare float @llvm.log.f32(float) nounwind readonly define float @f5(float %x) nounwind noinline { +; precision6-LABEL: f5: +; precision6: # %bb.0: # %entry +; precision6-NEXT: subl $8, %esp +; precision6-NEXT: movl {{[0-9]+}}(%esp), %eax +; precision6-NEXT: movl %eax, %ecx +; precision6-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF +; precision6-NEXT: orl 
$1065353216, %ecx # imm = 0x3F800000 +; precision6-NEXT: movl %ecx, (%esp) +; precision6-NEXT: andl $2139095040, %eax # imm = 0x7F800000 +; precision6-NEXT: shrl $23, %eax +; precision6-NEXT: addl $-127, %eax +; precision6-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision6-NEXT: flds (%esp) +; precision6-NEXT: fld %st(0) +; precision6-NEXT: fmuls {{\.LCPI.*}} +; precision6-NEXT: fadds {{\.LCPI.*}} +; precision6-NEXT: fmulp %st, %st(1) +; precision6-NEXT: fadds {{\.LCPI.*}} +; precision6-NEXT: fiaddl {{[0-9]+}}(%esp) +; precision6-NEXT: addl $8, %esp +; precision6-NEXT: retl +; +; precision12-LABEL: f5: +; precision12: # %bb.0: # %entry +; precision12-NEXT: subl $8, %esp +; precision12-NEXT: movl {{[0-9]+}}(%esp), %eax +; precision12-NEXT: movl %eax, %ecx +; precision12-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF +; precision12-NEXT: orl $1065353216, %ecx # imm = 0x3F800000 +; precision12-NEXT: movl %ecx, (%esp) +; precision12-NEXT: andl $2139095040, %eax # imm = 0x7F800000 +; precision12-NEXT: shrl $23, %eax +; precision12-NEXT: addl $-127, %eax +; precision12-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision12-NEXT: flds (%esp) +; precision12-NEXT: fld %st(0) +; precision12-NEXT: fmuls {{\.LCPI.*}} +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fmul %st(1), %st +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fmul %st(1), %st +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fmulp %st, %st(1) +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fiaddl {{[0-9]+}}(%esp) +; precision12-NEXT: addl $8, %esp +; precision12-NEXT: retl +; +; precision18-LABEL: f5: +; precision18: # %bb.0: # %entry +; precision18-NEXT: subl $8, %esp +; precision18-NEXT: movl {{[0-9]+}}(%esp), %eax +; precision18-NEXT: movl %eax, %ecx +; precision18-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF +; precision18-NEXT: orl $1065353216, %ecx # imm = 0x3F800000 +; precision18-NEXT: movl %ecx, (%esp) +; precision18-NEXT: andl $2139095040, %eax # imm = 0x7F800000 
+; precision18-NEXT: shrl $23, %eax +; precision18-NEXT: addl $-127, %eax +; precision18-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision18-NEXT: flds (%esp) +; precision18-NEXT: fld %st(0) +; precision18-NEXT: fmuls {{\.LCPI.*}} +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmulp %st, %st(1) +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fiaddl {{[0-9]+}}(%esp) +; precision18-NEXT: addl $8, %esp +; precision18-NEXT: retl entry: %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] %0 = call float @llvm.log2.f32(float %x) ; [#uses=1] @@ -51,6 +488,85 @@ declare float @llvm.log2.f32(float) nounwind readonly define float @f6(float %x) nounwind noinline { +; precision6-LABEL: f6: +; precision6: # %bb.0: # %entry +; precision6-NEXT: subl $8, %esp +; precision6-NEXT: movl {{[0-9]+}}(%esp), %eax +; precision6-NEXT: movl %eax, %ecx +; precision6-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF +; precision6-NEXT: orl $1065353216, %ecx # imm = 0x3F800000 +; precision6-NEXT: movl %ecx, (%esp) +; precision6-NEXT: andl $2139095040, %eax # imm = 0x7F800000 +; precision6-NEXT: shrl $23, %eax +; precision6-NEXT: addl $-127, %eax +; precision6-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision6-NEXT: flds (%esp) +; precision6-NEXT: fld %st(0) +; precision6-NEXT: fmuls {{\.LCPI.*}} +; precision6-NEXT: fadds {{\.LCPI.*}} +; precision6-NEXT: fmulp %st, %st(1) +; precision6-NEXT: fadds {{\.LCPI.*}} +; precision6-NEXT: fildl {{[0-9]+}}(%esp) +; precision6-NEXT: fmuls {{\.LCPI.*}} +; precision6-NEXT: faddp %st, %st(1) +; precision6-NEXT: addl $8, %esp +; precision6-NEXT: retl +; +; precision12-LABEL: f6: +; precision12: # %bb.0: # %entry +; precision12-NEXT: subl 
$8, %esp +; precision12-NEXT: movl {{[0-9]+}}(%esp), %eax +; precision12-NEXT: movl %eax, %ecx +; precision12-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF +; precision12-NEXT: orl $1065353216, %ecx # imm = 0x3F800000 +; precision12-NEXT: movl %ecx, (%esp) +; precision12-NEXT: andl $2139095040, %eax # imm = 0x7F800000 +; precision12-NEXT: shrl $23, %eax +; precision12-NEXT: addl $-127, %eax +; precision12-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision12-NEXT: flds (%esp) +; precision12-NEXT: fld %st(0) +; precision12-NEXT: fmuls {{\.LCPI.*}} +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fmul %st(1), %st +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fmulp %st, %st(1) +; precision12-NEXT: fadds {{\.LCPI.*}} +; precision12-NEXT: fildl {{[0-9]+}}(%esp) +; precision12-NEXT: fmuls {{\.LCPI.*}} +; precision12-NEXT: faddp %st, %st(1) +; precision12-NEXT: addl $8, %esp +; precision12-NEXT: retl +; +; precision18-LABEL: f6: +; precision18: # %bb.0: # %entry +; precision18-NEXT: subl $8, %esp +; precision18-NEXT: movl {{[0-9]+}}(%esp), %eax +; precision18-NEXT: movl %eax, %ecx +; precision18-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF +; precision18-NEXT: orl $1065353216, %ecx # imm = 0x3F800000 +; precision18-NEXT: movl %ecx, (%esp) +; precision18-NEXT: andl $2139095040, %eax # imm = 0x7F800000 +; precision18-NEXT: shrl $23, %eax +; precision18-NEXT: addl $-127, %eax +; precision18-NEXT: movl %eax, {{[0-9]+}}(%esp) +; precision18-NEXT: flds (%esp) +; precision18-NEXT: fld %st(0) +; precision18-NEXT: fmuls {{\.LCPI.*}} +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmul %st(1), %st +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fmulp %st, %st(1) +; precision18-NEXT: fadds {{\.LCPI.*}} +; precision18-NEXT: fildl {{[0-9]+}}(%esp) +; precision18-NEXT: fmuls {{\.LCPI.*}} +; 
precision18-NEXT: faddp %st, %st(1) +; precision18-NEXT: addl $8, %esp +; precision18-NEXT: retl entry: %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] %0 = call float @llvm.log10.f32(float %x) ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/loop-strength-reduce5.ll b/llvm/test/CodeGen/X86/loop-strength-reduce5.ll --- a/llvm/test/CodeGen/X86/loop-strength-reduce5.ll +++ b/llvm/test/CodeGen/X86/loop-strength-reduce5.ll @@ -1,9 +1,29 @@ -; RUN: llc < %s -mtriple=i686-- | grep inc | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s @X = weak global i16 0 ; [#uses=1] @Y = weak global i16 0 ; [#uses=1] define void @foo(i32 %N) nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: jle .LBB0_3 +; CHECK-NEXT: # %bb.1: # %bb.preheader +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_2: # %bb +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movw %dx, X +; CHECK-NEXT: movw %cx, Y +; CHECK-NEXT: incl %edx +; CHECK-NEXT: addl $4, %ecx +; CHECK-NEXT: cmpl %edx, %eax +; CHECK-NEXT: jne .LBB0_2 +; CHECK-NEXT: .LBB0_3: # %return +; CHECK-NEXT: retl entry: %tmp1019 = icmp sgt i32 %N, 0 ; [#uses=1] br i1 %tmp1019, label %bb, label %return diff --git a/llvm/test/CodeGen/X86/loop-strength-reduce6.ll b/llvm/test/CodeGen/X86/loop-strength-reduce6.ll --- a/llvm/test/CodeGen/X86/loop-strength-reduce6.ll +++ b/llvm/test/CodeGen/X86/loop-strength-reduce6.ll @@ -1,6 +1,11 @@ -; RUN: llc < %s -mtriple=x86_64-- | not grep inc +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s define fastcc i32 @decodeMP3(i32 %isize, i32* %done) nounwind { +; CHECK-LABEL: decodeMP3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq entry: br 
label %cond_true189 diff --git a/llvm/test/CodeGen/X86/loop-strength-reduce7.ll b/llvm/test/CodeGen/X86/loop-strength-reduce7.ll --- a/llvm/test/CodeGen/X86/loop-strength-reduce7.ll +++ b/llvm/test/CodeGen/X86/loop-strength-reduce7.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s | not grep imul +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s target triple = "i386-apple-darwin9.6" %struct.III_psy_xmin = type { [22 x double], [13 x [3 x double]] } @@ -7,6 +8,30 @@ %struct.lame_global_flags = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, float, float, float, float, i32, i32, i32, i32, i32, i32, i32, i32 } define fastcc void @outer_loop(%struct.lame_global_flags* nocapture %gfp, double* nocapture %xr, i32 %targ_bits, double* nocapture %best_noise, %struct.III_psy_xmin* nocapture %l3_xmin, i32* nocapture %l3_enc, %struct.III_scalefac_t* nocapture %scalefac, %struct.gr_info* nocapture %cod_info, i32 %ch) nounwind { +; CHECK-LABEL: outer_loop: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: movl $88, %eax +; CHECK-NEXT: movl $168, %ecx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_2: ## %bb28.i37 +; CHECK-NEXT: ## =>This Loop Header: Depth=1 +; CHECK-NEXT: ## Child Loop BB0_3 Depth 2 +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_3: ## %bb29.i38 +; CHECK-NEXT: ## Parent Loop BB0_2 Depth=1 +; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 +; CHECK-NEXT: incl %edx +; CHECK-NEXT: addl $12, %esi +; CHECK-NEXT: cmpl $11, %edx +; CHECK-NEXT: jbe LBB0_3 +; CHECK-NEXT: ## %bb.1: ## %bb28.i37.loopexit +; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: addl $4, %eax +; CHECK-NEXT: addl $168, %ecx 
+; CHECK-NEXT: jmp LBB0_2 entry: br label %bb4 diff --git a/llvm/test/CodeGen/X86/lsr-negative-stride.ll b/llvm/test/CodeGen/X86/lsr-negative-stride.ll --- a/llvm/test/CodeGen/X86/lsr-negative-stride.ll +++ b/llvm/test/CodeGen/X86/lsr-negative-stride.ll @@ -1,8 +1,5 @@ -; RUN: llc < %s -mtriple=i686-- > %t -; RUN: not grep neg %t -; RUN: not grep sub.*esp %t -; RUN: not grep esi %t -; RUN: not grep push %t +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s ; This corresponds to: ;int t(int a, int b) { @@ -17,6 +14,41 @@ define i32 @t(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: t: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: cmpl %ecx, %edx +; CHECK-NEXT: jne .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_2: # %bb.outer +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB0_3 Depth 2 +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_3: # %bb +; CHECK-NEXT: # Parent Loop BB0_2 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: subl %ecx, %eax +; CHECK-NEXT: jle .LBB0_5 +; CHECK-NEXT: # %bb.4: # %cond_true +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=2 +; CHECK-NEXT: cmpl %eax, %ecx +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: jne .LBB0_3 +; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_5: # %cond_false +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: subl %edx, %ecx +; CHECK-NEXT: cmpl %edx, %ecx +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: jne .LBB0_2 +; CHECK-NEXT: .LBB0_6: # %bb17 +; CHECK-NEXT: retl entry: %tmp1434 = icmp eq i32 %a, %b ; [#uses=1] br i1 %tmp1434, label %bb17, label %bb.outer diff --git a/llvm/test/CodeGen/X86/lsr-sort.ll b/llvm/test/CodeGen/X86/lsr-sort.ll --- 
a/llvm/test/CodeGen/X86/lsr-sort.ll +++ b/llvm/test/CodeGen/X86/lsr-sort.ll @@ -1,10 +1,23 @@ -; RUN: llc < %s -mtriple=x86_64-- > %t -; RUN: grep inc %t | count 1 -; RUN: not grep incw %t +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s @X = common global i16 0 ; [#uses=1] define i32 @foo(i32 %N) nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jle .LBB0_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %bb +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movw %ax, {{.*}}(%rip) +; CHECK-NEXT: incl %eax +; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: .LBB0_2: # %return +; CHECK-NEXT: retq entry: %0 = icmp sgt i32 %N, 0 ; [#uses=1] br i1 %0, label %bb, label %return diff --git a/llvm/test/CodeGen/X86/maskmovdqu.ll b/llvm/test/CodeGen/X86/maskmovdqu.ll --- a/llvm/test/CodeGen/X86/maskmovdqu.ll +++ b/llvm/test/CodeGen/X86/maskmovdqu.ll @@ -1,10 +1,38 @@ -; RUN: llc < %s -mtriple=i686-- -mattr=+sse2,-avx | grep -i edi -; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2,-avx | grep -i rdi -; RUN: llc < %s -mtriple=i686-- -mattr=+avx | grep -i edi -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | grep -i rdi +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mattr=+sse2,-avx | FileCheck %s --check-prefixes=ALL,i686_SSE2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2,-avx | FileCheck %s --check-prefixes=ALL,x86_64_SSE2 +; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefixes=ALL,i686_AVX +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=ALL,x86_64_AVX ; rdar://6573467 define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind { +; i686_SSE2-LABEL: test: +; i686_SSE2: # %bb.0: # %entry +; i686_SSE2-NEXT: pushl %edi +; i686_SSE2-NEXT: 
movl {{[0-9]+}}(%esp), %edi +; i686_SSE2-NEXT: maskmovdqu %xmm1, %xmm0 +; i686_SSE2-NEXT: popl %edi +; i686_SSE2-NEXT: retl +; +; x86_64_SSE2-LABEL: test: +; x86_64_SSE2: # %bb.0: # %entry +; x86_64_SSE2-NEXT: movq %rsi, %rdi +; x86_64_SSE2-NEXT: maskmovdqu %xmm1, %xmm0 +; x86_64_SSE2-NEXT: retq +; +; i686_AVX-LABEL: test: +; i686_AVX: # %bb.0: # %entry +; i686_AVX-NEXT: pushl %edi +; i686_AVX-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686_AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 +; i686_AVX-NEXT: popl %edi +; i686_AVX-NEXT: retl +; +; x86_64_AVX-LABEL: test: +; x86_64_AVX: # %bb.0: # %entry +; x86_64_AVX-NEXT: movq %rsi, %rdi +; x86_64_AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 +; x86_64_AVX-NEXT: retq entry: tail call void @llvm.x86.sse2.maskmov.dqu( <16 x i8> %a, <16 x i8> %b, i8* %c ) ret void diff --git a/llvm/test/CodeGen/X86/movfs.ll b/llvm/test/CodeGen/X86/movfs.ll --- a/llvm/test/CodeGen/X86/movfs.ll +++ b/llvm/test/CodeGen/X86/movfs.ll @@ -1,6 +1,12 @@ -; RUN: llc < %s -mtriple=i686-- | grep fs +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s define i32 @foo() nounwind readonly { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl %fs:196, %eax +; CHECK-NEXT: movl (%eax), %eax +; CHECK-NEXT: retl entry: %tmp = load i32*, i32* addrspace(257)* getelementptr (i32*, i32* addrspace(257)* inttoptr (i32 72 to i32* addrspace(257)*), i32 31) ; [#uses=1] %tmp1 = load i32, i32* %tmp ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/mul-remat.ll b/llvm/test/CodeGen/X86/mul-remat.ll --- a/llvm/test/CodeGen/X86/mul-remat.ll +++ b/llvm/test/CodeGen/X86/mul-remat.ll @@ -1,7 +1,13 @@ -; RUN: llc < %s -mtriple=i686-- | grep mov | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s ; PR1874 - + define i32 @test(i32 %a, i32 %b) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 
+; CHECK-NEXT: imull {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: retl entry: %tmp3 = mul i32 %b, %a ret i32 %tmp3 diff --git a/llvm/test/CodeGen/X86/mul-shift-reassoc.ll b/llvm/test/CodeGen/X86/mul-shift-reassoc.ll --- a/llvm/test/CodeGen/X86/mul-shift-reassoc.ll +++ b/llvm/test/CodeGen/X86/mul-shift-reassoc.ll @@ -1,9 +1,16 @@ -; RUN: llc < %s -mtriple=i686-- | grep lea -; RUN: llc < %s -mtriple=i686-- | not grep add +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s define i32 @test(i32 %X, i32 %Y) { ; Push the shl through the mul to allow an LEA to be formed, instead ; of using a shift and add separately. +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: imull %eax, %ecx +; CHECK-NEXT: leal (%eax,%ecx,2), %eax +; CHECK-NEXT: retl %tmp.2 = shl i32 %X, 1 ; [#uses=1] %tmp.3 = mul i32 %tmp.2, %Y ; [#uses=1] %tmp.5 = add i32 %tmp.3, %Y ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/neg-shl-add.ll b/llvm/test/CodeGen/X86/neg-shl-add.ll --- a/llvm/test/CodeGen/X86/neg-shl-add.ll +++ b/llvm/test/CodeGen/X86/neg-shl-add.ll @@ -1,15 +1,31 @@ -; RUN: llc -mtriple=x86_64-- < %s | not grep negq - +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s ; These sequences don't need neg instructions; they can be done with ; a single shift and sub each. 
define i64 @foo(i64 %x, i64 %y, i64 %n) nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdx, %rcx +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rsi +; CHECK-NEXT: subq %rsi, %rax +; CHECK-NEXT: retq %a = sub i64 0, %y %b = shl i64 %a, %n %c = add i64 %b, %x ret i64 %c } define i64 @boo(i64 %x, i64 %y, i64 %n) nounwind { +; CHECK-LABEL: boo: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdx, %rcx +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-NEXT: shlq %cl, %rsi +; CHECK-NEXT: subq %rsi, %rax +; CHECK-NEXT: retq %a = sub i64 0, %y %b = shl i64 %a, %n %c = add i64 %x, %b diff --git a/llvm/test/CodeGen/X86/neg_fp.ll b/llvm/test/CodeGen/X86/neg_fp.ll --- a/llvm/test/CodeGen/X86/neg_fp.ll +++ b/llvm/test/CodeGen/X86/neg_fp.ll @@ -1,10 +1,22 @@ -; RUN: llc < %s -mtriple=i686-- -mattr=+sse4.1 -o %t -; RUN: grep xorps %t | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mattr=+sse4.1 | FileCheck %s ; Test that when we don't -enable-unsafe-fp-math, we don't do the optimization ; -0 - (A - B) to (B - A) because A==B, -0 != 0 define float @negfp(float %a, float %b) { +; CHECK-LABEL: negfp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: subss {{[0-9]+}}(%esp), %xmm0 +; CHECK-NEXT: xorps {{\.LCPI.*}}, %xmm0 +; CHECK-NEXT: movss %xmm0, (%esp) +; CHECK-NEXT: flds (%esp) +; CHECK-NEXT: popl %eax +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: %sub = fsub float %a, %b ; [#uses=1] %neg = fsub float -0.000000e+00, %sub ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/negate-add-zero.ll b/llvm/test/CodeGen/X86/negate-add-zero.ll --- a/llvm/test/CodeGen/X86/negate-add-zero.ll +++ b/llvm/test/CodeGen/X86/negate-add-zero.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s 
-enable-unsafe-fp-math | not grep xor +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -enable-unsafe-fp-math | FileCheck %s ; PR3374 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" @@ -824,6 +825,52 @@ declare void @_ZN21HNodeTranslateRotate311toCartesianEv(%struct.HNodeTranslateRotate3*) define linkonce void @_ZN21HNodeTranslateRotate36setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3* %this, %"struct.CDSVector"* %velv) { +; CHECK-LABEL: _ZN21HNodeTranslateRotate36setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE: +; CHECK: ## %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: fldl 0 +; CHECK-NEXT: fldl 3184(%ecx) +; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: fmull 3176(%ecx) +; CHECK-NEXT: fldz +; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: fadd %st(1), %st +; CHECK-NEXT: fld %st(3) +; CHECK-NEXT: fmul %st(5), %st +; CHECK-NEXT: fadd %st(2), %st +; CHECK-NEXT: fxch %st(5) +; CHECK-NEXT: fmul %st, %st(0) +; CHECK-NEXT: fadd %st, %st(5) +; CHECK-NEXT: fsubr %st, %st(5) +; CHECK-NEXT: fxch %st(4) +; CHECK-NEXT: fmull -8 +; CHECK-NEXT: fxch %st(5) +; CHECK-NEXT: fstl 8 +; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: fsubp %st, %st(5) +; CHECK-NEXT: fxch %st(4) +; CHECK-NEXT: fsubp %st, %st(2) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fadd %st(2), %st +; CHECK-NEXT: faddp %st, %st(2) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fstl 16 +; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: fadd %st, %st(0) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fadd %st, %st(0) +; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: fadd %st, %st(0) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fstpl 2056(%ecx) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fstpl 2064(%ecx) +; CHECK-NEXT: fstpl 2072(%ecx) +; CHECK-NEXT: movl %eax, 0 +; CHECK-NEXT: movl $4, 4 +; CHECK-NEXT: movl $3, 8 +; CHECK-NEXT: ud2 %1 = 
getelementptr double, double* null, i32 -1 ; [#uses=1] %2 = load double, double* %1, align 8 ; [#uses=1] %3 = load double, double* null, align 8 ; [#uses=2] diff --git a/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll b/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll --- a/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll +++ b/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep cvtsi2sd +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s ; LSR previously eliminated the sitofp by introducing an induction ; variable which stepped by a bogus ((double)UINT32_C(-1)). It's theoretically @@ -6,6 +7,30 @@ ; test should be changed if that is done. define void @foo(i32 %N) nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jns .LBB0_3 +; CHECK-NEXT: # %bb.1: # %bb.preheader +; CHECK-NEXT: movl %edi, %ebx +; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_2: # %bb +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: cvtsi2sd %ebp, %xmm0 +; CHECK-NEXT: callq bar +; CHECK-NEXT: decl %ebp +; CHECK-NEXT: cmpl %ebp, %ebx +; CHECK-NEXT: jne .LBB0_2 +; CHECK-NEXT: .LBB0_3: # %return +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq entry: %0 = icmp slt i32 %N, 0 ; [#uses=1] br i1 %0, label %bb, label %return diff --git a/llvm/test/CodeGen/X86/nobt.ll b/llvm/test/CodeGen/X86/nobt.ll --- a/llvm/test/CodeGen/X86/nobt.ll +++ b/llvm/test/CodeGen/X86/nobt.ll @@ -1,9 +1,19 @@ -; RUN: llc < %s -mtriple=i686-- | not grep btl +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s ; This tests some cases where BT must 
not be generated. See also bt.ll. ; Fixes 20040709-[12].c in gcc testsuite. define void @test2(i32 %x, i32 %n) nounwind { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB0_2 +; CHECK-NEXT: # %bb.1: # %bb +; CHECK-NEXT: calll foo +; CHECK-NEXT: .LBB0_2: # %UnifiedReturnBlock +; CHECK-NEXT: retl entry: %tmp1 = and i32 %x, 1 %tmp2 = urem i32 %tmp1, 15 @@ -20,6 +30,15 @@ } define void @test3(i32 %x, i32 %n) nounwind { +; CHECK-LABEL: test3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB1_2 +; CHECK-NEXT: # %bb.1: # %bb +; CHECK-NEXT: calll foo +; CHECK-NEXT: .LBB1_2: # %UnifiedReturnBlock +; CHECK-NEXT: retl entry: %tmp1 = and i32 %x, 1 %tmp2 = urem i32 %tmp1, 15 @@ -36,6 +55,15 @@ } define void @test4(i32 %x, i32 %n) nounwind { +; CHECK-LABEL: test4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB2_2 +; CHECK-NEXT: # %bb.1: # %bb +; CHECK-NEXT: calll foo +; CHECK-NEXT: .LBB2_2: # %UnifiedReturnBlock +; CHECK-NEXT: retl entry: %tmp1 = and i32 %x, 1 %tmp2 = urem i32 %tmp1, 15 @@ -52,6 +80,15 @@ } define void @test5(i32 %x, i32 %n) nounwind { +; CHECK-LABEL: test5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB3_2 +; CHECK-NEXT: # %bb.1: # %bb +; CHECK-NEXT: calll foo +; CHECK-NEXT: .LBB3_2: # %UnifiedReturnBlock +; CHECK-NEXT: retl entry: %tmp1 = and i32 %x, 1 %tmp2 = urem i32 %tmp1, 15 diff --git a/llvm/test/CodeGen/X86/optimize-max-0.ll b/llvm/test/CodeGen/X86/optimize-max-0.ll --- a/llvm/test/CodeGen/X86/optimize-max-0.ll +++ b/llvm/test/CodeGen/X86/optimize-max-0.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s | not grep cmov +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s ; LSR should be able to eliminate the max computations by ; making the 
loops use slt/ult comparisons instead of ne comparisons. @@ -7,6 +8,219 @@ target triple = "i386-apple-darwin9" define void @foo(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind { +; CHECK-LABEL: foo: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $28, %esp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: movl %ebx, %eax +; CHECK-NEXT: imull %edi, %eax +; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: je LBB0_19 +; CHECK-NEXT: ## %bb.1: ## %bb10.preheader +; CHECK-NEXT: movl %eax, %ebp +; CHECK-NEXT: sarl $31, %ebp +; CHECK-NEXT: shrl $30, %ebp +; CHECK-NEXT: addl %eax, %ebp +; CHECK-NEXT: sarl $2, %ebp +; CHECK-NEXT: testl %ebx, %ebx +; CHECK-NEXT: jle LBB0_12 +; CHECK-NEXT: ## %bb.2: ## %bb.nph9 +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jle LBB0_12 +; CHECK-NEXT: ## %bb.3: ## %bb.nph9.split +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: incl %eax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_4: ## %bb6 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movzbl (%eax,%esi,2), %ebx +; CHECK-NEXT: movb %bl, (%edx,%esi) +; CHECK-NEXT: incl %esi +; CHECK-NEXT: cmpl %edi, %esi +; CHECK-NEXT: jl LBB0_4 +; CHECK-NEXT: ## %bb.5: ## %bb9 +; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: incl %ecx +; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl %edi, %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: cmpl %ebx, %ecx +; CHECK-NEXT: je LBB0_12 +; CHECK-NEXT: ## %bb.6: ## %bb7.preheader +; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1 +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: jmp LBB0_4 +; CHECK-NEXT: LBB0_12: ## 
%bb18.loopexit +; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; CHECK-NEXT: addl %ebp, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: cmpl $1, %ebx +; CHECK-NEXT: jle LBB0_13 +; CHECK-NEXT: ## %bb.7: ## %bb.nph5 +; CHECK-NEXT: cmpl $2, {{[0-9]+}}(%esp) +; CHECK-NEXT: jl LBB0_13 +; CHECK-NEXT: ## %bb.8: ## %bb.nph5.split +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: shrl $31, %edx +; CHECK-NEXT: addl %eax, %edx +; CHECK-NEXT: sarl %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shrl $31, %ecx +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: sarl %ecx +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: leal 2(%esi), %esi +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; CHECK-NEXT: addl %esi, %ecx +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_9: ## %bb13 +; CHECK-NEXT: ## =>This Loop Header: Depth=1 +; CHECK-NEXT: ## Child Loop BB0_10 Depth 2 +; CHECK-NEXT: movl %ebp, %esi +; CHECK-NEXT: shrl $31, %esi +; CHECK-NEXT: addl %ebp, %esi +; CHECK-NEXT: andl $-2, %esi +; CHECK-NEXT: movl %ebp, %edi +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: addl %ebx, %edi +; CHECK-NEXT: imull {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_10: ## %bb14 +; CHECK-NEXT: ## Parent Loop BB0_9 Depth=1 +; CHECK-NEXT: ## 
=> This Inner Loop Header: Depth=2 +; CHECK-NEXT: movzbl -2(%edi,%esi,4), %ebx +; CHECK-NEXT: movb %bl, (%ecx,%esi) +; CHECK-NEXT: movzbl (%edi,%esi,4), %ebx +; CHECK-NEXT: movb %bl, (%eax,%esi) +; CHECK-NEXT: incl %esi +; CHECK-NEXT: cmpl %edx, %esi +; CHECK-NEXT: jl LBB0_10 +; CHECK-NEXT: ## %bb.11: ## %bb17 +; CHECK-NEXT: ## in Loop: Header=BB0_9 Depth=1 +; CHECK-NEXT: incl %ebp +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload +; CHECK-NEXT: addl $2, %ebx +; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: cmpl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload +; CHECK-NEXT: jl LBB0_9 +; CHECK-NEXT: LBB0_13: ## %bb20 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: cmpl $1, %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: je LBB0_19 +; CHECK-NEXT: ## %bb.14: ## %bb20 +; CHECK-NEXT: cmpl $3, %edx +; CHECK-NEXT: jne LBB0_24 +; CHECK-NEXT: ## %bb.15: ## %bb22 +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; CHECK-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: testl %ebx, %ebx +; CHECK-NEXT: jle LBB0_18 +; CHECK-NEXT: ## %bb.16: ## %bb.nph +; CHECK-NEXT: leal 15(%ebx), %eax +; CHECK-NEXT: andl $-16, %eax +; CHECK-NEXT: imull {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl %eax, %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl $15, %eax +; CHECK-NEXT: andl $-16, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: addl %esi, %esi +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload +; CHECK-NEXT: addl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_17: ## %bb23 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %esi +; 
CHECK-NEXT: calll _memcpy +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: addl %edi, %esi +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload +; CHECK-NEXT: decl %ebx +; CHECK-NEXT: jne LBB0_17 +; CHECK-NEXT: LBB0_18: ## %bb26 +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; CHECK-NEXT: addl %ecx, %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: addl %esi, %edx +; CHECK-NEXT: jmp LBB0_23 +; CHECK-NEXT: LBB0_19: ## %bb29 +; CHECK-NEXT: testl %ebx, %ebx +; CHECK-NEXT: jle LBB0_22 +; CHECK-NEXT: ## %bb.20: ## %bb.nph11 +; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: leal 15(%edi), %eax +; CHECK-NEXT: andl $-16, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_21: ## %bb30 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: calll _memcpy +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: addl %esi, %edi +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload +; CHECK-NEXT: decl %ebx +; CHECK-NEXT: jne LBB0_21 +; CHECK-NEXT: LBB0_22: ## %bb33 +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: addl %ecx, %edx +; CHECK-NEXT: LBB0_23: ## %bb33 +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: sarl %eax +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: pushl $128 +; CHECK-NEXT: pushl %edx +; CHECK-NEXT: calll _memset +; CHECK-NEXT: addl $44, %esp +; CHECK-NEXT: LBB0_25: ## %return +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl +; CHECK-NEXT: LBB0_24: ## %return +; CHECK-NEXT: addl $28, 
%esp +; CHECK-NEXT: jmp LBB0_25 entry: %0 = mul i32 %x, %w %1 = mul i32 %x, %w @@ -232,6 +446,208 @@ } define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind { +; CHECK-LABEL: bar: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $28, %esp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: imull %ecx, %eax +; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: je LBB1_19 +; CHECK-NEXT: ## %bb.1: ## %bb10.preheader +; CHECK-NEXT: shrl $2, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: testl %ebp, %ebp +; CHECK-NEXT: je LBB1_12 +; CHECK-NEXT: ## %bb.2: ## %bb.nph9 +; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: je LBB1_12 +; CHECK-NEXT: ## %bb.3: ## %bb.nph9.split +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: incl %eax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB1_6: ## %bb7.preheader +; CHECK-NEXT: ## =>This Loop Header: Depth=1 +; CHECK-NEXT: ## Child Loop BB1_4 Depth 2 +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB1_4: ## %bb6 +; CHECK-NEXT: ## Parent Loop BB1_6 Depth=1 +; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 +; CHECK-NEXT: movzbl (%eax,%esi,2), %ebx +; CHECK-NEXT: movb %bl, (%edx,%esi) +; CHECK-NEXT: incl %esi +; CHECK-NEXT: cmpl %edi, %esi +; CHECK-NEXT: jb LBB1_4 +; CHECK-NEXT: ## %bb.5: ## %bb9 +; CHECK-NEXT: ## in Loop: Header=BB1_6 Depth=1 +; CHECK-NEXT: incl %ecx +; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl %edi, %edx +; CHECK-NEXT: cmpl %ebp, %ecx +; CHECK-NEXT: jne LBB1_6 +; CHECK-NEXT: LBB1_12: ## 
%bb18.loopexit +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: cmpl $1, %ebp +; CHECK-NEXT: jbe LBB1_13 +; CHECK-NEXT: ## %bb.7: ## %bb.nph5 +; CHECK-NEXT: cmpl $2, {{[0-9]+}}(%esp) +; CHECK-NEXT: jb LBB1_13 +; CHECK-NEXT: ## %bb.8: ## %bb.nph5.split +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: shrl %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: shrl %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: leal 2(%edx), %edx +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB1_9: ## %bb13 +; CHECK-NEXT: ## =>This Loop Header: Depth=1 +; CHECK-NEXT: ## Child Loop BB1_10 Depth 2 +; CHECK-NEXT: movl %edi, %ebx +; CHECK-NEXT: andl $1, %ebx +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: addl %edx, %ebx +; CHECK-NEXT: imull {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB1_10: ## %bb14 +; CHECK-NEXT: ## Parent Loop BB1_9 Depth=1 +; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 +; CHECK-NEXT: movzbl -2(%ebx,%esi,4), %edx +; CHECK-NEXT: movb %dl, (%eax,%esi) +; CHECK-NEXT: movzbl (%ebx,%esi,4), %edx +; CHECK-NEXT: movb %dl, (%ecx,%esi) +; CHECK-NEXT: incl %esi +; CHECK-NEXT: cmpl %ebp, %esi +; CHECK-NEXT: jb LBB1_10 +; CHECK-NEXT: ## %bb.11: ## 
%bb17 +; CHECK-NEXT: ## in Loop: Header=BB1_9 Depth=1 +; CHECK-NEXT: incl %edi +; CHECK-NEXT: addl %ebp, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload +; CHECK-NEXT: addl $2, %edx +; CHECK-NEXT: addl %ebp, %eax +; CHECK-NEXT: cmpl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload +; CHECK-NEXT: jb LBB1_9 +; CHECK-NEXT: LBB1_13: ## %bb20 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: cmpl $1, %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: je LBB1_19 +; CHECK-NEXT: ## %bb.14: ## %bb20 +; CHECK-NEXT: cmpl $3, %edx +; CHECK-NEXT: jne LBB1_24 +; CHECK-NEXT: ## %bb.15: ## %bb22 +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; CHECK-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: testl %ebp, %ebp +; CHECK-NEXT: je LBB1_18 +; CHECK-NEXT: ## %bb.16: ## %bb.nph +; CHECK-NEXT: movl %ebp, %esi +; CHECK-NEXT: leal 15(%ebp), %eax +; CHECK-NEXT: andl $-16, %eax +; CHECK-NEXT: imull {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: leal 15(%ecx), %ebx +; CHECK-NEXT: andl $-16, %ebx +; CHECK-NEXT: addl %eax, %edi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: leal (%edx,%eax), %ebp +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB1_17: ## %bb23 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: pushl %ecx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: calll _memcpy +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: addl %ecx, %ebp +; CHECK-NEXT: addl %ebx, %edi +; CHECK-NEXT: decl %esi +; CHECK-NEXT: jne LBB1_17 +; CHECK-NEXT: LBB1_18: ## %bb26 +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; CHECK-NEXT: addl %ecx, %eax +; 
CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: addl %eax, %edx +; CHECK-NEXT: shrl %ecx +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: pushl %ecx +; CHECK-NEXT: pushl $128 +; CHECK-NEXT: pushl %edx +; CHECK-NEXT: jmp LBB1_23 +; CHECK-NEXT: LBB1_19: ## %bb29 +; CHECK-NEXT: testl %ebp, %ebp +; CHECK-NEXT: je LBB1_22 +; CHECK-NEXT: ## %bb.20: ## %bb.nph11 +; CHECK-NEXT: movl %ebp, %esi +; CHECK-NEXT: leal 15(%ecx), %ebx +; CHECK-NEXT: andl $-16, %ebx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB1_21: ## %bb30 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: pushl %ecx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: calll _memcpy +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: addl %ecx, %ebp +; CHECK-NEXT: addl %ebx, %edi +; CHECK-NEXT: decl %esi +; CHECK-NEXT: jne LBB1_21 +; CHECK-NEXT: LBB1_22: ## %bb33 +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: shrl %eax +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: pushl $128 +; CHECK-NEXT: pushl %ecx +; CHECK-NEXT: LBB1_23: ## %bb33 +; CHECK-NEXT: calll _memset +; CHECK-NEXT: addl $44, %esp +; CHECK-NEXT: LBB1_25: ## %return +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl +; CHECK-NEXT: LBB1_24: ## %return +; CHECK-NEXT: addl $28, %esp +; CHECK-NEXT: jmp LBB1_25 entry: %0 = mul i32 %x, %w %1 = mul i32 %x, %w diff --git a/llvm/test/CodeGen/X86/overlap-shift.ll b/llvm/test/CodeGen/X86/overlap-shift.ll --- a/llvm/test/CodeGen/X86/overlap-shift.ll +++ b/llvm/test/CodeGen/X86/overlap-shift.ll @@ -1,3 +1,6 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -x86-asm-syntax=intel | FileCheck %s + ;; X's live 
range extends beyond the shift, so the register allocator ;; cannot coalesce it with Y. Because of this, a copy needs to be ;; emitted before the shift to save the register value before it is @@ -6,12 +9,15 @@ ; Check that the shift gets turned into an LEA. -; RUN: llc < %s -mtriple=i686-- -x86-asm-syntax=intel | \ -; RUN: not grep "mov E.X, E.X" - @G = external global i32 ; [#uses=1] define i32 @test1(i32 %X) { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: +; CHECK-NEXT: mov eax, dword ptr [esp + 4] +; CHECK-NEXT: lea ecx, [4*eax] +; CHECK-NEXT: mov dword ptr [G], ecx +; CHECK-NEXT: ret %Z = shl i32 %X, 2 ; [#uses=1] store volatile i32 %Z, i32* @G ret i32 %X diff --git a/llvm/test/CodeGen/X86/packed_struct.ll b/llvm/test/CodeGen/X86/packed_struct.ll --- a/llvm/test/CodeGen/X86/packed_struct.ll +++ b/llvm/test/CodeGen/X86/packed_struct.ll @@ -1,9 +1,5 @@ -; RUN: llc < %s > %t -; RUN: grep foos+5 %t -; RUN: grep foos+1 %t -; RUN: grep foos+9 %t -; RUN: grep bara+19 %t -; RUN: grep bara+4 %t +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s ; make sure we compute the correct offset for a packed structure @@ -16,6 +12,12 @@ @bara = weak global [4 x <{ i32, i8 }>] zeroinitializer ; <[4 x <{ i32, i8 }>]*> [#uses=2] define i32 @foo() nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl foos+5, %eax +; CHECK-NEXT: addl foos+1, %eax +; CHECK-NEXT: addl foos+9, %eax +; CHECK-NEXT: retl entry: %tmp = load i32, i32* getelementptr (%struct.anon, %struct.anon* @foos, i32 0, i32 1) ; [#uses=1] %tmp3 = load i32, i32* getelementptr (%struct.anon, %struct.anon* @foos, i32 0, i32 2) ; [#uses=1] @@ -26,6 +28,11 @@ } define i8 @bar() nounwind { +; CHECK-LABEL: bar: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movb bara+19, %al +; CHECK-NEXT: addb bara+4, %al +; CHECK-NEXT: retl entry: %tmp = load i8, i8* getelementptr ([4 x <{ i32, i8 }>], [4 x <{ i32, i8 }>]* @bara, i32 0, i32 0, i32 1) ; [#uses=1] 
%tmp4 = load i8, i8* getelementptr ([4 x <{ i32, i8 }>], [4 x <{ i32, i8 }>]* @bara, i32 0, i32 3, i32 1) ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/peep-test-0.ll b/llvm/test/CodeGen/X86/peep-test-0.ll --- a/llvm/test/CodeGen/X86/peep-test-0.ll +++ b/llvm/test/CodeGen/X86/peep-test-0.ll @@ -1,8 +1,24 @@ -; RUN: llc < %s -mtriple=x86_64-- > %t -; RUN: not grep cmp %t -; RUN: not grep test %t +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s define void @loop(i64 %n, double* nocapture %d) nounwind { +; CHECK-LABEL: loop: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shlq $4, %rax +; CHECK-NEXT: addq %rsi, %rax +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %bb +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: mulsd %xmm0, %xmm1 +; CHECK-NEXT: movsd %xmm1, (%rax) +; CHECK-NEXT: addq $8, %rax +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: retq entry: br label %bb diff --git a/llvm/test/CodeGen/X86/peep-test-1.ll b/llvm/test/CodeGen/X86/peep-test-1.ll --- a/llvm/test/CodeGen/X86/peep-test-1.ll +++ b/llvm/test/CodeGen/X86/peep-test-1.ll @@ -1,9 +1,21 @@ -; RUN: llc < %s -mtriple=i686-- > %t -; RUN: grep dec %t | count 1 -; RUN: not grep test %t -; RUN: not grep cmp %t +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s define void @foo(i32 %n, double* nocapture %p) nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %bb +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: fldl (%eax,%ecx,8) +; CHECK-NEXT: fmull {{\.LCPI.*}} +; CHECK-NEXT: fstpl (%eax,%ecx,8) 
+; CHECK-NEXT: decl %ecx +; CHECK-NEXT: js .LBB0_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: retl br label %bb bb: diff --git a/llvm/test/CodeGen/X86/pic-load-remat.ll b/llvm/test/CodeGen/X86/pic-load-remat.ll --- a/llvm/test/CodeGen/X86/pic-load-remat.ll +++ b/llvm/test/CodeGen/X86/pic-load-remat.ll @@ -1,6 +1,23 @@ -; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -relocation-model=pic | grep psllw | grep pb +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -relocation-model=pic | FileCheck %s define void @f() nounwind { +; CHECK-LABEL: f: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: calll L0$pb +; CHECK-NEXT: L0$pb: +; CHECK-NEXT: popl %eax +; CHECK-NEXT: pxor %xmm0, %xmm0 +; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1] +; CHECK-NEXT: psllw LCPI0_1-L0$pb(%eax), %xmm1 +; CHECK-NEXT: pavgw LCPI0_2-L0$pb(%eax), %xmm0 +; CHECK-NEXT: paddsw %xmm0, %xmm0 +; CHECK-NEXT: paddw %xmm1, %xmm0 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_1: ## %bb +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movdqa %xmm0, 0 +; CHECK-NEXT: jmp LBB0_1 entry: br label %bb @@ -38,10 +55,10 @@ br label %bb } -declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone +declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone -declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone +declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone -declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone diff --git a/llvm/test/CodeGen/X86/postalloc-coalescing.ll b/llvm/test/CodeGen/X86/postalloc-coalescing.ll --- 
a/llvm/test/CodeGen/X86/postalloc-coalescing.ll +++ b/llvm/test/CodeGen/X86/postalloc-coalescing.ll @@ -1,6 +1,23 @@ -; RUN: llc < %s -mtriple=i686-- | grep mov | count 3 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s define fastcc i32 @_Z18yy_get_next_bufferv() nounwind { +; CHECK-LABEL: _Z18yy_get_next_bufferv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: cmpl $-1, %eax +; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %bb116 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movb %al, 0 +; CHECK-NEXT: cmpl $-1, %eax +; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %bb158 +; CHECK-NEXT: movb %al, 0 +; CHECK-NEXT: cmpl $-1, %eax +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl entry: br label %bb131 diff --git a/llvm/test/CodeGen/X86/pr1489.ll b/llvm/test/CodeGen/X86/pr1489.ll --- a/llvm/test/CodeGen/X86/pr1489.ll +++ b/llvm/test/CodeGen/X86/pr1489.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -frame-pointer=all -O0 -mcpu=i486 | grep 1082126238 | count 3 -; RUN: llc < %s -frame-pointer=all -O0 -mcpu=i486 | grep -- -1236950581 | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -frame-pointer=all -O0 -mcpu=i486 | FileCheck %s ;; magic constants are 3.999f and half of 3.999 ; ModuleID = '1489.c' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" @@ -7,6 +7,21 @@ @.str = internal constant [13 x i8] c"%d %d %d %d\0A\00" ; <[13 x i8]*> [#uses=1] define i32 @quux() nounwind { +; CHECK-LABEL: quux: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E +; CHECK-NEXT: calll _lrintf +; CHECK-NEXT: cmpl $1, %eax +; CHECK-NEXT: setl %cl +; 
CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movzbl %cl, %eax +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl entry: %tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 ) ; [#uses=1] %tmp2 = icmp slt i32 %tmp1, 1 ; [#uses=1] @@ -17,6 +32,22 @@ declare i32 @lrintf(float) define i32 @foo() nounwind { +; CHECK-LABEL: foo: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: movl $1074789875, 4(%eax) ## imm = 0x400FFDF3 +; CHECK-NEXT: movl $-1236950581, (%eax) ## imm = 0xB645A1CB +; CHECK-NEXT: calll _lrint +; CHECK-NEXT: cmpl $1, %eax +; CHECK-NEXT: setl %cl +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movzbl %cl, %eax +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl entry: %tmp1 = tail call i32 @lrint( double 3.999000e+00 ) ; [#uses=1] %tmp2 = icmp slt i32 %tmp1, 1 ; [#uses=1] @@ -27,6 +58,21 @@ declare i32 @lrint(double) define i32 @bar() nounwind { +; CHECK-LABEL: bar: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E +; CHECK-NEXT: calll _lrintf +; CHECK-NEXT: cmpl $1, %eax +; CHECK-NEXT: setl %cl +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movzbl %cl, %eax +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl entry: %tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 ) ; [#uses=1] %tmp2 = icmp slt i32 %tmp1, 1 ; [#uses=1] @@ -35,6 +81,21 @@ } define i32 @baz() nounwind { +; CHECK-LABEL: baz: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E +; CHECK-NEXT: calll _lrintf +; CHECK-NEXT: cmpl $1, %eax +; CHECK-NEXT: setl %cl +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movzbl %cl, 
%eax +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl entry: %tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 ) ; [#uses=1] %tmp2 = icmp slt i32 %tmp1, 1 ; [#uses=1] @@ -43,6 +104,38 @@ } define i32 @main() nounwind { +; CHECK-LABEL: main: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $48, %esp +; CHECK-NEXT: calll _baz +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: calll _bar +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: calll _foo +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: calll _quux +; CHECK-NEXT: movl %esp, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload +; CHECK-NEXT: movl %edx, 16(%ecx) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; CHECK-NEXT: movl %esi, 12(%ecx) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload +; CHECK-NEXT: movl %edi, 8(%ecx) +; CHECK-NEXT: movl %eax, 4(%ecx) +; CHECK-NEXT: movl $_.str, (%ecx) +; CHECK-NEXT: calll _printf +; CHECK-NEXT: ## implicit-def: $ecx +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: addl $48, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl entry: %tmp = tail call i32 @baz( ) ; [#uses=1] %tmp1 = tail call i32 @bar( ) ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/pr1505.ll b/llvm/test/CodeGen/X86/pr1505.ll --- a/llvm/test/CodeGen/X86/pr1505.ll +++ b/llvm/test/CodeGen/X86/pr1505.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mcpu=i486 | not grep fldl +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mcpu=i486 | FileCheck %s ; PR1505 target datalayout = 
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" @@ -6,6 +7,12 @@ @G = weak global float 0.000000e+00 ; [#uses=1] define void @t1(float %F) { +; CHECK-LABEL: t1: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: movl L_G$non_lazy_ptr, %eax +; CHECK-NEXT: fstps (%eax) +; CHECK-NEXT: retl entry: store float %F, float* @G ret void diff --git a/llvm/test/CodeGen/X86/pr2326.ll b/llvm/test/CodeGen/X86/pr2326.ll --- a/llvm/test/CodeGen/X86/pr2326.ll +++ b/llvm/test/CodeGen/X86/pr2326.ll @@ -1,7 +1,18 @@ -; RUN: llc < %s -mtriple=i686-- | grep sete +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s ; PR2326 define i32 @func_59(i32 %p_60) nounwind { +; CHECK-LABEL: func_59: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: movl 0, %eax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: cmpl (%esp), %eax +; CHECK-NEXT: sete %cl +; CHECK-NEXT: pushl $0 +; CHECK-NEXT: pushl %ecx +; CHECK-NEXT: calll func_15 entry: %l_108 = alloca i32 ; [#uses=2] %tmp15 = load i32, i32* null, align 4 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/pr3366.ll b/llvm/test/CodeGen/X86/pr3366.ll --- a/llvm/test/CodeGen/X86/pr3366.ll +++ b/llvm/test/CodeGen/X86/pr3366.ll @@ -1,7 +1,15 @@ -; RUN: llc < %s -mtriple=i686-- -disable-cgp-branch-opts | grep movzbl +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -disable-cgp-branch-opts | FileCheck %s ; PR3366 define void @_ada_c34002a() nounwind { +; CHECK-LABEL: _ada_c34002a: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movb $90, %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: idivb 0 +; CHECK-NEXT: cmpb $3, %al +; CHECK-NEXT: # %bb.1: # %bb457 entry: %0 = load i8, i8* null, align 1 %1 = sdiv i8 90, %0 diff --git a/llvm/test/CodeGen/X86/pr3457.ll b/llvm/test/CodeGen/X86/pr3457.ll --- 
a/llvm/test/CodeGen/X86/pr3457.ll +++ b/llvm/test/CodeGen/X86/pr3457.ll @@ -1,8 +1,30 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=corei7 | not grep fstpt +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=corei7 | FileCheck %s ; PR3457 ; rdar://6548010 define void @foo(double* nocapture %P) nounwind { +; CHECK-LABEL: foo: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $24, %esp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: calll _test +; CHECK-NEXT: fstpl {{[0-9]+}}(%esp) +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movsd %xmm0, (%esp) ## 8-byte Spill +; CHECK-NEXT: calll _test +; CHECK-NEXT: fstpl {{[0-9]+}}(%esp) +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movsd (%esp), %xmm1 ## 8-byte Reload +; CHECK-NEXT: ## xmm1 = mem[0],zero +; CHECK-NEXT: mulsd %xmm1, %xmm1 +; CHECK-NEXT: mulsd %xmm0, %xmm0 +; CHECK-NEXT: addsd %xmm1, %xmm0 +; CHECK-NEXT: movsd %xmm0, (%esi) +; CHECK-NEXT: addl $24, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl entry: %0 = tail call double (...) @test() nounwind ; [#uses=2] %1 = tail call double (...) 
@test() nounwind ; [#uses=2] diff --git a/llvm/test/CodeGen/X86/remat-constant.ll b/llvm/test/CodeGen/X86/remat-constant.ll --- a/llvm/test/CodeGen/X86/remat-constant.ll +++ b/llvm/test/CodeGen/X86/remat-constant.ll @@ -1,12 +1,23 @@ -; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static | grep xmm | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static | FileCheck %s declare void @bar() nounwind @a = external constant float -declare void @qux(float %f) nounwind +declare void @qux(float %f) nounwind define void @foo() nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq bar +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: callq qux +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: callq qux +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq %f = load float, float* @a call void @bar() call void @qux(float %f) diff --git a/llvm/test/CodeGen/X86/ret-addr.ll b/llvm/test/CodeGen/X86/ret-addr.ll --- a/llvm/test/CodeGen/X86/ret-addr.ll +++ b/llvm/test/CodeGen/X86/ret-addr.ll @@ -1,7 +1,27 @@ -; RUN: llc < %s -frame-pointer=all -mtriple=i686-- | not grep xor -; RUN: llc < %s -frame-pointer=all -mtriple=x86_64-- | not grep xor +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -frame-pointer=all -mtriple=i686-- | FileCheck %s --check-prefix=i686 +; RUN: llc < %s -frame-pointer=all -mtriple=x86_64-- | FileCheck %s --check-prefix=x86_64 define i8* @h() nounwind readnone optsize { +; i686-LABEL: h: +; i686: # %bb.0: # %entry +; i686-NEXT: pushl %ebp +; i686-NEXT: movl %esp, %ebp +; i686-NEXT: movl (%ebp), %eax +; i686-NEXT: movl (%eax), %eax +; i686-NEXT: movl 4(%eax), %eax +; i686-NEXT: popl %ebp +; i686-NEXT: retl +; +; x86_64-LABEL: h: +; x86_64: # %bb.0: # %entry +; x86_64-NEXT: pushq %rbp +; x86_64-NEXT: movq %rsp, %rbp +; x86_64-NEXT: 
movq (%rbp), %rax +; x86_64-NEXT: movq (%rax), %rax +; x86_64-NEXT: movq 8(%rax), %rax +; x86_64-NEXT: popq %rbp +; x86_64-NEXT: retq entry: %0 = tail call i8* @llvm.returnaddress(i32 2) ; [#uses=1] ret i8* %0 @@ -10,12 +30,44 @@ declare i8* @llvm.returnaddress(i32) nounwind readnone define i8* @g() nounwind readnone optsize { +; i686-LABEL: g: +; i686: # %bb.0: # %entry +; i686-NEXT: pushl %ebp +; i686-NEXT: movl %esp, %ebp +; i686-NEXT: movl (%ebp), %eax +; i686-NEXT: movl 4(%eax), %eax +; i686-NEXT: popl %ebp +; i686-NEXT: retl +; +; x86_64-LABEL: g: +; x86_64: # %bb.0: # %entry +; x86_64-NEXT: pushq %rbp +; x86_64-NEXT: movq %rsp, %rbp +; x86_64-NEXT: movq (%rbp), %rax +; x86_64-NEXT: movq 8(%rax), %rax +; x86_64-NEXT: popq %rbp +; x86_64-NEXT: retq entry: %0 = tail call i8* @llvm.returnaddress(i32 1) ; [#uses=1] ret i8* %0 } define i8* @f() nounwind readnone optsize { +; i686-LABEL: f: +; i686: # %bb.0: # %entry +; i686-NEXT: pushl %ebp +; i686-NEXT: movl %esp, %ebp +; i686-NEXT: movl 4(%ebp), %eax +; i686-NEXT: popl %ebp +; i686-NEXT: retl +; +; x86_64-LABEL: f: +; x86_64: # %bb.0: # %entry +; x86_64-NEXT: pushq %rbp +; x86_64-NEXT: movq %rsp, %rbp +; x86_64-NEXT: movq 8(%rbp), %rax +; x86_64-NEXT: popq %rbp +; x86_64-NEXT: retq entry: %0 = tail call i8* @llvm.returnaddress(i32 0) ; [#uses=1] ret i8* %0 diff --git a/llvm/test/CodeGen/X86/ret-i64-0.ll b/llvm/test/CodeGen/X86/ret-i64-0.ll --- a/llvm/test/CodeGen/X86/ret-i64-0.ll +++ b/llvm/test/CodeGen/X86/ret-i64-0.ll @@ -1,5 +1,11 @@ -; RUN: llc < %s -mtriple=i686-- | grep xor | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s define i64 @foo() nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: retl ret i64 0 } diff --git a/llvm/test/CodeGen/X86/scalar-extract.ll b/llvm/test/CodeGen/X86/scalar-extract.ll --- a/llvm/test/CodeGen/X86/scalar-extract.ll 
+++ b/llvm/test/CodeGen/X86/scalar-extract.ll @@ -1,10 +1,17 @@ -; RUN: llc < %s -mtriple=i686-- -mattr=+mmx -o %t -; RUN: not grep movq %t +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mattr=+mmx | FileCheck %s ; Check that widening doesn't introduce a mmx register in this case when ; a simple load/store would suffice. define void @foo(<2 x i16>* %A, <2 x i16>* %B) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl (%ecx), %ecx +; CHECK-NEXT: movl %ecx, (%eax) +; CHECK-NEXT: retl entry: %tmp1 = load <2 x i16>, <2 x i16>* %A ; <<2 x i16>> [#uses=1] store <2 x i16> %tmp1, <2 x i16>* %B diff --git a/llvm/test/CodeGen/X86/setuge.ll b/llvm/test/CodeGen/X86/setuge.ll --- a/llvm/test/CodeGen/X86/setuge.ll +++ b/llvm/test/CodeGen/X86/setuge.ll @@ -1,8 +1,26 @@ -; RUN: llc < %s -mtriple=i686-- | not grep set +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s declare i1 @llvm.isunordered.f32(float, float) define float @cmp(float %A, float %B, float %C, float %D) nounwind { +; CHECK-LABEL: cmp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: fucompp +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jbe .LBB0_1 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: retl entry: %tmp.1 = fcmp uno float %A, %B ; [#uses=1] %tmp.2 = fcmp oge float %A, %B ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/shift-coalesce.ll b/llvm/test/CodeGen/X86/shift-coalesce.ll --- a/llvm/test/CodeGen/X86/shift-coalesce.ll +++ 
b/llvm/test/CodeGen/X86/shift-coalesce.ll @@ -1,12 +1,30 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-- -x86-asm-syntax=intel | \ -; RUN: grep "shld.*cl" -; RUN: llc < %s -mtriple=i686-- -x86-asm-syntax=intel | \ -; RUN: not grep "mov cl, bl" +; RUN: llc < %s -mtriple=i686-- -x86-asm-syntax=intel | FileCheck %s ; PR687 define i64 @foo(i64 %x, i64* %X) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: push esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset esi, -8 +; CHECK-NEXT: mov esi, dword ptr [esp + 8] +; CHECK-NEXT: mov edx, dword ptr [esp + 12] +; CHECK-NEXT: mov eax, dword ptr [esp + 16] +; CHECK-NEXT: mov cl, byte ptr [eax] +; CHECK-NEXT: mov eax, esi +; CHECK-NEXT: shl eax, cl +; CHECK-NEXT: shld edx, esi, cl +; CHECK-NEXT: test cl, 32 +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mov edx, eax +; CHECK-NEXT: xor eax, eax +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: pop esi +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: ret %tmp.1 = load i64, i64* %X ; [#uses=1] %tmp.3 = trunc i64 %tmp.1 to i8 ; [#uses=1] %shift.upgrd.1 = zext i8 %tmp.3 to i64 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll --- a/llvm/test/CodeGen/X86/shift-i128.ll +++ b/llvm/test/CodeGen/X86/shift-i128.ll @@ -1,142 +1,142 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=CHECK,X86 -; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=ALL,i686 +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=ALL,x86_64 ; ; Scalars ; define void @test_lshr_i128(i128 %x, i128 %a, i128* nocapture %r) nounwind { -; X86-LABEL: test_lshr_i128: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl 
%esi -; X86-NEXT: subl $20, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebp, %esi -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: shrdl %cl, %edi, %esi -; X86-NEXT: shrl %cl, %edx -; X86-NEXT: shrl %cl, %edi -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: testb $32, %al -; X86-NEXT: jne .LBB0_1 -; X86-NEXT: # %bb.2: # %entry -; X86-NEXT: movl %edx, (%esp) # 4-byte Spill -; X86-NEXT: jmp .LBB0_3 -; X86-NEXT: .LBB0_1: -; X86-NEXT: movl %edi, %esi -; X86-NEXT: movl $0, (%esp) # 4-byte Folded Spill -; X86-NEXT: xorl %edi, %edi -; X86-NEXT: .LBB0_3: # %entry -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %eax, %edx -; X86-NEXT: subb $64, %dl -; X86-NEXT: jb .LBB0_5 -; X86-NEXT: # %bb.4: # %entry -; X86-NEXT: xorl %edi, %edi -; X86-NEXT: .LBB0_5: # %entry -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: negb %dl -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: shldl %cl, %ebp, %edi -; X86-NEXT: movl %ebp, %esi -; X86-NEXT: shll %cl, %esi -; X86-NEXT: testb $32, %dl -; X86-NEXT: movl %esi, %ebx -; X86-NEXT: jne .LBB0_7 -; X86-NEXT: # %bb.6: # %entry -; X86-NEXT: movl %edi, %ebx -; X86-NEXT: .LBB0_7: # %entry -; X86-NEXT: movb %al, %ah -; X86-NEXT: addb $-64, %ah -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movb %ah, %cl -; X86-NEXT: shrl %cl, %edi -; X86-NEXT: testb $32, %ah -; X86-NEXT: movl $0, %ecx -; X86-NEXT: jne .LBB0_9 -; X86-NEXT: # %bb.8: # %entry -; X86-NEXT: movl %edi, %ecx -; X86-NEXT: .LBB0_9: # %entry -; X86-NEXT: cmpb $64, %al -; X86-NEXT: jb .LBB0_10 -; X86-NEXT: # %bb.11: # %entry -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: jmp .LBB0_12 -; X86-NEXT: .LBB0_10: -; X86-NEXT: movl 
(%esp), %ecx # 4-byte Reload -; X86-NEXT: orl %ebx, %ecx -; X86-NEXT: .LBB0_12: # %entry -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: testb $32, %dl -; X86-NEXT: jne .LBB0_14 -; X86-NEXT: # %bb.13: # %entry -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB0_14: # %entry -; X86-NEXT: movl %ebx, %edx -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: shrdl %cl, %esi, %edx -; X86-NEXT: testb $32, %al -; X86-NEXT: jne .LBB0_16 -; X86-NEXT: # %bb.15: # %entry -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB0_16: # %entry -; X86-NEXT: movb %ah, %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: shrdl %cl, %edx, %ebp -; X86-NEXT: testb $32, %ah -; X86-NEXT: jne .LBB0_18 -; X86-NEXT: # %bb.17: # %entry -; X86-NEXT: movl %ebp, %edi -; X86-NEXT: .LBB0_18: # %entry -; X86-NEXT: cmpb $64, %al -; X86-NEXT: jae .LBB0_20 -; X86-NEXT: # %bb.19: -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT: .LBB0_20: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: testb %al, %al -; X86-NEXT: je .LBB0_22 -; X86-NEXT: # %bb.21: # %entry -; X86-NEXT: movl %edi, %ebx -; X86-NEXT: movl (%esp), %esi # 4-byte Reload -; X86-NEXT: .LBB0_22: # %entry -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 12(%ecx) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 8(%ecx) -; X86-NEXT: movl %esi, 4(%ecx) -; X86-NEXT: movl %ebx, (%ecx) -; X86-NEXT: addl $20, %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp -; X86-NEXT: retl +; i686-LABEL: test_lshr_i128: +; i686: # %bb.0: # %entry +; i686-NEXT: pushl %ebp +; i686-NEXT: pushl %ebx +; i686-NEXT: pushl %edi +; i686-NEXT: pushl %esi +; i686-NEXT: subl $20, %esp +; i686-NEXT: 
movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: movb {{[0-9]+}}(%esp), %al +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl %ebp, %esi +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shrdl %cl, %edi, %esi +; i686-NEXT: shrl %cl, %edx +; i686-NEXT: shrl %cl, %edi +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB0_1 +; i686-NEXT: # %bb.2: # %entry +; i686-NEXT: movl %edx, (%esp) # 4-byte Spill +; i686-NEXT: jmp .LBB0_3 +; i686-NEXT: .LBB0_1: +; i686-NEXT: movl %edi, %esi +; i686-NEXT: movl $0, (%esp) # 4-byte Folded Spill +; i686-NEXT: xorl %edi, %edi +; i686-NEXT: .LBB0_3: # %entry +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %eax, %edx +; i686-NEXT: subb $64, %dl +; i686-NEXT: jb .LBB0_5 +; i686-NEXT: # %bb.4: # %entry +; i686-NEXT: xorl %edi, %edi +; i686-NEXT: .LBB0_5: # %entry +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: negb %dl +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shldl %cl, %ebp, %edi +; i686-NEXT: movl %ebp, %esi +; i686-NEXT: shll %cl, %esi +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl %esi, %ebx +; i686-NEXT: jne .LBB0_7 +; i686-NEXT: # %bb.6: # %entry +; i686-NEXT: movl %edi, %ebx +; i686-NEXT: .LBB0_7: # %entry +; i686-NEXT: movb %al, %ah +; i686-NEXT: addb $-64, %ah +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movb %ah, %cl +; i686-NEXT: shrl %cl, %edi +; i686-NEXT: testb $32, %ah +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB0_9 +; i686-NEXT: # %bb.8: # %entry +; i686-NEXT: movl %edi, %ecx +; i686-NEXT: .LBB0_9: # %entry +; i686-NEXT: cmpb $64, %al +; i686-NEXT: jb .LBB0_10 +; i686-NEXT: # %bb.11: # %entry +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: jmp .LBB0_12 +; i686-NEXT: .LBB0_10: +; i686-NEXT: 
movl (%esp), %ecx # 4-byte Reload +; i686-NEXT: orl %ebx, %ecx +; i686-NEXT: .LBB0_12: # %entry +; i686-NEXT: movl %ecx, (%esp) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB0_14 +; i686-NEXT: # %bb.13: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB0_14: # %entry +; i686-NEXT: movl %ebx, %edx +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shrdl %cl, %esi, %edx +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB0_16 +; i686-NEXT: # %bb.15: # %entry +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB0_16: # %entry +; i686-NEXT: movb %ah, %cl +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: shrdl %cl, %edx, %ebp +; i686-NEXT: testb $32, %ah +; i686-NEXT: jne .LBB0_18 +; i686-NEXT: # %bb.17: # %entry +; i686-NEXT: movl %ebp, %edi +; i686-NEXT: .LBB0_18: # %entry +; i686-NEXT: cmpb $64, %al +; i686-NEXT: jae .LBB0_20 +; i686-NEXT: # %bb.19: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; i686-NEXT: .LBB0_20: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: testb %al, %al +; i686-NEXT: je .LBB0_22 +; i686-NEXT: # %bb.21: # %entry +; i686-NEXT: movl %edi, %ebx +; i686-NEXT: movl (%esp), %esi # 4-byte Reload +; i686-NEXT: .LBB0_22: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, 12(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, 8(%ecx) +; i686-NEXT: movl %esi, 4(%ecx) +; i686-NEXT: movl %ebx, (%ecx) +; i686-NEXT: addl $20, %esp +; i686-NEXT: popl %esi +; i686-NEXT: popl %edi +; i686-NEXT: popl %ebx +; i686-NEXT: popl %ebp +; i686-NEXT: retl ; -; X64-LABEL: test_lshr_i128: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: shrdq %cl, %rsi, %rdi -; X64-NEXT: shrq %cl, 
%rsi -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: testb $64, %cl -; X64-NEXT: cmovneq %rsi, %rdi -; X64-NEXT: cmoveq %rsi, %rax -; X64-NEXT: movq %rax, 8(%r8) -; X64-NEXT: movq %rdi, (%r8) -; X64-NEXT: retq +; x86_64-LABEL: test_lshr_i128: +; x86_64: # %bb.0: # %entry +; x86_64-NEXT: movq %rdx, %rcx +; x86_64-NEXT: shrdq %cl, %rsi, %rdi +; x86_64-NEXT: shrq %cl, %rsi +; x86_64-NEXT: xorl %eax, %eax +; x86_64-NEXT: testb $64, %cl +; x86_64-NEXT: cmovneq %rsi, %rdi +; x86_64-NEXT: cmoveq %rsi, %rax +; x86_64-NEXT: movq %rax, 8(%r8) +; x86_64-NEXT: movq %rdi, (%r8) +; x86_64-NEXT: retq entry: %0 = lshr i128 %x, %a store i128 %0, i128* %r, align 16 @@ -144,141 +144,141 @@ } define void @test_ashr_i128(i128 %x, i128 %a, i128* nocapture %r) nounwind { -; X86-LABEL: test_ashr_i128: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebp, %esi -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: shrdl %cl, %ebx, %esi -; X86-NEXT: shrl %cl, %edx -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: sarl %cl, %edi -; X86-NEXT: sarl $31, %ebx -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: testb $32, %al -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: jne .LBB1_1 -; X86-NEXT: # %bb.2: # %entry -; X86-NEXT: movl %edx, (%esp) # 4-byte Spill -; X86-NEXT: jmp .LBB1_3 -; X86-NEXT: .LBB1_1: -; X86-NEXT: movl %edi, %esi -; X86-NEXT: movl $0, (%esp) # 4-byte Folded Spill -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: .LBB1_3: # %entry -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %eax, %edx -; X86-NEXT: subb $64, %dl -; X86-NEXT: jb .LBB1_5 -; X86-NEXT: # %bb.4: # %entry -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: .LBB1_5: # %entry -; X86-NEXT: 
movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: negb %dl -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: shldl %cl, %ebp, %edi -; X86-NEXT: movl %ebp, %esi -; X86-NEXT: shll %cl, %esi -; X86-NEXT: testb $32, %dl -; X86-NEXT: movl %esi, %ecx -; X86-NEXT: jne .LBB1_7 -; X86-NEXT: # %bb.6: # %entry -; X86-NEXT: movl %edi, %ecx -; X86-NEXT: .LBB1_7: # %entry -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movb %al, %ah -; X86-NEXT: addb $-64, %ah -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movb %ah, %cl -; X86-NEXT: sarl %cl, %edi -; X86-NEXT: testb $32, %ah -; X86-NEXT: movl %ebx, %ecx -; X86-NEXT: jne .LBB1_9 -; X86-NEXT: # %bb.8: # %entry -; X86-NEXT: movl %edi, %ecx -; X86-NEXT: .LBB1_9: # %entry -; X86-NEXT: cmpb $64, %al -; X86-NEXT: jb .LBB1_10 -; X86-NEXT: # %bb.11: # %entry -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: jmp .LBB1_12 -; X86-NEXT: .LBB1_10: -; X86-NEXT: movl (%esp), %ecx # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: .LBB1_12: # %entry -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: testb $32, %dl -; X86-NEXT: jne .LBB1_14 -; X86-NEXT: # %bb.13: # %entry -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB1_14: # %entry -; X86-NEXT: movl %ebx, %edx -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: shrdl %cl, %esi, %edx -; X86-NEXT: testb $32, %al -; X86-NEXT: jne .LBB1_16 -; X86-NEXT: # %bb.15: # %entry -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB1_16: # %entry -; X86-NEXT: movb %ah, %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: shrdl %cl, %edx, %ebp -; X86-NEXT: testb $32, %ah -; X86-NEXT: jne .LBB1_18 -; X86-NEXT: # %bb.17: # %entry -; X86-NEXT: movl %ebp, %edi -; X86-NEXT: .LBB1_18: # %entry -; X86-NEXT: cmpb 
$64, %al -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: jae .LBB1_20 -; X86-NEXT: # %bb.19: -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, %edi -; X86-NEXT: .LBB1_20: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: testb %al, %al -; X86-NEXT: je .LBB1_22 -; X86-NEXT: # %bb.21: # %entry -; X86-NEXT: movl %edi, %ebx -; X86-NEXT: movl (%esp), %esi # 4-byte Reload -; X86-NEXT: .LBB1_22: # %entry -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 12(%ecx) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 8(%ecx) -; X86-NEXT: movl %esi, 4(%ecx) -; X86-NEXT: movl %ebx, (%ecx) -; X86-NEXT: addl $24, %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp -; X86-NEXT: retl +; i686-LABEL: test_ashr_i128: +; i686: # %bb.0: # %entry +; i686-NEXT: pushl %ebp +; i686-NEXT: pushl %ebx +; i686-NEXT: pushl %edi +; i686-NEXT: pushl %esi +; i686-NEXT: subl $24, %esp +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: movb {{[0-9]+}}(%esp), %al +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl %ebp, %esi +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shrdl %cl, %ebx, %esi +; i686-NEXT: shrl %cl, %edx +; i686-NEXT: movl %ebx, %edi +; i686-NEXT: sarl %cl, %edi +; i686-NEXT: sarl $31, %ebx +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: testb $32, %al +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jne .LBB1_1 +; i686-NEXT: # %bb.2: # %entry +; i686-NEXT: movl %edx, (%esp) # 4-byte Spill +; i686-NEXT: jmp .LBB1_3 +; i686-NEXT: .LBB1_1: +; i686-NEXT: movl %edi, %esi +; i686-NEXT: movl $0, (%esp) # 4-byte Folded Spill +; i686-NEXT: movl %ebx, %edi +; i686-NEXT: .LBB1_3: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl 
%eax, %edx +; i686-NEXT: subb $64, %dl +; i686-NEXT: jb .LBB1_5 +; i686-NEXT: # %bb.4: # %entry +; i686-NEXT: movl %ebx, %edi +; i686-NEXT: .LBB1_5: # %entry +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: negb %dl +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shldl %cl, %ebp, %edi +; i686-NEXT: movl %ebp, %esi +; i686-NEXT: shll %cl, %esi +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl %esi, %ecx +; i686-NEXT: jne .LBB1_7 +; i686-NEXT: # %bb.6: # %entry +; i686-NEXT: movl %edi, %ecx +; i686-NEXT: .LBB1_7: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movb %al, %ah +; i686-NEXT: addb $-64, %ah +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movb %ah, %cl +; i686-NEXT: sarl %cl, %edi +; i686-NEXT: testb $32, %ah +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: jne .LBB1_9 +; i686-NEXT: # %bb.8: # %entry +; i686-NEXT: movl %edi, %ecx +; i686-NEXT: .LBB1_9: # %entry +; i686-NEXT: cmpb $64, %al +; i686-NEXT: jb .LBB1_10 +; i686-NEXT: # %bb.11: # %entry +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jmp .LBB1_12 +; i686-NEXT: .LBB1_10: +; i686-NEXT: movl (%esp), %ecx # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; i686-NEXT: .LBB1_12: # %entry +; i686-NEXT: movl %ecx, (%esp) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB1_14 +; i686-NEXT: # %bb.13: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB1_14: # %entry +; i686-NEXT: movl %ebx, %edx +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shrdl %cl, %esi, %edx +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB1_16 +; i686-NEXT: # %bb.15: # %entry +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB1_16: # %entry +; i686-NEXT: movb %ah, %cl +; i686-NEXT: movl 
{{[0-9]+}}(%esp), %edx +; i686-NEXT: shrdl %cl, %edx, %ebp +; i686-NEXT: testb $32, %ah +; i686-NEXT: jne .LBB1_18 +; i686-NEXT: # %bb.17: # %entry +; i686-NEXT: movl %ebp, %edi +; i686-NEXT: .LBB1_18: # %entry +; i686-NEXT: cmpb $64, %al +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: jae .LBB1_20 +; i686-NEXT: # %bb.19: +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; i686-NEXT: movl %ecx, %edi +; i686-NEXT: .LBB1_20: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: testb %al, %al +; i686-NEXT: je .LBB1_22 +; i686-NEXT: # %bb.21: # %entry +; i686-NEXT: movl %edi, %ebx +; i686-NEXT: movl (%esp), %esi # 4-byte Reload +; i686-NEXT: .LBB1_22: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, 12(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, 8(%ecx) +; i686-NEXT: movl %esi, 4(%ecx) +; i686-NEXT: movl %ebx, (%ecx) +; i686-NEXT: addl $24, %esp +; i686-NEXT: popl %esi +; i686-NEXT: popl %edi +; i686-NEXT: popl %ebx +; i686-NEXT: popl %ebp +; i686-NEXT: retl ; -; X64-LABEL: test_ashr_i128: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: shrdq %cl, %rsi, %rdi -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: sarq %cl, %rax -; X64-NEXT: sarq $63, %rsi -; X64-NEXT: testb $64, %cl -; X64-NEXT: cmovneq %rax, %rdi -; X64-NEXT: cmoveq %rax, %rsi -; X64-NEXT: movq %rsi, 8(%r8) -; X64-NEXT: movq %rdi, (%r8) -; X64-NEXT: retq +; x86_64-LABEL: test_ashr_i128: +; x86_64: # %bb.0: # %entry +; x86_64-NEXT: movq %rdx, %rcx +; x86_64-NEXT: shrdq %cl, %rsi, %rdi +; x86_64-NEXT: movq %rsi, %rax +; x86_64-NEXT: sarq %cl, %rax +; x86_64-NEXT: sarq $63, %rsi +; x86_64-NEXT: testb $64, %cl +; x86_64-NEXT: cmovneq %rax, %rdi +; x86_64-NEXT: cmoveq %rax, %rsi +; x86_64-NEXT: movq %rsi, 8(%r8) +; x86_64-NEXT: movq %rdi, (%r8) +; x86_64-NEXT: retq entry: %0 = ashr i128 %x, %a store i128 %0, i128* %r, align 16 @@ 
-286,137 +286,137 @@ } define void @test_shl_i128(i128 %x, i128 %a, i128* nocapture %r) nounwind { -; X86-LABEL: test_shl_i128: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: subl $20, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: shll %cl, %ebx -; X86-NEXT: movl %ebp, %esi -; X86-NEXT: shll %cl, %esi -; X86-NEXT: movl %edi, %edx -; X86-NEXT: shldl %cl, %ebp, %edx -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: testb $32, %al -; X86-NEXT: jne .LBB2_1 -; X86-NEXT: # %bb.2: # %entry -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X86-NEXT: jmp .LBB2_3 -; X86-NEXT: .LBB2_1: -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $0, (%esp) # 4-byte Folded Spill -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: .LBB2_3: # %entry -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %eax, %edx -; X86-NEXT: subb $64, %dl -; X86-NEXT: jb .LBB2_5 -; X86-NEXT: # %bb.4: # %entry -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: .LBB2_5: # %entry -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: negb %dl -; X86-NEXT: movl %edi, %esi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: shrl %cl, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: shrdl %cl, %edi, %ebx -; X86-NEXT: testb $32, %dl -; X86-NEXT: movl %esi, %ebp -; X86-NEXT: jne .LBB2_7 -; X86-NEXT: # %bb.6: # %entry -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: .LBB2_7: # %entry -; X86-NEXT: movb %al, %ah -; X86-NEXT: addb $-64, %ah -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movb %ah, %cl -; X86-NEXT: shll %cl, %ebx -; X86-NEXT: testb $32, %ah -; X86-NEXT: movl $0, %ecx -; X86-NEXT: jne .LBB2_9 -; 
X86-NEXT: # %bb.8: # %entry -; X86-NEXT: movl %ebx, %ecx -; X86-NEXT: .LBB2_9: # %entry -; X86-NEXT: cmpb $64, %al -; X86-NEXT: jb .LBB2_10 -; X86-NEXT: # %bb.11: # %entry -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: jmp .LBB2_12 -; X86-NEXT: .LBB2_10: -; X86-NEXT: movl (%esp), %ecx # 4-byte Reload -; X86-NEXT: orl %ebp, %ecx -; X86-NEXT: .LBB2_12: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X86-NEXT: testb $32, %dl -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: jne .LBB2_14 -; X86-NEXT: # %bb.13: # %entry -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB2_14: # %entry -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: shldl %cl, %ebp, %esi -; X86-NEXT: testb $32, %al -; X86-NEXT: jne .LBB2_16 -; X86-NEXT: # %bb.15: # %entry -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB2_16: # %entry -; X86-NEXT: movb %ah, %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: shldl %cl, %esi, %edi -; X86-NEXT: testb $32, %ah -; X86-NEXT: jne .LBB2_18 -; X86-NEXT: # %bb.17: # %entry -; X86-NEXT: movl %edi, %ebx -; X86-NEXT: .LBB2_18: # %entry -; X86-NEXT: cmpb $64, %al -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: jae .LBB2_20 -; X86-NEXT: # %bb.19: -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: .LBB2_20: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: testb %al, %al -; X86-NEXT: je .LBB2_22 -; X86-NEXT: # %bb.21: # %entry -; X86-NEXT: movl %ebx, %edx -; X86-NEXT: movl (%esp), %ebp # 4-byte Reload -; X86-NEXT: .LBB2_22: # %entry -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 4(%ecx) -; X86-NEXT: movl %esi, (%ecx) -; X86-NEXT: movl %edx, 12(%ecx) -; X86-NEXT: movl %ebp, 8(%ecx) -; X86-NEXT: addl $20, %esp -; 
X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp -; X86-NEXT: retl +; i686-LABEL: test_shl_i128: +; i686: # %bb.0: # %entry +; i686-NEXT: pushl %ebp +; i686-NEXT: pushl %ebx +; i686-NEXT: pushl %edi +; i686-NEXT: pushl %esi +; i686-NEXT: subl $20, %esp +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movb {{[0-9]+}}(%esp), %al +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shll %cl, %ebx +; i686-NEXT: movl %ebp, %esi +; i686-NEXT: shll %cl, %esi +; i686-NEXT: movl %edi, %edx +; i686-NEXT: shldl %cl, %ebp, %edx +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB2_1 +; i686-NEXT: # %bb.2: # %entry +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, (%esp) # 4-byte Spill +; i686-NEXT: jmp .LBB2_3 +; i686-NEXT: .LBB2_1: +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl $0, (%esp) # 4-byte Folded Spill +; i686-NEXT: xorl %esi, %esi +; i686-NEXT: .LBB2_3: # %entry +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %eax, %edx +; i686-NEXT: subb $64, %dl +; i686-NEXT: jb .LBB2_5 +; i686-NEXT: # %bb.4: # %entry +; i686-NEXT: xorl %esi, %esi +; i686-NEXT: .LBB2_5: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: negb %dl +; i686-NEXT: movl %edi, %esi +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shrl %cl, %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: shrdl %cl, %edi, %ebx +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl %esi, %ebp +; i686-NEXT: jne .LBB2_7 +; i686-NEXT: # %bb.6: # %entry +; i686-NEXT: movl %ebx, %ebp +; i686-NEXT: .LBB2_7: # %entry +; i686-NEXT: movb %al, %ah +; i686-NEXT: addb $-64, %ah +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movb %ah, %cl +; i686-NEXT: shll %cl, %ebx +; i686-NEXT: testb $32, 
%ah +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB2_9 +; i686-NEXT: # %bb.8: # %entry +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: .LBB2_9: # %entry +; i686-NEXT: cmpb $64, %al +; i686-NEXT: jb .LBB2_10 +; i686-NEXT: # %bb.11: # %entry +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: jmp .LBB2_12 +; i686-NEXT: .LBB2_10: +; i686-NEXT: movl (%esp), %ecx # 4-byte Reload +; i686-NEXT: orl %ebp, %ecx +; i686-NEXT: .LBB2_12: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl %ecx, (%esp) # 4-byte Spill +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: jne .LBB2_14 +; i686-NEXT: # %bb.13: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB2_14: # %entry +; i686-NEXT: movl %edx, %esi +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shldl %cl, %ebp, %esi +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB2_16 +; i686-NEXT: # %bb.15: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB2_16: # %entry +; i686-NEXT: movb %ah, %cl +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shldl %cl, %esi, %edi +; i686-NEXT: testb $32, %ah +; i686-NEXT: jne .LBB2_18 +; i686-NEXT: # %bb.17: # %entry +; i686-NEXT: movl %edi, %ebx +; i686-NEXT: .LBB2_18: # %entry +; i686-NEXT: cmpb $64, %al +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: jae .LBB2_20 +; i686-NEXT: # %bb.19: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; i686-NEXT: .LBB2_20: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: testb %al, %al +; i686-NEXT: je .LBB2_22 +; i686-NEXT: # %bb.21: # %entry +; i686-NEXT: movl %ebx, %edx +; i686-NEXT: movl (%esp), %ebp # 4-byte Reload +; i686-NEXT: .LBB2_22: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, 4(%ecx) +; i686-NEXT: 
movl %esi, (%ecx) +; i686-NEXT: movl %edx, 12(%ecx) +; i686-NEXT: movl %ebp, 8(%ecx) +; i686-NEXT: addl $20, %esp +; i686-NEXT: popl %esi +; i686-NEXT: popl %edi +; i686-NEXT: popl %ebx +; i686-NEXT: popl %ebp +; i686-NEXT: retl ; -; X64-LABEL: test_shl_i128: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: shldq %cl, %rdi, %rsi -; X64-NEXT: shlq %cl, %rdi -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: testb $64, %cl -; X64-NEXT: cmovneq %rdi, %rsi -; X64-NEXT: cmoveq %rdi, %rax -; X64-NEXT: movq %rsi, 8(%r8) -; X64-NEXT: movq %rax, (%r8) -; X64-NEXT: retq +; x86_64-LABEL: test_shl_i128: +; x86_64: # %bb.0: # %entry +; x86_64-NEXT: movq %rdx, %rcx +; x86_64-NEXT: shldq %cl, %rdi, %rsi +; x86_64-NEXT: shlq %cl, %rdi +; x86_64-NEXT: xorl %eax, %eax +; x86_64-NEXT: testb $64, %cl +; x86_64-NEXT: cmovneq %rdi, %rsi +; x86_64-NEXT: cmoveq %rdi, %rax +; x86_64-NEXT: movq %rsi, 8(%r8) +; x86_64-NEXT: movq %rax, (%r8) +; x86_64-NEXT: retq entry: %0 = shl i128 %x, %a store i128 %0, i128* %r, align 16 @@ -424,9 +424,9 @@ } define void @test_lshr_i128_outofrange(i128 %x, i128* nocapture %r) nounwind { -; CHECK-LABEL: test_lshr_i128_outofrange: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ret{{[l|q]}} +; ALL-LABEL: test_lshr_i128_outofrange: +; ALL: # %bb.0: # %entry +; ALL-NEXT: ret{{[l|q]}} entry: %0 = lshr i128 %x, -1 store i128 %0, i128* %r, align 16 @@ -434,9 +434,9 @@ } define void @test_ashr_i128_outofrange(i128 %x, i128* nocapture %r) nounwind { -; CHECK-LABEL: test_ashr_i128_outofrange: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ret{{[l|q]}} +; ALL-LABEL: test_ashr_i128_outofrange: +; ALL: # %bb.0: # %entry +; ALL-NEXT: ret{{[l|q]}} entry: %0 = ashr i128 %x, -1 store i128 %0, i128* %r, align 16 @@ -444,9 +444,9 @@ } define void @test_shl_i128_outofrange(i128 %x, i128* nocapture %r) nounwind { -; CHECK-LABEL: test_shl_i128_outofrange: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ret{{[l|q]}} +; ALL-LABEL: test_shl_i128_outofrange: +; ALL: # %bb.0: # %entry +; 
ALL-NEXT: ret{{[l|q]}} entry: %0 = shl i128 %x, -1 store i128 %0, i128* %r, align 16 @@ -458,290 +458,290 @@ ; define void @test_lshr_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocapture %r) nounwind { -; X86-LABEL: test_lshr_v2i128: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: subl $68, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: shrl %cl, %edi -; X86-NEXT: movl %esi, %ebp -; X86-NEXT: shrl %cl, %ebp -; X86-NEXT: shrdl %cl, %esi, %edx -; X86-NEXT: testb $32, %al -; X86-NEXT: jne .LBB6_1 -; X86-NEXT: # %bb.2: # %entry -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: jmp .LBB6_3 -; X86-NEXT: .LBB6_1: -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: .LBB6_3: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: shrdl %cl, %ebx, %esi -; X86-NEXT: testb $32, %al -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: jne .LBB6_5 -; X86-NEXT: # %bb.4: # %entry -; X86-NEXT: movl %esi, %edi -; X86-NEXT: .LBB6_5: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: shrl %cl, %ebx -; X86-NEXT: shrl %cl, %ebp -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: subl $64, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 
{{[0-9]+}}(%esp), %ecx -; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X86-NEXT: testb $32, %dl -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl $0, %ecx -; X86-NEXT: jne .LBB6_7 -; X86-NEXT: # %bb.6: # %entry -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, %ecx -; X86-NEXT: .LBB6_7: # %entry -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: shrdl %cl, %ebp, %esi -; X86-NEXT: testb $32, %dl -; X86-NEXT: jne .LBB6_9 -; X86-NEXT: # %bb.8: # %entry -; X86-NEXT: movl %esi, %ebx -; X86-NEXT: .LBB6_9: # %entry -; X86-NEXT: movl %edi, %esi -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: shrl %cl, %ebp -; X86-NEXT: testb $32, %cl -; X86-NEXT: movl $0, %ecx -; X86-NEXT: jne .LBB6_11 -; X86-NEXT: # %bb.10: # %entry -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: .LBB6_11: # %entry -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movb $64, %cl -; X86-NEXT: subb %dl, %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: shldl %cl, %ebx, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: shll %cl, %edi -; X86-NEXT: testb $32, %cl -; X86-NEXT: movb $64, %bl -; X86-NEXT: jne .LBB6_12 -; X86-NEXT: # %bb.13: # %entry -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: jmp .LBB6_14 -; X86-NEXT: .LBB6_12: -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 
$0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: .LBB6_14: # %entry -; X86-NEXT: movl %esi, %edi -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: shrdl %cl, %ebp, %esi -; X86-NEXT: testb $32, %dl -; X86-NEXT: jne .LBB6_16 -; X86-NEXT: # %bb.15: # %entry -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB6_16: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subb %al, %bl -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebx, %ecx -; X86-NEXT: shll %cl, %ebp -; X86-NEXT: testb $32, %bl -; X86-NEXT: movl $0, %ecx -; X86-NEXT: jne .LBB6_18 -; X86-NEXT: # %bb.17: # %entry -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: .LBB6_18: # %entry -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: subl $64, %ecx -; X86-NEXT: sbbl $0, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: sbbl $0, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: sbbl $0, %esi -; X86-NEXT: setae %bh -; X86-NEXT: jb .LBB6_20 -; X86-NEXT: # %bb.19: # %entry -; X86-NEXT: xorl %edi, %edi -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: .LBB6_20: # %entry -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: shrdl %cl, %esi, %edi -; X86-NEXT: shrl %cl, %esi -; X86-NEXT: testb $32, %cl -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: jne .LBB6_22 -; X86-NEXT: # %bb.21: # %entry -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB6_22: # %entry -; X86-NEXT: testb %bh, %bh -; X86-NEXT: jne .LBB6_24 -; X86-NEXT: # %bb.23: -; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB6_24: # %entry -; X86-NEXT: testb $32, %cl -; X86-NEXT: movl $0, %ecx -; X86-NEXT: jne .LBB6_26 -; X86-NEXT: # %bb.25: # %entry -; X86-NEXT: movl %esi, %ecx -; X86-NEXT: .LBB6_26: # %entry -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: shldl %cl, %edi, %esi -; X86-NEXT: testb $32, %bl -; X86-NEXT: jne .LBB6_28 -; X86-NEXT: # %bb.27: # %entry -; X86-NEXT: movl %esi, %ebp -; X86-NEXT: .LBB6_28: # %entry -; X86-NEXT: testb %bh, %bh -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: jne .LBB6_30 -; X86-NEXT: # %bb.29: -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: orl %ebp, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB6_30: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: jne .LBB6_32 -; X86-NEXT: # %bb.31: # %entry -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: .LBB6_32: # %entry -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: shrdl %cl, %ebp, %edi -; X86-NEXT: movl %edi, %ebp -; X86-NEXT: testb $32, %cl -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: je .LBB6_33 -; X86-NEXT: # %bb.34: # %entry -; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: jne .LBB6_35 -; X86-NEXT: .LBB6_36: # %entry -; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: je .LBB6_38 -; X86-NEXT: .LBB6_37: 
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB6_38: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl %ecx, %edx -; X86-NEXT: je .LBB6_40 -; X86-NEXT: # %bb.39: # %entry -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: .LBB6_40: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl %edx, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: je .LBB6_42 -; X86-NEXT: # %bb.41: # %entry -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: .LBB6_42: # %entry -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl %edx, 28(%ecx) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl %edx, 24(%ecx) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl %edx, 12(%ecx) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl %edx, 8(%ecx) -; X86-NEXT: movl %esi, 20(%ecx) -; X86-NEXT: movl %eax, 16(%ecx) -; X86-NEXT: movl %ebx, 4(%ecx) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, (%ecx) -; X86-NEXT: addl $68, %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; X86-NEXT: .LBB6_33: # %entry -; X86-NEXT: movl %ebp, %edi -; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: je .LBB6_36 -; X86-NEXT: .LBB6_35: -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx 
# 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, %edi -; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: jne .LBB6_37 -; X86-NEXT: jmp .LBB6_38 +; i686-LABEL: test_lshr_v2i128: +; i686: # %bb.0: # %entry +; i686-NEXT: pushl %ebp +; i686-NEXT: pushl %ebx +; i686-NEXT: pushl %edi +; i686-NEXT: pushl %esi +; i686-NEXT: subl $68, %esp +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl %ebx, %edi +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shrl %cl, %edi +; i686-NEXT: movl %esi, %ebp +; i686-NEXT: shrl %cl, %ebp +; i686-NEXT: shrdl %cl, %esi, %edx +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB6_1 +; i686-NEXT: # %bb.2: # %entry +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jmp .LBB6_3 +; i686-NEXT: .LBB6_1: +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: .LBB6_3: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shrdl %cl, %ebx, %esi +; i686-NEXT: testb $32, %al +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: jne .LBB6_5 +; i686-NEXT: # %bb.4: # %entry +; i686-NEXT: movl %esi, %edi +; i686-NEXT: .LBB6_5: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shrl %cl, %ebx +; i686-NEXT: shrl %cl, %ebp +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: subl $64, %ecx +; i686-NEXT: 
movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB6_7 +; i686-NEXT: # %bb.6: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: .LBB6_7: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: shrdl %cl, %ebp, %esi +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB6_9 +; i686-NEXT: # %bb.8: # %entry +; i686-NEXT: movl %esi, %ebx +; i686-NEXT: .LBB6_9: # %entry +; i686-NEXT: movl %edi, %esi +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: shrl %cl, %ebp +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB6_11 +; i686-NEXT: # %bb.10: # %entry +; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: .LBB6_11: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movb $64, %cl +; i686-NEXT: subb %dl, %cl +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: shldl %cl, %ebx, %edi +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, %edi +; i686-NEXT: shll %cl, %edi +; i686-NEXT: testb $32, %cl +; i686-NEXT: movb $64, %bl +; i686-NEXT: jne .LBB6_12 +; i686-NEXT: # %bb.13: # %entry +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: 
jmp .LBB6_14 +; i686-NEXT: .LBB6_12: +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: .LBB6_14: # %entry +; i686-NEXT: movl %esi, %edi +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: shrdl %cl, %ebp, %esi +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB6_16 +; i686-NEXT: # %bb.15: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB6_16: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: subb %al, %bl +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: shll %cl, %ebp +; i686-NEXT: testb $32, %bl +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB6_18 +; i686-NEXT: # %bb.17: # %entry +; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: .LBB6_18: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: subl $64, %ecx +; i686-NEXT: sbbl $0, %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: sbbl $0, %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: sbbl $0, %esi +; i686-NEXT: setae %bh +; i686-NEXT: jb .LBB6_20 +; i686-NEXT: # %bb.19: # %entry +; i686-NEXT: xorl %edi, %edi +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: .LBB6_20: # %entry +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shrdl %cl, %esi, %edi +; i686-NEXT: shrl %cl, %esi +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jne .LBB6_22 +; i686-NEXT: # %bb.21: # %entry +; i686-NEXT: movl %edi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB6_22: # %entry +; i686-NEXT: testb %bh, %bh +; i686-NEXT: jne .LBB6_24 +; i686-NEXT: # %bb.23: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB6_24: # %entry +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB6_26 +; i686-NEXT: # %bb.25: # %entry +; i686-NEXT: movl %esi, %ecx +; i686-NEXT: .LBB6_26: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: shldl %cl, %edi, %esi +; i686-NEXT: testb $32, %bl +; i686-NEXT: jne .LBB6_28 +; i686-NEXT: # %bb.27: # %entry +; i686-NEXT: movl %esi, %ebp +; i686-NEXT: .LBB6_28: # %entry +; i686-NEXT: testb %bh, %bh +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: jne .LBB6_30 +; i686-NEXT: # %bb.29: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: orl %ebp, %ecx +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB6_30: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: jne .LBB6_32 +; i686-NEXT: # %bb.31: # %entry +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: .LBB6_32: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: shrdl %cl, %ebp, %edi +; i686-NEXT: movl %edi, %ebp +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; i686-NEXT: je .LBB6_33 +; i686-NEXT: # %bb.34: # %entry +; i686-NEXT: cmpb $0, 
{{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: jne .LBB6_35 +; i686-NEXT: .LBB6_36: # %entry +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: je .LBB6_38 +; i686-NEXT: .LBB6_37: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB6_38: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: orl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: orl {{[0-9]+}}(%esp), %edx +; i686-NEXT: orl %ecx, %edx +; i686-NEXT: je .LBB6_40 +; i686-NEXT: # %bb.39: # %entry +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; i686-NEXT: .LBB6_40: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: orl {{[0-9]+}}(%esp), %edx +; i686-NEXT: orl {{[0-9]+}}(%esp), %eax +; i686-NEXT: orl %edx, %eax +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: je .LBB6_42 +; i686-NEXT: # %bb.41: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: .LBB6_42: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, 28(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, 24(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, 12(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, 8(%ecx) +; i686-NEXT: movl %esi, 20(%ecx) +; i686-NEXT: movl %eax, 16(%ecx) +; i686-NEXT: movl %ebx, 4(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, (%ecx) +; i686-NEXT: addl $68, %esp +; i686-NEXT: popl %esi +; i686-NEXT: popl %edi +; i686-NEXT: popl 
%ebx +; i686-NEXT: popl %ebp +; i686-NEXT: retl +; i686-NEXT: .LBB6_33: # %entry +; i686-NEXT: movl %ebp, %edi +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: je .LBB6_36 +; i686-NEXT: .LBB6_35: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; i686-NEXT: movl %ecx, %edi +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: jne .LBB6_37 +; i686-NEXT: jmp .LBB6_38 ; -; X64-LABEL: test_lshr_v2i128: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; X64-NEXT: movb {{[0-9]+}}(%rsp), %r9b -; X64-NEXT: movl %r9d, %ecx -; X64-NEXT: shrdq %cl, %rax, %rdx -; X64-NEXT: movl %r8d, %ecx -; X64-NEXT: shrdq %cl, %rsi, %rdi -; X64-NEXT: shrq %cl, %rsi -; X64-NEXT: xorl %r11d, %r11d -; X64-NEXT: testb $64, %r8b -; X64-NEXT: cmovneq %rsi, %rdi -; X64-NEXT: cmovneq %r11, %rsi -; X64-NEXT: movl %r9d, %ecx -; X64-NEXT: shrq %cl, %rax -; X64-NEXT: testb $64, %r9b -; X64-NEXT: cmovneq %rax, %rdx -; X64-NEXT: cmovneq %r11, %rax -; X64-NEXT: movq %rax, 24(%r10) -; X64-NEXT: movq %rdx, 16(%r10) -; X64-NEXT: movq %rsi, 8(%r10) -; X64-NEXT: movq %rdi, (%r10) -; X64-NEXT: retq +; x86_64-LABEL: test_lshr_v2i128: +; x86_64: # %bb.0: # %entry +; x86_64-NEXT: movq %rcx, %rax +; x86_64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; x86_64-NEXT: movb {{[0-9]+}}(%rsp), %r9b +; x86_64-NEXT: movl %r9d, %ecx +; x86_64-NEXT: shrdq %cl, %rax, %rdx +; x86_64-NEXT: movl %r8d, %ecx +; x86_64-NEXT: shrdq %cl, %rsi, %rdi +; x86_64-NEXT: shrq %cl, %rsi +; x86_64-NEXT: xorl %r11d, %r11d +; x86_64-NEXT: testb $64, %r8b +; x86_64-NEXT: cmovneq %rsi, %rdi +; x86_64-NEXT: cmovneq %r11, %rsi +; x86_64-NEXT: movl %r9d, %ecx +; x86_64-NEXT: shrq %cl, %rax +; x86_64-NEXT: testb $64, %r9b +; x86_64-NEXT: cmovneq %rax, %rdx +; x86_64-NEXT: cmovneq %r11, %rax +; x86_64-NEXT: movq %rax, 24(%r10) +; x86_64-NEXT: movq %rdx, 
16(%r10) +; x86_64-NEXT: movq %rsi, 8(%r10) +; x86_64-NEXT: movq %rdi, (%r10) +; x86_64-NEXT: retq entry: %0 = lshr <2 x i128> %x, %a store <2 x i128> %0, <2 x i128>* %r, align 16 @@ -749,296 +749,296 @@ } define void @test_ashr_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocapture %r) nounwind { -; X86-LABEL: test_ashr_v2i128: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: subl $80, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %ebp, %ebx -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: sarl %cl, %ebx -; X86-NEXT: movl %esi, %edi -; X86-NEXT: shrl %cl, %edi -; X86-NEXT: shrdl %cl, %esi, %edx -; X86-NEXT: sarl $31, %ebp -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: testb $32, %al -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: jne .LBB7_1 -; X86-NEXT: # %bb.2: # %entry -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: jmp .LBB7_3 -; X86-NEXT: .LBB7_1: -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB7_3: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: shrdl %cl, %edx, %edi -; X86-NEXT: testb $32, %al -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: jne .LBB7_5 -; X86-NEXT: # %bb.4: # %entry -; X86-NEXT: movl %edi, %ebx -; X86-NEXT: .LBB7_5: # %entry -; X86-NEXT: movl 
{{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebp, %edi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: sarl %cl, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: shrl %cl, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sarl $31, %ebp -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: subl $64, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X86-NEXT: testb $32, %dl -; X86-NEXT: movl $0, %esi -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: jne .LBB7_7 -; X86-NEXT: # %bb.6: # %entry -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl %edi, %ecx -; X86-NEXT: .LBB7_7: # %entry -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: shrdl %cl, %ebp, %esi -; X86-NEXT: testb $32, %dl -; X86-NEXT: jne .LBB7_9 -; X86-NEXT: # %bb.8: # %entry -; X86-NEXT: movl %esi, %edi -; X86-NEXT: .LBB7_9: # %entry -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sarl %cl, %esi -; X86-NEXT: testb $32, %cl -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: jne .LBB7_11 -; X86-NEXT: # %bb.10: # %entry -; X86-NEXT: movl %esi, %ecx -; X86-NEXT: .LBB7_11: # %entry -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: 
movb $64, %cl -; X86-NEXT: subb %dl, %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: shldl %cl, %ebx, %ebp -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: shll %cl, %ebp -; X86-NEXT: testb $32, %cl -; X86-NEXT: movb $64, %bl -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: je .LBB7_13 -; X86-NEXT: # %bb.12: -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: xorl %ebp, %ebp -; X86-NEXT: .LBB7_13: # %entry -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: shrdl %cl, %edi, %esi -; X86-NEXT: testb $32, %dl -; X86-NEXT: jne .LBB7_15 -; X86-NEXT: # %bb.14: # %entry -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB7_15: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: subb %al, %bl -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebx, %ecx -; X86-NEXT: shll %cl, %ebp -; X86-NEXT: testb $32, %bl -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: jne .LBB7_17 -; X86-NEXT: # %bb.16: # %entry -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB7_17: # %entry -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: subl $64, %ecx -; X86-NEXT: sbbl $0, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: sbbl $0, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: sbbl $0, %esi -; X86-NEXT: setae %bh -; X86-NEXT: jb .LBB7_19 -; X86-NEXT: # %bb.18: # %entry -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB7_19: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: shrdl %cl, %edi, %esi -; X86-NEXT: sarl %cl, 
%edi -; X86-NEXT: testb $32, %cl -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: je .LBB7_20 -; X86-NEXT: # %bb.21: # %entry -; X86-NEXT: testb %bh, %bh -; X86-NEXT: je .LBB7_22 -; X86-NEXT: .LBB7_23: # %entry -; X86-NEXT: testb $32, %cl -; X86-NEXT: jne .LBB7_25 -; X86-NEXT: .LBB7_24: # %entry -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB7_25: # %entry -; X86-NEXT: movl %ebx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: shldl %cl, %esi, %edi -; X86-NEXT: testb $32, %bl -; X86-NEXT: jne .LBB7_27 -; X86-NEXT: # %bb.26: # %entry -; X86-NEXT: movl %edi, %ebp -; X86-NEXT: .LBB7_27: # %entry -; X86-NEXT: testb %bh, %bh -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: jne .LBB7_29 -; X86-NEXT: # %bb.28: -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: orl %ebp, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB7_29: # %entry -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: jne .LBB7_31 -; X86-NEXT: # %bb.30: # %entry -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB7_31: # %entry -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: shrdl %cl, %ebp, %ebx -; X86-NEXT: testb $32, %cl -; X86-NEXT: jne .LBB7_33 -; X86-NEXT: # %bb.32: # %entry -; X86-NEXT: movl %ebx, %esi -; X86-NEXT: .LBB7_33: # %entry -; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: je .LBB7_35 -; X86-NEXT: # %bb.34: -; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: orl %ebx, %ecx -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: .LBB7_35: # %entry -; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: je .LBB7_37 -; X86-NEXT: # %bb.36: -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB7_37: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl %ecx, %edx -; X86-NEXT: je .LBB7_39 -; X86-NEXT: # %bb.38: # %entry -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB7_39: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl %edx, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: je .LBB7_41 -; X86-NEXT: # %bb.40: # %entry -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: .LBB7_41: # %entry -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl %edx, 28(%ecx) -; X86-NEXT: movl %edi, 24(%ecx) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl %edx, 12(%ecx) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl %edx, 8(%ecx) -; X86-NEXT: movl %esi, 20(%ecx) -; X86-NEXT: movl %eax, 16(%ecx) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, 4(%ecx) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, (%ecx) -; 
X86-NEXT: addl $80, %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; X86-NEXT: .LBB7_20: # %entry -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: testb %bh, %bh -; X86-NEXT: jne .LBB7_23 -; X86-NEXT: .LBB7_22: -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB7_24 -; X86-NEXT: jmp .LBB7_25 +; i686-LABEL: test_ashr_v2i128: +; i686: # %bb.0: # %entry +; i686-NEXT: pushl %ebp +; i686-NEXT: pushl %ebx +; i686-NEXT: pushl %edi +; i686-NEXT: pushl %esi +; i686-NEXT: subl $80, %esp +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl %ebp, %ebx +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: sarl %cl, %ebx +; i686-NEXT: movl %esi, %edi +; i686-NEXT: shrl %cl, %edi +; i686-NEXT: shrdl %cl, %esi, %edx +; i686-NEXT: sarl $31, %ebp +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: testb $32, %al +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jne .LBB7_1 +; i686-NEXT: # %bb.2: # %entry +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jmp .LBB7_3 +; i686-NEXT: .LBB7_1: +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_3: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: shrdl %cl, %edx, 
%edi +; i686-NEXT: testb $32, %al +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: jne .LBB7_5 +; i686-NEXT: # %bb.4: # %entry +; i686-NEXT: movl %edi, %ebx +; i686-NEXT: .LBB7_5: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl %ebp, %edi +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: sarl %cl, %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shrl %cl, %esi +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: sarl $31, %ebp +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: subl $64, %ecx +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl $0, %esi +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: jne .LBB7_7 +; i686-NEXT: # %bb.6: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: movl %edi, %ecx +; i686-NEXT: .LBB7_7: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: shrdl %cl, %ebp, %esi +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB7_9 +; i686-NEXT: # %bb.8: # %entry +; i686-NEXT: movl %esi, %edi +; i686-NEXT: .LBB7_9: # %entry +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: sarl %cl, %esi +; i686-NEXT: testb $32, %cl +; 
i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: jne .LBB7_11 +; i686-NEXT: # %bb.10: # %entry +; i686-NEXT: movl %esi, %ecx +; i686-NEXT: .LBB7_11: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movb $64, %cl +; i686-NEXT: subb %dl, %cl +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: shldl %cl, %ebx, %ebp +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, %ebp +; i686-NEXT: shll %cl, %ebp +; i686-NEXT: testb $32, %cl +; i686-NEXT: movb $64, %bl +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: je .LBB7_13 +; i686-NEXT: # %bb.12: +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: xorl %ebp, %ebp +; i686-NEXT: .LBB7_13: # %entry +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: shrdl %cl, %edi, %esi +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB7_15 +; i686-NEXT: # %bb.14: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_15: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: subb %al, %bl +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: shll %cl, %ebp +; i686-NEXT: testb $32, %bl +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: jne .LBB7_17 +; i686-NEXT: # %bb.16: # %entry +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_17: # %entry +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: subl $64, %ecx +; i686-NEXT: sbbl $0, %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: sbbl $0, %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: 
sbbl $0, %esi +; i686-NEXT: setae %bh +; i686-NEXT: jb .LBB7_19 +; i686-NEXT: # %bb.18: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_19: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shrdl %cl, %edi, %esi +; i686-NEXT: sarl %cl, %edi +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: je .LBB7_20 +; i686-NEXT: # %bb.21: # %entry +; i686-NEXT: testb %bh, %bh +; i686-NEXT: je .LBB7_22 +; i686-NEXT: .LBB7_23: # %entry +; i686-NEXT: testb $32, %cl +; i686-NEXT: jne .LBB7_25 +; i686-NEXT: .LBB7_24: # %entry +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_25: # %entry +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shldl %cl, %esi, %edi +; i686-NEXT: testb $32, %bl +; i686-NEXT: jne .LBB7_27 +; i686-NEXT: # %bb.26: # %entry +; i686-NEXT: movl %edi, %ebp +; i686-NEXT: .LBB7_27: # %entry +; i686-NEXT: testb %bh, %bh +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: jne .LBB7_29 +; i686-NEXT: # %bb.28: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; i686-NEXT: orl %ebp, %ebx +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_29: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: jne .LBB7_31 +; i686-NEXT: # %bb.30: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_31: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; 
i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: shrdl %cl, %ebp, %ebx +; i686-NEXT: testb $32, %cl +; i686-NEXT: jne .LBB7_33 +; i686-NEXT: # %bb.32: # %entry +; i686-NEXT: movl %ebx, %esi +; i686-NEXT: .LBB7_33: # %entry +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; i686-NEXT: je .LBB7_35 +; i686-NEXT: # %bb.34: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: orl %ebx, %ecx +; i686-NEXT: movl %ecx, %esi +; i686-NEXT: .LBB7_35: # %entry +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: je .LBB7_37 +; i686-NEXT: # %bb.36: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_37: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: orl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: orl {{[0-9]+}}(%esp), %edx +; i686-NEXT: orl %ecx, %edx +; i686-NEXT: je .LBB7_39 +; i686-NEXT: # %bb.38: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_39: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: orl {{[0-9]+}}(%esp), %edx +; i686-NEXT: orl {{[0-9]+}}(%esp), %eax +; i686-NEXT: orl %edx, %eax +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: je .LBB7_41 +; i686-NEXT: # %bb.40: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: .LBB7_41: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl 
%edx, 28(%ecx) +; i686-NEXT: movl %edi, 24(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, 12(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, 8(%ecx) +; i686-NEXT: movl %esi, 20(%ecx) +; i686-NEXT: movl %eax, 16(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, 4(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, (%ecx) +; i686-NEXT: addl $80, %esp +; i686-NEXT: popl %esi +; i686-NEXT: popl %edi +; i686-NEXT: popl %ebx +; i686-NEXT: popl %ebp +; i686-NEXT: retl +; i686-NEXT: .LBB7_20: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: testb %bh, %bh +; i686-NEXT: jne .LBB7_23 +; i686-NEXT: .LBB7_22: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: testb $32, %cl +; i686-NEXT: je .LBB7_24 +; i686-NEXT: jmp .LBB7_25 ; -; X64-LABEL: test_ashr_v2i128: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rcx, %r11 -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; X64-NEXT: movb {{[0-9]+}}(%rsp), %r9b -; X64-NEXT: movl %r9d, %ecx -; X64-NEXT: shrdq %cl, %r11, %rdx -; X64-NEXT: movl %r8d, %ecx -; X64-NEXT: shrdq %cl, %rsi, %rdi -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: sarq %cl, %rax -; X64-NEXT: sarq $63, %rsi -; X64-NEXT: testb $64, %r8b -; X64-NEXT: cmovneq %rax, %rdi -; X64-NEXT: cmoveq %rax, %rsi -; X64-NEXT: movq %r11, %rax -; X64-NEXT: movl %r9d, %ecx -; X64-NEXT: sarq %cl, %rax -; X64-NEXT: sarq $63, %r11 -; X64-NEXT: testb $64, %r9b -; X64-NEXT: cmovneq %rax, %rdx -; X64-NEXT: cmoveq %rax, %r11 -; X64-NEXT: movq %r11, 24(%r10) -; X64-NEXT: movq %rdx, 16(%r10) -; X64-NEXT: movq %rsi, 8(%r10) -; X64-NEXT: movq %rdi, (%r10) -; X64-NEXT: retq +; x86_64-LABEL: test_ashr_v2i128: +; x86_64: # 
%bb.0: # %entry +; x86_64-NEXT: movq %rcx, %r11 +; x86_64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; x86_64-NEXT: movb {{[0-9]+}}(%rsp), %r9b +; x86_64-NEXT: movl %r9d, %ecx +; x86_64-NEXT: shrdq %cl, %r11, %rdx +; x86_64-NEXT: movl %r8d, %ecx +; x86_64-NEXT: shrdq %cl, %rsi, %rdi +; x86_64-NEXT: movq %rsi, %rax +; x86_64-NEXT: sarq %cl, %rax +; x86_64-NEXT: sarq $63, %rsi +; x86_64-NEXT: testb $64, %r8b +; x86_64-NEXT: cmovneq %rax, %rdi +; x86_64-NEXT: cmoveq %rax, %rsi +; x86_64-NEXT: movq %r11, %rax +; x86_64-NEXT: movl %r9d, %ecx +; x86_64-NEXT: sarq %cl, %rax +; x86_64-NEXT: sarq $63, %r11 +; x86_64-NEXT: testb $64, %r9b +; x86_64-NEXT: cmovneq %rax, %rdx +; x86_64-NEXT: cmoveq %rax, %r11 +; x86_64-NEXT: movq %r11, 24(%r10) +; x86_64-NEXT: movq %rdx, 16(%r10) +; x86_64-NEXT: movq %rsi, 8(%r10) +; x86_64-NEXT: movq %rdi, (%r10) +; x86_64-NEXT: retq entry: %0 = ashr <2 x i128> %x, %a store <2 x i128> %0, <2 x i128>* %r, align 16 @@ -1046,307 +1046,307 @@ } define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocapture %r) nounwind { -; X86-LABEL: test_shl_v2i128: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: subl $72, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %ebx, %ecx -; X86-NEXT: shll %cl, %ebp -; X86-NEXT: shll %cl, %esi -; X86-NEXT: movl %edx, %eax -; X86-NEXT: subl $64, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: testb $32, %bl -; X86-NEXT: movl 
{{[0-9]+}}(%esp), %edi -; X86-NEXT: movl $0, %eax -; X86-NEXT: movl $0, %ecx -; X86-NEXT: jne .LBB8_2 -; X86-NEXT: # %bb.1: # %entry -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: .LBB8_2: # %entry -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, %eax -; X86-NEXT: movl %ebx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: shldl %cl, %edi, %eax -; X86-NEXT: testb $32, %bl -; X86-NEXT: jne .LBB8_4 -; X86-NEXT: # %bb.3: # %entry -; X86-NEXT: movl %eax, %esi -; X86-NEXT: .LBB8_4: # %entry -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movb $64, %cl -; X86-NEXT: subb %bl, %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %esi -; X86-NEXT: shrl %cl, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shrdl %cl, %edi, %eax -; X86-NEXT: testb $32, %cl -; X86-NEXT: jne .LBB8_5 -; X86-NEXT: # %bb.6: # %entry -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: jmp .LBB8_7 -; X86-NEXT: .LBB8_5: -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: .LBB8_7: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %ebx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: shldl %cl, %esi, %edi -; X86-NEXT: testb $32, %bl -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: jne .LBB8_9 -; X86-NEXT: # %bb.8: # %entry -; X86-NEXT: movl %edi, %ebp -; X86-NEXT: .LBB8_9: # %entry -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, %ebp -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: shll %cl, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: shll %cl, %esi -; X86-NEXT: testb $32, %dl -; X86-NEXT: movl $0, %edi -; X86-NEXT: movl $0, %ecx -; X86-NEXT: jne 
.LBB8_11 -; X86-NEXT: # %bb.10: # %entry -; X86-NEXT: movl %esi, %edi -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: .LBB8_11: # %entry -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: shldl %cl, %ebx, %edi -; X86-NEXT: testb $32, %dl -; X86-NEXT: jne .LBB8_13 -; X86-NEXT: # %bb.12: # %entry -; X86-NEXT: movl %edi, %ebp -; X86-NEXT: .LBB8_13: # %entry -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movb $64, %cl -; X86-NEXT: subb %dl, %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: shrl %cl, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X86-NEXT: testb $32, %cl -; X86-NEXT: movl $0, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: jne .LBB8_15 -; X86-NEXT: # %bb.14: # %entry -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: .LBB8_15: # %entry -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: shldl %cl, %ebp, %edi -; X86-NEXT: testb $32, %dl -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: jne .LBB8_17 -; X86-NEXT: # %bb.16: # %entry -; X86-NEXT: movl %edi, %esi -; X86-NEXT: .LBB8_17: # %entry -; X86-NEXT: orl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: subl $64, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X86-NEXT: jb .LBB8_19 -; 
X86-NEXT: # %bb.18: # %entry -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: .LBB8_19: # %entry -; X86-NEXT: jb .LBB8_21 -; X86-NEXT: # %bb.20: # %entry -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: .LBB8_21: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebp, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: shll %cl, %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: shldl %cl, %ebp, %edi -; X86-NEXT: testb $32, %cl -; X86-NEXT: movl %ebx, %ecx -; X86-NEXT: jne .LBB8_23 -; X86-NEXT: # %bb.22: # %entry -; X86-NEXT: movl %edi, %ecx -; X86-NEXT: .LBB8_23: # %entry -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: shll %cl, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: testb $32, %al -; X86-NEXT: movl $0, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NEXT: jne .LBB8_25 -; X86-NEXT: # %bb.24: # %entry -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: .LBB8_25: # %entry -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: jne .LBB8_27 -; X86-NEXT: # %bb.26: # %entry -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB8_27: # %entry -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: shldl %cl, %edi, %esi -; X86-NEXT: testb $32, %al -; X86-NEXT: jne .LBB8_29 -; X86-NEXT: # %bb.28: # %entry -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB8_29: # %entry -; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload -; 
X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: jne .LBB8_30 -; X86-NEXT: # %bb.31: # %entry -; X86-NEXT: testb %al, %al -; X86-NEXT: je .LBB8_32 -; X86-NEXT: .LBB8_33: # %entry -; X86-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: jne .LBB8_35 -; X86-NEXT: .LBB8_34: # %entry -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB8_35: # %entry -; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: shrdl %cl, %ebx, %esi -; X86-NEXT: testb $32, %cl -; X86-NEXT: jne .LBB8_37 -; X86-NEXT: # %bb.36: # %entry -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB8_37: # %entry -; X86-NEXT: testb %al, %al -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: jne .LBB8_38 -; X86-NEXT: # %bb.39: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NEXT: testb %al, %al -; X86-NEXT: jne .LBB8_41 -; X86-NEXT: jmp .LBB8_42 -; X86-NEXT: .LBB8_30: -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: orl %ebp, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: testb %al, %al -; X86-NEXT: jne .LBB8_33 -; X86-NEXT: .LBB8_32: # %entry -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X86-NEXT: je .LBB8_34 -; X86-NEXT: jmp .LBB8_35 -; X86-NEXT: .LBB8_38: -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: testb %al, %al -; X86-NEXT: je .LBB8_42 -; X86-NEXT: .LBB8_41: -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: orl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB8_42: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl %eax, %edx -; X86-NEXT: je .LBB8_44 -; X86-NEXT: # %bb.43: # %entry -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: .LBB8_44: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: orl %edx, %ebx -; X86-NEXT: je .LBB8_46 -; X86-NEXT: # %bb.45: # %entry -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: .LBB8_46: # %entry -; X86-NEXT: movl %esi, 20(%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl %edx, 16(%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl %edx, 4(%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl %edx, (%eax) -; X86-NEXT: movl %edi, 28(%eax) -; X86-NEXT: movl %ecx, 24(%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, 12(%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, 8(%eax) -; X86-NEXT: addl $72, %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp -; X86-NEXT: retl +; i686-LABEL: test_shl_v2i128: +; i686: # %bb.0: # %entry +; i686-NEXT: pushl %ebp +; i686-NEXT: pushl %ebx +; i686-NEXT: pushl %edi +; i686-NEXT: pushl %esi +; i686-NEXT: subl $72, %esp +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: 
movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: shll %cl, %ebp +; i686-NEXT: shll %cl, %esi +; i686-NEXT: movl %edx, %eax +; i686-NEXT: subl $64, %eax +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: sbbl $0, %eax +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: sbbl $0, %eax +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: sbbl $0, %eax +; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: testb $32, %bl +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl $0, %eax +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB8_2 +; i686-NEXT: # %bb.1: # %entry +; i686-NEXT: movl %esi, %eax +; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: .LBB8_2: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edi, %eax +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: shldl %cl, %edi, %eax +; i686-NEXT: testb $32, %bl +; i686-NEXT: jne .LBB8_4 +; i686-NEXT: # %bb.3: # %entry +; i686-NEXT: movl %eax, %esi +; i686-NEXT: .LBB8_4: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movb $64, %cl +; i686-NEXT: subb %bl, %cl +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl %edi, %esi +; i686-NEXT: shrl %cl, %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: shrdl %cl, %edi, %eax +; i686-NEXT: testb $32, %cl +; i686-NEXT: jne .LBB8_5 +; i686-NEXT: # %bb.6: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jmp .LBB8_7 +; i686-NEXT: .LBB8_5: +; i686-NEXT: movl %esi, %eax +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: .LBB8_7: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; 
i686-NEXT: movl %ebx, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shldl %cl, %esi, %edi +; i686-NEXT: testb $32, %bl +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: jne .LBB8_9 +; i686-NEXT: # %bb.8: # %entry +; i686-NEXT: movl %edi, %ebp +; i686-NEXT: .LBB8_9: # %entry +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ecx, %ebp +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shll %cl, %ebp +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shll %cl, %esi +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl $0, %edi +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB8_11 +; i686-NEXT: # %bb.10: # %entry +; i686-NEXT: movl %esi, %edi +; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: .LBB8_11: # %entry +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: shldl %cl, %ebx, %edi +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB8_13 +; i686-NEXT: # %bb.12: # %entry +; i686-NEXT: movl %edi, %ebp +; i686-NEXT: .LBB8_13: # %entry +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movb $64, %cl +; i686-NEXT: subb %dl, %cl +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: shrl %cl, %ebx +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl $0, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: jne .LBB8_15 +; i686-NEXT: # %bb.14: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: .LBB8_15: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: 
movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: shldl %cl, %ebp, %edi +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: jne .LBB8_17 +; i686-NEXT: # %bb.16: # %entry +; i686-NEXT: movl %edi, %esi +; i686-NEXT: .LBB8_17: # %entry +; i686-NEXT: orl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl %ebx, %eax +; i686-NEXT: subl $64, %eax +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; i686-NEXT: jb .LBB8_19 +; i686-NEXT: # %bb.18: # %entry +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: .LBB8_19: # %entry +; i686-NEXT: jb .LBB8_21 +; i686-NEXT: # %bb.20: # %entry +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: .LBB8_21: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl %ebp, %ebx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: shll %cl, %ebx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: shldl %cl, %ebp, %edi +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: jne .LBB8_23 +; i686-NEXT: # %bb.22: # %entry +; i686-NEXT: movl %edi, %ecx +; i686-NEXT: .LBB8_23: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shll %cl, %edi +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: testb $32, %al +; i686-NEXT: movl $0, %edi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; i686-NEXT: jne .LBB8_25 +; i686-NEXT: # %bb.24: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; i686-NEXT: 
.LBB8_25: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: jne .LBB8_27 +; i686-NEXT: # %bb.26: # %entry +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB8_27: # %entry +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shldl %cl, %edi, %esi +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB8_29 +; i686-NEXT: # %bb.28: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB8_29: # %entry +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: jne .LBB8_30 +; i686-NEXT: # %bb.31: # %entry +; i686-NEXT: testb %al, %al +; i686-NEXT: je .LBB8_32 +; i686-NEXT: .LBB8_33: # %entry +; i686-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: jne .LBB8_35 +; i686-NEXT: .LBB8_34: # %entry +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB8_35: # %entry +; i686-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: shrdl %cl, %ebx, %esi +; i686-NEXT: testb $32, %cl +; i686-NEXT: jne .LBB8_37 +; i686-NEXT: # %bb.36: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB8_37: # %entry +; i686-NEXT: testb %al, %al +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: jne .LBB8_38 +; i686-NEXT: # %bb.39: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; i686-NEXT: testb %al, %al +; i686-NEXT: jne .LBB8_41 +; i686-NEXT: jmp .LBB8_42 +; i686-NEXT: .LBB8_30: +; 
i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: orl %ebp, %ecx +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: testb %al, %al +; i686-NEXT: jne .LBB8_33 +; i686-NEXT: .LBB8_32: # %entry +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: je .LBB8_34 +; i686-NEXT: jmp .LBB8_35 +; i686-NEXT: .LBB8_38: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: testb %al, %al +; i686-NEXT: je .LBB8_42 +; i686-NEXT: .LBB8_41: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB8_42: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: orl {{[0-9]+}}(%esp), %eax +; i686-NEXT: orl {{[0-9]+}}(%esp), %edx +; i686-NEXT: orl %eax, %edx +; i686-NEXT: je .LBB8_44 +; i686-NEXT: # %bb.43: # %entry +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB8_44: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: orl {{[0-9]+}}(%esp), %edx +; i686-NEXT: orl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: orl %edx, %ebx +; i686-NEXT: je .LBB8_46 +; i686-NEXT: # %bb.45: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; i686-NEXT: .LBB8_46: # %entry +; i686-NEXT: movl %esi, 20(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; 
i686-NEXT: movl %edx, 16(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, 4(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, (%eax) +; i686-NEXT: movl %edi, 28(%eax) +; i686-NEXT: movl %ecx, 24(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, 12(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, 8(%eax) +; i686-NEXT: addl $72, %esp +; i686-NEXT: popl %esi +; i686-NEXT: popl %edi +; i686-NEXT: popl %ebx +; i686-NEXT: popl %ebp +; i686-NEXT: retl ; -; X64-LABEL: test_shl_v2i128: -; X64: # %bb.0: # %entry -; X64-NEXT: movq %rcx, %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; X64-NEXT: movb {{[0-9]+}}(%rsp), %r9b -; X64-NEXT: movl %r9d, %ecx -; X64-NEXT: shldq %cl, %rdx, %rax -; X64-NEXT: movl %r8d, %ecx -; X64-NEXT: shldq %cl, %rdi, %rsi -; X64-NEXT: shlq %cl, %rdi -; X64-NEXT: xorl %r11d, %r11d -; X64-NEXT: testb $64, %r8b -; X64-NEXT: cmovneq %rdi, %rsi -; X64-NEXT: cmovneq %r11, %rdi -; X64-NEXT: movl %r9d, %ecx -; X64-NEXT: shlq %cl, %rdx -; X64-NEXT: testb $64, %r9b -; X64-NEXT: cmovneq %rdx, %rax -; X64-NEXT: cmovneq %r11, %rdx -; X64-NEXT: movq %rax, 24(%r10) -; X64-NEXT: movq %rdx, 16(%r10) -; X64-NEXT: movq %rsi, 8(%r10) -; X64-NEXT: movq %rdi, (%r10) -; X64-NEXT: retq +; x86_64-LABEL: test_shl_v2i128: +; x86_64: # %bb.0: # %entry +; x86_64-NEXT: movq %rcx, %rax +; x86_64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; x86_64-NEXT: movb {{[0-9]+}}(%rsp), %r9b +; x86_64-NEXT: movl %r9d, %ecx +; x86_64-NEXT: shldq %cl, %rdx, %rax +; x86_64-NEXT: movl %r8d, %ecx +; x86_64-NEXT: shldq %cl, %rdi, %rsi +; x86_64-NEXT: shlq %cl, %rdi +; x86_64-NEXT: xorl %r11d, %r11d +; x86_64-NEXT: testb $64, %r8b +; x86_64-NEXT: cmovneq %rdi, %rsi +; x86_64-NEXT: cmovneq %r11, %rdi +; x86_64-NEXT: movl %r9d, %ecx +; x86_64-NEXT: shlq %cl, %rdx +; x86_64-NEXT: testb $64, %r9b +; x86_64-NEXT: cmovneq 
%rdx, %rax +; x86_64-NEXT: cmovneq %r11, %rdx +; x86_64-NEXT: movq %rax, 24(%r10) +; x86_64-NEXT: movq %rdx, 16(%r10) +; x86_64-NEXT: movq %rsi, 8(%r10) +; x86_64-NEXT: movq %rdi, (%r10) +; x86_64-NEXT: retq entry: %0 = shl <2 x i128> %x, %a store <2 x i128> %0, <2 x i128>* %r, align 16 @@ -1354,9 +1354,9 @@ } define void @test_lshr_v2i128_outofrange(<2 x i128> %x, <2 x i128>* nocapture %r) nounwind { -; CHECK-LABEL: test_lshr_v2i128_outofrange: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ret{{[l|q]}} +; ALL-LABEL: test_lshr_v2i128_outofrange: +; ALL: # %bb.0: # %entry +; ALL-NEXT: ret{{[l|q]}} entry: %0 = lshr <2 x i128> %x, store <2 x i128> %0, <2 x i128>* %r, align 16 @@ -1364,9 +1364,9 @@ } define void @test_ashr_v2i128_outofrange(<2 x i128> %x, <2 x i128>* nocapture %r) nounwind { -; CHECK-LABEL: test_ashr_v2i128_outofrange: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ret{{[l|q]}} +; ALL-LABEL: test_ashr_v2i128_outofrange: +; ALL: # %bb.0: # %entry +; ALL-NEXT: ret{{[l|q]}} entry: %0 = ashr <2 x i128> %x, store <2 x i128> %0, <2 x i128>* %r, align 16 @@ -1374,9 +1374,9 @@ } define void @test_shl_v2i128_outofrange(<2 x i128> %x, <2 x i128>* nocapture %r) nounwind { -; CHECK-LABEL: test_shl_v2i128_outofrange: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ret{{[l|q]}} +; ALL-LABEL: test_shl_v2i128_outofrange: +; ALL: # %bb.0: # %entry +; ALL-NEXT: ret{{[l|q]}} entry: %0 = shl <2 x i128> %x, store <2 x i128> %0, <2 x i128>* %r, align 16 @@ -1384,25 +1384,25 @@ } define void @test_lshr_v2i128_outofrange_sum(<2 x i128> %x, <2 x i128>* nocapture %r) nounwind { -; X86-LABEL: test_lshr_v2i128_outofrange_sum: -; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $0, 28(%eax) -; X86-NEXT: movl $0, 24(%eax) -; X86-NEXT: movl $0, 20(%eax) -; X86-NEXT: movl $0, 16(%eax) -; X86-NEXT: movl $0, 12(%eax) -; X86-NEXT: movl $0, 8(%eax) -; X86-NEXT: movl $0, 4(%eax) -; X86-NEXT: movl $0, (%eax) -; X86-NEXT: retl +; i686-LABEL: 
test_lshr_v2i128_outofrange_sum: +; i686: # %bb.0: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl $0, 28(%eax) +; i686-NEXT: movl $0, 24(%eax) +; i686-NEXT: movl $0, 20(%eax) +; i686-NEXT: movl $0, 16(%eax) +; i686-NEXT: movl $0, 12(%eax) +; i686-NEXT: movl $0, 8(%eax) +; i686-NEXT: movl $0, 4(%eax) +; i686-NEXT: movl $0, (%eax) +; i686-NEXT: retl ; -; X64-LABEL: test_lshr_v2i128_outofrange_sum: -; X64: # %bb.0: # %entry -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 16(%r8) -; X64-NEXT: movaps %xmm0, (%r8) -; X64-NEXT: retq +; x86_64-LABEL: test_lshr_v2i128_outofrange_sum: +; x86_64: # %bb.0: # %entry +; x86_64-NEXT: xorps %xmm0, %xmm0 +; x86_64-NEXT: movaps %xmm0, 16(%r8) +; x86_64-NEXT: movaps %xmm0, (%r8) +; x86_64-NEXT: retq entry: %0 = lshr <2 x i128> %x, %1 = lshr <2 x i128> %0, @@ -1411,25 +1411,25 @@ } define void @test_ashr_v2i128_outofrange_sum(<2 x i128> %x, <2 x i128>* nocapture %r) nounwind { -; X86-LABEL: test_ashr_v2i128_outofrange_sum: -; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $0, 28(%eax) -; X86-NEXT: movl $0, 24(%eax) -; X86-NEXT: movl $0, 20(%eax) -; X86-NEXT: movl $0, 16(%eax) -; X86-NEXT: movl $0, 12(%eax) -; X86-NEXT: movl $0, 8(%eax) -; X86-NEXT: movl $0, 4(%eax) -; X86-NEXT: movl $0, (%eax) -; X86-NEXT: retl +; i686-LABEL: test_ashr_v2i128_outofrange_sum: +; i686: # %bb.0: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl $0, 28(%eax) +; i686-NEXT: movl $0, 24(%eax) +; i686-NEXT: movl $0, 20(%eax) +; i686-NEXT: movl $0, 16(%eax) +; i686-NEXT: movl $0, 12(%eax) +; i686-NEXT: movl $0, 8(%eax) +; i686-NEXT: movl $0, 4(%eax) +; i686-NEXT: movl $0, (%eax) +; i686-NEXT: retl ; -; X64-LABEL: test_ashr_v2i128_outofrange_sum: -; X64: # %bb.0: # %entry -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 16(%r8) -; X64-NEXT: movaps %xmm0, (%r8) -; X64-NEXT: retq +; x86_64-LABEL: test_ashr_v2i128_outofrange_sum: +; x86_64: # %bb.0: # %entry +; 
x86_64-NEXT: xorps %xmm0, %xmm0 +; x86_64-NEXT: movaps %xmm0, 16(%r8) +; x86_64-NEXT: movaps %xmm0, (%r8) +; x86_64-NEXT: retq entry: %0 = ashr <2 x i128> %x, %1 = ashr <2 x i128> %0, @@ -1438,25 +1438,25 @@ } define void @test_shl_v2i128_outofrange_sum(<2 x i128> %x, <2 x i128>* nocapture %r) nounwind { -; X86-LABEL: test_shl_v2i128_outofrange_sum: -; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $0, 28(%eax) -; X86-NEXT: movl $0, 24(%eax) -; X86-NEXT: movl $0, 20(%eax) -; X86-NEXT: movl $0, 16(%eax) -; X86-NEXT: movl $0, 12(%eax) -; X86-NEXT: movl $0, 8(%eax) -; X86-NEXT: movl $0, 4(%eax) -; X86-NEXT: movl $0, (%eax) -; X86-NEXT: retl +; i686-LABEL: test_shl_v2i128_outofrange_sum: +; i686: # %bb.0: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl $0, 28(%eax) +; i686-NEXT: movl $0, 24(%eax) +; i686-NEXT: movl $0, 20(%eax) +; i686-NEXT: movl $0, 16(%eax) +; i686-NEXT: movl $0, 12(%eax) +; i686-NEXT: movl $0, 8(%eax) +; i686-NEXT: movl $0, 4(%eax) +; i686-NEXT: movl $0, (%eax) +; i686-NEXT: retl ; -; X64-LABEL: test_shl_v2i128_outofrange_sum: -; X64: # %bb.0: # %entry -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 16(%r8) -; X64-NEXT: movaps %xmm0, (%r8) -; X64-NEXT: retq +; x86_64-LABEL: test_shl_v2i128_outofrange_sum: +; x86_64: # %bb.0: # %entry +; x86_64-NEXT: xorps %xmm0, %xmm0 +; x86_64-NEXT: movaps %xmm0, 16(%r8) +; x86_64-NEXT: movaps %xmm0, (%r8) +; x86_64-NEXT: retq entry: %0 = shl <2 x i128> %x, %1 = shl <2 x i128> %0, @@ -1469,36 +1469,36 @@ ; define <2 x i256> @shl_sext_shl_outofrange(<2 x i128> %a0) { -; X86-LABEL: shl_sext_shl_outofrange: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $0, 60(%eax) -; X86-NEXT: movl $0, 56(%eax) -; X86-NEXT: movl $0, 52(%eax) -; X86-NEXT: movl $0, 48(%eax) -; X86-NEXT: movl $0, 44(%eax) -; X86-NEXT: movl $0, 40(%eax) -; X86-NEXT: movl $0, 36(%eax) -; X86-NEXT: movl $0, 32(%eax) -; X86-NEXT: movl $0, 28(%eax) -; X86-NEXT: 
movl $0, 24(%eax) -; X86-NEXT: movl $0, 20(%eax) -; X86-NEXT: movl $0, 16(%eax) -; X86-NEXT: movl $0, 12(%eax) -; X86-NEXT: movl $0, 8(%eax) -; X86-NEXT: movl $0, 4(%eax) -; X86-NEXT: movl $0, (%eax) -; X86-NEXT: retl $4 +; i686-LABEL: shl_sext_shl_outofrange: +; i686: # %bb.0: +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl $0, 60(%eax) +; i686-NEXT: movl $0, 56(%eax) +; i686-NEXT: movl $0, 52(%eax) +; i686-NEXT: movl $0, 48(%eax) +; i686-NEXT: movl $0, 44(%eax) +; i686-NEXT: movl $0, 40(%eax) +; i686-NEXT: movl $0, 36(%eax) +; i686-NEXT: movl $0, 32(%eax) +; i686-NEXT: movl $0, 28(%eax) +; i686-NEXT: movl $0, 24(%eax) +; i686-NEXT: movl $0, 20(%eax) +; i686-NEXT: movl $0, 16(%eax) +; i686-NEXT: movl $0, 12(%eax) +; i686-NEXT: movl $0, 8(%eax) +; i686-NEXT: movl $0, 4(%eax) +; i686-NEXT: movl $0, (%eax) +; i686-NEXT: retl $4 ; -; X64-LABEL: shl_sext_shl_outofrange: -; X64: # %bb.0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 48(%rdi) -; X64-NEXT: movaps %xmm0, 32(%rdi) -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: retq +; x86_64-LABEL: shl_sext_shl_outofrange: +; x86_64: # %bb.0: +; x86_64-NEXT: movq %rdi, %rax +; x86_64-NEXT: xorps %xmm0, %xmm0 +; x86_64-NEXT: movaps %xmm0, 48(%rdi) +; x86_64-NEXT: movaps %xmm0, 32(%rdi) +; x86_64-NEXT: movaps %xmm0, 16(%rdi) +; x86_64-NEXT: movaps %xmm0, (%rdi) +; x86_64-NEXT: retq %1 = shl <2 x i128> %a0, %2 = sext <2 x i128> %1 to <2 x i256> %3 = shl <2 x i256> %2, @@ -1506,36 +1506,36 @@ } define <2 x i256> @shl_zext_shl_outofrange(<2 x i128> %a0) { -; X86-LABEL: shl_zext_shl_outofrange: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $0, 60(%eax) -; X86-NEXT: movl $0, 56(%eax) -; X86-NEXT: movl $0, 52(%eax) -; X86-NEXT: movl $0, 48(%eax) -; X86-NEXT: movl $0, 44(%eax) -; X86-NEXT: movl $0, 40(%eax) -; X86-NEXT: movl $0, 36(%eax) -; X86-NEXT: movl $0, 32(%eax) -; X86-NEXT: movl $0, 28(%eax) -; X86-NEXT: 
movl $0, 24(%eax) -; X86-NEXT: movl $0, 20(%eax) -; X86-NEXT: movl $0, 16(%eax) -; X86-NEXT: movl $0, 12(%eax) -; X86-NEXT: movl $0, 8(%eax) -; X86-NEXT: movl $0, 4(%eax) -; X86-NEXT: movl $0, (%eax) -; X86-NEXT: retl $4 +; i686-LABEL: shl_zext_shl_outofrange: +; i686: # %bb.0: +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl $0, 60(%eax) +; i686-NEXT: movl $0, 56(%eax) +; i686-NEXT: movl $0, 52(%eax) +; i686-NEXT: movl $0, 48(%eax) +; i686-NEXT: movl $0, 44(%eax) +; i686-NEXT: movl $0, 40(%eax) +; i686-NEXT: movl $0, 36(%eax) +; i686-NEXT: movl $0, 32(%eax) +; i686-NEXT: movl $0, 28(%eax) +; i686-NEXT: movl $0, 24(%eax) +; i686-NEXT: movl $0, 20(%eax) +; i686-NEXT: movl $0, 16(%eax) +; i686-NEXT: movl $0, 12(%eax) +; i686-NEXT: movl $0, 8(%eax) +; i686-NEXT: movl $0, 4(%eax) +; i686-NEXT: movl $0, (%eax) +; i686-NEXT: retl $4 ; -; X64-LABEL: shl_zext_shl_outofrange: -; X64: # %bb.0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 48(%rdi) -; X64-NEXT: movaps %xmm0, 32(%rdi) -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: retq +; x86_64-LABEL: shl_zext_shl_outofrange: +; x86_64: # %bb.0: +; x86_64-NEXT: movq %rdi, %rax +; x86_64-NEXT: xorps %xmm0, %xmm0 +; x86_64-NEXT: movaps %xmm0, 48(%rdi) +; x86_64-NEXT: movaps %xmm0, 32(%rdi) +; x86_64-NEXT: movaps %xmm0, 16(%rdi) +; x86_64-NEXT: movaps %xmm0, (%rdi) +; x86_64-NEXT: retq %1 = shl <2 x i128> %a0, %2 = zext <2 x i128> %1 to <2 x i256> %3 = shl <2 x i256> %2, @@ -1543,36 +1543,36 @@ } define <2 x i256> @shl_zext_lshr_outofrange(<2 x i128> %a0) { -; X86-LABEL: shl_zext_lshr_outofrange: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl $0, 60(%eax) -; X86-NEXT: movl $0, 56(%eax) -; X86-NEXT: movl $0, 52(%eax) -; X86-NEXT: movl $0, 48(%eax) -; X86-NEXT: movl $0, 44(%eax) -; X86-NEXT: movl $0, 40(%eax) -; X86-NEXT: movl $0, 36(%eax) -; X86-NEXT: movl $0, 32(%eax) -; X86-NEXT: movl $0, 28(%eax) -; X86-NEXT: 
movl $0, 24(%eax) -; X86-NEXT: movl $0, 20(%eax) -; X86-NEXT: movl $0, 16(%eax) -; X86-NEXT: movl $0, 12(%eax) -; X86-NEXT: movl $0, 8(%eax) -; X86-NEXT: movl $0, 4(%eax) -; X86-NEXT: movl $0, (%eax) -; X86-NEXT: retl $4 +; i686-LABEL: shl_zext_lshr_outofrange: +; i686: # %bb.0: +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl $0, 60(%eax) +; i686-NEXT: movl $0, 56(%eax) +; i686-NEXT: movl $0, 52(%eax) +; i686-NEXT: movl $0, 48(%eax) +; i686-NEXT: movl $0, 44(%eax) +; i686-NEXT: movl $0, 40(%eax) +; i686-NEXT: movl $0, 36(%eax) +; i686-NEXT: movl $0, 32(%eax) +; i686-NEXT: movl $0, 28(%eax) +; i686-NEXT: movl $0, 24(%eax) +; i686-NEXT: movl $0, 20(%eax) +; i686-NEXT: movl $0, 16(%eax) +; i686-NEXT: movl $0, 12(%eax) +; i686-NEXT: movl $0, 8(%eax) +; i686-NEXT: movl $0, 4(%eax) +; i686-NEXT: movl $0, (%eax) +; i686-NEXT: retl $4 ; -; X64-LABEL: shl_zext_lshr_outofrange: -; X64: # %bb.0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: xorps %xmm0, %xmm0 -; X64-NEXT: movaps %xmm0, 48(%rdi) -; X64-NEXT: movaps %xmm0, 32(%rdi) -; X64-NEXT: movaps %xmm0, 16(%rdi) -; X64-NEXT: movaps %xmm0, (%rdi) -; X64-NEXT: retq +; x86_64-LABEL: shl_zext_lshr_outofrange: +; x86_64: # %bb.0: +; x86_64-NEXT: movq %rdi, %rax +; x86_64-NEXT: xorps %xmm0, %xmm0 +; x86_64-NEXT: movaps %xmm0, 48(%rdi) +; x86_64-NEXT: movaps %xmm0, 32(%rdi) +; x86_64-NEXT: movaps %xmm0, 16(%rdi) +; x86_64-NEXT: movaps %xmm0, (%rdi) +; x86_64-NEXT: retq %1 = lshr <2 x i128> %a0, %2 = zext <2 x i128> %1 to <2 x i256> %3 = shl <2 x i256> %2, @@ -1580,36 +1580,36 @@ } define i128 @lshr_shl_mask(i128 %a0) { -; X86-LABEL: lshr_shl_mask: -; X86: # %bb.0: -; X86-NEXT: pushl %edi -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_def_cfa_offset 12 -; X86-NEXT: .cfi_offset %esi, -12 -; X86-NEXT: .cfi_offset %edi, -8 -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; 
X86-NEXT: movl $2147483647, %edi # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, 12(%eax) -; X86-NEXT: movl %esi, 8(%eax) -; X86-NEXT: movl %edx, 4(%eax) -; X86-NEXT: movl %ecx, (%eax) -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: popl %edi -; X86-NEXT: .cfi_def_cfa_offset 4 -; X86-NEXT: retl $4 +; i686-LABEL: lshr_shl_mask: +; i686: # %bb.0: +; i686-NEXT: pushl %edi +; i686-NEXT: .cfi_def_cfa_offset 8 +; i686-NEXT: pushl %esi +; i686-NEXT: .cfi_def_cfa_offset 12 +; i686-NEXT: .cfi_offset %esi, -12 +; i686-NEXT: .cfi_offset %edi, -8 +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movl $2147483647, %edi # imm = 0x7FFFFFFF +; i686-NEXT: andl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl %edi, 12(%eax) +; i686-NEXT: movl %esi, 8(%eax) +; i686-NEXT: movl %edx, 4(%eax) +; i686-NEXT: movl %ecx, (%eax) +; i686-NEXT: popl %esi +; i686-NEXT: .cfi_def_cfa_offset 8 +; i686-NEXT: popl %edi +; i686-NEXT: .cfi_def_cfa_offset 4 +; i686-NEXT: retl $4 ; -; X64-LABEL: lshr_shl_mask: -; X64: # %bb.0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: andq %rsi, %rdx -; X64-NEXT: retq +; x86_64-LABEL: lshr_shl_mask: +; x86_64: # %bb.0: +; x86_64-NEXT: movq %rdi, %rax +; x86_64-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF +; x86_64-NEXT: andq %rsi, %rdx +; x86_64-NEXT: retq %1 = shl i128 %a0, 1 %2 = lshr i128 %1, 1 ret i128 %2 diff --git a/llvm/test/CodeGen/X86/shrink-fp-const1.ll b/llvm/test/CodeGen/X86/shrink-fp-const1.ll --- a/llvm/test/CodeGen/X86/shrink-fp-const1.ll +++ b/llvm/test/CodeGen/X86/shrink-fp-const1.ll @@ -1,7 +1,12 @@ -; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | not grep cvtss2sd +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s 
-mtriple=x86_64-- -mattr=+sse2 | FileCheck %s ; PR1264 define double @foo(double %x) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: mulsd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq %y = fmul double %x, 5.000000e-01 ret double %y } diff --git a/llvm/test/CodeGen/X86/shrink-fp-const2.ll b/llvm/test/CodeGen/X86/shrink-fp-const2.ll --- a/llvm/test/CodeGen/X86/shrink-fp-const2.ll +++ b/llvm/test/CodeGen/X86/shrink-fp-const2.ll @@ -1,6 +1,11 @@ -; RUN: llc < %s -mtriple=i686-- | grep flds +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s ; This should be a flds, not fldt. define x86_fp80 @test2() nounwind { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: flds {{\.LCPI.*}} +; CHECK-NEXT: retl entry: ret x86_fp80 0xK3FFFC000000000000000 } diff --git a/llvm/test/CodeGen/X86/split-eh-lpad-edges.ll b/llvm/test/CodeGen/X86/split-eh-lpad-edges.ll --- a/llvm/test/CodeGen/X86/split-eh-lpad-edges.ll +++ b/llvm/test/CodeGen/X86/split-eh-lpad-edges.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep jmp +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s ; rdar://6647639 %struct.FetchPlanHeader = type { i8*, i8*, i32, i8*, i8*, i8*, i8*, i8*, %struct.NSObject* (%struct.NSObject*, %struct.objc_selector*, ...)*, %struct.__attributeDescriptionFlags } @@ -11,6 +12,36 @@ @"\01l_objc_msgSend_fixup_alloc" = external global %struct._message_ref_t, align 16 ; <%struct._message_ref_t*> [#uses=2] define %struct.NSArray* @newFetchedRowsForFetchPlan_MT(%struct.FetchPlanHeader* %fetchPlan, %struct.objc_selector* %selectionMethod, %struct.NSObject* %selectionParameter) ssp personality i32 (...)* @__gxx_personality_v0 { +; CHECK-LABEL: newFetchedRowsForFetchPlan_MT: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: Ltmp0: +; 
CHECK-NEXT: movq l_objc_msgSend_fixup_alloc@{{.*}}(%rip), %rsi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: callq *%rax +; CHECK-NEXT: Ltmp1: +; CHECK-NEXT: ## %bb.1: ## %invcont +; CHECK-NEXT: Ltmp2: +; CHECK-NEXT: movq %rax, %rdi +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: callq _objc_msgSend +; CHECK-NEXT: Ltmp3: +; CHECK-NEXT: ## %bb.2: ## %invcont26 +; CHECK-NEXT: Ltmp4: +; CHECK-NEXT: movq l_objc_msgSend_fixup_alloc@{{.*}}(%rip), %rsi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: callq *%rax +; CHECK-NEXT: Ltmp5: +; CHECK-NEXT: ## %bb.3: ## %invcont27 +; CHECK-NEXT: ud2 +; CHECK-NEXT: LBB0_4: ## %lpad +; CHECK-NEXT: Ltmp6: +; CHECK-NEXT: ud2 +; CHECK-NEXT: Lfunc_end0: entry: %0 = invoke %struct.NSObject* null(%struct.NSObject* null, %struct._message_ref_t* @"\01l_objc_msgSend_fixup_alloc") to label %invcont unwind label %lpad ; <%struct.NSObject*> [#uses=1] diff --git a/llvm/test/CodeGen/X86/split-vector-bitcast.ll b/llvm/test/CodeGen/X86/split-vector-bitcast.ll --- a/llvm/test/CodeGen/X86/split-vector-bitcast.ll +++ b/llvm/test/CodeGen/X86/split-vector-bitcast.ll @@ -1,8 +1,30 @@ -; RUN: llc < %s -mtriple=i686-- -mattr=-sse2,+sse | grep addps +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mattr=-sse2,+sse | FileCheck %s ; PR10497 + another isel issue with sse2 disabled ; (This is primarily checking that this construct doesn't crash.) 
define void @a(<2 x float>* %a, <2 x i32>* %b) { +; CHECK-LABEL: a: +; CHECK: # %bb.0: +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] +; CHECK-NEXT: addps %xmm0, %xmm0 +; CHECK-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] +; CHECK-NEXT: movss %xmm0, (%esp) +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl (%esp), %edx +; CHECK-NEXT: addl %edx, %edx +; CHECK-NEXT: addl %ecx, %ecx +; CHECK-NEXT: movl %ecx, (%eax) +; CHECK-NEXT: movl %edx, 4(%eax) +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl %cc = load <2 x float>, <2 x float>* %a %c = fadd <2 x float> %cc, %cc %dd = bitcast <2 x float> %c to <2 x i32> diff --git a/llvm/test/CodeGen/X86/split-vector-rem.ll b/llvm/test/CodeGen/X86/split-vector-rem.ll --- a/llvm/test/CodeGen/X86/split-vector-rem.ll +++ b/llvm/test/CodeGen/X86/split-vector-rem.ll @@ -1,15 +1,207 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep div | count 16 -; RUN: llc < %s -mtriple=x86_64-- | grep fmodf | count 8 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s define <8 x i32> @foo(<8 x i32> %t, <8 x i32> %u) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: movdqa %xmm0, %xmm4 +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] +; CHECK-NEXT: movd %xmm0, %eax +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3] +; CHECK-NEXT: movd %xmm0, %ecx +; CHECK-NEXT: cltd +; CHECK-NEXT: idivl %ecx +; CHECK-NEXT: movd %edx, %xmm0 +; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,0,1] +; CHECK-NEXT: movd %xmm5, %eax +; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,0,1] +; CHECK-NEXT: movd %xmm5, %ecx +; CHECK-NEXT: cltd +; CHECK-NEXT: idivl %ecx +; CHECK-NEXT: movd %edx, %xmm5 +; CHECK-NEXT: 
punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1] +; CHECK-NEXT: movd %xmm4, %eax +; CHECK-NEXT: movd %xmm2, %ecx +; CHECK-NEXT: cltd +; CHECK-NEXT: idivl %ecx +; CHECK-NEXT: movd %edx, %xmm0 +; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3] +; CHECK-NEXT: movd %xmm4, %eax +; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3] +; CHECK-NEXT: movd %xmm2, %ecx +; CHECK-NEXT: cltd +; CHECK-NEXT: idivl %ecx +; CHECK-NEXT: movd %edx, %xmm2 +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0] +; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3] +; CHECK-NEXT: movd %xmm2, %eax +; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm3[3,1,2,3] +; CHECK-NEXT: movd %xmm2, %ecx +; CHECK-NEXT: cltd +; CHECK-NEXT: idivl %ecx +; CHECK-NEXT: movd %edx, %xmm2 +; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1] +; CHECK-NEXT: movd %xmm4, %eax +; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,0,1] +; CHECK-NEXT: movd %xmm4, %ecx +; CHECK-NEXT: cltd +; CHECK-NEXT: idivl %ecx +; CHECK-NEXT: movd %edx, %xmm4 +; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] +; CHECK-NEXT: movd %xmm1, %eax +; CHECK-NEXT: movd %xmm3, %ecx +; CHECK-NEXT: cltd +; CHECK-NEXT: idivl %ecx +; CHECK-NEXT: movd %edx, %xmm2 +; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3] +; CHECK-NEXT: movd %xmm1, %eax +; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,2,3] +; CHECK-NEXT: movd %xmm1, %ecx +; CHECK-NEXT: cltd +; CHECK-NEXT: idivl %ecx +; CHECK-NEXT: movd %edx, %xmm1 +; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0] +; CHECK-NEXT: movdqa %xmm2, %xmm1 +; CHECK-NEXT: retq %m = srem <8 x i32> %t, %u ret <8 x i32> %m } define <8 x i32> @bar(<8 x i32> %t, <8 x i32> %u) { +; CHECK-LABEL: bar: +; CHECK: # %bb.0: +; CHECK-NEXT: movdqa %xmm0, %xmm4 +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] +; CHECK-NEXT: movd %xmm0, %eax +; CHECK-NEXT: pshufd 
{{.*#+}} xmm0 = xmm2[3,1,2,3] +; CHECK-NEXT: movd %xmm0, %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ecx +; CHECK-NEXT: movd %edx, %xmm0 +; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,0,1] +; CHECK-NEXT: movd %xmm5, %eax +; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,0,1] +; CHECK-NEXT: movd %xmm5, %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ecx +; CHECK-NEXT: movd %edx, %xmm5 +; CHECK-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1] +; CHECK-NEXT: movd %xmm4, %eax +; CHECK-NEXT: movd %xmm2, %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ecx +; CHECK-NEXT: movd %edx, %xmm0 +; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3] +; CHECK-NEXT: movd %xmm4, %eax +; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3] +; CHECK-NEXT: movd %xmm2, %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ecx +; CHECK-NEXT: movd %edx, %xmm2 +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0] +; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3] +; CHECK-NEXT: movd %xmm2, %eax +; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm3[3,1,2,3] +; CHECK-NEXT: movd %xmm2, %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ecx +; CHECK-NEXT: movd %edx, %xmm2 +; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1] +; CHECK-NEXT: movd %xmm4, %eax +; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,0,1] +; CHECK-NEXT: movd %xmm4, %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ecx +; CHECK-NEXT: movd %edx, %xmm4 +; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] +; CHECK-NEXT: movd %xmm1, %eax +; CHECK-NEXT: movd %xmm3, %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ecx +; CHECK-NEXT: movd %edx, %xmm2 +; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3] +; CHECK-NEXT: movd %xmm1, %eax +; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,2,3] +; CHECK-NEXT: movd %xmm1, %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ecx +; CHECK-NEXT: movd 
%edx, %xmm1 +; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0] +; CHECK-NEXT: movdqa %xmm2, %xmm1 +; CHECK-NEXT: retq %m = urem <8 x i32> %t, %u ret <8 x i32> %m } define <8 x float> @qux(<8 x float> %t, <8 x float> %u) { +; CHECK-LABEL: qux: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $104, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 112 +; CHECK-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm2, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] +; CHECK-NEXT: movaps %xmm2, %xmm1 +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1],xmm2[2,3] +; CHECK-NEXT: callq fmodf +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq fmodf +; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq fmodf +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] +; CHECK-NEXT: callq fmodf +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; 
CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] +; CHECK-NEXT: callq fmodf +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq fmodf +; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq fmodf +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] +; CHECK-NEXT: callq fmodf +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: addq $104, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq %m = frem <8 x float> %t, %u ret <8 x float> %m } diff --git a/llvm/test/CodeGen/X86/sse-align-1.ll b/llvm/test/CodeGen/X86/sse-align-1.ll 
--- a/llvm/test/CodeGen/X86/sse-align-1.ll +++ b/llvm/test/CodeGen/X86/sse-align-1.ll @@ -1,10 +1,19 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep movap | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s define <4 x float> @foo(<4 x float>* %p) nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: retq %t = load <4 x float>, <4 x float>* %p ret <4 x float> %t } define <2 x double> @bar(<2 x double>* %p) nounwind { +; CHECK-LABEL: bar: +; CHECK: # %bb.0: +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: retq %t = load <2 x double>, <2 x double>* %p ret <2 x double> %t } diff --git a/llvm/test/CodeGen/X86/sse-align-11.ll b/llvm/test/CodeGen/X86/sse-align-11.ll --- a/llvm/test/CodeGen/X86/sse-align-11.ll +++ b/llvm/test/CodeGen/X86/sse-align-11.ll @@ -1,13 +1,23 @@ -; RUN: llc < %s -mcpu=yonah -mtriple=i686-apple-darwin8 | grep movaps -; RUN: llc < %s -mcpu=yonah -mtriple=i686-linux-gnu | grep movaps +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mcpu=yonah -mtriple=i686-apple-darwin8 | FileCheck %s --check-prefix=i686-apple-darwin8 +; RUN: llc < %s -mcpu=yonah -mtriple=i686-linux-gnu | FileCheck %s --check-prefix=i686-linux-gnu ; PR8969 - make 32-bit linux have a 16-byte aligned stack define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind { +; i686-apple-darwin8-LABEL: foo: +; i686-apple-darwin8: ## %bb.0: ## %entry +; i686-apple-darwin8-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 +; i686-apple-darwin8-NEXT: retl +; +; i686-linux-gnu-LABEL: foo: +; i686-linux-gnu: # %bb.0: # %entry +; i686-linux-gnu-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 +; i686-linux-gnu-NEXT: retl entry: - %tmp6 = insertelement <4 x float> undef, float %a, i32 0 - %tmp7 = insertelement <4 x float> %tmp6, float %b, i32 1 - %tmp8 = insertelement <4 x float> %tmp7, float %c, i32 2 - %tmp9 = insertelement <4 x 
float> %tmp8, float %d, i32 3 + %tmp6 = insertelement <4 x float> undef, float %a, i32 0 + %tmp7 = insertelement <4 x float> %tmp6, float %b, i32 1 + %tmp8 = insertelement <4 x float> %tmp7, float %c, i32 2 + %tmp9 = insertelement <4 x float> %tmp8, float %d, i32 3 ret <4 x float> %tmp9 } diff --git a/llvm/test/CodeGen/X86/sse-align-4.ll b/llvm/test/CodeGen/X86/sse-align-4.ll --- a/llvm/test/CodeGen/X86/sse-align-4.ll +++ b/llvm/test/CodeGen/X86/sse-align-4.ll @@ -1,10 +1,19 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep movup | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s define void @foo(<4 x float>* %p, <4 x float> %x) nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: movups %xmm0, (%rdi) +; CHECK-NEXT: retq store <4 x float> %x, <4 x float>* %p, align 4 ret void } define void @bar(<2 x double>* %p, <2 x double> %x) nounwind { +; CHECK-LABEL: bar: +; CHECK: # %bb.0: +; CHECK-NEXT: movups %xmm0, (%rdi) +; CHECK-NEXT: retq store <2 x double> %x, <2 x double>* %p, align 8 ret void } diff --git a/llvm/test/CodeGen/X86/sse-align-5.ll b/llvm/test/CodeGen/X86/sse-align-5.ll --- a/llvm/test/CodeGen/X86/sse-align-5.ll +++ b/llvm/test/CodeGen/X86/sse-align-5.ll @@ -1,6 +1,11 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep movaps | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s define <2 x i64> @bar(<2 x i64>* %p) nounwind { +; CHECK-LABEL: bar: +; CHECK: # %bb.0: +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: retq %t = load <2 x i64>, <2 x i64>* %p ret <2 x i64> %t } diff --git a/llvm/test/CodeGen/X86/sse-align-6.ll b/llvm/test/CodeGen/X86/sse-align-6.ll --- a/llvm/test/CodeGen/X86/sse-align-6.ll +++ b/llvm/test/CodeGen/X86/sse-align-6.ll @@ -1,6 +1,21 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep movdqu | count 1 +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s define <2 x i64> @bar(<2 x i64>* %p, <2 x i64> %x) nounwind { +; CHECK-LABEL: bar: +; CHECK: # %bb.0: +; CHECK-NEXT: movdqu (%rdi), %xmm1 +; CHECK-NEXT: movdqa %xmm0, %xmm2 +; CHECK-NEXT: psrlq $32, %xmm2 +; CHECK-NEXT: pmuludq %xmm1, %xmm2 +; CHECK-NEXT: movdqa %xmm1, %xmm3 +; CHECK-NEXT: psrlq $32, %xmm3 +; CHECK-NEXT: pmuludq %xmm0, %xmm3 +; CHECK-NEXT: paddq %xmm2, %xmm3 +; CHECK-NEXT: psllq $32, %xmm3 +; CHECK-NEXT: pmuludq %xmm1, %xmm0 +; CHECK-NEXT: paddq %xmm3, %xmm0 +; CHECK-NEXT: retq %t = load <2 x i64>, <2 x i64>* %p, align 8 %z = mul <2 x i64> %t, %x ret <2 x i64> %z diff --git a/llvm/test/CodeGen/X86/sse-align-8.ll b/llvm/test/CodeGen/X86/sse-align-8.ll --- a/llvm/test/CodeGen/X86/sse-align-8.ll +++ b/llvm/test/CodeGen/X86/sse-align-8.ll @@ -1,6 +1,11 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep movups | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s define void @bar(<2 x i64>* %p, <2 x i64> %x) nounwind { +; CHECK-LABEL: bar: +; CHECK: # %bb.0: +; CHECK-NEXT: movups %xmm0, (%rdi) +; CHECK-NEXT: retq store <2 x i64> %x, <2 x i64>* %p, align 8 ret void } diff --git a/llvm/test/CodeGen/X86/sse-align-9.ll b/llvm/test/CodeGen/X86/sse-align-9.ll --- a/llvm/test/CodeGen/X86/sse-align-9.ll +++ b/llvm/test/CodeGen/X86/sse-align-9.ll @@ -1,10 +1,19 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep movup | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s define <4 x float> @foo(<4 x float>* %p) nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: movups (%rdi), %xmm0 +; CHECK-NEXT: retq %t = load <4 x float>, <4 x float>* %p, align 4 ret <4 x float> %t } define <2 x double> @bar(<2 x double>* %p) nounwind { +; CHECK-LABEL: bar: +; CHECK: # %bb.0: +; CHECK-NEXT: movups (%rdi), %xmm0 +; CHECK-NEXT: 
retq %t = load <2 x double>, <2 x double>* %p, align 8 ret <2 x double> %t } diff --git a/llvm/test/CodeGen/X86/sse-load-ret.ll b/llvm/test/CodeGen/X86/sse-load-ret.ll --- a/llvm/test/CodeGen/X86/sse-load-ret.ll +++ b/llvm/test/CodeGen/X86/sse-load-ret.ll @@ -1,19 +1,50 @@ -; RUN: llc < %s -mtriple=i686-- -mcpu=yonah | not grep movss -; RUN: llc < %s -mtriple=i686-- -mcpu=yonah | not grep xmm +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mcpu=yonah | FileCheck %s define double @test1(double* %P) { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: fldl (%eax) +; CHECK-NEXT: retl %X = load double, double* %P ; [#uses=1] ret double %X } define double @test2() { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: +; CHECK-NEXT: fldl {{\.LCPI.*}} +; CHECK-NEXT: retl ret double 1.234560e+03 } -; FIXME: Todo -;double %test3(bool %B) { -; %C = select bool %B, double 123.412, double 523.01123123 -; ret double %C -;} +define double @test3(i1 %B) { +; CHECK-LABEL: test3: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: .cfi_def_cfa_register %ebp +; CHECK-NEXT: andl $-8, %esp +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: testb $1, 8(%ebp) +; CHECK-NEXT: jne .LBB2_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: jmp .LBB2_3 +; CHECK-NEXT: .LBB2_1: +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: .LBB2_3: +; CHECK-NEXT: movsd %xmm0, (%esp) +; CHECK-NEXT: fldl (%esp) +; CHECK-NEXT: movl %ebp, %esp +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: .cfi_def_cfa %esp, 4 +; CHECK-NEXT: retl + %C = select i1 %B, double 123.412, double 523.01123123 + ret double %C +} diff --git a/llvm/test/CodeGen/X86/sse-varargs.ll b/llvm/test/CodeGen/X86/sse-varargs.ll --- a/llvm/test/CodeGen/X86/sse-varargs.ll +++ 
b/llvm/test/CodeGen/X86/sse-varargs.ll @@ -1,8 +1,22 @@ -; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | grep xmm | grep esp - +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | FileCheck %s define i32 @t() nounwind { +; CHECK-LABEL: t: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: andl $-16, %esp +; CHECK-NEXT: subl $48, %esp +; CHECK-NEXT: movaps {{.*#+}} xmm0 = [10,11,12,13] +; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $1, (%esp) +; CHECK-NEXT: calll foo +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: movl %ebp, %esp +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl entry: - tail call void (i32, ...) @foo( i32 1, <4 x i32> < i32 10, i32 11, i32 12, i32 13 > ) nounwind + tail call void (i32, ...) @foo( i32 1, <4 x i32> < i32 10, i32 11, i32 12, i32 13 > ) nounwind ret i32 0 } diff --git a/llvm/test/CodeGen/X86/store-global-address.ll b/llvm/test/CodeGen/X86/store-global-address.ll --- a/llvm/test/CodeGen/X86/store-global-address.ll +++ b/llvm/test/CodeGen/X86/store-global-address.ll @@ -1,9 +1,14 @@ -; RUN: llc < %s -mtriple=i686-- | grep movl | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s @dst = global i32 0 ; [#uses=1] @ptr = global i32* null ; [#uses=1] define void @test() { +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: movl $dst, ptr +; CHECK-NEXT: retl store i32* @dst, i32** @ptr ret void } diff --git a/llvm/test/CodeGen/X86/storetrunc-fp.ll b/llvm/test/CodeGen/X86/storetrunc-fp.ll --- a/llvm/test/CodeGen/X86/storetrunc-fp.ll +++ b/llvm/test/CodeGen/X86/storetrunc-fp.ll @@ -1,6 +1,15 @@ -; RUN: llc < %s -mtriple=i686-- | not grep flds +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s define void @foo(x86_fp80 %a, x86_fp80 %b, float* %fp) { +; 
CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: fldt {{[0-9]+}}(%esp) +; CHECK-NEXT: fldt {{[0-9]+}}(%esp) +; CHECK-NEXT: faddp %st, %st(1) +; CHECK-NEXT: fstps (%eax) +; CHECK-NEXT: retl %c = fadd x86_fp80 %a, %b %d = fptrunc x86_fp80 %c to float store float %d, float* %fp diff --git a/llvm/test/CodeGen/X86/subreg-to-reg-0.ll b/llvm/test/CodeGen/X86/subreg-to-reg-0.ll --- a/llvm/test/CodeGen/X86/subreg-to-reg-0.ll +++ b/llvm/test/CodeGen/X86/subreg-to-reg-0.ll @@ -1,9 +1,15 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep mov | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s ; Do eliminate the zero-extension instruction and rely on ; x86-64's implicit zero-extension! define i64 @foo(i32* %p) nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: incl %eax +; CHECK-NEXT: retq %t = load i32, i32* %p %n = add i32 %t, 1 %z = zext i32 %n to i64 diff --git a/llvm/test/CodeGen/X86/subreg-to-reg-2.ll b/llvm/test/CodeGen/X86/subreg-to-reg-2.ll --- a/llvm/test/CodeGen/X86/subreg-to-reg-2.ll +++ b/llvm/test/CodeGen/X86/subreg-to-reg-2.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s ; rdar://6707985 %XXOO = type { %"struct.XXC::XXCC", i8*, %"struct.XXC::XXOO::$_71" } @@ -9,6 +10,13 @@ %"struct.XXC::XXOO::$_71" = type { [2 x %XXValue*] } define internal fastcc %XXValue* @t(i64* %out, %"struct.XXC::ArrayStorage"* %tmp9) nounwind { +; CHECK-LABEL: t: +; CHECK: ## %bb.0: ## %prologue +; CHECK-NEXT: movq 22222222, %rax +; CHECK-NEXT: movq %rax, (%rdi) +; CHECK-NEXT: movl %eax, %eax +; CHECK-NEXT: movq 32(%rsi,%rax,8), %rax +; CHECK-NEXT: retq prologue: %array = load %XXValue*, %XXValue** inttoptr (i64 11111111 to %XXValue**) ; <%XXValue*> 
[#uses=0] %index = load %XXValue*, %XXValue** inttoptr (i64 22222222 to %XXValue**) ; <%XXValue*> [#uses=1] diff --git a/llvm/test/CodeGen/X86/subreg-to-reg-4.ll b/llvm/test/CodeGen/X86/subreg-to-reg-4.ll --- a/llvm/test/CodeGen/X86/subreg-to-reg-4.ll +++ b/llvm/test/CodeGen/X86/subreg-to-reg-4.ll @@ -1,17 +1,16 @@ -; RUN: llc < %s -mtriple=x86_64-- > %t -; RUN: not grep leaq %t -; RUN: not grep incq %t -; RUN: not grep decq %t -; RUN: not grep negq %t -; RUN: not grep addq %t -; RUN: not grep subq %t -; RUN: not grep "movl %" %t +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s ; Utilize implicit zero-extension on x86-64 to eliminate explicit ; zero-extensions. Shrink 64-bit adds to 32-bit when the high ; 32-bits will be zeroed. define void @bar(i64 %x, i64 %y, i64* %z) nounwind readnone { +; CHECK-LABEL: bar: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addl %esi, %edi +; CHECK-NEXT: movq %rdi, (%rdx) +; CHECK-NEXT: retq entry: %t0 = add i64 %x, %y %t1 = and i64 %t0, 4294967295 @@ -19,6 +18,12 @@ ret void } define void @easy(i32 %x, i32 %y, i64* %z) nounwind readnone { +; CHECK-LABEL: easy: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: addl %esi, %edi +; CHECK-NEXT: movq %rdi, (%rdx) +; CHECK-NEXT: retq entry: %t0 = add i32 %x, %y %tn = zext i32 %t0 to i64 @@ -27,6 +32,12 @@ ret void } define void @cola(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone { +; CHECK-LABEL: cola: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addl (%rdi), %esi +; CHECK-NEXT: xorq %rcx, %rsi +; CHECK-NEXT: movq %rsi, (%rdx) +; CHECK-NEXT: retq entry: %p = load i64, i64* %x %t0 = add i64 %p, %y @@ -36,6 +47,12 @@ ret void } define void @yaks(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone { +; CHECK-LABEL: yaks: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addl (%rdi), %esi +; CHECK-NEXT: xorl %esi, %ecx +; CHECK-NEXT: movq %rcx, (%rdx) +; CHECK-NEXT: retq 
entry: %p = load i64, i64* %x %t0 = add i64 %p, %y @@ -45,6 +62,12 @@ ret void } define void @foo(i64 *%x, i64 *%y, i64* %z) nounwind readnone { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: addl (%rsi), %eax +; CHECK-NEXT: movq %rax, (%rdx) +; CHECK-NEXT: retq entry: %a = load i64, i64* %x %b = load i64, i64* %y @@ -54,6 +77,12 @@ ret void } define void @avo(i64 %x, i64* %z, i64 %u) nounwind readnone { +; CHECK-LABEL: avo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addl $734847, %edi # imm = 0xB367F +; CHECK-NEXT: xorq %rdx, %rdi +; CHECK-NEXT: movq %rdi, (%rsi) +; CHECK-NEXT: retq entry: %t0 = add i64 %x, 734847 %t1 = and i64 %t0, 4294967295 @@ -62,6 +91,12 @@ ret void } define void @phe(i64 %x, i64* %z, i64 %u) nounwind readnone { +; CHECK-LABEL: phe: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addl $734847, %edi # imm = 0xB367F +; CHECK-NEXT: xorl %edi, %edx +; CHECK-NEXT: movq %rdx, (%rsi) +; CHECK-NEXT: retq entry: %t0 = add i64 %x, 734847 %t1 = xor i64 %t0, %u @@ -70,6 +105,11 @@ ret void } define void @oze(i64 %y, i64* %z) nounwind readnone { +; CHECK-LABEL: oze: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: incl %edi +; CHECK-NEXT: movq %rdi, (%rsi) +; CHECK-NEXT: retq entry: %t0 = add i64 %y, 1 %t1 = and i64 %t0, 4294967295 @@ -78,6 +118,11 @@ } define void @sbar(i64 %x, i64 %y, i64* %z) nounwind readnone { +; CHECK-LABEL: sbar: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: movq %rdi, (%rdx) +; CHECK-NEXT: retq entry: %t0 = sub i64 %x, %y %t1 = and i64 %t0, 4294967295 @@ -85,6 +130,12 @@ ret void } define void @seasy(i32 %x, i32 %y, i64* %z) nounwind readnone { +; CHECK-LABEL: seasy: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: movq %rdi, (%rdx) +; CHECK-NEXT: retq entry: %t0 = sub i32 %x, %y %tn = zext i32 %t0 to i64 @@ -93,6 +144,13 @@ ret void } define void @scola(i64 *%x, i64 %y, i64* %z, i64 %u) 
nounwind readnone { +; CHECK-LABEL: scola: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: subl %esi, %eax +; CHECK-NEXT: xorq %rcx, %rax +; CHECK-NEXT: movq %rax, (%rdx) +; CHECK-NEXT: retq entry: %p = load i64, i64* %x %t0 = sub i64 %p, %y @@ -102,6 +160,13 @@ ret void } define void @syaks(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone { +; CHECK-LABEL: syaks: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: subl %esi, %eax +; CHECK-NEXT: xorl %eax, %ecx +; CHECK-NEXT: movq %rcx, (%rdx) +; CHECK-NEXT: retq entry: %p = load i64, i64* %x %t0 = sub i64 %p, %y @@ -111,6 +176,12 @@ ret void } define void @sfoo(i64 *%x, i64 *%y, i64* %z) nounwind readnone { +; CHECK-LABEL: sfoo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: subl (%rsi), %eax +; CHECK-NEXT: movq %rax, (%rdx) +; CHECK-NEXT: retq entry: %a = load i64, i64* %x %b = load i64, i64* %y @@ -120,6 +191,11 @@ ret void } define void @swya(i64 %y, i64* %z) nounwind readnone { +; CHECK-LABEL: swya: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: negl %edi +; CHECK-NEXT: movq %rdi, (%rsi) +; CHECK-NEXT: retq entry: %t0 = sub i64 0, %y %t1 = and i64 %t0, 4294967295 @@ -127,6 +203,11 @@ ret void } define void @soze(i64 %y, i64* %z) nounwind readnone { +; CHECK-LABEL: soze: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: decl %edi +; CHECK-NEXT: movq %rdi, (%rsi) +; CHECK-NEXT: retq entry: %t0 = sub i64 %y, 1 %t1 = and i64 %t0, 4294967295 diff --git a/llvm/test/CodeGen/X86/switch-zextload.ll b/llvm/test/CodeGen/X86/switch-zextload.ll --- a/llvm/test/CodeGen/X86/switch-zextload.ll +++ b/llvm/test/CodeGen/X86/switch-zextload.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s | grep mov | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s ; Do zextload, instead of a load and a separate zext. 
@@ -8,6 +9,17 @@ %struct.node_t = type { i8, i8, i8, i8, i32, i32, %struct.node_t**, %struct.node_t*, %struct.move_s } define fastcc void @set_proof_and_disproof_numbers(%struct.node_t* nocapture %node) nounwind { +; CHECK-LABEL: set_proof_and_disproof_numbers: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movzbl 0, %eax +; CHECK-NEXT: cmpl $3, %eax +; CHECK-NEXT: ja LBB0_3 +; CHECK-NEXT: ## %bb.1: ## %entry +; CHECK-NEXT: jmpl *LJTI0_0(,%eax,4) +; CHECK-NEXT: LBB0_3: ## %return +; CHECK-NEXT: retl +; CHECK-NEXT: LBB0_2: ## %bb31 +; CHECK-NEXT: ud2 entry: %0 = load i8, i8* null, align 1 ; [#uses=1] switch i8 %0, label %return [ diff --git a/llvm/test/CodeGen/X86/tailcall.ll b/llvm/test/CodeGen/X86/tailcall.ll --- a/llvm/test/CodeGen/X86/tailcall.ll +++ b/llvm/test/CodeGen/X86/tailcall.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=i686-- -tailcallopt | grep TAILCALL | count 7 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -tailcallopt | FileCheck %s ; With -tailcallopt, CodeGen guarantees a tail call optimization ; for all of these. 
@@ -6,6 +7,15 @@ declare fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) define fastcc i32 @tailcaller(i32 %in1, i32 %in2) nounwind { +; CHECK-LABEL: tailcaller: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %edx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: jmp tailcallee # TAILCALL entry: %tmp11 = tail call fastcc i32 @tailcallee(i32 %in1, i32 %in2, i32 %in1, i32 %in2) ret i32 %tmp11 @@ -14,6 +24,9 @@ declare fastcc i8* @alias_callee() define fastcc noalias i8* @noalias_caller() nounwind { +; CHECK-LABEL: noalias_caller: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp alias_callee # TAILCALL %p = tail call fastcc i8* @alias_callee() ret i8* %p } @@ -21,6 +34,9 @@ declare fastcc noalias i8* @noalias_callee() define fastcc i8* @alias_caller() nounwind { +; CHECK-LABEL: alias_caller: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp noalias_callee # TAILCALL %p = tail call fastcc noalias i8* @noalias_callee() ret i8* %p } @@ -28,6 +44,9 @@ declare fastcc i32 @i32_callee() define fastcc i32 @ret_undef() nounwind { +; CHECK-LABEL: ret_undef: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp i32_callee # TAILCALL %p = tail call fastcc i32 @i32_callee() ret i32 undef } @@ -35,17 +54,52 @@ declare fastcc void @does_not_return() define fastcc i32 @noret() nounwind { +; CHECK-LABEL: noret: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp does_not_return # TAILCALL tail call fastcc void @does_not_return() unreachable } define fastcc void @void_test(i32, i32, i32, i32) { +; CHECK-LABEL: void_test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl %esi, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl 
%eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: jmp void_test # TAILCALL entry: tail call fastcc void @void_test( i32 %0, i32 %1, i32 %2, i32 %3) - ret void + ret void } define fastcc i1 @i1test(i32, i32, i32, i32) { +; CHECK-LABEL: i1test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl %esi, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: jmp i1test # TAILCALL entry: %4 = tail call fastcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3) ret i1 %4 diff --git a/llvm/test/CodeGen/X86/twoaddr-coalesce.ll b/llvm/test/CodeGen/X86/twoaddr-coalesce.ll --- a/llvm/test/CodeGen/X86/twoaddr-coalesce.ll +++ b/llvm/test/CodeGen/X86/twoaddr-coalesce.ll @@ -1,9 +1,33 @@ -; RUN: llc < %s -mtriple=i686-- | grep mov | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s ; rdar://6523745 @"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1] define i32 @foo() nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %bb1.thread +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %bb1 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %ebx, %eax +; CHECK-NEXT: shrb $7, %al +; CHECK-NEXT: addb %bl, %al +; CHECK-NEXT: sarb %al +; CHECK-NEXT: movsbl %al, %eax +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: pushl $LC +; CHECK-NEXT: calll printf +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: incl %ebx +; 
CHECK-NEXT: cmpl $258, %ebx # imm = 0x102 +; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: # %bb.2: # %bb2 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: retl bb1.thread: br label %bb1 diff --git a/llvm/test/CodeGen/X86/umul-with-carry.ll b/llvm/test/CodeGen/X86/umul-with-carry.ll --- a/llvm/test/CodeGen/X86/umul-with-carry.ll +++ b/llvm/test/CodeGen/X86/umul-with-carry.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=i386-- | grep "jc" | count 1 -; XFAIL: * +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-- | FileCheck %s ; FIXME: umul-with-overflow not supported yet. @@ -7,6 +7,24 @@ @no = internal constant [4 x i8] c"no\0A\00" define i1 @func(i32 %v1, i32 %v2) nounwind { +; CHECK-LABEL: func: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: mull {{[0-9]+}}(%esp) +; CHECK-NEXT: jno .LBB0_1 +; CHECK-NEXT: # %bb.2: # %carry +; CHECK-NEXT: pushl $no +; CHECK-NEXT: calll printf +; CHECK-NEXT: addl $4, %esp +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_1: # %normal +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: pushl $ok +; CHECK-NEXT: calll printf +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: retl entry: %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) %sum = extractvalue {i32, i1} %t, 0 diff --git a/llvm/test/CodeGen/X86/variable-sized-darwin-bzero.ll b/llvm/test/CodeGen/X86/variable-sized-darwin-bzero.ll --- a/llvm/test/CodeGen/X86/variable-sized-darwin-bzero.ll +++ b/llvm/test/CodeGen/X86/variable-sized-darwin-bzero.ll @@ -1,6 +1,18 @@ -; RUN: llc < %s -mtriple=i686-apple-darwin10 | grep __bzero +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-apple-darwin10 | FileCheck %s define void @foo(i8* %p, i64 %n) { +; CHECK-LABEL: foo: +; CHECK: ## %bb.0: +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: 
movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %eax, (%esp) +; CHECK-NEXT: calll ___bzero +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: retl call void @llvm.memset.p0i8.i64(i8* align 4 %p, i8 0, i64 %n, i1 false) ret void } diff --git a/llvm/test/CodeGen/X86/vec_align.ll b/llvm/test/CodeGen/X86/vec_align.ll --- a/llvm/test/CodeGen/X86/vec_align.ll +++ b/llvm/test/CodeGen/X86/vec_align.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mcpu=yonah -relocation-model=static | grep movaps | count 2 +; RUN: llc < %s -mcpu=yonah -relocation-model=static | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin8" @@ -9,6 +9,10 @@ @G = external global { float,float,float,float}, align 16 define %f4 @test1(float %W, float %X, float %Y, float %Z) nounwind { +; CHECK-LABEL: test1: +; CHECK: ## %bb.0: +; CHECK-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 +; CHECK-NEXT: retl %tmp = insertelement %f4 undef, float %W, i32 0 %tmp2 = insertelement %f4 %tmp, float %X, i32 1 %tmp4 = insertelement %f4 %tmp2, float %Y, i32 2 @@ -17,6 +21,10 @@ } define %f4 @test2() nounwind { +; CHECK-LABEL: test2: +; CHECK: ## %bb.0: +; CHECK-NEXT: movaps _G, %xmm0 +; CHECK-NEXT: retl %Wp = getelementptr { float,float,float,float}, { float,float,float,float}* @G, i32 0, i32 0 %Xp = getelementptr { float,float,float,float}, { float,float,float,float}* @G, i32 0, i32 1 %Yp = getelementptr { float,float,float,float}, { float,float,float,float}* @G, i32 0, i32 2 diff --git a/llvm/test/CodeGen/X86/vec_call.ll b/llvm/test/CodeGen/X86/vec_call.ll --- a/llvm/test/CodeGen/X86/vec_call.ll +++ b/llvm/test/CodeGen/X86/vec_call.ll @@ -1,11 +1,28 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s 
-mcpu=generic -mattr=+sse2 -mtriple=i686-apple-darwin8 | \ -; RUN: grep "subl.*60" -; RUN: llc < %s -mcpu=generic -mattr=+sse2 -mtriple=i686-apple-darwin8 | \ -; RUN: grep "movaps.*32" +; RUN: llc < %s -mcpu=generic -mattr=+sse2 -mtriple=i686-apple-darwin8 | FileCheck %s define void @test() { +; CHECK-LABEL: test: +; CHECK: ## %bb.0: +; CHECK-NEXT: subl $60, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,1,3,9] +; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $7, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $6, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $5, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $4, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $3, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $2, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $1, (%esp) +; CHECK-NEXT: movaps {{.*#+}} xmm0 = [4,3,2,1] +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [8,7,6,5] +; CHECK-NEXT: movaps {{.*#+}} xmm2 = [6,4,2,0] +; CHECK-NEXT: movaps {{.*#+}} xmm3 = [8,4,2,1] +; CHECK-NEXT: calll _xx +; CHECK-NEXT: addl $60, %esp +; CHECK-NEXT: retl tail call void @xx( i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, <2 x i64> bitcast (<4 x i32> < i32 4, i32 3, i32 2, i32 1 > to <2 x i64>), <2 x i64> bitcast (<4 x i32> < i32 8, i32 7, i32 6, i32 5 > to <2 x i64>), <2 x i64> bitcast (<4 x i32> < i32 6, i32 4, i32 2, i32 0 > to <2 x i64>), <2 x i64> bitcast (<4 x i32> < i32 8, i32 4, i32 2, i32 1 > to <2 x i64>), <2 x i64> bitcast (<4 x i32> < i32 0, i32 1, i32 3, i32 9 > to <2 x i64>) ) ret void } diff --git a/llvm/test/CodeGen/X86/vector-intrinsics.ll b/llvm/test/CodeGen/X86/vector-intrinsics.ll --- a/llvm/test/CodeGen/X86/vector-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-intrinsics.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep call | count 43 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s declare <4 x double> @llvm.sin.v4f64(<4 x double> %p) declare <4 x double> @llvm.cos.v4f64(<4 x 
double> %p) @@ -6,21 +7,142 @@ declare <4 x double> @llvm.powi.v4f64(<4 x double> %p, i32) define <4 x double> @foo(<4 x double> %p) +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $56, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: callq sin +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq sin +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: callq sin +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq sin +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: addq $56, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq { %t = call <4 x double> @llvm.sin.v4f64(<4 x double> %p) ret <4 x double> %t } define <4 x double> @goo(<4 x double> %p) +; CHECK-LABEL: goo: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $56, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: callq cos +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq cos +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: 
movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: callq cos +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq cos +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: addq $56, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq { %t = call <4 x double> @llvm.cos.v4f64(<4 x double> %p) ret <4 x double> %t } define <4 x double> @moo(<4 x double> %p, <4 x double> %q) +; CHECK-LABEL: moo: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $88, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm2, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm2, %xmm1 +; CHECK-NEXT: callq pow +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq pow +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq pow +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq pow +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: addq $88, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq { %t = call <4 x double> @llvm.pow.v4f64(<4 x double> %p, <4 x double> %q) ret <4 x double> %t } define <4 x double> @zoo(<4 x double> %p, i32 %q) +; CHECK-LABEL: zoo: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: subq $48, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movl %edi, %ebx +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: callq __powidf2 +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movl %ebx, %edi +; CHECK-NEXT: callq __powidf2 +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movl %ebx, %edi +; CHECK-NEXT: callq __powidf2 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movl %ebx, %edi +; CHECK-NEXT: callq __powidf2 +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: addq $48, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq { %t = call <4 x double> @llvm.powi.v4f64(<4 x double> %p, i32 %q) ret <4 x double> %t @@ -32,12 +154,166 @@ declare <9 x double> @llvm.powi.v9f64(<9 x double> %a, i32) define void @a(<9 x double>* %p) nounwind { +; CHECK-LABEL: a: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $96, %rsp +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps 16(%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps 32(%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps 48(%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: callq exp +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq exp +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: callq exp +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq exp +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 
16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: callq exp +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq exp +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: callq exp +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: callq exp +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: callq exp +; CHECK-NEXT: movsd %xmm0, 64(%rbx) +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, (%rbx) +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, 16(%rbx) +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, 32(%rbx) +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, 48(%rbx) +; CHECK-NEXT: addq $96, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq %a = load <9 x double>, <9 x double>* %p %r = call <9 x double> @llvm.exp.v9f64(<9 x double> %a) store <9 x double> %r, <9 x double>* %p ret void } define void @b(<9 x double>* %p, <9 x double>* %q) nounwind { +; CHECK-LABEL: b: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $160, %rsp +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: 
movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps 16(%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps 32(%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps 48(%rdi), %xmm2 +; CHECK-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movaps (%rsi), %xmm0 +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps 16(%rsi), %xmm0 +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps 32(%rsi), %xmm0 +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps 48(%rsi), %xmm1 +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm2, %xmm0 +; CHECK-NEXT: callq pow +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq pow +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq pow +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq pow +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq pow +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq pow +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: callq pow +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: callq pow +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero +; CHECK-NEXT: callq pow +; CHECK-NEXT: movsd %xmm0, 64(%rbx) +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, (%rbx) +; 
CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, 16(%rbx) +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, 32(%rbx) +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, 48(%rbx) +; CHECK-NEXT: addq $160, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq %a = load <9 x double>, <9 x double>* %p %b = load <9 x double>, <9 x double>* %q %r = call <9 x double> @llvm.pow.v9f64(<9 x double> %a, <9 x double> %b) @@ -45,6 +321,83 @@ ret void } define void @c(<9 x double>* %p, i32 %n) nounwind { +; CHECK-LABEL: c: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $104, %rsp +; CHECK-NEXT: movl %esi, %ebp +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps 16(%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps 32(%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps 48(%rdi), %xmm0 +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movl %esi, %edi +; CHECK-NEXT: callq __powidf2 +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movl %ebp, %edi +; CHECK-NEXT: callq __powidf2 +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movl %ebp, %edi +; CHECK-NEXT: callq __powidf2 +; 
CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movl %ebp, %edi +; CHECK-NEXT: callq __powidf2 +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movl %ebp, %edi +; CHECK-NEXT: callq __powidf2 +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movl %ebp, %edi +; CHECK-NEXT: callq __powidf2 +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movl %ebp, %edi +; CHECK-NEXT: callq __powidf2 +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: movl %ebp, %edi +; CHECK-NEXT: callq __powidf2 +; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: movl %ebp, %edi +; CHECK-NEXT: callq __powidf2 +; CHECK-NEXT: movsd %xmm0, 64(%rbx) +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, (%rbx) +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, 16(%rbx) +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; 
CHECK-NEXT: movaps %xmm0, 32(%rbx) +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, 48(%rbx) +; CHECK-NEXT: addq $104, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq %a = load <9 x double>, <9 x double>* %p %r = call <9 x double> @llvm.powi.v9f64(<9 x double> %a, i32 %n) store <9 x double> %r, <9 x double>* %p diff --git a/llvm/test/CodeGen/X86/vector-variable-idx.ll b/llvm/test/CodeGen/X86/vector-variable-idx.ll --- a/llvm/test/CodeGen/X86/vector-variable-idx.ll +++ b/llvm/test/CodeGen/X86/vector-variable-idx.ll @@ -1,11 +1,27 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep movss | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s ; PR2676 define float @foo(<4 x float> %p, i32 %t) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: andl $3, %edi +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: retq %z = extractelement <4 x float> %p, i32 %t ret float %z } define <4 x float> @bar(<4 x float> %p, float %f, i32 %t) { +; CHECK-LABEL: bar: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: andl $3, %edi +; CHECK-NEXT: movss %xmm1, -24(%rsp,%rdi,4) +; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; CHECK-NEXT: retq %z = insertelement <4 x float> %p, float %f, i32 %t ret <4 x float> %z } diff --git a/llvm/test/CodeGen/X86/volatile.ll b/llvm/test/CodeGen/X86/volatile.ll --- a/llvm/test/CodeGen/X86/volatile.ll +++ b/llvm/test/CodeGen/X86/volatile.ll @@ -1,9 +1,27 @@ -; RUN: llc < %s -mtriple=i686-- -mattr=sse2 | grep movsd | count 5 -; RUN: llc < %s -mtriple=i686-- -mattr=sse2 -O0 | grep -v esp | grep movsd | count 5 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: 
llc < %s -mtriple=i686-- -mattr=sse2 | FileCheck %s -check-prefixes=ALL,OPT +; RUN: llc < %s -mtriple=i686-- -mattr=sse2 -O0 | FileCheck %s --check-prefixes=ALL,NOOPT @x = external global double define void @foo() nounwind { +; OPT-LABEL: foo: +; OPT: # %bb.0: +; OPT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; OPT-NEXT: xorps %xmm0, %xmm0 +; OPT-NEXT: movsd %xmm0, x +; OPT-NEXT: movsd %xmm0, x +; OPT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; OPT-NEXT: retl +; +; NOOPT-LABEL: foo: +; NOOPT: # %bb.0: +; NOOPT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; NOOPT-NEXT: xorps %xmm1, %xmm1 +; NOOPT-NEXT: movsd %xmm1, x +; NOOPT-NEXT: movsd %xmm1, x +; NOOPT-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; NOOPT-NEXT: retl %a = load volatile double, double* @x store volatile double 0.0, double* @x store volatile double 0.0, double* @x @@ -12,6 +30,10 @@ } define void @bar() nounwind { +; ALL-LABEL: bar: +; ALL: # %bb.0: +; ALL-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ALL-NEXT: retl %c = load volatile double, double* @x ret void } diff --git a/llvm/test/CodeGen/X86/x86-64-arg.ll b/llvm/test/CodeGen/X86/x86-64-arg.ll --- a/llvm/test/CodeGen/X86/x86-64-arg.ll +++ b/llvm/test/CodeGen/X86/x86-64-arg.ll @@ -1,4 +1,6 @@ -; RUN: llc < %s | grep "movl %edi, %eax" +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s + ; The input value is already sign extended, don't re-extend it. 
; This testcase corresponds to: ; int test(short X) { return (int)X; } @@ -8,6 +10,10 @@ define i32 @test(i16 signext %X) { +; CHECK-LABEL: test: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: retq entry: %tmp12 = sext i16 %X to i32 ; [#uses=1] ret i32 %tmp12 diff --git a/llvm/test/CodeGen/X86/x86-64-disp.ll b/llvm/test/CodeGen/X86/x86-64-disp.ll --- a/llvm/test/CodeGen/X86/x86-64-disp.ll +++ b/llvm/test/CodeGen/X86/x86-64-disp.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s | grep mov | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s ; Fold an offset into an address even if it's not a 32-bit ; signed integer. @@ -8,6 +9,11 @@ @call_used_regs = external global [53 x i8], align 32 define fastcc void @foo() nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF +; CHECK-NEXT: movb $1, call_used_regs(%rax) +; CHECK-NEXT: retq %t = getelementptr [53 x i8], [53 x i8]* @call_used_regs, i64 0, i64 4294967295 store i8 1, i8* %t, align 1 ret void diff --git a/llvm/test/CodeGen/X86/x86-64-gv-offset.ll b/llvm/test/CodeGen/X86/x86-64-gv-offset.ll --- a/llvm/test/CodeGen/X86/x86-64-gv-offset.ll +++ b/llvm/test/CodeGen/X86/x86-64-gv-offset.ll @@ -1,13 +1,23 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep lea +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s %struct.x = type { float, double } @X = global %struct.x { float 1.000000e+00, double 2.000000e+00 }, align 16 ; <%struct.x*> [#uses=2] define i32 @main() nounwind { +; CHECK-LABEL: main: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: callq _t +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq entry: %tmp2 = load float, float* 
getelementptr (%struct.x, %struct.x* @X, i32 0, i32 0), align 16 ; [#uses=1] %tmp4 = load double, double* getelementptr (%struct.x, %struct.x* @X, i32 0, i32 1), align 8 ; [#uses=1] - tail call void @t( float %tmp2, double %tmp4 ) nounwind + tail call void @t( float %tmp2, double %tmp4 ) nounwind ret i32 0 } diff --git a/llvm/test/CodeGen/X86/x86-64-mem.ll b/llvm/test/CodeGen/X86/x86-64-mem.ll --- a/llvm/test/CodeGen/X86/x86-64-mem.ll +++ b/llvm/test/CodeGen/X86/x86-64-mem.ll @@ -1,11 +1,6 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -o %t1 -; RUN: grep GOTPCREL %t1 | count 4 -; RUN: grep %%rip %t1 | count 6 -; RUN: grep movq %t1 | count 6 -; RUN: grep leaq %t1 | count 1 -; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=static -o %t2 -; RUN: grep movl %t2 | count 2 -; RUN: grep movq %t2 | count 2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s --check-prefix=DARWIN +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=static | FileCheck %s --check-prefix=LINUX @ptr = external global i32* ; [#uses=1] @src = external global [0 x i32] ; <[0 x i32]*> [#uses=1] @@ -17,17 +12,51 @@ @bdst = internal global [500000 x i32] zeroinitializer, align 32 ; <[500000 x i32]*> [#uses=0] define void @test1() nounwind { +; DARWIN-LABEL: test1: +; DARWIN: ## %bb.0: +; DARWIN-NEXT: movq _src@{{.*}}(%rip), %rax +; DARWIN-NEXT: movl (%rax), %eax +; DARWIN-NEXT: movq _dst@{{.*}}(%rip), %rcx +; DARWIN-NEXT: movl %eax, (%rcx) +; DARWIN-NEXT: retq +; +; LINUX-LABEL: test1: +; LINUX: # %bb.0: +; LINUX-NEXT: movl {{.*}}(%rip), %eax +; LINUX-NEXT: movl %eax, {{.*}}(%rip) +; LINUX-NEXT: retq %tmp = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @src, i32 0, i32 0) ; [#uses=1] store i32 %tmp, i32* getelementptr ([0 x i32], [0 x i32]* @dst, i32 0, i32 0) ret void } define void @test2() nounwind { +; DARWIN-LABEL: test2: +; DARWIN: ## %bb.0: +; DARWIN-NEXT: movq _dst@{{.*}}(%rip), 
%rax +; DARWIN-NEXT: movq _ptr@{{.*}}(%rip), %rcx +; DARWIN-NEXT: movq %rax, (%rcx) +; DARWIN-NEXT: retq +; +; LINUX-LABEL: test2: +; LINUX: # %bb.0: +; LINUX-NEXT: movq $dst, {{.*}}(%rip) +; LINUX-NEXT: retq store i32* getelementptr ([0 x i32], [0 x i32]* @dst, i32 0, i32 0), i32** @ptr ret void } define void @test3() nounwind { +; DARWIN-LABEL: test3: +; DARWIN: ## %bb.0: +; DARWIN-NEXT: leaq {{.*}}(%rip), %rax +; DARWIN-NEXT: movq %rax, {{.*}}(%rip) +; DARWIN-NEXT: retq +; +; LINUX-LABEL: test3: +; LINUX: # %bb.0: +; LINUX-NEXT: movq $ldst, {{.*}}(%rip) +; LINUX-NEXT: retq store i32* getelementptr ([500 x i32], [500 x i32]* @ldst, i32 0, i32 0), i32** @lptr br label %return diff --git a/llvm/test/CodeGen/X86/x86-64-pic-1.ll b/llvm/test/CodeGen/X86/x86-64-pic-1.ll --- a/llvm/test/CodeGen/X86/x86-64-pic-1.ll +++ b/llvm/test/CodeGen/X86/x86-64-pic-1.ll @@ -1,7 +1,15 @@ -; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep "callq f@PLT" %t1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic | FileCheck %s define void @g() { +; CHECK-LABEL: g: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq f@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq entry: call void @f( ) ret void diff --git a/llvm/test/CodeGen/X86/x86-64-pic-10.ll b/llvm/test/CodeGen/X86/x86-64-pic-10.ll --- a/llvm/test/CodeGen/X86/x86-64-pic-10.ll +++ b/llvm/test/CodeGen/X86/x86-64-pic-10.ll @@ -1,14 +1,26 @@ -; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep "callq g@PLT" %t1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic | FileCheck %s @g = weak alias i32 (), i32 ()* @f define void @h() { +; CHECK-LABEL: h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq 
%rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq g@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq entry: %tmp31 = call i32 @g() ret void } define weak i32 @f() { +; CHECK-LABEL: f: +; CHECK: # %bb.0: +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: retq ret i32 42 } diff --git a/llvm/test/CodeGen/X86/x86-64-pic-11.ll b/llvm/test/CodeGen/X86/x86-64-pic-11.ll --- a/llvm/test/CodeGen/X86/x86-64-pic-11.ll +++ b/llvm/test/CodeGen/X86/x86-64-pic-11.ll @@ -1,7 +1,15 @@ -; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep "callq __fixunsxfti@PLT" %t1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic | FileCheck %s define i128 @f(x86_fp80 %a) nounwind { +; CHECK-LABEL: f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fstpt (%rsp) +; CHECK-NEXT: callq __fixunsxfti@PLT +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: retq entry: %tmp78 = fptoui x86_fp80 %a to i128 ret i128 %tmp78 diff --git a/llvm/test/CodeGen/X86/x86-64-pic-2.ll b/llvm/test/CodeGen/X86/x86-64-pic-2.ll --- a/llvm/test/CodeGen/X86/x86-64-pic-2.ll +++ b/llvm/test/CodeGen/X86/x86-64-pic-2.ll @@ -1,8 +1,15 @@ -; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep "callq f" %t1 -; RUN: not grep "callq f@PLT" %t1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic | FileCheck %s define void @g() { +; CHECK-LABEL: g: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq f{{$}} +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq entry: call void @f( ) ret void diff --git a/llvm/test/CodeGen/X86/x86-64-pic-4.ll b/llvm/test/CodeGen/X86/x86-64-pic-4.ll ---
a/llvm/test/CodeGen/X86/x86-64-pic-4.ll +++ b/llvm/test/CodeGen/X86/x86-64-pic-4.ll @@ -1,9 +1,14 @@ -; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep "movq a@GOTPCREL(%rip)," %t1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic | FileCheck %s @a = global i32 0 define i32 @get_a() { +; CHECK-LABEL: get_a: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq a@GOTPCREL(%rip), %rax +; CHECK-NEXT: movl (%rax), %eax +; CHECK-NEXT: retq entry: %tmp1 = load i32, i32* @a, align 4 ret i32 %tmp1 diff --git a/llvm/test/CodeGen/X86/x86-64-pic-5.ll b/llvm/test/CodeGen/X86/x86-64-pic-5.ll --- a/llvm/test/CodeGen/X86/x86-64-pic-5.ll +++ b/llvm/test/CodeGen/X86/x86-64-pic-5.ll @@ -1,10 +1,13 @@ -; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep "movl a(%rip)," %t1 -; RUN: not grep GOTPCREL %t1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic | FileCheck %s @a = hidden global i32 0 define i32 @get_a() { +; CHECK-LABEL: get_a: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl a(%rip), %eax +; CHECK-NEXT: retq entry: %tmp1 = load i32, i32* @a, align 4 ret i32 %tmp1 diff --git a/llvm/test/CodeGen/X86/x86-64-pic-6.ll b/llvm/test/CodeGen/X86/x86-64-pic-6.ll --- a/llvm/test/CodeGen/X86/x86-64-pic-6.ll +++ b/llvm/test/CodeGen/X86/x86-64-pic-6.ll @@ -1,10 +1,13 @@ -; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep "movl a(%rip)," %t1 -; RUN: not grep GOTPCREL %t1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic | FileCheck %s @a = internal global i32 0 define i32 @get_a() nounwind { +; CHECK-LABEL: get_a: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl a(%rip), %eax +; CHECK-NEXT: retq entry: %tmp1 = load i32,
i32* @a, align 4 ret i32 %tmp1 diff --git a/llvm/test/CodeGen/X86/x86-64-pic-7.ll b/llvm/test/CodeGen/X86/x86-64-pic-7.ll --- a/llvm/test/CodeGen/X86/x86-64-pic-7.ll +++ b/llvm/test/CodeGen/X86/x86-64-pic-7.ll @@ -1,7 +1,11 @@ -; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep "movq f@GOTPCREL(%rip)," %t1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic | FileCheck %s define void ()* @g() nounwind { +; CHECK-LABEL: g: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq f@GOTPCREL(%rip), %rax +; CHECK-NEXT: retq entry: ret void ()* @f } diff --git a/llvm/test/CodeGen/X86/x86-64-pic-8.ll b/llvm/test/CodeGen/X86/x86-64-pic-8.ll --- a/llvm/test/CodeGen/X86/x86-64-pic-8.ll +++ b/llvm/test/CodeGen/X86/x86-64-pic-8.ll @@ -1,8 +1,11 @@ -; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep "leaq f(%rip)," %t1 -; RUN: not grep GOTPCREL %t1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic | FileCheck %s define void ()* @g() { +; CHECK-LABEL: g: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: leaq f(%rip), %rax +; CHECK-NEXT: retq entry: ret void ()* @f } diff --git a/llvm/test/CodeGen/X86/x86-64-pic-9.ll b/llvm/test/CodeGen/X86/x86-64-pic-9.ll --- a/llvm/test/CodeGen/X86/x86-64-pic-9.ll +++ b/llvm/test/CodeGen/X86/x86-64-pic-9.ll @@ -1,13 +1,19 @@ -; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep "leaq f(%rip)," %t1 -; RUN: not grep GOTPCREL %t1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic | FileCheck %s define void ()* @g() nounwind { +; CHECK-LABEL: g: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: leaq f(%rip), %rax +; CHECK-NEXT: retq entry: ret void ()* @f } define internal void @f() nounwind { +;
CHECK-LABEL: f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: retq entry: ret void } diff --git a/llvm/test/CodeGen/X86/x86-64-ret0.ll b/llvm/test/CodeGen/X86/x86-64-ret0.ll --- a/llvm/test/CodeGen/X86/x86-64-ret0.ll +++ b/llvm/test/CodeGen/X86/x86-64-ret0.ll @@ -1,7 +1,16 @@ -; RUN: llc < %s -mtriple=x86_64-- | grep mov | count 1 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s define i32 @f() nounwind { - tail call void @t( i32 1 ) nounwind +; CHECK-LABEL: f: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: callq t +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq + tail call void @t( i32 1 ) nounwind ret i32 0 } diff --git a/llvm/test/CodeGen/X86/x86-64-varargs.ll b/llvm/test/CodeGen/X86/x86-64-varargs.ll --- a/llvm/test/CodeGen/X86/x86-64-varargs.ll +++ b/llvm/test/CodeGen/X86/x86-64-varargs.ll @@ -1,10 +1,29 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -code-model=large -relocation-model=static | grep call | not grep rax +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin -code-model=large -relocation-model=static | FileCheck %s @.str = internal constant [26 x i8] c"%d, %f, %d, %lld, %d, %f\0A\00" ; <[26 x i8]*> [#uses=1] -declare i32 @printf(i8*, ...) nounwind +declare i32 @printf(i8*, ...) 
nounwind define i32 @main() nounwind { +; CHECK-LABEL: main: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movabsq $_.str, %rdi +; CHECK-NEXT: movabsq $_printf, %r9 +; CHECK-NEXT: movabsq $LCPI0_0, %rax +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movabsq $LCPI0_1, %rax +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: movabsq $123456677890, %rcx ## imm = 0x1CBE976802 +; CHECK-NEXT: movl $12, %esi +; CHECK-NEXT: movl $120, %edx +; CHECK-NEXT: movl $-10, %r8d +; CHECK-NEXT: movb $2, %al +; CHECK-NEXT: callq *%r9 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq entry: %tmp10.i = tail call i32 (i8*, ...) @printf( i8* getelementptr ([26 x i8], [26 x i8]* @.str, i32 0, i64 0), i32 12, double 0x3FF3EB8520000000, i32 120, i64 123456677890, i32 -10, double 4.500000e+15 ) nounwind ; [#uses=0] ret i32 0 diff --git a/llvm/test/CodeGen/X86/x86-store-gv-addr.ll b/llvm/test/CodeGen/X86/x86-store-gv-addr.ll --- a/llvm/test/CodeGen/X86/x86-store-gv-addr.ll +++ b/llvm/test/CodeGen/X86/x86-store-gv-addr.ll @@ -1,10 +1,21 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=static | not grep lea -; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -relocation-model=static | not grep lea +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=static | FileCheck %s --check-prefix=x86_64-darwin +; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -relocation-model=static | FileCheck %s --check-prefix=x86_64-linux @v = external global i32, align 8 @v_addr = external global i32*, align 8 define void @t() nounwind optsize { +; x86_64-darwin-LABEL: t: +; x86_64-darwin: ## %bb.0: +; x86_64-darwin-NEXT: movq _v@{{.*}}(%rip), %rax +; x86_64-darwin-NEXT: movq _v_addr@{{.*}}(%rip), %rcx +; x86_64-darwin-NEXT: movq %rax, (%rcx) +; x86_64-darwin-NEXT: ud2 +; +; x86_64-linux-LABEL: t: +; x86_64-linux: # %bb.0: +; 
x86_64-linux-NEXT: movq $v, {{.*}}(%rip) store i32* @v, i32** @v_addr, align 8 unreachable } diff --git a/llvm/test/CodeGen/X86/xray-selective-instrumentation-miss.ll b/llvm/test/CodeGen/X86/xray-selective-instrumentation-miss.ll --- a/llvm/test/CodeGen/X86/xray-selective-instrumentation-miss.ll +++ b/llvm/test/CodeGen/X86/xray-selective-instrumentation-miss.ll @@ -1,9 +1,14 @@ -; RUN: llc -mcpu=nehalem < %s | not grep xray_sled_ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=nehalem < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-apple-darwin8" define i32 @foo() nounwind uwtable "xray-instruction-threshold"="3" { +; CHECK-LABEL: foo: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq entry: ret i32 0 } diff --git a/llvm/test/CodeGen/X86/xray-selective-instrumentation.ll b/llvm/test/CodeGen/X86/xray-selective-instrumentation.ll --- a/llvm/test/CodeGen/X86/xray-selective-instrumentation.ll +++ b/llvm/test/CodeGen/X86/xray-selective-instrumentation.ll @@ -1,9 +1,21 @@ -; RUN: llc -verify-machineinstrs -mcpu=nehalem < %s | grep xray_sled_ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mcpu=nehalem | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-apple-darwin8" define i32 @foo() nounwind uwtable "xray-instruction-threshold"="1" { +; CHECK-LABEL: foo: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: .p2align 1, 0x90 +; CHECK-NEXT: Lxray_sled_0: +; CHECK-NEXT: .ascii "\353\t" +; CHECK-NEXT: nopw 512(%rax,%rax) +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: .p2align 1, 0x90 +; CHECK-NEXT: Lxray_sled_1: +; CHECK-NEXT: retq +; CHECK-NEXT: nopw 
%cs:512(%rax,%rax) entry: ret i32 0 } diff --git a/llvm/test/CodeGen/X86/zext-inreg-1.ll b/llvm/test/CodeGen/X86/zext-inreg-1.ll --- a/llvm/test/CodeGen/X86/zext-inreg-1.ll +++ b/llvm/test/CodeGen/X86/zext-inreg-1.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=i686-- | not grep and +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s ; These tests differ from the ones in zext-inreg-0.ll in that ; on x86-64 they do require and instructions. @@ -7,11 +8,27 @@ ; This related to not having ZERO_EXTEND_REG node. define i64 @l(i64 %d) nounwind { +; CHECK-LABEL: l: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: addl $1, %eax +; CHECK-NEXT: adcl $0, %ecx +; CHECK-NEXT: movzbl %cl, %edx +; CHECK-NEXT: retl %e = add i64 %d, 1 %retval = and i64 %e, 1099511627775 ret i64 %retval } define i64 @m(i64 %d) nounwind { +; CHECK-LABEL: m: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: addl $1, %eax +; CHECK-NEXT: adcl $0, %ecx +; CHECK-NEXT: movzwl %cx, %edx +; CHECK-NEXT: retl %e = add i64 %d, 1 %retval = and i64 %e, 281474976710655 ret i64 %retval