Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -16547,16 +16547,11 @@ // non-casted variable when we check for possible users. switch (ArithOp.getOpcode()) { case ISD::ADD: - // Due to an isel shortcoming, be conservative if this add is likely to be - // selected as part of a load-modify-store instruction. When the root node - // in a match is a store, isel doesn't know how to remap non-chain non-flag - // uses of other nodes in the match, such as the ADD in this case. This - // leads to the ADD being left around and reselected, with the result being - // two adds in the output. Alas, even if none our users are stores, that - // doesn't prove we're O.K. Ergo, if we have any parents that aren't - // CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require - // climbing the DAG back to the root, and it doesn't seem to be worth the - // effort. + // We only want to rewrite this as a target-specific node with attached + // flags if there is a reasonable chance of either using that to do custom + // instruction selection that can fold some of the memory operands, or if + // only the flags are used. If there are other uses, leave the node alone + // and emit a test instruction. for (SDNode::use_iterator UI = Op.getNode()->use_begin(), UE = Op.getNode()->use_end(); UI != UE; ++UI) if (UI->getOpcode() != ISD::CopyToReg && @@ -16667,11 +16662,13 @@ case ISD::SUB: case ISD::OR: case ISD::XOR: - // Due to the ISEL shortcoming noted above, be conservative if this op is - // likely to be selected as part of a load-modify-store instruction. + // Similar to ISD::ADD above, check if the uses will preclude useful + // lowering of the target-specific node. 
for (SDNode::use_iterator UI = Op.getNode()->use_begin(), UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() == ISD::STORE) + if (UI->getOpcode() != ISD::CopyToReg && + UI->getOpcode() != ISD::SETCC && + UI->getOpcode() != ISD::STORE) goto default_case; // Otherwise use a regular EFLAGS-setting instruction. Index: llvm/trunk/test/CodeGen/X86/atomic-minmax-i6432.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/atomic-minmax-i6432.ll +++ llvm/trunk/test/CodeGen/X86/atomic-minmax-i6432.ll @@ -15,29 +15,16 @@ ; LINUX-NEXT: .p2align 4, 0x90 ; LINUX-NEXT: .LBB0_1: # %atomicrmw.start ; LINUX-NEXT: # =>This Inner Loop Header: Depth=1 -; LINUX-NEXT: xorl %ecx, %ecx ; LINUX-NEXT: cmpl %eax, %esi +; LINUX-NEXT: movl $0, %ecx ; LINUX-NEXT: sbbl %edx, %ecx -; LINUX-NEXT: setl %cl -; LINUX-NEXT: andb $1, %cl -; LINUX-NEXT: movl %eax, %ebx -; LINUX-NEXT: jne .LBB0_3 -; LINUX-NEXT: # BB#2: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB0_1 Depth=1 +; LINUX-NEXT: movl $0, %ecx +; LINUX-NEXT: cmovll %edx, %ecx ; LINUX-NEXT: movl $5, %ebx -; LINUX-NEXT: .LBB0_3: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB0_1 Depth=1 -; LINUX-NEXT: testb %cl, %cl -; LINUX-NEXT: movl %edx, %ecx -; LINUX-NEXT: jne .LBB0_5 -; LINUX-NEXT: # BB#4: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB0_1 Depth=1 -; LINUX-NEXT: xorl %ecx, %ecx -; LINUX-NEXT: .LBB0_5: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB0_1 Depth=1 +; LINUX-NEXT: cmovll %eax, %ebx ; LINUX-NEXT: lock cmpxchg8b sc64 ; LINUX-NEXT: jne .LBB0_1 -; LINUX-NEXT: # BB#6: # %atomicrmw.end +; LINUX-NEXT: # BB#2: # %atomicrmw.end ; LINUX-NEXT: popl %esi ; LINUX-NEXT: popl %ebx ; LINUX-NEXT: retl @@ -57,29 +44,16 @@ ; PIC-NEXT: .p2align 4, 0x90 ; PIC-NEXT: LBB0_1: ## %atomicrmw.start ; PIC-NEXT: ## =>This Inner Loop Header: Depth=1 -; PIC-NEXT: xorl %ecx, %ecx ; PIC-NEXT: cmpl %eax, %edi +; PIC-NEXT: movl $0, %ecx ; PIC-NEXT: sbbl %edx, %ecx -; 
PIC-NEXT: setl %cl -; PIC-NEXT: andb $1, %cl -; PIC-NEXT: movl %eax, %ebx -; PIC-NEXT: jne LBB0_3 -; PIC-NEXT: ## BB#2: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB0_1 Depth=1 +; PIC-NEXT: movl $0, %ecx +; PIC-NEXT: cmovll %edx, %ecx ; PIC-NEXT: movl $5, %ebx -; PIC-NEXT: LBB0_3: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB0_1 Depth=1 -; PIC-NEXT: testb %cl, %cl -; PIC-NEXT: movl %edx, %ecx -; PIC-NEXT: jne LBB0_5 -; PIC-NEXT: ## BB#4: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB0_1 Depth=1 -; PIC-NEXT: xorl %ecx, %ecx -; PIC-NEXT: LBB0_5: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB0_1 Depth=1 +; PIC-NEXT: cmovll %eax, %ebx ; PIC-NEXT: lock cmpxchg8b (%esi) ; PIC-NEXT: jne LBB0_1 -; PIC-NEXT: ## BB#6: ## %atomicrmw.end +; PIC-NEXT: ## BB#2: ## %atomicrmw.end ; PIC-NEXT: popl %esi ; PIC-NEXT: popl %edi ; PIC-NEXT: popl %ebx @@ -102,26 +76,13 @@ ; LINUX-NEXT: cmpl $7, %eax ; LINUX-NEXT: movl %edx, %ecx ; LINUX-NEXT: sbbl $0, %ecx -; LINUX-NEXT: setl %cl -; LINUX-NEXT: andb $1, %cl -; LINUX-NEXT: movl %eax, %ebx -; LINUX-NEXT: jne .LBB1_3 -; LINUX-NEXT: # BB#2: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB1_1 Depth=1 +; LINUX-NEXT: movl $0, %ecx +; LINUX-NEXT: cmovll %edx, %ecx ; LINUX-NEXT: movl $6, %ebx -; LINUX-NEXT: .LBB1_3: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB1_1 Depth=1 -; LINUX-NEXT: testb %cl, %cl -; LINUX-NEXT: movl %edx, %ecx -; LINUX-NEXT: jne .LBB1_5 -; LINUX-NEXT: # BB#4: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB1_1 Depth=1 -; LINUX-NEXT: xorl %ecx, %ecx -; LINUX-NEXT: .LBB1_5: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB1_1 Depth=1 +; LINUX-NEXT: cmovll %eax, %ebx ; LINUX-NEXT: lock cmpxchg8b sc64 ; LINUX-NEXT: jne .LBB1_1 -; LINUX-NEXT: # BB#6: # %atomicrmw.end +; LINUX-NEXT: # BB#2: # %atomicrmw.end ; LINUX-NEXT: popl %ebx ; LINUX-NEXT: retl ; @@ -141,26 +102,13 @@ ; PIC-NEXT: cmpl $7, %eax ; PIC-NEXT: movl %edx, %ecx ; PIC-NEXT: sbbl $0, %ecx -; PIC-NEXT: setl 
%cl -; PIC-NEXT: andb $1, %cl -; PIC-NEXT: movl %eax, %ebx -; PIC-NEXT: jne LBB1_3 -; PIC-NEXT: ## BB#2: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB1_1 Depth=1 +; PIC-NEXT: movl $0, %ecx +; PIC-NEXT: cmovll %edx, %ecx ; PIC-NEXT: movl $6, %ebx -; PIC-NEXT: LBB1_3: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB1_1 Depth=1 -; PIC-NEXT: testb %cl, %cl -; PIC-NEXT: movl %edx, %ecx -; PIC-NEXT: jne LBB1_5 -; PIC-NEXT: ## BB#4: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB1_1 Depth=1 -; PIC-NEXT: xorl %ecx, %ecx -; PIC-NEXT: LBB1_5: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB1_1 Depth=1 +; PIC-NEXT: cmovll %eax, %ebx ; PIC-NEXT: lock cmpxchg8b (%esi) ; PIC-NEXT: jne LBB1_1 -; PIC-NEXT: ## BB#6: ## %atomicrmw.end +; PIC-NEXT: ## BB#2: ## %atomicrmw.end ; PIC-NEXT: popl %esi ; PIC-NEXT: popl %ebx ; PIC-NEXT: retl @@ -181,29 +129,16 @@ ; LINUX-NEXT: .p2align 4, 0x90 ; LINUX-NEXT: .LBB2_1: # %atomicrmw.start ; LINUX-NEXT: # =>This Inner Loop Header: Depth=1 -; LINUX-NEXT: xorl %ecx, %ecx ; LINUX-NEXT: cmpl %eax, %esi +; LINUX-NEXT: movl $0, %ecx ; LINUX-NEXT: sbbl %edx, %ecx -; LINUX-NEXT: setb %cl -; LINUX-NEXT: andb $1, %cl -; LINUX-NEXT: movl %eax, %ebx -; LINUX-NEXT: jne .LBB2_3 -; LINUX-NEXT: # BB#2: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB2_1 Depth=1 +; LINUX-NEXT: movl $0, %ecx +; LINUX-NEXT: cmovbl %edx, %ecx ; LINUX-NEXT: movl $7, %ebx -; LINUX-NEXT: .LBB2_3: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB2_1 Depth=1 -; LINUX-NEXT: testb %cl, %cl -; LINUX-NEXT: movl %edx, %ecx -; LINUX-NEXT: jne .LBB2_5 -; LINUX-NEXT: # BB#4: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB2_1 Depth=1 -; LINUX-NEXT: xorl %ecx, %ecx -; LINUX-NEXT: .LBB2_5: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB2_1 Depth=1 +; LINUX-NEXT: cmovbl %eax, %ebx ; LINUX-NEXT: lock cmpxchg8b sc64 ; LINUX-NEXT: jne .LBB2_1 -; LINUX-NEXT: # BB#6: # %atomicrmw.end +; LINUX-NEXT: # BB#2: # %atomicrmw.end ; LINUX-NEXT: popl %esi ; 
LINUX-NEXT: popl %ebx ; LINUX-NEXT: retl @@ -223,29 +158,16 @@ ; PIC-NEXT: .p2align 4, 0x90 ; PIC-NEXT: LBB2_1: ## %atomicrmw.start ; PIC-NEXT: ## =>This Inner Loop Header: Depth=1 -; PIC-NEXT: xorl %ecx, %ecx ; PIC-NEXT: cmpl %eax, %edi +; PIC-NEXT: movl $0, %ecx ; PIC-NEXT: sbbl %edx, %ecx -; PIC-NEXT: setb %cl -; PIC-NEXT: andb $1, %cl -; PIC-NEXT: movl %eax, %ebx -; PIC-NEXT: jne LBB2_3 -; PIC-NEXT: ## BB#2: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB2_1 Depth=1 +; PIC-NEXT: movl $0, %ecx +; PIC-NEXT: cmovbl %edx, %ecx ; PIC-NEXT: movl $7, %ebx -; PIC-NEXT: LBB2_3: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB2_1 Depth=1 -; PIC-NEXT: testb %cl, %cl -; PIC-NEXT: movl %edx, %ecx -; PIC-NEXT: jne LBB2_5 -; PIC-NEXT: ## BB#4: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB2_1 Depth=1 -; PIC-NEXT: xorl %ecx, %ecx -; PIC-NEXT: LBB2_5: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB2_1 Depth=1 +; PIC-NEXT: cmovbl %eax, %ebx ; PIC-NEXT: lock cmpxchg8b (%esi) ; PIC-NEXT: jne LBB2_1 -; PIC-NEXT: ## BB#6: ## %atomicrmw.end +; PIC-NEXT: ## BB#2: ## %atomicrmw.end ; PIC-NEXT: popl %esi ; PIC-NEXT: popl %edi ; PIC-NEXT: popl %ebx @@ -268,26 +190,13 @@ ; LINUX-NEXT: cmpl $9, %eax ; LINUX-NEXT: movl %edx, %ecx ; LINUX-NEXT: sbbl $0, %ecx -; LINUX-NEXT: setb %cl -; LINUX-NEXT: andb $1, %cl -; LINUX-NEXT: movl %eax, %ebx -; LINUX-NEXT: jne .LBB3_3 -; LINUX-NEXT: # BB#2: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB3_1 Depth=1 +; LINUX-NEXT: movl $0, %ecx +; LINUX-NEXT: cmovbl %edx, %ecx ; LINUX-NEXT: movl $8, %ebx -; LINUX-NEXT: .LBB3_3: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB3_1 Depth=1 -; LINUX-NEXT: testb %cl, %cl -; LINUX-NEXT: movl %edx, %ecx -; LINUX-NEXT: jne .LBB3_5 -; LINUX-NEXT: # BB#4: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB3_1 Depth=1 -; LINUX-NEXT: xorl %ecx, %ecx -; LINUX-NEXT: .LBB3_5: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB3_1 Depth=1 +; LINUX-NEXT: cmovbl %eax, %ebx ; 
LINUX-NEXT: lock cmpxchg8b sc64 ; LINUX-NEXT: jne .LBB3_1 -; LINUX-NEXT: # BB#6: # %atomicrmw.end +; LINUX-NEXT: # BB#2: # %atomicrmw.end ; LINUX-NEXT: popl %ebx ; LINUX-NEXT: retl ; @@ -307,26 +216,13 @@ ; PIC-NEXT: cmpl $9, %eax ; PIC-NEXT: movl %edx, %ecx ; PIC-NEXT: sbbl $0, %ecx -; PIC-NEXT: setb %cl -; PIC-NEXT: andb $1, %cl -; PIC-NEXT: movl %eax, %ebx -; PIC-NEXT: jne LBB3_3 -; PIC-NEXT: ## BB#2: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB3_1 Depth=1 +; PIC-NEXT: movl $0, %ecx +; PIC-NEXT: cmovbl %edx, %ecx ; PIC-NEXT: movl $8, %ebx -; PIC-NEXT: LBB3_3: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB3_1 Depth=1 -; PIC-NEXT: testb %cl, %cl -; PIC-NEXT: movl %edx, %ecx -; PIC-NEXT: jne LBB3_5 -; PIC-NEXT: ## BB#4: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB3_1 Depth=1 -; PIC-NEXT: xorl %ecx, %ecx -; PIC-NEXT: LBB3_5: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB3_1 Depth=1 +; PIC-NEXT: cmovbl %eax, %ebx ; PIC-NEXT: lock cmpxchg8b (%esi) ; PIC-NEXT: jne LBB3_1 -; PIC-NEXT: ## BB#6: ## %atomicrmw.end +; PIC-NEXT: ## BB#2: ## %atomicrmw.end ; PIC-NEXT: popl %esi ; PIC-NEXT: popl %ebx ; PIC-NEXT: retl Index: llvm/trunk/test/CodeGen/X86/atomic128.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/atomic128.ll +++ llvm/trunk/test/CodeGen/X86/atomic128.ll @@ -165,26 +165,13 @@ ; CHECK-NEXT: cmpq %rax, %rsi ; CHECK-NEXT: movq %r8, %rcx ; CHECK-NEXT: sbbq %rdx, %rcx -; CHECK-NEXT: setge %cl -; CHECK-NEXT: andb $1, %cl -; CHECK-NEXT: movq %rax, %rbx -; CHECK-NEXT: jne LBB5_3 -; CHECK-NEXT: ## BB#2: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB5_1 Depth=1 -; CHECK-NEXT: movq %rsi, %rbx -; CHECK-NEXT: LBB5_3: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB5_1 Depth=1 -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: movq %rdx, %rcx -; CHECK-NEXT: jne LBB5_5 -; CHECK-NEXT: ## BB#4: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB5_1 Depth=1 ; CHECK-NEXT: movq 
%r8, %rcx -; CHECK-NEXT: LBB5_5: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB5_1 Depth=1 +; CHECK-NEXT: cmovgeq %rdx, %rcx +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: cmovgeq %rax, %rbx ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: jne LBB5_1 -; CHECK-NEXT: ## BB#6: ## %atomicrmw.end +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end ; CHECK-NEXT: movq %rax, {{.*}}(%rip) ; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) ; CHECK-NEXT: popq %rbx @@ -211,26 +198,13 @@ ; CHECK-NEXT: cmpq %rsi, %rax ; CHECK-NEXT: movq %rdx, %rcx ; CHECK-NEXT: sbbq %r8, %rcx -; CHECK-NEXT: setge %cl -; CHECK-NEXT: andb $1, %cl -; CHECK-NEXT: movq %rax, %rbx -; CHECK-NEXT: jne LBB6_3 -; CHECK-NEXT: ## BB#2: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB6_1 Depth=1 -; CHECK-NEXT: movq %rsi, %rbx -; CHECK-NEXT: LBB6_3: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB6_1 Depth=1 -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: movq %rdx, %rcx -; CHECK-NEXT: jne LBB6_5 -; CHECK-NEXT: ## BB#4: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB6_1 Depth=1 ; CHECK-NEXT: movq %r8, %rcx -; CHECK-NEXT: LBB6_5: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB6_1 Depth=1 +; CHECK-NEXT: cmovgeq %rdx, %rcx +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: cmovgeq %rax, %rbx ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: jne LBB6_1 -; CHECK-NEXT: ## BB#6: ## %atomicrmw.end +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end ; CHECK-NEXT: movq %rax, {{.*}}(%rip) ; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) ; CHECK-NEXT: popq %rbx @@ -257,26 +231,13 @@ ; CHECK-NEXT: cmpq %rax, %rsi ; CHECK-NEXT: movq %r8, %rcx ; CHECK-NEXT: sbbq %rdx, %rcx -; CHECK-NEXT: setae %cl -; CHECK-NEXT: andb $1, %cl -; CHECK-NEXT: movq %rax, %rbx -; CHECK-NEXT: jne LBB7_3 -; CHECK-NEXT: ## BB#2: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB7_1 Depth=1 -; CHECK-NEXT: movq %rsi, %rbx -; CHECK-NEXT: LBB7_3: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB7_1 Depth=1 -; CHECK-NEXT: testb %cl, %cl 
-; CHECK-NEXT: movq %rdx, %rcx -; CHECK-NEXT: jne LBB7_5 -; CHECK-NEXT: ## BB#4: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB7_1 Depth=1 ; CHECK-NEXT: movq %r8, %rcx -; CHECK-NEXT: LBB7_5: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB7_1 Depth=1 +; CHECK-NEXT: cmovaeq %rdx, %rcx +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: cmovaeq %rax, %rbx ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: jne LBB7_1 -; CHECK-NEXT: ## BB#6: ## %atomicrmw.end +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end ; CHECK-NEXT: movq %rax, {{.*}}(%rip) ; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) ; CHECK-NEXT: popq %rbx @@ -303,26 +264,13 @@ ; CHECK-NEXT: cmpq %rax, %rsi ; CHECK-NEXT: movq %r8, %rcx ; CHECK-NEXT: sbbq %rdx, %rcx -; CHECK-NEXT: setb %cl -; CHECK-NEXT: andb $1, %cl -; CHECK-NEXT: movq %rax, %rbx -; CHECK-NEXT: jne LBB8_3 -; CHECK-NEXT: ## BB#2: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB8_1 Depth=1 -; CHECK-NEXT: movq %rsi, %rbx -; CHECK-NEXT: LBB8_3: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB8_1 Depth=1 -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: movq %rdx, %rcx -; CHECK-NEXT: jne LBB8_5 -; CHECK-NEXT: ## BB#4: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB8_1 Depth=1 ; CHECK-NEXT: movq %r8, %rcx -; CHECK-NEXT: LBB8_5: ## %atomicrmw.start -; CHECK-NEXT: ## in Loop: Header=BB8_1 Depth=1 +; CHECK-NEXT: cmovbq %rdx, %rcx +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: cmovbq %rax, %rbx ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: jne LBB8_1 -; CHECK-NEXT: ## BB#6: ## %atomicrmw.end +; CHECK-NEXT: ## BB#2: ## %atomicrmw.end ; CHECK-NEXT: movq %rax, {{.*}}(%rip) ; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) ; CHECK-NEXT: popq %rbx Index: llvm/trunk/test/CodeGen/X86/cmov.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/cmov.ll +++ llvm/trunk/test/CodeGen/X86/cmov.ll @@ -208,7 +208,8 @@ define i32 @smin(i32 %x) { ; CHECK-LABEL: smin: ; CHECK: # BB#0: -; CHECK-NEXT: xorl 
$-1, %edi +; CHECK-NEXT: notl %edi +; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: movl $-1, %eax ; CHECK-NEXT: cmovsl %edi, %eax ; CHECK-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/cmp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/cmp.ll +++ llvm/trunk/test/CodeGen/X86/cmp.ll @@ -388,13 +388,14 @@ ; CHECK-LABEL: test20: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; CHECK-NEXT: andl $16777215, %edi ## encoding: [0x81,0xe7,0xff,0xff,0xff,0x00] +; CHECK-NEXT: testl $16777215, %edi ## encoding: [0xf7,0xc7,0xff,0xff,0xff,0x00] ; CHECK-NEXT: ## imm = 0xFFFFFF ; CHECK-NEXT: setne %al ## encoding: [0x0f,0x95,0xc0] ; CHECK-NEXT: movzbl %sil, %ecx ## encoding: [0x40,0x0f,0xb6,0xce] ; CHECK-NEXT: addl %eax, %ecx ## encoding: [0x01,0xc1] ; CHECK-NEXT: setne (%rdx) ## encoding: [0x0f,0x95,0x02] -; CHECK-NEXT: testl %edi, %edi ## encoding: [0x85,0xff] +; CHECK-NEXT: testl $16777215, %edi ## encoding: [0xf7,0xc7,0xff,0xff,0xff,0x00] +; CHECK-NEXT: ## imm = 0xFFFFFF ; CHECK-NEXT: setne {{.*}}(%rip) ## encoding: [0x0f,0x95,0x05,A,A,A,A] ; CHECK-NEXT: ## fixup A - offset: 3, value: _d-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq ## encoding: [0xc3] Index: llvm/trunk/test/CodeGen/X86/select.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/select.ll +++ llvm/trunk/test/CodeGen/X86/select.ll @@ -166,19 +166,15 @@ ; MCU-LABEL: test5: ; MCU: # BB#0: ; MCU-NEXT: pushl %esi -; MCU-NEXT: andb $1, %al +; MCU-NEXT: movl {{[0-9]+}}(%esp), %esi +; MCU-NEXT: testb $1, %al ; MCU-NEXT: jne .LBB4_2 ; MCU-NEXT: # BB#1: +; MCU-NEXT: movw {{[0-9]+}}(%esp), %cx ; MCU-NEXT: movw {{[0-9]+}}(%esp), %dx ; MCU-NEXT: .LBB4_2: -; MCU-NEXT: movl {{[0-9]+}}(%esp), %esi -; MCU-NEXT: testb %al, %al -; MCU-NEXT: jne .LBB4_4 -; MCU-NEXT: # BB#3: -; MCU-NEXT: movw {{[0-9]+}}(%esp), %cx -; MCU-NEXT: .LBB4_4: -; MCU-NEXT: movw %dx, (%esi) ; MCU-NEXT: movw %cx, 
2(%esi) +; MCU-NEXT: movw %dx, (%esi) ; MCU-NEXT: popl %esi ; MCU-NEXT: retl %x = select i1 %c, <2 x i16> %a, <2 x i16> %b @@ -292,81 +288,69 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwind { ; GENERIC-LABEL: test8: ; GENERIC: ## BB#0: -; GENERIC-NEXT: andb $1, %dil +; GENERIC-NEXT: testb $1, %dil ; GENERIC-NEXT: jne LBB7_1 ; GENERIC-NEXT: ## BB#2: -; GENERIC-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; GENERIC-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; GENERIC-NEXT: jmp LBB7_3 -; GENERIC-NEXT: LBB7_1: ; GENERIC-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; GENERIC-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; GENERIC-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; GENERIC-NEXT: LBB7_3: -; GENERIC-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; GENERIC-NEXT: testb %dil, %dil -; GENERIC-NEXT: jne LBB7_4 -; GENERIC-NEXT: ## BB#5: +; GENERIC-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; GENERIC-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; GENERIC-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; GENERIC-NEXT: jmp LBB7_3 +; GENERIC-NEXT: LBB7_1: +; GENERIC-NEXT: movd %r9d, %xmm0 +; GENERIC-NEXT: movd %r8d, %xmm1 +; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; GENERIC-NEXT: movd %ecx, %xmm2 +; GENERIC-NEXT: movd %edx, %xmm0 +; GENERIC-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; GENERIC-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; GENERIC-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; GENERIC-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; GENERIC-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; GENERIC-NEXT: jmp LBB7_6 -; GENERIC-NEXT: LBB7_4: -; GENERIC-NEXT: movd %r9d, 
%xmm1 -; GENERIC-NEXT: movd %r8d, %xmm2 -; GENERIC-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; GENERIC-NEXT: movd %ecx, %xmm3 -; GENERIC-NEXT: movd %edx, %xmm1 -; GENERIC-NEXT: LBB7_6: -; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; GENERIC-NEXT: LBB7_3: +; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; GENERIC-NEXT: pcmpeqd %xmm2, %xmm2 -; GENERIC-NEXT: paddd %xmm2, %xmm1 ; GENERIC-NEXT: paddd %xmm2, %xmm0 -; GENERIC-NEXT: movq %xmm0, 16(%rsi) -; GENERIC-NEXT: movdqa %xmm1, (%rsi) +; GENERIC-NEXT: paddd %xmm2, %xmm1 +; GENERIC-NEXT: movq %xmm1, 16(%rsi) +; GENERIC-NEXT: movdqa %xmm0, (%rsi) ; GENERIC-NEXT: retq ; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test8: ; ATOM: ## BB#0: -; ATOM-NEXT: andb $1, %dil +; ATOM-NEXT: testb $1, %dil ; ATOM-NEXT: jne LBB7_1 ; ATOM-NEXT: ## BB#2: -; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; ATOM-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; ATOM-NEXT: jmp LBB7_3 -; ATOM-NEXT: LBB7_1: -; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; ATOM-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; ATOM-NEXT: LBB7_3: -; ATOM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; ATOM-NEXT: testb %dil, %dil -; ATOM-NEXT: jne LBB7_4 -; ATOM-NEXT: ## BB#5: ; ATOM-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; ATOM-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; ATOM-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero +; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; ATOM-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; ATOM-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] -; ATOM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] -; ATOM-NEXT: jmp LBB7_6 -; ATOM-NEXT: LBB7_4: -; ATOM-NEXT: 
movd %r9d, %xmm1 +; ATOM-NEXT: jmp LBB7_3 +; ATOM-NEXT: LBB7_1: +; ATOM-NEXT: movd %r9d, %xmm0 ; ATOM-NEXT: movd %r8d, %xmm2 -; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; ATOM-NEXT: movd %ecx, %xmm3 -; ATOM-NEXT: movd %edx, %xmm1 -; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; ATOM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; ATOM-NEXT: LBB7_6: +; ATOM-NEXT: movd %edx, %xmm0 +; ATOM-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero +; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; ATOM-NEXT: LBB7_3: +; ATOM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; ATOM-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; ATOM-NEXT: pcmpeqd %xmm2, %xmm2 +; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] ; ATOM-NEXT: paddd %xmm2, %xmm0 ; ATOM-NEXT: paddd %xmm2, %xmm1 -; ATOM-NEXT: movq %xmm0, 16(%rsi) -; ATOM-NEXT: movdqa %xmm1, (%rsi) +; ATOM-NEXT: movq %xmm1, 16(%rsi) +; ATOM-NEXT: movdqa %xmm0, (%rsi) ; ATOM-NEXT: retq ; ATOM-NEXT: ## -- End function ; @@ -376,68 +360,67 @@ ; MCU-NEXT: pushl %ebx ; MCU-NEXT: pushl %edi ; MCU-NEXT: pushl %esi -; MCU-NEXT: andb $1, %al +; MCU-NEXT: testb $1, %al ; MCU-NEXT: jne .LBB7_1 ; MCU-NEXT: # BB#2: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %ecx -; MCU-NEXT: movl (%ecx), %ecx +; MCU-NEXT: leal {{[0-9]+}}(%esp), %eax +; MCU-NEXT: movl (%eax), %eax ; MCU-NEXT: je .LBB7_5 ; MCU-NEXT: .LBB7_4: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %esi -; MCU-NEXT: movl (%esi), %esi +; MCU-NEXT: leal {{[0-9]+}}(%esp), %ecx +; MCU-NEXT: movl (%ecx), %ecx ; MCU-NEXT: je .LBB7_8 ; MCU-NEXT: .LBB7_7: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %edi -; MCU-NEXT: movl (%edi), %edi +; MCU-NEXT: leal {{[0-9]+}}(%esp), %esi +; MCU-NEXT: movl (%esi), %esi ; MCU-NEXT: je .LBB7_11 ; MCU-NEXT: .LBB7_10: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebx -; MCU-NEXT: movl (%ebx), %ebx +; MCU-NEXT: leal 
{{[0-9]+}}(%esp), %edi +; MCU-NEXT: movl (%edi), %edi ; MCU-NEXT: je .LBB7_14 ; MCU-NEXT: .LBB7_13: +; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebx +; MCU-NEXT: movl (%ebx), %ebx +; MCU-NEXT: je .LBB7_17 +; MCU-NEXT: .LBB7_16: ; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebp -; MCU-NEXT: jmp .LBB7_15 +; MCU-NEXT: jmp .LBB7_18 ; MCU-NEXT: .LBB7_1: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %ecx -; MCU-NEXT: movl (%ecx), %ecx +; MCU-NEXT: leal {{[0-9]+}}(%esp), %eax +; MCU-NEXT: movl (%eax), %eax ; MCU-NEXT: jne .LBB7_4 ; MCU-NEXT: .LBB7_5: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %esi -; MCU-NEXT: movl (%esi), %esi +; MCU-NEXT: leal {{[0-9]+}}(%esp), %ecx +; MCU-NEXT: movl (%ecx), %ecx ; MCU-NEXT: jne .LBB7_7 ; MCU-NEXT: .LBB7_8: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %edi -; MCU-NEXT: movl (%edi), %edi +; MCU-NEXT: leal {{[0-9]+}}(%esp), %esi +; MCU-NEXT: movl (%esi), %esi ; MCU-NEXT: jne .LBB7_10 ; MCU-NEXT: .LBB7_11: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebx -; MCU-NEXT: movl (%ebx), %ebx +; MCU-NEXT: leal {{[0-9]+}}(%esp), %edi +; MCU-NEXT: movl (%edi), %edi ; MCU-NEXT: jne .LBB7_13 ; MCU-NEXT: .LBB7_14: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebp -; MCU-NEXT: .LBB7_15: -; MCU-NEXT: movl (%ebp), %ebp -; MCU-NEXT: testb %al, %al +; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebx +; MCU-NEXT: movl (%ebx), %ebx ; MCU-NEXT: jne .LBB7_16 -; MCU-NEXT: # BB#17: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %eax -; MCU-NEXT: jmp .LBB7_18 -; MCU-NEXT: .LBB7_16: -; MCU-NEXT: leal {{[0-9]+}}(%esp), %eax +; MCU-NEXT: .LBB7_17: +; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebp ; MCU-NEXT: .LBB7_18: -; MCU-NEXT: movl (%eax), %eax -; MCU-NEXT: decl %eax +; MCU-NEXT: movl (%ebp), %ebp ; MCU-NEXT: decl %ebp ; MCU-NEXT: decl %ebx ; MCU-NEXT: decl %edi ; MCU-NEXT: decl %esi ; MCU-NEXT: decl %ecx -; MCU-NEXT: movl %ecx, 20(%edx) -; MCU-NEXT: movl %esi, 16(%edx) -; MCU-NEXT: movl %edi, 12(%edx) -; MCU-NEXT: movl %ebx, 8(%edx) -; MCU-NEXT: movl %ebp, 4(%edx) -; MCU-NEXT: movl %eax, (%edx) +; MCU-NEXT: decl %eax +; MCU-NEXT: movl %eax, 20(%edx) +; 
MCU-NEXT: movl %ecx, 16(%edx) +; MCU-NEXT: movl %esi, 12(%edx) +; MCU-NEXT: movl %edi, 8(%edx) +; MCU-NEXT: movl %ebx, 4(%edx) +; MCU-NEXT: movl %ebp, (%edx) ; MCU-NEXT: popl %esi ; MCU-NEXT: popl %edi ; MCU-NEXT: popl %ebx Index: llvm/trunk/test/CodeGen/X86/tbm_patterns.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/tbm_patterns.ll +++ llvm/trunk/test/CodeGen/X86/tbm_patterns.ll @@ -27,10 +27,9 @@ define i32 @test_x86_tbm_bextri_u32_z(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_bextri_u32_z: ; CHECK: # BB#0: -; CHECK-NEXT: shrl $4, %edi -; CHECK-NEXT: andl $4095, %edi # imm = 0xFFF -; CHECK-NEXT: cmovel %esi, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: bextr $3076, %edi, %eax # imm = 0xC04 +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: cmovel %esi, %eax ; CHECK-NEXT: retq %t0 = lshr i32 %a, 4 %t1 = and i32 %t0, 4095 @@ -63,10 +62,10 @@ define i64 @test_x86_tbm_bextri_u64_z(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_bextri_u64_z: ; CHECK: # BB#0: -; CHECK-NEXT: shrl $4, %edi -; CHECK-NEXT: andl $4095, %edi # imm = 0xFFF -; CHECK-NEXT: cmoveq %rsi, %rdi -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: bextr $3076, %edi, %eax # imm = 0xC04 +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: cmovneq %rax, %rsi +; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: retq %t0 = lshr i64 %a, 4 %t1 = and i64 %t0, 4095 @@ -88,9 +87,8 @@ define i32 @test_x86_tbm_blcfill_u32_z(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_blcfill_u32_z: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %EDI %EDI %RDI -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: andl %edi, %eax +; CHECK-NEXT: blcfill %edi, %eax +; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: cmovel %esi, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, 1 @@ -113,8 +111,8 @@ define i64 @test_x86_tbm_blcfill_u64_z(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_blcfill_u64_z: ; CHECK: # BB#0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: andq %rdi, %rax +; 
CHECK-NEXT: blcfill %rdi, %rax +; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: cmoveq %rsi, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, 1 @@ -138,10 +136,8 @@ define i32 @test_x86_tbm_blci_u32_z(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_blci_u32_z: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %EDI %EDI %RDI -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: blci %edi, %eax +; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: cmovel %esi, %eax ; CHECK-NEXT: retq %t0 = add i32 1, %a @@ -166,9 +162,8 @@ define i64 @test_x86_tbm_blci_u64_z(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_blci_u64_z: ; CHECK: # BB#0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: orq %rdi, %rax +; CHECK-NEXT: blci %rdi, %rax +; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: cmoveq %rsi, %rax ; CHECK-NEXT: retq %t0 = add i64 1, %a @@ -213,11 +208,8 @@ define i32 @test_x86_tbm_blcic_u32_z(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_blcic_u32_z: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %EDI %EDI %RDI -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: notl %ecx -; CHECK-NEXT: andl %ecx, %eax +; CHECK-NEXT: blcic %edi, %eax +; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: cmovel %esi, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 @@ -242,9 +234,8 @@ define i64 @test_x86_tbm_blcic_u64_z(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_blcic_u64_z: ; CHECK: # BB#0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: notq %rdi -; CHECK-NEXT: andq %rdi, %rax +; CHECK-NEXT: blcic %rdi, %rax +; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: cmoveq %rsi, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 @@ -268,9 +259,8 @@ define i32 @test_x86_tbm_blcmsk_u32_z(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_blcmsk_u32_z: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %EDI %EDI %RDI -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: xorl %edi, %eax +; CHECK-NEXT: blcmsk %edi, %eax +; CHECK-NEXT: 
testl %eax, %eax ; CHECK-NEXT: cmovel %esi, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, 1 @@ -293,8 +283,8 @@ define i64 @test_x86_tbm_blcmsk_u64_z(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_blcmsk_u64_z: ; CHECK: # BB#0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: xorq %rdi, %rax +; CHECK-NEXT: blcmsk %rdi, %rax +; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: cmoveq %rsi, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, 1 @@ -317,9 +307,8 @@ define i32 @test_x86_tbm_blcs_u32_z(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_blcs_u32_z: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %EDI %EDI %RDI -; CHECK-NEXT: leal 1(%rdi), %eax -; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: blcs %edi, %eax +; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: cmovel %esi, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, 1 @@ -342,8 +331,8 @@ define i64 @test_x86_tbm_blcs_u64_z(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_blcs_u64_z: ; CHECK: # BB#0: -; CHECK-NEXT: leaq 1(%rdi), %rax -; CHECK-NEXT: orq %rdi, %rax +; CHECK-NEXT: blcs %rdi, %rax +; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: cmoveq %rsi, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, 1 @@ -366,9 +355,8 @@ define i32 @test_x86_tbm_blsfill_u32_z(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_blsfill_u32_z: ; CHECK: # BB#0: -; CHECK-NEXT: # kill: %EDI %EDI %RDI -; CHECK-NEXT: leal -1(%rdi), %eax -; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: blsfill %edi, %eax +; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: cmovel %esi, %eax ; CHECK-NEXT: retq %t0 = add i32 %a, -1 @@ -391,8 +379,8 @@ define i64 @test_x86_tbm_blsfill_u64_z(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_blsfill_u64_z: ; CHECK: # BB#0: -; CHECK-NEXT: leaq -1(%rdi), %rax -; CHECK-NEXT: orq %rdi, %rax +; CHECK-NEXT: blsfill %rdi, %rax +; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: cmoveq %rsi, %rax ; CHECK-NEXT: retq %t0 = add i64 %a, -1 @@ -416,12 +404,9 @@ define i32 @test_x86_tbm_blsic_u32_z(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: 
test_x86_tbm_blsic_u32_z: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: decl %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: cmovel %esi, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: blsic %edi, %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: cmovel %esi, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, -1 @@ -445,12 +430,9 @@ define i64 @test_x86_tbm_blsic_u64_z(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_blsic_u64_z: ; CHECK: # BB#0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: decq %rdi -; CHECK-NEXT: orq %rax, %rdi -; CHECK-NEXT: cmoveq %rsi, %rdi -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: blsic %rdi, %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: cmoveq %rsi, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, -1 @@ -474,12 +456,9 @@ define i32 @test_x86_tbm_t1mskc_u32_z(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_t1mskc_u32_z: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: incl %edi -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: cmovel %esi, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: t1mskc %edi, %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: cmovel %esi, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, 1 @@ -503,12 +482,9 @@ define i64 @test_x86_tbm_t1mskc_u64_z(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_t1mskc_u64_z: ; CHECK: # BB#0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: incq %rdi -; CHECK-NEXT: orq %rax, %rdi -; CHECK-NEXT: cmoveq %rsi, %rdi -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: t1mskc %rdi, %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: cmoveq %rsi, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, 1 @@ -532,12 +508,9 @@ define i32 @test_x86_tbm_tzmsk_u32_z(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_tzmsk_u32_z: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: 
notl %eax -; CHECK-NEXT: decl %edi -; CHECK-NEXT: andl %eax, %edi -; CHECK-NEXT: cmovel %esi, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: tzmsk %edi, %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: cmovel %esi, %eax ; CHECK-NEXT: retq %t0 = xor i32 %a, -1 %t1 = add i32 %a, -1 @@ -561,12 +534,9 @@ define i64 @test_x86_tbm_tzmsk_u64_z(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_tzmsk_u64_z: ; CHECK: # BB#0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: notq %rax -; CHECK-NEXT: decq %rdi -; CHECK-NEXT: andq %rax, %rdi -; CHECK-NEXT: cmoveq %rsi, %rdi -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: tzmsk %rdi, %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: cmoveq %rsi, %rax ; CHECK-NEXT: retq %t0 = xor i64 %a, -1 %t1 = add i64 %a, -1