Index: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -472,6 +472,9 @@
                                 SDValue &InFlag);
 
     bool tryOptimizeRem8Extend(SDNode *N);
+
+    bool hasNoSignFlagUses(SDValue Flags) const;
+    bool hasNoCarryFlagUses(SDValue Flags) const;
   };
 }
 
@@ -2225,7 +2228,7 @@
 
 /// Test whether the given X86ISD::CMP node has any uses which require the SF
 /// flag to be accurate.
-static bool hasNoSignFlagUses(SDValue Flags) {
+bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
   // Examine each user of the node.
   for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
        UI != UE; ++UI) {
@@ -2265,7 +2268,7 @@
 
 /// Test whether the given node which sets flags has any uses which require the
 /// CF flag to be accurate.
-static bool hasNoCarryFlagUses(SDValue Flags) {
+bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
   // Examine each user of the node.
   for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
        UI != UE; ++UI) {
Index: llvm/trunk/lib/Target/X86/X86InstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td
@@ -2387,6 +2387,16 @@
 // Pattern fragments to auto generate BMI instructions.
 //===----------------------------------------------------------------------===//
 
+def or_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+                           (X86or_flag node:$lhs, node:$rhs), [{
+  return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+def xor_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+                            (X86xor_flag node:$lhs, node:$rhs), [{
+  return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
 let Predicates = [HasBMI] in {
   // FIXME: patterns for the load versions are not implemented
   def : Pat<(and GR32:$src, (add GR32:$src, -1)),
@@ -2403,6 +2413,14 @@
             (BLSI32rr GR32:$src)>;
   def : Pat<(and GR64:$src, (ineg GR64:$src)),
             (BLSI64rr GR64:$src)>;
+
+  // Versions to match flag producing ops.
+  // X86and_flag nodes are rarely created. Those should use CMP+AND. We do
+  // TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed.
+  def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, -1)),
+            (BLSMSK32rr GR32:$src)>;
+  def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, -1)),
+            (BLSMSK64rr GR64:$src)>;
 }
 
 multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
@@ -2801,6 +2819,45 @@
             (TZMSK32rr GR32:$src)>;
   def : Pat<(and (not GR64:$src), (add GR64:$src, -1)),
             (TZMSK64rr GR64:$src)>;
+
+  // Patterns to match flag producing ops.
+  // X86and_flag nodes are rarely created. Those should use CMP+AND. We do
+  // TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed.
+  def : Pat<(or_flag_nocf GR32:$src, (not (add GR32:$src, 1))),
+            (BLCI32rr GR32:$src)>;
+  def : Pat<(or_flag_nocf GR64:$src, (not (add GR64:$src, 1))),
+            (BLCI64rr GR64:$src)>;
+
+  // Extra patterns because opt can optimize the above patterns to this.
+  def : Pat<(or_flag_nocf GR32:$src, (sub -2, GR32:$src)),
+            (BLCI32rr GR32:$src)>;
+  def : Pat<(or_flag_nocf GR64:$src, (sub -2, GR64:$src)),
+            (BLCI64rr GR64:$src)>;
+
+  def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, 1)),
+            (BLCMSK32rr GR32:$src)>;
+  def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, 1)),
+            (BLCMSK64rr GR64:$src)>;
+
+  def : Pat<(or_flag_nocf GR32:$src, (add GR32:$src, 1)),
+            (BLCS32rr GR32:$src)>;
+  def : Pat<(or_flag_nocf GR64:$src, (add GR64:$src, 1)),
+            (BLCS64rr GR64:$src)>;
+
+  def : Pat<(or_flag_nocf GR32:$src, (add GR32:$src, -1)),
+            (BLSFILL32rr GR32:$src)>;
+  def : Pat<(or_flag_nocf GR64:$src, (add GR64:$src, -1)),
+            (BLSFILL64rr GR64:$src)>;
+
+  def : Pat<(or_flag_nocf (not GR32:$src), (add GR32:$src, -1)),
+            (BLSIC32rr GR32:$src)>;
+  def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, -1)),
+            (BLSIC64rr GR64:$src)>;
+
+  def : Pat<(or_flag_nocf (not GR32:$src), (add GR32:$src, 1)),
+            (T1MSKC32rr GR32:$src)>;
+  def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, 1)),
+            (T1MSKC64rr GR64:$src)>;
 } // HasTBM
 
 //===----------------------------------------------------------------------===//
Index: llvm/trunk/test/CodeGen/X86/bmi.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/bmi.ll
+++ llvm/trunk/test/CodeGen/X86/bmi.ll
@@ -691,9 +691,7 @@
 define i32 @blsmsk32_z2(i32 %a, i32 %b, i32 %c) nounwind {
 ; X86-LABEL: blsmsk32_z2:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    leal -1(%eax), %ecx
-; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    cmovel %eax, %ecx
@@ -703,9 +701,7 @@
 ; X64-LABEL: blsmsk32_z2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    leal -1(%rdi), %ecx
-; X64-NEXT:    xorl %edi, %ecx
+; X64-NEXT:    blsmskl %edi, %ecx
 ; X64-NEXT:    cmovnel %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, 1
@@ -800,8 +796,7 @@
 ; X64-LABEL: blsmsk64_z2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    leaq -1(%rdi), %rcx
-; X64-NEXT:    xorq %rdi, %rcx
+; X64-NEXT:    blsmskq %rdi, %rcx
 ; X64-NEXT:    cmovneq %rdx, %rax
 ; X64-NEXT:    retq
   %t0 = sub i64 %a, 1
Index: llvm/trunk/test/CodeGen/X86/tbm_patterns.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/tbm_patterns.ll
+++ llvm/trunk/test/CodeGen/X86/tbm_patterns.ll
@@ -226,10 +226,7 @@
 ; CHECK-LABEL: test_x86_tbm_blci_u32_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal 1(%rdi), %ecx
-; CHECK-NEXT:    notl %ecx
-; CHECK-NEXT:    orl %edi, %ecx
+; CHECK-NEXT:    blcil %edi, %ecx
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = add i32 1, %a
@@ -269,9 +266,7 @@
 ; CHECK-LABEL: test_x86_tbm_blci_u64_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
-; CHECK-NEXT:    leaq 1(%rdi), %rcx
-; CHECK-NEXT:    notq %rcx
-; CHECK-NEXT:    orq %rdi, %rcx
+; CHECK-NEXT:    blciq %rdi, %rcx
 ; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = add i64 1, %a
@@ -409,9 +404,7 @@
 ; CHECK-LABEL: test_x86_tbm_blcmsk_u32_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal 1(%rdi), %ecx
-; CHECK-NEXT:    xorl %edi, %ecx
+; CHECK-NEXT:    blcmskl %edi, %ecx
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = add i32 %a, 1
@@ -448,8 +441,7 @@
 ; CHECK-LABEL: test_x86_tbm_blcmsk_u64_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
-; CHECK-NEXT:    leaq 1(%rdi), %rcx
-; CHECK-NEXT:    xorq %rdi, %rcx
+; CHECK-NEXT:    blcmskq %rdi, %rcx
 ; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = add i64 %a, 1
@@ -486,9 +478,7 @@
 ; CHECK-LABEL: test_x86_tbm_blcs_u32_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal 1(%rdi), %ecx
-; CHECK-NEXT:    orl %edi, %ecx
+; CHECK-NEXT:    blcsl %edi, %ecx
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = add i32 %a, 1
@@ -525,8 +515,7 @@
 ; CHECK-LABEL: test_x86_tbm_blcs_u64_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
-; CHECK-NEXT:    leaq 1(%rdi), %rcx
-; CHECK-NEXT:    orq %rdi, %rcx
+; CHECK-NEXT:    blcsq %rdi, %rcx
 ; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = add i64 %a, 1
@@ -563,9 +552,7 @@
 ; CHECK-LABEL: test_x86_tbm_blsfill_u32_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal -1(%rdi), %ecx
-; CHECK-NEXT:    orl %edi, %ecx
+; CHECK-NEXT:    blsfilll %edi, %ecx
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = add i32 %a, -1
@@ -602,8 +589,7 @@
 ; CHECK-LABEL: test_x86_tbm_blsfill_u64_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
-; CHECK-NEXT:    leaq -1(%rdi), %rcx
-; CHECK-NEXT:    orq %rdi, %rcx
+; CHECK-NEXT:    blsfillq %rdi, %rcx
 ; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = add i64 %a, -1
@@ -642,10 +628,7 @@
 ; CHECK-LABEL: test_x86_tbm_blsic_u32_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    movl %edi, %ecx
-; CHECK-NEXT:    notl %ecx
-; CHECK-NEXT:    decl %edi
-; CHECK-NEXT:    orl %ecx, %edi
+; CHECK-NEXT:    blsicl %edi, %ecx
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = xor i32 %a, -1
@@ -685,10 +668,7 @@
 ; CHECK-LABEL: test_x86_tbm_blsic_u64_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
-; CHECK-NEXT:    movq %rdi, %rcx
-; CHECK-NEXT:    notq %rcx
-; CHECK-NEXT:    decq %rdi
-; CHECK-NEXT:    orq %rcx, %rdi
+; CHECK-NEXT:    blsicq %rdi, %rcx
 ; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = xor i64 %a, -1
@@ -728,10 +708,7 @@
 ; CHECK-LABEL: test_x86_tbm_t1mskc_u32_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    movl %edi, %ecx
-; CHECK-NEXT:    notl %ecx
-; CHECK-NEXT:    incl %edi
-; CHECK-NEXT:    orl %ecx, %edi
+; CHECK-NEXT:    t1mskcl %edi, %ecx
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = xor i32 %a, -1
@@ -771,10 +748,7 @@
 ; CHECK-LABEL: test_x86_tbm_t1mskc_u64_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
-; CHECK-NEXT:    movq %rdi, %rcx
-; CHECK-NEXT:    notq %rcx
-; CHECK-NEXT:    incq %rdi
-; CHECK-NEXT:    orq %rcx, %rdi
+; CHECK-NEXT:    t1mskcq %rdi, %rcx
 ; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = xor i64 %a, -1
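
Illustrative example (not from the commit itself): every z2 test updated above has the same shape, where the flag-producing xor/or feeds only a zero test, so the cmove/cmovne can take ZF directly from the BMI/TBM instruction; the hasNoCarryFlagUses guard in the new PatFrags rejects the one flag (CF) whose semantics on these instructions differ from a plain XOR/OR. A minimal standalone reproducer, with a hypothetical function name but the same IR shape as blsmsk32_z2 in bmi.ll, that should now select blsmskl when run through llc -mtriple=x86_64-unknown-unknown -mattr=+bmi:

  ; The xor result is consumed only by an equality test against zero, so the
  ; flag result has no CF users and the xor_flag_nocf pattern can fire.
  define i32 @blsmsk_zf_reuse(i32 %a, i32 %b, i32 %c) nounwind {
    %t0 = sub i32 %a, 1                  ; a - 1
    %t1 = xor i32 %t0, %a                ; (a - 1) ^ a, i.e. blsmsk(a)
    %t2 = icmp eq i32 %t1, 0             ; consumes only ZF, never CF
    %t3 = select i1 %t2, i32 %b, i32 %c  ; becomes cmove/cmovne on the reused flags
    ret i32 %t3
  }

The or_flag_nocf patterns work the same way for the TBM tests: if the icmp were replaced by a carry-consuming use (e.g. a uadd.with.overflow of the result), the predicate would fail and selection would fall back to the separate LEA/NOT/OR sequence plus TEST.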