diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -1263,20 +1263,28 @@ return LD->getAlign() >= 4 && LD->isSimple(); }]>; - -// An 'and' node with a single use. -def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{ - return N->hasOneUse(); -}]>; -// An 'srl' node with a single use. -def srl_su : PatFrag<(ops node:$lhs, node:$rhs), (srl node:$lhs, node:$rhs), [{ +// binary op with only one user +class binop_oneuse + : PatFrag<(ops node:$A, node:$B), + (operator node:$A, node:$B), [{ return N->hasOneUse(); }]>; -// An 'trunc' node with a single use. -def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{ + +def add_su : binop_oneuse; +def and_su : binop_oneuse; +def srl_su : binop_oneuse; + +// unary op with only one user +class unop_oneuse + : PatFrag<(ops node:$A), + (operator node:$A), [{ return N->hasOneUse(); }]>; + +def ineg_su : unop_oneuse; +def trunc_su : unop_oneuse; + //===----------------------------------------------------------------------===// // Instruction list. // @@ -2546,37 +2554,41 @@ return hasNoCarryFlagUses(SDValue(N, 1)); }]>; + let Predicates = [HasBMI] in { - // FIXME: patterns for the load versions are not implemented - def : Pat<(and GR32:$src, (add GR32:$src, -1)), + // FIXME(1): patterns for the load versions are not implemented + // FIXME(2): By only matching `add_su` and `ineg_su` we may emit + // extra `mov` instructions if `src` has future uses. It may be better + // to always match if `src` has more users. + def : Pat<(and GR32:$src, (add_su GR32:$src, -1)), (BLSR32rr GR32:$src)>; - def : Pat<(and GR64:$src, (add GR64:$src, -1)), + def : Pat<(and GR64:$src, (add_su GR64:$src, -1)), (BLSR64rr GR64:$src)>; - def : Pat<(xor GR32:$src, (add GR32:$src, -1)), + def : Pat<(xor GR32:$src, (add_su GR32:$src, -1)), (BLSMSK32rr GR32:$src)>; - def : Pat<(xor GR64:$src, (add GR64:$src, -1)), + def : Pat<(xor GR64:$src, (add_su GR64:$src, -1)), (BLSMSK64rr GR64:$src)>; - def : Pat<(and GR32:$src, (ineg GR32:$src)), + def : Pat<(and GR32:$src, (ineg_su GR32:$src)), (BLSI32rr GR32:$src)>; - def : Pat<(and GR64:$src, (ineg GR64:$src)), + def : Pat<(and GR64:$src, (ineg_su GR64:$src)), (BLSI64rr GR64:$src)>; // Versions to match flag producing ops. - def : Pat<(and_flag_nocf GR32:$src, (add GR32:$src, -1)), + def : Pat<(and_flag_nocf GR32:$src, (add_su GR32:$src, -1)), (BLSR32rr GR32:$src)>; - def : Pat<(and_flag_nocf GR64:$src, (add GR64:$src, -1)), + def : Pat<(and_flag_nocf GR64:$src, (add_su GR64:$src, -1)), (BLSR64rr GR64:$src)>; - def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, -1)), + def : Pat<(xor_flag_nocf GR32:$src, (add_su GR32:$src, -1)), (BLSMSK32rr GR32:$src)>; - def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, -1)), + def : Pat<(xor_flag_nocf GR64:$src, (add_su GR64:$src, -1)), (BLSMSK64rr GR64:$src)>; - def : Pat<(and_flag_nocf GR32:$src, (ineg GR32:$src)), + def : Pat<(and_flag_nocf GR32:$src, (ineg_su GR32:$src)), (BLSI32rr GR32:$src)>; - def : Pat<(and_flag_nocf GR64:$src, (ineg GR64:$src)), + def : Pat<(and_flag_nocf GR64:$src, (ineg_su GR64:$src)), (BLSI64rr GR64:$src)>; } diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-blsi.mir b/llvm/test/CodeGen/X86/GlobalISel/select-blsi.mir --- a/llvm/test/CodeGen/X86/GlobalISel/select-blsi.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-blsi.mir @@ -24,9 +24,13 @@ liveins: $edi ; CHECK-LABEL: name: test_blsi32rr - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[BLSI32rr:%[0-9]+]]:gr32 = BLSI32rr [[COPY]], implicit-def $eflags - ; CHECK: $edi = COPY [[BLSI32rr]] + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi + ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags + ; CHECK-NEXT: [[SUB32rr:%[0-9]+]]:gr32 = SUB32rr [[MOV32r0_]], [[COPY]], implicit-def $eflags + ; CHECK-NEXT: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[SUB32rr]], [[COPY]], implicit-def $eflags + ; CHECK-NEXT: $edi = COPY [[AND32rr]] %0(s32) = COPY $edi %1(s32) = G_CONSTANT i32 0 %2(s32) = G_SUB %1, %0 @@ -50,11 +54,13 @@ liveins: $edi ; CHECK-LABEL: name: test_blsi32rr_nomatch - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags - ; CHECK: [[SUB32ri:%[0-9]+]]:gr32 = SUB32ri8 [[MOV32r0_]], 0, implicit-def $eflags - ; CHECK: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[SUB32ri]], [[COPY]], implicit-def $eflags - ; CHECK: $edi = COPY [[AND32rr]] + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi + ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags + ; CHECK-NEXT: [[SUB32ri8_:%[0-9]+]]:gr32 = SUB32ri8 [[MOV32r0_]], 0, implicit-def $eflags + ; CHECK-NEXT: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[SUB32ri8_]], [[COPY]], implicit-def $eflags + ; CHECK-NEXT: $edi = COPY [[AND32rr]] %0(s32) = COPY $edi %1(s32) = G_CONSTANT i32 0 %2(s32) = G_SUB %1, %1 diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-blsr.mir b/llvm/test/CodeGen/X86/GlobalISel/select-blsr.mir --- a/llvm/test/CodeGen/X86/GlobalISel/select-blsr.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-blsr.mir @@ -21,9 +21,12 @@ liveins: $edi ; CHECK-LABEL: name: test_blsr32rr - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[BLSR32rr:%[0-9]+]]:gr32 = BLSR32rr [[COPY]], implicit-def $eflags - ; CHECK: $edi = COPY [[BLSR32rr]] + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi + ; CHECK-NEXT: [[DEC32r:%[0-9]+]]:gr32 = DEC32r [[COPY]], implicit-def $eflags + ; CHECK-NEXT: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[DEC32r]], [[COPY]], implicit-def $eflags + ; CHECK-NEXT: $edi = COPY [[AND32rr]] %0(s32) = COPY $edi %1(s32) = G_CONSTANT i32 -1 %2(s32) = G_ADD %0, %1 @@ -47,11 +50,13 @@ liveins: $edi ; CHECK-LABEL: name: test_blsr32rr_nomatch - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri -1 - ; CHECK: [[DEC32r:%[0-9]+]]:gr32 = DEC32r [[MOV32ri]], implicit-def $eflags - ; CHECK: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[DEC32r]], [[COPY]], implicit-def $eflags - ; CHECK: $edi = COPY [[AND32rr]] + ; CHECK: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi + ; CHECK-NEXT: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri -1 + ; CHECK-NEXT: [[DEC32r:%[0-9]+]]:gr32 = DEC32r [[MOV32ri]], implicit-def $eflags + ; CHECK-NEXT: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[DEC32r]], [[COPY]], implicit-def $eflags + ; CHECK-NEXT: $edi = COPY [[AND32rr]] %0(s32) = COPY $edi %1(s32) = G_CONSTANT i32 -1 %2(s32) = G_ADD %1, %1 diff --git a/llvm/test/CodeGen/X86/bmi-out-of-order.ll b/llvm/test/CodeGen/X86/bmi-out-of-order.ll --- a/llvm/test/CodeGen/X86/bmi-out-of-order.ll +++ b/llvm/test/CodeGen/X86/bmi-out-of-order.ll @@ -6,17 +6,17 @@ ; X86-LABEL: blsmsk_used2: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: blsmskl %eax, %ecx -; X86-NEXT: decl %eax +; X86-NEXT: leal -1(%eax), %ecx +; X86-NEXT: xorl %ecx, %eax ; X86-NEXT: imull %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: blsmsk_used2: ; X64: # %bb.0: # %entry ; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: blsmskl %edi, %ecx ; X64-NEXT: leal -1(%rdi), %eax -; X64-NEXT: imull %ecx, %eax +; X64-NEXT: xorl %eax, %edi +; X64-NEXT: imull %edi, %eax ; X64-NEXT: retq entry: %sub = add i32 %a, -1 @@ -196,15 +196,17 @@ ; X86-LABEL: blsi_used2: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: blsil %eax, %ecx -; X86-NEXT: negl %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: negl %ecx +; X86-NEXT: andl %ecx, %eax ; X86-NEXT: imull %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: blsi_used2: ; X64: # %bb.0: # %entry -; X64-NEXT: blsil %edi, %eax -; X64-NEXT: negl %edi +; X64-NEXT: movl %edi, %eax +; X64-NEXT: negl %eax +; X64-NEXT: andl %eax, %edi ; X64-NEXT: imull %edi, %eax ; X64-NEXT: retq entry: @@ -380,17 +382,17 @@ ; X86-LABEL: blsr_used2: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: blsrl %eax, %ecx -; X86-NEXT: decl %eax +; X86-NEXT: leal -1(%eax), %ecx +; X86-NEXT: andl %ecx, %eax ; X86-NEXT: imull %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: blsr_used2: ; X64: # %bb.0: # %entry ; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: blsrl %edi, %ecx ; X64-NEXT: leal -1(%rdi), %eax -; X64-NEXT: imull %ecx, %eax +; X64-NEXT: andl %eax, %edi +; X64-NEXT: imull %edi, %eax ; X64-NEXT: retq entry: %sub = add i32 %a, -1