Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -33452,6 +33452,42 @@
     }
   }
 
+  // Hoist a constant add out of a CMOV whose other arm is a constant, so the
+  // CMOV selects directly between the CTTZ result and an adjusted constant.
+  // Operands below are listed in (FalseOp, TrueOp, CC) order:
+  //   (CMOV K, (ADD (CTTZ X), C), (X != 0)) ->
+  //     (ADD (CMOV K-C, (CTTZ X), (X != 0)), C)
+  //   (CMOV (ADD (CTTZ X), C), K, (X == 0)) ->
+  //     (ADD (CMOV (CTTZ X), K-C, (X == 0)), C)
+  if (CC == X86::COND_NE || CC == X86::COND_E) {
+    auto *Cnst = CC == X86::COND_E ? dyn_cast<ConstantSDNode>(TrueOp)
+                                   : dyn_cast<ConstantSDNode>(FalseOp);
+    SDValue Add = CC == X86::COND_E ? FalseOp : TrueOp;
+
+    if (Cnst && Add.getOpcode() == ISD::ADD && Add.hasOneUse()) {
+      auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1));
+      SDValue AddOp2 = Add.getOperand(0);
+      if (AddOp1 && (AddOp2.getOpcode() == ISD::CTTZ_ZERO_UNDEF ||
+                     AddOp2.getOpcode() == ISD::CTTZ)) {
+        // Pre-subtract C from the constant arm; re-adding C restores it.
+        APInt Diff = Cnst->getAPIntValue() - AddOp1->getAPIntValue();
+        if (CC == X86::COND_E) {
+          // The constant was read from TrueOp, so it stays the second value.
+          Add = DAG.getNode(X86ISD::CMOV, DL, Add.getValueType(), AddOp2,
+                            DAG.getConstant(Diff, DL, Add.getValueType()),
+                            DAG.getConstant(CC, DL, MVT::i8), Cond);
+        } else {
+          // The constant was read from FalseOp, so it stays the first value.
+          Add = DAG.getNode(X86ISD::CMOV, DL, Add.getValueType(),
+                            DAG.getConstant(Diff, DL, Add.getValueType()),
+                            AddOp2, DAG.getConstant(CC, DL, MVT::i8), Cond);
+        }
+        return DAG.getNode(X86ISD::ADD, DL, Add.getValueType(), Add,
+                           SDValue(AddOp1, 0));
+      }
+    }
+  }
+
   return SDValue();
 }
 
Index: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
@@ -3591,6 +3591,13 @@
   case X86::TZCNT32rr: case X86::TZCNT32rm:
   case X86::TZCNT64rr: case X86::TZCNT64rm:
     return X86::COND_B;
+  case X86::BSF16rr:
+  case X86::BSF16rm:
+  case X86::BSF32rr:
+  case X86::BSF32rm:
+  case X86::BSF64rr:
+  case X86::BSF64rm:
+    return X86::COND_E;
   }
 }
 
Index: llvm/trunk/test/CodeGen/X86/dagcombine-select.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/dagcombine-select.ll
+++ llvm/trunk/test/CodeGen/X86/dagcombine-select.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-unknown-unknown -verify-machineinstrs < %s | FileCheck -enable-var-scope %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -verify-machineinstrs -mattr=+bmi < %s | FileCheck -check-prefix=BMI -enable-var-scope %s
 
 define i32 @select_and1(i32 %x, i32 %y) {
 ; CHECK-LABEL: select_and1:
@@ -279,3 +280,99 @@
   %bo = frem double 5.1, %sel
   ret double %bo
 }
+
+; Each function selects 5 when %v is zero and cttz(%v) + 6 otherwise, so the
+; count may only be moved over the -1 when the input is non-zero.
+declare i64 @llvm.cttz.i64(i64, i1)
+define i64 @cttz_64_eq_select(i64 %v) nounwind {
+; CHECK-LABEL: cttz_64_eq_select:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bsfq %rdi, %rcx
+; CHECK-NEXT:    movq $-1, %rax
+; CHECK-NEXT:    cmovneq %rcx, %rax
+; CHECK-NEXT:    addq $6, %rax
+; CHECK-NEXT:    retq
+
+; BMI-LABEL: cttz_64_eq_select:
+; BMI:       # %bb.0:
+; BMI-NEXT:    tzcntq %rdi, %rcx
+; BMI-NEXT:    movq $-1, %rax
+; BMI-NEXT:    cmovaeq %rcx, %rax
+; BMI-NEXT:    addq $6, %rax
+; BMI-NEXT:    retq
+  %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+  %tobool = icmp eq i64 %v, 0
+  %.op = add nuw nsw i64 %cnt, 6
+  %add = select i1 %tobool, i64 5, i64 %.op
+  ret i64 %add
+}
+
+define i64 @cttz_64_ne_select(i64 %v) nounwind {
+; CHECK-LABEL: cttz_64_ne_select:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bsfq %rdi, %rcx
+; CHECK-NEXT:    movq $-1, %rax
+; CHECK-NEXT:    cmovneq %rcx, %rax
+; CHECK-NEXT:    addq $6, %rax
+; CHECK-NEXT:    retq
+
+; BMI-LABEL: cttz_64_ne_select:
+; BMI:       # %bb.0:
+; BMI-NEXT:    tzcntq %rdi, %rcx
+; BMI-NEXT:    movq $-1, %rax
+; BMI-NEXT:    cmovaeq %rcx, %rax
+; BMI-NEXT:    addq $6, %rax
+; BMI-NEXT:    retq
+  %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+  %tobool = icmp ne i64 %v, 0
+  %.op = add nuw nsw i64 %cnt, 6
+  %add = select i1 %tobool, i64 %.op, i64 5
+  ret i64 %add
+}
+
+declare i32 @llvm.cttz.i32(i32, i1)
+define i32 @cttz_32_eq_select(i32 %v) nounwind {
+; CHECK-LABEL: cttz_32_eq_select:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bsfl %edi, %ecx
+; CHECK-NEXT:    movl $-1, %eax
+; CHECK-NEXT:    cmovnel %ecx, %eax
+; CHECK-NEXT:    addl $6, %eax
+; CHECK-NEXT:    retq
+
+; BMI-LABEL: cttz_32_eq_select:
+; BMI:       # %bb.0:
+; BMI-NEXT:    tzcntl %edi, %ecx
+; BMI-NEXT:    movl $-1, %eax
+; BMI-NEXT:    cmovael %ecx, %eax
+; BMI-NEXT:    addl $6, %eax
+; BMI-NEXT:    retq
+  %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+  %tobool = icmp eq i32 %v, 0
+  %.op = add nuw nsw i32 %cnt, 6
+  %add = select i1 %tobool, i32 5, i32 %.op
+  ret i32 %add
+}
+
+define i32 @cttz_32_ne_select(i32 %v) nounwind {
+; CHECK-LABEL: cttz_32_ne_select:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    bsfl %edi, %ecx
+; CHECK-NEXT:    movl $-1, %eax
+; CHECK-NEXT:    cmovnel %ecx, %eax
+; CHECK-NEXT:    addl $6, %eax
+; CHECK-NEXT:    retq
+
+; BMI-LABEL: cttz_32_ne_select:
+; BMI:       # %bb.0:
+; BMI-NEXT:    tzcntl %edi, %ecx
+; BMI-NEXT:    movl $-1, %eax
+; BMI-NEXT:    cmovael %ecx, %eax
+; BMI-NEXT:    addl $6, %eax
+; BMI-NEXT:    retq
+  %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+  %tobool = icmp ne i32 %v, 0
+  %.op = add nuw nsw i32 %cnt, 6
+  %add = select i1 %tobool, i32 %.op, i32 5
+  ret i32 %add
+}