Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -8996,9 +9026,39 @@
   case X86::TZCNT32rr: case X86::TZCNT32rm:
   case X86::TZCNT64rr: case X86::TZCNT64rm:
     return X86::COND_B;
+  case X86::BSF16rr:
+  case X86::BSF16rm:
+  case X86::BSF32rr:
+  case X86::BSF32rm:
+  case X86::BSF64rr:
+  case X86::BSF64rm:
+    return X86::COND_E;
   }
 }
 
+static bool isAdd(MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case X86::ADD64ri32:
+  case X86::ADD64ri8:
+  case X86::ADD32ri:
+  case X86::ADD32ri8:
+  case X86::ADD16ri:
+  case X86::ADD16ri8:
+  case X86::ADD8ri:
+  case X86::ADD64rr:
+  case X86::ADD32rr:
+  case X86::ADD16rr:
+  case X86::ADD8rr:
+  case X86::ADD64rm:
+  case X86::ADD32rm:
+  case X86::ADD16rm:
+  case X86::ADD8rm:
+    return true;
+  }
+
+  return false;
+}
+
 /// Check if there exists an earlier instruction that
 /// operates on the same source operands and sets flags in the same way as
 /// Compare; remove Compare if possible.
@@ -9111,6 +9141,9 @@
                              ? Def.getReverse() /* points to MI */
                              : CmpInstr.getParent()->rend();
   MachineInstr *Movr0Inst = nullptr;
+  MachineInstr *AddInstr = nullptr;
+  MachineInstr *CMovInstr = nullptr;
+  MachineInstr *CMovImmInit = nullptr;
   for (; RI != RE; ++RI) {
     MachineInstr &Instr = *RI;
     // Check whether CmpInstr can be made redundant by the current instruction.
@@ -9124,6 +9157,64 @@
         Instr.readsRegister(X86::EFLAGS, TRI)) {
       // This instruction modifies or uses EFLAGS.
 
+      // Check whether it is possible to move the ADD instruction
+      // below the CMOV instruction. If Instr is such an ADD, the
+      // code may look like this:
+      //   C-code: ffs(x) + A       ; 'A' is an immediate
+      //   asm-code:
+      //     %1 = bsf %x
+      //     %2 = add %1, A+1
+      //     test %x, %x
+      //     %3 = mov A
+      //     %4 = cmov %2, %3
+      // and it can be transformed to:
+      //     %1 = bsf %x
+      //     %3 = mov -1            ; -1 = A - (A+1), i.e. the select
+      //                            ; constant minus the add immediate
+      //     %2 = cmov %1, %3
+      //     %4 = add %2, A+1       ; the add immediate is unchanged
+      if (!AddInstr && isAdd(Instr) && Instr.getOperand(0).isReg() &&
+          Instr.getOperand(1).isReg() && Instr.getOperand(2).isImm() &&
+          Def->getOperand(0).isReg()) {
+        unsigned AddOp0Reg = Instr.getOperand(0).getReg();
+        unsigned AddOp1Reg = Instr.getOperand(1).getReg();
+        unsigned DefOp0Reg = Def->getOperand(0).getReg();
+
+        if (!MRI->hasOneUse(DefOp0Reg) || AddOp1Reg != DefOp0Reg)
+          return false;
+
+        if (!MRI->hasOneUse(AddOp0Reg))
+          return false;
+        CMovInstr = &*MRI->use_instr_begin(AddOp0Reg);
+        if (X86::getCondFromCMovOpc(CMovInstr->getOpcode()) ==
+            X86::COND_INVALID)
+          return false;
+        auto isNotMovXXri = [](unsigned Opcode) {
+          return (Opcode != X86::MOV16ri && Opcode != X86::MOV32ri &&
+                  Opcode != X86::MOV64ri && Opcode != X86::MOV8ri &&
+                  Opcode != X86::MOV32ri64 && Opcode != X86::MOV64ri32);
+        };
+        CMovImmInit =
+            CMovInstr->getOperand(1).getReg() == AddOp0Reg
+                ? MRI->getUniqueVRegDef(CMovInstr->getOperand(2).getReg())
+                : MRI->getUniqueVRegDef(CMovInstr->getOperand(1).getReg());
+        if (!CMovImmInit || (isNotMovXXri(CMovImmInit->getOpcode()) &&
+                             !CMovImmInit->isSubregToReg()))
+          return false;
+
+        if (CMovImmInit->isSubregToReg()) {
+          MachineInstr *Mov =
+              MRI->getUniqueVRegDef(CMovImmInit->getOperand(2).getReg());
+          if (!Mov || !MRI->hasOneUse(Mov->getOperand(0).getReg()) ||
+              isNotMovXXri(Mov->getOpcode()))
+            return false;
+          CMovImmInit = Mov;
+        }
+
+        AddInstr = &Instr;
+        continue;
+      }
+
       // MOV32r0 etc. are implemented with xor which clobbers condition code.
       // They are safe to move up, if the definition to EFLAGS is dead and
       // earlier instructions do not read or write EFLAGS.
@@ -9273,6 +9364,40 @@
     return false;
   }
 
+  // Perform the transformation described above:
+  // move the ADD instruction below the CMOV.
+  //   %1 = bsf %x        ; = Def
+  //   %2 = add %1, A+1   ; = AddInstr
+  //   test %x, %x        ; = CmpInstr
+  //   %3 = mov A         ; = CMovImmInit
+  //   %4 = cmov %2, %3   ; = CMovInstr
+  if (AddInstr && CMovInstr && CMovImmInit) {
+    int NotZeroOp = 0;
+    switch (X86::getCondFromCMovOpc(CMovInstr->getOpcode())) {
+    case X86::COND_E:
+      NotZeroOp = 1; // Operand 1 is selected when ZF is clear.
+      break;
+    case X86::COND_NE:
+      NotZeroOp = 2; // Operand 2 is selected when ZF is clear.
+      break;
+    default:
+      // The rewrite is only valid when the CMOV tests ZF (COND_E/COND_NE).
+      return false;
+    }
+    AddInstr->removeFromParent(); // Reinserted below the CMOV afterwards.
+    // Rebias the select constant: A becomes A - (A+1) = -1.
+    CMovImmInit->getOperand(1).setImm(CMovImmInit->getOperand(1).getImm() -
+                                      AddInstr->getOperand(2).getImm());
+    // Feed the BSF result directly into the CMOV, and let the ADD consume
+    // the CMOV result instead of the other way around.
+    CMovInstr->getOperand(NotZeroOp).setReg(Def->getOperand(0).getReg());
+    AddInstr->getOperand(1).setReg(AddInstr->getOperand(0).getReg());
+    AddInstr->getOperand(0).setReg(CMovInstr->getOperand(0).getReg());
+    CMovInstr->getOperand(0).setReg(AddInstr->getOperand(1).getReg());
+    CMovInstr->getParent()->insertAfter(MachineBasicBlock::iterator(CMovInstr),
+                                        AddInstr);
+  }
+
   // Make sure Sub instruction defines EFLAGS and mark the def live.
   unsigned i = 0, e = Sub->getNumOperands();
   for (; i != e; ++i) {
Index: test/CodeGen/X86/peep-test-4.ll
===================================================================
--- test/CodeGen/X86/peep-test-4.ll
+++ test/CodeGen/X86/peep-test-4.ll
@@ -286,6 +286,66 @@
   ret void
 }
 
+define i64 @testCTZ4(i64 %v) nounwind {
+; CHECK-LABEL: testCTZ4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    tzcntq %rdi, %rcx
+; CHECK-NEXT:    movl $-1, %eax
+; CHECK-NEXT:    cmovaeq %rcx, %rax
+; CHECK-NEXT:    addq $6, %rax
+; CHECK-NEXT:    retq
+  %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+  %tobool = icmp eq i64 %v, 0
+  %.op = add nuw nsw i64 %cnt, 6
+  %add = select i1 %tobool, i64 5, i64 %.op
+  ret i64 %add
+}
+
+define i64 @testCTZ5(i64 %v) nounwind {
+; CHECK-LABEL: testCTZ5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    tzcntq %rdi, %rcx
+; CHECK-NEXT:    movl $-1, %eax
+; CHECK-NEXT:    cmovaeq %rcx, %rax
+; CHECK-NEXT:    addq $6, %rax
+; CHECK-NEXT:    retq
+  %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+  %tobool = icmp ne i64 %v, 0
+  %.op = add nuw nsw i64 %cnt, 6
+  %add = select i1 %tobool, i64 %.op, i64 5
+  ret i64 %add
+}
+
+define i32 @testCTZ6(i32 %v) nounwind {
+; CHECK-LABEL: testCTZ6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    tzcntl %edi, %ecx
+; CHECK-NEXT:    movl $-1, %eax
+; CHECK-NEXT:    cmovael %ecx, %eax
+; CHECK-NEXT:    addl $6, %eax
+; CHECK-NEXT:    retq
+  %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+  %tobool = icmp eq i32 %v, 0
+  %.op = add nuw nsw i32 %cnt, 6
+  %add = select i1 %tobool, i32 5, i32 %.op
+  ret i32 %add
+}
+
+define i32 @testCTZ7(i32 %v) nounwind {
+; CHECK-LABEL: testCTZ7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    tzcntl %edi, %ecx
+; CHECK-NEXT:    movl $-1, %eax
+; CHECK-NEXT:    cmovael %ecx, %eax
+; CHECK-NEXT:    addl $6, %eax
+; CHECK-NEXT:    retq
+  %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+  %tobool = icmp ne i32 %v, 0
+  %.op = add nuw nsw i32 %cnt, 6
+  %add = select i1 %tobool, i32 %.op, i32 5
+  ret i32 %add
+}
+
 declare i64 @llvm.ctlz.i64(i64, i1)
 define i64 @testCLZ(i64 %v) nounwind {
 ; CHECK-LABEL: testCLZ:
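
A quick end-to-end illustration (not part of the patch): the following C++ driver
mirrors testCTZ6 above. The function name is made up, the expected assembly is
copied from testCTZ6's CHECK lines, and it assumes a BMI-capable target
(e.g. clang -O2 -mbmi):

    #include <cstdint>

    // select(x == 0, 5, cttz(x) + 6), i.e. ffs(x) + 5.
    uint32_t ffs_plus_five(uint32_t x) {
      return x ? __builtin_ctz(x) + 6 : 5;
    }

With the patch applied, the 'test' is folded into tzcnt's flag definition and
the add is sunk below the cmov:

    tzcntl  %edi, %ecx
    movl    $-1, %eax       # 5 - 6, the rebiased select constant
    cmovael %ecx, %eax      # CF clear <=> %edi was non-zero
    addl    $6, %eax        # the original add immediate, applied last
    retq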