Index: llvm/trunk/lib/Target/X86/X86FlagsCopyLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -127,6 +127,10 @@
                       MachineInstr &JmpI, CondRegArray &CondRegs);
   void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
                    MachineInstr &CopyDefI);
+  void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
+                               MachineBasicBlock::iterator TestPos,
+                               DebugLoc TestLoc, MachineInstr &SetBI,
+                               MachineOperand &FlagUse, CondRegArray &CondRegs);
   void rewriteSetCC(MachineBasicBlock &TestMBB,
                     MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                     MachineInstr &SetCCI, MachineOperand &FlagUse,
@@ -512,8 +516,7 @@
       } else if (MI.getOpcode() == TargetOpcode::COPY) {
         rewriteCopy(MI, *FlagUse, CopyDefI);
       } else {
-        // We assume that arithmetic instructions that use flags also def
-        // them.
+        // We assume all other instructions that use flags also def them.
         assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
                "Expected a def of EFLAGS for this instruction!");
 
@@ -525,7 +528,23 @@
         // logic.
         FlagsKilled = true;
 
-        rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
+        switch (MI.getOpcode()) {
+        case X86::SETB_C8r:
+        case X86::SETB_C16r:
+        case X86::SETB_C32r:
+        case X86::SETB_C64r:
+          // Use custom lowering for arithmetic that merely extends the carry
+          // flag. We model such instructions with the SETB_C* pseudos.
+          rewriteSetCarryExtended(TestMBB, TestPos, TestLoc, MI, *FlagUse,
+                                  CondRegs);
+          break;
+
+        default:
+          // Generically handle remaining uses as arithmetic instructions.
+          rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse,
+                            CondRegs);
+          break;
+        }
         break;
       }
 
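For context before the new routine below: the SETB_C* pseudos materialize the carry flag as an all-zeros or all-ones bitmask, which x86 typically computes with `sbb reg, reg` (reg - reg - CF = 0 - CF). A minimal C++ sketch of the value involved; the helper name is hypothetical and not LLVM API:

    #include <cstdint>

    // Hypothetical model of what a SETB_C32r-style pseudo produces: all ones
    // when the carry flag is set, zero otherwise, i.e. 0 - CF.
    uint32_t setbCarryMask32(bool CarryFlag) {
      return uint32_t{0} - static_cast<uint32_t>(CarryFlag); // 0 or 0xFFFFFFFF
    }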
@@ -753,6 +772,126 @@
   MI.eraseFromParent();
 }
 
+void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
+    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
+    DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
+    CondRegArray &CondRegs) {
+  // This routine is only used to handle pseudos for setting a register to zero
+  // or all ones based on CF. This is essentially the sign-extended form of the
+  // 1-bit SETB result and is modeled with the SETB_C* pseudos. They require
+  // special handling as they aren't normal SETcc instructions and are lowered
+  // to an EFLAGS-clobbering operation (typically SBB). One simplifying aspect
+  // is that they are only provided in reg-defining forms. A complicating
+  // factor is that they can define many different register widths.
+  assert(SetBI.getOperand(0).isReg() &&
+         "Cannot have a non-register defined operand to this variant of SETB!");
+
+  // Little helper to do the common final step of replacing the register def'ed
+  // by this SETB instruction with a new register and removing the SETB
+  // instruction.
+  auto RewriteToReg = [&](unsigned Reg) {
+    MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
+    SetBI.eraseFromParent();
+  };
+
+  // Grab the register class used for this particular instruction.
+  auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());
+
+  MachineBasicBlock &MBB = *SetBI.getParent();
+  auto SetPos = SetBI.getIterator();
+  auto SetLoc = SetBI.getDebugLoc();
+
+  auto AdjustReg = [&](unsigned Reg) {
+    auto &OrigRC = *MRI->getRegClass(Reg);
+    if (&OrigRC == &SetBRC)
+      return Reg;
+
+    unsigned NewReg;
+
+    int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
+    int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
+    assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
+    assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
+    int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
+                       X86::NoSubRegister, X86::sub_32bit};
+
+    // If the original size is smaller than the target *and* is smaller than 4
+    // bytes, we need to explicitly zero extend it. We always extend to 4-bytes
+    // to maximize the chance of being able to CSE that operation and to avoid
+    // partial dependency stalls extending to 2-bytes.
+    if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
+      NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
+      BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
+          .addReg(Reg);
+      if (&SetBRC == &X86::GR32RegClass)
+        return NewReg;
+      Reg = NewReg;
+      OrigRegSize = 4;
+    }
+
+    NewReg = MRI->createVirtualRegister(&SetBRC);
+    if (OrigRegSize < TargetRegSize) {
+      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
+              NewReg)
+          .addImm(0)
+          .addReg(Reg)
+          .addImm(SubRegIdx[OrigRegSize]);
+    } else if (OrigRegSize > TargetRegSize) {
+      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::EXTRACT_SUBREG),
+              NewReg)
+          .addReg(Reg)
+          .addImm(SubRegIdx[TargetRegSize]);
+    } else {
+      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
+          .addReg(Reg);
+    }
+    return NewReg;
+  };
+
+  unsigned &CondReg = CondRegs[X86::COND_B];
+  if (!CondReg)
+    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);
+
+  // Adjust the condition to have the desired register width by zero-extending
+  // as needed.
+  // FIXME: We should use a better API to avoid the local reference and using a
+  // different variable here.
+  unsigned ExtCondReg = AdjustReg(CondReg);
+
+  // Now we need to turn this into a bitmask. We do this by subtracting it from
+  // zero.
+  unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
+  BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
+  ZeroReg = AdjustReg(ZeroReg);
+
+  unsigned Sub;
+  switch (SetBI.getOpcode()) {
+  case X86::SETB_C8r:
+    Sub = X86::SUB8rr;
+    break;
+
+  case X86::SETB_C16r:
+    Sub = X86::SUB16rr;
+    break;
+
+  case X86::SETB_C32r:
+    Sub = X86::SUB32rr;
+    break;
+
+  case X86::SETB_C64r:
+    Sub = X86::SUB64rr;
+    break;
+
+  default:
+    llvm_unreachable("Invalid SETB_C* opcode!");
+  }
+  unsigned ResultReg = MRI->createVirtualRegister(&SetBRC);
+  BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
+      .addReg(ZeroReg)
+      .addReg(ExtCondReg);
+  return RewriteToReg(ResultReg);
+}
+
 void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
                                             MachineBasicBlock::iterator TestPos,
                                             DebugLoc TestLoc,
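The AdjustReg lambda above normalizes the widths of the condition and zero registers before the final subtraction. A rough standalone model of that decision tree over plain byte sizes; the names and structure are illustrative assumptions, not LLVM API:

    #include <cassert>
    #include <cstdint>

    // Sketch of the width-adjustment strategy: an 8-bit value headed for a
    // wider register is first zero-extended to 32 bits (MOVZX32rr8, cheap and
    // CSE-friendly); a too-wide value is truncated (EXTRACT_SUBREG); widening
    // 32 -> 64 changes no bits (SUBREG_TO_REG implicitly zeros the top half).
    uint64_t adjustWidth(uint64_t Value, int OrigBytes, int TargetBytes) {
      assert(OrigBytes <= 8 && TargetBytes <= 8 && "No GPRs wider than 64 bits");
      if (OrigBytes < TargetBytes && OrigBytes < 4) {
        Value &= 0xFF; // MOVZX32rr8: zero-extend the 8-bit source.
        OrigBytes = 4;
      }
      if (OrigBytes > TargetBytes) // EXTRACT_SUBREG: keep only the low bits.
        Value &= ~uint64_t{0} >> (64 - 8 * TargetBytes);
      return Value; // SUBREG_TO_REG / COPY leave the bits unchanged.
    }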
Index: llvm/trunk/test/CodeGen/X86/copy-eflags.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/copy-eflags.ll
+++ llvm/trunk/test/CodeGen/X86/copy-eflags.ll
@@ -304,3 +304,62 @@
   %tmp12 = trunc i32 %tmp11 to i16
   br label %bb1
 }
+
+; Use a particular instruction pattern that lowers to the post-RA pseudo used
+; to expand SETB into an SBB, in order to make sure that kind of use of a
+; copied EFLAGS continues to work.
+define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3) {
+; X32-LABEL: PR37431:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    pushl %esi
+; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    .cfi_offset %esi, -8
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl (%eax), %eax
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    sarl $31, %ecx
+; X32-NEXT:    cmpl %eax, %eax
+; X32-NEXT:    sbbl %ecx, %eax
+; X32-NEXT:    setb %al
+; X32-NEXT:    sbbb %cl, %cl
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    movb %cl, (%edx)
+; X32-NEXT:    movzbl %al, %eax
+; X32-NEXT:    xorl %ecx, %ecx
+; X32-NEXT:    subl %eax, %ecx
+; X32-NEXT:    xorl %eax, %eax
+; X32-NEXT:    xorl %edx, %edx
+; X32-NEXT:    idivl %ecx
+; X32-NEXT:    movb %dl, (%esi)
+; X32-NEXT:    popl %esi
+; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    retl
+;
+; X64-LABEL: PR37431:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movslq (%rdi), %rax
+; X64-NEXT:    cmpq %rax, %rax
+; X64-NEXT:    sbbb %dl, %dl
+; X64-NEXT:    cmpq %rax, %rax
+; X64-NEXT:    movb %dl, (%rsi)
+; X64-NEXT:    sbbl %esi, %esi
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    idivl %esi
+; X64-NEXT:    movb %dl, (%rcx)
+; X64-NEXT:    retq
+entry:
+  %tmp = load i32, i32* %arg1
+  %tmp1 = sext i32 %tmp to i64
+  %tmp2 = icmp ugt i64 %tmp1, undef
+  %tmp3 = zext i1 %tmp2 to i8
+  %tmp4 = sub i8 0, %tmp3
+  store i8 %tmp4, i8* %arg2
+  %tmp5 = sext i8 %tmp4 to i32
+  %tmp6 = srem i32 0, %tmp5
+  %tmp7 = trunc i32 %tmp6 to i8
+  store i8 %tmp7, i8* %arg3
+  ret void
+}
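The IR in @PR37431 distills the miscompile: a zero-extended unsigned comparison is subtracted from zero, so instruction selection materializes the carry flag as a byte mask (the SETB_C8r pseudo, visible as `sbbb %cl, %cl` in the checks) while the flags live across a copy. A plain C++ rendering of that core computation; the function name is made up for illustration:

    #include <cstdint>

    // 0 - zext(A >u B) yields 0 or 0xFF: exactly the 8-bit carry mask that
    // the SETB_C8r pseudo models.
    uint8_t carryMaskFromUGT(uint64_t A, uint64_t B) {
      return static_cast<uint8_t>(0) - static_cast<uint8_t>(A > B);
    }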
Index: llvm/trunk/test/CodeGen/X86/flags-copy-lowering.mir
===================================================================
--- llvm/trunk/test/CodeGen/X86/flags-copy-lowering.mir
+++ llvm/trunk/test/CodeGen/X86/flags-copy-lowering.mir
@@ -66,6 +66,12 @@
     call void @foo()
     ret void
   }
+
+  define void @test_setb_c(i64 %a, i64 %b) {
+  entry:
+    call void @foo()
+    ret void
+  }
 ...
 ---
 name: test_branch
@@ -482,3 +488,68 @@
   RET 0
 
 ...
+---
+name: test_setb_c
+# CHECK-LABEL: name: test_setb_c
+liveins:
+  - { reg: '$rdi', virtual-reg: '%0' }
+  - { reg: '$rsi', virtual-reg: '%1' }
+body: |
+  bb.0:
+    liveins: $rdi, $rsi
+
+    %0:gr64 = COPY $rdi
+    %1:gr64 = COPY $rsi
+    %2:gr64 = ADD64rr %0, %1, implicit-def $eflags
+    %3:gr64 = COPY $eflags
+    ; CHECK-NOT: COPY{{( killed)?}} $eflags
+    ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
+    ; CHECK-NOT: COPY{{( killed)?}} $eflags
+
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+
+    $eflags = COPY %3
+    %4:gr8 = SETB_C8r implicit-def $eflags, implicit $eflags
+    MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %4
+    ; CHECK-NOT: $eflags =
+    ; CHECK: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
+    ; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr8 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_8bit
+    ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr8 = SUB8rr %[[ZERO_SUBREG]], %[[CF_REG]]
+    ; CHECK-NEXT: MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
+
+    $eflags = COPY %3
+    %5:gr16 = SETB_C16r implicit-def $eflags, implicit $eflags
+    MOV16mr $rsp, 1, $noreg, -16, $noreg, killed %5
+    ; CHECK-NOT: $eflags =
+    ; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
+    ; CHECK-NEXT: %[[CF_TRUNC:[^:]*]]:gr16 = EXTRACT_SUBREG %[[CF_EXT]], %subreg.sub_16bit
+    ; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
+    ; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr16 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_16bit
+    ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr16 = SUB16rr %[[ZERO_SUBREG]], %[[CF_TRUNC]]
+    ; CHECK-NEXT: MOV16mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
+
+    $eflags = COPY %3
+    %6:gr32 = SETB_C32r implicit-def $eflags, implicit $eflags
+    MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %6
+    ; CHECK-NOT: $eflags =
+    ; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
+    ; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
+    ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr32 = SUB32rr %[[ZERO]], %[[CF_EXT]]
+    ; CHECK-NEXT: MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
+
+    $eflags = COPY %3
+    %7:gr64 = SETB_C64r implicit-def $eflags, implicit $eflags
+    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %7
+    ; CHECK-NOT: $eflags =
+    ; CHECK: %[[CF_EXT1:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
+    ; CHECK-NEXT: %[[CF_EXT2:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[CF_EXT1]], %subreg.sub_32bit
+    ; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
+    ; CHECK-NEXT: %[[ZERO_EXT:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[ZERO]], %subreg.sub_32bit
+    ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr64 = SUB64rr %[[ZERO_EXT]], %[[CF_EXT2]]
+    ; CHECK-NEXT: MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
+
+    RET 0
+
+...
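To run the updated test locally, the invocation should be close to the following; the authoritative RUN line sits at the top of the .mir file, outside this hunk, so treat the exact pass name and flags here as assumptions:

    llc -run-pass x86-flags-copy-lowering -o - \
        llvm/test/CodeGen/X86/flags-copy-lowering.mir \
      | FileCheck llvm/test/CodeGen/X86/flags-copy-lowering.mir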