Index: llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp =================================================================== --- llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -265,13 +265,16 @@ /// @param reg - The Reg to append. static void translateRegister(MCInst &mcInst, Reg reg) { #define ENTRY(x) X86::x, - uint8_t llvmRegnums[] = { - ALL_REGS - 0 + MCPhysReg llvmRegnums[] = { + // Expand enums for all the registers. + ALL_REGS + + // And then provide a zero terminating entry. + 0, }; #undef ENTRY - uint8_t llvmRegnum = llvmRegnums[reg]; + MCPhysReg llvmRegnum = llvmRegnums[reg]; mcInst.addOperand(MCOperand::createReg(llvmRegnum)); } Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -28066,11 +28066,16 @@ MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64; unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r; MachineInstr *Push = BuildMI(*BB, MI, DL, TII->get(PushF)); - // Permit reads of the FLAGS register without it being defined. + // Permit reads of the EFLAGS and DF registers without them being defined. // This intrinsic exists to read external processor state in flags, such as // the trap flag, interrupt flag, and direction flag, none of which are // modeled by the backend. + assert(Push->getOperand(2).getReg() == X86::EFLAGS && + "Unexpected register in operand!"); Push->getOperand(2).setIsUndef(); + assert(Push->getOperand(3).getReg() == X86::DF && + "Unexpected register in operand!"); + Push->getOperand(3).setIsUndef(); BuildMI(*BB, MI, DL, TII->get(Pop), MI.getOperand(0).getReg()); MI.eraseFromParent(); // The pseudo is gone now. Index: llvm/lib/Target/X86/X86InstrCompiler.td =================================================================== --- llvm/lib/Target/X86/X86InstrCompiler.td +++ llvm/lib/Target/X86/X86InstrCompiler.td @@ -473,7 +473,7 @@ ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF], usesCustomInserter = 1, Uses = [ESP, SSP] in { def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_addr32", @@ -493,7 +493,7 @@ ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF], usesCustomInserter = 1, Uses = [RSP, SSP] in { def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLS_addr64", @@ -509,7 +509,7 @@ // For i386, the address of the thunk is passed on the stack, on return the // address of the variable is in %eax. %ecx is trashed during the function // call. All other registers are preserved. -let Defs = [EAX, ECX, EFLAGS], +let Defs = [EAX, ECX, EFLAGS, DF], Uses = [ESP, SSP], usesCustomInserter = 1 in def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym), @@ -522,7 +522,7 @@ // %rdi. The lowering will do the right thing with RDI. // On return the address of the variable is in %rax. All other // registers are preserved. -let Defs = [RAX, EFLAGS], +let Defs = [RAX, EFLAGS, DF], Uses = [RSP, SSP], usesCustomInserter = 1 in def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym), Index: llvm/lib/Target/X86/X86InstrInfo.cpp =================================================================== --- llvm/lib/Target/X86/X86InstrInfo.cpp +++ llvm/lib/Target/X86/X86InstrInfo.cpp @@ -9481,8 +9481,9 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { // FIXME: Return false for x87 stack register classes for now. We can't // allow any loads of these registers before FpGet_ST0_80. - return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass || - RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass); + return !(RC == &X86::CCRRegClass || RC == &X86::DFCCRRegClass || + RC == &X86::RFP32RegClass || RC == &X86::RFP64RegClass || + RC == &X86::RFP80RegClass); } /// Return a virtual register initialized with the Index: llvm/lib/Target/X86/X86InstrInfo.td =================================================================== --- llvm/lib/Target/X86/X86InstrInfo.td +++ llvm/lib/Target/X86/X86InstrInfo.td @@ -1235,18 +1235,18 @@ let mayLoad = 1, mayStore = 1, usesCustomInserter = 1, SchedRW = [WriteRMW] in { - let Defs = [ESP, EFLAGS], Uses = [ESP] in + let Defs = [ESP, EFLAGS, DF], Uses = [ESP] in def WRFLAGS32 : PseudoI<(outs), (ins GR32:$src), [(int_x86_flags_write_u32 GR32:$src)]>, Requires<[Not64BitMode]>; - let Defs = [RSP, EFLAGS], Uses = [RSP] in + let Defs = [RSP, EFLAGS, DF], Uses = [RSP] in def WRFLAGS64 : PseudoI<(outs), (ins GR64:$src), [(int_x86_flags_write_u64 GR64:$src)]>, Requires<[In64BitMode]>; } -let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, hasSideEffects=0, +let Defs = [ESP, EFLAGS, DF], Uses = [ESP], mayLoad = 1, hasSideEffects=0, SchedRW = [WriteLoad] in { def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize16; @@ -1254,7 +1254,7 @@ OpSize32, Requires<[Not64BitMode]>; } -let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, hasSideEffects=0, +let Defs = [ESP], Uses = [ESP, EFLAGS, DF], mayStore = 1, hasSideEffects=0, SchedRW = [WriteStore] in { def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>, OpSize16; @@ -1294,10 +1294,10 @@ Requires<[In64BitMode]>; } -let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, hasSideEffects=0 in +let Defs = [RSP, EFLAGS, DF], Uses = [RSP], mayLoad = 1, hasSideEffects=0 in def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", [], IIC_POP_FD>, OpSize32, Requires<[In64BitMode]>, Sched<[WriteLoad]>; -let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, hasSideEffects=0 in +let Defs = [RSP], Uses = [RSP, EFLAGS, DF], mayStore = 1, hasSideEffects=0 in def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>, OpSize32, Requires<[In64BitMode]>, Sched<[WriteStore]>; @@ -1382,8 +1382,7 @@ } // Defs = [EFLAGS] let SchedRW = [WriteMicrocoded] in { -// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI -let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in { +let Defs = [EDI,ESI], Uses = [EDI,ESI,DF] in { def MOVSB : I<0xA4, RawFrmDstSrc, (outs), (ins dstidx8:$dst, srcidx8:$src), "movsb\t{$src, $dst|$dst, $src}", [], IIC_MOVS>; def MOVSW : I<0xA5, RawFrmDstSrc, (outs), (ins dstidx16:$dst, srcidx16:$src), @@ -1395,38 +1394,35 @@ Requires<[In64BitMode]>; } -// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI -let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in +let Defs = [EDI], Uses = [AL,EDI,DF] in def STOSB : I<0xAA, RawFrmDst, (outs), (ins dstidx8:$dst), "stosb\t{%al, $dst|$dst, al}", [], IIC_STOS>; -let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in +let Defs = [EDI], Uses = [AX,EDI,DF] in def STOSW : I<0xAB, RawFrmDst, (outs), (ins dstidx16:$dst), "stosw\t{%ax, $dst|$dst, ax}", [], IIC_STOS>, OpSize16; -let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in +let Defs = [EDI], Uses = [EAX,EDI,DF] in def STOSL : I<0xAB, RawFrmDst, (outs), (ins dstidx32:$dst), "stos{l|d}\t{%eax, $dst|$dst, eax}", [], IIC_STOS>, OpSize32; -let Defs = [RDI], Uses = [RAX,RDI,EFLAGS] in +let Defs = [RDI], Uses = [RAX,RDI,DF] in def STOSQ : RI<0xAB, RawFrmDst, (outs), (ins dstidx64:$dst), "stosq\t{%rax, $dst|$dst, rax}", [], IIC_STOS>, Requires<[In64BitMode]>; -// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI -let Defs = [EDI,EFLAGS], Uses = [AL,EDI,EFLAGS] in +let Defs = [EDI,EFLAGS], Uses = [AL,EDI,DF] in def SCASB : I<0xAE, RawFrmDst, (outs), (ins dstidx8:$dst), "scasb\t{$dst, %al|al, $dst}", [], IIC_SCAS>; -let Defs = [EDI,EFLAGS], Uses = [AX,EDI,EFLAGS] in +let Defs = [EDI,EFLAGS], Uses = [AX,EDI,DF] in def SCASW : I<0xAF, RawFrmDst, (outs), (ins dstidx16:$dst), "scasw\t{$dst, %ax|ax, $dst}", [], IIC_SCAS>, OpSize16; -let Defs = [EDI,EFLAGS], Uses = [EAX,EDI,EFLAGS] in +let Defs = [EDI,EFLAGS], Uses = [EAX,EDI,DF] in def SCASL : I<0xAF, RawFrmDst, (outs), (ins dstidx32:$dst), "scas{l|d}\t{$dst, %eax|eax, $dst}", [], IIC_SCAS>, OpSize32; -let Defs = [EDI,EFLAGS], Uses = [RAX,EDI,EFLAGS] in +let Defs = [EDI,EFLAGS], Uses = [RAX,EDI,DF] in def SCASQ : RI<0xAF, RawFrmDst, (outs), (ins dstidx64:$dst), "scasq\t{$dst, %rax|rax, $dst}", [], IIC_SCAS>, Requires<[In64BitMode]>; -// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI -let Defs = [EDI,ESI,EFLAGS], Uses = [EDI,ESI,EFLAGS] in { +let Defs = [EDI,ESI,EFLAGS], Uses = [EDI,ESI,DF] in { def CMPSB : I<0xA6, RawFrmDstSrc, (outs), (ins dstidx8:$dst, srcidx8:$src), "cmpsb\t{$dst, $src|$src, $dst}", [], IIC_CMPS>; def CMPSW : I<0xA7, RawFrmDstSrc, (outs), (ins dstidx16:$dst, srcidx16:$src), @@ -2069,8 +2065,7 @@ } // SchedRW // Repeat string operation instruction prefixes -// These use the DF flag in the EFLAGS register to inc or dec ECX -let Defs = [ECX], Uses = [ECX,EFLAGS], SchedRW = [WriteMicrocoded] in { +let Defs = [ECX], Uses = [ECX,DF], SchedRW = [WriteMicrocoded] in { // Repeat (used with INS, OUTS, MOVS, LODS and STOS) def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>; // Repeat while not equal (used with CMPS and SCAS) @@ -2079,25 +2074,23 @@ // String manipulation instructions let SchedRW = [WriteMicrocoded] in { -// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI -let Defs = [AL,ESI], Uses = [ESI,EFLAGS] in +let Defs = [AL,ESI], Uses = [ESI,DF] in def LODSB : I<0xAC, RawFrmSrc, (outs), (ins srcidx8:$src), "lodsb\t{$src, %al|al, $src}", [], IIC_LODS>; -let Defs = [AX,ESI], Uses = [ESI,EFLAGS] in +let Defs = [AX,ESI], Uses = [ESI,DF] in def LODSW : I<0xAD, RawFrmSrc, (outs), (ins srcidx16:$src), "lodsw\t{$src, %ax|ax, $src}", [], IIC_LODS>, OpSize16; -let Defs = [EAX,ESI], Uses = [ESI,EFLAGS] in +let Defs = [EAX,ESI], Uses = [ESI,DF] in def LODSL : I<0xAD, RawFrmSrc, (outs), (ins srcidx32:$src), "lods{l|d}\t{$src, %eax|eax, $src}", [], IIC_LODS>, OpSize32; -let Defs = [RAX,ESI], Uses = [ESI,EFLAGS] in +let Defs = [RAX,ESI], Uses = [ESI,DF] in def LODSQ : RI<0xAD, RawFrmSrc, (outs), (ins srcidx64:$src), "lodsq\t{$src, %rax|rax, $src}", [], IIC_LODS>, Requires<[In64BitMode]>; } let SchedRW = [WriteSystem] in { -// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI -let Defs = [ESI], Uses = [DX,ESI,EFLAGS] in { +let Defs = [ESI], Uses = [DX,ESI,DF] in { def OUTSB : I<0x6E, RawFrmSrc, (outs), (ins srcidx8:$src), "outsb\t{$src, %dx|dx, $src}", [], IIC_OUTS>; def OUTSW : I<0x6F, RawFrmSrc, (outs), (ins srcidx16:$src), @@ -2106,8 +2099,7 @@ "outs{l|d}\t{$src, %dx|dx, $src}", [], IIC_OUTS>, OpSize32; } -// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI -let Defs = [EDI], Uses = [DX,EDI,EFLAGS] in { +let Defs = [EDI], Uses = [DX,EDI,DF] in { def INSB : I<0x6C, RawFrmDst, (outs), (ins dstidx8:$dst), "insb\t{%dx, $dst|$dst, dx}", [], IIC_INS>; def INSW : I<0x6D, RawFrmDst, (outs), (ins dstidx16:$dst), @@ -2117,19 +2109,22 @@ } } -// Flag instructions -let SchedRW = [WriteALU] in { +// EFLAGS management instructions. +let SchedRW = [WriteALU], Defs = [EFLAGS], Uses = [EFLAGS] in { def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", [], IIC_CLC_CMC_STC>; def STC : I<0xF9, RawFrm, (outs), (ins), "stc", [], IIC_CLC_CMC_STC>; -def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>; -def STI : I<0xFB, RawFrm, (outs), (ins), "sti", [], IIC_STI>; -def CLD : I<0xFC, RawFrm, (outs), (ins), "cld", [], IIC_CLD>; -def STD : I<0xFD, RawFrm, (outs), (ins), "std", [], IIC_STD>; def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", [], IIC_CLC_CMC_STC>; +} -def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB; +// DF management instructions. +// FIXME: These are a bit more expensive than CLC and STC. We should consider +// adjusting their schedule bucket. +let SchedRW = [WriteALU], Defs = [DF] in { +def CLD : I<0xFC, RawFrm, (outs), (ins), "cld", [], IIC_CLD>; +def STD : I<0xFD, RawFrm, (outs), (ins), "std", [], IIC_STD>; } + // Table lookup instructions let Uses = [AL,EBX], Defs = [AL], hasSideEffects = 0, mayLoad = 1 in def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>, Index: llvm/lib/Target/X86/X86InstrSystem.td =================================================================== --- llvm/lib/Target/X86/X86InstrSystem.td +++ llvm/lib/Target/X86/X86InstrSystem.td @@ -699,6 +699,19 @@ } // Uses, Defs } // SchedRW +//===----------------------------------------------------------------------===// +// TS flag control instruction. +let SchedRW = [WriteSystem] in { +def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB; +} + +//===----------------------------------------------------------------------===// +// IF (inside EFLAGS) management instructions. +let SchedRW = [WriteSystem], Uses = [EFLAGS], Defs = [EFLAGS] in { +def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>; +def STI : I<0xFB, RawFrm, (outs), (ins), "sti", [], IIC_STI>; +} + //===----------------------------------------------------------------------===// // RDPID Instruction let SchedRW = [WriteSystem] in { Index: llvm/lib/Target/X86/X86RegisterInfo.td =================================================================== --- llvm/lib/Target/X86/X86RegisterInfo.td +++ llvm/lib/Target/X86/X86RegisterInfo.td @@ -265,9 +265,19 @@ // Floating-point status word def FPSW : X86Reg<"fpsw", 0>; -// Status flags register +// Status flags register. +// +// Note that some flags that are commonly thought of as part of the status +// flags register are modeled separately. Typically this is due to instructions +// reading and updating those flags independently of all the others. We don't +// want to create false dependencies between these instructions and so we use +// a separate register to model them. def EFLAGS : X86Reg<"flags", 0>; +// The direction flag. +def DF : X86Reg<"DF", 0>; + + // Segment registers def CS : X86Reg<"cs", 1>; def DS : X86Reg<"ds", 3>; @@ -510,6 +520,10 @@ let CopyCost = -1; // Don't allow copying of status registers. let isAllocatable = 0; } +def DFCCR : RegisterClass<"X86", [i32], 32, (add DF)> { + let CopyCost = -1; // Don't allow copying of status registers. + let isAllocatable = 0; +} // AVX-512 vector/mask registers. def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64], Index: llvm/test/CodeGen/X86/ipra-reg-usage.ll =================================================================== --- llvm/test/CodeGen/X86/ipra-reg-usage.ll +++ llvm/test/CodeGen/X86/ipra-reg-usage.ll @@ -3,7 +3,7 @@ target triple = "x86_64-unknown-unknown" declare void @bar1() define preserve_allcc void @foo()#0 { -; CHECK: foo Clobbered Registers: $cs $ds $eflags $eip $eiz $es $fpsw $fs $gs $hip $ip $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11d $r11w +; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpsw $fs $gs $hip $ip $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11d $r11w call void @bar1() call void @bar2() ret void