diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36997,41 +36997,6 @@
   case X86::CMOV_VK64:
     return EmitLoweredSelect(MI, BB);
 
-  case X86::RDFLAGS32:
-  case X86::RDFLAGS64: {
-    unsigned PushF =
-        MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
-    unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r;
-    MachineInstr *Push = BuildMI(*BB, MI, DL, TII->get(PushF));
-    // Permit reads of the EFLAGS and DF registers without them being defined.
-    // This intrinsic exists to read external processor state in flags, such as
-    // the trap flag, interrupt flag, and direction flag, none of which are
-    // modeled by the backend.
-    assert(Push->getOperand(2).getReg() == X86::EFLAGS &&
-           "Unexpected register in operand!");
-    Push->getOperand(2).setIsUndef();
-    assert(Push->getOperand(3).getReg() == X86::DF &&
-           "Unexpected register in operand!");
-    Push->getOperand(3).setIsUndef();
-    BuildMI(*BB, MI, DL, TII->get(Pop), MI.getOperand(0).getReg());
-
-    MI.eraseFromParent(); // The pseudo is gone now.
-    return BB;
-  }
-
-  case X86::WRFLAGS32:
-  case X86::WRFLAGS64: {
-    unsigned Push =
-        MI.getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r;
-    unsigned PopF =
-        MI.getOpcode() == X86::WRFLAGS32 ? X86::POPF32 : X86::POPF64;
-    BuildMI(*BB, MI, DL, TII->get(Push)).addReg(MI.getOperand(0).getReg());
-    BuildMI(*BB, MI, DL, TII->get(PopF));
-
-    MI.eraseFromParent(); // The pseudo is gone now.
-    return BB;
-  }
-
   case X86::FP32_TO_INT16_IN_MEM:
   case X86::FP32_TO_INT32_IN_MEM:
   case X86::FP32_TO_INT64_IN_MEM:
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5061,6 +5061,45 @@
     return true;
   }
 
+  case X86::RDFLAGS32:
+  case X86::RDFLAGS64: {
+    unsigned Is64Bit = MI.getOpcode() == X86::RDFLAGS64;
+    MachineBasicBlock &MBB = *MIB->getParent();
+
+    MachineInstr *NewMI =
+        BuildMI(MBB, MI, MIB->getDebugLoc(),
+                get(Is64Bit ? X86::PUSHF64 : X86::PUSHF32))
+            .getInstr();
+
+    // Permit reads of the EFLAGS and DF registers without them being defined.
+    // This intrinsic exists to read external processor state in flags, such as
+    // the trap flag, interrupt flag, and direction flag, none of which are
+    // modeled by the backend.
+    assert(NewMI->getOperand(2).getReg() == X86::EFLAGS &&
+           "Unexpected register in operand! Should be EFLAGS.");
+    NewMI->getOperand(2).setIsUndef();
+    assert(NewMI->getOperand(3).getReg() == X86::DF &&
+           "Unexpected register in operand! Should be DF.");
+    NewMI->getOperand(3).setIsUndef();
+
+    MIB->setDesc(get(Is64Bit ? X86::POP64r : X86::POP32r));
+    return true;
+  }
+
+  case X86::WRFLAGS32:
+  case X86::WRFLAGS64: {
+    unsigned Is64Bit = MI.getOpcode() == X86::WRFLAGS64;
+    MachineBasicBlock &MBB = *MIB->getParent();
+
+    BuildMI(MBB, MI, MIB->getDebugLoc(),
+            get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+        .addReg(MI.getOperand(0).getReg());
+    BuildMI(MBB, MI, MIB->getDebugLoc(),
+            get(Is64Bit ? X86::POPF64 : X86::POPF32));
+    MI.eraseFromParent();
+    return true;
+  }
+
   // KNL does not recognize dependency-breaking idioms for mask registers,
   // so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
   // Using %k0 as the undef input register is a performance heuristic based
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -1386,7 +1386,7 @@
 
 }
 
-let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
+let isPseudo = 1, mayLoad = 1, mayStore = 1,
     SchedRW = [WriteRMW], Defs = [ESP] in {
   let Uses = [ESP] in
   def RDFLAGS32 : PseudoI<(outs GR32:$dst), (ins),
@@ -1399,7 +1399,7 @@
                 Requires<[In64BitMode]>;
 }
 
-let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
+let isPseudo = 1, mayLoad = 1, mayStore = 1,
     SchedRW = [WriteRMW] in {
   let Defs = [ESP, EFLAGS, DF], Uses = [ESP] in
   def WRFLAGS32 : PseudoI<(outs), (ins GR32:$src),
diff --git a/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll b/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
--- a/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
@@ -52,3 +52,167 @@
   call void @llvm.x86.flags.write.u64(i64 %arg)
   ret void
 }
+
+define i64 @read_flags_reg_pressure() nounwind {
+; CHECK-LABEL: read_flags_reg_pressure:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    subq $16, %rsp
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movq %rdx, (%rsp) # 8-byte Spill
+; CHECK-NEXT:    pushfq
+; CHECK-NEXT:    popq %rdx
+; CHECK-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movq (%rsp), %rdx # 8-byte Reload
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; CHECK-NEXT:    addq $16, %rsp
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    retq
+;
+; WIN64-LABEL: read_flags_reg_pressure:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    pushq %rbp
+; WIN64-NEXT:    pushq %r15
+; WIN64-NEXT:    pushq %r14
+; WIN64-NEXT:    pushq %r13
+; WIN64-NEXT:    pushq %r12
+; WIN64-NEXT:    pushq %rsi
+; WIN64-NEXT:    pushq %rdi
+; WIN64-NEXT:    pushq %rbx
+; WIN64-NEXT:    subq $16, %rsp
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
+; WIN64-NEXT:    #APP
+; WIN64-NEXT:    #NO_APP
+; WIN64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; WIN64-NEXT:    pushfq
+; WIN64-NEXT:    popq %rdx
+; WIN64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; WIN64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; WIN64-NEXT:    #APP
+; WIN64-NEXT:    #NO_APP
+; WIN64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; WIN64-NEXT:    addq $16, %rsp
+; WIN64-NEXT:    popq %rbx
+; WIN64-NEXT:    popq %rdi
+; WIN64-NEXT:    popq %rsi
+; WIN64-NEXT:    popq %r12
+; WIN64-NEXT:    popq %r13
+; WIN64-NEXT:    popq %r14
+; WIN64-NEXT:    popq %r15
+; WIN64-NEXT:    popq %rbp
+; WIN64-NEXT:    retq
+  %1 = tail call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } asm sideeffect "", "={ax},={bx},={cx},={dx},={si},={di},={bp},={r8},={r9},={r10},={r11},={r12},={r13},={r14},={r15},~{dirflag},~{fpsr},~{flags}"()
+  %2 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 0
+  %3 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 1
+  %4 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 2
+  %5 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 3
+  %6 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 4
+  %7 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 5
+  %8 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 6
+  %9 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 7
+  %10 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 8
+  %11 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 9
+  %12 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 10
+  %13 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 11
+  %14 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 12
+  %15 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 13
+  %16 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 14
+  %17 = tail call i64 @llvm.x86.flags.read.u64()
+  tail call void asm sideeffect "", "{ax},{bx},{cx},{dx},{si},{di},{bp},{r8},{r9},{r10},{r11},{r12},{r13},{r14},{r15},~{dirflag},~{fpsr},~{flags}"(i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9, i64 %10, i64 %11, i64 %12, i64 %13, i64 %14, i64 %15, i64 %16)
+  ret i64 %17
+}
+
+define void @write_flags_reg_pressure(i64 noundef %0) nounwind {
+; CHECK-LABEL: write_flags_reg_pressure:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    subq $16, %rsp
+; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movq %rdx, (%rsp) # 8-byte Spill
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; CHECK-NEXT:    pushq %rdx
+; CHECK-NEXT:    popfq
+; CHECK-NEXT:    movq (%rsp), %rdx # 8-byte Reload
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    addq $16, %rsp
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    retq
+;
+; WIN64-LABEL: write_flags_reg_pressure:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    pushq %rbp
+; WIN64-NEXT:    pushq %r15
+; WIN64-NEXT:    pushq %r14
+; WIN64-NEXT:    pushq %r13
+; WIN64-NEXT:    pushq %r12
+; WIN64-NEXT:    pushq %rsi
+; WIN64-NEXT:    pushq %rdi
+; WIN64-NEXT:    pushq %rbx
+; WIN64-NEXT:    subq $16, %rsp
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
+; WIN64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; WIN64-NEXT:    #APP
+; WIN64-NEXT:    #NO_APP
+; WIN64-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; WIN64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; WIN64-NEXT:    pushq %rdx
+; WIN64-NEXT:    popfq
+; WIN64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; WIN64-NEXT:    #APP
+; WIN64-NEXT:    #NO_APP
+; WIN64-NEXT:    addq $16, %rsp
+; WIN64-NEXT:    popq %rbx
+; WIN64-NEXT:    popq %rdi
+; WIN64-NEXT:    popq %rsi
+; WIN64-NEXT:    popq %r12
+; WIN64-NEXT:    popq %r13
+; WIN64-NEXT:    popq %r14
+; WIN64-NEXT:    popq %r15
+; WIN64-NEXT:    popq %rbp
+; WIN64-NEXT:    retq
+  %2 = tail call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } asm sideeffect "", "={ax},={bx},={cx},={dx},={si},={di},={bp},={r8},={r9},={r10},={r11},={r12},={r13},={r14},={r15},~{dirflag},~{fpsr},~{flags}"()
+  %3 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 0
+  %4 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 1
+  %5 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 2
+  %6 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 3
+  %7 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 4
+  %8 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 5
+  %9 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 6
+  %10 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 7
+  %11 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 8
+  %12 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 9
+  %13 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 10
+  %14 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 11
+  %15 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 12
+  %16 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 13
+  %17 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 14
+  tail call void @llvm.x86.flags.write.u64(i64 %0)
+  tail call void asm sideeffect "", "{ax},{bx},{cx},{dx},{si},{di},{bp},{r8},{r9},{r10},{r11},{r12},{r13},{r14},{r15},~{dirflag},~{fpsr},~{flags}"(i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9, i64 %10, i64 %11, i64 %12, i64 %13, i64 %14, i64 %15, i64 %16, i64 %17)
+  ret void
+}