Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -3263,34 +3263,67 @@ return; } - // Moving EFLAGS to / from another register requires a push and a pop. - // Notice that we have to adjust the stack if we don't want to clobber the - // first frame index. See X86FrameLowering.cpp - clobbersTheStack. - if (SrcReg == X86::EFLAGS) { - if (X86::GR64RegClass.contains(DestReg)) { - BuildMI(MBB, MI, DL, get(X86::PUSHF64)); - BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); - return; - } - if (X86::GR32RegClass.contains(DestReg)) { - BuildMI(MBB, MI, DL, get(X86::PUSHF32)); - BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); + bool FromEFLAGS = SrcReg == X86::EFLAGS; + bool ToEFLAGS = DestReg == X86::EFLAGS; + int Reg = FromEFLAGS ? DestReg : SrcReg; + bool is32 = X86::GR32RegClass.contains(Reg); + bool is64 = X86::GR64RegClass.contains(Reg); + if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) { + int Mov = is64 ? X86::MOV64rr : X86::MOV32rr; + int Push = is64 ? X86::PUSH64r : X86::PUSH32r; + int Pop = is64 ? X86::POP64r : X86::POP32r; + int PushF = is64 ? X86::PUSHF64 : X86::PUSHF32; + int PopF = is64 ? X86::POPF64 : X86::POPF32; + int AX = is64 ? X86::RAX : X86::EAX; + + if (Subtarget.isTargetNaCl()) { + // NaCl's sandbox doesn't allow usage of PUSHF/POPF. Instead: + // - Save the overflow flag OF into AL using SETO, and restore it using + // a signed 8-bit addition of AL and INT8_MAX. + // - Save/restore the bottom 8 EFLAGS bits (CF, PF, AF, ZF, SF) to/from + // AH using LAHF/SAHF. + // - When RAX/EAX is live and isn't the destination register, make sure + // it isn't clobbered by PUSH/POP'ing it before and after + // saving/restoring the flags. + // This is fairly inefficient because it occurs after register allocation, + // and we don't know which flags are actually live. + + bool AXDead = (Reg == AX) || + (MachineBasicBlock::LQR_Dead == + MBB.computeRegisterLiveness(&getRegisterInfo(), AX, MI)); + + if (!AXDead) + BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true)); + if (FromEFLAGS) { + BuildMI(MBB, MI, DL, get(X86::SETOr), X86::AL); + BuildMI(MBB, MI, DL, get(X86::LAHF)); + BuildMI(MBB, MI, DL, get(Mov), Reg).addReg(AX); + } + if (ToEFLAGS) { + BuildMI(MBB, MI, DL, get(Mov), AX) + .addReg(Reg, getKillRegState(KillSrc)); + BuildMI(MBB, MI, DL, get(X86::ADD8ri), X86::AL) + .addReg(X86::AL) + .addImm(INT8_MAX); + BuildMI(MBB, MI, DL, get(X86::SAHF)); + } + if (!AXDead) + BuildMI(MBB, MI, DL, get(Pop), AX); return; } - } - if (DestReg == X86::EFLAGS) { - if (X86::GR64RegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(X86::PUSH64r)) - .addReg(SrcReg, getKillRegState(KillSrc)); - BuildMI(MBB, MI, DL, get(X86::POPF64)); - return; + + // Moving EFLAGS to / from another register requires a push and a pop. + // Notice that we have to adjust the stack if we don't want to clobber the + // first frame index. See X86FrameLowering.cpp - clobbersTheStack. + if (FromEFLAGS) { + BuildMI(MBB, MI, DL, get(PushF)); + BuildMI(MBB, MI, DL, get(Pop), Reg); } - if (X86::GR32RegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(X86::PUSH32r)) - .addReg(SrcReg, getKillRegState(KillSrc)); - BuildMI(MBB, MI, DL, get(X86::POPF32)); - return; + if (ToEFLAGS) { + BuildMI(MBB, MI, DL, get(Push)).addReg(Reg, getKillRegState(KillSrc)); + BuildMI(MBB, MI, DL, get(PopF)); } + return; } DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) Index: test/CodeGen/X86/cmpxchg-clobber-flags.ll =================================================================== --- test/CodeGen/X86/cmpxchg-clobber-flags.ll +++ test/CodeGen/X86/cmpxchg-clobber-flags.ll @@ -2,6 +2,10 @@ ; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=i386-unknown-nacl %s -o - | FileCheck %s -check-prefix=NACL +; RUN: llc -verify-machineinstrs -mtriple=i386-unknown-nacl -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=NACL +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-nacl %s -o - | FileCheck %s -check-prefix=NACL +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-nacl -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=NACL declare i32 @bar() @@ -16,6 +20,25 @@ ; CHECK-NEXT: push[[LQ]] [[FLAGS]] ; CHECK-NEXT: popf[[LQ]] ; CHECK-NEXT: jne + + +; NACL-LABEL: test_intervening_call: +; NACL: cmpxchg +; NACL: push[[LQ:[lq]]] [[AX:%.*]] +; NACL-NEXT: seto %al +; NACL-NEXT: lahf +; NACL-NEXT: mov[[LQ]] [[AX]], [[FLAGS:%.*]] +; NACL-NEXT: pop[[LQ]] [[AX]] + +; NACL-NEXT: call[[LQ]] bar + +; NACL-NEXT: push[[LQ]] [[AX]] +; NACL-NEXT: mov[[LQ]] [[FLAGS]], [[AX]] +; NACL-NEXT: addb $127, %al +; NACL-NEXT: sahf +; NACL-NEXT: pop[[LQ]] [[AX]] +; NACL-NEXT: jne + %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst %p = extractvalue { i64, i1 } %cx, 1 call i32 @bar() @@ -31,9 +54,13 @@ ; Interesting in producing a clobber without any function calls. define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) { ; CHECK-LABEL: test_control_flow: - ; CHECK: cmpxchg ; CHECK-NEXT: jne + +; NACL-LABEL: test_control_flow: +; NACL: cmpxchg +; NACL-NEXT: jne + entry: %cmp = icmp sgt i32 %i, %j br i1 %cmp, label %loop_start, label %cond.end @@ -68,7 +95,6 @@ ; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here. define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) { ; CHECK-LABEL: test_feed_cmov: - ; CHECK: cmpxchg ; CHECK: pushf[[LQ:[lq]]] ; CHECK-NEXT: pop[[LQ]] [[FLAGS:%.*]] @@ -77,6 +103,24 @@ ; CHECK-NEXT: push[[LQ]] [[FLAGS]] ; CHECK-NEXT: popf[[LQ]] + + +; NACL-LABEL: test_feed_cmov: +; NACL: cmpxchg +; NACL: push[[LQ:[lq]]] [[AX:%.*]] +; NACL-NEXT: seto %al +; NACL-NEXT: lahf +; NACL-NEXT: mov[[LQ]] [[AX]], [[FLAGS:%.*]] +; NACL-NEXT: pop[[LQ]] [[AX]] + +; NACL-NEXT: call[[LQ]] bar + +; NACL-NEXT: push[[LQ]] [[AX]] +; NACL-NEXT: mov[[LQ]] [[FLAGS]], [[AX]] +; NACL-NEXT: addb $127, %al +; NACL-NEXT: sahf +; NACL-NEXT: pop[[LQ]] [[AX]] + %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst %success = extractvalue { i32, i1 } %res, 1