Index: llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp =================================================================== --- llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp +++ llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp @@ -1498,13 +1498,6 @@ // pass specifically so that we have the complete set of instructions for // which we will do post-load hardening and can defer it in certain // circumstances. - // - // FIXME: This could probably be made even more effective by doing it - // across the entire function. Rather than just walking the flat list - // backwards here, we could walk the function in PO and each block bottom - // up, allowing us to in some cases sink hardening across block blocks. As - // long as the in-block predicate state is used at the eventual hardening - // site, this remains safe. for (MachineInstr &MI : MBB) { if (HardenLoads) { // We cannot both require hardening the def of a load and its address. @@ -1586,8 +1579,8 @@ } // Otherwise we have a call. We need to handle transferring the predicate - // state into a call and recovering it after the call returns unless this - // is a tail call. + // state into a call and recovering it after the call returns (unless this + // is a tail call). assert(MI.isCall() && "Should only reach here for calls!"); tracePredStateThroughCall(MI); } @@ -2109,21 +2102,10 @@ DebugLoc Loc = MI.getDebugLoc(); auto InsertPt = MI.getIterator(); - if (FenceCallAndRet) { - // Simply forcibly block speculation of loads out of the function by using - // an LFENCE. This is potentially a heavy-weight mitigation strategy, but - // should be secure, is simple from an ABI perspective, and the cost can be - // minimized through inlining. - // - // FIXME: We should investigate ways to establish a strong data-dependency - // on the return. However, poisoning the stack pointer is unlikely to work - // because the return is *predicted* rather than relying on the load of the - // return address to actually resolve. - BuildMI(MBB, InsertPt, Loc, TII->get(X86::LFENCE)); - ++NumInstsInserted; - ++NumLFENCEsInserted; + if (FenceCallAndRet) + // No need to fence here as we'll fence at the return site itself. That + // handles more cases than we can handle here. return; - } // Take our predicate state, shift it to the high 17 bits (so that we keep // pointers canonical) and merge it into RSP. This will allow the caller to @@ -2141,31 +2123,164 @@ /// /// For tail calls, this is all we need to do. /// -/// For calls where we might return to control flow, we further need to extract -/// the predicate state built up within that function from the high bits of the -/// stack pointer, and make that the newly available predicate state. +/// For calls where we might return and resume the control flow, we need to +/// extract the predicate state from the high bits of the stack pointer after +/// control returns from the called function. +/// +/// We also need to verify that we intended to return to this location in the +/// code. An attacker might arrange for the processor to mispredict the return +/// to this valid but incorrect return address in the program rather than the +/// correct one. See the paper on this attack, called "ret2spec" by the +/// researchers, here: +/// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf +/// +/// The way we verify that we returned to the correct location is by preserving +/// the expected return address across the call. 
One technique involves taking
+/// advantage of the red-zone to load the return address from `-8(%rsp)`,
+/// where it was left below the stack pointer when the `RET` instruction
+/// popped `%rsp`. Alternatively, we can directly save the address into a
+/// register that will be preserved across the call. We compare this intended
+/// return address against the address immediately following the call (the
+/// observed return address). If these mismatch, we have detected
+/// misspeculation and can poison our predicate state.
 void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
     MachineInstr &MI) {
   MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
   auto InsertPt = MI.getIterator();
   DebugLoc Loc = MI.getDebugLoc();
 
+  if (FenceCallAndRet) {
+    if (MI.isReturn())
+      // Tail call: we don't return to this function.
+      // FIXME: We should also handle noreturn calls.
+      return;
+
+    // We don't need to fence before the call because the function should fence
+    // in its entry. However, we do need to fence after the call returns.
+    // Fencing before the return doesn't correctly handle cases where the
+    // return itself is mispredicted.
+    BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
+    ++NumInstsInserted;
+    ++NumLFENCEsInserted;
+    return;
+  }
+
   // First, we transfer the predicate state into the called function by merging
   // it into the stack pointer. This will kill the current def of the state.
   unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
   mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
 
   // If this call is also a return, it is a tail call and we don't need anything
-  // else to handle it so just continue.
-  // FIXME: We should also handle noreturn calls.
-  if (MI.isReturn())
+  // else to handle it so just return. Also, if there are no further
+  // instructions and no successors, this call does not return so we can also
+  // bail.
+  if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
     return;
 
-  // We need to step past the call and recover the predicate state from SP after
-  // the return, and make this new state available.
+  // Create a symbol to track the return address and attach it to the call
+  // machine instruction. We will lower extra symbols attached to call
+  // instructions as a label immediately following the call.
+  MCSymbol *RetSymbol = MF.getContext().createTempSymbol();
+  MI.setPostInstrSymbol(MF, RetSymbol);
+
+  const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
+  unsigned ExpectedRetAddrReg;
+
+  // If we have no red zones, we need to save the expected return address prior
+  // to the call.
+  if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone)) {
+    // Compute the expected return address prior to the call and store it in a
+    // register that lives across the call.
+    //
+    // In some ways, this is doubly satisfying as a mitigation because it will
+    // also successfully detect stack smashing bugs in some cases (typically,
+    // when a callee-saved register is used and the callee doesn't push it onto
+    // the stack). But that isn't our primary goal, so we only use it as
+    // a fallback.
+    //
+    // FIXME: It isn't clear that this is reliable in the face of
+    // rematerialization in the register allocator. We somehow need to force
+    // that to not occur for this particular instruction, and instead to spill
+    // or otherwise preserve the value computed *prior* to the call.
+    //
+    // FIXME: It is even less clear why MachineCSE can't just fold this when we
+    // end up having to use identical instructions both before and after the
+    // call to feed the comparison.
+    ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
+    if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+        !Subtarget->isPositionIndependent()) {
+      BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
+          .addSym(RetSymbol);
+    } else {
+      BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
+          .addReg(/*Base*/ X86::RIP)
+          .addImm(/*Scale*/ 1)
+          .addReg(/*Index*/ 0)
+          .addSym(RetSymbol)
+          .addReg(/*Segment*/ 0);
+    }
+  }
+
+  // Step past the call to handle when it returns.
   ++InsertPt;
+
+  // If we have red zones enabled, the return address is still available on the
+  // stack immediately after the call; load it into a register as the very
+  // first instruction after the call.
+  if (!MF.getFunction().hasFnAttribute(Attribute::NoRedZone)) {
+    ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
+    BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
+        .addReg(/*Base*/ X86::RSP)
+        .addImm(/*Scale*/ 1)
+        .addReg(/*Index*/ 0)
+        .addImm(/*Displacement*/ -8) // The stack pointer has been popped, so
+                                     // the return address is 8 bytes below it.
+        .addReg(/*Segment*/ 0);
+  }
+
+  // Now we extract the callee's predicate state from the stack pointer.
   unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
-  PS->SSA.AddAvailableValue(&MBB, NewStateReg);
+
+  // Test the expected return address against our actual address (the label
+  // immediately following the call). If we can form that address as an
+  // immediate, this is easy. Otherwise we compute it into a register.
+  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+      !Subtarget->isPositionIndependent()) {
+    // FIXME: Could we fold this with the load? It would require careful EFLAGS
+    // management.
+    BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
+        .addReg(ExpectedRetAddrReg, RegState::Kill)
+        .addSym(RetSymbol);
+  } else {
+    unsigned ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
+    BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
+        .addReg(/*Base*/ X86::RIP)
+        .addImm(/*Scale*/ 1)
+        .addReg(/*Index*/ 0)
+        .addSym(RetSymbol)
+        .addReg(/*Segment*/ 0);
+    BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
+        .addReg(ExpectedRetAddrReg, RegState::Kill)
+        .addReg(ActualRetAddrReg, RegState::Kill);
+  }
+
+  // Now conditionally update the predicate state we just extracted if we ended
+  // up at a different return address than expected.
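+  //
+  // Conceptually (illustrative description, not the literal emitted MIs): the
+  // CMP above sets EFLAGS, and the CMOVNE built below overwrites the freshly
+  // extracted predicate state with the all-ones poison value whenever the
+  // return-address check failed.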
+ int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; + auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes); + + unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); + auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), + UpdatedStateReg) + .addReg(NewStateReg, RegState::Kill) + .addReg(PS->PoisonReg); + CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true); + ++NumInstsInserted; + LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n"); + + PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg); } /// An attacker may speculatively store over a value that is then speculatively Index: llvm/test/CodeGen/X86/speculative-load-hardening-call-and-ret.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/speculative-load-hardening-call-and-ret.ll @@ -0,0 +1,274 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening | FileCheck %s --check-prefix=X64-NOPIC +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening -code-model medium | FileCheck %s --check-prefix=X64-NOPIC-MCM +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening -relocation-model pic | FileCheck %s --check-prefix=X64-PIC +; +; FIXME: Add support for 32-bit. + +declare void @f() + +define i32 @test_calls_and_rets(i32 *%ptr) nounwind { +; X64-NOPIC-LABEL: test_calls_and_rets: +; X64-NOPIC: # %bb.0: # %entry +; X64-NOPIC-NEXT: pushq %rbp +; X64-NOPIC-NEXT: pushq %r14 +; X64-NOPIC-NEXT: pushq %rbx +; X64-NOPIC-NEXT: movq %rsp, %rax +; X64-NOPIC-NEXT: movq %rdi, %rbx +; X64-NOPIC-NEXT: movq $-1, %r14 +; X64-NOPIC-NEXT: sarq $63, %rax +; X64-NOPIC-NEXT: shlq $47, %rax +; X64-NOPIC-NEXT: orq %rax, %rsp +; X64-NOPIC-NEXT: callq f +; X64-NOPIC-NEXT: .Ltmp0: +; X64-NOPIC-NEXT: movq %rsp, %rax +; X64-NOPIC-NEXT: movq -{{[0-9]+}}(%rsp), %rcx +; X64-NOPIC-NEXT: sarq $63, %rax +; X64-NOPIC-NEXT: cmpq $.Ltmp0, %rcx +; X64-NOPIC-NEXT: cmovneq %r14, %rax +; X64-NOPIC-NEXT: movl (%rbx), %ebp +; X64-NOPIC-NEXT: shlq $47, %rax +; X64-NOPIC-NEXT: orq %rax, %rsp +; X64-NOPIC-NEXT: callq f +; X64-NOPIC-NEXT: .Ltmp1: +; X64-NOPIC-NEXT: movq %rsp, %rcx +; X64-NOPIC-NEXT: movq -{{[0-9]+}}(%rsp), %rax +; X64-NOPIC-NEXT: sarq $63, %rcx +; X64-NOPIC-NEXT: cmpq $.Ltmp1, %rax +; X64-NOPIC-NEXT: cmovneq %r14, %rcx +; X64-NOPIC-NEXT: addl (%rbx), %ebp +; X64-NOPIC-NEXT: orl %ecx, %ebp +; X64-NOPIC-NEXT: shlq $47, %rcx +; X64-NOPIC-NEXT: movl %ebp, %eax +; X64-NOPIC-NEXT: orq %rcx, %rsp +; X64-NOPIC-NEXT: popq %rbx +; X64-NOPIC-NEXT: popq %r14 +; X64-NOPIC-NEXT: popq %rbp +; X64-NOPIC-NEXT: retq +; +; X64-NOPIC-MCM-LABEL: test_calls_and_rets: +; X64-NOPIC-MCM: # %bb.0: # %entry +; X64-NOPIC-MCM-NEXT: pushq %rbp +; X64-NOPIC-MCM-NEXT: pushq %r14 +; X64-NOPIC-MCM-NEXT: pushq %rbx +; X64-NOPIC-MCM-NEXT: movq %rsp, %rax +; X64-NOPIC-MCM-NEXT: movq %rdi, %rbx +; X64-NOPIC-MCM-NEXT: movq $-1, %r14 +; X64-NOPIC-MCM-NEXT: sarq $63, %rax +; X64-NOPIC-MCM-NEXT: shlq $47, %rax +; X64-NOPIC-MCM-NEXT: orq %rax, %rsp +; X64-NOPIC-MCM-NEXT: callq f +; X64-NOPIC-MCM-NEXT: .Ltmp0: +; X64-NOPIC-MCM-NEXT: movq %rsp, %rax +; X64-NOPIC-MCM-NEXT: movq -{{[0-9]+}}(%rsp), %rcx +; X64-NOPIC-MCM-NEXT: sarq $63, %rax +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %rdx +; X64-NOPIC-MCM-NEXT: cmpq %rdx, %rcx +; X64-NOPIC-MCM-NEXT: cmovneq %r14, %rax +; X64-NOPIC-MCM-NEXT: movl (%rbx), %ebp +; X64-NOPIC-MCM-NEXT: shlq 
$47, %rax +; X64-NOPIC-MCM-NEXT: orq %rax, %rsp +; X64-NOPIC-MCM-NEXT: callq f +; X64-NOPIC-MCM-NEXT: .Ltmp1: +; X64-NOPIC-MCM-NEXT: movq %rsp, %rcx +; X64-NOPIC-MCM-NEXT: movq -{{[0-9]+}}(%rsp), %rax +; X64-NOPIC-MCM-NEXT: sarq $63, %rcx +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %rdx +; X64-NOPIC-MCM-NEXT: cmpq %rdx, %rax +; X64-NOPIC-MCM-NEXT: cmovneq %r14, %rcx +; X64-NOPIC-MCM-NEXT: addl (%rbx), %ebp +; X64-NOPIC-MCM-NEXT: orl %ecx, %ebp +; X64-NOPIC-MCM-NEXT: shlq $47, %rcx +; X64-NOPIC-MCM-NEXT: movl %ebp, %eax +; X64-NOPIC-MCM-NEXT: orq %rcx, %rsp +; X64-NOPIC-MCM-NEXT: popq %rbx +; X64-NOPIC-MCM-NEXT: popq %r14 +; X64-NOPIC-MCM-NEXT: popq %rbp +; X64-NOPIC-MCM-NEXT: retq +; +; X64-PIC-LABEL: test_calls_and_rets: +; X64-PIC: # %bb.0: # %entry +; X64-PIC-NEXT: pushq %rbp +; X64-PIC-NEXT: pushq %r14 +; X64-PIC-NEXT: pushq %rbx +; X64-PIC-NEXT: movq %rsp, %rax +; X64-PIC-NEXT: movq %rdi, %rbx +; X64-PIC-NEXT: movq $-1, %r14 +; X64-PIC-NEXT: sarq $63, %rax +; X64-PIC-NEXT: shlq $47, %rax +; X64-PIC-NEXT: orq %rax, %rsp +; X64-PIC-NEXT: callq f@PLT +; X64-PIC-NEXT: .Ltmp0: +; X64-PIC-NEXT: movq %rsp, %rax +; X64-PIC-NEXT: movq -{{[0-9]+}}(%rsp), %rcx +; X64-PIC-NEXT: sarq $63, %rax +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rdx +; X64-PIC-NEXT: cmpq %rdx, %rcx +; X64-PIC-NEXT: cmovneq %r14, %rax +; X64-PIC-NEXT: movl (%rbx), %ebp +; X64-PIC-NEXT: shlq $47, %rax +; X64-PIC-NEXT: orq %rax, %rsp +; X64-PIC-NEXT: callq f@PLT +; X64-PIC-NEXT: .Ltmp1: +; X64-PIC-NEXT: movq %rsp, %rcx +; X64-PIC-NEXT: movq -{{[0-9]+}}(%rsp), %rax +; X64-PIC-NEXT: sarq $63, %rcx +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rdx +; X64-PIC-NEXT: cmpq %rdx, %rax +; X64-PIC-NEXT: cmovneq %r14, %rcx +; X64-PIC-NEXT: addl (%rbx), %ebp +; X64-PIC-NEXT: orl %ecx, %ebp +; X64-PIC-NEXT: shlq $47, %rcx +; X64-PIC-NEXT: movl %ebp, %eax +; X64-PIC-NEXT: orq %rcx, %rsp +; X64-PIC-NEXT: popq %rbx +; X64-PIC-NEXT: popq %r14 +; X64-PIC-NEXT: popq %rbp +; X64-PIC-NEXT: retq +entry: + call void @f() + %x = load i32, i32* %ptr + call void @f() + %y = load i32, i32* %ptr + %z = add i32 %x, %y + ret i32 %z +} + +define i32 @test_calls_and_rets_noredzone(i32 *%ptr) nounwind noredzone { +; X64-NOPIC-LABEL: test_calls_and_rets_noredzone: +; X64-NOPIC: # %bb.0: # %entry +; X64-NOPIC-NEXT: pushq %rbp +; X64-NOPIC-NEXT: pushq %r15 +; X64-NOPIC-NEXT: pushq %r14 +; X64-NOPIC-NEXT: pushq %rbx +; X64-NOPIC-NEXT: pushq %rax +; X64-NOPIC-NEXT: movq %rsp, %rax +; X64-NOPIC-NEXT: movq %rdi, %rbx +; X64-NOPIC-NEXT: movq $-1, %r14 +; X64-NOPIC-NEXT: sarq $63, %rax +; X64-NOPIC-NEXT: shlq $47, %rax +; X64-NOPIC-NEXT: orq %rax, %rsp +; X64-NOPIC-NEXT: movq $.Ltmp2, %rbp +; X64-NOPIC-NEXT: callq f +; X64-NOPIC-NEXT: .Ltmp2: +; X64-NOPIC-NEXT: movq %rsp, %rax +; X64-NOPIC-NEXT: sarq $63, %rax +; X64-NOPIC-NEXT: cmpq $.Ltmp2, %rbp +; X64-NOPIC-NEXT: cmovneq %r14, %rax +; X64-NOPIC-NEXT: movl (%rbx), %ebp +; X64-NOPIC-NEXT: shlq $47, %rax +; X64-NOPIC-NEXT: orq %rax, %rsp +; X64-NOPIC-NEXT: movq $.Ltmp3, %r15 +; X64-NOPIC-NEXT: callq f +; X64-NOPIC-NEXT: .Ltmp3: +; X64-NOPIC-NEXT: movq %rsp, %rcx +; X64-NOPIC-NEXT: sarq $63, %rcx +; X64-NOPIC-NEXT: cmpq $.Ltmp3, %r15 +; X64-NOPIC-NEXT: cmovneq %r14, %rcx +; X64-NOPIC-NEXT: addl (%rbx), %ebp +; X64-NOPIC-NEXT: orl %ecx, %ebp +; X64-NOPIC-NEXT: shlq $47, %rcx +; X64-NOPIC-NEXT: movl %ebp, %eax +; X64-NOPIC-NEXT: orq %rcx, %rsp +; X64-NOPIC-NEXT: addq $8, %rsp +; X64-NOPIC-NEXT: popq %rbx +; X64-NOPIC-NEXT: popq %r14 +; X64-NOPIC-NEXT: popq %r15 +; X64-NOPIC-NEXT: popq %rbp +; X64-NOPIC-NEXT: retq +; +; 
X64-NOPIC-MCM-LABEL: test_calls_and_rets_noredzone: +; X64-NOPIC-MCM: # %bb.0: # %entry +; X64-NOPIC-MCM-NEXT: pushq %rbp +; X64-NOPIC-MCM-NEXT: pushq %r15 +; X64-NOPIC-MCM-NEXT: pushq %r14 +; X64-NOPIC-MCM-NEXT: pushq %rbx +; X64-NOPIC-MCM-NEXT: pushq %rax +; X64-NOPIC-MCM-NEXT: movq %rsp, %rax +; X64-NOPIC-MCM-NEXT: movq %rdi, %rbx +; X64-NOPIC-MCM-NEXT: movq $-1, %r14 +; X64-NOPIC-MCM-NEXT: sarq $63, %rax +; X64-NOPIC-MCM-NEXT: shlq $47, %rax +; X64-NOPIC-MCM-NEXT: orq %rax, %rsp +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %rbp +; X64-NOPIC-MCM-NEXT: callq f +; X64-NOPIC-MCM-NEXT: .Ltmp2: +; X64-NOPIC-MCM-NEXT: movq %rsp, %rax +; X64-NOPIC-MCM-NEXT: sarq $63, %rax +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %rcx +; X64-NOPIC-MCM-NEXT: cmpq %rcx, %rbp +; X64-NOPIC-MCM-NEXT: cmovneq %r14, %rax +; X64-NOPIC-MCM-NEXT: movl (%rbx), %ebp +; X64-NOPIC-MCM-NEXT: shlq $47, %rax +; X64-NOPIC-MCM-NEXT: orq %rax, %rsp +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %r15 +; X64-NOPIC-MCM-NEXT: callq f +; X64-NOPIC-MCM-NEXT: .Ltmp3: +; X64-NOPIC-MCM-NEXT: movq %rsp, %rcx +; X64-NOPIC-MCM-NEXT: sarq $63, %rcx +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %rax +; X64-NOPIC-MCM-NEXT: cmpq %rax, %r15 +; X64-NOPIC-MCM-NEXT: cmovneq %r14, %rcx +; X64-NOPIC-MCM-NEXT: addl (%rbx), %ebp +; X64-NOPIC-MCM-NEXT: orl %ecx, %ebp +; X64-NOPIC-MCM-NEXT: shlq $47, %rcx +; X64-NOPIC-MCM-NEXT: movl %ebp, %eax +; X64-NOPIC-MCM-NEXT: orq %rcx, %rsp +; X64-NOPIC-MCM-NEXT: addq $8, %rsp +; X64-NOPIC-MCM-NEXT: popq %rbx +; X64-NOPIC-MCM-NEXT: popq %r14 +; X64-NOPIC-MCM-NEXT: popq %r15 +; X64-NOPIC-MCM-NEXT: popq %rbp +; X64-NOPIC-MCM-NEXT: retq +; +; X64-PIC-LABEL: test_calls_and_rets_noredzone: +; X64-PIC: # %bb.0: # %entry +; X64-PIC-NEXT: pushq %rbp +; X64-PIC-NEXT: pushq %r15 +; X64-PIC-NEXT: pushq %r14 +; X64-PIC-NEXT: pushq %rbx +; X64-PIC-NEXT: pushq %rax +; X64-PIC-NEXT: movq %rsp, %rax +; X64-PIC-NEXT: movq %rdi, %rbx +; X64-PIC-NEXT: movq $-1, %r14 +; X64-PIC-NEXT: sarq $63, %rax +; X64-PIC-NEXT: shlq $47, %rax +; X64-PIC-NEXT: orq %rax, %rsp +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rbp +; X64-PIC-NEXT: callq f@PLT +; X64-PIC-NEXT: .Ltmp2: +; X64-PIC-NEXT: movq %rsp, %rax +; X64-PIC-NEXT: sarq $63, %rax +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rcx +; X64-PIC-NEXT: cmpq %rcx, %rbp +; X64-PIC-NEXT: cmovneq %r14, %rax +; X64-PIC-NEXT: movl (%rbx), %ebp +; X64-PIC-NEXT: shlq $47, %rax +; X64-PIC-NEXT: orq %rax, %rsp +; X64-PIC-NEXT: leaq {{.*}}(%rip), %r15 +; X64-PIC-NEXT: callq f@PLT +; X64-PIC-NEXT: .Ltmp3: +; X64-PIC-NEXT: movq %rsp, %rcx +; X64-PIC-NEXT: sarq $63, %rcx +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rax +; X64-PIC-NEXT: cmpq %rax, %r15 +; X64-PIC-NEXT: cmovneq %r14, %rcx +; X64-PIC-NEXT: addl (%rbx), %ebp +; X64-PIC-NEXT: orl %ecx, %ebp +; X64-PIC-NEXT: shlq $47, %rcx +; X64-PIC-NEXT: movl %ebp, %eax +; X64-PIC-NEXT: orq %rcx, %rsp +; X64-PIC-NEXT: addq $8, %rsp +; X64-PIC-NEXT: popq %rbx +; X64-PIC-NEXT: popq %r14 +; X64-PIC-NEXT: popq %r15 +; X64-PIC-NEXT: popq %rbp +; X64-PIC-NEXT: retq +entry: + call void @f() + %x = load i32, i32* %ptr + call void @f() + %y = load i32, i32* %ptr + %z = add i32 %x, %y + ret i32 %z +} Index: llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll =================================================================== --- llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll +++ llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll @@ -16,38 +16,46 @@ define i32 @test_indirect_call(i32 ()** %ptr) nounwind { ; X64-LABEL: test_indirect_call: ; X64: # %bb.0: # %entry -; X64-NEXT: 
pushq %rax +; X64-NEXT: pushq %rbx ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %rcx +; X64-NEXT: movq $-1, %rbx ; X64-NEXT: sarq $63, %rax ; X64-NEXT: movq (%rdi), %rcx ; X64-NEXT: orq %rax, %rcx ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq *%rcx +; X64-NEXT: .Ltmp0: ; X64-NEXT: movq %rsp, %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-NEXT: sarq $63, %rcx +; X64-NEXT: cmpq $.Ltmp0, %rdx +; X64-NEXT: cmovneq %rbx, %rcx ; X64-NEXT: shlq $47, %rcx ; X64-NEXT: orq %rcx, %rsp -; X64-NEXT: popq %rcx +; X64-NEXT: popq %rbx ; X64-NEXT: retq ; ; X64-RETPOLINE-LABEL: test_indirect_call: ; X64-RETPOLINE: # %bb.0: # %entry -; X64-RETPOLINE-NEXT: pushq %rax +; X64-RETPOLINE-NEXT: pushq %rbx ; X64-RETPOLINE-NEXT: movq %rsp, %rax -; X64-RETPOLINE-NEXT: movq $-1, %rcx +; X64-RETPOLINE-NEXT: movq $-1, %rbx ; X64-RETPOLINE-NEXT: sarq $63, %rax ; X64-RETPOLINE-NEXT: movq (%rdi), %r11 ; X64-RETPOLINE-NEXT: orq %rax, %r11 ; X64-RETPOLINE-NEXT: shlq $47, %rax ; X64-RETPOLINE-NEXT: orq %rax, %rsp ; X64-RETPOLINE-NEXT: callq __llvm_retpoline_r11 +; X64-RETPOLINE-NEXT: .Ltmp0: ; X64-RETPOLINE-NEXT: movq %rsp, %rcx +; X64-RETPOLINE-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-RETPOLINE-NEXT: sarq $63, %rcx +; X64-RETPOLINE-NEXT: cmpq $.Ltmp0, %rdx +; X64-RETPOLINE-NEXT: cmovneq %rbx, %rcx ; X64-RETPOLINE-NEXT: shlq $47, %rcx ; X64-RETPOLINE-NEXT: orq %rcx, %rsp -; X64-RETPOLINE-NEXT: popq %rcx +; X64-RETPOLINE-NEXT: popq %rbx ; X64-RETPOLINE-NEXT: retq entry: %fp = load i32 ()*, i32 ()** %ptr @@ -86,37 +94,45 @@ define i32 @test_indirect_call_global() nounwind { ; X64-LABEL: test_indirect_call_global: ; X64: # %bb.0: # %entry -; X64-NEXT: pushq %rax +; X64-NEXT: pushq %rbx ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %rcx +; X64-NEXT: movq $-1, %rbx ; X64-NEXT: sarq $63, %rax ; X64-NEXT: movq {{.*}}(%rip), %rcx ; X64-NEXT: orq %rax, %rcx ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq *%rcx +; X64-NEXT: .Ltmp1: ; X64-NEXT: movq %rsp, %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-NEXT: sarq $63, %rcx +; X64-NEXT: cmpq $.Ltmp1, %rdx +; X64-NEXT: cmovneq %rbx, %rcx ; X64-NEXT: shlq $47, %rcx ; X64-NEXT: orq %rcx, %rsp -; X64-NEXT: popq %rcx +; X64-NEXT: popq %rbx ; X64-NEXT: retq ; ; X64-RETPOLINE-LABEL: test_indirect_call_global: ; X64-RETPOLINE: # %bb.0: # %entry -; X64-RETPOLINE-NEXT: pushq %rax +; X64-RETPOLINE-NEXT: pushq %rbx ; X64-RETPOLINE-NEXT: movq %rsp, %rax -; X64-RETPOLINE-NEXT: movq $-1, %rcx +; X64-RETPOLINE-NEXT: movq $-1, %rbx ; X64-RETPOLINE-NEXT: sarq $63, %rax ; X64-RETPOLINE-NEXT: movq {{.*}}(%rip), %r11 ; X64-RETPOLINE-NEXT: shlq $47, %rax ; X64-RETPOLINE-NEXT: orq %rax, %rsp ; X64-RETPOLINE-NEXT: callq __llvm_retpoline_r11 +; X64-RETPOLINE-NEXT: .Ltmp1: ; X64-RETPOLINE-NEXT: movq %rsp, %rcx +; X64-RETPOLINE-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-RETPOLINE-NEXT: sarq $63, %rcx +; X64-RETPOLINE-NEXT: cmpq $.Ltmp1, %rdx +; X64-RETPOLINE-NEXT: cmovneq %rbx, %rcx ; X64-RETPOLINE-NEXT: shlq $47, %rcx ; X64-RETPOLINE-NEXT: orq %rcx, %rsp -; X64-RETPOLINE-NEXT: popq %rcx +; X64-RETPOLINE-NEXT: popq %rbx ; X64-RETPOLINE-NEXT: retq entry: %fp = load i32 ()*, i32 ()** @global_fnptr @@ -205,19 +221,19 @@ ; X64-NEXT: movq global_blockaddrs(,%rax,8), %rax ; X64-NEXT: orq %rcx, %rax ; X64-NEXT: jmpq *%rax -; X64-NEXT: .Ltmp0: # Block address taken +; X64-NEXT: .Ltmp2: # Block address taken ; X64-NEXT: .LBB5_1: # %bb0 ; X64-NEXT: movl $2, %eax ; X64-NEXT: jmp .LBB5_2 -; X64-NEXT: .Ltmp1: # Block address taken +; X64-NEXT: .Ltmp3: # 
Block address taken ; X64-NEXT: .LBB5_4: # %bb2 ; X64-NEXT: movl $13, %eax ; X64-NEXT: jmp .LBB5_2 -; X64-NEXT: .Ltmp2: # Block address taken +; X64-NEXT: .Ltmp4: # Block address taken ; X64-NEXT: .LBB5_5: # %bb3 ; X64-NEXT: movl $42, %eax ; X64-NEXT: jmp .LBB5_2 -; X64-NEXT: .Ltmp3: # Block address taken +; X64-NEXT: .Ltmp5: # Block address taken ; X64-NEXT: .LBB5_3: # %bb1 ; X64-NEXT: movl $7, %eax ; X64-NEXT: .LBB5_2: # %bb0 @@ -243,22 +259,22 @@ ; X64-RETPOLINE-NEXT: cmoveq %rax, %rcx ; X64-RETPOLINE-NEXT: cmpq $4, %rdx ; X64-RETPOLINE-NEXT: jne .LBB6_3 -; X64-RETPOLINE-NEXT: .Ltmp0: # Block address taken +; X64-RETPOLINE-NEXT: .Ltmp2: # Block address taken ; X64-RETPOLINE-NEXT: # %bb.7: # %bb3 ; X64-RETPOLINE-NEXT: cmovneq %rax, %rcx ; X64-RETPOLINE-NEXT: movl $42, %eax ; X64-RETPOLINE-NEXT: jmp .LBB6_4 -; X64-RETPOLINE-NEXT: .Ltmp1: # Block address taken +; X64-RETPOLINE-NEXT: .Ltmp3: # Block address taken ; X64-RETPOLINE-NEXT: .LBB6_5: # %bb1 ; X64-RETPOLINE-NEXT: cmovneq %rax, %rcx ; X64-RETPOLINE-NEXT: movl $7, %eax ; X64-RETPOLINE-NEXT: jmp .LBB6_4 -; X64-RETPOLINE-NEXT: .Ltmp2: # Block address taken +; X64-RETPOLINE-NEXT: .Ltmp4: # Block address taken ; X64-RETPOLINE-NEXT: .LBB6_6: # %bb2 ; X64-RETPOLINE-NEXT: cmovneq %rax, %rcx ; X64-RETPOLINE-NEXT: movl $13, %eax ; X64-RETPOLINE-NEXT: jmp .LBB6_4 -; X64-RETPOLINE-NEXT: .Ltmp3: # Block address taken +; X64-RETPOLINE-NEXT: .Ltmp5: # Block address taken ; X64-RETPOLINE-NEXT: .LBB6_3: # %bb0 ; X64-RETPOLINE-NEXT: cmoveq %rax, %rcx ; X64-RETPOLINE-NEXT: movl $2, %eax Index: llvm/test/CodeGen/X86/speculative-load-hardening.ll =================================================================== --- llvm/test/CodeGen/X86/speculative-load-hardening.ll +++ llvm/test/CodeGen/X86/speculative-load-hardening.ll @@ -64,7 +64,7 @@ ; X64-NEXT: retq ; X64-NEXT: .LBB1_4: # %then2 ; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: movq %r8, %r15 +; X64-NEXT: movq %r8, %r14 ; X64-NEXT: cmovneq %rbx, %rax ; X64-NEXT: testl %edx, %edx ; X64-NEXT: je .LBB1_6 @@ -72,30 +72,34 @@ ; X64-NEXT: cmoveq %rbx, %rax ; X64-NEXT: movslq (%r9), %rcx ; X64-NEXT: orq %rax, %rcx -; X64-NEXT: leaq (%r15,%rcx,4), %r14 -; X64-NEXT: movl %ecx, (%r15,%rcx,4) +; X64-NEXT: leaq (%r14,%rcx,4), %r15 +; X64-NEXT: movl %ecx, (%r14,%rcx,4) ; X64-NEXT: jmp .LBB1_7 ; X64-NEXT: .LBB1_6: # %then3 ; X64-NEXT: cmovneq %rbx, %rax ; X64-NEXT: movl (%rcx), %ecx -; X64-NEXT: addl (%r15), %ecx +; X64-NEXT: addl (%r14), %ecx ; X64-NEXT: movslq %ecx, %rdi ; X64-NEXT: orq %rax, %rdi -; X64-NEXT: movl (%r15,%rdi,4), %esi +; X64-NEXT: movl (%r14,%rdi,4), %esi ; X64-NEXT: orl %eax, %esi -; X64-NEXT: movq (%r9), %r14 -; X64-NEXT: orq %rax, %r14 -; X64-NEXT: addl (%r14), %esi +; X64-NEXT: movq (%r9), %r15 +; X64-NEXT: orq %rax, %r15 +; X64-NEXT: addl (%r15), %esi ; X64-NEXT: shlq $47, %rax ; X64-NEXT: # kill: def $edi killed $edi killed $rdi ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq leak +; X64-NEXT: .Ltmp0: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp0, %rcx +; X64-NEXT: cmovneq %rbx, %rax ; X64-NEXT: .LBB1_7: # %merge -; X64-NEXT: movslq (%r14), %rcx +; X64-NEXT: movslq (%r15), %rcx ; X64-NEXT: orq %rax, %rcx -; X64-NEXT: movl $0, (%r15,%rcx,4) +; X64-NEXT: movl $0, (%r14,%rcx,4) ; X64-NEXT: jmp .LBB1_8 ; ; X64-LFENCE-LABEL: test_basic_conditions: @@ -225,8 +229,12 @@ ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Ltmp1: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq 
-{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp1, %rcx +; X64-NEXT: cmovneq %r15, %rax ; X64-NEXT: incl %ebx ; X64-NEXT: cmpl %ebp, %ebx ; X64-NEXT: jl .LBB2_6 @@ -304,19 +312,19 @@ ; X64-NEXT: pushq %rbx ; X64-NEXT: pushq %rax ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %r12 +; X64-NEXT: movq $-1, %rbp ; X64-NEXT: sarq $63, %rax ; X64-NEXT: testl %edi, %edi ; X64-NEXT: je .LBB3_2 ; X64-NEXT: # %bb.1: -; X64-NEXT: cmoveq %r12, %rax +; X64-NEXT: cmoveq %rbp, %rax ; X64-NEXT: jmp .LBB3_10 ; X64-NEXT: .LBB3_2: # %l1.header.preheader ; X64-NEXT: movq %r8, %r14 ; X64-NEXT: movq %rcx, %rbx -; X64-NEXT: movl %edx, %ebp +; X64-NEXT: movl %edx, %r12d ; X64-NEXT: movl %esi, %r15d -; X64-NEXT: cmovneq %r12, %rax +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: xorl %r13d, %r13d ; X64-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: testl %r15d, %r15d @@ -324,16 +332,16 @@ ; X64-NEXT: jmp .LBB3_4 ; X64-NEXT: .p2align 4, 0x90 ; X64-NEXT: .LBB3_12: -; X64-NEXT: cmovgeq %r12, %rax +; X64-NEXT: cmovgeq %rbp, %rax ; X64-NEXT: testl %r15d, %r15d ; X64-NEXT: jle .LBB3_4 ; X64-NEXT: .LBB3_5: # %l2.header.preheader -; X64-NEXT: cmovleq %r12, %rax +; X64-NEXT: cmovleq %rbp, %rax ; X64-NEXT: xorl %r15d, %r15d ; X64-NEXT: jmp .LBB3_6 ; X64-NEXT: .p2align 4, 0x90 ; X64-NEXT: .LBB3_11: # in Loop: Header=BB3_6 Depth=1 -; X64-NEXT: cmovgeq %r12, %rax +; X64-NEXT: cmovgeq %rbp, %rax ; X64-NEXT: .LBB3_6: # %l2.header ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movslq (%rbx), %rcx @@ -344,18 +352,22 @@ ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Ltmp2: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp2, %rcx +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: incl %r15d -; X64-NEXT: cmpl %ebp, %r15d +; X64-NEXT: cmpl %r12d, %r15d ; X64-NEXT: jl .LBB3_11 ; X64-NEXT: # %bb.7: -; X64-NEXT: cmovlq %r12, %rax +; X64-NEXT: cmovlq %rbp, %rax ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Reload ; X64-NEXT: jmp .LBB3_8 ; X64-NEXT: .p2align 4, 0x90 ; X64-NEXT: .LBB3_4: -; X64-NEXT: cmovgq %r12, %rax +; X64-NEXT: cmovgq %rbp, %rax ; X64-NEXT: .LBB3_8: # %l1.latch ; X64-NEXT: movslq (%rbx), %rcx ; X64-NEXT: orq %rax, %rcx @@ -365,13 +377,17 @@ ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Ltmp3: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp3, %rcx +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: incl %r13d ; X64-NEXT: cmpl %r15d, %r13d ; X64-NEXT: jl .LBB3_12 ; X64-NEXT: # %bb.9: -; X64-NEXT: cmovlq %r12, %rax +; X64-NEXT: cmovlq %rbp, %rax ; X64-NEXT: .LBB3_10: # %exit ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp @@ -486,22 +502,27 @@ ; X64: # %bb.0: # %entry ; X64-NEXT: pushq %rbp ; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: pushq %r14 +; X64-NEXT: pushq %r15 ; X64-NEXT: .cfi_def_cfa_offset 24 -; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %r14 ; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: .cfi_offset %rbx, -32 -; X64-NEXT: .cfi_offset %r14, -24 +; X64-NEXT: pushq %rbx +; X64-NEXT: .cfi_def_cfa_offset 40 +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 48 +; X64-NEXT: .cfi_offset %rbx, -40 +; X64-NEXT: .cfi_offset %r14, -32 +; X64-NEXT: .cfi_offset %r15, -24 ; X64-NEXT: .cfi_offset %rbp, -16 ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %rcx +; X64-NEXT: movq $-1, %r15 ; X64-NEXT: sarq $63, %rax ; X64-NEXT: 
cmpl $41, %edi ; X64-NEXT: jg .LBB4_1 ; X64-NEXT: # %bb.2: # %thrower ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rsi, %rbx -; X64-NEXT: cmovgq %rcx, %rax +; X64-NEXT: cmovgq %r15, %rax ; X64-NEXT: movslq %edi, %rcx ; X64-NEXT: movl (%rsi,%rcx,4), %ebp ; X64-NEXT: orl %eax, %ebp @@ -509,35 +530,47 @@ ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq __cxa_allocate_exception +; X64-NEXT: .Ltmp7: ; X64-NEXT: movq %rsp, %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-NEXT: sarq $63, %rcx +; X64-NEXT: cmpq $.Ltmp7, %rdx +; X64-NEXT: cmovneq %r15, %rcx ; X64-NEXT: movl %ebp, (%rax) -; X64-NEXT: .Ltmp0: +; X64-NEXT: .Ltmp4: ; X64-NEXT: xorl %esi, %esi ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: shlq $47, %rcx ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: orq %rcx, %rsp ; X64-NEXT: callq __cxa_throw +; X64-NEXT: .Ltmp8: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax -; X64-NEXT: .Ltmp1: +; X64-NEXT: cmpq $.Ltmp8, %rcx +; X64-NEXT: cmovneq %r15, %rax +; X64-NEXT: .Ltmp5: ; X64-NEXT: jmp .LBB4_3 ; X64-NEXT: .LBB4_1: -; X64-NEXT: cmovleq %rcx, %rax +; X64-NEXT: cmovleq %r15, %rax ; X64-NEXT: .LBB4_3: # %exit ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp +; X64-NEXT: addq $8, %rsp +; X64-NEXT: .cfi_def_cfa_offset 40 ; X64-NEXT: popq %rbx -; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: .cfi_def_cfa_offset 32 ; X64-NEXT: popq %r14 +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: popq %r15 ; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: popq %rbp ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq ; X64-NEXT: .LBB4_4: # %lpad -; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: .Ltmp2: +; X64-NEXT: .cfi_def_cfa_offset 48 +; X64-NEXT: .Ltmp6: ; X64-NEXT: movq %rsp, %rcx ; X64-NEXT: sarq $63, %rcx ; X64-NEXT: movl (%rax), %eax @@ -549,8 +582,12 @@ ; X64-NEXT: shlq $47, %rcx ; X64-NEXT: orq %rcx, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Ltmp9: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp9, %rcx +; X64-NEXT: cmovneq %r15, %rax ; ; X64-LFENCE-LABEL: test_basic_eh: ; X64-LFENCE: # %bb.0: # %entry @@ -636,79 +673,111 @@ ; X64: # %bb.0: # %entry ; X64-NEXT: pushq %r15 ; X64-NEXT: pushq %r14 +; X64-NEXT: pushq %r13 ; X64-NEXT: pushq %r12 ; X64-NEXT: pushq %rbx -; X64-NEXT: pushq %rax ; X64-NEXT: movq %rsp, %rax ; X64-NEXT: movq %rcx, %r15 ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rsi, %rbx ; X64-NEXT: movq %rdi, %r12 -; X64-NEXT: movq $-1, %rcx +; X64-NEXT: movq $-1, %r13 ; X64-NEXT: sarq $63, %rax ; X64-NEXT: orq %rax, %r12 ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_float +; X64-NEXT: .Ltmp10: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp10, %rcx +; X64-NEXT: cmovneq %r13, %rax ; X64-NEXT: orq %rax, %rbx ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_double +; X64-NEXT: .Ltmp11: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp11, %rcx +; X64-NEXT: cmovneq %r13, %rax ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: cvtsd2ss %xmm0, %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_float +; X64-NEXT: .Ltmp12: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; 
X64-NEXT: cmpq $.Ltmp12, %rcx +; X64-NEXT: cmovneq %r13, %rax ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: cvtss2sd %xmm0, %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_double +; X64-NEXT: .Ltmp13: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp13, %rcx +; X64-NEXT: cmovneq %r13, %rax ; X64-NEXT: orq %rax, %r14 ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: cvtsi2ssl (%r14), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_float +; X64-NEXT: .Ltmp14: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp14, %rcx +; X64-NEXT: cmovneq %r13, %rax ; X64-NEXT: orq %rax, %r15 ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: cvtsi2sdq (%r15), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_double +; X64-NEXT: .Ltmp15: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp15, %rcx +; X64-NEXT: cmovneq %r13, %rax ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: cvtsi2ssq (%r15), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_float +; X64-NEXT: .Ltmp16: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp16, %rcx +; X64-NEXT: cmovneq %r13, %rax ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: cvtsi2sdl (%r14), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_double +; X64-NEXT: .Ltmp17: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp17, %rcx +; X64-NEXT: cmovneq %r13, %rax ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp -; X64-NEXT: addq $8, %rsp ; X64-NEXT: popq %rbx ; X64-NEXT: popq %r12 +; X64-NEXT: popq %r13 ; X64-NEXT: popq %r14 ; X64-NEXT: popq %r15 ; X64-NEXT: retq @@ -789,68 +858,96 @@ define void @test_vec_loads(<4 x float>* %v4f32ptr, <2 x double>* %v2f64ptr, <16 x i8>* %v16i8ptr, <8 x i16>* %v8i16ptr, <4 x i32>* %v4i32ptr, <2 x i64>* %v2i64ptr) nounwind { ; X64-LABEL: test_vec_loads: ; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rbp ; X64-NEXT: pushq %r15 ; X64-NEXT: pushq %r14 ; X64-NEXT: pushq %r13 ; X64-NEXT: pushq %r12 ; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %rax ; X64-NEXT: movq %rsp, %rax ; X64-NEXT: movq %r9, %r14 ; X64-NEXT: movq %r8, %r15 ; X64-NEXT: movq %rcx, %r12 ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rsi, %rbx -; X64-NEXT: movq $-1, %rcx +; X64-NEXT: movq $-1, %rbp ; X64-NEXT: sarq $63, %rax ; X64-NEXT: orq %rax, %rdi ; X64-NEXT: movaps (%rdi), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_v4f32 +; X64-NEXT: .Ltmp18: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp18, %rcx +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: orq %rax, %rbx ; X64-NEXT: movaps (%rbx), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_v2f64 +; X64-NEXT: .Ltmp19: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp19, %rcx +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: orq %rax, %r13 ; X64-NEXT: movaps (%r13), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_v16i8 +; X64-NEXT: .Ltmp20: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq 
-{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp20, %rcx +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: orq %rax, %r12 ; X64-NEXT: movaps (%r12), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_v8i16 +; X64-NEXT: .Ltmp21: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp21, %rcx +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: orq %rax, %r15 ; X64-NEXT: movaps (%r15), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_v4i32 +; X64-NEXT: .Ltmp22: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp22, %rcx +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: orq %rax, %r14 ; X64-NEXT: movaps (%r14), %xmm0 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_v2i64 +; X64-NEXT: .Ltmp23: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp23, %rcx +; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp +; X64-NEXT: addq $8, %rsp ; X64-NEXT: popq %rbx ; X64-NEXT: popq %r12 ; X64-NEXT: popq %r13 ; X64-NEXT: popq %r14 ; X64-NEXT: popq %r15 +; X64-NEXT: popq %rbp ; X64-NEXT: retq ; ; X64-LFENCE-LABEL: test_vec_loads: @@ -902,13 +999,13 @@ define void @test_deferred_hardening(i32* %ptr1, i32* %ptr2, i32 %x) nounwind { ; X64-LABEL: test_deferred_hardening: ; X64: # %bb.0: # %entry +; X64-NEXT: pushq %r15 ; X64-NEXT: pushq %r14 ; X64-NEXT: pushq %rbx -; X64-NEXT: pushq %rax ; X64-NEXT: movq %rsp, %rax ; X64-NEXT: movq %rsi, %r14 ; X64-NEXT: movq %rdi, %rbx -; X64-NEXT: movq $-1, %rcx +; X64-NEXT: movq $-1, %r15 ; X64-NEXT: sarq $63, %rax ; X64-NEXT: movl (%rdi), %edi ; X64-NEXT: incl %edi @@ -917,8 +1014,12 @@ ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Ltmp24: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp24, %rcx +; X64-NEXT: cmovneq %r15, %rax ; X64-NEXT: movl (%rbx), %ecx ; X64-NEXT: movl (%r14), %edx ; X64-NEXT: leal 1(%rcx,%rdx), %edi @@ -926,16 +1027,24 @@ ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Ltmp25: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp25, %rcx +; X64-NEXT: cmovneq %r15, %rax ; X64-NEXT: movl (%rbx), %edi ; X64-NEXT: shll $7, %edi ; X64-NEXT: orl %eax, %edi ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Ltmp26: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp26, %rcx +; X64-NEXT: cmovneq %r15, %rax ; X64-NEXT: movzwl (%rbx), %ecx ; X64-NEXT: sarw $7, %cx ; X64-NEXT: movzwl %cx, %edi @@ -944,8 +1053,12 @@ ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Ltmp27: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp27, %rcx +; X64-NEXT: cmovneq %r15, %rax ; X64-NEXT: movzwl (%rbx), %ecx ; X64-NEXT: rolw $9, %cx ; X64-NEXT: movswl %cx, %edi @@ -954,13 +1067,17 @@ ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink +; X64-NEXT: .Ltmp28: ; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Ltmp28, 
%rcx +; X64-NEXT: cmovneq %r15, %rax ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp -; X64-NEXT: addq $8, %rsp ; X64-NEXT: popq %rbx ; X64-NEXT: popq %r14 +; X64-NEXT: popq %r15 ; X64-NEXT: retq ; ; X64-LFENCE-LABEL: test_deferred_hardening:
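For reference, here is a minimal sketch of the post-call check sequence this patch emits in the small, non-PIC configuration, distilled from the autogenerated checks above; the specific registers (%rax, %rcx, %r14) and the -8 displacement are simply what these tests happen to produce, not fixed choices of the pass:

  callq f
.Ltmp0:                      # post-instruction symbol: the address directly after the call
  movq %rsp, %rax            # recover the predicate state that was merged into RSP
  movq -8(%rsp), %rcx        # reload the intended return address left in the red zone
  sarq $63, %rax             # smear RSP's high bit into an all-zeros/all-ones mask
  cmpq $.Ltmp0, %rcx         # does it match where we actually resumed execution?
  cmovneq %r14, %rax         # if not, poison the predicate state (%r14 holds -1)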