Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h @@ -1331,9 +1331,15 @@ MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const; + void emitSetJmpShadowStackFix(MachineInstr &MI, + MachineBasicBlock *MBB) const; + MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const; + MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI, + MachineBasicBlock *MBB) const; + MachineBasicBlock *emitFMA3Instr(MachineInstr &MI, MachineBasicBlock *MBB) const; Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -27619,6 +27619,60 @@ return BB; } +/// SetJmp implies future control flow change upon calling the corresponding +/// LongJmp. +/// Instead of using the 'return' instruction, the long jump fixes the stack and +/// performs an indirect branch. To do so it uses the registers that were stored +/// in the jump buffer (when calling SetJmp). +/// In case the shadow stack is enabled we need to fix it as well, because some +/// return addresses will be skipped. +/// The function will save the SSP for future fixing in the function +/// emitLongJmpShadowStackFix. +/// \sa emitLongJmpShadowStackFix +/// \param [in] MI The temporary Machine Instruction for the builtin. +/// \param [in] MBB The Machine Basic Block that will be modified. +void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = MBB->getParent(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + MachineInstrBuilder MIB; + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); + + // Initialize a register with zero. + MVT PVT = getPointerTy(MF->getDataLayout()); + const TargetRegisterClass *PtrRC = getRegClassFor(PVT); + unsigned ZReg = MRI.createVirtualRegister(PtrRC); + unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr; + BuildMI(*MBB, MI, DL, TII->get(XorRROpc)) + .addDef(ZReg) + .addReg(ZReg, RegState::Undef) + .addReg(ZReg, RegState::Undef); + + // Read the current SSP Register value to the zeroed register. + unsigned SSPCopyReg = MRI.createVirtualRegister(PtrRC); + unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD; + BuildMI(*MBB, MI, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg); + + // Write the SSP register value to offset 3 in input memory buffer. + unsigned PtrStoreOpc = (PVT == MVT::i64) ? 
X86::MOV64mr : X86::MOV32mr; + MIB = BuildMI(*MBB, MI, DL, TII->get(PtrStoreOpc)); + const int64_t SSPOffset = 3 * PVT.getStoreSize(); + const unsigned MemOpndSlot = 1; + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + if (i == X86::AddrDisp) + MIB.addDisp(MI.getOperand(MemOpndSlot + i), SSPOffset); + else + MIB.add(MI.getOperand(MemOpndSlot + i)); + } + MIB.addReg(SSPCopyReg); + MIB.setMemRefs(MMOBegin, MMOEnd); +} + MachineBasicBlock * X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { @@ -27728,6 +27782,11 @@ else MIB.addMBB(restoreMBB); MIB.setMemRefs(MMOBegin, MMOEnd); + + if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) { + emitSetJmpShadowStackFix(MI, thisMBB); + } + // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup)) .addMBB(restoreMBB); @@ -27769,6 +27828,183 @@ return sinkMBB; } +/// Fix the shadow stack using the previously saved SSP pointer. +/// \sa emitSetJmpShadowStackFix +/// \param [in] MI The temporary Machine Instruction for the builtin. +/// \param [in] MBB The Machine Basic Block that will be modified. +/// \return The sink MBB that will perform the future indirect branch. +MachineBasicBlock * +X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = MBB->getParent(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end(); + + MVT PVT = getPointerTy(MF->getDataLayout()); + const TargetRegisterClass *PtrRC = getRegClassFor(PVT); + + // checkSspMBB: + // xor vreg1, vreg1 + // rdssp vreg1 + // test vreg1, vreg1 + // je sinkMBB # Jump if Shadow Stack is not supported + // fallMBB: + // mov buf+24/12(%rip), vreg2 + // sub vreg1, vreg2 + // jbe sinkMBB # No need to fix the Shadow Stack + // fixShadowMBB: + // shr 3/2, vreg2 + // incssp vreg2 # fix the SSP according to the lower 8 bits + // shr 8, vreg2 + // je sinkMBB + // fixShadowLoopPrepareMBB: + // shl vreg2 + // mov 128, vreg3 + // fixShadowLoopMBB: + // incssp vreg3 + // dec vreg2 + // jne fixShadowLoopMBB # Iterate until you finish fixing + // # the Shadow Stack + // sinkMBB: + + MachineFunction::iterator I = ++MBB->getIterator(); + const BasicBlock *BB = MBB->getBasicBlock(); + + MachineBasicBlock *checkSspMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *fixShadowMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *fixShadowLoopPrepareMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *fixShadowLoopMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(I, checkSspMBB); + MF->insert(I, fallMBB); + MF->insert(I, fixShadowMBB); + MF->insert(I, fixShadowLoopPrepareMBB); + MF->insert(I, fixShadowLoopMBB); + MF->insert(I, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), MBB, MachineBasicBlock::iterator(MI), + MBB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + MBB->addSuccessor(checkSspMBB); + + // Initialize a register with zero. + unsigned ZReg = MRI.createVirtualRegister(PtrRC); + unsigned XorRROpc = (PVT == MVT::i64) ? 
X86::XOR64rr : X86::XOR32rr; + BuildMI(checkSspMBB, DL, TII->get(XorRROpc)) + .addDef(ZReg) + .addReg(ZReg, RegState::Undef) + .addReg(ZReg, RegState::Undef); + + // Read the current SSP Register value to the zeroed register. + unsigned SSPCopyReg = MRI.createVirtualRegister(PtrRC); + unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD; + BuildMI(checkSspMBB, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg); + + // Check whether the result of the SSP register is zero and jump directly + // to the sink. + unsigned TestRROpc = (PVT == MVT::i64) ? X86::TEST64rr : X86::TEST32rr; + BuildMI(checkSspMBB, DL, TII->get(TestRROpc)) + .addReg(SSPCopyReg) + .addReg(SSPCopyReg); + BuildMI(checkSspMBB, DL, TII->get(X86::JE_1)).addMBB(sinkMBB); + checkSspMBB->addSuccessor(sinkMBB); + checkSspMBB->addSuccessor(fallMBB); + + // Reload the previously saved SSP register value. + unsigned PrevSSPReg = MRI.createVirtualRegister(PtrRC); + unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm; + const int64_t SPPOffset = 3 * PVT.getStoreSize(); + MachineInstrBuilder MIB = + BuildMI(fallMBB, DL, TII->get(PtrLoadOpc), PrevSSPReg); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + if (i == X86::AddrDisp) + MIB.addDisp(MI.getOperand(i), SPPOffset); + else + MIB.add(MI.getOperand(i)); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // Subtract the current SSP from the previous SSP. + unsigned SspSubReg = MRI.createVirtualRegister(PtrRC); + unsigned SubRROpc = (PVT == MVT::i64) ? X86::SUB64rr : X86::SUB32rr; + BuildMI(fallMBB, DL, TII->get(SubRROpc), SspSubReg) + .addReg(PrevSSPReg) + .addReg(SSPCopyReg); + + // Jump to sink in case PrevSSPReg <= SSPCopyReg. + BuildMI(fallMBB, DL, TII->get(X86::JBE_1)).addMBB(sinkMBB); + fallMBB->addSuccessor(sinkMBB); + fallMBB->addSuccessor(fixShadowMBB); + + // Shift right by 2/3 for 32/64 because incssp multiplies the argument by 4/8. + unsigned ShrRIOpc = (PVT == MVT::i64) ? X86::SHR64ri : X86::SHR32ri; + unsigned Offset = (PVT == MVT::i64) ? 3 : 2; + unsigned SspFirstShrReg = MRI.createVirtualRegister(PtrRC); + BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspFirstShrReg) + .addReg(SspSubReg) + .addImm(Offset); + + // Increase SSP when looking only on the lower 8 bits of the delta. + unsigned IncsspOpc = (PVT == MVT::i64) ? X86::INCSSPQ : X86::INCSSPD; + BuildMI(fixShadowMBB, DL, TII->get(IncsspOpc)).addReg(SspFirstShrReg); + + // Reset the lower 8 bits. + unsigned SspSecondShrReg = MRI.createVirtualRegister(PtrRC); + BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspSecondShrReg) + .addReg(SspFirstShrReg) + .addImm(8); + + // Jump if the result of the shift is zero. + BuildMI(fixShadowMBB, DL, TII->get(X86::JE_1)).addMBB(sinkMBB); + fixShadowMBB->addSuccessor(sinkMBB); + fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB); + + // Do a single shift left. + unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64r1 : X86::SHL32r1; + unsigned SspAfterShlReg = MRI.createVirtualRegister(PtrRC); + BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(ShlR1Opc), SspAfterShlReg) + .addReg(SspSecondShrReg); + + // Save the value 128 to a register (will be used next with incssp). + unsigned Value128InReg = MRI.createVirtualRegister(PtrRC); + unsigned MovRIOpc = (PVT == MVT::i64) ? 
X86::MOV64ri32 : X86::MOV32ri; + BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(MovRIOpc), Value128InReg) + .addImm(128); + fixShadowLoopPrepareMBB->addSuccessor(fixShadowLoopMBB); + + // Since incssp only looks at the lower 8 bits, we might need to do several + // iterations of incssp until we finish fixing the shadow stack. + unsigned DecReg = MRI.createVirtualRegister(PtrRC); + unsigned CounterReg = MRI.createVirtualRegister(PtrRC); + BuildMI(fixShadowLoopMBB, DL, TII->get(X86::PHI), CounterReg) + .addReg(SspAfterShlReg) + .addMBB(fixShadowLoopPrepareMBB) + .addReg(DecReg) + .addMBB(fixShadowLoopMBB); + + // Every iteration we increase the SSP by 128. + BuildMI(fixShadowLoopMBB, DL, TII->get(IncsspOpc)).addReg(Value128InReg); + + // Every iteration we decrement the counter by 1. + unsigned DecROpc = (PVT == MVT::i64) ? X86::DEC64r : X86::DEC32r; + BuildMI(fixShadowLoopMBB, DL, TII->get(DecROpc), DecReg).addReg(CounterReg); + + // Jump if the counter is not zero yet. + BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JNE_1)).addMBB(fixShadowLoopMBB); + fixShadowLoopMBB->addSuccessor(sinkMBB); + fixShadowLoopMBB->addSuccessor(fixShadowLoopMBB); + + return sinkMBB; +} + MachineBasicBlock * X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { @@ -27801,13 +28037,21 @@ unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm; unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r; + MachineBasicBlock *thisMBB = MBB; + + // When CET and shadow stack is enabled, we need to fix the Shadow Stack. + if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) { + thisMBB = emitLongJmpShadowStackFix(MI, thisMBB); + } + // Reload FP - MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), FP); + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), FP); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) MIB.add(MI.getOperand(i)); MIB.setMemRefs(MMOBegin, MMOEnd); + // Reload IP - MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp); + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), Tmp); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { if (i == X86::AddrDisp) MIB.addDisp(MI.getOperand(i), LabelOffset); @@ -27815,8 +28059,9 @@ MIB.add(MI.getOperand(i)); } MIB.setMemRefs(MMOBegin, MMOEnd); + // Reload SP - MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), SP); + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), SP); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { if (i == X86::AddrDisp) MIB.addDisp(MI.getOperand(i), SPOffset); @@ -27824,11 +28069,12 @@ MIB.add(MI.getOperand(i)); } MIB.setMemRefs(MMOBegin, MMOEnd); + // Jump - BuildMI(*MBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp); + BuildMI(*thisMBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp); MI.eraseFromParent(); - return MBB; + return thisMBB; } void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, Index: llvm/trunk/test/CodeGen/X86/shadow-stack.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/shadow-stack.ll +++ llvm/trunk/test/CodeGen/X86/shadow-stack.ll @@ -0,0 +1,244 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple x86_64-apple-macosx10.13.0 < %s | FileCheck %s --check-prefix=X86_64 +; RUN: llc -mtriple i386-apple-macosx10.13.0 < %s | FileCheck %s --check-prefix=X86 + +; The MacOS tripples are used to get trapping behavior on the "unreachable" IR +; instruction, so that the placement of the ud2 instruction could be verified. 
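+;
+; The __builtin_setjmp/__builtin_longjmp calls in the C code below are lowered
+; by the front end to the llvm.eh.sjlj.setjmp/llvm.eh.sjlj.longjmp intrinsics
+; used in this test; those intrinsics are what exercise the shadow-stack
+; handling added to emitEHSjLjSetJmp and emitEHSjLjLongJmp.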
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; The IR was created using the following C code:
+;; typedef void *jmp_buf;
+;; jmp_buf buf;
+;;
+;; __attribute__((noinline)) int bar(int i) {
+;;   int j = i - 111;
+;;   __builtin_longjmp(&buf, 1);
+;;   return j;
+;; }
+;;
+;; int foo(int i) {
+;;   int j = i * 11;
+;;   if (!__builtin_setjmp(&buf)) {
+;;     j += 33 + bar(j);
+;;   }
+;;   return j + i;
+;; }
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+@buf = common local_unnamed_addr global i8* null, align 8
+
+; Functions that use LongJmp should fix the Shadow Stack using the previously
+; saved ShadowStackPointer in the input buffer.
+; The fix requires unwinding the shadow stack to the last SSP.
+define i32 @bar(i32 %i) local_unnamed_addr {
+; X86_64-LABEL: bar:
+; X86_64: ## %bb.0: ## %entry
+; X86_64-NEXT: pushq %rbp
+; X86_64-NEXT: .cfi_def_cfa_offset 16
+; X86_64-NEXT: .cfi_offset %rbp, -16
+; X86_64-NEXT: movq _buf@{{.*}}(%rip), %rax
+; X86_64-NEXT: movq (%rax), %rax
+; X86_64-NEXT: xorq %rdx, %rdx
+; X86_64-NEXT: rdsspq %rdx
+; X86_64-NEXT: testq %rdx, %rdx
+; X86_64-NEXT: je LBB0_5
+; X86_64-NEXT: ## %bb.1: ## %entry
+; X86_64-NEXT: movq 24(%rax), %rcx
+; X86_64-NEXT: subq %rdx, %rcx
+; X86_64-NEXT: jbe LBB0_5
+; X86_64-NEXT: ## %bb.2: ## %entry
+; X86_64-NEXT: shrq $3, %rcx
+; X86_64-NEXT: incsspq %rcx
+; X86_64-NEXT: shrq $8, %rcx
+; X86_64-NEXT: je LBB0_5
+; X86_64-NEXT: ## %bb.3: ## %entry
+; X86_64-NEXT: shlq %rcx
+; X86_64-NEXT: movq $128, %rdx
+; X86_64-NEXT: LBB0_4: ## %entry
+; X86_64-NEXT: ## =>This Inner Loop Header: Depth=1
+; X86_64-NEXT: incsspq %rdx
+; X86_64-NEXT: decq %rcx
+; X86_64-NEXT: jne LBB0_4
+; X86_64-NEXT: LBB0_5: ## %entry
+; X86_64-NEXT: movq (%rax), %rbp
+; X86_64-NEXT: movq 8(%rax), %rcx
+; X86_64-NEXT: movq 16(%rax), %rsp
+; X86_64-NEXT: jmpq *%rcx
+; X86_64-NEXT: ud2
+;
+; X86-LABEL: bar:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl L_buf$non_lazy_ptr, %eax
+; X86-NEXT: movl (%eax), %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: rdsspd %edx
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: je LBB0_5
+; X86-NEXT: ## %bb.1: ## %entry
+; X86-NEXT: movl 12(%eax), %ecx
+; X86-NEXT: subl %edx, %ecx
+; X86-NEXT: jbe LBB0_5
+; X86-NEXT: ## %bb.2: ## %entry
+; X86-NEXT: shrl $2, %ecx
+; X86-NEXT: incsspd %ecx
+; X86-NEXT: shrl $8, %ecx
+; X86-NEXT: je LBB0_5
+; X86-NEXT: ## %bb.3: ## %entry
+; X86-NEXT: shll %ecx
+; X86-NEXT: movl $128, %edx
+; X86-NEXT: LBB0_4: ## %entry
+; X86-NEXT: ## =>This Inner Loop Header: Depth=1
+; X86-NEXT: incsspd %edx
+; X86-NEXT: decl %ecx
+; X86-NEXT: jne LBB0_4
+; X86-NEXT: LBB0_5: ## %entry
+; X86-NEXT: movl (%eax), %ebp
+; X86-NEXT: movl 4(%eax), %ecx
+; X86-NEXT: movl 8(%eax), %esp
+; X86-NEXT: jmpl *%ecx
+; X86-NEXT: ud2
+entry:
+  %0 = load i8*, i8** @buf, align 8
+  tail call void @llvm.eh.sjlj.longjmp(i8* %0)
+  unreachable
+}
+
+declare void @llvm.eh.sjlj.longjmp(i8*)
+
+; Functions that call SetJmp should save the current ShadowStackPointer for
+; future fixing of the Shadow Stack.
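+;
+; For illustration only (plain comments, not FileCheck directives): assuming
+; %rax holds the jump buffer address, the save emitted on the SetJmp path on
+; x86-64 boils down to the sequence below; the SSP is stored at offset
+; 3 * pointer-size (24 bytes on x86-64, 12 bytes on i386), matching the CHECK
+; lines in @foo:
+;
+;   xorq   %rcx, %rcx       ## zero the register that rdssp reads into
+;   rdsspq %rcx             ## read the current shadow stack pointer
+;   movq   %rcx, 24(%rax)   ## save it in slot 3 of the jump buffer
+;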
+define i32 @foo(i32 %i) local_unnamed_addr { +; X86_64-LABEL: foo: +; X86_64: ## %bb.0: ## %entry +; X86_64-NEXT: pushq %rbp +; X86_64-NEXT: .cfi_def_cfa_offset 16 +; X86_64-NEXT: .cfi_offset %rbp, -16 +; X86_64-NEXT: movq %rsp, %rbp +; X86_64-NEXT: .cfi_def_cfa_register %rbp +; X86_64-NEXT: pushq %r15 +; X86_64-NEXT: pushq %r14 +; X86_64-NEXT: pushq %r13 +; X86_64-NEXT: pushq %r12 +; X86_64-NEXT: pushq %rbx +; X86_64-NEXT: pushq %rax +; X86_64-NEXT: .cfi_offset %rbx, -56 +; X86_64-NEXT: .cfi_offset %r12, -48 +; X86_64-NEXT: .cfi_offset %r13, -40 +; X86_64-NEXT: .cfi_offset %r14, -32 +; X86_64-NEXT: .cfi_offset %r15, -24 +; X86_64-NEXT: ## kill: def $edi killed $edi def $rdi +; X86_64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; X86_64-NEXT: movq _buf@{{.*}}(%rip), %rax +; X86_64-NEXT: movq (%rax), %rax +; X86_64-NEXT: movq %rbp, (%rax) +; X86_64-NEXT: movq %rsp, 16(%rax) +; X86_64-NEXT: leaq {{.*}}(%rip), %rcx +; X86_64-NEXT: movq %rcx, 8(%rax) +; X86_64-NEXT: xorq %rcx, %rcx +; X86_64-NEXT: rdsspq %rcx +; X86_64-NEXT: movq %rcx, 24(%rax) +; X86_64-NEXT: #EH_SjLj_Setup LBB1_4 +; X86_64-NEXT: ## %bb.1: ## %entry +; X86_64-NEXT: xorl %eax, %eax +; X86_64-NEXT: testl %eax, %eax +; X86_64-NEXT: jne LBB1_3 +; X86_64-NEXT: jmp LBB1_5 +; X86_64-NEXT: LBB1_4: ## Block address taken +; X86_64-NEXT: ## %entry +; X86_64-NEXT: movl $1, %eax +; X86_64-NEXT: testl %eax, %eax +; X86_64-NEXT: je LBB1_5 +; X86_64-NEXT: LBB1_3: ## %if.end +; X86_64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload +; X86_64-NEXT: shll $2, %eax +; X86_64-NEXT: leal (%rax,%rax,2), %eax +; X86_64-NEXT: addq $8, %rsp +; X86_64-NEXT: popq %rbx +; X86_64-NEXT: popq %r12 +; X86_64-NEXT: popq %r13 +; X86_64-NEXT: popq %r14 +; X86_64-NEXT: popq %r15 +; X86_64-NEXT: popq %rbp +; X86_64-NEXT: retq +; X86_64-NEXT: LBB1_5: ## %if.then +; X86_64-NEXT: callq _bar +; X86_64-NEXT: ud2 +; +; X86-LABEL: foo: +; X86: ## %bb.0: ## %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: movl L_buf$non_lazy_ptr, %eax +; X86-NEXT: movl (%eax), %eax +; X86-NEXT: movl %ebp, (%eax) +; X86-NEXT: movl %esp, 16(%eax) +; X86-NEXT: movl $LBB1_4, 4(%eax) +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: rdsspd %ecx +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: #EH_SjLj_Setup LBB1_4 +; X86-NEXT: ## %bb.1: ## %entry +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testl %eax, %eax +; X86-NEXT: jne LBB1_3 +; X86-NEXT: jmp LBB1_5 +; X86-NEXT: LBB1_4: ## Block address taken +; X86-NEXT: ## %entry +; X86-NEXT: movl $1, %eax +; X86-NEXT: testl %eax, %eax +; X86-NEXT: je LBB1_5 +; X86-NEXT: LBB1_3: ## %if.end +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: shll $2, %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: addl $12, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; X86-NEXT: LBB1_5: ## %if.then +; X86-NEXT: calll _bar +; X86-NEXT: ud2 +entry: + %0 = load i8*, i8** @buf, align 8 + %1 = bitcast i8* %0 to i8** + %2 = tail call i8* @llvm.frameaddress(i32 0) + store i8* %2, i8** %1, align 8 + %3 = tail call i8* @llvm.stacksave() + %4 = getelementptr inbounds i8, i8* %0, i64 16 + %5 = bitcast i8* %4 to i8** + store i8* %3, i8** %5, align 8 + %6 = tail call 
i32 @llvm.eh.sjlj.setjmp(i8* %0) + %tobool = icmp eq i32 %6, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + %call = tail call i32 @bar(i32 undef) + unreachable + +if.end: ; preds = %entry + %add2 = mul nsw i32 %i, 12 + ret i32 %add2 +} + +declare i8* @llvm.frameaddress(i32) +declare i8* @llvm.stacksave() +declare i32 @llvm.eh.sjlj.setjmp(i8*) + +!llvm.module.flags = !{!0} + +!0 = !{i32 4, !"cf-protection-return", i32 1}
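+
+; The "cf-protection-return" module flag above is what enables the new code
+; paths: emitEHSjLjSetJmp and emitEHSjLjLongJmp only emit the shadow stack
+; save/fix when getModuleFlag("cf-protection-return") is set. For reference
+; (not necessarily how this test's IR was produced), clang attaches this flag
+; when compiling with -fcf-protection=return, e.g.:
+;
+;   clang -fcf-protection=return -S -emit-llvm shadow-stack.c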