diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -642,6 +642,8 @@ bool HasFP = hasFP(MF); bool HasBP = RegInfo->hasBasePointer(MF); bool HasRedZone = isPPC64 || !isSVR4ABI; + bool HasROPProtect = Subtarget.hasROPProtect(); + bool HasPrivileged = Subtarget.hasPrivileged(); Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; Register BPReg = RegInfo->getBaseRegister(MF); @@ -672,6 +674,8 @@ const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8 : PPC::MFCR); const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW); + const MCInstrDesc &HashST = + TII.get(HasPrivileged ? PPC::HASHSTP : PPC::HASHST); // Regarding this assert: Even though LR is saved in the caller's frame (i.e., // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no @@ -817,11 +821,29 @@ .addReg(SPReg); } - if (MustSaveLR) + // Generate the instruction to store the LR. In the case where ROP protection + // is required the register holding the LR should not be killed as it will be + // used by the hash store instruction. + if (MustSaveLR) { BuildMI(MBB, StackUpdateLoc, dl, StoreInst) - .addReg(ScratchReg, getKillRegState(true)) - .addImm(LROffset) - .addReg(SPReg); + .addReg(ScratchReg, getKillRegState(!HasROPProtect)) + .addImm(LROffset) + .addReg(SPReg); + + // Add the ROP protection Hash Store instruction. 
+ if (HasROPProtect) { + const int SaveIndex = FI->getROPProtectionHashSaveIndex(); + const int ImmOffset = MFI.getObjectOffset(SaveIndex); + assert((ImmOffset <= -8 && ImmOffset >= -512) && + "ROP hash save offset out of range."); + assert(((ImmOffset & 0x7) == 0) && + "ROP hash save offset must be 8 byte aligned."); + BuildMI(MBB, StackUpdateLoc, dl, HashST) + .addReg(ScratchReg, getKillRegState(true)) + .addImm(ImmOffset) + .addReg(SPReg); + } + } if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { @@ -1512,6 +1534,8 @@ bool HasFP = hasFP(MF); bool HasBP = RegInfo->hasBasePointer(MF); bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); + bool HasROPProtect = Subtarget.hasROPProtect(); + bool HasPrivileged = Subtarget.hasPrivileged(); Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; Register BPReg = RegInfo->getBaseRegister(MF); @@ -1536,6 +1560,8 @@ : PPC::LWZ); const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8 : PPC::MTOCRF); + const MCInstrDesc &HashChk = + TII.get(HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK); int LROffset = getReturnSaveOffset(); int FPOffset = 0; @@ -1798,8 +1824,23 @@ BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) .addReg(TempReg, getKillRegState(i == e-1)); - if (MustSaveLR) + if (MustSaveLR) { + // If ROP protection is required, an extra instruction is added to compute a + // hash and then compare it to the hash stored in the prologue. + if (HasROPProtect) { + const int SaveIndex = FI->getROPProtectionHashSaveIndex(); + const int ImmOffset = MFI.getObjectOffset(SaveIndex); + assert((ImmOffset <= -8 && ImmOffset >= -512) && + "ROP hash check location offset out of range."); + assert(((ImmOffset & 0x7) == 0) && + "ROP hash check location offset must be 8 byte aligned."); + BuildMI(MBB, StackUpdateLoc, dl, HashChk) + .addReg(ScratchReg) + .addImm(ImmOffset) + .addReg(SPReg); + } BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); + } // Callee pop calling convention. Pop parameter/linkage area. 
Used for tail
   // call optimization
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -152,6 +152,15 @@
       GlobalBaseReg = 0;
       Subtarget = &MF.getSubtarget<PPCSubtarget>();
       PPCLowering = Subtarget->getTargetLowering();
+      if (Subtarget->hasROPProtect()) {
+        // Create a place on the stack for the ROP Protection Hash.
+        // The ROP Protection Hash will always be 8 bytes and aligned to 8
+        // bytes.
+        MachineFrameInfo &MFI = MF.getFrameInfo();
+        PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+        const int Result = MFI.CreateStackObject(8, Align(8), false);
+        FI->setROPProtectionHashSaveIndex(Result);
+      }
       SelectionDAGISel::runOnMachineFunction(MF);

       return true;
diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
--- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -49,6 +49,9 @@
   /// Frame index where the old PIC base pointer is stored.
   int PICBasePointerSaveIndex = 0;

+  /// Frame index where the ROP Protection Hash is stored.
+  int ROPProtectionHashSaveIndex = 0;
+
   /// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current
   /// function. This is only valid after the initial scan of the function by
   /// PEI.
@@ -161,6 +164,13 @@
   int getPICBasePointerSaveIndex() const { return PICBasePointerSaveIndex; }
   void setPICBasePointerSaveIndex(int Idx) { PICBasePointerSaveIndex = Idx; }

+  int getROPProtectionHashSaveIndex() const {
+    return ROPProtectionHashSaveIndex;
+  }
+  void setROPProtectionHashSaveIndex(int Idx) {
+    ROPProtectionHashSaveIndex = Idx;
+  }
+
   unsigned getMinReservedArea() const { return MinReservedArea; }
   void setMinReservedArea(unsigned size) { MinReservedArea = size; }

diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -101,9 +101,7 @@

   bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;

-  bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override {
-    return true;
-  }
+  bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override;

   void lowerDynamicAlloc(MachineBasicBlock::iterator II) const;
   void lowerDynamicAreaOffset(MachineBasicBlock::iterator II) const;
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -398,6 +398,16 @@
   return false;
 }

+bool PPCRegisterInfo::requiresVirtualBaseRegisters(
+    const MachineFunction &MF) const {
+  const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+  // Do not use virtual base registers when ROP protection is turned on.
+  // Virtual base registers break the layout of the local variable space and may
+  // push the ROP Hash location past the 512 byte range of the ROP store
+  // instruction.
+ return !Subtarget.hasROPProtect(); +} + bool PPCRegisterInfo::isCallerPreservedPhysReg(MCRegister PhysReg, const MachineFunction &MF) const { assert(Register::isPhysicalRegister(PhysReg)); diff --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll @@ -0,0 +1,357 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mattr=+rop-protect < %s | FileCheck %s --check-prefix LE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mattr=+rop-protect < %s | FileCheck %s --check-prefix LE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mattr=+rop-protect < %s | FileCheck %s --check-prefix LE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mattr=+rop-protect < %s | FileCheck %s --check-prefix BE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mattr=+rop-protect < %s | FileCheck %s --check-prefix BE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mattr=+rop-protect < %s | FileCheck %s --check-prefix BE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mattr=+rop-protect -mattr=+privileged < %s | FileCheck %s --check-prefix LE-PRIV +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names 
-ppc-vsr-nums-as-vr \ +; RUN: -mattr=+rop-protect -mattr=+privileged < %s | FileCheck %s --check-prefix LE-PRIV +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mattr=+rop-protect -mattr=+privileged < %s | FileCheck %s --check-prefix LE-PRIV +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mattr=+rop-protect -mattr=+privileged < %s | FileCheck %s --check-prefix BE-PRIV +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mattr=+rop-protect -mattr=+privileged < %s | FileCheck %s --check-prefix BE-PRIV +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mattr=+rop-protect -mattr=+privileged < %s | FileCheck %s --check-prefix BE-PRIV + + + +;; This test checks that the ROP protect instructions have been correctly +;; added when the ROP protect option has been specified. The hashst +;; instruction should be added to the prologue and the hashchk should be added +;; to the epilogue. 
+ +define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0 { +; LE-LABEL: caller: +; LE: mflr r0 +; LE-NEXT: std r30, -16(r1) +; LE-NEXT: std r0, 16(r1) +; LE-NEXT: hashst r0, -24(r1) +; LE-NEXT: stdu r1, -64(r1) +; LE: bl callee +; LE: addi r1, r1, 64 +; LE-NEXT: ld r0, 16(r1) +; LE-DAG: mtlr r0 +; LE-DAG: hashchk r0, -24(r1) +; LE-DAG: ld r30, -16(r1) +; LE-NEXT: blr +; BE-LABEL: caller: +; BE: mflr r0 +; BE-NEXT: std r0, 16(r1) +; BE-NEXT: hashst r0, -24(r1) +; BE-NEXT: stdu r1, -144(r1) +; BE: bl callee +; BE: addi r1, r1, 144 +; BE-NEXT: ld r0, 16(r1) +; BE-DAG: mtlr r0 +; BE-DAG: hashchk r0, -24(r1) +; BE-NEXT: blr +; LE-PRIV-LABEL: caller: +; LE-PRIV: mflr r0 +; LE-PRIV-NEXT: std r30, -16(r1) +; LE-PRIV-NEXT: std r0, 16(r1) +; LE-PRIV-NEXT: hashstp r0, -24(r1) +; LE-PRIV-NEXT: stdu r1, -64(r1) +; LE-PRIV: bl callee +; LE-PRIV: addi r1, r1, 64 +; LE-PRIV-NEXT: ld r0, 16(r1) +; LE-PRIV-DAG: mtlr r0 +; LE-PRIV-DAG: hashchkp r0, -24(r1) +; LE-PRIV-DAG: ld r30, -16(r1) +; LE-PRIV-NEXT: blr +; BE-PRIV-LABEL: caller: +; BE-PRIV: mflr r0 +; BE-PRIV-NEXT: std r0, 16(r1) +; BE-PRIV-NEXT: hashstp r0, -24(r1) +; BE-PRIV-NEXT: stdu r1, -144(r1) +; BE-PRIV: bl callee +; BE-PRIV: addi r1, r1, 144 +; BE-PRIV-NEXT: ld r0, 16(r1) +; BE-PRIV-DAG: mtlr r0 +; BE-PRIV-DAG: hashchkp r0, -24(r1) +; BE-PRIV-NEXT: blr + +entry: + %call = tail call zeroext i32 @callee(i32 zeroext %in) + %add = add i32 %call, %add_after + ret i32 %add +} + +define dso_local zeroext i32 @spill(i32* nocapture readonly %in) #0 { +; LE-LABEL: spill: +; LE: mflr r0 +; LE-DAG: std r0, 16(r1) +; LE-DAG: hashst r0, -488(r1) +; LE-DAG: stw r12, 8(r1) +; LE-NEXT: stdu r1, -544(r1) +; LE: bl callee2 +; LE: addi r1, r1, 544 +; LE-NEXT: ld r0, 16(r1) +; LE-NEXT: lwz r12, 8(r1) +; LE-DAG: mtlr r0 +; LE-DAG: hashchk r0, -488(r1) +; LE: blr +; BE-LABEL: spill: +; BE: mflr r0 +; BE-DAG: std r0, 16(r1) +; BE-DAG: hashst r0, -488(r1) +; BE-DAG: stw r12, 8(r1) +; BE-NEXT: stdu r1, -624(r1) +; BE: 
bl callee2 +; BE: addi r1, r1, 624 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: lwz r12, 8(r1) +; BE-DAG: mtlr r0 +; BE-DAG: hashchk r0, -488(r1) +; BE: blr +; LE-PRIV-LABEL: spill: +; LE-PRIV: mflr r0 +; LE-PRIV-DAG: std r0, 16(r1) +; LE-PRIV-DAG: hashstp r0, -488(r1) +; LE-PRIV-DAG: stw r12, 8(r1) +; LE-PRIV-NEXT: stdu r1, -544(r1) +; LE-PRIV: bl callee2 +; LE-PRIV: addi r1, r1, 544 +; LE-PRIV-NEXT: ld r0, 16(r1) +; LE-PRIV-NEXT: lwz r12, 8(r1) +; LE-PRIV-DAG: mtlr r0 +; LE-PRIV-DAG: hashchkp r0, -488(r1) +; LE-PRIV: blr +; BE-PRIV-LABEL: spill: +; BE-PRIV: mflr r0 +; BE-PRIV-DAG: std r0, 16(r1) +; BE-PRIV-DAG: hashstp r0, -488(r1) +; BE-PRIV-DAG: stw r12, 8(r1) +; BE-PRIV-NEXT: stdu r1, -624(r1) +; BE-PRIV: bl callee2 +; BE-PRIV: addi r1, r1, 624 +; BE-PRIV-NEXT: ld r0, 16(r1) +; BE-PRIV-NEXT: lwz r12, 8(r1) +; BE-PRIV-DAG: mtlr r0 +; BE-PRIV-DAG: hashchkp r0, -488(r1) +; BE-PRIV: blr + +entry: + %local = alloca i32, align 4 + %0 = bitcast i32* %local to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %arrayidx = getelementptr inbounds i32, i32* %in, i64 3 + %1 = load i32, i32* %arrayidx, align 4 + store i32 %1, i32* %local, align 4 + tail call void asm sideeffect "nop", "~{cr2},~{cr3},~{cr4},~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %call = call zeroext i32 @callee2(i32* nonnull %local) + %arrayidx1 = getelementptr inbounds i32, i32* %in, i64 4 + %2 = load i32, i32* %arrayidx1, align 4 + %add = add i32 %2, %call + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret i32 %add +} + +define dso_local zeroext i32 @shrinkwrap(i32* readonly %in) #0 { +; LE-LABEL: 
shrinkwrap: +; LE: cmpldi r3, 0 +; LE-NEXT: beq cr0, .LBB +; LE: mflr r0 +; LE-NEXT: std r30, -16(r1) +; LE-NEXT: std r0, 16(r1) +; LE-NEXT: hashst r0, -24(r1) +; LE-NEXT: stdu r1, -64(r1) +; LE: bl callee2 +; LE: addi r1, r1, 64 +; LE-NEXT: ld r0, 16(r1) +; LE-DAG: mtlr r0 +; LE-DAG: hashchk r0, -24(r1) +; LE-DAG: ld r30, -16(r1) +; LE-NEXT: blr +; LE-NEXT: .LBB +; LE-NEXT: li r3, 0 +; LE-NEXT: blr +; BE-LABEL: shrinkwrap: +; BE: cmpldi r3, 0 +; BE-NEXT: beq cr0, .LBB +; BE: mflr r0 +; BE-NEXT: std r0, 16(r1) +; BE-NEXT: hashst r0, -24(r1) +; BE-NEXT: stdu r1, -144(r1) +; BE: bl callee2 +; BE: addi r1, r1, 144 +; BE-NEXT: ld r0, 16(r1) +; BE-DAG: mtlr r0 +; BE-DAG: hashchk r0, -24(r1) +; BE-NEXT: blr +; BE-NEXT: .LBB +; BE-NEXT: li r3, 0 +; BE-NEXT: blr +; LE-PRIV-LABEL: shrinkwrap: +; LE-PRIV: cmpldi r3, 0 +; LE-PRIV-NEXT: beq cr0, .LBB +; LE-PRIV: mflr r0 +; LE-PRIV-NEXT: std r30, -16(r1) +; LE-PRIV-NEXT: std r0, 16(r1) +; LE-PRIV-NEXT: hashstp r0, -24(r1) +; LE-PRIV-NEXT: stdu r1, -64(r1) +; LE-PRIV: bl callee2 +; LE-PRIV: addi r1, r1, 64 +; LE-PRIV-NEXT: ld r0, 16(r1) +; LE-PRIV-DAG: mtlr r0 +; LE-PRIV-DAG: hashchkp r0, -24(r1) +; LE-PRIV-DAG: ld r30, -16(r1) +; LE-PRIV-NEXT: blr +; LE-PRIV-NEXT: .LBB +; LE-PRIV-NEXT: li r3, 0 +; LE-PRIV-NEXT: blr +; BE-PRIV-LABEL: shrinkwrap: +; BE-PRIV: cmpldi r3, 0 +; BE-PRIV-NEXT: beq cr0, .LBB +; BE-PRIV: mflr r0 +; BE-PRIV-NEXT: std r0, 16(r1) +; BE-PRIV-NEXT: hashstp r0, -24(r1) +; BE-PRIV-NEXT: stdu r1, -144(r1) +; BE-PRIV: bl callee2 +; BE-PRIV: addi r1, r1, 144 +; BE-PRIV-NEXT: ld r0, 16(r1) +; BE-PRIV-DAG: mtlr r0 +; BE-PRIV-DAG: hashchkp r0, -24(r1) +; BE-PRIV-NEXT: blr +; BE-PRIV-NEXT: .LBB +; BE-PRIV-NEXT: li r3, 0 +; BE-PRIV-NEXT: blr + +entry: + %local = alloca i32, align 4 + %tobool.not = icmp eq i32* %in, null + br i1 %tobool.not, label %return, label %if.end + +if.end: ; preds = %entry + %0 = bitcast i32* %local to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %arrayidx = getelementptr 
inbounds i32, i32* %in, i64 3 + %1 = load i32, i32* %arrayidx, align 4 + store i32 %1, i32* %local, align 4 + %call = call zeroext i32 @callee2(i32* nonnull %local) + %arrayidx1 = getelementptr inbounds i32, i32* %in, i64 4 + %2 = load i32, i32* %arrayidx1, align 4 + %add = add i32 %2, %call + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + br label %return + +return: ; preds = %entry, %if.end + %retval.0 = phi i32 [ %add, %if.end ], [ 0, %entry ] + ret i32 %retval.0 +} + +define dso_local zeroext i32 @aligned(i32* nocapture readonly %in) #0 { +; LE-LABEL: aligned: +; LE: mflr r0 +; LE: std r30, -16(r1) +; LE-NEXT: mr r30, r1 +; LE: std r0, 16(r1) +; LE: hashst r0, -32(r1) +; LE: stdux r1, r1, r0 +; LE: bl callee3 +; LE: mr r1, r30 +; LE-NEXT: ld r0, 16(r1) +; LE-DAG: ld r30, -16(r1) +; LE-DAG: mtlr r0 +; LE-DAG: hashchk r0, -32(r1) +; LE-NEXT: blr +; BE-LABEL: aligned: +; BE: mflr r0 +; BE: std r30, -16(r1) +; BE-NEXT: mr r30, r1 +; BE: std r0, 16(r1) +; BE: hashst r0, -32(r1) +; BE: stdux r1, r1, r0 +; BE: bl callee3 +; BE: mr r1, r30 +; BE-NEXT: ld r0, 16(r1) +; BE-DAG: ld r30, -16(r1) +; BE-DAG: mtlr r0 +; BE-DAG: hashchk r0, -32(r1) +; BE-NEXT: blr +; LE-PRIV-LABEL: aligned: +; LE-PRIV: mflr r0 +; LE-PRIV: std r30, -16(r1) +; LE-PRIV-NEXT: mr r30, r1 +; LE-PRIV: std r0, 16(r1) +; LE-PRIV: hashstp r0, -32(r1) +; LE-PRIV: stdux r1, r1, r0 +; LE-PRIV: bl callee3 +; LE-PRIV: mr r1, r30 +; LE-PRIV-NEXT: ld r0, 16(r1) +; LE-PRIV-DAG: ld r30, -16(r1) +; LE-PRIV-DAG: mtlr r0 +; LE-PRIV-DAG: hashchkp r0, -32(r1) +; LE-PRIV-NEXT: blr +; BE-PRIV-LABEL: aligned: +; BE-PRIV: mflr r0 +; BE-PRIV: std r30, -16(r1) +; BE-PRIV-NEXT: mr r30, r1 +; BE-PRIV: std r0, 16(r1) +; BE-PRIV: hashstp r0, -32(r1) +; BE-PRIV: stdux r1, r1, r0 +; BE-PRIV: bl callee3 +; BE-PRIV: mr r1, r30 +; BE-PRIV-NEXT: ld r0, 16(r1) +; BE-PRIV-DAG: ld r30, -16(r1) +; BE-PRIV-DAG: mtlr r0 +; BE-PRIV-DAG: hashchkp r0, -32(r1) +; BE-PRIV-NEXT: blr + +entry: + %beforeLocal = alloca i32, align 4 + 
%local = alloca i32, align 32768 + %afterLocal = alloca i32, align 4 + %0 = bitcast i32* %beforeLocal to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) + %arrayidx = getelementptr inbounds i32, i32* %in, i64 1 + %1 = load i32, i32* %arrayidx, align 4 + store i32 %1, i32* %beforeLocal, align 4 + %2 = bitcast i32* %local to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) + %arrayidx1 = getelementptr inbounds i32, i32* %in, i64 3 + %3 = load i32, i32* %arrayidx1, align 4 + store i32 %3, i32* %local, align 32768 + %4 = bitcast i32* %afterLocal to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %4) + %arrayidx2 = getelementptr inbounds i32, i32* %in, i64 5 + %5 = load i32, i32* %arrayidx2, align 4 + store i32 %5, i32* %afterLocal, align 4 + %call = call zeroext i32 @callee3(i32* nonnull %local, i32* nonnull %beforeLocal, i32* nonnull %afterLocal) + %arrayidx3 = getelementptr inbounds i32, i32* %in, i64 4 + %6 = load i32, i32* %arrayidx3, align 4 + %add = add i32 %6, %call + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %4) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + ret i32 %add +} + +declare zeroext i32 @callee(i32 zeroext) local_unnamed_addr +declare zeroext i32 @callee2(i32*) local_unnamed_addr +declare zeroext i32 @callee3(i32*, i32*, i32*) local_unnamed_addr +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + +attributes #0 = { nounwind }