diff --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp
--- a/llvm/lib/Target/VE/VERegisterInfo.cpp
+++ b/llvm/lib/Target/VE/VERegisterInfo.cpp
@@ -27,6 +27,8 @@
 using namespace llvm;
 
+#define DEBUG_TYPE "ve-register-info"
+
 #define GET_REGINFO_TARGET_DESC
 #include "VEGenRegisterInfo.inc"
 
@@ -133,66 +135,179 @@
   return OffDisp;
 }
 
-static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II,
-                      MachineInstr &MI, const DebugLoc &dl,
-                      unsigned FIOperandNum, int Offset, Register FrameReg) {
-  // Replace frame index with a frame pointer reference directly.
-  // VE has 32 bit offset field, so no need to expand a target instruction.
-  // Directly encode it.
+class EliminateFrameIndex {
+  const TargetInstrInfo &TII;
+  const TargetRegisterInfo &TRI;
+  const DebugLoc &DL;
+  MachineBasicBlock &MBB;
+  MachineBasicBlock::iterator II;
+  Register clobber;
+
+  // Helper functions to ease instruction building.
+  MachineFunction &getFunc() const { return *MBB.getParent(); }
+  inline MCRegister getSubReg(MCRegister Reg, unsigned Idx) const {
+    return TRI.getSubReg(Reg, Idx);
+  }
+  inline const MCInstrDesc &get(unsigned Opcode) const {
+    return TII.get(Opcode);
+  }
+  inline MachineInstrBuilder build(const MCInstrDesc &MCID, Register DestReg) {
+    return BuildMI(MBB, II, DL, MCID, DestReg);
+  }
+  inline MachineInstrBuilder build(unsigned InstOpc, Register DestReg) {
+    return build(get(InstOpc), DestReg);
+  }
+  inline MachineInstrBuilder build(const MCInstrDesc &MCID) {
+    return BuildMI(MBB, II, DL, MCID);
+  }
+  inline MachineInstrBuilder build(unsigned InstOpc) {
+    return build(get(InstOpc));
+  }
+
+  // Calculate the address of the frame index from a frame register and a
+  // given offset if the offset doesn't fit in the immediate field. Use the
+  // clobber register to hold the calculated address.
+  void prepareReplaceFI(MachineInstr &MI, Register &FrameReg, int64_t &Offset,
+                        int64_t Bytes = 0);
+  // Replace the frame index in \p MI with a frame register and a given offset
+  // if it fits in the immediate field. Otherwise, use the pre-calculated
+  // address in the clobber register.
+  void replaceFI(MachineInstr &MI, Register FrameReg, int64_t Offset,
+                 int FIOperandNum);
+
+  // Expand and eliminate the frame index of the pseudo STQrii and LDQrii.
+  void processSTQ(MachineInstr &MI, Register FrameReg, int64_t Offset,
+                  int FIOperandNum);
+  void processLDQ(MachineInstr &MI, Register FrameReg, int64_t Offset,
+                  int FIOperandNum);
+
+public:
+  EliminateFrameIndex(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI,
+                      const DebugLoc &DL, MachineBasicBlock &MBB,
+                      MachineBasicBlock::iterator II)
+      : TII(TII), TRI(TRI), DL(DL), MBB(MBB), II(II), clobber(VE::SX13) {}
+
+  // Expand and eliminate the frame index in \p MI.
+  void processMI(MachineInstr &MI, Register FrameReg, int64_t Offset,
+                 int FIOperandNum);
+};
+
+// Prepare the frame index if it doesn't fit in the immediate field. Use the
+// clobber register to hold the calculated address.
+void EliminateFrameIndex::prepareReplaceFI(MachineInstr &MI, Register &FrameReg,
+                                           int64_t &Offset, int64_t Bytes) {
+  if (isInt<32>(Offset) && isInt<32>(Offset + Bytes)) {
+    // If the offset is small enough to fit in the immediate field, it is
+    // directly encoded later, so there is nothing to prepare here.
+    return;
+  }
+
+  // If the offset doesn't fit, emit the following sequence. This clobbers
+  // SX13, which we know is always available here.
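+  // As a sketch, assume Offset = 0x100000000 (1 << 32): Lo_32(Offset) is 0
+  // and Hi_32(Offset) is 1, so the sequence below computes
+  // %clobber = FrameReg + (1 << 32) + 0. The `and (32)0` (a mask of 32
+  // zeros followed by 32 ones) is needed because `lea` sign-extends its
+  // 32-bit immediate to 64 bits.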
+  //   lea %clobber, Offset@lo
+  //   and %clobber, %clobber, (32)0
+  //   lea.sl %clobber, Offset@hi(FrameReg, %clobber)
+  build(VE::LEAzii, clobber).addImm(0).addImm(0).addImm(Lo_32(Offset));
+  build(VE::ANDrm, clobber).addReg(clobber).addImm(M0(32));
+  build(VE::LEASLrri, clobber)
+      .addReg(clobber)
+      .addReg(FrameReg)
+      .addImm(Hi_32(Offset));
+
+  // Use the clobber register as the frame register with a zero offset.
+  FrameReg = clobber;
+  Offset = 0;
+}
+
+// Replace the frame index in \p MI with the frame register and a byte offset.
+void EliminateFrameIndex::replaceFI(MachineInstr &MI, Register FrameReg,
+                                    int64_t Offset, int FIOperandNum) {
+  assert(isInt<32>(Offset));
+
+  // The offset must be small enough to fit in the immediate field after the
+  // call to prepareReplaceFI, so we directly encode it.
   MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
   MI.getOperand(FIOperandNum + offsetToDisp(MI)).ChangeToImmediate(Offset);
 }
 
+void EliminateFrameIndex::processSTQ(MachineInstr &MI, Register FrameReg,
+                                     int64_t Offset, int FIOperandNum) {
+  assert(MI.getOpcode() == VE::STQrii);
+  LLVM_DEBUG(dbgs() << "processSTQ: "; MI.dump());
+
+  prepareReplaceFI(MI, FrameReg, Offset, 8);
+
+  Register SrcReg = MI.getOperand(3).getReg();
+  Register SrcHiReg = getSubReg(SrcReg, VE::sub_even);
+  Register SrcLoReg = getSubReg(SrcReg, VE::sub_odd);
+  // VE stores HiReg to 8(addr) and LoReg to 0(addr).
+  MachineInstr *StMI =
+      build(VE::STrii).addReg(FrameReg).addImm(0).addImm(0).addReg(SrcLoReg);
+  replaceFI(*StMI, FrameReg, Offset, 0);
+  // Mutate to 'hi' store.
+  MI.setDesc(get(VE::STrii));
+  MI.getOperand(3).setReg(SrcHiReg);
+  Offset += 8;
+  replaceFI(MI, FrameReg, Offset, FIOperandNum);
+}
+
+void EliminateFrameIndex::processLDQ(MachineInstr &MI, Register FrameReg,
+                                     int64_t Offset, int FIOperandNum) {
+  assert(MI.getOpcode() == VE::LDQrii);
+  LLVM_DEBUG(dbgs() << "processLDQ: "; MI.dump());
+
+  prepareReplaceFI(MI, FrameReg, Offset, 8);
+
+  Register DestReg = MI.getOperand(0).getReg();
+  Register DestHiReg = getSubReg(DestReg, VE::sub_even);
+  Register DestLoReg = getSubReg(DestReg, VE::sub_odd);
+  // VE loads HiReg from 8(addr) and LoReg from 0(addr).
+  MachineInstr *LdMI =
+      build(VE::LDrii, DestLoReg).addReg(FrameReg).addImm(0).addImm(0);
+  replaceFI(*LdMI, FrameReg, Offset, 1);
+  // Mutate to 'hi' load.
+  MI.setDesc(get(VE::LDrii));
+  MI.getOperand(0).setReg(DestHiReg);
+  Offset += 8;
+  replaceFI(MI, FrameReg, Offset, FIOperandNum);
+}
+
+void EliminateFrameIndex::processMI(MachineInstr &MI, Register FrameReg,
+                                    int64_t Offset, int FIOperandNum) {
+  switch (MI.getOpcode()) {
+  case VE::STQrii:
+    processSTQ(MI, FrameReg, Offset, FIOperandNum);
+    return;
+  case VE::LDQrii:
+    processLDQ(MI, FrameReg, Offset, FIOperandNum);
+    return;
+  }
+  prepareReplaceFI(MI, FrameReg, Offset);
+  replaceFI(MI, FrameReg, Offset, FIOperandNum);
+}
+
 void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          int SPAdj, unsigned FIOperandNum,
                                          RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected");
 
   MachineInstr &MI = *II;
-  DebugLoc dl = MI.getDebugLoc();
   int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+
   MachineFunction &MF = *MI.getParent()->getParent();
-  const VEFrameLowering *TFI = getFrameLowering(MF);
+  const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
+  const VEFrameLowering &TFI = *getFrameLowering(MF);
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+  const VERegisterInfo &TRI = *Subtarget.getRegisterInfo();
+  DebugLoc DL = MI.getDebugLoc();
+  EliminateFrameIndex EFI(TII, TRI, DL, *MI.getParent(), II);
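+
+  // EFI rewrites MI below: prepareReplaceFI materializes the address into
+  // the clobber register (SX13) when the final offset does not fit in a
+  // 32-bit immediate, and replaceFI encodes the offset directly otherwise.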
+  // Retrieve the frame register and byte offset for the stack slot.
   Register FrameReg;
-  int Offset;
-  Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg).getFixed();
-
+  int64_t Offset =
+      TFI.getFrameIndexReference(MF, FrameIndex, FrameReg).getFixed();
   Offset += MI.getOperand(FIOperandNum + offsetToDisp(MI)).getImm();
 
-  if (MI.getOpcode() == VE::STQrii) {
-    const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
-    Register SrcReg = MI.getOperand(3).getReg();
-    Register SrcHiReg = getSubReg(SrcReg, VE::sub_even);
-    Register SrcLoReg = getSubReg(SrcReg, VE::sub_odd);
-    // VE stores HiReg to 8(addr) and LoReg to 0(addr)
-    MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(VE::STrii))
-                             .addReg(FrameReg)
-                             .addImm(0)
-                             .addImm(0)
-                             .addReg(SrcLoReg);
-    replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg);
-    MI.setDesc(TII.get(VE::STrii));
-    MI.getOperand(3).setReg(SrcHiReg);
-    Offset += 8;
-  } else if (MI.getOpcode() == VE::LDQrii) {
-    const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
-    Register DestReg = MI.getOperand(0).getReg();
-    Register DestHiReg = getSubReg(DestReg, VE::sub_even);
-    Register DestLoReg = getSubReg(DestReg, VE::sub_odd);
-    // VE loads HiReg from 8(addr) and LoReg from 0(addr)
-    MachineInstr *StMI =
-        BuildMI(*MI.getParent(), II, dl, TII.get(VE::LDrii), DestLoReg)
-            .addReg(FrameReg)
-            .addImm(0)
-            .addImm(0);
-    replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg);
-    MI.setDesc(TII.get(VE::LDrii));
-    MI.getOperand(0).setReg(DestHiReg);
-    Offset += 8;
-  }
-
-  replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg);
+  EFI.processMI(MI, FrameReg, Offset, FIOperandNum);
 }
 
 Register VERegisterInfo::getFrameRegister(const MachineFunction &MF) const {
diff --git a/llvm/test/CodeGen/VE/Scalar/load_stk.ll b/llvm/test/CodeGen/VE/Scalar/load_stk.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Scalar/load_stk.ll
@@ -0,0 +1,795 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=ve | FileCheck %s
+
+;;; Test load instructions
+;;;
+;;; Note:
+;;; We test load instructions using a general stack, a stack with dynamic
+;;; allocation, a stack with dynamic allocation and alignment, and a stack
+;;; with dynamic allocation, alignment, and spills.
+;;;
+;;; First test uses a stack for a leaf function.
+;;;
+;;; |                                              | Higher address
+;;; |----------------------------------------------| <- old sp
+;;; | Local variables of fixed size                |
+;;; |----------------------------------------------| <- sp
+;;; |                                              | Lower address
+;;;
+;;; Local variables are accessed through sp (%s11). In addition, remember
+;;; that the stack is 16-byte aligned.
+;;;
+;;; Second test uses a general stack.
+;;;
+;;; |                                              | Higher address
+;;; |----------------------------------------------|
+;;; | Parameter area for this function             |
+;;; |----------------------------------------------|
+;;; | Register save area (RSA) for this function   |
+;;; |----------------------------------------------|
+;;; | Return address for this function             |
+;;; |----------------------------------------------|
+;;; | Frame pointer for this function              |
+;;; |----------------------------------------------| <- fp(=old sp)
+;;; | Local variables of fixed size                |
+;;; |----------------------------------------------|
+;;; |.variable-sized.local.variables.(VLAs)........|
+;;; |..............................................|
+;;; |..............................................|
+;;; |----------------------------------------------| <- returned by alloca
+;;; | Parameter area for callee                    |
+;;; |----------------------------------------------|
+;;; | Register save area (RSA) for callee          |
+;;; |----------------------------------------------|
+;;; | Return address for callee                    |
+;;; |----------------------------------------------|
+;;; | Frame pointer for callee                     |
+;;; |----------------------------------------------| <- sp
+;;; |                                              | Lower address
+;;;
+;;; Local variables are accessed through fp (%s9) since the size of the VLA
+;;; is not known. At the beginning of the function, 240 + data bytes are
+;;; allocated, where 240 is RSA+RA+FP (=176) + parameter area (=64).
+;;;
+;;; Third test uses a general stack.
+;;;
+;;; |                                              | Higher address
+;;; |----------------------------------------------|
+;;; | Parameter area for this function             |
+;;; |----------------------------------------------|
+;;; | Register save area (RSA) for this function   |
+;;; |----------------------------------------------|
+;;; | Return address for this function             |
+;;; |----------------------------------------------|
+;;; | Frame pointer for this function              |
+;;; |----------------------------------------------| <- fp(=old sp)
+;;; |.empty.space.to.make.part.below.aligned.in....|
+;;; |.case.it.needs.more.than.the.standard.16-byte.| (size of this area is
+;;; |.alignment....................................|  unknown at compile time)
+;;; |----------------------------------------------|
+;;; | Local variables of fixed size including spill|
+;;; | slots                                        |
+;;; |----------------------------------------------| <- bp(not defined by ABI,
+;;; |.variable-sized.local.variables.(VLAs)........|    LLVM chooses SX17)
+;;; |..............................................| (size of this area is
+;;; |..............................................|  unknown at compile time)
+;;; |----------------------------------------------| <- stack top (returned by
+;;; | Parameter area for callee                    |    alloca)
+;;; |----------------------------------------------|
+;;; | Register save area (RSA) for callee          |
+;;; |----------------------------------------------|
+;;; | Return address for callee                    |
+;;; |----------------------------------------------|
+;;; | Frame pointer for callee                     |
+;;; |----------------------------------------------| <- sp
+;;; |                                              | Lower address
+;;;
+;;; Local variables are accessed through bp (%s17) since neither the size of
+;;; the alignment padding nor the size of the VLA is known. At the beginning
+;;; of the function, pad(240 + data + align) bytes are allocated. Data is
+;;; then accessed through bp + pad(240) since this address doesn't change
+;;; even if the VLA is dynamically allocated.
+;;;
+;;; Fourth test uses a general stack with some spills.
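+;;; In the spill test below, callee-saved registers (e.g. %s18/%s19) are
+;;; saved to and restored from fixed slots addressed through fp (%s9); see
+;;; the "Folded Spill"/"Folded Reload" comments in the generated checks.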
+;;; + +; Function Attrs: argmemonly mustprogress nofree nounwind willreturn +define x86_fastcallcc i64 @loadi64_stk() { +; CHECK-LABEL: loadi64_stk: +; CHECK: # %bb.0: +; CHECK-NEXT: adds.l %s11, -16, %s11 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: ld %s0, 8(, %s11) +; CHECK-NEXT: adds.l %s11, 16, %s11 +; CHECK-NEXT: b.l.t (, %s10) + %1 = alloca i64, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %1) + %2 = load volatile i64, ptr %1, align 8, !tbaa !3 + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %1) + ret i64 %2 +} + +; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) + +; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) + +; Function Attrs: argmemonly nofree nounwind +define x86_fastcallcc i64 @loadi64_stk_big() { +; CHECK-LABEL: loadi64_stk_big: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s11, -2147483648(, %s11) +; CHECK-NEXT: brge.l %s11, %s8, .LBB1_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: ld %s0, 2147483640(, %s11) +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: lea %s2, 2147483640 +; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ld %s3, (%s1, %s11) +; CHECK-NEXT: lea %s1, 8(, %s1) +; CHECK-NEXT: brne.l %s1, %s2, .LBB1_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: lea %s13, -2147483648 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, (%s13, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %1 = alloca i64, align 8 + %2 = alloca [268435455 x i64], align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %1) + call void @llvm.lifetime.start.p0(i64 2147483640, ptr nonnull %2) + %3 = load volatile i64, ptr %1, align 8, !tbaa !3 + br label %5 + +4: ; preds = %5 + call void @llvm.lifetime.end.p0(i64 2147483640, ptr nonnull %2) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %1) + ret i64 %3 + +5: ; preds = %0, %5 + %6 = phi i64 [ 0, %0 ], [ %9, %5 ] + %7 = getelementptr inbounds [268435455 x i64], ptr %2, i64 0, i64 %6 + %8 = load volatile i64, ptr %7, align 8, !tbaa !3 + %9 = add nuw nsw i64 %6, 1 + %10 = icmp eq i64 %9, 268435455 + br i1 %10, label %4, label %5, !llvm.loop !7 +} + +; Function Attrs: argmemonly nofree nounwind +define x86_fastcallcc i64 @loadi64_stk_big2() { +; CHECK-LABEL: loadi64_stk_big2: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s13, 2147483632 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: brge.l %s11, %s8, .LBB2_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: lea %s13, -2147483640 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: 
lea.sl %s13, (%s11, %s13) +; CHECK-NEXT: ld %s0, (, %s13) +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: lea %s2, -2147483648 +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ld %s3, 8(%s1, %s11) +; CHECK-NEXT: lea %s1, 8(, %s1) +; CHECK-NEXT: brne.l %s1, %s2, .LBB2_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: lea %s13, -2147483632 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, (%s13, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %1 = alloca i64, align 8 + %2 = alloca [268435456 x i64], align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %1) + call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %2) + %3 = load volatile i64, ptr %1, align 8, !tbaa !3 + br label %5 + +4: ; preds = %5 + call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %2) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %1) + ret i64 %3 + +5: ; preds = %0, %5 + %6 = phi i64 [ 0, %0 ], [ %9, %5 ] + %7 = getelementptr inbounds [268435456 x i64], ptr %2, i64 0, i64 %6 + %8 = load volatile i64, ptr %7, align 8, !tbaa !3 + %9 = add nuw nsw i64 %6, 1 + %10 = icmp eq i64 %9, 268435456 + br i1 %10, label %4, label %5, !llvm.loop !9 +} + +; Function Attrs: argmemonly mustprogress nofree nounwind willreturn +define x86_fastcallcc i64 @loadi64_stk_dyn(i64 noundef %0) { +; CHECK-LABEL: loadi64_stk_dyn: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -256(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB3_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: lea %s0, 15(, %s0) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: ld %s0, (, %s0) +; CHECK-NEXT: ld %s0, -8(, %s9) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %2 = alloca i64, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2) + %3 = alloca i8, i64 %0, align 8 + %4 = load volatile i64, ptr %3, align 8, !tbaa !3 + %5 = load volatile i64, ptr %2, align 8, !tbaa !3 + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2) + ret i64 %5 +} + +; Function Attrs: argmemonly mustprogress nofree nounwind willreturn +define x86_fastcallcc i64 @loadi64_stk_dyn_align(i64 noundef %0) { +; CHECK-LABEL: loadi64_stk_dyn_align: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s17, 40(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -288(, %s11) +; CHECK-NEXT: and %s11, %s11, (59)1 +; CHECK-NEXT: or %s17, 0, %s11 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: lea %s0, 15(, %s0) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack@lo +; CHECK-NEXT: and %s1, %s1, (32)0 
+; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: ld %s0, (, %s0) +; CHECK-NEXT: ld %s0, 256(, %s17) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s17, 40(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %2 = alloca i64, align 32 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2) + %3 = alloca i8, i64 %0, align 8 + %4 = load volatile i64, ptr %3, align 8, !tbaa !3 + %5 = load volatile i64, ptr %2, align 32, !tbaa !10 + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2) + ret i64 %5 +} + +; Function Attrs: argmemonly mustprogress nofree nounwind willreturn +define x86_fastcallcc i64 @loadi64_stk_dyn_align2(i64 noundef %0) { +; CHECK-LABEL: loadi64_stk_dyn_align2: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s17, 40(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -320(, %s11) +; CHECK-NEXT: and %s11, %s11, (58)1 +; CHECK-NEXT: or %s17, 0, %s11 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: lea %s0, 15(, %s0) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: ld %s0, (, %s0) +; CHECK-NEXT: ld %s0, 288(, %s17) +; CHECK-NEXT: ld %s1, 256(, %s17) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s17, 40(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %2 = alloca i64, align 32 + %3 = alloca i64, align 64 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2) + %4 = alloca i8, i64 %0, align 8 + %5 = load volatile i64, ptr %4, align 8, !tbaa !3 + %6 = load volatile i64, ptr %2, align 32, !tbaa !10 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3) + %7 = load volatile i64, ptr %3, align 64, !tbaa !10 + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2) + ret i64 %6 +} + +; Function Attrs: nounwind +define x86_fastcallcc i64 @loadi64_stk_dyn_align_spill(i64 noundef %0) { +; CHECK-LABEL: loadi64_stk_dyn_align_spill: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s17, 40(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -288(, %s11) +; CHECK-NEXT: and %s11, %s11, (59)1 +; CHECK-NEXT: or %s17, 0, %s11 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB6_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: st %s18, 48(, %s9) # 8-byte Folded Spill +; CHECK-NEXT: st %s19, 56(, %s9) # 8-byte Folded Spill +; CHECK-NEXT: or %s18, 0, %s0 +; CHECK-NEXT: lea %s0, 15(, %s0) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) 
+; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: ld %s0, (, %s0) +; CHECK-NEXT: ld %s19, 256(, %s17) +; CHECK-NEXT: lea %s0, dummy@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, dummy@hi(, %s0) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, pass@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, pass@hi(, %s0) +; CHECK-NEXT: or %s0, 0, %s18 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: or %s0, 0, %s19 +; CHECK-NEXT: ld %s19, 56(, %s9) # 8-byte Folded Reload +; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s17, 40(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %2 = alloca i64, align 32 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2) + %3 = alloca i8, i64 %0, align 8 + %4 = load volatile i64, ptr %3, align 8, !tbaa !3 + %5 = load volatile i64, ptr %2, align 32, !tbaa !10 + tail call void (...) @dummy() + tail call void @pass(i64 noundef %0) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2) + ret i64 %5 +} + +declare void @dummy(...) + +declare void @pass(i64 noundef) + +; Function Attrs: argmemonly mustprogress nofree nounwind willreturn +define x86_fastcallcc fp128 @loadquad_stk() { +; CHECK-LABEL: loadquad_stk: +; CHECK: # %bb.0: +; CHECK-NEXT: adds.l %s11, -16, %s11 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB7_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: ld %s1, (, %s11) +; CHECK-NEXT: ld %s0, 8(, %s11) +; CHECK-NEXT: adds.l %s11, 16, %s11 +; CHECK-NEXT: b.l.t (, %s10) + %1 = alloca fp128, align 16 + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %1) + %2 = load volatile fp128, ptr %1, align 16, !tbaa !12 + call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %1) + ret fp128 %2 +} + +; Function Attrs: argmemonly nofree nounwind +define x86_fastcallcc fp128 @loadquad_stk_big() { +; CHECK-LABEL: loadquad_stk_big: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s13, 2147483632 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: brge.l %s11, %s8, .LBB8_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB8_4: +; CHECK-NEXT: lea %s13, -2147483648 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s13, (%s11, %s13) +; CHECK-NEXT: ld %s1, (, %s13) +; CHECK-NEXT: ld %s0, 8(, %s13) +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: lea %s3, 2147483640 +; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ld %s4, 8(%s2, %s11) +; CHECK-NEXT: lea %s2, 8(, %s2) +; CHECK-NEXT: brne.l %s2, %s3, .LBB8_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: lea %s13, -2147483632 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, (%s13, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %1 = alloca fp128, align 16 + %2 = alloca [268435455 x i64], align 8 + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %1) + call void @llvm.lifetime.start.p0(i64 2147483640, ptr nonnull %2) + %3 = load volatile fp128, ptr %1, align 16, !tbaa !12 + br 
label %5 + +4: ; preds = %5 + call void @llvm.lifetime.end.p0(i64 2147483640, ptr nonnull %2) + call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %1) + ret fp128 %3 + +5: ; preds = %0, %5 + %6 = phi i64 [ 0, %0 ], [ %9, %5 ] + %7 = getelementptr inbounds [268435455 x i64], ptr %2, i64 0, i64 %6 + %8 = load volatile i64, ptr %7, align 8, !tbaa !3 + %9 = add nuw nsw i64 %6, 1 + %10 = icmp eq i64 %9, 268435455 + br i1 %10, label %4, label %5, !llvm.loop !14 +} + +; Function Attrs: argmemonly nofree nounwind +define x86_fastcallcc fp128 @loadquad_stk_big2() { +; CHECK-LABEL: loadquad_stk_big2: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s13, 2147483632 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: brge.l %s11, %s8, .LBB9_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB9_4: +; CHECK-NEXT: lea %s13, -2147483648 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s13, (%s11, %s13) +; CHECK-NEXT: ld %s1, (, %s13) +; CHECK-NEXT: ld %s0, 8(, %s13) +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: lea %s3, -2147483648 +; CHECK-NEXT: and %s3, %s3, (32)0 +; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ld %s4, (%s2, %s11) +; CHECK-NEXT: lea %s2, 8(, %s2) +; CHECK-NEXT: brne.l %s2, %s3, .LBB9_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: lea %s13, -2147483632 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, (%s13, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %1 = alloca fp128, align 16 + %2 = alloca [268435456 x i64], align 8 + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %1) + call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %2) + %3 = load volatile fp128, ptr %1, align 16, !tbaa !12 + br label %5 + +4: ; preds = %5 + call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %2) + call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %1) + ret fp128 %3 + +5: ; preds = %0, %5 + %6 = phi i64 [ 0, %0 ], [ %9, %5 ] + %7 = getelementptr inbounds [268435456 x i64], ptr %2, i64 0, i64 %6 + %8 = load volatile i64, ptr %7, align 8, !tbaa !3 + %9 = add nuw nsw i64 %6, 1 + %10 = icmp eq i64 %9, 268435456 + br i1 %10, label %4, label %5, !llvm.loop !15 +} + +; Function Attrs: argmemonly mustprogress nofree nounwind willreturn +define x86_fastcallcc fp128 @loadquad_stk_dyn(i64 noundef %0) { +; CHECK-LABEL: loadquad_stk_dyn: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -256(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: lea %s0, 15(, %s0) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: ld %s1, 8(, %s0) +; CHECK-NEXT: ld %s0, (, %s0) +; CHECK-NEXT: ld %s1, -16(, %s9) +; CHECK-NEXT: ld %s0, -8(, %s9) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; 
CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %2 = alloca fp128, align 16 + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %2) + %3 = alloca i8, i64 %0, align 16 + %4 = load volatile fp128, ptr %3, align 16, !tbaa !12 + %5 = load volatile fp128, ptr %2, align 16, !tbaa !12 + call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %2) + ret fp128 %5 +} + +; Function Attrs: argmemonly mustprogress nofree nounwind willreturn +define x86_fastcallcc fp128 @loadquad_stk_dyn_align(i64 noundef %0) { +; CHECK-LABEL: loadquad_stk_dyn_align: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s17, 40(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -288(, %s11) +; CHECK-NEXT: and %s11, %s11, (59)1 +; CHECK-NEXT: or %s17, 0, %s11 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB11_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: lea %s0, 15(, %s0) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: ld %s1, 8(, %s0) +; CHECK-NEXT: ld %s0, (, %s0) +; CHECK-NEXT: ld %s1, 256(, %s17) +; CHECK-NEXT: ld %s0, 264(, %s17) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s17, 40(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %2 = alloca fp128, align 32 + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %2) + %3 = alloca i8, i64 %0, align 16 + %4 = load volatile fp128, ptr %3, align 16, !tbaa !12 + %5 = load volatile fp128, ptr %2, align 32, !tbaa !16 + call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %2) + ret fp128 %5 +} + +; Function Attrs: argmemonly mustprogress nofree nounwind willreturn +define x86_fastcallcc fp128 @loadquad_stk_dyn_align2(i64 noundef %0) { +; CHECK-LABEL: loadquad_stk_dyn_align2: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s17, 40(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -320(, %s11) +; CHECK-NEXT: and %s11, %s11, (58)1 +; CHECK-NEXT: or %s17, 0, %s11 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB12_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB12_2: +; CHECK-NEXT: lea %s0, 15(, %s0) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: ld %s1, 8(, %s0) +; CHECK-NEXT: ld %s0, (, %s0) +; CHECK-NEXT: ld %s1, 288(, %s17) +; CHECK-NEXT: ld %s0, 296(, %s17) +; CHECK-NEXT: ld %s3, 256(, %s17) +; CHECK-NEXT: ld %s2, 264(, %s17) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s17, 40(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %2 = alloca fp128, align 32 + %3 = alloca fp128, align 64 + call void @llvm.lifetime.start.p0(i64 
16, ptr nonnull %2) + %4 = alloca i8, i64 %0, align 16 + %5 = load volatile fp128, ptr %4, align 16, !tbaa !12 + %6 = load volatile fp128, ptr %2, align 32, !tbaa !16 + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3) + %7 = load volatile fp128, ptr %3, align 64, !tbaa !16 + call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3) + call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %2) + ret fp128 %6 +} + +; Function Attrs: nounwind +define x86_fastcallcc fp128 @loadquad_stk_dyn_align_spill(i64 noundef %0) { +; CHECK-LABEL: loadquad_stk_dyn_align_spill: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s17, 40(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -288(, %s11) +; CHECK-NEXT: and %s11, %s11, (59)1 +; CHECK-NEXT: or %s17, 0, %s11 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB13_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB13_2: +; CHECK-NEXT: st %s18, 48(, %s9) # 8-byte Folded Spill +; CHECK-NEXT: st %s20, 64(, %s9) # 8-byte Folded Spill +; CHECK-NEXT: st %s21, 72(, %s9) # 8-byte Folded Spill +; CHECK-NEXT: or %s18, 0, %s0 +; CHECK-NEXT: lea %s0, 15(, %s0) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: ld %s1, 8(, %s0) +; CHECK-NEXT: ld %s0, (, %s0) +; CHECK-NEXT: ld %s21, 256(, %s17) +; CHECK-NEXT: ld %s20, 264(, %s17) +; CHECK-NEXT: lea %s0, dummy@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, dummy@hi(, %s0) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, pass@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, pass@hi(, %s0) +; CHECK-NEXT: or %s0, 0, %s18 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: or %s0, 0, %s20 +; CHECK-NEXT: or %s1, 0, %s21 +; CHECK-NEXT: ld %s21, 72(, %s9) # 8-byte Folded Reload +; CHECK-NEXT: ld %s20, 64(, %s9) # 8-byte Folded Reload +; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s17, 40(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %2 = alloca fp128, align 32 + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %2) + %3 = alloca i8, i64 %0, align 16 + %4 = load volatile fp128, ptr %3, align 16, !tbaa !12 + %5 = load volatile fp128, ptr %2, align 32, !tbaa !16 + tail call void (...) 
@dummy()
+  tail call void @pass(i64 noundef %0)
+  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %2)
+  ret fp128 %5
+}
+
+!3 = !{!4, !4, i64 0}
+!4 = !{!"long", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = distinct !{!7, !8}
+!8 = !{!"llvm.loop.mustprogress"}
+!9 = distinct !{!9, !8}
+!10 = !{!11, !4, i64 0}
+!11 = !{!"", !4, i64 0}
+!12 = !{!13, !13, i64 0}
+!13 = !{!"long double", !5, i64 0}
+!14 = distinct !{!14, !8}
+!15 = distinct !{!15, !8}
+!16 = !{!17, !13, i64 0}
+!17 = !{!"", !13, i64 0}
diff --git a/llvm/test/CodeGen/VE/Scalar/store_stk.ll b/llvm/test/CodeGen/VE/Scalar/store_stk.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Scalar/store_stk.ll
@@ -0,0 +1,808 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=ve | FileCheck %s
+
+;;; Test store instructions
+;;;
+;;; Note:
+;;; We test store instructions using a general stack, a stack with dynamic
+;;; allocation, a stack with dynamic allocation and alignment, and a stack
+;;; with dynamic allocation, alignment, and spills.
+;;;
+;;; First test uses a stack for a leaf function.
+;;;
+;;; |                                              | Higher address
+;;; |----------------------------------------------| <- old sp
+;;; | Local variables of fixed size                |
+;;; |----------------------------------------------| <- sp
+;;; |                                              | Lower address
+;;;
+;;; Local variables are accessed through sp (%s11). In addition, remember
+;;; that the stack is 16-byte aligned.
+;;;
+;;; Second test uses a general stack.
+;;;
+;;; |                                              | Higher address
+;;; |----------------------------------------------|
+;;; | Parameter area for this function             |
+;;; |----------------------------------------------|
+;;; | Register save area (RSA) for this function   |
+;;; |----------------------------------------------|
+;;; | Return address for this function             |
+;;; |----------------------------------------------|
+;;; | Frame pointer for this function              |
+;;; |----------------------------------------------| <- fp(=old sp)
+;;; | Local variables of fixed size                |
+;;; |----------------------------------------------|
+;;; |.variable-sized.local.variables.(VLAs)........|
+;;; |..............................................|
+;;; |..............................................|
+;;; |----------------------------------------------| <- returned by alloca
+;;; | Parameter area for callee                    |
+;;; |----------------------------------------------|
+;;; | Register save area (RSA) for callee          |
+;;; |----------------------------------------------|
+;;; | Return address for callee                    |
+;;; |----------------------------------------------|
+;;; | Frame pointer for callee                     |
+;;; |----------------------------------------------| <- sp
+;;; |                                              | Lower address
+;;;
+;;; Local variables are accessed through fp (%s9) since the size of the VLA
+;;; is not known. At the beginning of the function, 240 + data bytes are
+;;; allocated, where 240 is RSA+RA+FP (=176) + parameter area (=64).
+;;;
+;;; Third test uses a general stack.
+;;;
+;;; |                                              | Higher address
+;;; |----------------------------------------------|
+;;; | Parameter area for this function             |
+;;; |----------------------------------------------|
+;;; | Register save area (RSA) for this function   |
+;;; |----------------------------------------------|
+;;; | Return address for this function             |
+;;; |----------------------------------------------|
+;;; | Frame pointer for this function              |
+;;; |----------------------------------------------| <- fp(=old sp)
+;;; |.empty.space.to.make.part.below.aligned.in....|
+;;; |.case.it.needs.more.than.the.standard.16-byte.| (size of this area is
+;;; |.alignment....................................|  unknown at compile time)
+;;; |----------------------------------------------|
+;;; | Local variables of fixed size including spill|
+;;; | slots                                        |
+;;; |----------------------------------------------| <- bp(not defined by ABI,
+;;; |.variable-sized.local.variables.(VLAs)........|    LLVM chooses SX17)
+;;; |..............................................| (size of this area is
+;;; |..............................................|  unknown at compile time)
+;;; |----------------------------------------------| <- stack top (returned by
+;;; | Parameter area for callee                    |    alloca)
+;;; |----------------------------------------------|
+;;; | Register save area (RSA) for callee          |
+;;; |----------------------------------------------|
+;;; | Return address for callee                    |
+;;; |----------------------------------------------|
+;;; | Frame pointer for callee                     |
+;;; |----------------------------------------------| <- sp
+;;; |                                              | Lower address
+;;;
+;;; Local variables are accessed through bp (%s17) since neither the size of
+;;; the alignment padding nor the size of the VLA is known. At the beginning
+;;; of the function, pad(240 + data + align) bytes are allocated. Data is
+;;; then accessed through bp + pad(240) since this address doesn't change
+;;; even if the VLA is dynamically allocated.
+;;;
+;;; Fourth test uses a general stack with some spills.
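+;;; As in the load tests, callee-saved registers (e.g. %s18-%s20) are saved
+;;; to and restored from fixed slots addressed through fp (%s9); see the
+;;; "Folded Spill"/"Folded Reload" comments in the generated checks.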
+;;; + +; Function Attrs: argmemonly nofree nounwind +define x86_fastcallcc void @storei64_stk(i64 noundef %0) { +; CHECK-LABEL: storei64_stk: +; CHECK: # %bb.0: +; CHECK-NEXT: adds.l %s11, -16, %s11 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: st %s0, 8(, %s11) +; CHECK-NEXT: adds.l %s11, 16, %s11 +; CHECK-NEXT: b.l.t (, %s10) + %2 = alloca i64, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2) + store volatile i64 %0, ptr %2, align 8, !tbaa !3 + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2) + ret void +} + +; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) + +; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) + +; Function Attrs: argmemonly nofree nounwind +define x86_fastcallcc void @storei64_stk_big(i64 noundef %0, i64 noundef %1) { +; CHECK-LABEL: storei64_stk_big: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s11, -2147483648(, %s11) +; CHECK-NEXT: brge.l %s11, %s8, .LBB1_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: st %s0, 2147483640(, %s11) +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: lea %s2, 2147483640 +; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: st %s1, (%s0, %s11) +; CHECK-NEXT: lea %s0, 8(, %s0) +; CHECK-NEXT: brne.l %s0, %s2, .LBB1_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: lea %s13, -2147483648 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, (%s13, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %3 = alloca i64, align 8 + %4 = alloca [268435455 x i64], align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3) + call void @llvm.lifetime.start.p0(i64 2147483640, ptr nonnull %4) + store volatile i64 %0, ptr %3, align 8, !tbaa !3 + br label %6 + +5: ; preds = %6 + call void @llvm.lifetime.end.p0(i64 2147483640, ptr nonnull %4) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3) + ret void + +6: ; preds = %2, %6 + %7 = phi i64 [ 0, %2 ], [ %9, %6 ] + %8 = getelementptr inbounds [268435455 x i64], ptr %4, i64 0, i64 %7 + store volatile i64 %1, ptr %8, align 8, !tbaa !3 + %9 = add nuw nsw i64 %7, 1 + %10 = icmp eq i64 %9, 268435455 + br i1 %10, label %5, label %6, !llvm.loop !7 +} + +; Function Attrs: argmemonly nofree nounwind +define x86_fastcallcc void @storei64_stk_big2(i64 noundef %0, i64 noundef %1) { +; CHECK-LABEL: storei64_stk_big2: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s13, 2147483632 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: brge.l %s11, %s8, .LBB2_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: lea %s13, -2147483640 +; 
CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s13, (%s11, %s13) +; CHECK-NEXT: st %s0, (, %s13) +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: lea %s2, -2147483648 +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: st %s1, 8(%s0, %s11) +; CHECK-NEXT: lea %s0, 8(, %s0) +; CHECK-NEXT: brne.l %s0, %s2, .LBB2_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: lea %s13, -2147483632 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, (%s13, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %3 = alloca i64, align 8 + %4 = alloca [268435456 x i64], align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3) + call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %4) + store volatile i64 %0, ptr %3, align 8, !tbaa !3 + br label %6 + +5: ; preds = %6 + call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %4) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3) + ret void + +6: ; preds = %2, %6 + %7 = phi i64 [ 0, %2 ], [ %9, %6 ] + %8 = getelementptr inbounds [268435456 x i64], ptr %4, i64 0, i64 %7 + store volatile i64 %1, ptr %8, align 8, !tbaa !3 + %9 = add nuw nsw i64 %7, 1 + %10 = icmp eq i64 %9, 268435456 + br i1 %10, label %5, label %6, !llvm.loop !9 +} + +; Function Attrs: argmemonly nofree nounwind +define x86_fastcallcc void @storei64_stk_dyn(i64 noundef %0, i64 noundef %1) { +; CHECK-LABEL: storei64_stk_dyn: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -256(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB3_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: lea %s0, 15(, %s1) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: st %s2, (, %s0) +; CHECK-NEXT: st %s2, -8(, %s9) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %3 = alloca i64, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3) + %4 = alloca i8, i64 %1, align 8 + store volatile i64 %0, ptr %4, align 8, !tbaa !3 + store volatile i64 %0, ptr %3, align 8, !tbaa !3 + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3) + ret void +} + +; Function Attrs: argmemonly nofree nounwind +define x86_fastcallcc void @storei64_stk_dyn_align(i64 noundef %0, i64 noundef %1) { +; CHECK-LABEL: storei64_stk_dyn_align: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s17, 40(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -288(, %s11) +; CHECK-NEXT: and %s11, %s11, (59)1 +; CHECK-NEXT: or %s17, 0, %s11 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: lea %s0, 15(, %s1) +; CHECK-NEXT: and 
%s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: st %s2, (, %s0) +; CHECK-NEXT: st %s2, 256(, %s17) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s17, 40(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %3 = alloca i64, align 32 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3) + %4 = alloca i8, i64 %1, align 8 + store volatile i64 %0, ptr %4, align 8, !tbaa !3 + store volatile i64 %0, ptr %3, align 32, !tbaa !10 + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3) + ret void +} + +; Function Attrs: argmemonly nofree nounwind +define x86_fastcallcc void @storei64_stk_dyn_align2(i64 noundef %0, i64 noundef %1) { +; CHECK-LABEL: storei64_stk_dyn_align2: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s17, 40(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -320(, %s11) +; CHECK-NEXT: and %s11, %s11, (58)1 +; CHECK-NEXT: or %s17, 0, %s11 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: lea %s0, 15(, %s1) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: st %s2, (, %s0) +; CHECK-NEXT: st %s2, 288(, %s17) +; CHECK-NEXT: st %s2, 256(, %s17) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s17, 40(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %3 = alloca i64, align 32 + %4 = alloca i64, align 64 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3) + %5 = alloca i8, i64 %1, align 8 + store volatile i64 %0, ptr %5, align 8, !tbaa !3 + store volatile i64 %0, ptr %3, align 32, !tbaa !10 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %4) + store volatile i64 %0, ptr %4, align 64, !tbaa !10 + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %4) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3) + ret void +} + +; Function Attrs: nounwind +define x86_fastcallcc void @storei64_stk_dyn_align_spill(i64 noundef %0, i64 noundef %1) { +; CHECK-LABEL: storei64_stk_dyn_align_spill: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s17, 40(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -288(, %s11) +; CHECK-NEXT: and %s11, %s11, (59)1 +; CHECK-NEXT: or %s17, 0, %s11 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB6_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: st %s18, 48(, %s9) # 8-byte Folded Spill +; CHECK-NEXT: st %s19, 56(, %s9) # 8-byte Folded Spill +; CHECK-NEXT: st %s20, 64(, %s9) # 8-byte Folded Spill +; CHECK-NEXT: or %s18, 0, %s1 +; CHECK-NEXT: 
or %s19, 0, %s0 +; CHECK-NEXT: lea %s0, 15(, %s1) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s20, 240(, %s11) +; CHECK-NEXT: lea %s0, dummy@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, dummy@hi(, %s0) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, pass@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, pass@hi(, %s0) +; CHECK-NEXT: or %s0, 0, %s18 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: st %s19, (, %s20) +; CHECK-NEXT: st %s19, 256(, %s17) +; CHECK-NEXT: ld %s20, 64(, %s9) # 8-byte Folded Reload +; CHECK-NEXT: ld %s19, 56(, %s9) # 8-byte Folded Reload +; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s17, 40(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %3 = alloca i64, align 32 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3) + %4 = alloca i8, i64 %1, align 8 + tail call void (...) @dummy() + tail call void @pass(i64 noundef %1) + store volatile i64 %0, ptr %4, align 8, !tbaa !3 + store volatile i64 %0, ptr %3, align 32, !tbaa !10 + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3) + ret void +} + +declare void @dummy(...) + +declare void @pass(i64 noundef) + +; Function Attrs: argmemonly nofree nounwind +define x86_fastcallcc void @storequad_stk(fp128 noundef %0) { +; CHECK-LABEL: storequad_stk: +; CHECK: # %bb.0: +; CHECK-NEXT: adds.l %s11, -16, %s11 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB7_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: st %s1, (, %s11) +; CHECK-NEXT: st %s0, 8(, %s11) +; CHECK-NEXT: adds.l %s11, 16, %s11 +; CHECK-NEXT: b.l.t (, %s10) + %2 = alloca fp128, align 16 + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %2) + store volatile fp128 %0, ptr %2, align 16, !tbaa !12 + call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %2) + ret void +} + +; Function Attrs: argmemonly nofree nounwind +define x86_fastcallcc void @storequad_stk_big(fp128 noundef %0, i64 noundef %1) { +; CHECK-LABEL: storequad_stk_big: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s13, 2147483632 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: brge.l %s11, %s8, .LBB8_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB8_4: +; CHECK-NEXT: lea %s13, -2147483648 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s13, (%s11, %s13) +; CHECK-NEXT: st %s1, (, %s13) +; CHECK-NEXT: st %s0, 8(, %s13) +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: lea %s1, 2147483640 +; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: st %s2, 8(%s0, %s11) +; CHECK-NEXT: lea %s0, 8(, %s0) +; CHECK-NEXT: brne.l %s0, %s1, .LBB8_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: lea %s13, -2147483632 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, (%s13, %s11) +; CHECK-NEXT: b.l.t (, %s10) 
+ %3 = alloca fp128, align 16 + %4 = alloca [268435455 x i64], align 8 + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3) + call void @llvm.lifetime.start.p0(i64 2147483640, ptr nonnull %4) + store volatile fp128 %0, ptr %3, align 16, !tbaa !12 + br label %6 + +5: ; preds = %6 + call void @llvm.lifetime.end.p0(i64 2147483640, ptr nonnull %4) + call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3) + ret void + +6: ; preds = %2, %6 + %7 = phi i64 [ 0, %2 ], [ %9, %6 ] + %8 = getelementptr inbounds [268435455 x i64], ptr %4, i64 0, i64 %7 + store volatile i64 %1, ptr %8, align 8, !tbaa !3 + %9 = add nuw nsw i64 %7, 1 + %10 = icmp eq i64 %9, 268435455 + br i1 %10, label %5, label %6, !llvm.loop !14 +} + +; Function Attrs: argmemonly nofree nounwind +define x86_fastcallcc void @storequad_stk_big2(fp128 noundef %0, i64 noundef %1) { +; CHECK-LABEL: storequad_stk_big2: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s13, 2147483632 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: brge.l %s11, %s8, .LBB9_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB9_4: +; CHECK-NEXT: lea %s13, -2147483648 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s13, (%s11, %s13) +; CHECK-NEXT: st %s1, (, %s13) +; CHECK-NEXT: st %s0, 8(, %s13) +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: lea %s1, -2147483648 +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: st %s2, (%s0, %s11) +; CHECK-NEXT: lea %s0, 8(, %s0) +; CHECK-NEXT: brne.l %s0, %s1, .LBB9_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: lea %s13, -2147483632 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, (%s13, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %3 = alloca fp128, align 16 + %4 = alloca [268435456 x i64], align 8 + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3) + call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %4) + store volatile fp128 %0, ptr %3, align 16, !tbaa !12 + br label %6 + +5: ; preds = %6 + call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %4) + call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3) + ret void + +6: ; preds = %2, %6 + %7 = phi i64 [ 0, %2 ], [ %9, %6 ] + %8 = getelementptr inbounds [268435456 x i64], ptr %4, i64 0, i64 %7 + store volatile i64 %1, ptr %8, align 8, !tbaa !3 + %9 = add nuw nsw i64 %7, 1 + %10 = icmp eq i64 %9, 268435456 + br i1 %10, label %5, label %6, !llvm.loop !15 +} + +; Function Attrs: argmemonly nofree nounwind +define x86_fastcallcc void @storequad_stk_dyn(fp128 noundef %0, i64 noundef %1) { +; CHECK-LABEL: storequad_stk_dyn: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s11, -256(, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: or %s4, 0, %s0 +; CHECK-NEXT: or %s5, 0, %s1 +; CHECK-NEXT: lea %s0, 15(, %s2) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack@lo +; CHECK-NEXT: and %s1, 
+; Function Attrs: argmemonly nofree nounwind
+define x86_fastcallcc void @storequad_stk_dyn(fp128 noundef %0, i64 noundef %1) {
+; CHECK-LABEL: storequad_stk_dyn:
+; CHECK: # %bb.0:
+; CHECK-NEXT: st %s9, (, %s11)
+; CHECK-NEXT: st %s10, 8(, %s11)
+; CHECK-NEXT: or %s9, 0, %s11
+; CHECK-NEXT: lea %s11, -256(, %s11)
+; CHECK-NEXT: brge.l.t %s11, %s8, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ld %s61, 24(, %s14)
+; CHECK-NEXT: or %s62, 0, %s0
+; CHECK-NEXT: lea %s63, 315
+; CHECK-NEXT: shm.l %s63, (%s61)
+; CHECK-NEXT: shm.l %s8, 8(%s61)
+; CHECK-NEXT: shm.l %s11, 16(%s61)
+; CHECK-NEXT: monc
+; CHECK-NEXT: or %s0, 0, %s62
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: or %s4, 0, %s0
+; CHECK-NEXT: or %s5, 0, %s1
+; CHECK-NEXT: lea %s0, 15(, %s2)
+; CHECK-NEXT: and %s0, -16, %s0
+; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
+; CHECK-NEXT: bsic %s10, (, %s12)
+; CHECK-NEXT: lea %s0, 240(, %s11)
+; CHECK-NEXT: st %s4, 8(, %s0)
+; CHECK-NEXT: st %s5, (, %s0)
+; CHECK-NEXT: st %s5, -16(, %s9)
+; CHECK-NEXT: st %s4, -8(, %s9)
+; CHECK-NEXT: or %s11, 0, %s9
+; CHECK-NEXT: ld %s10, 8(, %s11)
+; CHECK-NEXT: ld %s9, (, %s11)
+; CHECK-NEXT: b.l.t (, %s10)
+ %3 = alloca fp128, align 16
+ call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
+ %4 = alloca i8, i64 %1, align 16
+ store volatile fp128 %0, ptr %4, align 16, !tbaa !12
+ store volatile fp128 %0, ptr %3, align 16, !tbaa !12
+ call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
+ ret void
+}
+
+; Function Attrs: argmemonly nofree nounwind
+define x86_fastcallcc void @storequad_stk_dyn_align(fp128 noundef %0, i64 noundef %1) {
+; CHECK-LABEL: storequad_stk_dyn_align:
+; CHECK: # %bb.0:
+; CHECK-NEXT: st %s9, (, %s11)
+; CHECK-NEXT: st %s10, 8(, %s11)
+; CHECK-NEXT: st %s17, 40(, %s11)
+; CHECK-NEXT: or %s9, 0, %s11
+; CHECK-NEXT: lea %s11, -288(, %s11)
+; CHECK-NEXT: and %s11, %s11, (59)1
+; CHECK-NEXT: or %s17, 0, %s11
+; CHECK-NEXT: brge.l.t %s11, %s8, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ld %s61, 24(, %s14)
+; CHECK-NEXT: or %s62, 0, %s0
+; CHECK-NEXT: lea %s63, 315
+; CHECK-NEXT: shm.l %s63, (%s61)
+; CHECK-NEXT: shm.l %s8, 8(%s61)
+; CHECK-NEXT: shm.l %s11, 16(%s61)
+; CHECK-NEXT: monc
+; CHECK-NEXT: or %s0, 0, %s62
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: or %s4, 0, %s0
+; CHECK-NEXT: or %s5, 0, %s1
+; CHECK-NEXT: lea %s0, 15(, %s2)
+; CHECK-NEXT: and %s0, -16, %s0
+; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
+; CHECK-NEXT: bsic %s10, (, %s12)
+; CHECK-NEXT: lea %s0, 240(, %s11)
+; CHECK-NEXT: st %s4, 8(, %s0)
+; CHECK-NEXT: st %s5, (, %s0)
+; CHECK-NEXT: st %s5, 256(, %s17)
+; CHECK-NEXT: st %s4, 264(, %s17)
+; CHECK-NEXT: or %s11, 0, %s9
+; CHECK-NEXT: ld %s17, 40(, %s11)
+; CHECK-NEXT: ld %s10, 8(, %s11)
+; CHECK-NEXT: ld %s9, (, %s11)
+; CHECK-NEXT: b.l.t (, %s10)
+ %3 = alloca fp128, align 32
+ call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
+ %4 = alloca i8, i64 %1, align 16
+ store volatile fp128 %0, ptr %4, align 16, !tbaa !12
+ store volatile fp128 %0, ptr %3, align 32, !tbaa !16
+ call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
+ ret void
+}
+
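+; storequad_stk_dyn_align2 combines the dynamic alloca with two over-aligned
+; fp128 objects (align 32 and align 64); the CHECK lines below show the stack
+; pointer being realigned with a (58)1 mask and both expanded quad stores
+; going through %s17, which holds the realigned frame value.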
+; Function Attrs: argmemonly nofree nounwind
+define x86_fastcallcc void @storequad_stk_dyn_align2(fp128 noundef %0, i64 noundef %1) {
+; CHECK-LABEL: storequad_stk_dyn_align2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: st %s9, (, %s11)
+; CHECK-NEXT: st %s10, 8(, %s11)
+; CHECK-NEXT: st %s17, 40(, %s11)
+; CHECK-NEXT: or %s9, 0, %s11
+; CHECK-NEXT: lea %s11, -320(, %s11)
+; CHECK-NEXT: and %s11, %s11, (58)1
+; CHECK-NEXT: or %s17, 0, %s11
+; CHECK-NEXT: brge.l.t %s11, %s8, .LBB12_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ld %s61, 24(, %s14)
+; CHECK-NEXT: or %s62, 0, %s0
+; CHECK-NEXT: lea %s63, 315
+; CHECK-NEXT: shm.l %s63, (%s61)
+; CHECK-NEXT: shm.l %s8, 8(%s61)
+; CHECK-NEXT: shm.l %s11, 16(%s61)
+; CHECK-NEXT: monc
+; CHECK-NEXT: or %s0, 0, %s62
+; CHECK-NEXT: .LBB12_2:
+; CHECK-NEXT: or %s4, 0, %s0
+; CHECK-NEXT: or %s5, 0, %s1
+; CHECK-NEXT: lea %s0, 15(, %s2)
+; CHECK-NEXT: and %s0, -16, %s0
+; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
+; CHECK-NEXT: bsic %s10, (, %s12)
+; CHECK-NEXT: lea %s0, 240(, %s11)
+; CHECK-NEXT: st %s4, 8(, %s0)
+; CHECK-NEXT: st %s5, (, %s0)
+; CHECK-NEXT: st %s5, 288(, %s17)
+; CHECK-NEXT: st %s4, 296(, %s17)
+; CHECK-NEXT: st %s5, 256(, %s17)
+; CHECK-NEXT: st %s4, 264(, %s17)
+; CHECK-NEXT: or %s11, 0, %s9
+; CHECK-NEXT: ld %s17, 40(, %s11)
+; CHECK-NEXT: ld %s10, 8(, %s11)
+; CHECK-NEXT: ld %s9, (, %s11)
+; CHECK-NEXT: b.l.t (, %s10)
+ %3 = alloca fp128, align 32
+ %4 = alloca fp128, align 64
+ call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
+ %5 = alloca i8, i64 %1, align 16
+ store volatile fp128 %0, ptr %5, align 16, !tbaa !12
+ store volatile fp128 %0, ptr %3, align 32, !tbaa !16
+ call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %4)
+ store volatile fp128 %0, ptr %4, align 64, !tbaa !16
+ call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %4)
+ call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
+ ret void
+}
+
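+; storequad_stk_dyn_align_spill keeps the fp128 value live across the calls
+; to @dummy and @pass, so its two halves are moved into callee-saved
+; registers (%s20/%s21, spilled and reloaded around the body) before the
+; expanded quad stores are emitted.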
+; Function Attrs: nounwind
+define x86_fastcallcc void @storequad_stk_dyn_align_spill(fp128 noundef %0, i64 noundef %1) {
+; CHECK-LABEL: storequad_stk_dyn_align_spill:
+; CHECK: # %bb.0:
+; CHECK-NEXT: st %s9, (, %s11)
+; CHECK-NEXT: st %s10, 8(, %s11)
+; CHECK-NEXT: st %s17, 40(, %s11)
+; CHECK-NEXT: or %s9, 0, %s11
+; CHECK-NEXT: lea %s11, -288(, %s11)
+; CHECK-NEXT: and %s11, %s11, (59)1
+; CHECK-NEXT: or %s17, 0, %s11
+; CHECK-NEXT: brge.l.t %s11, %s8, .LBB13_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ld %s61, 24(, %s14)
+; CHECK-NEXT: or %s62, 0, %s0
+; CHECK-NEXT: lea %s63, 315
+; CHECK-NEXT: shm.l %s63, (%s61)
+; CHECK-NEXT: shm.l %s8, 8(%s61)
+; CHECK-NEXT: shm.l %s11, 16(%s61)
+; CHECK-NEXT: monc
+; CHECK-NEXT: or %s0, 0, %s62
+; CHECK-NEXT: .LBB13_2:
+; CHECK-NEXT: st %s18, 48(, %s9) # 8-byte Folded Spill
+; CHECK-NEXT: st %s19, 56(, %s9) # 8-byte Folded Spill
+; CHECK-NEXT: st %s20, 64(, %s9) # 8-byte Folded Spill
+; CHECK-NEXT: st %s21, 72(, %s9) # 8-byte Folded Spill
+; CHECK-NEXT: or %s18, 0, %s2
+; CHECK-NEXT: or %s20, 0, %s0
+; CHECK-NEXT: or %s21, 0, %s1
+; CHECK-NEXT: lea %s0, 15(, %s2)
+; CHECK-NEXT: and %s0, -16, %s0
+; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
+; CHECK-NEXT: bsic %s10, (, %s12)
+; CHECK-NEXT: lea %s19, 240(, %s11)
+; CHECK-NEXT: lea %s0, dummy@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s12, dummy@hi(, %s0)
+; CHECK-NEXT: bsic %s10, (, %s12)
+; CHECK-NEXT: lea %s0, pass@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s12, pass@hi(, %s0)
+; CHECK-NEXT: or %s0, 0, %s18
+; CHECK-NEXT: bsic %s10, (, %s12)
+; CHECK-NEXT: st %s20, 8(, %s19)
+; CHECK-NEXT: st %s21, (, %s19)
+; CHECK-NEXT: st %s21, 256(, %s17)
+; CHECK-NEXT: st %s20, 264(, %s17)
+; CHECK-NEXT: ld %s21, 72(, %s9) # 8-byte Folded Reload
+; CHECK-NEXT: ld %s20, 64(, %s9) # 8-byte Folded Reload
+; CHECK-NEXT: ld %s19, 56(, %s9) # 8-byte Folded Reload
+; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload
+; CHECK-NEXT: or %s11, 0, %s9
+; CHECK-NEXT: ld %s17, 40(, %s11)
+; CHECK-NEXT: ld %s10, 8(, %s11)
+; CHECK-NEXT: ld %s9, (, %s11)
+; CHECK-NEXT: b.l.t (, %s10)
+ %3 = alloca fp128, align 32
+ call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
+ %4 = alloca i8, i64 %1, align 16
+ tail call void (...) @dummy()
+ tail call void @pass(i64 noundef %1)
+ store volatile fp128 %0, ptr %4, align 16, !tbaa !12
+ store volatile fp128 %0, ptr %3, align 32, !tbaa !16
+ call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
+ ret void
+}
+
+!3 = !{!4, !4, i64 0}
+!4 = !{!"long", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = distinct !{!7, !8}
+!8 = !{!"llvm.loop.mustprogress"}
+!9 = distinct !{!9, !8}
+!10 = !{!11, !4, i64 0}
+!11 = !{!"", !4, i64 0}
+!12 = !{!13, !13, i64 0}
+!13 = !{!"long double", !5, i64 0}
+!14 = distinct !{!14, !8}
+!15 = distinct !{!15, !8}
+!16 = !{!17, !13, i64 0}
+!17 = !{!"", !13, i64 0}