diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -29,6 +29,8 @@
 class HexagonFrameLowering : public TargetFrameLowering {
 public:
+  // First register which could possibly hold a variable argument.
+  int FirstVarArgSavedReg;
   explicit HexagonFrameLowering()
       : TargetFrameLowering(StackGrowsDown, Align(8), 0, Align::None(), true) {}
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -395,6 +395,9 @@
       MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const {
   static unsigned ShrinkCounter = 0;
+  if (MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl() &&
+      MF.getFunction().isVarArg())
+    return;
   if (ShrinkLimit.getPosition()) {
     if (ShrinkCounter >= ShrinkLimit)
       return;
@@ -622,6 +625,118 @@
   DebugLoc dl = MBB.findDebugLoc(InsertPt);
+  if (MF.getFunction().isVarArg() &&
+      MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl()) {
+    // Calculate the size of the register saved area.
+    int NumVarArgRegs = 6 - FirstVarArgSavedReg;
+    int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0)
+                                               ? NumVarArgRegs * 4
+                                               : NumVarArgRegs * 4 + 4;
+    if (RegisterSavedAreaSizePlusPadding > 0) {
+      // Decrement the stack pointer by the size of the register saved area
+      // plus padding, if any.
+      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
+          .addReg(SP)
+          .addImm(-RegisterSavedAreaSizePlusPadding)
+          .setMIFlag(MachineInstr::FrameSetup);
+
+      int NumBytes = 0;
+      // Copy all the named arguments below the register saved area.
+      auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
+      for (int i = HMFI.getFirstNamedArgFrameIndex(),
+               e = HMFI.getLastNamedArgFrameIndex(); i >= e; --i) {
+        int ObjSize = MFI.getObjectSize(i);
+        int ObjAlign = MFI.getObjectAlignment(i);
+
+        // Determine the kind of load/store that should be used.
+        unsigned LDOpc, STOpc;
+        int OpcodeChecker = ObjAlign;
+
+        // Handle cases where alignment of an object is > its size.
+        if (ObjSize < ObjAlign) {
+          if (ObjSize <= 1)
+            OpcodeChecker = 1;
+          else if (ObjSize <= 2)
+            OpcodeChecker = 2;
+          else if (ObjSize <= 4)
+            OpcodeChecker = 4;
+          else if (ObjSize > 4)
+            OpcodeChecker = 8;
+        }
+
+        switch (OpcodeChecker) {
+        case 1:
+          LDOpc = Hexagon::L2_loadrb_io;
+          STOpc = Hexagon::S2_storerb_io;
+          break;
+        case 2:
+          LDOpc = Hexagon::L2_loadrh_io;
+          STOpc = Hexagon::S2_storerh_io;
+          break;
+        case 4:
+          LDOpc = Hexagon::L2_loadri_io;
+          STOpc = Hexagon::S2_storeri_io;
+          break;
+        case 8:
+        default:
+          LDOpc = Hexagon::L2_loadrd_io;
+          STOpc = Hexagon::S2_storerd_io;
+          break;
+        }
+
+        unsigned RegUsed = LDOpc == Hexagon::L2_loadrd_io ? Hexagon::D3
+                                                          : Hexagon::R6;
+        int LoadStoreCount = ObjSize / OpcodeChecker;
+
+        if (ObjSize % OpcodeChecker)
+          ++LoadStoreCount;
+
+        // Get the start location of the load. NumBytes is basically the
+        // offset from the stack pointer of the previous function, which
+        // would be the caller in this case, as this function has a variable
+        // argument list.
+        if (NumBytes != 0)
+          NumBytes = alignTo(NumBytes, ObjAlign);
+
+        int Count = 0;
+        while (Count < LoadStoreCount) {
+          // Load the value of the named argument on stack.
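+          // The copy goes through a caller-saved scratch register (R6, or
+          // the R7:R6 pair for 64-bit accesses) rather than directly
+          // between the two stack locations.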
+          BuildMI(MBB, InsertPt, dl, HII.get(LDOpc), RegUsed)
+              .addReg(SP)
+              .addImm(RegisterSavedAreaSizePlusPadding +
+                      ObjAlign * Count + NumBytes)
+              .setMIFlag(MachineInstr::FrameSetup);
+
+          // Store it below the register saved area plus padding.
+          BuildMI(MBB, InsertPt, dl, HII.get(STOpc))
+              .addReg(SP)
+              .addImm(ObjAlign * Count + NumBytes)
+              .addReg(RegUsed)
+              .setMIFlag(MachineInstr::FrameSetup);
+
+          Count++;
+        }
+        NumBytes += MFI.getObjectSize(i);
+      }
+
+      // Make NumBytes 8-byte aligned.
+      NumBytes = alignTo(NumBytes, 8);
+
+      // If the number of registers having variable arguments is odd,
+      // leave 4 bytes of padding to get to the location where the first
+      // variable argument which was passed through a register was copied.
+      NumBytes = (NumVarArgRegs % 2 == 0) ? NumBytes : NumBytes + 4;
+
+      for (int j = FirstVarArgSavedReg, i = 0; j < 6; ++j, ++i) {
+        BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_storeri_io))
+            .addReg(SP)
+            .addImm(NumBytes + 4 * i)
+            .addReg(Hexagon::R0 + j)
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
+    }
+  }
+
   if (hasFP(MF)) {
     insertAllocframe(MBB, InsertPt, NumBytes);
     if (AlignStack) {
@@ -655,7 +770,16 @@
   if (!hasFP(MF)) {
     MachineFrameInfo &MFI = MF.getFrameInfo();
-    if (unsigned NumBytes = MFI.getStackSize()) {
+    unsigned NumBytes = MFI.getStackSize();
+    if (MF.getFunction().isVarArg() &&
+        MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl()) {
+      // On Hexagon Linux, deallocate the stack for the register saved area.
+      int NumVarArgRegs = 6 - FirstVarArgSavedReg;
+      int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0) ?
+        (NumVarArgRegs * 4) : (NumVarArgRegs * 4 + 4);
+      NumBytes += RegisterSavedAreaSizePlusPadding;
+    }
+    if (NumBytes) {
       BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
         .addReg(SP)
         .addImm(NumBytes);
@@ -710,24 +834,49 @@
     NeedsDeallocframe = false;
   }
-  if (!NeedsDeallocframe)
-    return;
-  // If the returning instruction is PS_jmpret, replace it with dealloc_return,
-  // otherwise just add deallocframe. The function could be returning via a
-  // tail call.
-  if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {
-    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
+  if (!MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl() ||
+      !MF.getFunction().isVarArg()) {
+    if (!NeedsDeallocframe)
+      return;
+    // If the returning instruction is PS_jmpret, replace it with
+    // dealloc_return, otherwise just add deallocframe. The function
+    // could be returning via a tail call.
+    if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {
+      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
        .addDef(Hexagon::D15)
        .addReg(Hexagon::R30);
-    return;
-  }
-  unsigned NewOpc = Hexagon::L4_return;
-  MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc))
+      return;
+    }
+    unsigned NewOpc = Hexagon::L4_return;
+    MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc))
       .addDef(Hexagon::D15)
       .addReg(Hexagon::R30);
-  // Transfer the function live-out registers.
-  NewI->copyImplicitOps(MF, *RetI);
-  MBB.erase(RetI);
+    // Transfer the function live-out registers.
+    NewI->copyImplicitOps(MF, *RetI);
+    MBB.erase(RetI);
+  } else {
+    // A vararg function under the Linux ABI needs an explicit
+    // L2_deallocframe instruction here (unless a RESTORE_DEALLOC_BEFORE_TAILCALL
+    // pseudo already provides one), followed by deallocation of the
+    // register saved area.
+    // Calculate the size of register saved area.
+    int NumVarArgRegs = 6 - FirstVarArgSavedReg;
+    int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0) ?
+      (NumVarArgRegs * 4) : (NumVarArgRegs * 4 + 4);
+
+    MachineBasicBlock::iterator Term = MBB.getFirstTerminator();
+    MachineBasicBlock::iterator I = (Term == MBB.begin()) ?
+        MBB.end() : std::prev(Term);
+    if (I == MBB.end() ||
+        (I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT &&
+         I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC &&
+         I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 &&
+         I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC))
+      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
+          .addDef(Hexagon::D15)
+          .addReg(Hexagon::R30);
+    if (RegisterSavedAreaSizePlusPadding != 0)
+      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
+          .addReg(SP)
+          .addImm(RegisterSavedAreaSizePlusPadding);
+  }
 }
 
 void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,
@@ -2473,6 +2622,8 @@
 /// checks are performed, which may still lead to the inline code.
 bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
       const CSIVect &CSI) const {
+  if (MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl())
+    return true;
   if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
     return true;
   if (!hasFP(MF))
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -248,6 +248,7 @@
   }
   SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -393,9 +393,12 @@
   if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
     Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);
+  // Linux ABI treats var-arg calls the same way as regular ones.
+  bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
+
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
-  HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
+  HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs, *DAG.getContext(),
                         NumParams);
   if (Subtarget.useHVXOps())
@@ -750,9 +753,13 @@
   MachineFrameInfo &MFI = MF.getFrameInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
+  // Linux ABI treats var-arg calls the same way as regular ones.
+  bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
+
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
-  HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
+  HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs,
+                        *DAG.getContext(),
                         MF.getFunction().getFunctionType()->getNumParams());
   if (Subtarget.useHVXOps())
@@ -766,8 +773,24 @@
   // caller's stack is passed only when the struct size is smaller than (or
   // equal to) 8 bytes. If not, no address will be passed into the callee and
   // the callee returns the result directly through R0/R1.
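+  // NextSingleReg returns the index of the single register that follows the
+  // given register (or register pair), i.e. the first register that could
+  // still hold a variable argument.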
+  auto NextSingleReg = [] (const TargetRegisterClass &RC, unsigned Reg) {
+    switch (RC.getID()) {
+    case Hexagon::IntRegsRegClassID:
+      return Reg - Hexagon::R0 + 1;
+    case Hexagon::DoubleRegsRegClassID:
+      return (Reg - Hexagon::D0 + 1) * 2;
+    case Hexagon::HvxVRRegClassID:
+      return Reg - Hexagon::V0 + 1;
+    case Hexagon::HvxWRRegClassID:
+      return (Reg - Hexagon::W0 + 1) * 2;
+    }
+    llvm_unreachable("Unexpected register class");
+  };
+  auto &HFL = const_cast<HexagonFrameLowering&>(*Subtarget.getFrameLowering());
   auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
+  HFL.FirstVarArgSavedReg = 0;
+  HMFI.setFirstNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
@@ -811,6 +834,7 @@
       }
       InVals.push_back(Copy);
       MRI.addLiveIn(VA.getLocReg(), VReg);
+      HFL.FirstVarArgSavedReg = NextSingleReg(*RC, VA.getLocReg());
     } else {
       assert(VA.isMemLoc() && "Argument should be passed in memory");
@@ -838,8 +862,48 @@
     }
   }
+  if (IsVarArg && Subtarget.isEnvironmentMusl()) {
+    for (int i = HFL.FirstVarArgSavedReg; i < 6; i++)
+      MRI.addLiveIn(Hexagon::R0+i);
+  }
+
+  if (IsVarArg && Subtarget.isEnvironmentMusl()) {
+    HMFI.setFirstNamedArgFrameIndex(HMFI.getFirstNamedArgFrameIndex() - 1);
+    HMFI.setLastNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
+
+    // Create Frame index for the start of register saved area.
+    int NumVarArgRegs = 6 - HFL.FirstVarArgSavedReg;
+    bool RequiresPadding = (NumVarArgRegs & 1);
+    int RegSaveAreaSizePlusPadding = RequiresPadding
+                                         ? (NumVarArgRegs + 1) * 4
+                                         : NumVarArgRegs * 4;
+
+    if (RegSaveAreaSizePlusPadding > 0) {
+      // The offset to saved register area should be 8 byte aligned.
+      int RegAreaStart = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
+      if (!(RegAreaStart % 8))
+        RegAreaStart = (RegAreaStart + 7) & -8;
+      int RegSaveAreaFrameIndex =
+        MFI.CreateFixedObject(RegSaveAreaSizePlusPadding, RegAreaStart, true);
+      HMFI.setRegSavedAreaStartFrameIndex(RegSaveAreaFrameIndex);
+
+      // This will point to the next argument passed via stack.
+      int Offset = RegAreaStart + RegSaveAreaSizePlusPadding;
+      int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
+      HMFI.setVarArgsFrameIndex(FI);
+    } else {
+      // This will point to the next argument passed via stack, when
+      // there is no saved register area.
+      int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
+      int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
+      HMFI.setRegSavedAreaStartFrameIndex(FI);
+      HMFI.setVarArgsFrameIndex(FI);
+    }
+  }
+
-  if (IsVarArg) {
+  if (IsVarArg && !Subtarget.isEnvironmentMusl()) {
     // This will point to the next argument passed via stack.
     int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
     int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
@@ -857,8 +921,82 @@
   HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
   SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-  return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
-                      MachinePointerInfo(SV));
+
+  if (!Subtarget.isEnvironmentMusl()) {
+    return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
+                        MachinePointerInfo(SV));
+  }
+  auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
+  auto &HFL = *Subtarget.getFrameLowering();
+  SDLoc DL(Op);
+  SmallVector<SDValue, 8> MemOps;
+
+  // Get frame index of va_list.
+  SDValue FIN = Op.getOperand(1);
+
+  // If the first vararg register is odd, add 4 bytes to the start of the
+  // saved register area to point to the first register location.
+  // This is because the saved register area has to be 8 byte aligned.
+  // In case of an odd start register, there will be 4 bytes of padding at
+  // the beginning of the saved register area. If all registers are used up,
+  // the following condition will handle it correctly.
+  SDValue SavedRegAreaStartFrameIndex =
+    DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(), MVT::i32);
+
+  auto PtrVT = getPointerTy(DAG.getDataLayout());
+
+  if (HFL.FirstVarArgSavedReg & 1)
+    SavedRegAreaStartFrameIndex =
+      DAG.getNode(ISD::ADD, DL, PtrVT,
+                  DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(),
+                                    MVT::i32),
+                  DAG.getIntPtrConstant(4, DL));
+
+  // Store the saved register area start pointer.
+  SDValue Store =
+    DAG.getStore(Op.getOperand(0), DL,
+                 SavedRegAreaStartFrameIndex,
+                 FIN, MachinePointerInfo(SV));
+  MemOps.push_back(Store);
+
+  // Store saved register area end pointer.
+  FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
+                    FIN, DAG.getIntPtrConstant(4, DL));
+  Store = DAG.getStore(Op.getOperand(0), DL,
+                       DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
+                                         PtrVT),
+                       FIN, MachinePointerInfo(SV, 4));
+  MemOps.push_back(Store);
+
+  // Store overflow area pointer.
+  FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
+                    FIN, DAG.getIntPtrConstant(4, DL));
+  Store = DAG.getStore(Op.getOperand(0), DL,
+                       DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
+                                         PtrVT),
+                       FIN, MachinePointerInfo(SV, 8));
+  MemOps.push_back(Store);
+
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
+}
+
+SDValue
+HexagonTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
+  // Assert that the Linux ABI is enabled for the current compilation.
+  assert(Subtarget.isEnvironmentMusl() && "Linux ABI should be enabled");
+  SDValue Chain = Op.getOperand(0);
+  SDValue DestPtr = Op.getOperand(1);
+  SDValue SrcPtr = Op.getOperand(2);
+  const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+  SDLoc DL(Op);
+  // Size of the va_list is 12 bytes as it has 3 pointers. Therefore,
+  // we need to memcpy 12 bytes from va_list to another similar list.
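+  // The three pointers are the __current_saved_reg_area_pointer, the
+  // __saved_reg_area_end_pointer and the __overflow_area_pointer that
+  // LowerVASTART sets up above.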
+  return DAG.getMemcpy(Chain, DL, DestPtr, SrcPtr,
+                       DAG.getIntPtrConstant(12, DL), 4, /*isVolatile*/false,
+                       false, false,
+                       MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
+}
 
 SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
@@ -1375,7 +1513,10 @@
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
   setOperationAction(ISD::VAEND, MVT::Other, Expand);
   setOperationAction(ISD::VAARG, MVT::Other, Expand);
-  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+  if (Subtarget.isEnvironmentMusl())
+    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+  else
+    setOperationAction(ISD::VACOPY, MVT::Other, Expand);
   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
@@ -2928,6 +3069,7 @@
     case ISD::GlobalAddress:        return LowerGLOBALADDRESS(Op, DAG);
     case ISD::BlockAddress:         return LowerBlockAddress(Op, DAG);
     case ISD::GLOBAL_OFFSET_TABLE:  return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+    case ISD::VACOPY:               return LowerVACOPY(Op, DAG);
     case ISD::VASTART:              return LowerVASTART(Op, DAG);
     case ISD::DYNAMIC_STACKALLOC:   return LowerDYNAMIC_STACKALLOC(Op, DAG);
     case ISD::SETCC:                return LowerSETCC(Op, DAG);
diff --git a/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
--- a/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -30,6 +30,9 @@
   unsigned StackAlignBaseVReg = 0;    // Aligned-stack base register (virtual)
   unsigned StackAlignBasePhysReg = 0; // (physical)
   int VarArgsFrameIndex;
+  int RegSavedAreaStartFrameIndex;
+  int FirstNamedArgFrameIndex;
+  int LastNamedArgFrameIndex;
   bool HasClobberLR = false;
   bool HasEHReturn = false;
   std::map<const MachineInstr*, unsigned> PacketInfo;
@@ -46,6 +49,15 @@
   void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; }
   int getVarArgsFrameIndex() { return VarArgsFrameIndex; }
+  void setRegSavedAreaStartFrameIndex(int v) { RegSavedAreaStartFrameIndex = v; }
+  int getRegSavedAreaStartFrameIndex() { return RegSavedAreaStartFrameIndex; }
+
+  void setFirstNamedArgFrameIndex(int v) { FirstNamedArgFrameIndex = v; }
+  int getFirstNamedArgFrameIndex() { return FirstNamedArgFrameIndex; }
+
+  void setLastNamedArgFrameIndex(int v) { LastNamedArgFrameIndex = v; }
+  int getLastNamedArgFrameIndex() { return LastNamedArgFrameIndex; }
+
   void setStartPacket(MachineInstr* MI) {
     PacketInfo[MI] |= Hexagon::StartPacket;
   }
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -85,6 +85,7 @@
 private:
   std::string CPUString;
+  Triple TargetTriple;
   HexagonInstrInfo InstrInfo;
   HexagonRegisterInfo RegInfo;
   HexagonTargetLowering TLInfo;
@@ -96,6 +97,11 @@
   HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
                    const TargetMachine &TM);
+  const Triple &getTargetTriple() const { return TargetTriple; }
+  bool isEnvironmentMusl() const {
+    return TargetTriple.getEnvironment() == Triple::Musl;
+  }
+
   /// getInstrItins - Return the instruction itineraries based on subtarget
   /// selection.
const InstrItineraryData *getInstrItineraryData() const override { diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -79,7 +79,7 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM) : HexagonGenSubtargetInfo(TT, CPU, FS), OptLevel(TM.getOptLevel()), - CPUString(Hexagon_MC::selectHexagonCPU(CPU)), + CPUString(Hexagon_MC::selectHexagonCPU(CPU)), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), RegInfo(getHwMode()), TLInfo(TM, *this), InstrItins(getInstrItineraryForCPU(CPUString)) { diff --git a/llvm/test/CodeGen/Hexagon/vacopy.ll b/llvm/test/CodeGen/Hexagon/vacopy.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vacopy.ll @@ -0,0 +1,39 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv62 -mtriple=hexagon-unknown-linux-musl < %s | FileCheck %s +; CHECK-LABEL: PrintInts: +; CHECK-DAG: memw{{.*}} = r{{[0-9]+}} +; CHECK-DAG: memw{{.*}} = r{{[0-9]+}} +; CHECK-DAG: r{{[0-9]+}}:{{[0-9]+}} = memd{{.*}} +; CHECK-DAG: memd{{.*}} = r{{[0-9]+}}:{{[0-9]+}} + +%struct.__va_list_tag = type { i8*, i8*, i8* } + +; Function Attrs: nounwind +define void @PrintInts(i32 %first, ...) #0 { +entry: + %vl = alloca [1 x %struct.__va_list_tag], align 8 + %vl_count = alloca [1 x %struct.__va_list_tag], align 8 + %arraydecay1 = bitcast [1 x %struct.__va_list_tag]* %vl to i8* + call void @llvm.va_start(i8* %arraydecay1) + %0 = bitcast [1 x %struct.__va_list_tag]* %vl_count to i8* + call void @llvm.va_copy(i8* %0, i8* %arraydecay1) + ret void +} + +; Function Attrs: nounwind +declare void @llvm.va_start(i8*) #1 + +; Function Attrs: nounwind +declare void @llvm.va_copy(i8*, i8*) #1 + +; Function Attrs: nounwind +define i32 @main() #0 { +entry: + tail call void (i32, ...) @PrintInts(i32 undef, i32 20, i32 30, i32 40, i32 50, i32 0) + ret i32 0 +} + +attributes #0 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"Clang 3.1"} diff --git a/llvm/test/CodeGen/Hexagon/vararg-deallocate-sp.ll b/llvm/test/CodeGen/Hexagon/vararg-deallocate-sp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vararg-deallocate-sp.ll @@ -0,0 +1,13 @@ +; RUN: llc -march=hexagon -mtriple=hexagon-unknown-linux-musl < %s | FileCheck %s + +; Test that the compiler deallocates the register saved area on Linux +; for functions that do not need a frame pointer. + +; CHECK: r29 = add(r29,#-[[SIZE:[0-9]+]] +; CHECK: r29 = add(r29,#[[SIZE]]) + +define void @test(...) { +entry: + ret void +} + diff --git a/llvm/test/CodeGen/Hexagon/vararg-linux-abi.ll b/llvm/test/CodeGen/Hexagon/vararg-linux-abi.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vararg-linux-abi.ll @@ -0,0 +1,93 @@ +; RUN: llc -march=hexagon -mtriple=hexagon-unknown-linux-musl < %s | FileCheck %s + +; Check that we update the stack pointer before we do allocframe, so that +; the LR/FP are stored in the location required by the Linux ABI. +; CHECK: r29 = add(r29,#-24) +; CHECK: allocframe + +target triple = "hexagon-unknown-linux" + +%s.0 = type { i8*, i8*, i8* } + +define dso_local i32 @f0(i32 %a0, ...) 
local_unnamed_addr #0 { +b0: + %v0 = alloca [1 x %s.0], align 8 + %v1 = bitcast [1 x %s.0]* %v0 to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* nonnull %v1) #2 + call void @llvm.va_start(i8* nonnull %v1) + %v2 = getelementptr inbounds [1 x %s.0], [1 x %s.0]* %v0, i32 0, i32 0, i32 0 + %v3 = load i8*, i8** %v2, align 8 + %v4 = getelementptr inbounds [1 x %s.0], [1 x %s.0]* %v0, i32 0, i32 0, i32 1 + %v5 = load i8*, i8** %v4, align 4 + %v6 = getelementptr i8, i8* %v3, i32 4 + %v7 = icmp sgt i8* %v6, %v5 + br i1 %v7, label %b1, label %b2 + +b1: ; preds = %b0 + %v8 = getelementptr inbounds [1 x %s.0], [1 x %s.0]* %v0, i32 0, i32 0, i32 2 + %v9 = load i8*, i8** %v8, align 8 + %v10 = getelementptr i8, i8* %v9, i32 4 + store i8* %v10, i8** %v8, align 8 + br label %b2 + +b2: ; preds = %b1, %b0 + %v11 = phi i8* [ %v10, %b1 ], [ %v6, %b0 ] + %v12 = phi i8* [ %v9, %b1 ], [ %v3, %b0 ] + %v13 = bitcast i8* %v12 to i32* + store i8* %v11, i8** %v2, align 8 + %v14 = load i32, i32* %v13, align 4 + %v15 = icmp eq i32 %v14, 0 + br i1 %v15, label %b7, label %b3 + +b3: ; preds = %b2 + %v16 = getelementptr inbounds [1 x %s.0], [1 x %s.0]* %v0, i32 0, i32 0, i32 2 + br label %b4 + +b4: ; preds = %b6, %b3 + %v17 = phi i32 [ %v14, %b3 ], [ %v28, %b6 ] + %v18 = phi i32 [ %a0, %b3 ], [ %v20, %b6 ] + %v19 = phi i8* [ %v11, %b3 ], [ %v25, %b6 ] + %v20 = add nsw i32 %v17, %v18 + %v21 = getelementptr i8, i8* %v19, i32 4 + %v22 = icmp sgt i8* %v21, %v5 + br i1 %v22, label %b5, label %b6 + +b5: ; preds = %b4 + %v23 = load i8*, i8** %v16, align 8 + %v24 = getelementptr i8, i8* %v23, i32 4 + store i8* %v24, i8** %v16, align 8 + br label %b6 + +b6: ; preds = %b5, %b4 + %v25 = phi i8* [ %v24, %b5 ], [ %v21, %b4 ] + %v26 = phi i8* [ %v23, %b5 ], [ %v19, %b4 ] + %v27 = bitcast i8* %v26 to i32* + store i8* %v25, i8** %v2, align 8 + %v28 = load i32, i32* %v27, align 4 + %v29 = icmp eq i32 %v28, 0 + br i1 %v29, label %b7, label %b4 + +b7: ; preds = %b6, %b2 + %v30 = phi i32 [ %a0, %b2 ], [ %v20, %b6 ] + call void @llvm.va_end(i8* nonnull %v1) + call void @llvm.lifetime.end.p0i8(i64 12, i8* nonnull %v1) #2 + ret i32 %v30 +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1 + +; Function Attrs: nounwind +declare void @llvm.va_start(i8*) #2 + +; Function Attrs: nounwind +declare void @llvm.va_end(i8*) #2 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1 + +attributes #0 = { argmemonly nounwind "frame-pointer"="all" } + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"wchar_size", i32 4} diff --git a/llvm/test/CodeGen/Hexagon/vararg.ll b/llvm/test/CodeGen/Hexagon/vararg.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vararg.ll @@ -0,0 +1,97 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv62 -mtriple=hexagon-unknown-linux-musl -O0 < %s | FileCheck %s + +; CHECK-LABEL: foo: + +; Check function prologue generation +; CHECK: r29 = add(r29,#-24) +; CHECK: memw(r29+#4) = r1 +; CHECK: memw(r29+#8) = r2 +; CHECK: memw(r29+#12) = r3 +; CHECK: memw(r29+#16) = r4 +; CHECK: memw(r29+#20) = r5 +; CHECK: r29 = add(r29,#24) + + +%struct.AAA = type { i32, i32, i32, i32 } +%struct.__va_list_tag = type { i8*, i8*, i8* } + +@aaa = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4 +@.str = private unnamed_addr constant [13 x i8] c"result = %d\0A\00", align 1 + +; Function Attrs: nounwind +define i32 @foo(i32 %xx, ...) 
#0 { +entry: + %ap = alloca [1 x %struct.__va_list_tag], align 8 + %arraydecay1 = bitcast [1 x %struct.__va_list_tag]* %ap to i8* + call void @llvm.va_start(i8* %arraydecay1) + %__current_saved_reg_area_pointer_p = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 0 + %__current_saved_reg_area_pointer = load i8*, i8** %__current_saved_reg_area_pointer_p, align 8 + %__saved_reg_area_end_pointer_p = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 1 + %__saved_reg_area_end_pointer = load i8*, i8** %__saved_reg_area_end_pointer_p, align 4 + %__new_saved_reg_area_pointer = getelementptr i8, i8* %__current_saved_reg_area_pointer, i32 4 + %0 = icmp sgt i8* %__new_saved_reg_area_pointer, %__saved_reg_area_end_pointer + %__overflow_area_pointer_p = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 2 + %__overflow_area_pointer = load i8*, i8** %__overflow_area_pointer_p, align 8 + br i1 %0, label %vaarg.on_stack, label %vaarg.end + +vaarg.on_stack: ; preds = %entry + %__overflow_area_pointer.next = getelementptr i8, i8* %__overflow_area_pointer, i32 4 + store i8* %__overflow_area_pointer.next, i8** %__overflow_area_pointer_p, align 8 + br label %vaarg.end + +vaarg.end: ; preds = %entry, %vaarg.on_stack + %__overflow_area_pointer5 = phi i8* [ %__overflow_area_pointer.next, %vaarg.on_stack ], [ %__overflow_area_pointer, %entry ] + %storemerge32 = phi i8* [ %__overflow_area_pointer.next, %vaarg.on_stack ], [ %__new_saved_reg_area_pointer, %entry ] + %vaarg.addr.in = phi i8* [ %__overflow_area_pointer, %vaarg.on_stack ], [ %__current_saved_reg_area_pointer, %entry ] + store i8* %storemerge32, i8** %__current_saved_reg_area_pointer_p, align 8 + %vaarg.addr = bitcast i8* %vaarg.addr.in to i32* + %1 = load i32, i32* %vaarg.addr, align 4 + %__overflow_area_pointer_p4 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 2 + %__overflow_area_pointer.next6 = getelementptr i8, i8* %__overflow_area_pointer5, i32 16 + store i8* %__overflow_area_pointer.next6, i8** %__overflow_area_pointer_p4, align 8 + %bbb.sroa.1.0.idx27 = getelementptr inbounds i8, i8* %__overflow_area_pointer5, i32 12 + %2 = bitcast i8* %bbb.sroa.1.0.idx27 to i32* + %bbb.sroa.1.0.copyload = load i32, i32* %2, align 4 + %add8 = add nsw i32 %bbb.sroa.1.0.copyload, %1 + %__new_saved_reg_area_pointer15 = getelementptr i8, i8* %storemerge32, i32 4 + %3 = icmp sgt i8* %__new_saved_reg_area_pointer15, %__saved_reg_area_end_pointer + br i1 %3, label %vaarg.on_stack17, label %vaarg.end21 + +vaarg.on_stack17: ; preds = %vaarg.end + %__overflow_area_pointer.next20 = getelementptr i8, i8* %__overflow_area_pointer5, i32 20 + store i8* %__overflow_area_pointer.next20, i8** %__overflow_area_pointer_p4, align 8 + br label %vaarg.end21 + +vaarg.end21: ; preds = %vaarg.end, %vaarg.on_stack17 + %storemerge = phi i8* [ %__overflow_area_pointer.next20, %vaarg.on_stack17 ], [ %__new_saved_reg_area_pointer15, %vaarg.end ] + %vaarg.addr22.in = phi i8* [ %__overflow_area_pointer.next6, %vaarg.on_stack17 ], [ %storemerge32, %vaarg.end ] + store i8* %storemerge, i8** %__current_saved_reg_area_pointer_p, align 8 + %vaarg.addr22 = bitcast i8* %vaarg.addr22.in to i32* + %4 = load i32, i32* %vaarg.addr22, align 4 + %add23 = add nsw i32 %add8, %4 + call void @llvm.va_end(i8* %arraydecay1) + ret i32 %add23 +} + +; Function Attrs: nounwind +declare void 
@llvm.va_start(i8*) #1 + +; Function Attrs: nounwind +declare void @llvm.va_end(i8*) #1 + +; Function Attrs: nounwind +define i32 @main() #0 { +entry: + %call = tail call i32 (i32, ...) @foo(i32 undef, i32 2, %struct.AAA* byval align 4 @aaa, i32 4) + %call1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 %call) #1 + ret i32 %call +} + +; Function Attrs: nounwind +declare i32 @printf(i8* nocapture readonly, ...) #0 + +attributes #0 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"Clang 3.1"} diff --git a/llvm/test/CodeGen/Hexagon/vararg_align_check.ll b/llvm/test/CodeGen/Hexagon/vararg_align_check.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vararg_align_check.ll @@ -0,0 +1,186 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv62 -mtriple=hexagon-unknown-linux-musl -O0 < %s | FileCheck %s + +; CHECK-LABEL: foo: + +; Check Function prologue. +; Note. All register numbers and offset are fixed. +; Hence, no need of regular expression. + +; CHECK: r29 = add(r29,#-24) +; CHECK: r7:6 = memd(r29+#24) +; CHECK: memd(r29+#0) = r7:6 +; CHECK: r7:6 = memd(r29+#32) +; CHECK: memd(r29+#8) = r7:6 +; CHECK: r7:6 = memd(r29+#40) +; CHECK: memd(r29+#16) = r7:6 +; CHECK: memw(r29+#28) = r1 +; CHECK: memw(r29+#32) = r2 +; CHECK: memw(r29+#36) = r3 +; CHECK: memw(r29+#40) = r4 +; CHECK: memw(r29+#44) = r5 +; CHECK: r29 = add(r29,#24) + +%struct.AAA = type { i32, i32, i32, i32 } +%struct.BBB = type { i8, i64, i32 } +%struct.__va_list_tag = type { i8*, i8*, i8* } + +@aaa = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4 +@ddd = global { i8, i64, i32, [4 x i8] } { i8 1, i64 1000000, i32 5, [4 x i8] undef }, align 8 +@.str = private unnamed_addr constant [13 x i8] c"result = %d\0A\00", align 1 + +; Function Attrs: nounwind +define i32 @foo(i32 %xx, %struct.BBB* byval align 8 %eee, ...) 
#0 { +entry: + %xx.addr = alloca i32, align 4 + %ap = alloca [1 x %struct.__va_list_tag], align 8 + %d = alloca i32, align 4 + %k = alloca i64, align 8 + %ret = alloca i32, align 4 + %bbb = alloca %struct.AAA, align 4 + store i32 %xx, i32* %xx.addr, align 4 + store i32 0, i32* %ret, align 4 + %x = getelementptr inbounds %struct.BBB, %struct.BBB* %eee, i32 0, i32 0 + %0 = load i8, i8* %x, align 1 + %tobool = trunc i8 %0 to i1 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + store i32 1, i32* %ret, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + %arraydecay1 = bitcast %struct.__va_list_tag* %arraydecay to i8* + call void @llvm.va_start(i8* %arraydecay1) + %arraydecay2 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + br label %vaarg.maybe_reg + +vaarg.maybe_reg: ; preds = %if.end + %__current_saved_reg_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 0 + %__current_saved_reg_area_pointer = load i8*, i8** %__current_saved_reg_area_pointer_p + %__saved_reg_area_end_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 1 + %__saved_reg_area_end_pointer = load i8*, i8** %__saved_reg_area_end_pointer_p + %1 = ptrtoint i8* %__current_saved_reg_area_pointer to i32 + %align_current_saved_reg_area_pointer = add i32 %1, 7 + %align_current_saved_reg_area_pointer3 = and i32 %align_current_saved_reg_area_pointer, -8 + %align_current_saved_reg_area_pointer4 = inttoptr i32 %align_current_saved_reg_area_pointer3 to i8* + %__new_saved_reg_area_pointer = getelementptr i8, i8* %align_current_saved_reg_area_pointer4, i32 8 + %2 = icmp sgt i8* %__new_saved_reg_area_pointer, %__saved_reg_area_end_pointer + br i1 %2, label %vaarg.on_stack, label %vaarg.in_reg + +vaarg.in_reg: ; preds = %vaarg.maybe_reg + %3 = bitcast i8* %align_current_saved_reg_area_pointer4 to i64* + store i8* %__new_saved_reg_area_pointer, i8** %__current_saved_reg_area_pointer_p + br label %vaarg.end + +vaarg.on_stack: ; preds = %vaarg.maybe_reg + %__overflow_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 2 + %__overflow_area_pointer = load i8*, i8** %__overflow_area_pointer_p + %4 = ptrtoint i8* %__overflow_area_pointer to i32 + %align_overflow_area_pointer = add i32 %4, 7 + %align_overflow_area_pointer5 = and i32 %align_overflow_area_pointer, -8 + %align_overflow_area_pointer6 = inttoptr i32 %align_overflow_area_pointer5 to i8* + %__overflow_area_pointer.next = getelementptr i8, i8* %align_overflow_area_pointer6, i32 8 + store i8* %__overflow_area_pointer.next, i8** %__overflow_area_pointer_p + store i8* %__overflow_area_pointer.next, i8** %__current_saved_reg_area_pointer_p + %5 = bitcast i8* %align_overflow_area_pointer6 to i64* + br label %vaarg.end + +vaarg.end: ; preds = %vaarg.on_stack, %vaarg.in_reg + %vaarg.addr = phi i64* [ %3, %vaarg.in_reg ], [ %5, %vaarg.on_stack ] + %6 = load i64, i64* %vaarg.addr + store i64 %6, i64* %k, align 8 + %7 = load i64, i64* %k, align 8 + %conv = trunc i64 %7 to i32 + %div = sdiv i32 %conv, 1000 + %8 = load i32, i32* %ret, align 4 + %add = add nsw i32 %8, %div + store i32 %add, i32* %ret, align 4 + %arraydecay7 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + 
%__overflow_area_pointer_p8 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay7, i32 0, i32 2 + %__overflow_area_pointer9 = load i8*, i8** %__overflow_area_pointer_p8 + %9 = bitcast i8* %__overflow_area_pointer9 to %struct.AAA* + %__overflow_area_pointer.next10 = getelementptr i8, i8* %__overflow_area_pointer9, i32 16 + store i8* %__overflow_area_pointer.next10, i8** %__overflow_area_pointer_p8 + %10 = bitcast %struct.AAA* %bbb to i8* + %11 = bitcast %struct.AAA* %9 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %10, i8* %11, i32 16, i32 4, i1 false) + %d11 = getelementptr inbounds %struct.AAA, %struct.AAA* %bbb, i32 0, i32 3 + %12 = load i32, i32* %d11, align 4 + %13 = load i32, i32* %ret, align 4 + %add12 = add nsw i32 %13, %12 + store i32 %add12, i32* %ret, align 4 + %arraydecay13 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + br label %vaarg.maybe_reg14 + +vaarg.maybe_reg14: ; preds = %vaarg.end + %__current_saved_reg_area_pointer_p15 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 0 + %__current_saved_reg_area_pointer16 = load i8*, i8** %__current_saved_reg_area_pointer_p15 + %__saved_reg_area_end_pointer_p17 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 1 + %__saved_reg_area_end_pointer18 = load i8*, i8** %__saved_reg_area_end_pointer_p17 + %__new_saved_reg_area_pointer19 = getelementptr i8, i8* %__current_saved_reg_area_pointer16, i32 4 + %14 = icmp sgt i8* %__new_saved_reg_area_pointer19, %__saved_reg_area_end_pointer18 + br i1 %14, label %vaarg.on_stack21, label %vaarg.in_reg20 + +vaarg.in_reg20: ; preds = %vaarg.maybe_reg14 + %15 = bitcast i8* %__current_saved_reg_area_pointer16 to i32* + store i8* %__new_saved_reg_area_pointer19, i8** %__current_saved_reg_area_pointer_p15 + br label %vaarg.end25 + +vaarg.on_stack21: ; preds = %vaarg.maybe_reg14 + %__overflow_area_pointer_p22 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 2 + %__overflow_area_pointer23 = load i8*, i8** %__overflow_area_pointer_p22 + %__overflow_area_pointer.next24 = getelementptr i8, i8* %__overflow_area_pointer23, i32 4 + store i8* %__overflow_area_pointer.next24, i8** %__overflow_area_pointer_p22 + store i8* %__overflow_area_pointer.next24, i8** %__current_saved_reg_area_pointer_p15 + %16 = bitcast i8* %__overflow_area_pointer23 to i32* + br label %vaarg.end25 + +vaarg.end25: ; preds = %vaarg.on_stack21, %vaarg.in_reg20 + %vaarg.addr26 = phi i32* [ %15, %vaarg.in_reg20 ], [ %16, %vaarg.on_stack21 ] + %17 = load i32, i32* %vaarg.addr26 + store i32 %17, i32* %d, align 4 + %18 = load i32, i32* %d, align 4 + %19 = load i32, i32* %ret, align 4 + %add27 = add nsw i32 %19, %18 + store i32 %add27, i32* %ret, align 4 + %arraydecay28 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + %arraydecay2829 = bitcast %struct.__va_list_tag* %arraydecay28 to i8* + call void @llvm.va_end(i8* %arraydecay2829) + %20 = load i32, i32* %ret, align 4 + ret i32 %20 +} + +; Function Attrs: nounwind +declare void @llvm.va_start(i8*) #1 + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1 + +; Function Attrs: nounwind +declare void @llvm.va_end(i8*) #1 + +; Function Attrs: nounwind +define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + %x = alloca i32, align 4 + %m = 
alloca i64, align 8 + store i32 0, i32* %retval + store i64 1000000, i64* %m, align 8 + %0 = load i64, i64* %m, align 8 + %call = call i32 (i32, %struct.BBB*, ...) @foo(i32 1, %struct.BBB* byval align 8 bitcast ({ i8, i64, i32, [4 x i8] }* @ddd to %struct.BBB*), i64 %0, %struct.AAA* byval align 4 @aaa, i32 4) + store i32 %call, i32* %x, align 4 + %1 = load i32, i32* %x, align 4 + %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 %1) + %2 = load i32, i32* %x, align 4 + ret i32 %2 +} + +declare i32 @printf(i8*, ...) #2 + +attributes #1 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"Clang 3.1"} diff --git a/llvm/test/CodeGen/Hexagon/vararg_double_onstack.ll b/llvm/test/CodeGen/Hexagon/vararg_double_onstack.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vararg_double_onstack.ll @@ -0,0 +1,214 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv62 -mtriple=hexagon-unknown-linux-musl -O0 < %s | FileCheck %s + +; CHECK-LABEL: foo: + +; Check Function prologue. +; Note. All register numbers and offset are fixed. +; Hence, no need of regular expression. + +; CHECK: r29 = add(r29,#-8) +; CHECK: memw(r29+#4) = r5 +; CHECK: r29 = add(r29,#8) + +%struct.AAA = type { i32, i32, i32, i32 } +%struct.__va_list_tag = type { i8*, i8*, i8* } + +@aaa = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4 +@.str = private unnamed_addr constant [13 x i8] c"result = %d\0A\00", align 1 + +; Function Attrs: nounwind +define i32 @foo(i32 %xx, i32 %a, i32 %b, i32 %c, i32 %x, ...) #0 { +entry: + %xx.addr = alloca i32, align 4 + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + %x.addr = alloca i32, align 4 + %ap = alloca [1 x %struct.__va_list_tag], align 8 + %d = alloca i32, align 4 + %ret = alloca i32, align 4 + %bbb = alloca %struct.AAA, align 4 + store i32 %xx, i32* %xx.addr, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 0, i32* %ret, align 4 + %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + %arraydecay1 = bitcast %struct.__va_list_tag* %arraydecay to i8* + call void @llvm.va_start(i8* %arraydecay1) + %arraydecay2 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + br label %vaarg.maybe_reg + +vaarg.maybe_reg: ; preds = %entry + %__current_saved_reg_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 0 + %__current_saved_reg_area_pointer = load i8*, i8** %__current_saved_reg_area_pointer_p + %__saved_reg_area_end_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 1 + %__saved_reg_area_end_pointer = load i8*, i8** %__saved_reg_area_end_pointer_p + %0 = ptrtoint i8* %__current_saved_reg_area_pointer to i32 + %align_current_saved_reg_area_pointer = add i32 %0, 7 + %align_current_saved_reg_area_pointer3 = and i32 %align_current_saved_reg_area_pointer, -8 + %align_current_saved_reg_area_pointer4 = inttoptr i32 %align_current_saved_reg_area_pointer3 to i8* + %__new_saved_reg_area_pointer = getelementptr i8, i8* %align_current_saved_reg_area_pointer4, i32 8 + %1 = icmp sgt i8* %__new_saved_reg_area_pointer, %__saved_reg_area_end_pointer + br i1 %1, label %vaarg.on_stack, label %vaarg.in_reg + +vaarg.in_reg: ; preds = 
%vaarg.maybe_reg + %2 = bitcast i8* %align_current_saved_reg_area_pointer4 to i64* + store i8* %__new_saved_reg_area_pointer, i8** %__current_saved_reg_area_pointer_p + br label %vaarg.end + +vaarg.on_stack: ; preds = %vaarg.maybe_reg + %__overflow_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay2, i32 0, i32 2 + %__overflow_area_pointer = load i8*, i8** %__overflow_area_pointer_p + %3 = ptrtoint i8* %__overflow_area_pointer to i32 + %align_overflow_area_pointer = add i32 %3, 7 + %align_overflow_area_pointer5 = and i32 %align_overflow_area_pointer, -8 + %align_overflow_area_pointer6 = inttoptr i32 %align_overflow_area_pointer5 to i8* + %__overflow_area_pointer.next = getelementptr i8, i8* %align_overflow_area_pointer6, i32 8 + store i8* %__overflow_area_pointer.next, i8** %__overflow_area_pointer_p + store i8* %__overflow_area_pointer.next, i8** %__current_saved_reg_area_pointer_p + %4 = bitcast i8* %align_overflow_area_pointer6 to i64* + br label %vaarg.end + +vaarg.end: ; preds = %vaarg.on_stack, %vaarg.in_reg + %vaarg.addr = phi i64* [ %2, %vaarg.in_reg ], [ %4, %vaarg.on_stack ] + %5 = load i64, i64* %vaarg.addr + %conv = trunc i64 %5 to i32 + store i32 %conv, i32* %d, align 4 + %6 = load i32, i32* %d, align 4 + %7 = load i32, i32* %ret, align 4 + %add = add nsw i32 %7, %6 + store i32 %add, i32* %ret, align 4 + %arraydecay7 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + %__overflow_area_pointer_p8 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay7, i32 0, i32 2 + %__overflow_area_pointer9 = load i8*, i8** %__overflow_area_pointer_p8 + %8 = bitcast i8* %__overflow_area_pointer9 to %struct.AAA* + %__overflow_area_pointer.next10 = getelementptr i8, i8* %__overflow_area_pointer9, i32 16 + store i8* %__overflow_area_pointer.next10, i8** %__overflow_area_pointer_p8 + %9 = bitcast %struct.AAA* %bbb to i8* + %10 = bitcast %struct.AAA* %8 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %9, i8* %10, i32 16, i32 4, i1 false) + %d11 = getelementptr inbounds %struct.AAA, %struct.AAA* %bbb, i32 0, i32 3 + %11 = load i32, i32* %d11, align 4 + %12 = load i32, i32* %ret, align 4 + %add12 = add nsw i32 %12, %11 + store i32 %add12, i32* %ret, align 4 + %arraydecay13 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + br label %vaarg.maybe_reg14 + +vaarg.maybe_reg14: ; preds = %vaarg.end + %__current_saved_reg_area_pointer_p15 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 0 + %__current_saved_reg_area_pointer16 = load i8*, i8** %__current_saved_reg_area_pointer_p15 + %__saved_reg_area_end_pointer_p17 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 1 + %__saved_reg_area_end_pointer18 = load i8*, i8** %__saved_reg_area_end_pointer_p17 + %__new_saved_reg_area_pointer19 = getelementptr i8, i8* %__current_saved_reg_area_pointer16, i32 4 + %13 = icmp sgt i8* %__new_saved_reg_area_pointer19, %__saved_reg_area_end_pointer18 + br i1 %13, label %vaarg.on_stack21, label %vaarg.in_reg20 + +vaarg.in_reg20: ; preds = %vaarg.maybe_reg14 + %14 = bitcast i8* %__current_saved_reg_area_pointer16 to i32* + store i8* %__new_saved_reg_area_pointer19, i8** %__current_saved_reg_area_pointer_p15 + br label %vaarg.end25 + +vaarg.on_stack21: ; preds = %vaarg.maybe_reg14 + %__overflow_area_pointer_p22 = getelementptr inbounds 
%struct.__va_list_tag, %struct.__va_list_tag* %arraydecay13, i32 0, i32 2 + %__overflow_area_pointer23 = load i8*, i8** %__overflow_area_pointer_p22 + %__overflow_area_pointer.next24 = getelementptr i8, i8* %__overflow_area_pointer23, i32 4 + store i8* %__overflow_area_pointer.next24, i8** %__overflow_area_pointer_p22 + store i8* %__overflow_area_pointer.next24, i8** %__current_saved_reg_area_pointer_p15 + %15 = bitcast i8* %__overflow_area_pointer23 to i32* + br label %vaarg.end25 + +vaarg.end25: ; preds = %vaarg.on_stack21, %vaarg.in_reg20 + %vaarg.addr26 = phi i32* [ %14, %vaarg.in_reg20 ], [ %15, %vaarg.on_stack21 ] + %16 = load i32, i32* %vaarg.addr26 + store i32 %16, i32* %d, align 4 + %17 = load i32, i32* %d, align 4 + %18 = load i32, i32* %ret, align 4 + %add27 = add nsw i32 %18, %17 + store i32 %add27, i32* %ret, align 4 + %arraydecay28 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + br label %vaarg.maybe_reg29 + +vaarg.maybe_reg29: ; preds = %vaarg.end25 + %__current_saved_reg_area_pointer_p30 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay28, i32 0, i32 0 + %__current_saved_reg_area_pointer31 = load i8*, i8** %__current_saved_reg_area_pointer_p30 + %__saved_reg_area_end_pointer_p32 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay28, i32 0, i32 1 + %__saved_reg_area_end_pointer33 = load i8*, i8** %__saved_reg_area_end_pointer_p32 + %19 = ptrtoint i8* %__current_saved_reg_area_pointer31 to i32 + %align_current_saved_reg_area_pointer34 = add i32 %19, 7 + %align_current_saved_reg_area_pointer35 = and i32 %align_current_saved_reg_area_pointer34, -8 + %align_current_saved_reg_area_pointer36 = inttoptr i32 %align_current_saved_reg_area_pointer35 to i8* + %__new_saved_reg_area_pointer37 = getelementptr i8, i8* %align_current_saved_reg_area_pointer36, i32 8 + %20 = icmp sgt i8* %__new_saved_reg_area_pointer37, %__saved_reg_area_end_pointer33 + br i1 %20, label %vaarg.on_stack39, label %vaarg.in_reg38 + +vaarg.in_reg38: ; preds = %vaarg.maybe_reg29 + %21 = bitcast i8* %align_current_saved_reg_area_pointer36 to i64* + store i8* %__new_saved_reg_area_pointer37, i8** %__current_saved_reg_area_pointer_p30 + br label %vaarg.end46 + +vaarg.on_stack39: ; preds = %vaarg.maybe_reg29 + %__overflow_area_pointer_p40 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay28, i32 0, i32 2 + %__overflow_area_pointer41 = load i8*, i8** %__overflow_area_pointer_p40 + %22 = ptrtoint i8* %__overflow_area_pointer41 to i32 + %align_overflow_area_pointer42 = add i32 %22, 7 + %align_overflow_area_pointer43 = and i32 %align_overflow_area_pointer42, -8 + %align_overflow_area_pointer44 = inttoptr i32 %align_overflow_area_pointer43 to i8* + %__overflow_area_pointer.next45 = getelementptr i8, i8* %align_overflow_area_pointer44, i32 8 + store i8* %__overflow_area_pointer.next45, i8** %__overflow_area_pointer_p40 + store i8* %__overflow_area_pointer.next45, i8** %__current_saved_reg_area_pointer_p30 + %23 = bitcast i8* %align_overflow_area_pointer44 to i64* + br label %vaarg.end46 + +vaarg.end46: ; preds = %vaarg.on_stack39, %vaarg.in_reg38 + %vaarg.addr47 = phi i64* [ %21, %vaarg.in_reg38 ], [ %23, %vaarg.on_stack39 ] + %24 = load i64, i64* %vaarg.addr47 + %conv48 = trunc i64 %24 to i32 + store i32 %conv48, i32* %d, align 4 + %25 = load i32, i32* %d, align 4 + %26 = load i32, i32* %ret, align 4 + %add49 = add nsw i32 %26, %25 + store i32 %add49, i32* %ret, align 4 + 
%arraydecay50 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + %arraydecay5051 = bitcast %struct.__va_list_tag* %arraydecay50 to i8* + call void @llvm.va_end(i8* %arraydecay5051) + %27 = load i32, i32* %ret, align 4 + ret i32 %27 +} + +; Function Attrs: nounwind +declare void @llvm.va_start(i8*) #1 + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1 + +; Function Attrs: nounwind +declare void @llvm.va_end(i8*) #1 + +; Function Attrs: nounwind +define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + %x = alloca i32, align 4 + %y = alloca i64, align 8 + store i32 0, i32* %retval + store i64 1000000, i64* %y, align 8 + %0 = load i64, i64* %y, align 8 + %1 = load i64, i64* %y, align 8 + %call = call i32 (i32, i32, i32, i32, i32, ...) @foo(i32 1, i32 2, i32 3, i32 4, i32 5, i64 %0, %struct.AAA* byval align 4 @aaa, i32 4, i64 %1) + store i32 %call, i32* %x, align 4 + %2 = load i32, i32* %x, align 4 + %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 %2) + %3 = load i32, i32* %x, align 4 + ret i32 %3 +} + +declare i32 @printf(i8*, ...) #2 + +attributes #0 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"Clang 3.1"} diff --git a/llvm/test/CodeGen/Hexagon/vararg_named.ll b/llvm/test/CodeGen/Hexagon/vararg_named.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vararg_named.ll @@ -0,0 +1,211 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv62 -mtriple=hexagon-unknown-linux-musl -O0 < %s | FileCheck %s + +; CHECK-LABEL: foo: + +; Check Function prologue. +; Note. All register numbers and offset are fixed. +; Hence, no need of regular expression. + +; CHECK: r29 = add(r29,#-16) +; CHECK: r7:6 = memd(r29+#16) +; CHECK: memd(r29+#0) = r7:6 +; CHECK: r7:6 = memd(r29+#24) +; CHECK: memd(r29+#8) = r7:6 +; CHECK: r7:6 = memd(r29+#32) +; CHECK: memd(r29+#16) = r7:6 +; CHECK: r7:6 = memd(r29+#40) +; CHECK: memd(r29+#24) = r7:6 +; CHECK: memw(r29+#36) = r3 +; CHECK: memw(r29+#40) = r4 +; CHECK: memw(r29+#44) = r5 +; CHECK: r29 = add(r29,#16) + +%struct.AAA = type { i32, i32, i32, i32 } +%struct.__va_list_tag = type { i8*, i8*, i8* } + +@aaa = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4 +@xxx = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4 +@yyy = global %struct.AAA { i32 100, i32 200, i32 300, i32 400 }, align 4 +@ccc = global %struct.AAA { i32 10, i32 20, i32 30, i32 40 }, align 4 +@fff = global %struct.AAA { i32 1, i32 2, i32 3, i32 4 }, align 4 +@.str = private unnamed_addr constant [13 x i8] c"result = %d\0A\00", align 1 + +; Function Attrs: nounwind +define i32 @foo(i32 %xx, i32 %z, i32 %m, %struct.AAA* byval align 4 %bbb, %struct.AAA* byval align 4 %GGG, ...) 
#0 { +entry: + %xx.addr = alloca i32, align 4 + %z.addr = alloca i32, align 4 + %m.addr = alloca i32, align 4 + %ap = alloca [1 x %struct.__va_list_tag], align 8 + %d = alloca i32, align 4 + %ret = alloca i32, align 4 + %ddd = alloca %struct.AAA, align 4 + %ggg = alloca %struct.AAA, align 4 + %nnn = alloca %struct.AAA, align 4 + store i32 %xx, i32* %xx.addr, align 4 + store i32 %z, i32* %z.addr, align 4 + store i32 %m, i32* %m.addr, align 4 + store i32 0, i32* %ret, align 4 + %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + %arraydecay1 = bitcast %struct.__va_list_tag* %arraydecay to i8* + call void @llvm.va_start(i8* %arraydecay1) + %d2 = getelementptr inbounds %struct.AAA, %struct.AAA* %bbb, i32 0, i32 3 + %0 = load i32, i32* %d2, align 4 + %1 = load i32, i32* %ret, align 4 + %add = add nsw i32 %1, %0 + store i32 %add, i32* %ret, align 4 + %2 = load i32, i32* %z.addr, align 4 + %3 = load i32, i32* %ret, align 4 + %add3 = add nsw i32 %3, %2 + store i32 %add3, i32* %ret, align 4 + %arraydecay4 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + br label %vaarg.maybe_reg + +vaarg.maybe_reg: ; preds = %entry + %__current_saved_reg_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay4, i32 0, i32 0 + %__current_saved_reg_area_pointer = load i8*, i8** %__current_saved_reg_area_pointer_p + %__saved_reg_area_end_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay4, i32 0, i32 1 + %__saved_reg_area_end_pointer = load i8*, i8** %__saved_reg_area_end_pointer_p + %__new_saved_reg_area_pointer = getelementptr i8, i8* %__current_saved_reg_area_pointer, i32 4 + %4 = icmp sgt i8* %__new_saved_reg_area_pointer, %__saved_reg_area_end_pointer + br i1 %4, label %vaarg.on_stack, label %vaarg.in_reg + +vaarg.in_reg: ; preds = %vaarg.maybe_reg + %5 = bitcast i8* %__current_saved_reg_area_pointer to i32* + store i8* %__new_saved_reg_area_pointer, i8** %__current_saved_reg_area_pointer_p + br label %vaarg.end + +vaarg.on_stack: ; preds = %vaarg.maybe_reg + %__overflow_area_pointer_p = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay4, i32 0, i32 2 + %__overflow_area_pointer = load i8*, i8** %__overflow_area_pointer_p + %__overflow_area_pointer.next = getelementptr i8, i8* %__overflow_area_pointer, i32 4 + store i8* %__overflow_area_pointer.next, i8** %__overflow_area_pointer_p + store i8* %__overflow_area_pointer.next, i8** %__current_saved_reg_area_pointer_p + %6 = bitcast i8* %__overflow_area_pointer to i32* + br label %vaarg.end + +vaarg.end: ; preds = %vaarg.on_stack, %vaarg.in_reg + %vaarg.addr = phi i32* [ %5, %vaarg.in_reg ], [ %6, %vaarg.on_stack ] + %7 = load i32, i32* %vaarg.addr + store i32 %7, i32* %d, align 4 + %8 = load i32, i32* %d, align 4 + %9 = load i32, i32* %ret, align 4 + %add5 = add nsw i32 %9, %8 + store i32 %add5, i32* %ret, align 4 + %arraydecay6 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + %__overflow_area_pointer_p7 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay6, i32 0, i32 2 + %__overflow_area_pointer8 = load i8*, i8** %__overflow_area_pointer_p7 + %10 = bitcast i8* %__overflow_area_pointer8 to %struct.AAA* + %__overflow_area_pointer.next9 = getelementptr i8, i8* %__overflow_area_pointer8, i32 16 + store i8* %__overflow_area_pointer.next9, i8** 
%__overflow_area_pointer_p7 + %11 = bitcast %struct.AAA* %ddd to i8* + %12 = bitcast %struct.AAA* %10 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %11, i8* %12, i32 16, i32 4, i1 false) + %d10 = getelementptr inbounds %struct.AAA, %struct.AAA* %ddd, i32 0, i32 3 + %13 = load i32, i32* %d10, align 4 + %14 = load i32, i32* %ret, align 4 + %add11 = add nsw i32 %14, %13 + store i32 %add11, i32* %ret, align 4 + %arraydecay12 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + %__overflow_area_pointer_p13 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay12, i32 0, i32 2 + %__overflow_area_pointer14 = load i8*, i8** %__overflow_area_pointer_p13 + %15 = bitcast i8* %__overflow_area_pointer14 to %struct.AAA* + %__overflow_area_pointer.next15 = getelementptr i8, i8* %__overflow_area_pointer14, i32 16 + store i8* %__overflow_area_pointer.next15, i8** %__overflow_area_pointer_p13 + %16 = bitcast %struct.AAA* %ggg to i8* + %17 = bitcast %struct.AAA* %15 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %16, i8* %17, i32 16, i32 4, i1 false) + %d16 = getelementptr inbounds %struct.AAA, %struct.AAA* %ggg, i32 0, i32 3 + %18 = load i32, i32* %d16, align 4 + %19 = load i32, i32* %ret, align 4 + %add17 = add nsw i32 %19, %18 + store i32 %add17, i32* %ret, align 4 + %arraydecay18 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + %__overflow_area_pointer_p19 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay18, i32 0, i32 2 + %__overflow_area_pointer20 = load i8*, i8** %__overflow_area_pointer_p19 + %20 = bitcast i8* %__overflow_area_pointer20 to %struct.AAA* + %__overflow_area_pointer.next21 = getelementptr i8, i8* %__overflow_area_pointer20, i32 16 + store i8* %__overflow_area_pointer.next21, i8** %__overflow_area_pointer_p19 + %21 = bitcast %struct.AAA* %nnn to i8* + %22 = bitcast %struct.AAA* %20 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %21, i8* %22, i32 16, i32 4, i1 false) + %d22 = getelementptr inbounds %struct.AAA, %struct.AAA* %nnn, i32 0, i32 3 + %23 = load i32, i32* %d22, align 4 + %24 = load i32, i32* %ret, align 4 + %add23 = add nsw i32 %24, %23 + store i32 %add23, i32* %ret, align 4 + %arraydecay24 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + br label %vaarg.maybe_reg25 + +vaarg.maybe_reg25: ; preds = %vaarg.end + %__current_saved_reg_area_pointer_p26 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay24, i32 0, i32 0 + %__current_saved_reg_area_pointer27 = load i8*, i8** %__current_saved_reg_area_pointer_p26 + %__saved_reg_area_end_pointer_p28 = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %arraydecay24, i32 0, i32 1 + %__saved_reg_area_end_pointer29 = load i8*, i8** %__saved_reg_area_end_pointer_p28 + %__new_saved_reg_area_pointer30 = getelementptr i8, i8* %__current_saved_reg_area_pointer27, i32 4 + %25 = icmp sgt i8* %__new_saved_reg_area_pointer30, %__saved_reg_area_end_pointer29 + br i1 %25, label %vaarg.on_stack32, label %vaarg.in_reg31 + +vaarg.in_reg31: ; preds = %vaarg.maybe_reg25 + %26 = bitcast i8* %__current_saved_reg_area_pointer27 to i32* + store i8* %__new_saved_reg_area_pointer30, i8** %__current_saved_reg_area_pointer_p26 + br label %vaarg.end36 + +vaarg.on_stack32: ; preds = %vaarg.maybe_reg25 + %__overflow_area_pointer_p33 = getelementptr inbounds %struct.__va_list_tag, 
%struct.__va_list_tag* %arraydecay24, i32 0, i32 2 + %__overflow_area_pointer34 = load i8*, i8** %__overflow_area_pointer_p33 + %__overflow_area_pointer.next35 = getelementptr i8, i8* %__overflow_area_pointer34, i32 4 + store i8* %__overflow_area_pointer.next35, i8** %__overflow_area_pointer_p33 + store i8* %__overflow_area_pointer.next35, i8** %__current_saved_reg_area_pointer_p26 + %27 = bitcast i8* %__overflow_area_pointer34 to i32* + br label %vaarg.end36 + +vaarg.end36: ; preds = %vaarg.on_stack32, %vaarg.in_reg31 + %vaarg.addr37 = phi i32* [ %26, %vaarg.in_reg31 ], [ %27, %vaarg.on_stack32 ] + %28 = load i32, i32* %vaarg.addr37 + store i32 %28, i32* %d, align 4 + %29 = load i32, i32* %d, align 4 + %30 = load i32, i32* %ret, align 4 + %add38 = add nsw i32 %30, %29 + store i32 %add38, i32* %ret, align 4 + %31 = load i32, i32* %m.addr, align 4 + %32 = load i32, i32* %ret, align 4 + %add39 = add nsw i32 %32, %31 + store i32 %add39, i32* %ret, align 4 + %arraydecay40 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0 + %arraydecay4041 = bitcast %struct.__va_list_tag* %arraydecay40 to i8* + call void @llvm.va_end(i8* %arraydecay4041) + %33 = load i32, i32* %ret, align 4 + ret i32 %33 +} + +; Function Attrs: nounwind +declare void @llvm.va_start(i8*) #1 + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1 + +; Function Attrs: nounwind +declare void @llvm.va_end(i8*) #1 + +; Function Attrs: nounwind +define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + %x = alloca i32, align 4 + store i32 0, i32* %retval + %call = call i32 (i32, i32, i32, %struct.AAA*, %struct.AAA*, ...) @foo(i32 1, i32 3, i32 5, %struct.AAA* byval align 4 @aaa, %struct.AAA* byval align 4 @fff, i32 2, %struct.AAA* byval align 4 @xxx, %struct.AAA* byval align 4 @yyy, %struct.AAA* byval align 4 @ccc, i32 4) + store i32 %call, i32* %x, align 4 + %0 = load i32, i32* %x, align 4 + %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 %0) + %1 = load i32, i32* %x, align 4 + ret i32 %1 +} + +declare i32 @printf(i8*, ...) #2 + +attributes #0 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"Clang 3.1"}