Index: lib/Target/NDS32/NDS32ISelLowering.h
===================================================================
--- lib/Target/NDS32/NDS32ISelLowering.h
+++ lib/Target/NDS32/NDS32ISelLowering.h
@@ -138,6 +138,30 @@
   SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty,
                         SelectionDAG &DAG, unsigned Flag) const;
 
+  typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
+
+  /// copyByValRegs - Create a stack frame object for an incoming byval
+  /// argument and store the argument registers [FirstReg, LastReg) that were
+  /// used to pass part of it into that object.
+  void copyByValRegs(SDValue Chain, const SDLoc &DL,
+                     std::vector<SDValue> &OutChains, SelectionDAG &DAG,
+                     const ISD::ArgFlagsTy &Flags,
+                     SmallVectorImpl<SDValue> &InVals,
+                     const Argument *FuncArg, unsigned FirstReg,
+                     unsigned LastReg, const CCValAssign &VA,
+                     CCState &State) const;
+
+  /// passByValArg - Pass a byval argument in registers or on stack.
+  void passByValArg(SDValue Chain, const SDLoc &DL,
+                    RegsToPassVector &RegsToPass,
+                    SmallVectorImpl<SDValue> &MemOpChains, SDValue StackPtr,
+                    MachineFrameInfo &MFI, SelectionDAG &DAG, SDValue Arg,
+                    unsigned FirstReg, unsigned LastReg,
+                    const ISD::ArgFlagsTy &Flags, bool isLittle,
+                    const CCValAssign &VA) const;
+
+  void HandleByVal(CCState *, unsigned &, unsigned) const override;
+
   /// RestoreVarArgRegs - Restore variable function arguments passed in
   /// registers to the stack. Also create a stack frame object for the
   /// first variable argument.
@@ -151,8 +175,6 @@
                                const SDLoc &dl, SelectionDAG &DAG,
                                SmallVectorImpl<SDValue> &InVals) const override;
 
-  typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
-
   SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
                     SmallVectorImpl<SDValue> &InVals) const override;
 
Index: lib/Target/NDS32/NDS32ISelLowering.cpp
===================================================================
--- lib/Target/NDS32/NDS32ISelLowering.cpp
+++ lib/Target/NDS32/NDS32ISelLowering.cpp
@@ -354,6 +354,199 @@
   NDS32::R3, NDS32::R4, NDS32::R5
 };
 
+// copyByValRegs - Create a fixed stack object for an incoming byval argument
+// and store the argument registers [FirstReg, LastReg) that carry part of it
+// into that object. The frame index is appended to InVals and the stores are
+// appended to OutChains.
+void NDS32TargetLowering::copyByValRegs(
+    SDValue Chain, const SDLoc &DL, std::vector<SDValue> &OutChains,
+    SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags,
+    SmallVectorImpl<SDValue> &InVals, const Argument *FuncArg,
+    unsigned FirstReg, unsigned LastReg, const CCValAssign &VA,
+    CCState &State) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  unsigned GPRSizeInBytes = 4;
+  unsigned NumRegs = LastReg - FirstReg;
+  unsigned RegAreaSize = NumRegs * GPRSizeInBytes;
+  unsigned FrameObjSize = std::max(Flags.getByValSize(), RegAreaSize);
+  int FrameObjOffset;
+  ArrayRef<MCPhysReg> ByValArgRegs = makeArrayRef(NDS32ArgRegs);
+
+  if (RegAreaSize)
+    FrameObjOffset =
+        -(int)((ByValArgRegs.size() - FirstReg) * GPRSizeInBytes);
+  else
+    FrameObjOffset = VA.getLocMemOffset();
+
+  // Create frame object.
+  EVT PtrTy = getPointerTy(DAG.getDataLayout());
+  int FI = MFI.CreateFixedObject(FrameObjSize, FrameObjOffset, true);
+  SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
+  InVals.push_back(FIN);
+
+  if (!NumRegs)
+    return;
+
+  // Copy arg registers.
+  MVT RegTy = MVT::getIntegerVT(GPRSizeInBytes * 8);
+  const TargetRegisterClass *RC = getRegClassFor(RegTy);
+
+  for (unsigned I = 0; I < NumRegs; ++I) {
+    unsigned ArgReg = ByValArgRegs[FirstReg + I];
+    unsigned VReg = addLiveIn(MF, ArgReg, RC);
+    unsigned Offset = I * GPRSizeInBytes;
+    SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN,
+                                   DAG.getConstant(Offset, DL, PtrTy));
+    SDValue Store = DAG.getStore(Chain, DL, DAG.getRegister(VReg, RegTy),
+                                 StorePtr, MachinePointerInfo(FuncArg, Offset));
+    OutChains.push_back(Store);
+  }
+}
+
+// passByValArg - Pass an outgoing byval argument. Whole words are loaded into
+// the argument registers [FirstReg, LastReg); a trailing partial word is
+// assembled from sub-word loads; the rest is memcpy'd to the stack.
+void NDS32TargetLowering::passByValArg(
+    SDValue Chain, const SDLoc &DL,
+    RegsToPassVector &RegsToPass,
+    SmallVectorImpl<SDValue> &MemOpChains, SDValue StackPtr,
+    MachineFrameInfo &MFI, SelectionDAG &DAG, SDValue Arg, unsigned FirstReg,
+    unsigned LastReg, const ISD::ArgFlagsTy &Flags, bool isLittle,
+    const CCValAssign &VA) const {
+  unsigned ByValSizeInBytes = Flags.getByValSize();
+  unsigned OffsetInBytes = 0; // From beginning of struct
+  unsigned RegSizeInBytes = 4;
+  unsigned Alignment = std::min(Flags.getByValAlign(), RegSizeInBytes);
+  EVT PtrTy = getPointerTy(DAG.getDataLayout()),
+      RegTy = MVT::getIntegerVT(RegSizeInBytes * 8);
+  unsigned NumRegs = LastReg - FirstReg;
+
+  if (NumRegs) {
+    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(NDS32ArgRegs);
+    bool LeftoverBytes = (NumRegs * RegSizeInBytes > ByValSizeInBytes);
+    unsigned I = 0;
+
+    // Copy words to registers.
+    for (; I < NumRegs - LeftoverBytes; ++I, OffsetInBytes += RegSizeInBytes) {
+      SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
+                                    DAG.getConstant(OffsetInBytes, DL, PtrTy));
+      SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr,
+                                    MachinePointerInfo(), Alignment);
+      MemOpChains.push_back(LoadVal.getValue(1));
+      unsigned ArgReg = ArgRegs[FirstReg + I];
+      RegsToPass.push_back(std::make_pair(ArgReg, LoadVal));
+    }
+
+    // Return if the struct has been fully copied.
+    if (ByValSizeInBytes == OffsetInBytes)
+      return;
+
+    // Copy the remainder of the byval argument with sub-word loads and shifts.
+    if (LeftoverBytes) {
+      SDValue Val;
+
+      for (unsigned LoadSizeInBytes = RegSizeInBytes / 2, TotalBytesLoaded = 0;
+           OffsetInBytes < ByValSizeInBytes; LoadSizeInBytes /= 2) {
+        unsigned RemainingSizeInBytes = ByValSizeInBytes - OffsetInBytes;
+
+        if (RemainingSizeInBytes < LoadSizeInBytes)
+          continue;
+
+        // Load subword.
+        SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
+                                      DAG.getConstant(OffsetInBytes, DL,
+                                                      PtrTy));
+        SDValue LoadVal = DAG.getExtLoad(
+            ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(),
+            MVT::getIntegerVT(LoadSizeInBytes * 8), Alignment);
+        MemOpChains.push_back(LoadVal.getValue(1));
+
+        // Shift the loaded value.
+        unsigned Shamt;
+
+        if (isLittle)
+          Shamt = TotalBytesLoaded * 8;
+        else
+          Shamt = (RegSizeInBytes - (TotalBytesLoaded + LoadSizeInBytes)) * 8;
+
+        SDValue Shift = DAG.getNode(ISD::SHL, DL, RegTy, LoadVal,
+                                    DAG.getConstant(Shamt, DL, MVT::i32));
+
+        if (Val.getNode())
+          Val = DAG.getNode(ISD::OR, DL, RegTy, Val, Shift);
+        else
+          Val = Shift;
+
+        OffsetInBytes += LoadSizeInBytes;
+        TotalBytesLoaded += LoadSizeInBytes;
+        Alignment = std::min(Alignment, LoadSizeInBytes);
+      }
+
+      unsigned ArgReg = ArgRegs[FirstReg + I];
+      RegsToPass.push_back(std::make_pair(ArgReg, Val));
+      return;
+    }
+  }
+
+  // Copy the remainder of the byval argument to the stack with memcpy.
+  unsigned MemCpySize = ByValSizeInBytes - OffsetInBytes;
+  SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
+                            DAG.getConstant(OffsetInBytes, DL, PtrTy));
+  SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr,
+                            DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
+  Chain = DAG.getMemcpy(Chain, DL, Dst, Src,
+                        DAG.getConstant(MemCpySize, DL, PtrTy),
+                        Alignment, /*isVolatile=*/false, /*AlwaysInline=*/false,
+                        /*isTailCall=*/false,
+                        MachinePointerInfo(), MachinePointerInfo());
+  MemOpChains.push_back(Chain);
+}
+
+// HandleByVal - Allocate the argument registers that carry a byval argument
+// and record the register range in State. Size is rounded up to a multiple of
+// the register size and reduced by the bytes assigned to registers. Nothing is
+// allocated for the Fast calling convention.
+void NDS32TargetLowering::HandleByVal(CCState *State, unsigned &Size,
+                                      unsigned Align) const {
+  const TargetFrameLowering *TFL = Subtarget->getFrameLowering();
+
+  assert(Size && "Byval argument's size shouldn't be 0.");
+
+  Align = std::min(Align, TFL->getStackAlignment());
+
+  unsigned FirstReg = 0;
+  unsigned NumRegs = 0;
+
+  if (State->getCallingConv() != CallingConv::Fast) {
+    unsigned RegSizeInBytes = 4;
+    ArrayRef<MCPhysReg> IntArgRegs = makeArrayRef(NDS32ArgRegs);
+    const MCPhysReg *ShadowRegs = IntArgRegs.data();
+
+    // We used to check the size as well but we can't do that anymore since
+    // CCState::HandleByVal() rounds up the size after calling this function.
+    assert(!(Align % RegSizeInBytes) &&
+           "Byval argument's alignment should be a multiple of "
+           "RegSizeInBytes.");
+
+    FirstReg = State->getFirstUnallocated(IntArgRegs);
+
+    // If Align > RegSizeInBytes, the first arg register must be even.
+    if ((Align > RegSizeInBytes) && (FirstReg % 2)) {
+      State->AllocateReg(IntArgRegs[FirstReg], ShadowRegs[FirstReg]);
+      ++FirstReg;
+    }
+
+    // Mark the registers allocated.
+    Size = alignTo(Size, RegSizeInBytes);
+    for (unsigned I = FirstReg; Size > 0 && (I < IntArgRegs.size());
+         Size -= RegSizeInBytes, ++I, ++NumRegs)
+      State->AllocateReg(IntArgRegs[I], ShadowRegs[I]);
+  }
+
+  State->addInRegsParamInfo(FirstReg, FirstReg + NumRegs);
+}
+
 // RestoreVarArgRegs - Store VarArg register to the stack
 void NDS32TargetLowering::RestoreVarArgRegs(std::vector<SDValue> &OutChains,
                                             SDValue Chain, const SDLoc &DL,
@@ -448,6 +641,23 @@
       std::advance(FuncArg, Ins[i].getOrigArgIndex() - CurArgIdx);
       CurArgIdx = Ins[i].getOrigArgIndex();
     }
+    ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+    if (Flags.isByVal()) {
+      assert(Ins[i].isOrigArg() && "Byval arguments cannot be implicit");
+      unsigned FirstByValReg, LastByValReg;
+      unsigned ByValIdx = CCInfo.getInRegsParamsProcessed();
+      CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg);
+
+      assert(Flags.getByValSize() &&
+             "ByVal args of size 0 should have been ignored by front-end.");
+      assert(ByValIdx < CCInfo.getInRegsParamsCount());
+      copyByValRegs(Chain, DL, OutChains, DAG, Flags, InVals, &*FuncArg,
+                    FirstByValReg, LastByValReg, VA, CCInfo);
+      CCInfo.nextInRegsParam();
+      continue;
+    }
+
     bool IsRegLoc = VA.isRegLoc();
     // Arguments stored on registers
     if (IsRegLoc) {
@@ -573,6 +783,9 @@
   CallingConv::ID CallConv = CLI.CallConv;
   bool IsVarArg = CLI.IsVarArg;
 
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+
   // NDS32 target does not support tail call optimization yet.
   IsTailCall = false;
 
@@ -609,6 +822,23 @@
     CCValAssign &VA = ArgLocs[i];
     ISD::ArgFlagsTy Flags = Outs[i].Flags;
 
+    // ByVal Arg.
+    if (Flags.isByVal()) {
+      unsigned FirstByValReg, LastByValReg;
+      unsigned ByValIdx = CCInfo.getInRegsParamsProcessed();
+      CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg);
+
+      assert(Flags.getByValSize() &&
+             "ByVal args of size 0 should have been ignored by front-end.");
+      assert(ByValIdx < CCInfo.getInRegsParamsCount());
+      assert(!IsTailCall &&
+             "Do not tail-call optimize if there is a byval argument.");
+      passByValArg(Chain, DL, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg,
+                   FirstByValReg, LastByValReg, Flags, /*isLittle=*/true, VA);
+      CCInfo.nextInRegsParam();
+      continue;
+    }
+
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
     default: llvm_unreachable("Unknown loc info!");
Index: test/CodeGen/NDS32/by-val.ll
===================================================================
--- /dev/null
+++ test/CodeGen/NDS32/by-val.ll
@@ -0,0 +1,81 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-a:0:32-n32-S64"
+target triple = "nds32le---elf"
+
+%struct.tiny = type { i32 }
+
+; Function Attrs: nounwind
+define i32 @f(i32 %n, %struct.tiny* byval nocapture readonly align 4 %x, %struct.tiny* byval nocapture readonly align 4 %y, %struct.tiny* byval nocapture readonly align 4 %z) local_unnamed_addr #0 {
+entry:
+  %c = getelementptr inbounds %struct.tiny, %struct.tiny* %x, i32 0, i32 0
+  %0 = load i32, i32* %c, align 4
+  %cmp = icmp eq i32 %0, 10
+  br i1 %cmp, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  tail call void @abort() #4
+  unreachable
+
+if.end:                                           ; preds = %entry
+  %c1 = getelementptr inbounds %struct.tiny, %struct.tiny* %y, i32 0, i32 0
+  %1 = load i32, i32* %c1, align 4
+  %cmp2 = icmp eq i32 %1, 11
+  br i1 %cmp2, label %if.end4, label %if.then3
+
+if.then3:                                         ; preds = %if.end
+  tail call void @abort() #4
+  unreachable
+
+if.end4:                                          ; preds = %if.end
+  %c5 = getelementptr inbounds %struct.tiny, %struct.tiny* %z, i32 0, i32 0
+  %2 = load i32, i32* %c5, align 4
+  %cmp6 = icmp eq i32 %2, 12
+  br i1 %cmp6, label %if.end8, label %if.then7
+
+if.then7:                                         ; preds = %if.end4
+  tail call void @abort() #4
+  unreachable
+
+if.end8:                                          ; preds = %if.end4
+  ret i32 undef
+}
+
+; Function Attrs: noreturn
+declare void @abort() local_unnamed_addr #1
+
+; Function Attrs: noreturn nounwind
+define i32 @main() local_unnamed_addr #2 {
+entry:
+  %x = alloca [3 x %struct.tiny], align 4
+  %0 = bitcast [3 x %struct.tiny]* %x to i8*
+  call void @llvm.lifetime.start(i64 12, i8* nonnull %0) #5
+  %arrayidx = getelementptr inbounds [3 x %struct.tiny], [3 x %struct.tiny]* %x, i32 0, i32 0
+  %c = getelementptr inbounds [3 x %struct.tiny], [3 x %struct.tiny]* %x, i32 0, i32 0, i32 0
+  store i32 10, i32* %c, align 4
+  %arrayidx1 = getelementptr inbounds [3 x %struct.tiny], [3 x %struct.tiny]* %x, i32 0, i32 1
+  %c2 = getelementptr inbounds %struct.tiny, %struct.tiny* %arrayidx1, i32 0, i32 0
+  store i32 11, i32* %c2, align 4
+  %arrayidx3 = getelementptr inbounds [3 x %struct.tiny], [3 x %struct.tiny]* %x, i32 0, i32 2
+  %c4 = getelementptr inbounds %struct.tiny, %struct.tiny* %arrayidx3, i32 0, i32 0
+  store i32 12, i32* %c4, align 4
+; CHECK: movi55 $r0, 10
+; CHECK: swi $r0, [$sp + (0)]
+; CHECK: movi55 $r1, 4
+; CHECK: movi55 $r2, 11
+; CHECK: sw $r2, [$r1 + $r0]
+; CHECK: movi55 $r2, 8
+; CHECK: movi55 $r3, 12
+; CHECK: sw $r3, [$r2 + $r0]
+; CHECK: lw $r3, [$r2 + $r0]
+; CHECK: lw $r2, [$r1 + $r0]
+; CHECK: lwi $r1, [$sp + (0)]
+  %call = call i32 @f(i32 undef, %struct.tiny* byval nonnull align 4 %arrayidx, %struct.tiny* byval align 4 %arrayidx1, %struct.tiny* byval align 4 %arrayidx3)
+  tail call void @exit(i32 0) #4
+  unreachable
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #3
+
+; Function Attrs: noreturn
+declare void @exit(i32) local_unnamed_addr #1