Index: lib/Target/NDS32/CMakeLists.txt =================================================================== --- lib/Target/NDS32/CMakeLists.txt +++ lib/Target/NDS32/CMakeLists.txt @@ -16,6 +16,7 @@ NDS32InstrInfo.cpp NDS32RegisterInfo.cpp NDS32FrameLowering.cpp + NDS32SelectionDAGInfo.cpp NDS32TargetMachine.cpp ) Index: lib/Target/NDS32/NDS32SelectionDAGInfo.h =================================================================== --- /dev/null +++ lib/Target/NDS32/NDS32SelectionDAGInfo.h @@ -0,0 +1,39 @@ +//===-- NDS32SelectionDAGInfo.h - NDS32 SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the NDS32 subclass for SelectionDAGTargetInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NDS32_NDS32SELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_NDS32_NDS32SELECTIONDAGINFO_H + +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" + +namespace llvm { + +class NDS32SelectionDAGInfo : public SelectionDAGTargetInfo { +public: + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, + bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; + + SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + RTLIB::Libcall LC) const; +}; + +} + +#endif Index: lib/Target/NDS32/NDS32SelectionDAGInfo.cpp =================================================================== --- /dev/null +++ lib/Target/NDS32/NDS32SelectionDAGInfo.cpp @@ -0,0 +1,162 @@ +//===-- NDS32SelectionDAGInfo.cpp - NDS32 SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the NDS32SelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#include "NDS32TargetMachine.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +#define DEBUG_TYPE "nds32-selectiondag-info" + +SDValue NDS32SelectionDAGInfo::EmitSpecializedLibcall( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, RTLIB::Libcall LC) const { + const NDS32Subtarget &Subtarget = + DAG.getMachineFunction().getSubtarget(); + const NDS32TargetLowering *TLI = Subtarget.getTargetLowering(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + + // Push memcpy function arguments + Entry.Node = Dst; + Args.push_back(Entry); + Entry.Node = Src; + Args.push_back(Entry); + Entry.Node = Size; + Args.push_back(Entry); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee( + TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("memcpy", + TLI->getPointerTy(DAG.getDataLayout())), + std::move(Args)) + .setDiscardResult(); + std::pair CallResult = TLI->LowerCallTo(CLI); + + return CallResult.second; +} + +SDValue NDS32SelectionDAGInfo::EmitTargetCodeForMemcpy( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + // Do repeated 4-byte loads and stores. To be improved. + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDValue(); + // This requires the copy size to be a constant, preferably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast(Size); + + if (!ConstantSize) + return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, + RTLIB::MEMCPY); + + uint64_t SizeVal = ConstantSize->getZExtValue(); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; + unsigned EmittedNumMemOps = 0; + EVT VT = MVT::i32; + unsigned VTSize = 4; + unsigned i = 0; + const unsigned MaxLoadsInLDM = 10; + SDValue TFOps[6]; + SDValue Loads[6]; + uint64_t SrcOff = 0, DstOff = 0; + + // The number of MEMCPY pseudo-instructions to emit. We use up to + // MaxLoadsInLDM registers per mcopy, which will get lowered into lmw/smw + // later on. This is a lower bound on the number of MEMCPY operations we must + // emit. + unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM; + + // Code size optimisation: do not inline memcpy if expansion results in + // more instructions than the libary call. + if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction()->optForMinSize()) + return SDValue(); + + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue); + + for (unsigned I = 0; I != NumMEMCPYs; ++I) { + // Evenly distribute registers among MEMCPY operations to reduce register + // pressure. + unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs; + unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps; + + Dst = DAG.getNode(NDS32ISD::MEMCPY, dl, VTs, Chain, Dst, Src, + DAG.getConstant(NumRegs, dl, MVT::i32)); + Src = Dst.getValue(1); + Chain = Dst.getValue(2); + + DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize); + SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize); + + EmittedNumMemOps = NextEmittedNumMemOps; + } + + if (BytesLeft == 0) + return Chain; + + // Issue loads / stores for the trailing (1 - 3) bytes. + unsigned BytesLeftSave = BytesLeft; + i = 0; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, + DAG.getConstant(SrcOff, dl, MVT::i32)), + SrcPtrInfo.getWithOffset(SrcOff)); + TFOps[i] = Loads[i].getValue(1); + ++i; + SrcOff += VTSize; + BytesLeft -= VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); + + i = 0; + BytesLeft = BytesLeftSave; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, dl, MVT::i32)), + DstPtrInfo.getWithOffset(DstOff)); + ++i; + DstOff += VTSize; + BytesLeft -= VTSize; + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); +}