This is an archive of the discontinued LLVM Phabricator instance.

[SelectionDAG] Add statistics for inline emission of memory intrinsics
Needs Review · Public

Authored by arichardson on Apr 21 2022, 6:43 AM.

Download Raw Diff

This revision needs review, but there are no reviewers specified.

Details

Reviewers: None

Summary

While analysing benchmark performance comparisons, I noticed that one
version was performing noticeably worse because additional calls to
memcpy() were being generated. These statistics have been useful when looking
into memcpy() inlining behaviour for the CHERI LLVM fork, and will hopefully
also be useful upstream.

Diff Detail

Repository: rG LLVM Github Monorepo

Unit Tests: Failed

	Time	Test
	60,060 ms	x64 debian > libomp.worksharing/for::omp_for_schedule_runtime.c

Event Timeline

arichardson created this revision. · Apr 21 2022, 6:43 AM

Herald added a project: Restricted Project. · View Herald Transcript · Apr 21 2022, 6:43 AM

Herald added subscribers: ecnelises, hiraditya. · View Herald Transcript

arichardson requested review of this revision. · Apr 21 2022, 6:43 AM

Herald added a project: Restricted Project. · View Herald Transcript · Apr 21 2022, 6:43 AM

Herald added a subscriber: llvm-commits. · View Herald Transcript

Harbormaster completed remote builds in B160644: Diff 424182. · Apr 21 2022, 7:38 AM

Revision Contents

Path

Size

llvm/

lib/

CodeGen/

SelectionDAG/

SelectionDAG.cpp

9 lines

Diff 424182

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show All 16 Lines
#include "llvm/ADT/APSInt.h"		#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"		#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"		#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/FoldingSet.h"		#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/None.h"		#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"		#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"		#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"		#include "llvm/ADT/SmallVector.h"
		#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"		#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"		#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/MemoryLocation.h"		#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"		#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"		#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"		#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"		#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"		#include "llvm/CodeGen/MachineBasicBlock.h"
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines
void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}		void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}		void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {}		void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {}

void SelectionDAG::DAGNodeDeletedListener::anchor() {}		void SelectionDAG::DAGNodeDeletedListener::anchor() {}

#define DEBUG_TYPE "selectiondag"		#define DEBUG_TYPE "selectiondag"

		STATISTIC(MemcpyInline, "Number of memcpy() calls emitted inline");
		STATISTIC(MemmoveInline, "Number of memmove() calls emitted inline");
		STATISTIC(MemsetInline, "Number of memset() calls emitted inline");

static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt",		static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt",
cl::Hidden, cl::init(true),		cl::Hidden, cl::init(true),
cl::desc("Gang up loads and stores generated by inlining of memcpy"));		cl::desc("Gang up loads and stores generated by inlining of memcpy"));

static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max",		static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max",
cl::desc("Number limit for gluing ld/st of memcpy."),		cl::desc("Number limit for gluing ld/st of memcpy."),
cl::Hidden, cl::init(0));		cl::Hidden, cl::init(0));

▲ Show 20 Lines • Show All 6,709 Lines • ▼ Show 20 Lines	if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt) {
if (RemainingLdStInMemcpy) {		if (RemainingLdStInMemcpy) {
chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,		chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,
RemainingLdStInMemcpy, OutLoadChains,		RemainingLdStInMemcpy, OutLoadChains,
OutStoreChains);		OutStoreChains);
}		}
}		}
}		}
}		}

		MemcpyInline++;
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);		return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}		}

static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,		static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,		SDValue Chain, SDValue Dst, SDValue Src,
uint64_t Size, Align Alignment,		uint64_t Size, Align Alignment,
bool isVol, bool AlwaysInline,		bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,		MachinePointerInfo DstPtrInfo,
▲ Show 20 Lines • Show All 81 Lines • ▼ Show 20 Lines	for (unsigned i = 0; i < NumMemOps; i++) {
Store = DAG.getStore(		Store = DAG.getStore(
Chain, dl, LoadValues[i],		Chain, dl, LoadValues[i],
DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),		DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo);		DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo);
OutChains.push_back(Store);		OutChains.push_back(Store);
DstOff += VTSize;		DstOff += VTSize;
}		}

		MemmoveInline++;
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);		return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}		}

/// Lower the call to 'memset' intrinsic function into a series of store		/// Lower the call to 'memset' intrinsic function into a series of store
/// operations.		/// operations.
///		///
/// \param DAG Selection DAG where lowered code is placed.		/// \param DAG Selection DAG where lowered code is placed.
/// \param dl Link to corresponding IR location.		/// \param dl Link to corresponding IR location.
▲ Show 20 Lines • Show All 92 Lines • ▼ Show 20 Lines	SDValue Store = DAG.getStore(
DstPtrInfo.getWithOffset(DstOff), Alignment,		DstPtrInfo.getWithOffset(DstOff), Alignment,
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,		isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
NewAAInfo);		NewAAInfo);
OutChains.push_back(Store);		OutChains.push_back(Store);
DstOff += VT.getSizeInBits() / 8;		DstOff += VT.getSizeInBits() / 8;
Size -= VTSize;		Size -= VTSize;
}		}

		MemsetInline++;
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);		return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}		}

static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,		static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
unsigned AS) {		unsigned AS) {
// Lowering memcpy / memset / memmove intrinsics to calls is only valid if all		// Lowering memcpy / memset / memmove intrinsics to calls is only valid if all
// pointer operands can be losslessly bitcasted to pointers of address space 0		// pointer operands can be losslessly bitcasted to pointers of address space 0
if (AS != 0 && !TLI->getTargetMachine().isNoopAddrSpaceCast(AS, 0)) {		if (AS != 0 && !TLI->getTargetMachine().isNoopAddrSpaceCast(AS, 0)) {
▲ Show 20 Lines • Show All 4,609 Lines • Show Last 20 Lines