This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU: Disable the load and store cluster mutations for the schedulers
AbandonedPublic

Authored by tstellar on Jul 25 2019, 9:12 AM.

Download Raw Diff

Details

Reviewers

rampitec
arsenm
nhaehnle

Summary

These tend to increase register usage, and I'm not sure they provide
that much benefit now that the SILoadStoreOptimizer is run before
scheduling.

shader-db results:

SGPRS: 2395408 -> 2245808 (-6.25 %)
VGPRS: 1385652 -> 1377068 (-0.62 %)
Spilled SGPRs: 13732 -> 12147 (-11.54 %)
Spilled VGPRs: 67 -> 104 (55.22 %)
Private memory VGPRs: 5872 -> 5872 (0.00 %)
Scratch size: 6848 -> 6864 (0.23 %) dwords per thread
Code Size: 57847052 -> 58209484 (0.63 %) bytes
LDS: 132 -> 132 (0.00 %) blocks
Max Waves: 470488 -> 472363 (0.40 %)
Wait states: 0 -> 0 (0.00 %)

Diff Detail

Repository

rG LLVM Github Monorepo

Build Status

Buildable 35644
Build 35643: arc lint + arc unit

Event Timeline

tstellar created this revision. · Jul 25 2019, 9:12 AM

Herald added a project: Restricted Project. · View Herald Transcript · Jul 25 2019, 9:12 AM

Herald added subscribers: hiraditya, t-tye, tpr and 5 others. · View Herald Transcript

Harbormaster completed remote builds in B35644: Diff 211775. · Jul 25 2019, 9:13 AM

It is known that clustering increases register pressure. However, in our experiments it gives performance benefits through better cache utilization. Clustering might need tuning, but it should not be disabled.

You can add an option to disable them though.

This revision now requires changes to proceed. · Jul 25 2019, 9:20 AM

Ok, thanks for the info. I will drop this patch.

Revision Contents

Path

Size

llvm/

lib/

Target/

AMDGPU/

AMDGPUTargetMachine.cpp

8 lines

Diff 211775

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Show First 20 Lines • Show All 252 Lines • ▼ Show 20 Lines
static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {		static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
return new SIScheduleDAGMI(C);		return new SIScheduleDAGMI(C);
}		}

static ScheduleDAGInstrs *		static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {		createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
ScheduleDAGMILive *DAG =		ScheduleDAGMILive *DAG =
new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));		new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());		DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
return DAG;		return DAG;
}		}

static ScheduleDAGInstrs *		static ScheduleDAGInstrs *
createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {		createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
auto DAG = new GCNIterativeScheduler(C,		auto DAG = new GCNIterativeScheduler(C,
GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);		GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
return DAG;		return DAG;
}		}

static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {		static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
return new GCNIterativeScheduler(C,		return new GCNIterativeScheduler(C,
GCNIterativeScheduler::SCHEDULE_MINREGFORCED);		GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
}		}

static ScheduleDAGInstrs *		static ScheduleDAGInstrs *
createIterativeILPMachineScheduler(MachineSchedContext *C) {		createIterativeILPMachineScheduler(MachineSchedContext *C) {
auto DAG = new GCNIterativeScheduler(C,		auto DAG = new GCNIterativeScheduler(C,
GCNIterativeScheduler::SCHEDULE_ILP);		GCNIterativeScheduler::SCHEDULE_ILP);
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());		DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
return DAG;		return DAG;
}		}

static MachineSchedRegistry		static MachineSchedRegistry
R600SchedRegistry("r600", "Run R600's custom scheduler",		R600SchedRegistry("r600", "Run R600's custom scheduler",
createR600MachineScheduler);		createR600MachineScheduler);

▲ Show 20 Lines • Show All 254 Lines • ▼ Show 20 Lines	public:

AMDGPUTargetMachine &getAMDGPUTargetMachine() const {		AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
return getTM<AMDGPUTargetMachine>();		return getTM<AMDGPUTargetMachine>();
}		}

ScheduleDAGInstrs *		ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override {		createMachineScheduler(MachineSchedContext *C) const override {
ScheduleDAGMILive *DAG = createGenericSchedLive(C);		ScheduleDAGMILive *DAG = createGenericSchedLive(C);
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
return DAG;		return DAG;
}		}

void addEarlyCSEOrGVNPass();		void addEarlyCSEOrGVNPass();
void addStraightLineScalarOptimizationPasses();		void addStraightLineScalarOptimizationPasses();
void addIRPasses() override;		void addIRPasses() override;
void addCodeGenPrepare() override;		void addCodeGenPrepare() override;
bool addPreISel() override;		bool addPreISel() override;
▲ Show 20 Lines • Show All 585 Lines • Show Last 20 Lines