This is an archive of the discontinued LLVM Phabricator instance.

Differential D106648

[RISCV] Optimize mul in the zba extension with SH*ADD
ClosedPublic

Authored by benshi001 on Jul 23 2021, 4:16 AM.

Download Raw Diff

Details

Reviewers

craig.topper
LevyHsu
asb
luismarques
jrtc27

Commits

rG264b8e2a20b3: [RISCV] Optimize mul in the zba extension with SH*ADD

Summary

This patch performs the following optimizations when the
immediate multiplier does not fit in a simm12 (signed 12-bit immediate):

(mul x, (power_of_2 + 2)) => (SH1ADD x, (SLLI x, bits))
(mul x, (power_of_2 + 4)) => (SH2ADD x, (SLLI x, bits))
(mul x, (power_of_2 + 8)) => (SH3ADD x, (SLLI x, bits))

Diff Detail

Unit Tests: Failed

	Time	Test
	3,150 ms	x64 debian > libarcher.critical::critical.c
	3,290 ms	x64 debian > libarcher.critical::lock-nested.c
	3,190 ms	x64 debian > libarcher.parallel::parallel-simple.c
	3,420 ms	x64 debian > libarcher.parallel::parallel-simple2.c
	2,990 ms	x64 debian > libarcher.races::critical-unrelated.c
		View Full Test Results (20 Failed)

Event Timeline

benshi001 created this revision.Jul 23 2021, 4:16 AM

Herald added subscribers: vkmr, frasercrmck, evandro and 22 others. · View Herald TranscriptJul 23 2021, 4:16 AM

benshi001 requested review of this revision.Jul 23 2021, 4:16 AM

Herald added a project: Restricted Project. · View Herald TranscriptJul 23 2021, 4:16 AM

Herald added subscribers: llvm-commits, MaskRay. · View Herald Transcript

benshi001 added a parent revision: D106647: [RISCV][test] Add new tests for mul optimization in the zba extension with SH*ADD.Jul 23 2021, 4:16 AM

It is obviously a win if the immediate is not a simm12, but I am not sure about the simm12 case, so I left that unchanged.

Harbormaster completed remote builds in B115819: Diff 361154.Jul 23 2021, 4:51 AM

Ping ...

I saw a similar optimization on x86, and I think it will also benefit RISC-V.

LGTM

This revision is now accepted and ready to land.Jul 28 2021, 5:41 PM

benshi001 mentioned this in D106647: [RISCV][test] Add new tests for mul optimization in the zba extension with SH*ADD.Jul 28 2021, 5:59 PM

This revision was landed with ongoing or failed builds.Jul 28 2021, 6:47 PM

Closed by commit rG264b8e2a20b3: [RISCV] Optimize mul in the zba extension with SH*ADD (authored by benshi001). · Explain Why

This revision was automatically updated to reflect the committed changes.

benshi001 added a commit: rG264b8e2a20b3: [RISCV] Optimize mul in the zba extension with SH*ADD.

Revision Contents

Path

Size

llvm/

lib/

Target/

RISCV/

RISCVISelLowering.cpp

5 lines

test/

CodeGen/

RISCV/

30 lines

30 lines

Diff 361154

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 8,881 Lines • ▼ Show 20 Lines		if (VT.isScalarInteger()) {
	if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())			if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
	return false;			return false;
	if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {			if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
	// Break the MUL to a SLLI and an ADD/SUB.			// Break the MUL to a SLLI and an ADD/SUB.
	const APInt &Imm = ConstNode->getAPIntValue();			const APInt &Imm = ConstNode->getAPIntValue();
	if ((Imm + 1).isPowerOf2() \|\| (Imm - 1).isPowerOf2() \|\|			if ((Imm + 1).isPowerOf2() \|\| (Imm - 1).isPowerOf2() \|\|
	(1 - Imm).isPowerOf2() \|\| (-1 - Imm).isPowerOf2())			(1 - Imm).isPowerOf2() \|\| (-1 - Imm).isPowerOf2())
	return true;			return true;
				// Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
				if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
				((Imm - 2).isPowerOf2() \|\| (Imm - 4).isPowerOf2() \|\|
				(Imm - 8).isPowerOf2()))
				return true;
	// Omit the following optimization if the sub target has the M extension			// Omit the following optimization if the sub target has the M extension
	// and the data size >= XLen.			// and the data size >= XLen.
	if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())			if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
	return false;			return false;
	// Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs			// Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
	// a pair of LUI/ADDI.			// a pair of LUI/ADDI.
	if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {			if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
	APInt ImmS = Imm.ashr(Imm.countTrailingZeros());			APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
	▲ Show 20 Lines • Show All 137 Lines • Show Last 20 Lines

llvm/test/CodeGen/RISCV/rv32zba.ll

	Show First 20 Lines • Show All 609 Lines • ▼ Show 20 Lines
	; RV32I: # %bb.0:			; RV32I: # %bb.0:
	; RV32I-NEXT: lui a1, 1			; RV32I-NEXT: lui a1, 1
	; RV32I-NEXT: addi a1, a1, 2			; RV32I-NEXT: addi a1, a1, 2
	; RV32I-NEXT: mul a0, a0, a1			; RV32I-NEXT: mul a0, a0, a1
	; RV32I-NEXT: ret			; RV32I-NEXT: ret
	;			;
	; RV32IB-LABEL: mul4098:			; RV32IB-LABEL: mul4098:
	; RV32IB: # %bb.0:			; RV32IB: # %bb.0:
	; RV32IB-NEXT: lui a1, 1			; RV32IB-NEXT: slli a1, a0, 12
	; RV32IB-NEXT: addi a1, a1, 2			; RV32IB-NEXT: sh1add a0, a0, a1
	; RV32IB-NEXT: mul a0, a0, a1
	; RV32IB-NEXT: ret			; RV32IB-NEXT: ret
	;			;
	; RV32IBA-LABEL: mul4098:			; RV32IBA-LABEL: mul4098:
	; RV32IBA: # %bb.0:			; RV32IBA: # %bb.0:
	; RV32IBA-NEXT: lui a1, 1			; RV32IBA-NEXT: slli a1, a0, 12
	; RV32IBA-NEXT: addi a1, a1, 2			; RV32IBA-NEXT: sh1add a0, a0, a1
	; RV32IBA-NEXT: mul a0, a0, a1
	; RV32IBA-NEXT: ret			; RV32IBA-NEXT: ret
	%c = mul i32 %a, 4098			%c = mul i32 %a, 4098
	ret i32 %c			ret i32 %c
	}			}

	define i32 @mul4100(i32 %a) {			define i32 @mul4100(i32 %a) {
	; RV32I-LABEL: mul4100:			; RV32I-LABEL: mul4100:
	; RV32I: # %bb.0:			; RV32I: # %bb.0:
	; RV32I-NEXT: lui a1, 1			; RV32I-NEXT: lui a1, 1
	; RV32I-NEXT: addi a1, a1, 4			; RV32I-NEXT: addi a1, a1, 4
	; RV32I-NEXT: mul a0, a0, a1			; RV32I-NEXT: mul a0, a0, a1
	; RV32I-NEXT: ret			; RV32I-NEXT: ret
	;			;
	; RV32IB-LABEL: mul4100:			; RV32IB-LABEL: mul4100:
	; RV32IB: # %bb.0:			; RV32IB: # %bb.0:
	; RV32IB-NEXT: lui a1, 1			; RV32IB-NEXT: slli a1, a0, 12
	; RV32IB-NEXT: addi a1, a1, 4			; RV32IB-NEXT: sh2add a0, a0, a1
	; RV32IB-NEXT: mul a0, a0, a1
	; RV32IB-NEXT: ret			; RV32IB-NEXT: ret
	;			;
	; RV32IBA-LABEL: mul4100:			; RV32IBA-LABEL: mul4100:
	; RV32IBA: # %bb.0:			; RV32IBA: # %bb.0:
	; RV32IBA-NEXT: lui a1, 1			; RV32IBA-NEXT: slli a1, a0, 12
	; RV32IBA-NEXT: addi a1, a1, 4			; RV32IBA-NEXT: sh2add a0, a0, a1
	; RV32IBA-NEXT: mul a0, a0, a1
	; RV32IBA-NEXT: ret			; RV32IBA-NEXT: ret
	%c = mul i32 %a, 4100			%c = mul i32 %a, 4100
	ret i32 %c			ret i32 %c
	}			}

	define i32 @mul4104(i32 %a) {			define i32 @mul4104(i32 %a) {
	; RV32I-LABEL: mul4104:			; RV32I-LABEL: mul4104:
	; RV32I: # %bb.0:			; RV32I: # %bb.0:
	; RV32I-NEXT: lui a1, 1			; RV32I-NEXT: lui a1, 1
	; RV32I-NEXT: addi a1, a1, 8			; RV32I-NEXT: addi a1, a1, 8
	; RV32I-NEXT: mul a0, a0, a1			; RV32I-NEXT: mul a0, a0, a1
	; RV32I-NEXT: ret			; RV32I-NEXT: ret
	;			;
	; RV32IB-LABEL: mul4104:			; RV32IB-LABEL: mul4104:
	; RV32IB: # %bb.0:			; RV32IB: # %bb.0:
	; RV32IB-NEXT: lui a1, 1			; RV32IB-NEXT: slli a1, a0, 12
	; RV32IB-NEXT: addi a1, a1, 8			; RV32IB-NEXT: sh3add a0, a0, a1
	; RV32IB-NEXT: mul a0, a0, a1
	; RV32IB-NEXT: ret			; RV32IB-NEXT: ret
	;			;
	; RV32IBA-LABEL: mul4104:			; RV32IBA-LABEL: mul4104:
	; RV32IBA: # %bb.0:			; RV32IBA: # %bb.0:
	; RV32IBA-NEXT: lui a1, 1			; RV32IBA-NEXT: slli a1, a0, 12
	; RV32IBA-NEXT: addi a1, a1, 8			; RV32IBA-NEXT: sh3add a0, a0, a1
	; RV32IBA-NEXT: mul a0, a0, a1
	; RV32IBA-NEXT: ret			; RV32IBA-NEXT: ret
	%c = mul i32 %a, 4104			%c = mul i32 %a, 4104
	ret i32 %c			ret i32 %c
	}			}

llvm/test/CodeGen/RISCV/rv64zba.ll

	Show First 20 Lines • Show All 1,114 Lines • ▼ Show 20 Lines
	; RV64I: # %bb.0:			; RV64I: # %bb.0:
	; RV64I-NEXT: lui a1, 1			; RV64I-NEXT: lui a1, 1
	; RV64I-NEXT: addiw a1, a1, 2			; RV64I-NEXT: addiw a1, a1, 2
	; RV64I-NEXT: mul a0, a0, a1			; RV64I-NEXT: mul a0, a0, a1
	; RV64I-NEXT: ret			; RV64I-NEXT: ret
	;			;
	; RV64IB-LABEL: mul4098:			; RV64IB-LABEL: mul4098:
	; RV64IB: # %bb.0:			; RV64IB: # %bb.0:
	; RV64IB-NEXT: lui a1, 1			; RV64IB-NEXT: slli a1, a0, 12
	; RV64IB-NEXT: addiw a1, a1, 2			; RV64IB-NEXT: sh1add a0, a0, a1
	; RV64IB-NEXT: mul a0, a0, a1
	; RV64IB-NEXT: ret			; RV64IB-NEXT: ret
	;			;
	; RV64IBA-LABEL: mul4098:			; RV64IBA-LABEL: mul4098:
	; RV64IBA: # %bb.0:			; RV64IBA: # %bb.0:
	; RV64IBA-NEXT: lui a1, 1			; RV64IBA-NEXT: slli a1, a0, 12
	; RV64IBA-NEXT: addiw a1, a1, 2			; RV64IBA-NEXT: sh1add a0, a0, a1
	; RV64IBA-NEXT: mul a0, a0, a1
	; RV64IBA-NEXT: ret			; RV64IBA-NEXT: ret
	%c = mul i64 %a, 4098			%c = mul i64 %a, 4098
	ret i64 %c			ret i64 %c
	}			}

	define i64 @mul4100(i64 %a) {			define i64 @mul4100(i64 %a) {
	; RV64I-LABEL: mul4100:			; RV64I-LABEL: mul4100:
	; RV64I: # %bb.0:			; RV64I: # %bb.0:
	; RV64I-NEXT: lui a1, 1			; RV64I-NEXT: lui a1, 1
	; RV64I-NEXT: addiw a1, a1, 4			; RV64I-NEXT: addiw a1, a1, 4
	; RV64I-NEXT: mul a0, a0, a1			; RV64I-NEXT: mul a0, a0, a1
	; RV64I-NEXT: ret			; RV64I-NEXT: ret
	;			;
	; RV64IB-LABEL: mul4100:			; RV64IB-LABEL: mul4100:
	; RV64IB: # %bb.0:			; RV64IB: # %bb.0:
	; RV64IB-NEXT: lui a1, 1			; RV64IB-NEXT: slli a1, a0, 12
	; RV64IB-NEXT: addiw a1, a1, 4			; RV64IB-NEXT: sh2add a0, a0, a1
	; RV64IB-NEXT: mul a0, a0, a1
	; RV64IB-NEXT: ret			; RV64IB-NEXT: ret
	;			;
	; RV64IBA-LABEL: mul4100:			; RV64IBA-LABEL: mul4100:
	; RV64IBA: # %bb.0:			; RV64IBA: # %bb.0:
	; RV64IBA-NEXT: lui a1, 1			; RV64IBA-NEXT: slli a1, a0, 12
	; RV64IBA-NEXT: addiw a1, a1, 4			; RV64IBA-NEXT: sh2add a0, a0, a1
	; RV64IBA-NEXT: mul a0, a0, a1
	; RV64IBA-NEXT: ret			; RV64IBA-NEXT: ret
	%c = mul i64 %a, 4100			%c = mul i64 %a, 4100
	ret i64 %c			ret i64 %c
	}			}

	define i64 @mul4104(i64 %a) {			define i64 @mul4104(i64 %a) {
	; RV64I-LABEL: mul4104:			; RV64I-LABEL: mul4104:
	; RV64I: # %bb.0:			; RV64I: # %bb.0:
	; RV64I-NEXT: lui a1, 1			; RV64I-NEXT: lui a1, 1
	; RV64I-NEXT: addiw a1, a1, 8			; RV64I-NEXT: addiw a1, a1, 8
	; RV64I-NEXT: mul a0, a0, a1			; RV64I-NEXT: mul a0, a0, a1
	; RV64I-NEXT: ret			; RV64I-NEXT: ret
	;			;
	; RV64IB-LABEL: mul4104:			; RV64IB-LABEL: mul4104:
	; RV64IB: # %bb.0:			; RV64IB: # %bb.0:
	; RV64IB-NEXT: lui a1, 1			; RV64IB-NEXT: slli a1, a0, 12
	; RV64IB-NEXT: addiw a1, a1, 8			; RV64IB-NEXT: sh3add a0, a0, a1
	; RV64IB-NEXT: mul a0, a0, a1
	; RV64IB-NEXT: ret			; RV64IB-NEXT: ret
	;			;
	; RV64IBA-LABEL: mul4104:			; RV64IBA-LABEL: mul4104:
	; RV64IBA: # %bb.0:			; RV64IBA: # %bb.0:
	; RV64IBA-NEXT: lui a1, 1			; RV64IBA-NEXT: slli a1, a0, 12
	; RV64IBA-NEXT: addiw a1, a1, 8			; RV64IBA-NEXT: sh3add a0, a0, a1
	; RV64IBA-NEXT: mul a0, a0, a1
	; RV64IBA-NEXT: ret			; RV64IBA-NEXT: ret
	%c = mul i64 %a, 4104			%c = mul i64 %a, 4104
	ret i64 %c			ret i64 %c
	}			}