Diff 276300

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Show First 20 Lines • Show All 69 Lines • ▼ Show 20 Lines	void RISCVDAGToDAGISel::Select(SDNode *Node) {
// Instruction Selection not handled by the auto-generated tablegen selection		// Instruction Selection not handled by the auto-generated tablegen selection
// should be handled here.		// should be handled here.
unsigned Opcode = Node->getOpcode();		unsigned Opcode = Node->getOpcode();
MVT XLenVT = Subtarget->getXLenVT();		MVT XLenVT = Subtarget->getXLenVT();
SDLoc DL(Node);		SDLoc DL(Node);
EVT VT = Node->getValueType(0);		EVT VT = Node->getValueType(0);

switch (Opcode) {		switch (Opcode) {
		case ISD::ADD: {
		// Optimize (add r, imm) to (addi (addi r, imm0) imm1) if applicable. The
		// immediate must be in specific ranges and have a single use.
		if (auto *ConstOp = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
		MaskRayUnsubmitted Done Reply Inline Actions The two comments can be combined and placed under `case ISD::ADD:` Optimize (add r, imm) to (addi (addi r, imm0) imm1) if applicable. MaskRay: The two comments can be combined and placed under `case ISD::ADD:` Optimize (add r, imm) to…
		if (!(ConstOp->hasOneUse()))
		break;
		// The imm must be in range [-4096,-2049] or [2048,4094].
		MaskRayUnsubmitted Done Reply Inline Actions `if (!ConstOp->hasOneUse())` is not tested. MaskRay: `if (!ConstOp->hasOneUse())` is not tested.
		int64_t Imm = ConstOp->getSExtValue();
		if (!(-4096 <= Imm && Imm <= -2049) && !(2048 <= Imm && Imm <= 4094))
		luismarquesUnsubmitted Done Reply Inline Actions Given that you can have an ADDI of `-2048`, that should be `-4096`, not `-4095`. (Please update the comment, the code and the test results accordingly). luismarques: Given that you can have an ADDI of `-2048`, that should be `-4096`, not `-4095`. (Please update…
		break;
		// Break the imm to imm0+imm1.
		SDLoc DL(Node);
		luismarquesUnsubmitted Done Reply Inline Actions This is a matter of style so it's not very important, but I would lean towards implementing this check with a comparison logic that matches the comment about the required range, as that would make it easier to check at a glance. luismarques: This is a matter of style so it's not very important, but I would lean towards implementing…
		EVT VT = Node->getValueType(0);
		const SDValue ImmOp0 = CurDAG->getTargetConstant(Imm - Imm / 2, DL, VT);
		const SDValue ImmOp1 = CurDAG->getTargetConstant(Imm / 2, DL, VT);
		auto *NodeAddi0 = CurDAG->getMachineNode(RISCV::ADDI, DL, VT,
		MaskRayUnsubmitted Not Done Reply Inline Actions Add const if applicable. MaskRay: Add const if applicable.
		Node->getOperand(0), ImmOp0);
		luismarquesUnsubmitted Done Reply Inline Actions Nitpick: make it the other way around, so that the second addi is the "smaller" one. That generated code seems more intuitive to me that way :-) luismarques: Nitpick: make it the other way around, so that the second addi is the "smaller" one. That…
		auto *NodeAddi1 = CurDAG->getMachineNode(RISCV::ADDI, DL, VT,
		SDValue(NodeAddi0, 0), ImmOp1);
		ReplaceNode(Node, NodeAddi1);
		return;
		}
		break;
		}
case ISD::Constant: {		case ISD::Constant: {
auto ConstNode = cast<ConstantSDNode>(Node);		auto ConstNode = cast<ConstantSDNode>(Node);
if (VT == XLenVT && ConstNode->isNullValue()) {		if (VT == XLenVT && ConstNode->isNullValue()) {
SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),		SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
RISCV::X0, XLenVT);		RISCV::X0, XLenVT);
ReplaceNode(Node, New.getNode());		ReplaceNode(Node, New.getNode());
return;		return;
}		}
▲ Show 20 Lines • Show All 193 Lines • Show Last 20 Lines

llvm/test/CodeGen/RISCV/add-imm.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
				; RUN: \| FileCheck -check-prefix=RV32I %s
				; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
				; RUN: \| FileCheck -check-prefix=RV64I %s

				; These test how the immediate in an addition is materialized.

				; Adds 2047 to an i32. The expected output on both riscv32 and riscv64 is a
				; single addi with that immediate, i.e. the constant is not split in two.
				define i32 @add_positive_low_bound_reject(i32 %a) nounwind {
				; RV32I-LABEL: add_positive_low_bound_reject:
				; RV32I: # %bb.0:
				; RV32I-NEXT: addi a0, a0, 2047
				; RV32I-NEXT: ret
				;
				; RV64I-LABEL: add_positive_low_bound_reject:
				; RV64I: # %bb.0:
				; RV64I-NEXT: addi a0, a0, 2047
				; RV64I-NEXT: ret
				%1 = add i32 %a, 2047
				ret i32 %1
				}

				define i32 @add_positive_low_bound_accept(i32 %a) nounwind {
				; RV32I-LABEL: add_positive_low_bound_accept:
				; RV32I: # %bb.0:
				; RV32I-NEXT: addi a0, a0, 1024
				luismarquesUnsubmitted Done Reply Inline Actions When you have `i32` and `i64` tests they should both have riscv32 and riscv64 test CHECKs. You can have operations on `i64`s in riscv32, they just don't correspond to a single native instruction. But in this case I don't see much advantage in having test variants for both types, as this optimization should be orthogonal to that. I suggest keeping one `i32`+`i64` test (to show that it works with both types), and trimming the remaining tests down to just the `i32` variant. luismarques: When you have `i32` and `i64` tests they should both have riscv32 and riscv64 test CHECKs.
				; RV32I-NEXT: addi a0, a0, 1024
				; RV32I-NEXT: ret
				;
				; RV64I-LABEL: add_positive_low_bound_accept:
				; RV64I: # %bb.0:
				; RV64I-NEXT: addi a0, a0, 1024
				; RV64I-NEXT: addi a0, a0, 1024
				; RV64I-NEXT: ret
				%1 = add i32 %a, 2048
				ret i32 %1
				}

				; Adds 4094, the upper end of the positive range [2048,4094] accepted by the
				; ISD::ADD selection code. The expected output is a pair of addi instructions
				; with immediates 2047 and 2047.
				define i32 @add_positive_high_bound_accept(i32 %a) nounwind {
				; RV32I-LABEL: add_positive_high_bound_accept:
				; RV32I: # %bb.0:
				; RV32I-NEXT: addi a0, a0, 2047
				; RV32I-NEXT: addi a0, a0, 2047
				; RV32I-NEXT: ret
				;
				; RV64I-LABEL: add_positive_high_bound_accept:
				; RV64I: # %bb.0:
				; RV64I-NEXT: addi a0, a0, 2047
				; RV64I-NEXT: addi a0, a0, 2047
				; RV64I-NEXT: ret
				%1 = add i32 %a, 4094
				ret i32 %1
				}

				; Adds 4095, one past the accepted positive range. The expected output builds
				; the constant in a1 (lui 1 followed by addi/addiw -1, i.e. 4096 - 1) and then
				; uses a register-register add instead of an addi pair.
				define i32 @add_positive_high_bound_reject(i32 %a) nounwind {
				; RV32I-LABEL: add_positive_high_bound_reject:
				; RV32I: # %bb.0:
				; RV32I-NEXT: lui a1, 1
				; RV32I-NEXT: addi a1, a1, -1
				; RV32I-NEXT: add a0, a0, a1
				; RV32I-NEXT: ret
				;
				; RV64I-LABEL: add_positive_high_bound_reject:
				; RV64I: # %bb.0:
				; RV64I-NEXT: lui a1, 1
				; RV64I-NEXT: addiw a1, a1, -1
				; RV64I-NEXT: add a0, a0, a1
				; RV64I-NEXT: ret
				%1 = add i32 %a, 4095
				ret i32 %1
				}

				; Adds -2048, which lies just outside the accepted negative range
				; [-4096,-2049]. The expected output is a single addi with immediate -2048.
				define i32 @add_negative_high_bound_reject(i32 %a) nounwind {
				; RV32I-LABEL: add_negative_high_bound_reject:
				; RV32I: # %bb.0:
				; RV32I-NEXT: addi a0, a0, -2048
				; RV32I-NEXT: ret
				;
				; RV64I-LABEL: add_negative_high_bound_reject:
				; RV64I: # %bb.0:
				; RV64I-NEXT: addi a0, a0, -2048
				; RV64I-NEXT: ret
				%1 = add i32 %a, -2048
				ret i32 %1
				}

				; Adds -2049, the upper end of the accepted negative range [-4096,-2049].
				; The expected output is an addi pair with immediates -1025 and -1024, which
				; matches the (Imm - Imm / 2, Imm / 2) split used by the selection code.
				define i32 @add_negative_high_bound_accept(i32 %a) nounwind {
				; RV32I-LABEL: add_negative_high_bound_accept:
				; RV32I: # %bb.0:
				; RV32I-NEXT: addi a0, a0, -1025
				; RV32I-NEXT: addi a0, a0, -1024
				; RV32I-NEXT: ret
				;
				; RV64I-LABEL: add_negative_high_bound_accept:
				; RV64I: # %bb.0:
				; RV64I-NEXT: addi a0, a0, -1025
				; RV64I-NEXT: addi a0, a0, -1024
				; RV64I-NEXT: ret
				%1 = add i32 %a, -2049
				ret i32 %1
				}

				; Adds -4096, the lower end of the accepted negative range [-4096,-2049].
				; The expected output is an addi pair with immediates -2048 and -2048.
				define i32 @add_negative_low_bound_accept(i32 %a) nounwind {
				; RV32I-LABEL: add_negative_low_bound_accept:
				; RV32I: # %bb.0:
				; RV32I-NEXT: addi a0, a0, -2048
				; RV32I-NEXT: addi a0, a0, -2048
				; RV32I-NEXT: ret
				;
				; RV64I-LABEL: add_negative_low_bound_accept:
				; RV64I: # %bb.0:
				; RV64I-NEXT: addi a0, a0, -2048
				; RV64I-NEXT: addi a0, a0, -2048
				; RV64I-NEXT: ret
				%1 = add i32 %a, -4096
				ret i32 %1
				}

				; Adds -4097, one below the accepted negative range. The expected output
				; builds the constant in a1 with lui followed by addi/addiw -1 and then uses a
				; register-register add instead of an addi pair.
				define i32 @add_negative_low_bound_reject(i32 %a) nounwind {
				; RV32I-LABEL: add_negative_low_bound_reject:
				; RV32I: # %bb.0:
				; RV32I-NEXT: lui a1, 1048575
				; RV32I-NEXT: addi a1, a1, -1
				; RV32I-NEXT: add a0, a0, a1
				; RV32I-NEXT: ret
				;
				; RV64I-LABEL: add_negative_low_bound_reject:
				; RV64I: # %bb.0:
				; RV64I-NEXT: lui a1, 1048575
				; RV64I-NEXT: addiw a1, a1, -1
				; RV64I-NEXT: add a0, a0, a1
				; RV64I-NEXT: ret
				%1 = add i32 %a, -4097
				ret i32 %1
				}

				; i32 add of 2999, a value inside the accepted positive range. The expected
				; output is an addi pair with immediates 1500 and 1499 (larger half first).
				define i32 @add32_accept(i32 %a) nounwind {
				; RV32I-LABEL: add32_accept:
				; RV32I: # %bb.0:
				; RV32I-NEXT: addi a0, a0, 1500
				; RV32I-NEXT: addi a0, a0, 1499
				; RV32I-NEXT: ret
				;
				; RV64I-LABEL: add32_accept:
				; RV64I: # %bb.0:
				; RV64I-NEXT: addi a0, a0, 1500
				; RV64I-NEXT: addi a0, a0, 1499
				; RV64I-NEXT: ret
				%1 = add i32 %a, 2999
				ret i32 %1
				}

				; i64 variant of the 2999 case. On riscv64 the expected output is the same
				; addi pair (1500, 1499) as for i32. On riscv32 the addi pair is applied to a0
				; into a2, then sltu/add update a1 and the result is moved back to a0.
				; NOTE(review): the sltu/add sequence presumably propagates the carry into
				; the high half of the i64 - confirm.
				define i64 @add64_accept(i64 %a) nounwind {
				; RV32I-LABEL: add64_accept:
				; RV32I: # %bb.0:
				; RV32I-NEXT: addi a2, a0, 1500
				; RV32I-NEXT: addi a2, a2, 1499
				; RV32I-NEXT: sltu a0, a2, a0
				; RV32I-NEXT: add a1, a1, a0
				; RV32I-NEXT: mv a0, a2
				; RV32I-NEXT: ret
				;
				; RV64I-LABEL: add64_accept:
				; RV64I: # %bb.0:
				; RV64I-NEXT: addi a0, a0, 1500
				; RV64I-NEXT: addi a0, a0, 1499
				; RV64I-NEXT: ret
				%1 = add i64 %a, 2999
				ret i64 %1
				}

				@ga = global i32 0, align 4
				@gb = global i32 0, align 4
				; Loads @ga and @gb, adds 3000 to each and stores the results back. Although
				; 3000 lies inside the accepted [2048,4094] range, the expected output builds
				; the constant once in a4 (lui 1 + addi/addiw -1096 = 3000) and feeds it to two
				; register-register adds rather than emitting addi pairs.
				; NOTE(review): presumably this exercises the hasOneUse() bail-out in the
				; ISD::ADD selection code, since the constant is used by both adds - confirm.
				define void @add32_reject() nounwind {
				; RV32I-LABEL: add32_reject:
				; RV32I: # %bb.0:
				; RV32I-NEXT: lui a0, %hi(ga)
				; RV32I-NEXT: lw a1, %lo(ga)(a0)
				; RV32I-NEXT: lui a2, %hi(gb)
				; RV32I-NEXT: lw a3, %lo(gb)(a2)
				; RV32I-NEXT: lui a4, 1
				; RV32I-NEXT: addi a4, a4, -1096
				; RV32I-NEXT: add a1, a1, a4
				; RV32I-NEXT: add a3, a3, a4
				; RV32I-NEXT: sw a1, %lo(ga)(a0)
				; RV32I-NEXT: sw a3, %lo(gb)(a2)
				; RV32I-NEXT: ret
				;
				; RV64I-LABEL: add32_reject:
				; RV64I: # %bb.0:
				; RV64I-NEXT: lui a0, %hi(ga)
				; RV64I-NEXT: lw a1, %lo(ga)(a0)
				; RV64I-NEXT: lui a2, %hi(gb)
				; RV64I-NEXT: lw a3, %lo(gb)(a2)
				; RV64I-NEXT: lui a4, 1
				; RV64I-NEXT: addiw a4, a4, -1096
				; RV64I-NEXT: add a1, a1, a4
				; RV64I-NEXT: add a3, a3, a4
				; RV64I-NEXT: sw a1, %lo(ga)(a0)
				; RV64I-NEXT: sw a3, %lo(gb)(a2)
				; RV64I-NEXT: ret
				%1 = load i32, i32* @ga, align 4
				%2 = load i32, i32* @gb, align 4
				%3 = add i32 %1, 3000
				%4 = add i32 %2, 3000
				store i32 %3, i32* @ga, align 4
				store i32 %4, i32* @gb, align 4
				ret void
				}

This is an archive of the discontinued LLVM Phabricator instance.

[RISCV] optimize addition with a pair of (addi imm)
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 276300

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

llvm/test/CodeGen/RISCV/add-imm.ll

This is an archive of the discontinued LLVM Phabricator instance.

[RISCV] optimize addition with a pair of (addi imm)
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 276300

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

llvm/test/CodeGen/RISCV/add-imm.ll

[RISCV] optimize addition with a pair of (addi imm)
ClosedPublic