This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Target/RISCV/
-
Target/
-
RISCV/
-
RISCVInstrInfo.td
-
test/CodeGen/RISCV/
-
CodeGen/
-
RISCV/
-
i32-icmp.ll
-
setcc-logic.ll

Differential D71774

[RISCV] Optimize seteq/setne pattern expansions for better code size
ClosedPublic

Authored by wwei on Dec 20 2019, 9:10 AM.

Download Raw Diff

Details

Reviewers

asb
lenary
lewis-revill
simoncook
rogfer01

Commits

rGdb875f665544: [RISCV] Optimize seteq/setne pattern expansions for better code size

Summary

ADDI(C.ADDI) may achieve better code size than XORI, since XORI has no C extension.
This patch transforms two patterns and gets almost equivalent results.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

wwei created this revision. Dec 20 2019, 9:10 AM

Herald added subscribers: llvm-commits, luismarques, apazos and 22 others. · View Herald TranscriptDec 20 2019, 9:10 AM

The case was from dhrystone(dhry_2.c):

Boolean Func_3 (Enum_Par_Val)
/***************************/
    /* executed once        */
    /* Enum_Par_Val == Ident_3 */

Enumeration Enum_Par_Val;
{
  Enumeration Enum_Loc;
  Enum_Loc = Enum_Par_Val;
  if (Enum_Loc == Ident_3)
    /* then, executed */
    return (true);
  else /* not executed */
    return (false);
} /* Func_3 */

I found that gcc achieves better code size (c.addi + seqz) than llvm (xori + seqz), so this patch tries to implement the same optimization.

Jim added a subscriber: Jim. Dec 23 2019, 10:08 PM

This seems like a fun issue:

addi is compressible
xor is almost certainly easier to analyse (from the view of KnownBits and the like).

Have you seen any regressions in code generation from this change?

In D71774#1821760, @lenary wrote:

This seems like a fun issue:

addi is compressible

xor is almost certainly easier to analyse (from the view of KnownBits and the like).

Have you seen any regressions in code generation from this change?

No, there are no regressions. When a register is compared with an immediate value for equality or inequality, using xori with the immediate or addi with the negated immediate in the seteq/setne pattern is equivalent: either one produces zero exactly when the register equals the immediate, and the following seqz/snez only tests whether that result is zero.

Well spotted, this seems like a good change to me. I wonder if there are other optimization opportunities to use this NegImm/simm12_plus1 pattern in further patches?

In D71774#1821760, @lenary wrote:

This seems like a fun issue:

addi is compressible

xor is almost certainly easier to analyse (from the view of KnownBits and the like).

Have you seen any regressions in code generation from this change?

That's a fair comment, though I'd hope the situation where the result of the xor in this pattern is used for anything else later on would be extremely rare/impossible?

In D71774#1856710, @lewis-revill wrote:

In D71774#1821760, @lenary wrote:

Have you seen any regressions in code generation from this change?

That's a fair comment, though I'd hope the situation where the result of the xor in this pattern is used for anything else later on would be extremely rare/impossible?

Ah, yes, that does make sense, given the addi is generated with one use.

I'm happy for this to land now!

This revision is now accepted and ready to land. Feb 4 2020, 5:51 AM

Closed by commit rGdb875f665544: [RISCV] Optimize seteq/setne pattern expansions for better code size (authored by wwei). · Explain WhyFeb 11 2020, 6:46 AM

This revision was automatically updated to reflect the committed changes.

Herald added a subscriber: evandro. · View Herald TranscriptFeb 11 2020, 6:46 AM

Revision Contents

Path

Size

llvm/

lib/

Target/

RISCV/

RISCVInstrInfo.td

28 lines

test/

CodeGen/

RISCV/

i32-icmp.ll

61 lines

setcc-logic.ll

8 lines

Diff 243854

llvm/lib/Target/RISCV/RISCVInstrInfo.td

Show First 20 Lines • Show All 138 Lines • ▼ Show 20 Lines	let MCOperandPredicate = [{
if (MCOp.evaluateAsConstantImm(Imm))		if (MCOp.evaluateAsConstantImm(Imm))
return isInt<12>(Imm);		return isInt<12>(Imm);
return MCOp.isBareSymbolRef();		return MCOp.isBareSymbolRef();
}];		}];
let OperandType = "OPERAND_SIMM12";		let OperandType = "OPERAND_SIMM12";
let OperandNamespace = "RISCVOp";		let OperandNamespace = "RISCVOp";
}		}

		// A 12-bit signed immediate plus one where the imm range will be -2047~2048.
		def simm12_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT,
		[{return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]> {
		let ParserMatchClass = SImmAsmOperand<12>;
		let EncoderMethod = "getImmOpValue";
		let DecoderMethod = "decodeSImmOperand<12>";
		let MCOperandPredicate = [{
		int64_t Imm;
		if (MCOp.evaluateAsConstantImm(Imm))
		return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;
		return MCOp.isBareSymbolRef();
		}];
		}

// A 13-bit signed immediate where the least significant bit is zero.		// A 13-bit signed immediate where the least significant bit is zero.
def simm13_lsb0 : Operand<OtherVT> {		def simm13_lsb0 : Operand<OtherVT> {
let ParserMatchClass = SImmAsmOperand<13, "Lsb0">;		let ParserMatchClass = SImmAsmOperand<13, "Lsb0">;
let EncoderMethod = "getImmOpValueAsr1";		let EncoderMethod = "getImmOpValueAsr1";
let DecoderMethod = "decodeSImmOperandAndLsl1<13>";		let DecoderMethod = "decodeSImmOperandAndLsl1<13>";
let MCOperandPredicate = [{		let MCOperandPredicate = [{
int64_t Imm;		int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))		if (MCOp.evaluateAsConstantImm(Imm))
▲ Show 20 Lines • Show All 136 Lines • ▼ Show 20 Lines
// Extract the most significant 20 bits from an immediate value. Add 1 if bit		// Extract the most significant 20 bits from an immediate value. Add 1 if bit
// 11 is 1, to compensate for the low 12 bits in the matching immediate addi		// 11 is 1, to compensate for the low 12 bits in the matching immediate addi
// or ld/st being negative.		// or ld/st being negative.
def HI20 : SDNodeXForm<imm, [{		def HI20 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(((N->getZExtValue()+0x800) >> 12) & 0xfffff,		return CurDAG->getTargetConstant(((N->getZExtValue()+0x800) >> 12) & 0xfffff,
SDLoc(N), N->getValueType(0));		SDLoc(N), N->getValueType(0));
}]>;		}]>;

		// Return the negation of an immediate value.
		def NegImm : SDNodeXForm<imm, [{
		return CurDAG->getTargetConstant(-N->getSExtValue(), SDLoc(N),
		N->getValueType(0));
		}]>;

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Instruction Formats		// Instruction Formats
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

include "RISCVInstrFormats.td"		include "RISCVInstrFormats.td"

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Instruction Class Templates		// Instruction Class Templates
▲ Show 20 Lines • Show All 545 Lines • ▼ Show 20 Lines
def : PatGprSimm12<setlt, SLTI>;		def : PatGprSimm12<setlt, SLTI>;
def : PatGprGpr<setult, SLTU>;		def : PatGprGpr<setult, SLTU>;
def : PatGprSimm12<setult, SLTIU>;		def : PatGprSimm12<setult, SLTIU>;

// Define pattern expansions for setcc operations that aren't directly		// Define pattern expansions for setcc operations that aren't directly
// handled by a RISC-V instruction.		// handled by a RISC-V instruction.
def : Pat<(seteq GPR:$rs1, 0), (SLTIU GPR:$rs1, 1)>;		def : Pat<(seteq GPR:$rs1, 0), (SLTIU GPR:$rs1, 1)>;
def : Pat<(seteq GPR:$rs1, GPR:$rs2), (SLTIU (XOR GPR:$rs1, GPR:$rs2), 1)>;		def : Pat<(seteq GPR:$rs1, GPR:$rs2), (SLTIU (XOR GPR:$rs1, GPR:$rs2), 1)>;
def : Pat<(seteq GPR:$rs1, simm12:$imm12),		def : Pat<(seteq GPR:$rs1, simm12_plus1:$imm12),
(SLTIU (XORI GPR:$rs1, simm12:$imm12), 1)>;		(SLTIU (ADDI GPR:$rs1, (NegImm simm12_plus1:$imm12)), 1)>;
def : Pat<(setne GPR:$rs1, 0), (SLTU X0, GPR:$rs1)>;		def : Pat<(setne GPR:$rs1, 0), (SLTU X0, GPR:$rs1)>;
def : Pat<(setne GPR:$rs1, GPR:$rs2), (SLTU X0, (XOR GPR:$rs1, GPR:$rs2))>;		def : Pat<(setne GPR:$rs1, GPR:$rs2), (SLTU X0, (XOR GPR:$rs1, GPR:$rs2))>;
def : Pat<(setne GPR:$rs1, simm12:$imm12),		def : Pat<(setne GPR:$rs1, simm12_plus1:$imm12),
(SLTU X0, (XORI GPR:$rs1, simm12:$imm12))>;		(SLTU X0, (ADDI GPR:$rs1, (NegImm simm12_plus1:$imm12)))>;
def : Pat<(setugt GPR:$rs1, GPR:$rs2), (SLTU GPR:$rs2, GPR:$rs1)>;		def : Pat<(setugt GPR:$rs1, GPR:$rs2), (SLTU GPR:$rs2, GPR:$rs1)>;
def : Pat<(setuge GPR:$rs1, GPR:$rs2), (XORI (SLTU GPR:$rs1, GPR:$rs2), 1)>;		def : Pat<(setuge GPR:$rs1, GPR:$rs2), (XORI (SLTU GPR:$rs1, GPR:$rs2), 1)>;
def : Pat<(setule GPR:$rs1, GPR:$rs2), (XORI (SLTU GPR:$rs2, GPR:$rs1), 1)>;		def : Pat<(setule GPR:$rs1, GPR:$rs2), (XORI (SLTU GPR:$rs2, GPR:$rs1), 1)>;
def : Pat<(setgt GPR:$rs1, GPR:$rs2), (SLT GPR:$rs2, GPR:$rs1)>;		def : Pat<(setgt GPR:$rs1, GPR:$rs2), (SLT GPR:$rs2, GPR:$rs1)>;
def : Pat<(setge GPR:$rs1, GPR:$rs2), (XORI (SLT GPR:$rs1, GPR:$rs2), 1)>;		def : Pat<(setge GPR:$rs1, GPR:$rs2), (XORI (SLT GPR:$rs1, GPR:$rs2), 1)>;
def : Pat<(setle GPR:$rs1, GPR:$rs2), (XORI (SLT GPR:$rs2, GPR:$rs1), 1)>;		def : Pat<(setle GPR:$rs1, GPR:$rs2), (XORI (SLT GPR:$rs2, GPR:$rs1), 1)>;

let usesCustomInserter = 1 in		let usesCustomInserter = 1 in
▲ Show 20 Lines • Show All 273 Lines • Show Last 20 Lines

llvm/test/CodeGen/RISCV/i32-icmp.ll

Show All 13 Lines	; RV32I-NEXT: ret
%1 = icmp eq i32 %a, %b		%1 = icmp eq i32 %a, %b
%2 = zext i1 %1 to i32		%2 = zext i1 %1 to i32
ret i32 %2		ret i32 %2
}		}

define i32 @icmp_eq_constant(i32 %a) nounwind {		define i32 @icmp_eq_constant(i32 %a) nounwind {
; RV32I-LABEL: icmp_eq_constant:		; RV32I-LABEL: icmp_eq_constant:
; RV32I: # %bb.0:		; RV32I: # %bb.0:
; RV32I-NEXT: xori a0, a0, 42		; RV32I-NEXT: addi a0, a0, -42
; RV32I-NEXT: seqz a0, a0		; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: ret		; RV32I-NEXT: ret
%1 = icmp eq i32 %a, 42		%1 = icmp eq i32 %a, 42
%2 = zext i1 %1 to i32		%2 = zext i1 %1 to i32
ret i32 %2		ret i32 %2
}		}

		define i32 @icmp_eq_constant_2048(i32 %a) nounwind {
		; RV32I-LABEL: icmp_eq_constant_2048:
		; RV32I: # %bb.0:
		; RV32I-NEXT: addi a0, a0, -2048
		; RV32I-NEXT: seqz a0, a0
		; RV32I-NEXT: ret
		%1 = icmp eq i32 %a, 2048
		%2 = zext i1 %1 to i32
		ret i32 %2
		}

		define i32 @icmp_eq_constant_neg_2048(i32 %a) nounwind {
		; RV32I-LABEL: icmp_eq_constant_neg_2048:
		; RV32I: # %bb.0:
		; RV32I-NEXT: addi a1, zero, -2048
		; RV32I-NEXT: xor a0, a0, a1
		; RV32I-NEXT: seqz a0, a0
		; RV32I-NEXT: ret
		%1 = icmp eq i32 %a, -2048
		%2 = zext i1 %1 to i32
		ret i32 %2
		}

		define i32 @icmp_eq_constant_neg_2047(i32 %a) nounwind {
		; RV32I-LABEL: icmp_eq_constant_neg_2047:
		; RV32I: # %bb.0:
		; RV32I-NEXT: addi a0, a0, 2047
		; RV32I-NEXT: seqz a0, a0
		; RV32I-NEXT: ret
		%1 = icmp eq i32 %a, -2047
		%2 = zext i1 %1 to i32
		ret i32 %2
		}

define i32 @icmp_eqz(i32 %a) nounwind {		define i32 @icmp_eqz(i32 %a) nounwind {
; RV32I-LABEL: icmp_eqz:		; RV32I-LABEL: icmp_eqz:
; RV32I: # %bb.0:		; RV32I: # %bb.0:
; RV32I-NEXT: seqz a0, a0		; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: ret		; RV32I-NEXT: ret
%1 = icmp eq i32 %a, 0		%1 = icmp eq i32 %a, 0
%2 = zext i1 %1 to i32		%2 = zext i1 %1 to i32
ret i32 %2		ret i32 %2
}		}

define i32 @icmp_ne(i32 %a, i32 %b) nounwind {		define i32 @icmp_ne(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: icmp_ne:		; RV32I-LABEL: icmp_ne:
; RV32I: # %bb.0:		; RV32I: # %bb.0:
; RV32I-NEXT: xor a0, a0, a1		; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: snez a0, a0		; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: ret		; RV32I-NEXT: ret
%1 = icmp ne i32 %a, %b		%1 = icmp ne i32 %a, %b
%2 = zext i1 %1 to i32		%2 = zext i1 %1 to i32
ret i32 %2		ret i32 %2
}		}

define i32 @icmp_ne_constant(i32 %a) nounwind {		define i32 @icmp_ne_constant(i32 %a) nounwind {
; RV32I-LABEL: icmp_ne_constant:		; RV32I-LABEL: icmp_ne_constant:
; RV32I: # %bb.0:		; RV32I: # %bb.0:
; RV32I-NEXT: xori a0, a0, 42		; RV32I-NEXT: addi a0, a0, -42
; RV32I-NEXT: snez a0, a0		; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: ret		; RV32I-NEXT: ret
%1 = icmp ne i32 %a, 42		%1 = icmp ne i32 %a, 42
%2 = zext i1 %1 to i32		%2 = zext i1 %1 to i32
ret i32 %2		ret i32 %2
}		}

		define i32 @icmp_ne_constant_2048(i32 %a) nounwind {
		; RV32I-LABEL: icmp_ne_constant_2048:
		; RV32I: # %bb.0:
		; RV32I-NEXT: addi a0, a0, -2048
		; RV32I-NEXT: snez a0, a0
		; RV32I-NEXT: ret
		%1 = icmp ne i32 %a, 2048
		%2 = zext i1 %1 to i32
		ret i32 %2
		}

		define i32 @icmp_ne_constant_neg_2048(i32 %a) nounwind {
		; RV32I-LABEL: icmp_ne_constant_neg_2048:
		; RV32I: # %bb.0:
		; RV32I-NEXT: addi a1, zero, -2048
		; RV32I-NEXT: xor a0, a0, a1
		; RV32I-NEXT: snez a0, a0
		; RV32I-NEXT: ret
		%1 = icmp ne i32 %a, -2048
		%2 = zext i1 %1 to i32
		ret i32 %2
		}

define i32 @icmp_nez(i32 %a) nounwind {		define i32 @icmp_nez(i32 %a) nounwind {
; RV32I-LABEL: icmp_nez:		; RV32I-LABEL: icmp_nez:
; RV32I: # %bb.0:		; RV32I: # %bb.0:
; RV32I-NEXT: snez a0, a0		; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: ret		; RV32I-NEXT: ret
%1 = icmp ne i32 %a, 0		%1 = icmp ne i32 %a, 0
%2 = zext i1 %1 to i32		%2 = zext i1 %1 to i32
ret i32 %2		ret i32 %2
▲ Show 20 Lines • Show All 87 Lines • Show Last 20 Lines

llvm/test/CodeGen/RISCV/setcc-logic.ll

Show First 20 Lines • Show All 96 Lines • ▼ Show 20 Lines	; RV64I-NEXT: ret
%b = icmp ne i32 %x, 60		%b = icmp ne i32 %x, 60
%r = and i1 %a, %b		%r = and i1 %a, %b
ret i1 %r		ret i1 %r
}		}

define i1 @and_icmps_const_not1bit_diff(i32 %x) nounwind {		define i1 @and_icmps_const_not1bit_diff(i32 %x) nounwind {
; RV32I-LABEL: and_icmps_const_not1bit_diff:		; RV32I-LABEL: and_icmps_const_not1bit_diff:
; RV32I: # %bb.0:		; RV32I: # %bb.0:
; RV32I-NEXT: xori a1, a0, 44		; RV32I-NEXT: addi a1, a0, -44
; RV32I-NEXT: snez a1, a1		; RV32I-NEXT: snez a1, a1
; RV32I-NEXT: xori a0, a0, 92		; RV32I-NEXT: addi a0, a0, -92
; RV32I-NEXT: snez a0, a0		; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: and a0, a1, a0		; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: ret		; RV32I-NEXT: ret
;		;
; RV64I-LABEL: and_icmps_const_not1bit_diff:		; RV64I-LABEL: and_icmps_const_not1bit_diff:
; RV64I: # %bb.0:		; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 32		; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32		; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: xori a1, a0, 44		; RV64I-NEXT: addi a1, a0, -44
; RV64I-NEXT: snez a1, a1		; RV64I-NEXT: snez a1, a1
; RV64I-NEXT: xori a0, a0, 92		; RV64I-NEXT: addi a0, a0, -92
; RV64I-NEXT: snez a0, a0		; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: and a0, a1, a0		; RV64I-NEXT: and a0, a1, a0
; RV64I-NEXT: ret		; RV64I-NEXT: ret
%a = icmp ne i32 %x, 44		%a = icmp ne i32 %x, 44
%b = icmp ne i32 %x, 92		%b = icmp ne i32 %x, 92
%r = and i1 %a, %b		%r = and i1 %a, %b
ret i1 %r		ret i1 %r
}		}