This is an archive of the discontinued LLVM Phabricator instance.

[SystemZ] Improve handling of @llvm.ctlz intrinsic
ClosedPublic

Authored by jonpa on Feb 4 2019, 1:42 PM.

Download Raw Diff

Details

Reviewers

Summary

Since SystemZ supports counting of leading zeros with the FLOGR instruction, it seems isCheapToSpeculateCtlz() should return true.

The effect on SPEC of doing this is a few fewer branches but also a few more other instructions, which I have not looked into in detail.

So far I have assumed that speculation is better than a branch also in the cases that require extension and subtraction (i8 and i16).

I also discovered that even with this in place (which stops CodeGenPrepare from emitting the ctlz_zero_undef instructions), these nodes still appear in benchmarks. I therefore also added isel handling for them, just as for ctlz nodes. This seems to have improved some cases that previously got expanded into huge sequences instead of using flogr.

The new tests can be run and the effects of the patch are demonstrated by them. I also saw an issue with unfolded adds of immediates (see FIXME note in test file).

Diff Detail

Event Timeline

jonpa created this revision.Feb 4 2019, 1:42 PM

Removed ctlz_zero_undef i64 since it is not needed.

LGTM, thanks!

This revision is now accepted and ready to land.Feb 6 2019, 2:27 AM

Thanks for review. r353330.

Revision Contents

Path

Size

lib/

Target/

SystemZ/

SystemZISelLowering.h

1 line

SystemZISelLowering.cpp

1 line

test/

CodeGen/

SystemZ/

scalar-ctlz.ll

107 lines

Diff 185437

lib/Target/SystemZ/SystemZISelLowering.h

Show First 20 Lines • Show All 389 Lines • ▼ Show 20 Lines	TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
// so promoting the integers is less efficient.		// so promoting the integers is less efficient.
//		//
// (c) there are no multiplication instructions for the widest integer		// (c) there are no multiplication instructions for the widest integer
// type (v2i64).		// type (v2i64).
if (VT.getScalarSizeInBits() % 8 == 0)		if (VT.getScalarSizeInBits() % 8 == 0)
return TypeWidenVector;		return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);		return TargetLoweringBase::getPreferredVectorAction(VT);
}		}
		bool isCheapToSpeculateCtlz() const override { return true; }
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,		EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
EVT) const override;		EVT) const override;
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;		bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;		bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
bool isLegalICmpImmediate(int64_t Imm) const override;		bool isLegalICmpImmediate(int64_t Imm) const override;
bool isLegalAddImmediate(int64_t Imm) const override;		bool isLegalAddImmediate(int64_t Imm) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,		bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS,		unsigned AS,
▲ Show 20 Lines • Show All 242 Lines • Show Last 20 Lines

lib/Target/SystemZ/SystemZISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 243 Lines • ▼ Show 20 Lines	SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// Handle unsigned 32-bit types as signed 64-bit types.		// Handle unsigned 32-bit types as signed 64-bit types.
if (!Subtarget.hasFPExtension()) {		if (!Subtarget.hasFPExtension()) {
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);		setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);		setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
}		}

// We have native support for a 64-bit CTLZ, via FLOGR.		// We have native support for a 64-bit CTLZ, via FLOGR.
setOperationAction(ISD::CTLZ, MVT::i32, Promote);		setOperationAction(ISD::CTLZ, MVT::i32, Promote);
		setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
setOperationAction(ISD::CTLZ, MVT::i64, Legal);		setOperationAction(ISD::CTLZ, MVT::i64, Legal);

// Give LowerOperation the chance to replace 64-bit ORs with subregs.		// Give LowerOperation the chance to replace 64-bit ORs with subregs.
setOperationAction(ISD::OR, MVT::i64, Custom);		setOperationAction(ISD::OR, MVT::i64, Custom);

// FIXME: Can we support these natively?		// FIXME: Can we support these natively?
setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);		setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);		setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
▲ Show 20 Lines • Show All 7,155 Lines • Show Last 20 Lines

test/CodeGen/SystemZ/scalar-ctlz.ll

This file was added.

				; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
				;
				; FIXME: two consecutive immediate adds not fused in i16/i8 functions.

				declare i64 @llvm.ctlz.i64(i64, i1)
				declare i32 @llvm.ctlz.i32(i32, i1)
				declare i16 @llvm.ctlz.i16(i16, i1)
				declare i8 @llvm.ctlz.i8(i8, i1)

				; f0: calls @llvm.ctlz.i64 with the i1 argument false. The checks expect a
				; flogr after the %bb.0 marker, with no %bb.1 marker appearing before it.
				define i64 @f0(i64 %arg) {
				; CHECK-LABEL: f0:
				; CHECK-LABEL: %bb.0:
				; CHECK-NOT: %bb.1:
				; CHECK: flogr
				%1 = tail call i64 @llvm.ctlz.i64(i64 %arg, i1 false)
				ret i64 %1
				}

				; f1: calls @llvm.ctlz.i64 with the i1 argument true. The checks expect
				; flogr, a kill comment, and br %r14 on consecutive lines after %bb.0.
				define i64 @f1(i64 %arg) {
				; CHECK-LABEL: f1:
				; CHECK-LABEL: %bb.0:
				; CHECK-NEXT: flogr
				; CHECK-NEXT: # kill
				; CHECK-NEXT: br %r14
				%1 = tail call i64 @llvm.ctlz.i64(i64 %arg, i1 true)
				ret i64 %1
				}

				; f2: calls @llvm.ctlz.i32 with the i1 argument false. The checks expect
				; llgfr, flogr, and aghi -32 on consecutive lines, then the return.
				define i32 @f2(i32 %arg) {
				; CHECK-LABEL: f2:
				; CHECK-LABEL: %bb.0:
				; CHECK-NEXT: llgfr %r0, %r2
				; CHECK-NEXT: flogr %r2, %r0
				; CHECK-NEXT: aghi %r2, -32
				; CHECK-NEXT: # kill
				; CHECK-NEXT: br %r14
				%1 = tail call i32 @llvm.ctlz.i32(i32 %arg, i1 false)
				ret i32 %1
				}

				; f3: calls @llvm.ctlz.i32 with the i1 argument true. The checks expect
				; llgfr, flogr, and aghi -32 on consecutive lines, then the return.
				define i32 @f3(i32 %arg) {
				; CHECK-LABEL: f3:
				; CHECK-LABEL: %bb.0:
				; CHECK-NEXT: llgfr %r0, %r2
				; CHECK-NEXT: flogr %r2, %r0
				; CHECK-NEXT: aghi %r2, -32
				; CHECK-NEXT: # kill
				; CHECK-NEXT: br %r14
				%1 = tail call i32 @llvm.ctlz.i32(i32 %arg, i1 true)
				ret i32 %1
				}

				; f4: calls @llvm.ctlz.i16 with the i1 argument false. The checks expect
				; llghr, flogr, then two separate immediate adds (aghi -32 and ahi -16);
				; this is the unfused pair that the FIXME at the top of the file refers to.
				define i16 @f4(i16 %arg) {
				; CHECK-LABEL: f4:
				; CHECK-LABEL: %bb.0:
				; CHECK-NEXT: # kill
				; CHECK-NEXT: llghr %r0, %r2
				; CHECK-NEXT: flogr %r2, %r0
				; CHECK-NEXT: aghi %r2, -32
				; CHECK-NEXT: ahi %r2, -16
				; CHECK-NEXT: # kill
				; CHECK-NEXT: br %r14
				%1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 false)
				ret i16 %1
				}

				; f5: calls @llvm.ctlz.i16 with the i1 argument true. The checks expect
				; llghr, flogr, then two separate immediate adds (aghi -32 and ahi -16);
				; this is the unfused pair that the FIXME at the top of the file refers to.
				define i16 @f5(i16 %arg) {
				; CHECK-LABEL: f5:
				; CHECK-LABEL: %bb.0:
				; CHECK-NEXT: # kill
				; CHECK-NEXT: llghr %r0, %r2
				; CHECK-NEXT: flogr %r2, %r0
				; CHECK-NEXT: aghi %r2, -32
				; CHECK-NEXT: ahi %r2, -16
				; CHECK-NEXT: # kill
				; CHECK-NEXT: br %r14
				%1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 true)
				ret i16 %1
				}

				; f6: calls @llvm.ctlz.i8 with the i1 argument false. The checks expect
				; llgcr, flogr, then two separate immediate adds (aghi -32 and ahi -24);
				; this is the unfused pair that the FIXME at the top of the file refers to.
				define i8 @f6(i8 %arg) {
				; CHECK-LABEL: f6:
				; CHECK-LABEL: %bb.0:
				; CHECK-NEXT: # kill
				; CHECK-NEXT: llgcr %r0, %r2
				; CHECK-NEXT: flogr %r2, %r0
				; CHECK-NEXT: aghi %r2, -32
				; CHECK-NEXT: ahi %r2, -24
				; CHECK-NEXT: # kill
				; CHECK-NEXT: br %r14
				%1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 false)
				ret i8 %1
				}

				; f7: calls @llvm.ctlz.i8 with the i1 argument true. The checks expect
				; llgcr, flogr, then two separate immediate adds (aghi -32 and ahi -24);
				; this is the unfused pair that the FIXME at the top of the file refers to.
				define i8 @f7(i8 %arg) {
				; CHECK-LABEL: f7:
				; CHECK-LABEL: %bb.0:
				; CHECK-NEXT: # kill
				; CHECK-NEXT: llgcr %r0, %r2
				; CHECK-NEXT: flogr %r2, %r0
				; CHECK-NEXT: aghi %r2, -32
				; CHECK-NEXT: ahi %r2, -24
				; CHECK-NEXT: # kill
				; CHECK-NEXT: br %r14
				%1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 true)
				ret i8 %1
				}