This is an archive of the discontinued LLVM Phabricator instance.

Add DAG optimisation for FP16_TO_FP
ClosedPublic

Authored by olista01 on Aug 19 2015, 6:25 AM.

Download Raw Diff

Details

Reviewers

Summary

The FP16_TO_FP node only uses the bottom 16 bits of its input, so the
following pattern can be optimised by removing the AND:

(FP16_TO_FP (AND op, 0xffff)) -> (FP16_TO_FP op)

This is a common pattern for ARM targets when functions have __fp16
arguments, as they are passed as floats (so that they get passed in the
correct registers), but then bitcast and truncated to ignore the top 16
bits.

Diff Detail

Event Timeline

olista01 updated this revision to Diff 32544. Aug 19 2015, 6:25 AM

olista01 retitled this revision to Add DAG optimisation for FP16_TO_FP.

olista01 updated this object.

olista01 set the repository for this revision to rL LLVM.

olista01 added a subscriber: llvm-commits.

Herald added a subscriber: aemerson. · View Herald Transcript · Aug 19 2015, 6:25 AM

olista01 added a parent revision: D12148: [ARM] Allow passing/returning of __fp16 arguments. Aug 19 2015, 6:26 AM

Apart from my comment, looks good.

lib/CodeGen/SelectionDAG/DAGCombiner.cpp
12987	Is this always (op & 0xffff) or can you also have (0xffff & op)?

olista01 added inline comments. Aug 24 2015, 2:39 AM

lib/CodeGen/SelectionDAG/DAGCombiner.cpp
12987	visitAND canonicalises ANDs to have the constant (if there is one) on the RHS, so this isn't necessary.

olista01 accepted this revision. Aug 24 2015, 2:48 AM

olista01 added a reviewer: olista01.

This revision is now accepted and ready to land. Aug 24 2015, 2:48 AM

Thanks, committed revision 245832.

Could you also have a look at the related clang patch, D12148?

Revision Contents

Path

Size

lib/

CodeGen/

SelectionDAG/

DAGCombiner.cpp

17 lines

test/

CodeGen/

ARM/

fp16-args.ll

40 lines

Diff 32544

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 306 Lines • ▼ Show 20 Lines	private:
SDValue visitVECTOR_SHUFFLE(SDNode *N);		SDValue visitVECTOR_SHUFFLE(SDNode *N);
SDValue visitSCALAR_TO_VECTOR(SDNode *N);		SDValue visitSCALAR_TO_VECTOR(SDNode *N);
SDValue visitINSERT_SUBVECTOR(SDNode *N);		SDValue visitINSERT_SUBVECTOR(SDNode *N);
SDValue visitMLOAD(SDNode *N);		SDValue visitMLOAD(SDNode *N);
SDValue visitMSTORE(SDNode *N);		SDValue visitMSTORE(SDNode *N);
SDValue visitMGATHER(SDNode *N);		SDValue visitMGATHER(SDNode *N);
SDValue visitMSCATTER(SDNode *N);		SDValue visitMSCATTER(SDNode *N);
SDValue visitFP_TO_FP16(SDNode *N);		SDValue visitFP_TO_FP16(SDNode *N);
		SDValue visitFP16_TO_FP(SDNode *N);

SDValue visitFADDForFMACombine(SDNode *N);		SDValue visitFADDForFMACombine(SDNode *N);
SDValue visitFSUBForFMACombine(SDNode *N);		SDValue visitFSUBForFMACombine(SDNode *N);

SDValue XformToShuffleWithZero(SDNode *N);		SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);		SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);

SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);		SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
▲ Show 20 Lines • Show All 1,078 Lines • ▼ Show 20 Lines	SDValue DAGCombiner::visit(SDNode *N) {
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);		case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);		case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);		case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
case ISD::MGATHER: return visitMGATHER(N);		case ISD::MGATHER: return visitMGATHER(N);
case ISD::MLOAD: return visitMLOAD(N);		case ISD::MLOAD: return visitMLOAD(N);
case ISD::MSCATTER: return visitMSCATTER(N);		case ISD::MSCATTER: return visitMSCATTER(N);
case ISD::MSTORE: return visitMSTORE(N);		case ISD::MSTORE: return visitMSTORE(N);
case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);		case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
		case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
}		}
return SDValue();		return SDValue();
}		}

SDValue DAGCombiner::combine(SDNode *N) {		SDValue DAGCombiner::combine(SDNode *N) {
SDValue RV = visit(N);		SDValue RV = visit(N);

// If nothing happened, try a target-specific DAG combine.		// If nothing happened, try a target-specific DAG combine.
▲ Show 20 Lines • Show All 11,555 Lines • ▼ Show 20 Lines	SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {

// fold (fp_to_fp16 (fp16_to_fp op)) -> op		// fold (fp_to_fp16 (fp16_to_fp op)) -> op
if (N0->getOpcode() == ISD::FP16_TO_FP)		if (N0->getOpcode() == ISD::FP16_TO_FP)
return N0->getOperand(0);		return N0->getOperand(0);

return SDValue();		return SDValue();
}		}

		SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
		SDValue N0 = N->getOperand(0);

		// fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
		if (N0->getOpcode() == ISD::AND) {
		ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
		[inline review comment] rengolin: Is this always (op & 0xffff) or can you also have (0xffff & op)?
		[inline review comment] olista01 (author): visitAND canonicalises ANDs to have the constant (if there is one) on the RHS, so this isn't necessary.
		if (AndConst && AndConst->getAPIntValue() == 0xffff) {
		return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
		N0.getOperand(0));
		}
		}

		return SDValue();
		}

/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle		/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.		/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>		/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
/// vector_shuffle V, Zero, <0, 4, 2, 4>		/// vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {		SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);		SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);		SDValue RHS = N->getOperand(1);
▲ Show 20 Lines • Show All 1,155 Lines • Show Last 20 Lines

test/CodeGen/ARM/fp16-args.ll

This file was added.

				; RUN: llc -float-abi soft -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT
				; RUN: llc -float-abi hard -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD

				target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
				target triple = "armv7a--none-eabi"

				define float @foo(float %a.coerce, float %b.coerce) {
				entry:
				%0 = bitcast float %a.coerce to i32
				%tmp.0.extract.trunc = trunc i32 %0 to i16
				%1 = bitcast i16 %tmp.0.extract.trunc to half
				%2 = bitcast float %b.coerce to i32
				%tmp1.0.extract.trunc = trunc i32 %2 to i16
				%3 = bitcast i16 %tmp1.0.extract.trunc to half
				%4 = fadd half %1, %3
				%5 = bitcast half %4 to i16
				%tmp5.0.insert.ext = zext i16 %5 to i32
				%6 = bitcast i32 %tmp5.0.insert.ext to float
				ret float %6
				; CHECK: foo:

				; SOFT: vmov {{s[0-9]+}}, r1
				; SOFT: vmov {{s[0-9]+}}, r0
				; SOFT: vcvtb.f32.f16 {{s[0-9]+}}, {{s[0-9]+}}
				; SOFT: vcvtb.f32.f16 {{s[0-9]+}}, {{s[0-9]+}}
				; SOFT: vadd.f32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
				; SOFT: vcvtb.f16.f32 {{s[0-9]+}}, {{s[0-9]+}}
				; SOFT: vmov r0, {{s[0-9]+}}

				; HARD-NOT: vmov
				; HARD-NOT: uxth
				; HARD: vcvtb.f32.f16 {{s[0-9]+}}, s1
				; HARD: vcvtb.f32.f16 {{s[0-9]+}}, s0
				; HARD: vadd.f32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
				; HARD: vcvtb.f16.f32 s0, {{s[0-9]+}}
				; HARD-NOT: vmov
				; HARD-NOT: uxth

				; CHECK: bx lr
				}