This is an archive of the discontinued LLVM Phabricator instance.

ARM: Add missing selection patterns for vnmla
ClosedPublic

Authored by gergo- on Jul 26 2017, 1:32 PM.

Download Raw Diff

Details

Reviewers

t.p.northover
efriedma
rengolin
RKSimon

Commits

rG2b1c3bb25dad: [ARM] Add missing selection patterns for vnmla
rL313972: [ARM] Add missing selection patterns for vnmla

Summary

(This is my first patch to LLVM. Let me know if there's anything I could improve.)

For the following function:

double fn1(double d0, double d1, double d2) {
  double a = -d0 - d1 * d2;
  return a;
}

on ARM, LLVM generates code along the lines of

	vneg.f64	d0, d0
	vmls.f64	d0, d1, d2

i.e., a negate and a multiply-subtract. The attached patch adds instruction selection patterns to allow it to generate the single instruction

	vnmla.f64	d0, d1, d2

(multiply-add with negation) instead, like GCC does.

Diff Detail

Repository: rL LLVM

Event Timeline

gergo- created this revision. · Jul 26 2017, 1:32 PM

Herald added subscribers: kristof.beyls, javed.absar, aemerson. · View Herald Transcript · Jul 26 2017, 1:32 PM

fhahn added a subscriber: fhahn. · Aug 4 2017, 7:18 AM

Makes sense to me, but the ARM guys should take a look.

Yes, LGTM.

This revision is now accepted and ready to land. · Sep 19 2017, 2:16 PM

@gergo- Do you have access rights to commit this?

No, I have no commit rights, someone else will have to commit.

Closed by commit rL313972: [ARM] Add missing selection patterns for vnmla (authored by RKSimon). · Explain Why · Sep 22 2017, 2:52 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

trunk/

lib/

Target/

ARM/

ARMInstrVFP.td

9 lines

test/

CodeGen/

ARM/

fnmscs.ll

69 lines

Diff 116318

llvm/trunk/lib/Target/ARM/ARMInstrVFP.td

	Show First 20 Lines • Show All 1,851 Lines • ▼ Show 20 Lines

	def VNMLAH : AHbI<0b11100, 0b01, 1, 0,			def VNMLAH : AHbI<0b11100, 0b01, 1, 0,
	(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),			(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
	IIC_fpMAC16, "vnmla", ".f16\t$Sd, $Sn, $Sm",			IIC_fpMAC16, "vnmla", ".f16\t$Sd, $Sn, $Sm",
	[]>,			[]>,
	RegConstraint<"$Sdin = $Sd">,			RegConstraint<"$Sdin = $Sd">,
	Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;			Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;

				// (-(a * b) - dst) -> -(dst + (a * b))
	def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),			def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
	(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,			(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
	Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;			Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
	def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),			def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
	(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,			(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
	Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;			Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;

				// (-dst - (a * b)) -> -(dst + (a * b))
				def : Pat<(fsub_mlx (fneg DPR:$dstin), (fmul_su DPR:$a, (f64 DPR:$b))),
				(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
				Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
				def : Pat<(fsub_mlx (fneg SPR:$dstin), (fmul_su SPR:$a, SPR:$b)),
				(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
				Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;

	def VNMLSD : ADbI<0b11100, 0b01, 0, 0,			def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
	(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),			(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
	IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",			IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
	[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),			[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
	(f64 DPR:$Ddin)))]>,			(f64 DPR:$Ddin)))]>,
	RegConstraint<"$Ddin = $Dd">,			RegConstraint<"$Ddin = $Dd">,
	Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,			Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
	Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;			Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
	▲ Show 20 Lines • Show All 516 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/ARM/fnmscs.ll

	; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \			; RUN: llc -mtriple=arm-eabihf -mattr=+vfp2 %s -o - \
	; RUN: \| FileCheck %s -check-prefix=VFP2			; RUN: \| FileCheck %s -check-prefix=VFP2

	; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \			; RUN: llc -mtriple=arm-eabihf -mattr=+vfp3 %s -o - \
				; RUN: \| FileCheck %s -check-prefix=VFP3

				; RUN: llc -mtriple=arm-eabihf -mattr=+neon %s -o - \
	; RUN: \| FileCheck %s -check-prefix=NEON			; RUN: \| FileCheck %s -check-prefix=NEON

	; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \			; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
	; RUN: \| FileCheck %s -check-prefix=A8			; RUN: \| FileCheck %s -check-prefix=A8

	; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 -regalloc=basic %s -o - \			; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 -regalloc=basic %s -o - \
	; RUN: \| FileCheck %s -check-prefix=A8			; RUN: \| FileCheck %s -check-prefix=A8

	; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \			; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \
	; RUN: \| FileCheck %s -check-prefix=A8U			; RUN: \| FileCheck %s -check-prefix=A8U

	; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \			; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \
	; RUN: \| FileCheck %s -check-prefix=A8U			; RUN: \| FileCheck %s -check-prefix=A8U

	define float @t1(float %acc, float %a, float %b) nounwind {			define float @t1(float %acc, float %a, float %b) nounwind {
	entry:			entry:
	; VFP2-LABEL: t1:			; VFP2-LABEL: t1:
	; VFP2: vnmla.f32			; VFP2: vnmla.f32

				; VFP3-LABEL: t1:
				; VFP3: vnmla.f32

	; NEON-LABEL: t1:			; NEON-LABEL: t1:
	; NEON: vnmla.f32			; NEON: vnmla.f32

	; A8U-LABEL: t1:			; A8U-LABEL: t1:
	; A8U: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}			; A8U: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
	; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}			; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}

	; A8-LABEL: t1:			; A8-LABEL: t1:
	; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}			; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
	; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}			; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
	%0 = fmul float %a, %b			%0 = fmul float %a, %b
	%1 = fsub float -0.0, %0			%1 = fsub float -0.0, %0
	%2 = fsub float %1, %acc			%2 = fsub float %1, %acc
	ret float %2			ret float %2
	}			}

	define float @t2(float %acc, float %a, float %b) nounwind {			define float @t2(float %acc, float %a, float %b) nounwind {
	entry:			entry:
	; VFP2-LABEL: t2:			; VFP2-LABEL: t2:
	; VFP2: vnmla.f32			; VFP2: vnmla.f32

				; VFP3-LABEL: t2:
				; VFP3: vnmla.f32

	; NEON-LABEL: t2:			; NEON-LABEL: t2:
	; NEON: vnmla.f32			; NEON: vnmla.f32

	; A8U-LABEL: t2:			; A8U-LABEL: t2:
	; A8U: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}			; A8U: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
	; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}			; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}

	; A8-LABEL: t2:			; A8-LABEL: t2:
	; A8: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}			; A8: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
	; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}			; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
	%0 = fmul float %a, %b			%0 = fmul float %a, %b
	%1 = fmul float -1.0, %0			%1 = fmul float -1.0, %0
	%2 = fsub float %1, %acc			%2 = fsub float %1, %acc
	ret float %2			ret float %2
	}			}

	define double @t3(double %acc, double %a, double %b) nounwind {			define double @t3(double %acc, double %a, double %b) nounwind {
	entry:			entry:
	; VFP2-LABEL: t3:			; VFP2-LABEL: t3:
	; VFP2: vnmla.f64			; VFP2: vnmla.f64

				; VFP3-LABEL: t3:
				; VFP3: vnmla.f64

	; NEON-LABEL: t3:			; NEON-LABEL: t3:
	; NEON: vnmla.f64			; NEON: vnmla.f64

	; A8U-LABEL: t3:			; A8U-LABEL: t3:
	; A8U: vnmul.f64 d			; A8U: vnmul.f64 d
	; A8U: vsub.f64 d			; A8U: vsub.f64 d

	; A8-LABEL: t3:			; A8-LABEL: t3:
	; A8: vnmul.f64 d			; A8: vnmul.f64 d
	; A8: vsub.f64 d			; A8: vsub.f64 d
	%0 = fmul double %a, %b			%0 = fmul double %a, %b
	%1 = fsub double -0.0, %0			%1 = fsub double -0.0, %0
	%2 = fsub double %1, %acc			%2 = fsub double %1, %acc
	ret double %2			ret double %2
	}			}

	define double @t4(double %acc, double %a, double %b) nounwind {			define double @t4(double %acc, double %a, double %b) nounwind {
	entry:			entry:
	; VFP2-LABEL: t4:			; VFP2-LABEL: t4:
	; VFP2: vnmla.f64			; VFP2: vnmla.f64

				; VFP3-LABEL: t4:
				; VFP3: vnmla.f64

	; NEON-LABEL: t4:			; NEON-LABEL: t4:
	; NEON: vnmla.f64			; NEON: vnmla.f64

	; A8U-LABEL: t4:			; A8U-LABEL: t4:
	; A8U: vnmul.f64 d			; A8U: vnmul.f64 d
	; A8U: vsub.f64 d			; A8U: vsub.f64 d

	; A8-LABEL: t4:			; A8-LABEL: t4:
	; A8: vnmul.f64 d			; A8: vnmul.f64 d
	; A8: vsub.f64 d			; A8: vsub.f64 d
	%0 = fmul double %a, %b			%0 = fmul double %a, %b
	%1 = fmul double -1.0, %0			%1 = fmul double -1.0, %0
	%2 = fsub double %1, %acc			%2 = fsub double %1, %acc
	ret double %2			ret double %2
	}			}

				define double @t5(double %acc, double %a, double %b) nounwind {
				entry:
				; VFP2-LABEL: t5:
				; VFP2: vnmla.f64

				; VFP3-LABEL: t5:
				; VFP3: vnmla.f64

				; NEON-LABEL: t5:
				; NEON: vnmla.f64

				; A8U-LABEL: t5:
				; A8U: vmul.f64 d
				; A8U: vsub.f64 d

				; A8-LABEL: t5:
				; A8: vmul.f64 d
				; A8: vsub.f64 d

				%0 = fsub double -0.0, %acc
				%1 = fmul double %a, %b
				%2 = fsub double %0, %1
				ret double %2
				}

				define float @t6(float %acc, float %a, float %b) nounwind {
				entry:
				; VFP2-LABEL: t6:
				; VFP2: vnmla.f32

				; VFP3-LABEL: t6:
				; VFP3: vnmla.f32

				; NEON-LABEL: t6:
				; NEON: vnmla.f32

				; A8U-LABEL: t6:
				; A8U: vmul.f32 d
				; A8U: vsub.f32 d

				; A8-LABEL: t6:
				; A8: vmul.f32 s
				; A8: vsub.f32 s

				%0 = fsub float -0.0, %acc
				%1 = fmul float %a, %b
				%2 = fsub float %0, %1
				ret float %2
				}