Diff 156524

llvm/trunk/lib/Target/ARM/A15SDOptimizer.cpp

	Show First 20 Lines • Show All 654 Lines • ▼ Show 20 Lines

	bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {			bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
	if (skipFunction(Fn.getFunction()))			if (skipFunction(Fn.getFunction()))
	return false;			return false;

	const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();			const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
	// Since the A15SDOptimizer pass can insert VDUP instructions, it can only be			// Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
	// enabled when NEON is available.			// enabled when NEON is available.
	if (!(STI.isCortexA15() && STI.hasNEON()))			if (!(STI.useSplatVFPToNeon() && STI.hasNEON()))
	return false;			return false;

	TII = STI.getInstrInfo();			TII = STI.getInstrInfo();
	TRI = STI.getRegisterInfo();			TRI = STI.getRegisterInfo();
	MRI = &Fn.getRegInfo();			MRI = &Fn.getRegInfo();
	bool Modified = false;			bool Modified = false;

	LLVM_DEBUG(dbgs() << "Running on function " << Fn.getName() << "\n");			LLVM_DEBUG(dbgs() << "Running on function " << Fn.getName() << "\n");

	DeadInstr.clear();			DeadInstr.clear();
	Show All 18 Lines

llvm/trunk/lib/Target/ARM/ARM.td

Show First 20 Lines • Show All 189 Lines • ▼ Show 20 Lines	def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg",
"SlowLoadDSubregister", "true",		"SlowLoadDSubregister", "true",
"Loading into D subregs is slow">;		"Loading into D subregs is slow">;

// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD.		// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD.
def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs",		def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs",
"DontWidenVMOVS", "true",		"DontWidenVMOVS", "true",
"Don't widen VMOVS to VMOVD">;		"Don't widen VMOVS to VMOVD">;

		// Some targets (e.g. Cortex-A15) prefer to avoid mixing operations on different
		// VFP register widths.
		def FeatureSplatVFPToNeon : SubtargetFeature<"splat-vfp-neon",
		"SplatVFPToNeon", "true",
		"Splat register from VFP to NEON",
		[FeatureDontWidenVMOVS]>;

// Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions.		// Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions.
def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx",		def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx",
"ExpandMLx", "true",		"ExpandMLx", "true",
"Expand VFP/NEON MLA/MLS instructions">;		"Expand VFP/NEON MLA/MLS instructions">;

// Some targets have special RAW hazards for VFP/NEON VMLA/VMLS.		// Some targets have special RAW hazards for VFP/NEON VMLA/VMLS.
def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards",		def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards",
"true", "Has VMLx hazards">;		"true", "Has VMLx hazards">;
▲ Show 20 Lines • Show All 608 Lines • ▼ Show 20 Lines	def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12,
FeatureVMLxForwarding,		FeatureVMLxForwarding,
FeatureVFP4,		FeatureVFP4,
FeatureAvoidPartialCPSR,		FeatureAvoidPartialCPSR,
FeatureVirtualization,		FeatureVirtualization,
FeatureMP]>;		FeatureMP]>;

def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,		def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
FeatureDontWidenVMOVS,		FeatureDontWidenVMOVS,
		FeatureSplatVFPToNeon,
FeatureHasRetAddrStack,		FeatureHasRetAddrStack,
FeatureMuxedUnits,		FeatureMuxedUnits,
FeatureTrustZone,		FeatureTrustZone,
FeatureVFP4,		FeatureVFP4,
FeatureMP,		FeatureMP,
FeatureCheckVLDnAlign,		FeatureCheckVLDnAlign,
FeatureAvoidPartialCPSR,		FeatureAvoidPartialCPSR,
FeatureVirtualization]>;		FeatureVirtualization]>;
▲ Show 20 Lines • Show All 257 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/ARM/ARMSubtarget.h

Show First 20 Lines • Show All 346 Lines • ▼ Show 20 Lines	protected:
bool SlowOddRegister = false;		bool SlowOddRegister = false;

/// If true, loading into a D subregister will be penalized.		/// If true, loading into a D subregister will be penalized.
bool SlowLoadDSubregister = false;		bool SlowLoadDSubregister = false;

/// If true, the AGU and NEON/FPU units are multiplexed.		/// If true, the AGU and NEON/FPU units are multiplexed.
bool HasMuxedUnits = false;		bool HasMuxedUnits = false;

/// If true, VMOVS will never be widened to VMOVD		/// If true, VMOVS will never be widened to VMOVD.
bool DontWidenVMOVS = false;		bool DontWidenVMOVS = false;

		/// If true, splat a register between VFP and NEON instructions.
		bool SplatVFPToNeon = false;

/// If true, run the MLx expansion pass.		/// If true, run the MLx expansion pass.
bool ExpandMLx = false;		bool ExpandMLx = false;

/// If true, VFP/NEON VMLA/VMLS have special RAW hazards.		/// If true, VFP/NEON VMLA/VMLS have special RAW hazards.
bool HasVMLxHazards = false;		bool HasVMLxHazards = false;

// If true, read thread pointer from coprocessor register.		// If true, read thread pointer from coprocessor register.
bool ReadTPHard = false;		bool ReadTPHard = false;
▲ Show 20 Lines • Show All 220 Lines • ▼ Show 20 Lines	public:
bool preferVMOVSR() const { return PreferVMOVSR; }		bool preferVMOVSR() const { return PreferVMOVSR; }
bool preferISHSTBarriers() const { return PreferISHST; }		bool preferISHSTBarriers() const { return PreferISHST; }
bool expandMLx() const { return ExpandMLx; }		bool expandMLx() const { return ExpandMLx; }
bool hasVMLxHazards() const { return HasVMLxHazards; }		bool hasVMLxHazards() const { return HasVMLxHazards; }
bool hasSlowOddRegister() const { return SlowOddRegister; }		bool hasSlowOddRegister() const { return SlowOddRegister; }
bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; }		bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; }
bool hasMuxedUnits() const { return HasMuxedUnits; }		bool hasMuxedUnits() const { return HasMuxedUnits; }
bool dontWidenVMOVS() const { return DontWidenVMOVS; }		bool dontWidenVMOVS() const { return DontWidenVMOVS; }
		bool useSplatVFPToNeon() const { return SplatVFPToNeon; }
bool useNEONForFPMovs() const { return UseNEONForFPMovs; }		bool useNEONForFPMovs() const { return UseNEONForFPMovs; }
bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; }		bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; }
bool nonpipelinedVFP() const { return NonpipelinedVFP; }		bool nonpipelinedVFP() const { return NonpipelinedVFP; }
bool prefers32BitThumb() const { return Pref32BitThumb; }		bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }		bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; }		bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; }
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }		bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
bool hasRetAddrStack() const { return HasRetAddrStack; }		bool hasRetAddrStack() const { return HasRetAddrStack; }
▲ Show 20 Lines • Show All 193 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/ARM/a15-SD-dep.ll

; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -disable-a15-sd-optimization -verify-machineinstrs < %s \| FileCheck -check-prefix=CHECK-DISABLED %s		; RUN: llc -O1 -mattr=+splat-vfp-neon -mtriple=armv7-linux-gnueabi -verify-machineinstrs -disable-a15-sd-optimization < %s \| FileCheck -check-prefixes=CHECK,CHECK-DISABLED %s
; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s \| FileCheck -check-prefix=CHECK-ENABLED %s		; RUN: llc -O1 -mattr=-splat-vfp-neon -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s \| FileCheck -check-prefixes=CHECK,CHECK-DISABLED %s
		; RUN: llc -O1 -mattr=+splat-vfp-neon -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s \| FileCheck -check-prefixes=CHECK,CHECK-ENABLED %s

; CHECK-ENABLED-LABEL: t1:		; CHECK-LABEL: t1:
; CHECK-DISABLED-LABEL: t1:
define <2 x float> @t1(float %f) {		define <2 x float> @t1(float %f) {
; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]		; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]
; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]		; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
%i1 = insertelement <2 x float> undef, float %f, i32 1		%i1 = insertelement <2 x float> undef, float %f, i32 1
%i2 = fadd <2 x float> %i1, %i1		%i2 = fadd <2 x float> %i1, %i1
ret <2 x float> %i2		ret <2 x float> %i2
}		}

; CHECK-ENABLED-LABEL: t2:		; CHECK-LABEL: t2:
; CHECK-DISABLED-LABEL: t2:
define <4 x float> @t2(float %g, float %f) {		define <4 x float> @t2(float %g, float %f) {
; CHECK-ENABLED: vdup.32 q{{[0-9]*}}, d0[0]		; CHECK-ENABLED: vdup.32 q{{[0-9]*}}, d0[0]
; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]		; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
%i1 = insertelement <4 x float> undef, float %f, i32 1		%i1 = insertelement <4 x float> undef, float %f, i32 1
%i2 = fadd <4 x float> %i1, %i1		%i2 = fadd <4 x float> %i1, %i1
ret <4 x float> %i2		ret <4 x float> %i2
}		}

; CHECK-ENABLED-LABEL: t3:		; CHECK-LABEL: t3:
; CHECK-DISABLED-LABEL: t3:
define arm_aapcs_vfpcc <2 x float> @t3(float %f) {		define arm_aapcs_vfpcc <2 x float> @t3(float %f) {
; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]		; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]
; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]		; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
%i1 = insertelement <2 x float> undef, float %f, i32 1		%i1 = insertelement <2 x float> undef, float %f, i32 1
%i2 = fadd <2 x float> %i1, %i1		%i2 = fadd <2 x float> %i1, %i1
ret <2 x float> %i2		ret <2 x float> %i2
}		}

; CHECK-ENABLED-LABEL: t4:		; CHECK-LABEL: t4:
; CHECK-DISABLED-LABEL: t4:
define <2 x float> @t4(float %f) {		define <2 x float> @t4(float %f) {
; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]		; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]
; CHECK-DISABLED-NOT: vdup		; CHECK-DISABLED-NOT: vdup
%i1 = insertelement <2 x float> undef, float %f, i32 1		%i1 = insertelement <2 x float> undef, float %f, i32 1
br label %b		br label %b

; Block %b has an S-reg as live-in.		; Block %b has an S-reg as live-in.
b:		b:
%i2 = fadd <2 x float> %i1, %i1		%i2 = fadd <2 x float> %i1, %i1
ret <2 x float> %i2		ret <2 x float> %i2
}		}

; CHECK-ENABLED-LABEL: t5:		; CHECK-LABEL: t5:
; CHECK-DISABLED-LABEL: t5:
define arm_aapcs_vfpcc <4 x float> @t5(<4 x float> %q, float %f) {		define arm_aapcs_vfpcc <4 x float> @t5(<4 x float> %q, float %f) {
; CHECK-ENABLED: vdup.32 d{{[0-9]}}, d{{[0-9]}}[0]		; CHECK-ENABLED: vdup.32 d{{[0-9]}}, d{{[0-9]}}[0]
; CHECK-ENABLED: vadd.f32		; CHECK-ENABLED: vadd.f32
; CHECK-ENABLED-NEXT: bx lr		; CHECK-ENABLED-NEXT: bx lr
; CHECK-DISABLED-NOT: vdup		; CHECK-DISABLED-NOT: vdup
%i1 = insertelement <4 x float> %q, float %f, i32 1		%i1 = insertelement <4 x float> %q, float %f, i32 1
%i2 = fadd <4 x float> %i1, %i1		%i2 = fadd <4 x float> %i1, %i1
ret <4 x float> %i2		ret <4 x float> %i2
}		}

; Test that DPair can be successfully passed as QPR.		; Test that DPair can be successfully passed as QPR.
; CHECK-ENABLED-LABEL: test_DPair1:		; CHECK-LABEL: test_DPair1:
; CHECK-DISABLED-LABEL: test_DPair1:
define void @test_DPair1(i32 %vsout, i8* nocapture %out, float %x, float %y) {		define void @test_DPair1(i32 %vsout, i8* nocapture %out, float %x, float %y) {
entry:		entry:
%0 = insertelement <4 x float> undef, float %x, i32 1		%0 = insertelement <4 x float> undef, float %x, i32 1
%1 = insertelement <4 x float> %0, float %y, i32 0		%1 = insertelement <4 x float> %0, float %y, i32 0
; CHECK-ENABLED: vdup.32 d{{[0-9]}}, d{{[0-9]}}[0]		; CHECK-ENABLED: vdup.32 d{{[0-9]}}, d{{[0-9]}}[0]
; CHECK-ENABLED: vdup.32 d{{[0-9]}}, d{{[0-9]}}[1]		; CHECK-ENABLED: vdup.32 d{{[0-9]}}, d{{[0-9]}}[1]
; CHECK-ENABLED: vdup.32 d{{[0-9]}}, d{{[0-9]}}[0]		; CHECK-ENABLED: vdup.32 d{{[0-9]}}, d{{[0-9]}}[0]
; CHECK-ENABLED: vdup.32 d{{[0-9]}}, d{{[0-9]}}[1]		; CHECK-ENABLED: vdup.32 d{{[0-9]}}, d{{[0-9]}}[1]
Show All 13 Lines	sw.bb6: ; preds = %sw.bb, %entry
%conv = fptoui float %3 to i8		%conv = fptoui float %3 to i8
store i8 %conv, i8* %out, align 1		store i8 %conv, i8* %out, align 1
ret void		ret void

sw.epilog: ; preds = %entry		sw.epilog: ; preds = %entry
ret void		ret void
}		}

; CHECK-ENABLED-LABEL: test_DPair2:		; CHECK-LABEL: test_DPair2:
; CHECK-DISABLED-LABEL: test_DPair2:
define void @test_DPair2(i32 %vsout, i8* nocapture %out, float %x) {		define void @test_DPair2(i32 %vsout, i8* nocapture %out, float %x) {
entry:		entry:
%0 = insertelement <4 x float> undef, float %x, i32 0		%0 = insertelement <4 x float> undef, float %x, i32 0
; CHECK-ENABLED: vdup.32 q{{[0-9]}}, d{{[0-9]}}[0]		; CHECK-ENABLED: vdup.32 q{{[0-9]}}, d{{[0-9]}}[0]
; CHECK-DISABLED-NOT: vdup		; CHECK-DISABLED-NOT: vdup
switch i32 %vsout, label %sw.epilog [		switch i32 %vsout, label %sw.epilog [
i32 1, label %sw.bb		i32 1, label %sw.bb
i32 0, label %sw.bb1		i32 0, label %sw.bb1
Show All 16 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[ARM] Add new feature to enable optimizing the VFP registers
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 156524

llvm/trunk/lib/Target/ARM/A15SDOptimizer.cpp

llvm/trunk/lib/Target/ARM/ARM.td

llvm/trunk/lib/Target/ARM/ARMSubtarget.h

llvm/trunk/test/CodeGen/ARM/a15-SD-dep.ll

This is an archive of the discontinued LLVM Phabricator instance.

[ARM] Add new feature to enable optimizing the VFP registers
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 156524

llvm/trunk/lib/Target/ARM/A15SDOptimizer.cpp

llvm/trunk/lib/Target/ARM/ARM.td

llvm/trunk/lib/Target/ARM/ARMSubtarget.h

llvm/trunk/test/CodeGen/ARM/a15-SD-dep.ll

[ARM] Add new feature to enable optimizing the VFP registers
ClosedPublic