Diff 397535

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//		//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
//		//
		dmgreen [inline comment; unsubmitted, done]: The alignments in AArch64 are usually set in MachineBlockPlacement::alignBlocks, which is probably the more general place for this. The alignments can then be set in the same places as setPrefLoopAlignment for each backend. Maybe at the moment, before we've fixed on proper values for AArch64 CPUs, we could use an option as a way of testing that the values are getting propagated correctly.
		dmgreen [inline comment; unsubmitted, done]: Do we need to override this method, or can it work more consistently, like TargetLoweringBase::setPrefLoopAlignment and getPrefLoopAlignment? That way we can set the value in AArch64TargetLowering::AArch64TargetLowering, as we already do for other loop alignments, and keep everything together. The (LoopAlign, MaxBytesForLoopAlign) values can be thought of as a pair. I think it makes sense to keep them as separate variables, but ideally they are set and used together.
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
//		//
// This file implements the AArch64TargetLowering class.		// This file implements the AArch64TargetLowering class.
//		//
▲ Show 20 Lines • Show All 946 Lines • ▼ Show 20 Lines	#undef LCALLNAME5
setSchedulingPreference(Sched::Hybrid);		setSchedulingPreference(Sched::Hybrid);

EnableExtLdPromotion = true;		EnableExtLdPromotion = true;

// Set required alignment.		// Set required alignment.
setMinFunctionAlignment(Align(4));		setMinFunctionAlignment(Align(4));
// Set preferred alignments.		// Set preferred alignments.
setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));		setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
		setMaxBytesForAlignment(STI.getMaxBytesForLoopAlignment());
setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));		setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));

// Only change the limit for entries in a jump table if specified by		// Only change the limit for entries in a jump table if specified by
// the sub target, but not at the command line.		// the sub target, but not at the command line.
unsigned MaxJT = STI.getMaximumJumpTableSize();		unsigned MaxJT = STI.getMaximumJumpTableSize();
if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)		if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
setMaximumJumpTableSize(MaxJT);		setMaximumJumpTableSize(MaxJT);

▲ Show 20 Lines • Show All 18,809 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64Subtarget.h

//===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -- C++ ---===//		//===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -- C++ ---===//
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
		dmgreen [inline comment; unsubmitted, not done]: MaxLoopAlignment -> MaxBytesForLoopAlignment
		dmgreen [inline comment; unsubmitted, not done]: getMaxBytesForAlignment -> getMaxBytesForLoopAlignment
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
//		//
// This file declares the AArch64 specific subclass of TargetSubtarget.		// This file declares the AArch64 specific subclass of TargetSubtarget.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H		#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H		#define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
▲ Show 20 Lines • Show All 259 Lines • ▼ Show 20 Lines	protected:
uint8_t MaxInterleaveFactor = 2;		uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;		uint8_t VectorInsertExtractBaseCost = 3;
uint16_t CacheLineSize = 0;		uint16_t CacheLineSize = 0;
uint16_t PrefetchDistance = 0;		uint16_t PrefetchDistance = 0;
uint16_t MinPrefetchStride = 1;		uint16_t MinPrefetchStride = 1;
unsigned MaxPrefetchIterationsAhead = UINT_MAX;		unsigned MaxPrefetchIterationsAhead = UINT_MAX;
unsigned PrefFunctionLogAlignment = 0;		unsigned PrefFunctionLogAlignment = 0;
unsigned PrefLoopLogAlignment = 0;		unsigned PrefLoopLogAlignment = 0;
		unsigned MaxBytesForLoopAlignment = 0;
unsigned MaxJumpTableSize = 0;		unsigned MaxJumpTableSize = 0;
unsigned WideningBaseCost = 0;		unsigned WideningBaseCost = 0;

// ReserveXRegister[i] - X#i is not available as a general purpose register.		// ReserveXRegister[i] - X#i is not available as a general purpose register.
BitVector ReserveXRegister;		BitVector ReserveXRegister;

// CustomCallUsedXRegister[i] - X#i call saved.		// CustomCallUsedXRegister[i] - X#i call saved.
BitVector CustomCallSavedXRegs;		BitVector CustomCallSavedXRegs;
▲ Show 20 Lines • Show All 175 Lines • ▼ Show 20 Lines	public:
unsigned getMaxPrefetchIterationsAhead() const override {		unsigned getMaxPrefetchIterationsAhead() const override {
return MaxPrefetchIterationsAhead;		return MaxPrefetchIterationsAhead;
}		}
unsigned getPrefFunctionLogAlignment() const {		unsigned getPrefFunctionLogAlignment() const {
return PrefFunctionLogAlignment;		return PrefFunctionLogAlignment;
}		}
unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; }		unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; }

		/// Returns the subtarget's MaxBytesForLoopAlignment tuning value.
		unsigned getMaxBytesForLoopAlignment() const {
		return MaxBytesForLoopAlignment;
		}

unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }		unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }

unsigned getWideningBaseCost() const { return WideningBaseCost; }		unsigned getWideningBaseCost() const { return WideningBaseCost; }

bool useExperimentalZeroingPseudos() const {		bool useExperimentalZeroingPseudos() const {
return UseExperimentalZeroingPseudos;		return UseExperimentalZeroingPseudos;
}		}

▲ Show 20 Lines • Show All 202 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Show First 20 Lines • Show All 151 Lines • ▼ Show 20 Lines	case Kryo:
// FIXME: remove this to enable 64-bit SLP if performance looks good.		// FIXME: remove this to enable 64-bit SLP if performance looks good.
MinVectorRegisterBitWidth = 128;		MinVectorRegisterBitWidth = 128;
break;		break;
case NeoverseE1:		case NeoverseE1:
PrefFunctionLogAlignment = 3;		PrefFunctionLogAlignment = 3;
break;		break;
case NeoverseN1:		case NeoverseN1:
PrefFunctionLogAlignment = 4;		PrefFunctionLogAlignment = 4;
		PrefLoopLogAlignment = 5;
		MaxBytesForLoopAlignment = 16;
break;		break;
case NeoverseN2:		case NeoverseN2:
PrefFunctionLogAlignment = 4;		PrefFunctionLogAlignment = 4;
		PrefLoopLogAlignment = 5;
		MaxBytesForLoopAlignment = 16;
VScaleForTuning = 1;		VScaleForTuning = 1;
break;		break;
case NeoverseV1:		case NeoverseV1:
PrefFunctionLogAlignment = 4;		PrefFunctionLogAlignment = 4;
		PrefLoopLogAlignment = 5;
		MaxBytesForLoopAlignment = 16;
VScaleForTuning = 2;		VScaleForTuning = 2;
break;		break;
case Neoverse512TVB:		case Neoverse512TVB:
PrefFunctionLogAlignment = 4;		PrefFunctionLogAlignment = 4;
VScaleForTuning = 1;		VScaleForTuning = 1;
MaxInterleaveFactor = 4;		MaxInterleaveFactor = 4;
break;		break;
case Saphira:		case Saphira:
▲ Show 20 Lines • Show All 201 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/aarch64-p2align-max-bytes-neoverse.ll

This file was added.

				; RUN: llc -mtriple=aarch64-none-linux-gnu -align-loops=32 < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT
				; RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=neoverse-n1 < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-N1

				define i32 @a(i32 %x, i32* nocapture readonly %y, i32* nocapture readonly %z) {
				; CHECK-DEFAULT: .p2align 5
				; CHECK-N1: .p2align 5, 0x0, 16
				; CHECK-NEXT: .LBB0_5: // %vector.body
				; CHECK-DEFAULT: .p2align 5
				; CHECK-N1: .p2align 5, 0x0, 16
				; CHECK-NEXT: .LBB0_8: // %for.body
				entry:
				%cmp10 = icmp sgt i32 %x, 0
				dmgreen [inline comment; unsubmitted, done]: This file needn't have the objfile checks, so long as they are tested elsewhere.
				br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup

				for.body.preheader: ; preds = %entry
				%wide.trip.count = zext i32 %x to i64
				%min.iters.check = icmp ult i32 %x, 8
				br i1 %min.iters.check, label %for.body.preheader17, label %vector.ph

				vector.ph: ; preds = %for.body.preheader
				%n.vec = and i64 %wide.trip.count, 4294967288
				br label %vector.body

				vector.body: ; preds = %vector.body, %vector.ph
				%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %10, %vector.body ]
				%vec.phi13 = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %11, %vector.body ]
				%0 = getelementptr inbounds i32, i32* %y, i64 %index
				%1 = bitcast i32* %0 to <4 x i32>*
				%wide.load = load <4 x i32>, <4 x i32>* %1, align 4
				%2 = getelementptr inbounds i32, i32* %0, i64 4
				%3 = bitcast i32* %2 to <4 x i32>*
				%wide.load14 = load <4 x i32>, <4 x i32>* %3, align 4
				%4 = getelementptr inbounds i32, i32* %z, i64 %index
				%5 = bitcast i32* %4 to <4 x i32>*
				%wide.load15 = load <4 x i32>, <4 x i32>* %5, align 4
				%6 = getelementptr inbounds i32, i32* %4, i64 4
				%7 = bitcast i32* %6 to <4 x i32>*
				%wide.load16 = load <4 x i32>, <4 x i32>* %7, align 4
				%8 = add <4 x i32> %wide.load, %vec.phi
				%9 = add <4 x i32> %wide.load14, %vec.phi13
				%10 = add <4 x i32> %8, %wide.load15
				%11 = add <4 x i32> %9, %wide.load16
				%index.next = add nuw i64 %index, 8
				%12 = icmp eq i64 %index.next, %n.vec
				br i1 %12, label %middle.block, label %vector.body

				middle.block: ; preds = %vector.body
				%bin.rdx = add <4 x i32> %11, %10
				%13 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %bin.rdx)
				%cmp.n = icmp eq i64 %n.vec, %wide.trip.count
				br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader17

				for.body.preheader17: ; preds = %for.body.preheader, %middle.block
				%indvars.iv.ph = phi i64 [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
				%b.011.ph = phi i32 [ 0, %for.body.preheader ], [ %13, %middle.block ]
				br label %for.body

				for.cond.cleanup: ; preds = %for.body, %middle.block, %entry
				%b.0.lcssa = phi i32 [ 0, %entry ], [ %13, %middle.block ], [ %add3, %for.body ]
				ret i32 %b.0.lcssa

				for.body: ; preds = %for.body.preheader17, %for.body
				%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader17 ]
				%b.011 = phi i32 [ %add3, %for.body ], [ %b.011.ph, %for.body.preheader17 ]
				%arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvars.iv
				%14 = load i32, i32* %arrayidx, align 4
				%arrayidx2 = getelementptr inbounds i32, i32* %z, i64 %indvars.iv
				%15 = load i32, i32* %arrayidx2, align 4
				%add = add i32 %14, %b.011
				%add3 = add i32 %add, %15
				%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
				%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
				br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
				}

				declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][CodeGen] Emit alignment "Max Skip" operand for AArch64 loops
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 397535

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64Subtarget.h

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

llvm/test/CodeGen/AArch64/aarch64-p2align-max-bytes-neoverse.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][CodeGen] Emit alignment "Max Skip" operand for AArch64 loops (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 397535

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64Subtarget.h

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

llvm/test/CodeGen/AArch64/aarch64-p2align-max-bytes-neoverse.ll

[AArch64][CodeGen] Emit alignment "Max Skip" operand for AArch64 loops
ClosedPublic