Diff 352017

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 11,801 Lines • ▼ Show 20 Lines	bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
// No global is ever allowed as a base.		// No global is ever allowed as a base.
if (AM.BaseGV)		if (AM.BaseGV)
return false;		return false;

// No reg+reg+imm addressing.		// No reg+reg+imm addressing.
if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)		if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
return false;		return false;

// FIXME: Update this method to support scalable addressing modes.		// FIXME: Update this method to support scalable addressing modes.
		dmgreenUnsubmitted Not Done Reply Inline Actions What does a AM.Scale == 1 relate to? Using a ld1b? dmgreen: What does a AM.Scale == 1 relate to? Using a ld1b?
		huihuizAuthorUnsubmitted Done Reply Inline Actions For ld1b cases, AM.Scale could be 1 when trying to match [r+r] with a base register. But such case is covered by "(uint64_t)AM.Scale == VecElemNumBytes", since VecElemNumBytes is also 1. I simplify this checking into "(AM.Scale == 0 \|\| (uint64_t)AM.Scale == VecElemNumBytes)". Let me know if there is any case I missed ? huihuiz: For ld1b cases, AM.Scale could be 1 when trying to match [r+r] with a base register. But such…
if (isa<ScalableVectorType>(Ty))		if (isa<ScalableVectorType>(Ty)) {
		efriedmaUnsubmitted Done Reply Inline Actions "AM.Scale > 0" check is redundant. efriedma: "AM.Scale > 0" check is redundant.
return AM.HasBaseReg && !AM.BaseOffs && !AM.Scale;		uint64_t VecElemNumBytes =
		DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
		return AM.HasBaseReg && !AM.BaseOffs &&
		(AM.Scale == 0 || (uint64_t)AM.Scale == VecElemNumBytes);
		}

// check reg + imm case:		// check reg + imm case:
// i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12		// i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
uint64_t NumBytes = 0;		uint64_t NumBytes = 0;
if (Ty->isSized()) {		if (Ty->isSized()) {
uint64_t NumBits = DL.getTypeSizeInBits(Ty);		uint64_t NumBits = DL.getTypeSizeInBits(Ty);
NumBytes = NumBits / 8;		NumBytes = NumBits / 8;
if (!isPowerOf2_64(NumBits))		if (!isPowerOf2_64(NumBits))
▲ Show 20 Lines • Show All 6,553 Lines • Show Last 20 Lines

llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

Show First 20 Lines • Show All 659 Lines • ▼ Show 20 Lines	static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
Type *WideTy =		Type *WideTy =
IntegerType::get(SE.getContext(),		IntegerType::get(SE.getContext(),
SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());		SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));		return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
}		}

/// Return an expression for LHS /s RHS, if it can be determined and if the		/// Return an expression for LHS /s RHS, if it can be determined and if the
/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits		/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
/// is true, expressions like (X * Y) /s Y are simplified to Y, ignoring that		/// is true, expressions like (X * Y) /s Y are simplified to X, ignoring that
/// the multiplication may overflow, which is useful when the result will be		/// the multiplication may overflow, which is useful when the result will be
/// used in a context where the most significant bits are ignored.		/// used in a context where the most significant bits are ignored.
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,		static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
ScalarEvolution &SE,		ScalarEvolution &SE,
bool IgnoreSignificantBits = false) {		bool IgnoreSignificantBits = false) {
// Handle the trivial case, which works for any SCEV type.		// Handle the trivial case, which works for any SCEV type.
if (LHS == RHS)		if (LHS == RHS)
return SE.getConstant(LHS->getType(), 1);		return SE.getConstant(LHS->getType(), 1);
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines	if (IgnoreSignificantBits \|\| isAddSExtable(Add, SE)) {
return SE.getAddExpr(Ops);		return SE.getAddExpr(Ops);
}		}
return nullptr;		return nullptr;
}		}

// Check for a multiply operand that we can pull RHS out of.		// Check for a multiply operand that we can pull RHS out of.
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {		if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {		if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
		// Handle special case C1XY /s C2XY.
		if (const SCEVMulExpr *MulRHS = dyn_cast<SCEVMulExpr>(RHS)) {
		sdesmalenUnsubmitted Not Done Reply Inline Actions Does the MulRHS also need checks for `IgnoreSignificantBits \|\| isMulSExtable(MulRHS, SE)` ? sdesmalen: Does the MulRHS also need checks for `IgnoreSignificantBits \|\| isMulSExtable(MulRHS, SE)` ?
		huihuizAuthorUnsubmitted Done Reply Inline Actions I didn't include this because I was thinking, even with "IgnoreSignificantBits" equal to false and MulRHS a negative value, we can still extract the factor. For instance, (16X) /s (-8X), -2 can be extracted. But this seems to violate the intention of setting IgnoreSignificnatBits to false. Therefore, I am restricting the checking to MulRHS as well. huihuiz: I didn't include this because I was thinking, even with "IgnoreSignificantBits" equal to false…
		if (IgnoreSignificantBits || isMulSExtable(MulRHS, SE)) {
		const SCEVConstant *LC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
		const SCEVConstant *RC =
		dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
		if (LC && RC) {
		SmallVector<const SCEV *, 4> LOps(drop_begin(Mul->operands()));
		efriedmaUnsubmitted Done Reply Inline Actions LOps == ROps? efriedma: LOps == ROps?
		SmallVector<const SCEV *, 4> ROps(drop_begin(MulRHS->operands()));
		if (LOps == ROps)
		return getExactSDiv(LC, RC, SE, IgnoreSignificantBits);
		}
		}
		}

SmallVector<const SCEV *, 4> Ops;		SmallVector<const SCEV *, 4> Ops;
bool Found = false;		bool Found = false;
for (const SCEV *S : Mul->operands()) {		for (const SCEV *S : Mul->operands()) {
if (!Found)		if (!Found)
if (const SCEV *Q = getExactSDiv(S, RHS, SE,		if (const SCEV *Q = getExactSDiv(S, RHS, SE,
IgnoreSignificantBits)) {		IgnoreSignificantBits)) {
S = Q;		S = Q;
Found = true;		Found = true;
▲ Show 20 Lines • Show All 5,263 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-fold-vscale.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -disable-lsr < %s | FileCheck %s			; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

	; Check that vscale call is recognised by load/store reg/reg pattern and			; Check that vscale call is recognised by load/store reg/reg pattern and
	; partially folded, with the rest pulled out of the loop. This requires LSR to			; partially folded, with the rest pulled out of the loop.
	; be disabled, which is something that will be addressed at a later date.

	define void @ld1w_reg_loop([32000 x i32]* %addr) {			define void @ld1w_reg_loop([32000 x i32]* %addr) {
	; CHECK-LABEL: ld1w_reg_loop:			; CHECK-LABEL: ld1w_reg_loop:
	; CHECK: // %bb.0: // %entry			; CHECK: // %bb.0: // %entry
	; CHECK-NEXT: mov x8, xzr			; CHECK-NEXT: mov x8, xzr
	; CHECK-NEXT: cntw x9			; CHECK-NEXT: cntw x9
	; CHECK-NEXT: ptrue p0.s			; CHECK-NEXT: ptrue p0.s
	; CHECK-NEXT: .LBB0_1: // %vector.body			; CHECK-NEXT: .LBB0_1: // %vector.body
	▲ Show 20 Lines • Show All 56 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll

This file was added.

				; RUN: opt -S -loop-reduce < %s | FileCheck %s --check-prefix=IR
				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefix=ASM
				; Note: To update this test, please run utils/update_test_checks.py and utils/update_llc_test_checks.py separately on opt/llc run line.

				target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
				target triple = "aarch64-linux-gnu"

				; These tests check that the IR coming out of LSR does not cast input/output pointer from i16* to i8* type.
				; And scaled-index addressing mode is leveraged in the generated assembly, i.e. ld1h { z1.h }, p0/z, [x0, x8, lsl #1].

				define void @ld_st_nxv8i16(i16* %in, i16* %out) {
				; IR-LABEL: @ld_st_nxv8i16(
				sdesmalenUnsubmitted Done Reply Inline Actions nit: I'm not really sure what the convention is here, but I wonder if it's better to just have two RUN lines, where the former checks the full output of the IR itself (more than just the IR-NOT, and instead of checking the debug output of the pass), and one that checks the full output of the asm (or at least more than just the load/store). The reason I'm suggesting checking more of the IR/asm is to check that there are no redundant instructions for other purposes. I'm also not sure if testing debug-output is normally desirable, but if we just check the output, we can remove the `REQUIRES: asserts` line. sdesmalen: nit: I'm not really sure what the convention is here, but I wonder if it's better to just have…
				; IR-NEXT: entry:
				; IR-NEXT: br label [[LOOP_PH:%.*]]
				; IR: loop.ph:
				; IR-NEXT: [[P_VEC_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> undef, i16 3, i32 0
				; IR-NEXT: [[P_VEC_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[P_VEC_SPLATINSERT]], <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
				; IR-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
				; IR-NEXT: [[SCALED_VF:%.*]] = shl i64 [[VSCALE]], 3
				; IR-NEXT: br label [[LOOP:%.*]]
				; IR: loop:
				; IR-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[LOOP_PH]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ]
				; IR-NEXT: [[SCEVGEP2:%.*]] = getelementptr i16, i16* [[IN:%.*]], i64 [[INDVAR]]
				; IR-NEXT: [[SCEVGEP23:%.*]] = bitcast i16* [[SCEVGEP2]] to <vscale x 8 x i16>*
				; IR-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[OUT:%.*]], i64 [[INDVAR]]
				; IR-NEXT: [[SCEVGEP1:%.*]] = bitcast i16* [[SCEVGEP]] to <vscale x 8 x i16>*
				; IR-NEXT: [[VAL:%.*]] = load <vscale x 8 x i16>, <vscale x 8 x i16>* [[SCEVGEP23]], align 16
				; IR-NEXT: [[ADDP_VEC:%.*]] = add <vscale x 8 x i16> [[VAL]], [[P_VEC_SPLAT]]
				; IR-NEXT: store <vscale x 8 x i16> [[ADDP_VEC]], <vscale x 8 x i16>* [[SCEVGEP1]], align 16
				; IR-NEXT: [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], [[SCALED_VF]]
				; IR-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 1024
				; IR-NEXT: br i1 [[EXIT_COND]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
				; IR: loop.exit:
				; IR-NEXT: br label [[EXIT:%.*]]
				; IR: exit:
				; IR-NEXT: ret void
				;
				; ASM-LABEL: ld_st_nxv8i16:
				; ASM: // %bb.0: // %entry
				; ASM-NEXT: mov x8, xzr
				; ASM-NEXT: mov z0.h, #3 // =0x3
				; ASM-NEXT: cnth x9
				dmgreenUnsubmitted Not Done Reply Inline Actions Why is this load volatile? dmgreen: Why is this load volatile?
				huihuizAuthorUnsubmitted Done Reply Inline Actions Removed, not needed. huihuiz: Removed, not needed.
				; ASM-NEXT: ptrue p0.h
				; ASM-NEXT: .LBB0_1: // %loop
				; ASM-NEXT: // =>This Inner Loop Header: Depth=1
				; ASM-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
				; ASM-NEXT: add z1.h, z1.h, z0.h
				; ASM-NEXT: st1h { z1.h }, p0, [x1, x8, lsl #1]
				; ASM-NEXT: add x8, x8, x9
				; ASM-NEXT: cmp x8, #1024 // =1024
				; ASM-NEXT: b.ne .LBB0_1
				; ASM-NEXT: // %bb.2: // %exit
				; ASM-NEXT: ret
				entry:
				br label %loop.ph

				loop.ph:
				%p_vec.splatinsert = insertelement <vscale x 8 x i16> undef, i16 3, i32 0
				dmgreenUnsubmitted Not Done Reply Inline Actions This test isn't using masked loads/stores, but it would be good to make sure they work sensibly. dmgreen: This test isn't using masked loads/stores, but it would be good to make sure they work sensibly.
				huihuizAuthorUnsubmitted Done Reply Inline Actions Thanks for catching it! masked load/store case added, work as expected. huihuiz: Thanks for catching it! masked load/store case added, work as expected.
				%p_vec.splat = shufflevector <vscale x 8 x i16> %p_vec.splatinsert, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
				%vscale = call i64 @llvm.vscale.i64()
				%scaled_vf = shl i64 %vscale, 3
				br label %loop

				loop: ; preds = %loop, %loop.ph
				%indvar = phi i64 [ 0, %loop.ph ], [ %indvar.next, %loop ]
				%ptr.in = getelementptr inbounds i16, i16* %in, i64 %indvar
				%ptr.out = getelementptr inbounds i16, i16* %out, i64 %indvar
				%in.ptrcast = bitcast i16* %ptr.in to <vscale x 8 x i16>*
				%out.ptrcast = bitcast i16* %ptr.out to <vscale x 8 x i16>*
				%val = load <vscale x 8 x i16>, <vscale x 8 x i16>* %in.ptrcast, align 16
				%addp_vec = add <vscale x 8 x i16> %val, %p_vec.splat
				store <vscale x 8 x i16> %addp_vec, <vscale x 8 x i16>* %out.ptrcast, align 16
				%indvar.next = add nsw i64 %indvar, %scaled_vf
				%exit.cond = icmp eq i64 %indvar.next, 1024
				br i1 %exit.cond, label %loop.exit, label %loop

				loop.exit: ; preds = %loop
				br label %exit

				exit:
				ret void
				}

				define void @masked_ld_st_nxv8i16(i16* %in, i16* %out, i64 %n) {
				; IR-LABEL: @masked_ld_st_nxv8i16(
				; IR-NEXT: entry:
				; IR-NEXT: br label [[LOOP_PH:%.*]]
				; IR: loop.ph:
				; IR-NEXT: [[P_VEC_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> undef, i16 3, i32 0
				; IR-NEXT: [[P_VEC_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[P_VEC_SPLATINSERT]], <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
				; IR-NEXT: [[PTRUE_VEC_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
				; IR-NEXT: [[PTRUE_VEC_SPLAT:%.*]] = shufflevector <vscale x 8 x i1> [[PTRUE_VEC_SPLATINSERT]], <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
				; IR-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
				; IR-NEXT: [[SCALED_VF:%.*]] = shl i64 [[VSCALE]], 3
				; IR-NEXT: br label [[LOOP:%.*]]
				; IR: loop:
				; IR-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[LOOP_PH]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ]
				; IR-NEXT: [[SCEVGEP2:%.*]] = getelementptr i16, i16* [[IN:%.*]], i64 [[INDVAR]]
				; IR-NEXT: [[SCEVGEP23:%.*]] = bitcast i16* [[SCEVGEP2]] to <vscale x 8 x i16>*
				; IR-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[OUT:%.*]], i64 [[INDVAR]]
				; IR-NEXT: [[SCEVGEP1:%.*]] = bitcast i16* [[SCEVGEP]] to <vscale x 8 x i16>*
				; IR-NEXT: [[VAL:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[SCEVGEP23]], i32 4, <vscale x 8 x i1> [[PTRUE_VEC_SPLAT]], <vscale x 8 x i16> undef)
				; IR-NEXT: [[ADDP_VEC:%.*]] = add <vscale x 8 x i16> [[VAL]], [[P_VEC_SPLAT]]
				; IR-NEXT: call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[ADDP_VEC]], <vscale x 8 x i16>* [[SCEVGEP1]], i32 4, <vscale x 8 x i1> [[PTRUE_VEC_SPLAT]])
				; IR-NEXT: [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], [[SCALED_VF]]
				; IR-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[N:%.*]], [[INDVAR_NEXT]]
				; IR-NEXT: br i1 [[EXIT_COND]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
				; IR: loop.exit:
				; IR-NEXT: br label [[EXIT:%.*]]
				; IR: exit:
				; IR-NEXT: ret void
				;
				; ASM-LABEL: masked_ld_st_nxv8i16:
				; ASM: // %bb.0: // %entry
				; ASM-NEXT: mov x8, xzr
				; ASM-NEXT: mov z0.h, #3 // =0x3
				; ASM-NEXT: ptrue p0.h
				; ASM-NEXT: cnth x9
				; ASM-NEXT: .LBB1_1: // %loop
				; ASM-NEXT: // =>This Inner Loop Header: Depth=1
				; ASM-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
				; ASM-NEXT: add z1.h, z1.h, z0.h
				; ASM-NEXT: st1h { z1.h }, p0, [x1, x8, lsl #1]
				; ASM-NEXT: add x8, x8, x9
				; ASM-NEXT: cmp x2, x8
				; ASM-NEXT: b.ne .LBB1_1
				; ASM-NEXT: // %bb.2: // %exit
				; ASM-NEXT: ret
				entry:
				br label %loop.ph

				loop.ph:
				%p_vec.splatinsert = insertelement <vscale x 8 x i16> undef, i16 3, i32 0
				%p_vec.splat = shufflevector <vscale x 8 x i16> %p_vec.splatinsert, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
				%ptrue_vec.splatinsert = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
				%ptrue_vec.splat = shufflevector <vscale x 8 x i1> %ptrue_vec.splatinsert, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
				%vscale = call i64 @llvm.vscale.i64()
				%scaled_vf = shl i64 %vscale, 3
				br label %loop

				loop: ; preds = %loop, %loop.ph
				%indvar = phi i64 [ 0, %loop.ph ], [ %indvar.next, %loop ]
				%ptr.in = getelementptr inbounds i16, i16* %in, i64 %indvar
				%ptr.out = getelementptr inbounds i16, i16* %out, i64 %indvar
				%in.ptrcast = bitcast i16* %ptr.in to <vscale x 8 x i16>*
				%out.ptrcast = bitcast i16* %ptr.out to <vscale x 8 x i16>*
				%val = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %in.ptrcast, i32 4, <vscale x 8 x i1> %ptrue_vec.splat, <vscale x 8 x i16> undef)
				%addp_vec = add <vscale x 8 x i16> %val, %p_vec.splat
				call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> %addp_vec, <vscale x 8 x i16>* %out.ptrcast, i32 4, <vscale x 8 x i1> %ptrue_vec.splat)
				%indvar.next = add nsw i64 %indvar, %scaled_vf
				%exit.cond = icmp eq i64 %indvar.next, %n
				br i1 %exit.cond, label %loop.exit, label %loop

				loop.exit: ; preds = %loop
				br label %exit

				exit:
				ret void
				}

				declare i64 @llvm.vscale.i64()

				declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>*, i32 immarg, <vscale x 8 x i1>, <vscale x 8 x i16>)

				declare void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>*, i32 immarg, <vscale x 8 x i1>)

This is an archive of the discontinued LLVM Phabricator instance.

[SVE][LSR] Teach LSR to enable simple scaled-index addressing mode generation for SVE.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 352017

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

llvm/test/CodeGen/AArch64/sve-fold-vscale.ll

llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SVE][LSR] Teach LSR to enable simple scaled-index addressing mode generation for SVE.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 352017

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

llvm/test/CodeGen/AArch64/sve-fold-vscale.ll

llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll

[SVE][LSR] Teach LSR to enable simple scaled-index addressing mode generation for SVE.
ClosedPublic