These types of shuffles get lowered as a vmerge with a potential
vslide{up,down}.
There is still room for improvement in modeling these kinds of shuffles, including concatenating shuffles, which are classified as SK_InsertSubvector. Those can be lowered to a single vslideup provided the types are legal. (And sometimes, when the concatenated shuffle feeds something like an interleave shuffle, it has no cost at all, but there is currently no way to detect that from the target hooks.)
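For illustration (this is a made-up example, not one of the patch's test cases), the kind of concatenating shuffle meant here looks like the IR below: the mask inserts the low half of %b into %a at element index 4, so shuffle-cost queries would normally classify it as SK_InsertSubvector, and with legal types it should lower to a single vslideup.

```
; Hypothetical example: concatenate the low halves of %a and %b.
; The mask is an insert-subvector mask (low half of %b inserted into %a at
; element index 4), so TTI callers would typically report it as
; SK_InsertSubvector. With legal types this is expected to lower to one
; vslideup.vi (plus the usual vsetivli), with no vmerge needed.
define <8 x i32> @concat_low_halves(<8 x i32> %a, <8 x i32> %b) {
  %res = shufflevector <8 x i32> %a, <8 x i32> %b,
                       <8 x i32> <i32 0, i32 1, i32 2, i32 3,
                                  i32 8, i32 9, i32 10, i32 11>
  ret <8 x i32> %res
}
```

This mirrors the insert_subvector_offset_1_v8i64 test discussed in the inline comments below, where the whole shuffle is generated as a single vslideup.vi.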
Event Timeline
LGTM - once all the earlier patches have been approved.
Note that you explicitly do *not* need to add the slideup-only case to the costing for this LGTM. Doing so is a good idea, but this is already much closer to the actual lowering than the current code, and is thus a worthwhile stepping stone regardless.
Remove FIXME: The test case seems to be fully vectorizing the function, rather than half-vectorizing it as the comment states.
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp:299
Thanks for implementing this!
I have a question: LT is computed from Tp; is it supposed to use SubTp instead of Tp here?
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp:299
Good point, I'm not sure. AArch64 seems to use SubTp for costing their subvector inserts. Should we be using both legalisation costs?
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp:299
Using SubTp for the legalisation cost here gives us this diff for this test case:

```
define <8 x i64> @insert_subvector_offset_1_v8i64(<8 x i64> %v, <8 x i64> %w) {
; CHECK-LABEL: 'insert_subvector_offset_1_v8i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 5, i32 6, i32 7>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 5, i32 6, i32 7>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %res
;
  %res = shufflevector <8 x i64> %v, <8 x i64> %w, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 5, i32 6, i32 7>
  ret <8 x i64> %res
}
```

This is what's actually generated:

```
insert_subvector_offset_1_v8i64:        # @insert_subvector_offset_1_v8i64
	.cfi_startproc
# %bb.0:
	vsetivli	zero, 5, e64, m4, tu, ma
	vslideup.vi	v8, v12, 1
	ret
```

It's using LMUL=4 here so I would presume we still want to cost it as 4 * one vslideup.
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp:299
I thought the operation was done after 2*VLEN was written even though LMUL is 4, but on second thought I think it really depends on the HW implementation. So it is fine to me now.
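To make the 4-versus-2 above concrete, here is the LMUL arithmetic behind it (a back-of-the-envelope check assuming VLEN = 128; the concrete VLEN is my assumption for illustration):

\[
\texttt{v8i64}:\ 8 \times 64 = 512\ \text{bits} = 4 \times \text{VLEN} \Rightarrow \text{LMUL} = 4\ (\texttt{m4}), \qquad
\texttt{v4i64}:\ 4 \times 64 = 256\ \text{bits} = 2 \times \text{VLEN} \Rightarrow \text{LMUL} = 2\ (\texttt{m2})
\]

Costing the single vslideup by the legalized register group of the full type Tp therefore gives 4, while costing it by the inserted subvector type SubTp gives 2, matching the diff above. The open question in the thread is which of the two better reflects how much of the m4 register group the hardware actually touches when vl is only 5, and as noted above that appears to depend on the implementation.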