This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Do 64-bit vector move of 0 and -1 by extracting from the 128-bit move
Closed · Public

Authored by john.brawn on Oct 23 2018, 8:22 AM.

Download Raw Diff

Details

Reviewers

t.p.northover
olista01
dmgreen
SjoerdMeijer

Commits

rG49e61d90ca30: [AArch64] Do 64-bit vector move of 0 and -1 by extracting from the 128-bit move
rL345270: [AArch64] Do 64-bit vector move of 0 and -1 by extracting from the 128-bit move

Summary

Currently a vector move of 0 or -1 will use different instructions depending on the size of the vector. Using a single instruction (the 128-bit one) for both gives more opportunity for Machine CSE to eliminate instructions.

Diff Detail

Repository: rL LLVM

Event Timeline

john.brawn created this revision. · Oct 23 2018, 8:22 AM

Herald added subscribers: kristof.beyls, javed.absar. · View Herald Transcript · Oct 23 2018, 8:22 AM

john.brawn added a child revision: D53582: [AArch64] Add EXT patterns for 64-bit EXT of a subvector of a 128-bit vector. · Oct 23 2018, 8:29 AM

LGTM.

I was thinking about how this might affect other little cores like the A53/A55, especially around the dual issue on q registers. I don't think it will make much difference though, and the CSE benefits look like a bigger win.

This revision is now accepted and ready to land. · Oct 25 2018, 4:19 AM

Closed by commit rL345270: [AArch64] Do 64-bit vector move of 0 and -1 by extracting from the 128-bit move (authored by john.brawn). · Explain Why · Oct 25 2018, 7:59 AM

This revision was automatically updated to reflect the committed changes.

dmgreen mentioned this in D144018: [AArch64] More consistently use buildvector for zero and all-ones constants. · Feb 14 2023, 8:11 AM

dmgreen mentioned this in rGc6c6723189f4: [AArch64] More consistently use buildvector for zero and all-ones constants. · Feb 20 2023, 6:13 AM

Revision Contents

Path

Size

llvm/

trunk/

lib/

Target/

AArch64/

AArch64InstrInfo.td

22 lines

test/

CodeGen/

AArch64/

aarch64-be-bv.ll

2 lines

aarch64-smax-constantfold.ll

2 lines

arm64-neon-compare-instructions.ll

6 lines

arm64-neon-copy.ll

4 lines

arm64-vector-ext.ll

2 lines

arm64-vshuffle.ll

2 lines

arm64-zero-cycle-zeroing.ll

8 lines

4 lines

4 lines

2 lines

24 lines

neon-compare-instructions.ll

6 lines

selectiondag-order.ll

6 lines

Diff 171101

llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 4,914 Lines • ▼ Show 20 Lines
	let isReMaterializable = 1, isAsCheapAsAMove = 1 in			let isReMaterializable = 1, isAsCheapAsAMove = 1 in
	def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",			def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
	[(set FPR64:$Rd, simdimmtype10:$imm8)]>;			[(set FPR64:$Rd, simdimmtype10:$imm8)]>;
	// The movi_edit node has the immediate value already encoded, so we use			// The movi_edit node has the immediate value already encoded, so we use
	// a plain imm0_255 here.			// a plain imm0_255 here.
	def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),			def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
	(MOVID imm0_255:$shift)>;			(MOVID imm0_255:$shift)>;

	def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>;
	def : Pat<(v2i32 immAllZerosV), (MOVID (i32 0))>;
	def : Pat<(v4i16 immAllZerosV), (MOVID (i32 0))>;
	def : Pat<(v8i8 immAllZerosV), (MOVID (i32 0))>;

	def : Pat<(v1i64 immAllOnesV), (MOVID (i32 255))>;
	def : Pat<(v2i32 immAllOnesV), (MOVID (i32 255))>;
	def : Pat<(v4i16 immAllOnesV), (MOVID (i32 255))>;
	def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>;

	// EDIT byte mask: 2d			// EDIT byte mask: 2d

	// The movi_edit node has the immediate value already encoded, so we use			// The movi_edit node has the immediate value already encoded, so we use
	// a plain imm0_255 in the pattern			// a plain imm0_255 in the pattern
	let isReMaterializable = 1, isAsCheapAsAMove = 1 in			let isReMaterializable = 1, isAsCheapAsAMove = 1 in
	def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,			def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
	simdimmtype10,			simdimmtype10,
	"movi", ".2d",			"movi", ".2d",
	[(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;			[(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;

	def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;			def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
	def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;			def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
	def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;			def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
	def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;			def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;

	def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;			def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
	def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;			def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
	def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;			def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
	def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;			def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;

				// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
				// extract is free and this gives better MachineCSE results.
				def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
				def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
				def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
				def : Pat<(v8i8 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;

				def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
				def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
				def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
				def : Pat<(v8i8 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;

	// EDIT per word & halfword: 2s, 4h, 4s, & 8h			// EDIT per word & halfword: 2s, 4h, 4s, & 8h
	let isReMaterializable = 1, isAsCheapAsAMove = 1 in			let isReMaterializable = 1, isAsCheapAsAMove = 1 in
	defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;			defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;

	def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;			def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
	def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;			def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
	def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;			def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
	def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;			def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
	▲ Show 20 Lines • Show All 1,729 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/AArch64/aarch64-be-bv.ll

Show First 20 Lines • Show All 740 Lines • ▼ Show 20 Lines	define void @modimm_t10_call() {
; CHECK: movi d[[REG1:[0-9]+]], #0x0000ff000000ff		; CHECK: movi d[[REG1:[0-9]+]], #0x0000ff000000ff
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b		; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8		; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0>)		call i8 @f_v8i8(<8 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0>)
; CHECK: movi d[[REG1:[0-9]+]], #0x00ffff0000ffff		; CHECK: movi d[[REG1:[0-9]+]], #0x00ffff0000ffff
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h		; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16		; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 -1, i16 0, i16 -1, i16 0>)		call i16 @f_v4i16(<4 x i16> <i16 -1, i16 0, i16 -1, i16 0>)
; CHECK: movi d[[REG1:[0-9]+]], #0xffffffffffffffff		; CHECK: movi v[[REG1:[0-9]+]].2d, #0xffffffffffffffff
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s		; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32		; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 -1, i32 -1>)		call i32 @f_v2i32(<2 x i32> <i32 -1, i32 -1>)
; CHECK: movi v[[REG1:[0-9]+]].2d, #0xffffff00ffffff		; CHECK: movi v[[REG1:[0-9]+]].2d, #0xffffff00ffffff
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b		; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8		; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8		; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0>)		call i8 @f_v16i8(<16 x i8> <i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0>)
▲ Show 20 Lines • Show All 74 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/AArch64/aarch64-smax-constantfold.ll

	; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o -| FileCheck %s			; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o -| FileCheck %s

	; Function Attrs: nounwind readnone			; Function Attrs: nounwind readnone
	declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>)			declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>)

	; CHECK-LABEL: test			; CHECK-LABEL: test
	define <4 x i16> @test() {			define <4 x i16> @test() {
	entry:			entry:
	; CHECK: movi d{{[0-9]+}}, #0000000000000000			; CHECK: movi v{{[0-9]+}}.2d, #0000000000000000
	%0 = tail call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> zeroinitializer)			%0 = tail call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> zeroinitializer)
	ret <4 x i16> %0			ret <4 x i16> %0
	}			}

llvm/trunk/test/CodeGen/AArch64/arm64-neon-compare-instructions.ll

Show First 20 Lines • Show All 969 Lines • ▼ Show 20 Lines	;CHECK-NEXT: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, v[[ZERO]].2d
%tmp3 = icmp ugt <2 x i64> %A, <i64 1, i64 1>		%tmp3 = icmp ugt <2 x i64> %A, <i64 1, i64 1>
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>		%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4		ret <2 x i64> %tmp4
}		}

define <8 x i8> @cmlsz8xi8(<8 x i8> %A) {		define <8 x i8> @cmlsz8xi8(<8 x i8> %A) {
; Using registers other than v0, v1 are possible, but would be odd.		; Using registers other than v0, v1 are possible, but would be odd.
; LS implemented as HS, so check reversed operands.		; LS implemented as HS, so check reversed operands.
;CHECK: movi d[[ZERO:[0-9]+]], #0		;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
;CHECK-NEXT: cmhs {{v[0-9]+}}.8b, v[[ZERO]].8b, v0.8b		;CHECK-NEXT: cmhs {{v[0-9]+}}.8b, v[[ZERO]].8b, v0.8b
%tmp3 = icmp ule <8 x i8> %A, zeroinitializer;		%tmp3 = icmp ule <8 x i8> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>		%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4		ret <8 x i8> %tmp4
}		}

define <16 x i8> @cmlsz16xi8(<16 x i8> %A) {		define <16 x i8> @cmlsz16xi8(<16 x i8> %A) {
; Using registers other than v0, v1 are possible, but would be odd.		; Using registers other than v0, v1 are possible, but would be odd.
; LS implemented as HS, so check reversed operands.		; LS implemented as HS, so check reversed operands.
;CHECK: movi v[[ZERO:[0-9]+]].2d, #0		;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
;CHECK-NEXT: cmhs {{v[0-9]+}}.16b, v[[ZERO]].16b, v0.16b		;CHECK-NEXT: cmhs {{v[0-9]+}}.16b, v[[ZERO]].16b, v0.16b
%tmp3 = icmp ule <16 x i8> %A, zeroinitializer;		%tmp3 = icmp ule <16 x i8> %A, zeroinitializer;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>		%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4		ret <16 x i8> %tmp4
}		}

define <4 x i16> @cmlsz4xi16(<4 x i16> %A) {		define <4 x i16> @cmlsz4xi16(<4 x i16> %A) {
; Using registers other than v0, v1 are possible, but would be odd.		; Using registers other than v0, v1 are possible, but would be odd.
; LS implemented as HS, so check reversed operands.		; LS implemented as HS, so check reversed operands.
;CHECK: movi d[[ZERO:[0-9]+]], #0		;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
;CHECK-NEXT: cmhs {{v[0-9]+}}.4h, v[[ZERO]].4h, v0.4h		;CHECK-NEXT: cmhs {{v[0-9]+}}.4h, v[[ZERO]].4h, v0.4h
%tmp3 = icmp ule <4 x i16> %A, zeroinitializer;		%tmp3 = icmp ule <4 x i16> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>		%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4		ret <4 x i16> %tmp4
}		}

define <8 x i16> @cmlsz8xi16(<8 x i16> %A) {		define <8 x i16> @cmlsz8xi16(<8 x i16> %A) {
; Using registers other than v0, v1 are possible, but would be odd.		; Using registers other than v0, v1 are possible, but would be odd.
; LS implemented as HS, so check reversed operands.		; LS implemented as HS, so check reversed operands.
;CHECK: movi v[[ZERO:[0-9]+]].2d, #0		;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
;CHECK-NEXT: cmhs {{v[0-9]+}}.8h, v[[ZERO]].8h, v0.8h		;CHECK-NEXT: cmhs {{v[0-9]+}}.8h, v[[ZERO]].8h, v0.8h
%tmp3 = icmp ule <8 x i16> %A, zeroinitializer;		%tmp3 = icmp ule <8 x i16> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>		%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4		ret <8 x i16> %tmp4
}		}

define <2 x i32> @cmlsz2xi32(<2 x i32> %A) {		define <2 x i32> @cmlsz2xi32(<2 x i32> %A) {
; Using registers other than v0, v1 are possible, but would be odd.		; Using registers other than v0, v1 are possible, but would be odd.
; LS implemented as HS, so check reversed operands.		; LS implemented as HS, so check reversed operands.
;CHECK: movi d[[ZERO:[0-9]+]], #0		;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
;CHECK-NEXT: cmhs {{v[0-9]+}}.2s, v[[ZERO]].2s, v0.2s		;CHECK-NEXT: cmhs {{v[0-9]+}}.2s, v[[ZERO]].2s, v0.2s
%tmp3 = icmp ule <2 x i32> %A, zeroinitializer;		%tmp3 = icmp ule <2 x i32> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>		%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4		ret <2 x i32> %tmp4
}		}

define <4 x i32> @cmlsz4xi32(<4 x i32> %A) {		define <4 x i32> @cmlsz4xi32(<4 x i32> %A) {
; Using registers other than v0, v1 are possible, but would be odd.		; Using registers other than v0, v1 are possible, but would be odd.
▲ Show 20 Lines • Show All 168 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/AArch64/arm64-neon-copy.ll

Show First 20 Lines • Show All 1,395 Lines • ▼ Show 20 Lines	entry:
%vecext1 = extractelement <1 x i64> %y, i32 0		%vecext1 = extractelement <1 x i64> %y, i32 0
%vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1		%vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
ret <2 x i64> %vecinit2		ret <2 x i64> %vecinit2
}		}


define <4 x i16> @concat_vector_v4i16_const() {		define <4 x i16> @concat_vector_v4i16_const() {
; CHECK-LABEL: concat_vector_v4i16_const:		; CHECK-LABEL: concat_vector_v4i16_const:
; CHECK: movi {{d[0-9]+}}, #0		; CHECK: movi {{v[0-9]+}}.2d, #0
%r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer		%r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
ret <4 x i16> %r		ret <4 x i16> %r
}		}

define <4 x i16> @concat_vector_v4i16_const_one() {		define <4 x i16> @concat_vector_v4i16_const_one() {
; CHECK-LABEL: concat_vector_v4i16_const_one:		; CHECK-LABEL: concat_vector_v4i16_const_one:
; CHECK: movi {{v[0-9]+}}.4h, #1		; CHECK: movi {{v[0-9]+}}.4h, #1
%r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer		%r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
ret <4 x i16> %r		ret <4 x i16> %r
}		}

define <4 x i32> @concat_vector_v4i32_const() {		define <4 x i32> @concat_vector_v4i32_const() {
; CHECK-LABEL: concat_vector_v4i32_const:		; CHECK-LABEL: concat_vector_v4i32_const:
; CHECK: movi {{v[0-9]+}}.2d, #0		; CHECK: movi {{v[0-9]+}}.2d, #0
%r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer		%r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %r		ret <4 x i32> %r
}		}

define <8 x i8> @concat_vector_v8i8_const() {		define <8 x i8> @concat_vector_v8i8_const() {
; CHECK-LABEL: concat_vector_v8i8_const:		; CHECK-LABEL: concat_vector_v8i8_const:
; CHECK: movi {{d[0-9]+}}, #0		; CHECK: movi {{v[0-9]+}}.2d, #0
%r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer		%r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
ret <8 x i8> %r		ret <8 x i8> %r
}		}

define <8 x i16> @concat_vector_v8i16_const() {		define <8 x i16> @concat_vector_v8i16_const() {
; CHECK-LABEL: concat_vector_v8i16_const:		; CHECK-LABEL: concat_vector_v8i16_const:
; CHECK: movi {{v[0-9]+}}.2d, #0		; CHECK: movi {{v[0-9]+}}.2d, #0
%r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer		%r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
▲ Show 20 Lines • Show All 51 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/AArch64/arm64-vector-ext.ll

Show All 13 Lines	define void @func30(%T0_30 %v0, %T1_30* %p1) {
store %T1_30 %r, %T1_30* %p1		store %T1_30 %r, %T1_30* %p1
ret void		ret void
}		}

; Extend from v1i1 was crashing things (PR20791). Make sure we do something		; Extend from v1i1 was crashing things (PR20791). Make sure we do something
; sensible instead.		; sensible instead.
define <1 x i32> @autogen_SD7918() {		define <1 x i32> @autogen_SD7918() {
; CHECK-LABEL: autogen_SD7918		; CHECK-LABEL: autogen_SD7918
; CHECK: movi d0, #0000000000000000		; CHECK: movi.2d v0, #0000000000000000
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0		%I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0
%ZE = zext <1 x i1> %I29 to <1 x i32>		%ZE = zext <1 x i1> %I29 to <1 x i32>
ret <1 x i32> %ZE		ret <1 x i32> %ZE
}		}

llvm/trunk/test/CodeGen/AArch64/arm64-vshuffle.ll

	; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -mcpu=cyclone | FileCheck %s			; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -mcpu=cyclone | FileCheck %s


	; CHECK: test1			; CHECK: test1
	; CHECK: movi d[[REG0:[0-9]+]], #0000000000000000			; CHECK: movi.16b v[[REG0:[0-9]+]], #0
	define <8 x i1> @test1() {			define <8 x i1> @test1() {
	entry:			entry:
	%Shuff = shufflevector <8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,			%Shuff = shufflevector <8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,
	i1 7>,			i1 7>,
	<8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,			<8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,
	i1 7>,			i1 7>,
	<8 x i32> <i32 2, i32 undef, i32 6, i32 undef, i32 10,			<8 x i32> <i32 2, i32 undef, i32 6, i32 undef, i32 10,
	i32 12, i32 14, i32 0>			i32 12, i32 14, i32 0>
	▲ Show 20 Lines • Show All 63 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll

	Show First 20 Lines • Show All 156 Lines • ▼ Show 20 Lines
	; NONEFP: mov d0, xzr			; NONEFP: mov d0, xzr
	; ZEROFP: movi v0.2d, #0			; ZEROFP: movi v0.2d, #0
	ret double 0.0			ret double 0.0
	}			}

	define <8 x i8> @tv8i8() {			define <8 x i8> @tv8i8() {
	entry:			entry:
	; ALL-LABEL: tv8i8:			; ALL-LABEL: tv8i8:
	; ALL: movi d0, #0			; ALL: movi{{(.16b)?}} v0{{(.2d)?}}, #0
	ret <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>			ret <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
	}			}

	define <4 x i16> @tv4i16() {			define <4 x i16> @tv4i16() {
	entry:			entry:
	; ALL-LABEL: tv4i16:			; ALL-LABEL: tv4i16:
	; ALL: movi d0, #0			; ALL: movi{{(.16b)?}} v0{{(.2d)?}}, #0
	ret <4 x i16> <i16 0, i16 0, i16 0, i16 0>			ret <4 x i16> <i16 0, i16 0, i16 0, i16 0>
	}			}

	define <2 x i32> @tv2i32() {			define <2 x i32> @tv2i32() {
	entry:			entry:
	; ALL-LABEL: tv2i32:			; ALL-LABEL: tv2i32:
	; ALL: movi d0, #0			; ALL: movi{{(.16b)?}} v0{{(.2d)?}}, #0
	ret <2 x i32> <i32 0, i32 0>			ret <2 x i32> <i32 0, i32 0>
	}			}

	define <2 x float> @tv2f32() {			define <2 x float> @tv2f32() {
	entry:			entry:
	; ALL-LABEL: tv2f32:			; ALL-LABEL: tv2f32:
	; ALL: movi d0, #0			; ALL: movi{{(.16b)?}} v0{{(.2d)?}}, #0
	ret <2 x float> <float 0.0, float 0.0>			ret <2 x float> <float 0.0, float 0.0>
	}			}

	define <16 x i8> @tv16i8() {			define <16 x i8> @tv16i8() {
	entry:			entry:
	; ALL-LABEL: tv16i8:			; ALL-LABEL: tv16i8:
	; ALL: movi{{(.16b)?}} v0{{(.2d)?}}, #0			; ALL: movi{{(.16b)?}} v0{{(.2d)?}}, #0
	ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>			ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
	Show All 37 Lines

llvm/trunk/test/CodeGen/AArch64/bitcast.ll

	; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s			; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s

	; PR23065: SCALAR_TO_VECTOR implies the top elements 1 to N-1 of the N-element vector are undefined.			; PR23065: SCALAR_TO_VECTOR implies the top elements 1 to N-1 of the N-element vector are undefined.

	define <4 x i16> @foo1(<2 x i32> %a) {			define <4 x i16> @foo1(<2 x i32> %a) {
	; CHECK-LABEL: foo1:			; CHECK-LABEL: foo1:
	; CHECK: movi d0, #0000000000000000			; CHECK: movi v0.2d, #0000000000000000
	; CHECK-NEXT: ret			; CHECK-NEXT: ret

	%1 = shufflevector <2 x i32> <i32 58712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>			%1 = shufflevector <2 x i32> <i32 58712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
	; Can't optimize the following bitcast to scalar_to_vector.			; Can't optimize the following bitcast to scalar_to_vector.
	%2 = bitcast <2 x i32> %1 to <4 x i16>			%2 = bitcast <2 x i32> %1 to <4 x i16>
	%3 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>			%3 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
	ret <4 x i16> %3			ret <4 x i16> %3
	}			}

	define <4 x i16> @foo2(<2 x i32> %a) {			define <4 x i16> @foo2(<2 x i32> %a) {
	; CHECK-LABEL: foo2:			; CHECK-LABEL: foo2:
	; CHECK: movi d0, #0000000000000000			; CHECK: movi v0.2d, #0000000000000000
	; CHECK-NEXT: ret			; CHECK-NEXT: ret

	%1 = shufflevector <2 x i32> <i32 712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>			%1 = shufflevector <2 x i32> <i32 712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
	; Can't optimize the following bitcast to scalar_to_vector.			; Can't optimize the following bitcast to scalar_to_vector.
	%2 = bitcast <2 x i32> %1 to <4 x i16>			%2 = bitcast <2 x i32> %1 to <4 x i16>
	%3 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>			%3 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
	ret <4 x i16> %3			ret <4 x i16> %3
	}			}

llvm/trunk/test/CodeGen/AArch64/fast-isel-cmp-vec.ll

	Show All 18 Lines
	bb2:			bb2:
	%z = zext <2 x i1> %c to <2 x i32>			%z = zext <2 x i1> %c to <2 x i32>
	ret <2 x i32> %z			ret <2 x i32> %z
	}			}

	define <2 x i32> @icmp_constfold_v2i32(<2 x i32> %a) {			define <2 x i32> @icmp_constfold_v2i32(<2 x i32> %a) {
	; CHECK-LABEL: icmp_constfold_v2i32:			; CHECK-LABEL: icmp_constfold_v2i32:
	; CHECK: ; %bb.0:			; CHECK: ; %bb.0:
	; CHECK-NEXT: movi d[[CMP:[0-9]+]], #0xffffffffffffffff			; CHECK-NEXT: movi.2d v[[CMP:[0-9]+]], #0xffffffffffffffff
	; CHECK-NEXT: ; %bb.1:			; CHECK-NEXT: ; %bb.1:
	; CHECK-NEXT: movi.2s [[MASK:v[0-9]+]], #1			; CHECK-NEXT: movi.2s [[MASK:v[0-9]+]], #1
	; CHECK-NEXT: and.8b v0, v[[CMP]], [[MASK]]			; CHECK-NEXT: and.8b v0, v[[CMP]], [[MASK]]
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%1 = icmp eq <2 x i32> %a, %a			%1 = icmp eq <2 x i32> %a, %a
	br label %bb2			br label %bb2
	bb2:			bb2:
	%2 = zext <2 x i1> %1 to <2 x i32>			%2 = zext <2 x i1> %1 to <2 x i32>
	Show All 15 Lines
	bb2:			bb2:
	%z = zext <4 x i1> %c to <4 x i32>			%z = zext <4 x i1> %c to <4 x i32>
	ret <4 x i32> %z			ret <4 x i32> %z
	}			}

	define <4 x i32> @icmp_constfold_v4i32(<4 x i32> %a) {			define <4 x i32> @icmp_constfold_v4i32(<4 x i32> %a) {
	; CHECK-LABEL: icmp_constfold_v4i32:			; CHECK-LABEL: icmp_constfold_v4i32:
	; CHECK: ; %bb.0:			; CHECK: ; %bb.0:
	; CHECK-NEXT: movi d[[CMP:[0-9]+]], #0xffffffffffffffff			; CHECK-NEXT: movi.2d v[[CMP:[0-9]+]], #0xffffffffffffffff
	; CHECK-NEXT: ; %bb.1:			; CHECK-NEXT: ; %bb.1:
	; CHECK-NEXT: movi.4h [[MASK:v[0-9]+]], #1			; CHECK-NEXT: movi.4h [[MASK:v[0-9]+]], #1
	; CHECK-NEXT: and.8b [[ZEXT:v[0-9]+]], v[[CMP]], [[MASK]]			; CHECK-NEXT: and.8b [[ZEXT:v[0-9]+]], v[[CMP]], [[MASK]]
	; CHECK-NEXT: ushll.4s v0, [[ZEXT]], #0			; CHECK-NEXT: ushll.4s v0, [[ZEXT]], #0
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%1 = icmp eq <4 x i32> %a, %a			%1 = icmp eq <4 x i32> %a, %a
	br label %bb2			br label %bb2
	bb2:			bb2:
	Show All 33 Lines

llvm/trunk/test/CodeGen/AArch64/fold-constants.ll

	; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s			; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s

	define i64 @dotests_616() {			define i64 @dotests_616() {
	; CHECK-LABEL: dotests_616			; CHECK-LABEL: dotests_616
	; CHECK: movi d0, #0000000000000000			; CHECK: movi v0.2d, #0000000000000000
	; CHECK-NEXT: fmov x0, d0			; CHECK-NEXT: fmov x0, d0
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	entry:			entry:
	%0 = bitcast <2 x i64> zeroinitializer to <8 x i16>			%0 = bitcast <2 x i64> zeroinitializer to <8 x i16>
	%1 = and <8 x i16> zeroinitializer, %0			%1 = and <8 x i16> zeroinitializer, %0
	%2 = icmp ne <8 x i16> %1, zeroinitializer			%2 = icmp ne <8 x i16> %1, zeroinitializer
	%3 = extractelement <8 x i1> %2, i32 2			%3 = extractelement <8 x i1> %2, i32 2
	%vgetq_lane285 = sext i1 %3 to i16			%vgetq_lane285 = sext i1 %3 to i16
	Show All 31 Lines

llvm/trunk/test/CodeGen/AArch64/machine_cse.ll

	Show First 20 Lines • Show All 41 Lines • ▼ Show 20 Lines
	if.end:			if.end:
	br label %return			br label %return

	return:			return:
	%retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]			%retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
	store i32 %a, i32 *%arg			store i32 %a, i32 *%arg
	ret void			ret void
	}			}

				define void @combine_vector_zeros(<8 x i8>* %p, <16 x i8>* %q) {
				; CHECK-LABEL: combine_vector_zeros:
				; CHECK: movi v[[REG:[0-9]+]].2d, #0
				; CHECK-NOT: movi
				; CHECK: str d[[REG]], [x0]
				; CHECK: str q[[REG]], [x1]
				entry:
				store <8 x i8> zeroinitializer, <8 x i8>* %p
				store <16 x i8> zeroinitializer, <16 x i8>* %q
				ret void
				}

				define void @combine_vector_ones(<2 x i32>* %p, <4 x i32>* %q) {
				; CHECK-LABEL: combine_vector_ones:
				; CHECK: movi v[[REG:[0-9]+]].2d, #0xffffffffffffffff
				; CHECK-NOT: movi
				; CHECK: str d[[REG]], [x0]
				; CHECK: str q[[REG]], [x1]
				entry:
				store <2 x i32> <i32 -1, i32 -1>, <2 x i32>* %p
				store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %q
				ret void
				}

llvm/trunk/test/CodeGen/AArch64/neon-compare-instructions.ll

Show First 20 Lines • Show All 1,217 Lines • ▼ Show 20 Lines	; CHECK-NEXT: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>		%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4		ret <2 x i64> %tmp4
}		}

define <8 x i8> @cmlsz8xi8(<8 x i8> %A) {		define <8 x i8> @cmlsz8xi8(<8 x i8> %A) {
; CHECK-LABEL: cmlsz8xi8:		; CHECK-LABEL: cmlsz8xi8:
; Using registers other than v0, v1 are possible, but would be odd.		; Using registers other than v0, v1 are possible, but would be odd.
; LS implemented as HS, so check reversed operands.		; LS implemented as HS, so check reversed operands.
; CHECK: movi {{v1.8b|d1}}, #{{0x0|0}}		; CHECK: movi {{v1.8b|v1.2d}}, #{{0x0|0}}
; CHECK-NEXT: cmhs {{v[0-9]+}}.8b, v1.8b, v0.8b		; CHECK-NEXT: cmhs {{v[0-9]+}}.8b, v1.8b, v0.8b
%tmp3 = icmp ule <8 x i8> %A, zeroinitializer;		%tmp3 = icmp ule <8 x i8> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>		%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4		ret <8 x i8> %tmp4
}		}

define <16 x i8> @cmlsz16xi8(<16 x i8> %A) {		define <16 x i8> @cmlsz16xi8(<16 x i8> %A) {
; CHECK-LABEL: cmlsz16xi8:		; CHECK-LABEL: cmlsz16xi8:
; Using registers other than v0, v1 are possible, but would be odd.		; Using registers other than v0, v1 are possible, but would be odd.
; LS implemented as HS, so check reversed operands.		; LS implemented as HS, so check reversed operands.
; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}}		; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}}
; CHECK-NEXT: cmhs {{v[0-9]+}}.16b, v1.16b, v0.16b		; CHECK-NEXT: cmhs {{v[0-9]+}}.16b, v1.16b, v0.16b
%tmp3 = icmp ule <16 x i8> %A, zeroinitializer;		%tmp3 = icmp ule <16 x i8> %A, zeroinitializer;
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>		%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4		ret <16 x i8> %tmp4
}		}

define <4 x i16> @cmlsz4xi16(<4 x i16> %A) {		define <4 x i16> @cmlsz4xi16(<4 x i16> %A) {
; CHECK-LABEL: cmlsz4xi16:		; CHECK-LABEL: cmlsz4xi16:
; Using registers other than v0, v1 are possible, but would be odd.		; Using registers other than v0, v1 are possible, but would be odd.
; LS implemented as HS, so check reversed operands.		; LS implemented as HS, so check reversed operands.
; CHECK: movi {{v1.8b|d1}}, #{{0x0|0}}		; CHECK: movi {{v1.8b|v1.2d}}, #{{0x0|0}}
; CHECK-NEXT: cmhs {{v[0-9]+}}.4h, v1.4h, v0.4h		; CHECK-NEXT: cmhs {{v[0-9]+}}.4h, v1.4h, v0.4h
%tmp3 = icmp ule <4 x i16> %A, zeroinitializer;		%tmp3 = icmp ule <4 x i16> %A, zeroinitializer;
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>		%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4		ret <4 x i16> %tmp4
}		}

define <8 x i16> @cmlsz8xi16(<8 x i16> %A) {		define <8 x i16> @cmlsz8xi16(<8 x i16> %A) {
; CHECK-LABEL: cmlsz8xi16:		; CHECK-LABEL: cmlsz8xi16:
; Using registers other than v0, v1 are possible, but would be odd.		; Using registers other than v0, v1 are possible, but would be odd.
; LS implemented as HS, so check reversed operands.		; LS implemented as HS, so check reversed operands.
; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}}		; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}}
; CHECK-NEXT: cmhs {{v[0-9]+}}.8h, v1.8h, v0.8h		; CHECK-NEXT: cmhs {{v[0-9]+}}.8h, v1.8h, v0.8h
%tmp3 = icmp ule <8 x i16> %A, zeroinitializer;		%tmp3 = icmp ule <8 x i16> %A, zeroinitializer;
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>		%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4		ret <8 x i16> %tmp4
}		}

define <2 x i32> @cmlsz2xi32(<2 x i32> %A) {		define <2 x i32> @cmlsz2xi32(<2 x i32> %A) {
; CHECK-LABEL: cmlsz2xi32:		; CHECK-LABEL: cmlsz2xi32:
; Using registers other than v0, v1 are possible, but would be odd.		; Using registers other than v0, v1 are possible, but would be odd.
; LS implemented as HS, so check reversed operands.		; LS implemented as HS, so check reversed operands.
; CHECK: movi {{v1.8b|d1}}, #{{0x0|0}}		; CHECK: movi {{v1.8b|v1.2d}}, #{{0x0|0}}
; CHECK-NEXT: cmhs {{v[0-9]+}}.2s, v1.2s, v0.2s		; CHECK-NEXT: cmhs {{v[0-9]+}}.2s, v1.2s, v0.2s
%tmp3 = icmp ule <2 x i32> %A, zeroinitializer;		%tmp3 = icmp ule <2 x i32> %A, zeroinitializer;
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>		%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4		ret <2 x i32> %tmp4
}		}

define <4 x i32> @cmlsz4xi32(<4 x i32> %A) {		define <4 x i32> @cmlsz4xi32(<4 x i32> %A) {
; CHECK-LABEL: cmlsz4xi32:		; CHECK-LABEL: cmlsz4xi32:
▲ Show 20 Lines • Show All 939 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/AArch64/selectiondag-order.ll

	Show All 15 Lines

	end: ; preds = %body			end: ; preds = %body
	%c = bitcast <2 x i32> %add to i64			%c = bitcast <2 x i32> %add to i64
	%res = add i64 %rand, %c			%res = add i64 %rand, %c
	ret i64 %res			ret i64 %res
	}			}

	; AARCH64-CHECK: simulate:			; AARCH64-CHECK: simulate:
	; AARCH64-CHECK: movi d9, #0000000000000000			; AARCH64-CHECK: movi v0.2d, #0000000000000000
	; AARCH64-CHECK: bl lrand48			; AARCH64-CHECK: bl lrand48
	; AARCH64-CHECK: mov x19, x0			; AARCH64-CHECK: mov x19, x0
	; AARCH64-CHECK: BB0_1:			; AARCH64-CHECK: BB0_1:


	define i64 @simulateWithDebugIntrinsic(<2 x i32> %a) local_unnamed_addr {			define i64 @simulateWithDebugIntrinsic(<2 x i32> %a) local_unnamed_addr {
	entry:			entry:
	%rand = tail call i64 @lrand48() #3			%rand = tail call i64 @lrand48() #3
	Show All 9 Lines

	end: ; preds = %body			end: ; preds = %body
	%c = bitcast <2 x i32> %add to i64			%c = bitcast <2 x i32> %add to i64
	%res = add i64 %rand, %c			%res = add i64 %rand, %c
	ret i64 %res			ret i64 %res
	}			}

	; AARCH64-CHECK: simulateWithDebugIntrinsic			; AARCH64-CHECK: simulateWithDebugIntrinsic
	; AARCH64-CHECK: movi d9, #0000000000000000			; AARCH64-CHECK: movi v0.2d, #0000000000000000
	; AARCH64-CHECK: bl lrand48			; AARCH64-CHECK: bl lrand48
	; AARCH64-CHECK: mov x19, x0			; AARCH64-CHECK: mov x19, x0
	; AARCH64-CHECK: BB1_1:			; AARCH64-CHECK: BB1_1:


	define i64 @simulateWithDbgDeclare(<2 x i32> %a) local_unnamed_addr {			define i64 @simulateWithDbgDeclare(<2 x i32> %a) local_unnamed_addr {
	entry:			entry:
	%rand = tail call i64 @lrand48() #3			%rand = tail call i64 @lrand48() #3
	Show All 9 Lines

	end: ; preds = %body			end: ; preds = %body
	%c = bitcast <2 x i32> %add to i64			%c = bitcast <2 x i32> %add to i64
	%res = add i64 %rand, %c			%res = add i64 %rand, %c
	ret i64 %res			ret i64 %res
	}			}

	; AARCH64-CHECK: simulateWithDbgDeclare:			; AARCH64-CHECK: simulateWithDbgDeclare:
	; AARCH64-CHECK: movi d9, #0000000000000000			; AARCH64-CHECK: movi v0.2d, #0000000000000000
	; AARCH64-CHECK: bl lrand48			; AARCH64-CHECK: bl lrand48
	; AARCH64-CHECK: mov x19, x0			; AARCH64-CHECK: mov x19, x0
	; AARCH64-CHECK: BB2_1:			; AARCH64-CHECK: BB2_1:

	declare i64 @lrand48()			declare i64 @lrand48()

	declare void @llvm.dbg.value(metadata, i64, metadata, metadata)			declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
	declare void @llvm.dbg.declare(metadata, metadata, metadata)			declare void @llvm.dbg.declare(metadata, metadata, metadata)
	Show All 12 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Do 64-bit vector move of 0 and -1 by extracting from the 128-bit move · Closed · Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 171101

llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td

llvm/trunk/test/CodeGen/AArch64/aarch64-be-bv.ll

llvm/trunk/test/CodeGen/AArch64/aarch64-smax-constantfold.ll

llvm/trunk/test/CodeGen/AArch64/arm64-neon-compare-instructions.ll

llvm/trunk/test/CodeGen/AArch64/arm64-neon-copy.ll

llvm/trunk/test/CodeGen/AArch64/arm64-vector-ext.ll

llvm/trunk/test/CodeGen/AArch64/arm64-vshuffle.ll

llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll

llvm/trunk/test/CodeGen/AArch64/bitcast.ll

llvm/trunk/test/CodeGen/AArch64/fast-isel-cmp-vec.ll

llvm/trunk/test/CodeGen/AArch64/fold-constants.ll

llvm/trunk/test/CodeGen/AArch64/machine_cse.ll

llvm/trunk/test/CodeGen/AArch64/neon-compare-instructions.ll

llvm/trunk/test/CodeGen/AArch64/selectiondag-order.ll

[AArch64] Do 64-bit vector move of 0 and -1 by extracting from the 128-bit move
Closed · Public