Changeset View
Changeset View
Standalone View
Standalone View
llvm/test/CodeGen/AArch64/sve-fixed-length-mask-opt.ll
Show All 25 Lines | |||||
} | } | ||||
define void @masked_gather_v4i8(<4 x i8>* %a, <4 x i8*>* %b) vscale_range(2,0) #0 { | define void @masked_gather_v4i8(<4 x i8>* %a, <4 x i8*>* %b) vscale_range(2,0) #0 { | ||||
; CHECK-LABEL: masked_gather_v4i8: | ; CHECK-LABEL: masked_gather_v4i8: | ||||
; CHECK: // %bb.0: | ; CHECK: // %bb.0: | ||||
; CHECK-NEXT: ptrue p0.d, vl4 | ; CHECK-NEXT: ptrue p0.d, vl4 | ||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1] | ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1] | ||||
; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d] | ; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d] | ||||
; CHECK-NEXT: ptrue p0.h, vl4 | ; CHECK-NEXT: st1b { z0.d }, p0, [x0] | ||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s | |||||
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h | |||||
; CHECK-NEXT: st1b { z0.h }, p0, [x0] | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
%ptrs = load <4 x i8*>, <4 x i8*>* %b | %ptrs = load <4 x i8*>, <4 x i8*>* %b | ||||
%vals = call <4 x i8> @llvm.masked.gather.v4i8(<4 x i8*> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef) | %vals = call <4 x i8> @llvm.masked.gather.v4i8(<4 x i8*> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef) | ||||
store <4 x i8> %vals, <4 x i8>* %a | store <4 x i8> %vals, <4 x i8>* %a | ||||
ret void | ret void | ||||
} | } | ||||
define void @masked_gather_v8i8(<8 x i8>* %a, <8 x i8*>* %b) #0 { | define void @masked_gather_v8i8(<8 x i8>* %a, <8 x i8*>* %b) #0 { | ||||
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
define void @masked_gather_v32i8(<32 x i8>* %a, <32 x i8*>* %b) vscale_range(16,0) #0 { | define void @masked_gather_v32i8(<32 x i8>* %a, <32 x i8*>* %b) vscale_range(16,0) #0 { | ||||
; CHECK-LABEL: masked_gather_v32i8: | ; CHECK-LABEL: masked_gather_v32i8: | ||||
; CHECK: // %bb.0: | ; CHECK: // %bb.0: | ||||
; CHECK-NEXT: ptrue p0.d, vl32 | ; CHECK-NEXT: ptrue p0.d, vl32 | ||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1] | ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1] | ||||
; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d] | ; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d] | ||||
; CHECK-NEXT: ptrue p0.b, vl32 | ; CHECK-NEXT: st1b { z0.d }, p0, [x0] | ||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s | |||||
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h | |||||
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b | |||||
; CHECK-NEXT: st1b { z0.b }, p0, [x0] | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
%ptrs = load <32 x i8*>, <32 x i8*>* %b | %ptrs = load <32 x i8*>, <32 x i8*>* %b | ||||
%vals = call <32 x i8> @llvm.masked.gather.v32i8(<32 x i8*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | %vals = call <32 x i8> @llvm.masked.gather.v32i8(<32 x i8*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | ||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | ||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | ||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef) | i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef) | ||||
store <32 x i8> %vals, <32 x i8>* %a | store <32 x i8> %vals, <32 x i8>* %a | ||||
ret void | ret void | ||||
▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
define void @masked_gather_v16i16(<16 x i16>* %a, <16 x i16*>* %b) vscale_range(8,0) #0 { | define void @masked_gather_v16i16(<16 x i16>* %a, <16 x i16*>* %b) vscale_range(8,0) #0 { | ||||
; CHECK-LABEL: masked_gather_v16i16: | ; CHECK-LABEL: masked_gather_v16i16: | ||||
; CHECK: // %bb.0: | ; CHECK: // %bb.0: | ||||
; CHECK-NEXT: ptrue p0.d, vl16 | ; CHECK-NEXT: ptrue p0.d, vl16 | ||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1] | ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1] | ||||
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d] | ; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d] | ||||
; CHECK-NEXT: ptrue p0.h, vl16 | ; CHECK-NEXT: st1h { z0.d }, p0, [x0] | ||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s | |||||
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h | |||||
; CHECK-NEXT: st1h { z0.h }, p0, [x0] | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
%ptrs = load <16 x i16*>, <16 x i16*>* %b | %ptrs = load <16 x i16*>, <16 x i16*>* %b | ||||
%vals = call <16 x i16> @llvm.masked.gather.v16i16(<16 x i16*> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | %vals = call <16 x i16> @llvm.masked.gather.v16i16(<16 x i16*> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | ||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> undef) | i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> undef) | ||||
store <16 x i16> %vals, <16 x i16>* %a | store <16 x i16> %vals, <16 x i16>* %a | ||||
ret void | ret void | ||||
} | } | ||||
define void @masked_gather_v32i16(<32 x i16>* %a, <32 x i16*>* %b) vscale_range(16,0) #0 { | define void @masked_gather_v32i16(<32 x i16>* %a, <32 x i16*>* %b) vscale_range(16,0) #0 { | ||||
; CHECK-LABEL: masked_gather_v32i16: | ; CHECK-LABEL: masked_gather_v32i16: | ||||
; CHECK: // %bb.0: | ; CHECK: // %bb.0: | ||||
; CHECK-NEXT: ptrue p0.d, vl32 | ; CHECK-NEXT: ptrue p0.d, vl32 | ||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1] | ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1] | ||||
; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d] | ; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d] | ||||
; CHECK-NEXT: ptrue p0.h, vl32 | ; CHECK-NEXT: st1h { z0.d }, p0, [x0] | ||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s | |||||
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h | |||||
; CHECK-NEXT: st1h { z0.h }, p0, [x0] | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
%ptrs = load <32 x i16*>, <32 x i16*>* %b | %ptrs = load <32 x i16*>, <32 x i16*>* %b | ||||
%vals = call <32 x i16> @llvm.masked.gather.v32i16(<32 x i16*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | %vals = call <32 x i16> @llvm.masked.gather.v32i16(<32 x i16*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | ||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | ||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | ||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i16> undef) | i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i16> undef) | ||||
store <32 x i16> %vals, <32 x i16>* %a | store <32 x i16> %vals, <32 x i16>* %a | ||||
ret void | ret void | ||||
▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines | |||||
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] | ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] | ||||
; VBITS_GE_256-NEXT: ret | ; VBITS_GE_256-NEXT: ret | ||||
; | ; | ||||
; VBITS_GE_512-LABEL: masked_gather_v8i32: | ; VBITS_GE_512-LABEL: masked_gather_v8i32: | ||||
; VBITS_GE_512: // %bb.0: | ; VBITS_GE_512: // %bb.0: | ||||
; VBITS_GE_512-NEXT: ptrue p0.d, vl8 | ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 | ||||
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1] | ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1] | ||||
; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [z0.d] | ; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [z0.d] | ||||
; VBITS_GE_512-NEXT: ptrue p0.s, vl8 | ; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x0] | ||||
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s | |||||
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] | |||||
; VBITS_GE_512-NEXT: ret | ; VBITS_GE_512-NEXT: ret | ||||
%ptrs = load <8 x i32*>, <8 x i32*>* %b | %ptrs = load <8 x i32*>, <8 x i32*>* %b | ||||
%vals = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) | %vals = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) | ||||
store <8 x i32> %vals, <8 x i32>* %a | store <8 x i32> %vals, <8 x i32>* %a | ||||
ret void | ret void | ||||
} | } | ||||
define void @masked_gather_v16i32(<16 x i32>* %a, <16 x i32*>* %b) vscale_range(8,0) #0 { | define void @masked_gather_v16i32(<16 x i32>* %a, <16 x i32*>* %b) vscale_range(8,0) #0 { | ||||
; CHECK-LABEL: masked_gather_v16i32: | ; CHECK-LABEL: masked_gather_v16i32: | ||||
; CHECK: // %bb.0: | ; CHECK: // %bb.0: | ||||
; CHECK-NEXT: ptrue p0.d, vl16 | ; CHECK-NEXT: ptrue p0.d, vl16 | ||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1] | ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1] | ||||
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d] | ; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d] | ||||
; CHECK-NEXT: ptrue p0.s, vl16 | ; CHECK-NEXT: st1w { z0.d }, p0, [x0] | ||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s | |||||
; CHECK-NEXT: st1w { z0.s }, p0, [x0] | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
%ptrs = load <16 x i32*>, <16 x i32*>* %b | %ptrs = load <16 x i32*>, <16 x i32*>* %b | ||||
%vals = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | %vals = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | ||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> undef) | i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> undef) | ||||
store <16 x i32> %vals, <16 x i32>* %a | store <16 x i32> %vals, <16 x i32>* %a | ||||
ret void | ret void | ||||
} | } | ||||
define void @masked_gather_v32i32(<32 x i32>* %a, <32 x i32*>* %b) vscale_range(16,0) #0 { | define void @masked_gather_v32i32(<32 x i32>* %a, <32 x i32*>* %b) vscale_range(16,0) #0 { | ||||
; CHECK-LABEL: masked_gather_v32i32: | ; CHECK-LABEL: masked_gather_v32i32: | ||||
; CHECK: // %bb.0: | ; CHECK: // %bb.0: | ||||
; CHECK-NEXT: ptrue p0.d, vl32 | ; CHECK-NEXT: ptrue p0.d, vl32 | ||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1] | ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1] | ||||
; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d] | ; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d] | ||||
; CHECK-NEXT: ptrue p0.s, vl32 | ; CHECK-NEXT: st1w { z0.d }, p0, [x0] | ||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s | |||||
; CHECK-NEXT: st1w { z0.s }, p0, [x0] | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
%ptrs = load <32 x i32*>, <32 x i32*>* %b | %ptrs = load <32 x i32*>, <32 x i32*>* %b | ||||
%vals = call <32 x i32> @llvm.masked.gather.v32i32(<32 x i32*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | %vals = call <32 x i32> @llvm.masked.gather.v32i32(<32 x i32*> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | ||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | ||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, | ||||
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i32> undef) | i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i32> undef) | ||||
store <32 x i32> %vals, <32 x i32>* %a | store <32 x i32> %vals, <32 x i32>* %a | ||||
ret void | ret void | ||||
▲ Show 20 Lines • Show All 117 Lines • Show Last 20 Lines |