Changeset View
Changeset View
Standalone View
Standalone View
llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
Show All 38 Lines | |||||
; CHECK: // %bb.0: | ; CHECK: // %bb.0: | ||||
; CHECK-NEXT: ldr s0, [x0] | ; CHECK-NEXT: ldr s0, [x0] | ||||
; CHECK-NEXT: ptrue p0.d, vl4 | ; CHECK-NEXT: ptrue p0.d, vl4 | ||||
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] | ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] | ||||
; CHECK-NEXT: ushll v0.8h, v0.8b, #0 | ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 | ||||
; CHECK-NEXT: cmeq v0.4h, v0.4h, #0 | ; CHECK-NEXT: cmeq v0.4h, v0.4h, #0 | ||||
; CHECK-NEXT: sunpklo z0.s, z0.h | ; CHECK-NEXT: sunpklo z0.s, z0.h | ||||
; CHECK-NEXT: sunpklo z0.d, z0.s | ; CHECK-NEXT: sunpklo z0.d, z0.s | ||||
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 | ; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0 | ||||
; CHECK-NEXT: ld1b { z0.d }, p0/z, [z1.d] | ; CHECK-NEXT: ld1b { z0.d }, p1/z, [z1.d] | ||||
; CHECK-NEXT: ptrue p0.h, vl4 | ; CHECK-NEXT: st1b { z0.d }, p0, [x0] | ||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s | |||||
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h | |||||
; CHECK-NEXT: st1b { z0.h }, p0, [x0] | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
%cval = load <4 x i8>, <4 x i8>* %a | %cval = load <4 x i8>, <4 x i8>* %a | ||||
%ptrs = load <4 x i8*>, <4 x i8*>* %b | %ptrs = load <4 x i8*>, <4 x i8*>* %b | ||||
%mask = icmp eq <4 x i8> %cval, zeroinitializer | %mask = icmp eq <4 x i8> %cval, zeroinitializer | ||||
%vals = call <4 x i8> @llvm.masked.gather.v4i8(<4 x i8*> %ptrs, i32 8, <4 x i1> %mask, <4 x i8> undef) | %vals = call <4 x i8> @llvm.masked.gather.v4i8(<4 x i8*> %ptrs, i32 8, <4 x i1> %mask, <4 x i8> undef) | ||||
store <4 x i8> %vals, <4 x i8>* %a | store <4 x i8> %vals, <4 x i8>* %a | ||||
ret void | ret void | ||||
} | } | ||||
▲ Show 20 Lines • Show All 80 Lines • ▼ Show 20 Lines | |||||
define void @masked_gather_v32i8(<32 x i8>* %a, <32 x i8*>* %b) vscale_range(16,0) #0 { | define void @masked_gather_v32i8(<32 x i8>* %a, <32 x i8*>* %b) vscale_range(16,0) #0 { | ||||
; CHECK-LABEL: masked_gather_v32i8: | ; CHECK-LABEL: masked_gather_v32i8: | ||||
; CHECK: // %bb.0: | ; CHECK: // %bb.0: | ||||
; CHECK-NEXT: ptrue p0.b, vl32 | ; CHECK-NEXT: ptrue p0.b, vl32 | ||||
; CHECK-NEXT: ptrue p1.d, vl32 | ; CHECK-NEXT: ptrue p1.d, vl32 | ||||
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] | ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] | ||||
; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1] | ; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1] | ||||
; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0 | ; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0 | ||||
; CHECK-NEXT: punpklo p1.h, p1.b | ; CHECK-NEXT: punpklo p0.h, p0.b | ||||
; CHECK-NEXT: punpklo p1.h, p1.b | ; CHECK-NEXT: punpklo p0.h, p0.b | ||||
; CHECK-NEXT: punpklo p1.h, p1.b | ; CHECK-NEXT: punpklo p0.h, p0.b | ||||
; CHECK-NEXT: ld1b { z0.d }, p1/z, [z1.d] | ; CHECK-NEXT: ld1b { z0.d }, p0/z, [z1.d] | ||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s | ; CHECK-NEXT: st1b { z0.d }, p1, [x0] | ||||
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h | |||||
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b | |||||
; CHECK-NEXT: st1b { z0.b }, p0, [x0] | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
%cval = load <32 x i8>, <32 x i8>* %a | %cval = load <32 x i8>, <32 x i8>* %a | ||||
%ptrs = load <32 x i8*>, <32 x i8*>* %b | %ptrs = load <32 x i8*>, <32 x i8*>* %b | ||||
%mask = icmp eq <32 x i8> %cval, zeroinitializer | %mask = icmp eq <32 x i8> %cval, zeroinitializer | ||||
%vals = call <32 x i8> @llvm.masked.gather.v32i8(<32 x i8*> %ptrs, i32 8, <32 x i1> %mask, <32 x i8> undef) | %vals = call <32 x i8> @llvm.masked.gather.v32i8(<32 x i8*> %ptrs, i32 8, <32 x i1> %mask, <32 x i8> undef) | ||||
store <32 x i8> %vals, <32 x i8>* %a | store <32 x i8> %vals, <32 x i8>* %a | ||||
ret void | ret void | ||||
} | } | ||||
▲ Show 20 Lines • Show All 100 Lines • ▼ Show 20 Lines | |||||
define void @masked_gather_v16i16(<16 x i16>* %a, <16 x i16*>* %b) vscale_range(8,0) #0 { | define void @masked_gather_v16i16(<16 x i16>* %a, <16 x i16*>* %b) vscale_range(8,0) #0 { | ||||
; CHECK-LABEL: masked_gather_v16i16: | ; CHECK-LABEL: masked_gather_v16i16: | ||||
; CHECK: // %bb.0: | ; CHECK: // %bb.0: | ||||
; CHECK-NEXT: ptrue p0.h, vl16 | ; CHECK-NEXT: ptrue p0.h, vl16 | ||||
; CHECK-NEXT: ptrue p1.d, vl16 | ; CHECK-NEXT: ptrue p1.d, vl16 | ||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] | ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] | ||||
; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1] | ; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1] | ||||
; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0 | ; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0 | ||||
; CHECK-NEXT: punpklo p1.h, p1.b | ; CHECK-NEXT: punpklo p0.h, p0.b | ||||
; CHECK-NEXT: punpklo p1.h, p1.b | ; CHECK-NEXT: punpklo p0.h, p0.b | ||||
; CHECK-NEXT: ld1h { z0.d }, p1/z, [z1.d] | ; CHECK-NEXT: ld1h { z0.d }, p0/z, [z1.d] | ||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s | ; CHECK-NEXT: st1h { z0.d }, p1, [x0] | ||||
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h | |||||
; CHECK-NEXT: st1h { z0.h }, p0, [x0] | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
%cval = load <16 x i16>, <16 x i16>* %a | %cval = load <16 x i16>, <16 x i16>* %a | ||||
%ptrs = load <16 x i16*>, <16 x i16*>* %b | %ptrs = load <16 x i16*>, <16 x i16*>* %b | ||||
%mask = icmp eq <16 x i16> %cval, zeroinitializer | %mask = icmp eq <16 x i16> %cval, zeroinitializer | ||||
%vals = call <16 x i16> @llvm.masked.gather.v16i16(<16 x i16*> %ptrs, i32 8, <16 x i1> %mask, <16 x i16> undef) | %vals = call <16 x i16> @llvm.masked.gather.v16i16(<16 x i16*> %ptrs, i32 8, <16 x i1> %mask, <16 x i16> undef) | ||||
store <16 x i16> %vals, <16 x i16>* %a | store <16 x i16> %vals, <16 x i16>* %a | ||||
ret void | ret void | ||||
} | } | ||||
define void @masked_gather_v32i16(<32 x i16>* %a, <32 x i16*>* %b) vscale_range(16,0) #0 { | define void @masked_gather_v32i16(<32 x i16>* %a, <32 x i16*>* %b) vscale_range(16,0) #0 { | ||||
; CHECK-LABEL: masked_gather_v32i16: | ; CHECK-LABEL: masked_gather_v32i16: | ||||
; CHECK: // %bb.0: | ; CHECK: // %bb.0: | ||||
; CHECK-NEXT: ptrue p0.h, vl32 | ; CHECK-NEXT: ptrue p0.h, vl32 | ||||
; CHECK-NEXT: ptrue p1.d, vl32 | ; CHECK-NEXT: ptrue p1.d, vl32 | ||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] | ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] | ||||
; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1] | ; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1] | ||||
; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0 | ; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0 | ||||
; CHECK-NEXT: punpklo p1.h, p1.b | ; CHECK-NEXT: punpklo p0.h, p0.b | ||||
; CHECK-NEXT: punpklo p1.h, p1.b | ; CHECK-NEXT: punpklo p0.h, p0.b | ||||
; CHECK-NEXT: ld1h { z0.d }, p1/z, [z1.d] | ; CHECK-NEXT: ld1h { z0.d }, p0/z, [z1.d] | ||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s | ; CHECK-NEXT: st1h { z0.d }, p1, [x0] | ||||
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h | |||||
; CHECK-NEXT: st1h { z0.h }, p0, [x0] | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
%cval = load <32 x i16>, <32 x i16>* %a | %cval = load <32 x i16>, <32 x i16>* %a | ||||
%ptrs = load <32 x i16*>, <32 x i16*>* %b | %ptrs = load <32 x i16*>, <32 x i16*>* %b | ||||
%mask = icmp eq <32 x i16> %cval, zeroinitializer | %mask = icmp eq <32 x i16> %cval, zeroinitializer | ||||
%vals = call <32 x i16> @llvm.masked.gather.v32i16(<32 x i16*> %ptrs, i32 8, <32 x i1> %mask, <32 x i16> undef) | %vals = call <32 x i16> @llvm.masked.gather.v32i16(<32 x i16*> %ptrs, i32 8, <32 x i1> %mask, <32 x i16> undef) | ||||
store <32 x i16> %vals, <32 x i16>* %a | store <32 x i16> %vals, <32 x i16>* %a | ||||
ret void | ret void | ||||
} | } | ||||
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines | |||||
; VBITS_GE_256-NEXT: ret | ; VBITS_GE_256-NEXT: ret | ||||
; | ; | ||||
; VBITS_GE_512-LABEL: masked_gather_v8i32: | ; VBITS_GE_512-LABEL: masked_gather_v8i32: | ||||
; VBITS_GE_512: // %bb.0: | ; VBITS_GE_512: // %bb.0: | ||||
; VBITS_GE_512-NEXT: ptrue p0.s, vl8 | ; VBITS_GE_512-NEXT: ptrue p0.s, vl8 | ||||
; VBITS_GE_512-NEXT: ptrue p1.d, vl8 | ; VBITS_GE_512-NEXT: ptrue p1.d, vl8 | ||||
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] | ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] | ||||
; VBITS_GE_512-NEXT: ld1d { z1.d }, p1/z, [x1] | ; VBITS_GE_512-NEXT: ld1d { z1.d }, p1/z, [x1] | ||||
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, #0 | ; VBITS_GE_512-NEXT: cmpeq p0.s, p0/z, z0.s, #0 | ||||
; VBITS_GE_512-NEXT: punpklo p1.h, p1.b | ; VBITS_GE_512-NEXT: punpklo p0.h, p0.b | ||||
; VBITS_GE_512-NEXT: ld1w { z0.d }, p1/z, [z1.d] | ; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [z1.d] | ||||
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s | ; VBITS_GE_512-NEXT: st1w { z0.d }, p1, [x0] | ||||
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] | |||||
; VBITS_GE_512-NEXT: ret | ; VBITS_GE_512-NEXT: ret | ||||
%cval = load <8 x i32>, <8 x i32>* %a | %cval = load <8 x i32>, <8 x i32>* %a | ||||
%ptrs = load <8 x i32*>, <8 x i32*>* %b | %ptrs = load <8 x i32*>, <8 x i32*>* %b | ||||
%mask = icmp eq <8 x i32> %cval, zeroinitializer | %mask = icmp eq <8 x i32> %cval, zeroinitializer | ||||
%vals = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptrs, i32 8, <8 x i1> %mask, <8 x i32> undef) | %vals = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptrs, i32 8, <8 x i1> %mask, <8 x i32> undef) | ||||
store <8 x i32> %vals, <8 x i32>* %a | store <8 x i32> %vals, <8 x i32>* %a | ||||
ret void | ret void | ||||
} | } | ||||
define void @masked_gather_v16i32(<16 x i32>* %a, <16 x i32*>* %b) vscale_range(8,0) #0 { | define void @masked_gather_v16i32(<16 x i32>* %a, <16 x i32*>* %b) vscale_range(8,0) #0 { | ||||
; CHECK-LABEL: masked_gather_v16i32: | ; CHECK-LABEL: masked_gather_v16i32: | ||||
; CHECK: // %bb.0: | ; CHECK: // %bb.0: | ||||
; CHECK-NEXT: ptrue p0.s, vl16 | ; CHECK-NEXT: ptrue p0.s, vl16 | ||||
; CHECK-NEXT: ptrue p1.d, vl16 | ; CHECK-NEXT: ptrue p1.d, vl16 | ||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] | ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] | ||||
; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1] | ; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1] | ||||
; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0 | ; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0 | ||||
; CHECK-NEXT: punpklo p1.h, p1.b | ; CHECK-NEXT: punpklo p0.h, p0.b | ||||
; CHECK-NEXT: ld1w { z0.d }, p1/z, [z1.d] | ; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d] | ||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s | ; CHECK-NEXT: st1w { z0.d }, p1, [x0] | ||||
; CHECK-NEXT: st1w { z0.s }, p0, [x0] | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
%cval = load <16 x i32>, <16 x i32>* %a | %cval = load <16 x i32>, <16 x i32>* %a | ||||
%ptrs = load <16 x i32*>, <16 x i32*>* %b | %ptrs = load <16 x i32*>, <16 x i32*>* %b | ||||
%mask = icmp eq <16 x i32> %cval, zeroinitializer | %mask = icmp eq <16 x i32> %cval, zeroinitializer | ||||
%vals = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptrs, i32 8, <16 x i1> %mask, <16 x i32> undef) | %vals = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptrs, i32 8, <16 x i1> %mask, <16 x i32> undef) | ||||
store <16 x i32> %vals, <16 x i32>* %a | store <16 x i32> %vals, <16 x i32>* %a | ||||
ret void | ret void | ||||
} | } | ||||
define void @masked_gather_v32i32(<32 x i32>* %a, <32 x i32*>* %b) vscale_range(16,0) #0 { | define void @masked_gather_v32i32(<32 x i32>* %a, <32 x i32*>* %b) vscale_range(16,0) #0 { | ||||
; CHECK-LABEL: masked_gather_v32i32: | ; CHECK-LABEL: masked_gather_v32i32: | ||||
; CHECK: // %bb.0: | ; CHECK: // %bb.0: | ||||
; CHECK-NEXT: ptrue p0.s, vl32 | ; CHECK-NEXT: ptrue p0.s, vl32 | ||||
; CHECK-NEXT: ptrue p1.d, vl32 | ; CHECK-NEXT: ptrue p1.d, vl32 | ||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] | ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] | ||||
; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1] | ; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1] | ||||
; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0 | ; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0 | ||||
; CHECK-NEXT: punpklo p1.h, p1.b | ; CHECK-NEXT: punpklo p0.h, p0.b | ||||
; CHECK-NEXT: ld1w { z0.d }, p1/z, [z1.d] | ; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d] | ||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s | ; CHECK-NEXT: st1w { z0.d }, p1, [x0] | ||||
; CHECK-NEXT: st1w { z0.s }, p0, [x0] | |||||
; CHECK-NEXT: ret | ; CHECK-NEXT: ret | ||||
%cval = load <32 x i32>, <32 x i32>* %a | %cval = load <32 x i32>, <32 x i32>* %a | ||||
%ptrs = load <32 x i32*>, <32 x i32*>* %b | %ptrs = load <32 x i32*>, <32 x i32*>* %b | ||||
%mask = icmp eq <32 x i32> %cval, zeroinitializer | %mask = icmp eq <32 x i32> %cval, zeroinitializer | ||||
%vals = call <32 x i32> @llvm.masked.gather.v32i32(<32 x i32*> %ptrs, i32 8, <32 x i1> %mask, <32 x i32> undef) | %vals = call <32 x i32> @llvm.masked.gather.v32i32(<32 x i32*> %ptrs, i32 8, <32 x i1> %mask, <32 x i32> undef) | ||||
store <32 x i32> %vals, <32 x i32>* %a | store <32 x i32> %vals, <32 x i32>* %a | ||||
ret void | ret void | ||||
} | } | ||||
▲ Show 20 Lines • Show All 877 Lines • Show Last 20 Lines |