diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1633,6 +1633,7 @@ setOperationAction(ISD::ANY_EXTEND, VT, Custom); setOperationAction(ISD::ZERO_EXTEND, VT, Custom); setOperationAction(ISD::SIGN_EXTEND, VT, Custom); + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); setOperationAction(ISD::AND, VT, Custom); setOperationAction(ISD::ADD, VT, Custom); @@ -11434,7 +11435,8 @@ SelectionDAG &DAG) const { EVT VT = Op.getValueType(); - if (useSVEForFixedLengthVectorVT(VT)) + if (useSVEForFixedLengthVectorVT(VT, + Subtarget->forceStreamingCompatibleSVE())) return LowerToScalableOp(Op, DAG); assert(VT.isScalableVector() && VT.getVectorElementType() == MVT::i1 && diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll @@ -10,15 +10,11 @@ define <4 x i8> @ctlz_v4i8(<4 x i8> %op) #0 { ; CHECK-LABEL: ctlz_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: adrp x9, .LCPI0_1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI0_1] -; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: and z0.h, z0.h, #0xff ; CHECK-NEXT: clz z0.h, p0/m, z0.h -; CHECK-NEXT: sub z0.h, z0.h, z2.h +; CHECK-NEXT: sub z0.h, z0.h, #8 // =0x8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %res = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> %op) @@ -67,15 +63,11 @@ define <2 x i16> @ctlz_v2i16(<2 x i16> %op) #0 { ; CHECK-LABEL: ctlz_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: adrp x9, .LCPI4_1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI4_1] -; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: clz z0.s, p0/m, z0.s -; CHECK-NEXT: sub z0.s, z0.s, z2.s +; CHECK-NEXT: sub z0.s, z0.s, #16 // =0x10 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %res = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> %op) @@ -206,11 +198,9 @@ define <4 x i8> @ctpop_v4i8(<4 x i8> %op) #0 { ; CHECK-LABEL: ctpop_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI14_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0] -; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: and z0.h, z0.h, #0xff ; CHECK-NEXT: cnt z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -260,11 +250,9 @@ define <2 x i16> @ctpop_v2i16(<2 x i16> %op) #0 { ; CHECK-LABEL: ctpop_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI18_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI18_0] -; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: cnt z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -396,11 +384,9 @@ define <4 x i8> @cttz_v4i8(<4 x i8> %op) #0 { ; CHECK-LABEL: cttz_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI28_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI28_0] -; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: orr z0.h, z0.h, #0x100 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h ; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -455,11 +441,9 @@ define <2 x i16> @cttz_v2i16(<2 x i16> %op) #0 { ; CHECK-LABEL: cttz_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI32_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI32_0] -; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: orr z0.s, z0.s, #0x10000 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s ; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll @@ -12,23 +12,20 @@ define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %right_ptr) #0 { ; CHECK-LABEL: fixed_bitselect_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: ldp q1, q0, [x0] -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: adrp x8, .LCPI0_1 -; CHECK-NEXT: ldp q3, q4, [x1] -; CHECK-NEXT: sub z6.s, z2.s, z1.s -; CHECK-NEXT: sub z2.s, z2.s, z0.s -; CHECK-NEXT: and z3.d, z6.d, z3.d -; CHECK-NEXT: ldp q7, q16, [x2] +; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: mov z0.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z7.s, z3.s, z0.s +; CHECK-NEXT: subr z3.s, z3.s, #0 // =0x0 +; CHECK-NEXT: ldp q1, q4, [x1] +; CHECK-NEXT: add z0.s, z2.s, z0.s +; CHECK-NEXT: subr z2.s, z2.s, #0 // =0x0 +; CHECK-NEXT: and z1.d, z3.d, z1.d +; CHECK-NEXT: ldp q5, q6, [x2] ; CHECK-NEXT: and z2.d, z2.d, z4.d -; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI0_1] -; CHECK-NEXT: add z1.s, z1.s, z5.s -; CHECK-NEXT: add z0.s, z0.s, z5.s -; CHECK-NEXT: and z4.d, z0.d, z16.d -; CHECK-NEXT: and z0.d, z1.d, z7.d -; CHECK-NEXT: orr z0.d, z0.d, z3.d -; CHECK-NEXT: orr z1.d, z4.d, z2.d +; CHECK-NEXT: and z3.d, z0.d, z6.d +; CHECK-NEXT: and z0.d, z7.d, z5.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: orr z1.d, z3.d, z2.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll @@ -45,17 +45,16 @@ define <2 x i256> @load_zext_v2i64i256(<2 x i64>* %ap) #0 { ; CHECK-LABEL: load_zext_v2i64i256: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: mov x1, xzr -; CHECK-NEXT: mov x5, xzr -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: mov z0.d, #0 // =0x0 +; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: mov z2.d, z0.d[1] -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: fmov x4, d2 +; CHECK-NEXT: fmov x2, d0 +; CHECK-NEXT: fmov x3, d2 +; CHECK-NEXT: mov x1, xzr ; CHECK-NEXT: mov z0.d, z1.d[1] -; CHECK-NEXT: fmov x2, d1 -; CHECK-NEXT: fmov x3, d0 +; CHECK-NEXT: fmov x0, d1 +; CHECK-NEXT: fmov x4, d0 +; CHECK-NEXT: mov x5, xzr ; CHECK-NEXT: mov x6, x2 ; CHECK-NEXT: mov x7, x3 ; CHECK-NEXT: ret @@ -136,33 +135,30 @@ define <2 x i256> @load_sext_v2i64i256(<2 x i64>* %ap) #0 { ; CHECK-LABEL: load_sext_v2i64i256: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: fmov x8, d0 ; CHECK-NEXT: mov z0.d, z0.d[1] +; CHECK-NEXT: fmov x10, d0 ; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: stp x8, x9, [sp, #16] -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: stp x9, x9, [sp] -; CHECK-NEXT: ldp q1, q0, [sp] -; CHECK-NEXT: asr x10, x8, #63 -; CHECK-NEXT: stp x8, x10, [sp, #48] -; CHECK-NEXT: fmov x2, d1 -; CHECK-NEXT: stp x10, x10, [sp, #32] -; CHECK-NEXT: ldp q3, q2, [sp, #32] -; CHECK-NEXT: mov z4.d, z0.d[1] -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: asr x11, x10, #63 +; CHECK-NEXT: stp x8, x9, [sp, #-32]! +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: mov z0.d, x9 +; CHECK-NEXT: stp x10, x11, [sp, #16] +; CHECK-NEXT: mov z1.d, z0.d[1] +; CHECK-NEXT: fmov x2, d0 +; CHECK-NEXT: mov z0.d, x11 +; CHECK-NEXT: fmov x3, d1 +; CHECK-NEXT: ldp q1, q3, [sp], #32 +; CHECK-NEXT: mov z2.d, z0.d[1] +; CHECK-NEXT: fmov x6, d0 ; CHECK-NEXT: mov z0.d, z1.d[1] -; CHECK-NEXT: fmov x1, d4 -; CHECK-NEXT: fmov x3, d0 -; CHECK-NEXT: fmov x6, d3 -; CHECK-NEXT: mov z1.d, z2.d[1] -; CHECK-NEXT: fmov x4, d2 -; CHECK-NEXT: mov z2.d, z3.d[1] -; CHECK-NEXT: fmov x5, d1 +; CHECK-NEXT: fmov x0, d1 +; CHECK-NEXT: mov z1.d, z3.d[1] ; CHECK-NEXT: fmov x7, d2 -; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: fmov x4, d3 +; CHECK-NEXT: fmov x1, d0 +; CHECK-NEXT: fmov x5, d1 ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %ap %val = sext <2 x i64> %a to <2 x i256> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll @@ -285,17 +285,16 @@ ; CHECK-LABEL: fcmp_ugt_v16f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q3, [x1] -; CHECK-NEXT: adrp x8, .LCPI14_0 ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI14_0] +; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: fcmge p0.h, p0/z, z3.h, z2.h -; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: eor z0.d, z0.d, z4.d -; CHECK-NEXT: eor z1.d, z1.d, z4.d -; CHECK-NEXT: stp q0, q1, [x2] +; CHECK-NEXT: eor z1.d, z1.d, z0.d +; CHECK-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: eor z0.d, z2.d, z0.d +; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b @@ -337,17 +336,16 @@ ; CHECK-LABEL: fcmp_ult_v16f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q3, [x1] -; CHECK-NEXT: adrp x8, .LCPI16_0 ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z0.h -; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI16_0] +; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: fcmge p0.h, p0/z, z2.h, z3.h -; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: eor z0.d, z0.d, z4.d -; CHECK-NEXT: eor z1.d, z1.d, z4.d -; CHECK-NEXT: stp q0, q1, [x2] +; CHECK-NEXT: eor z1.d, z1.d, z0.d +; CHECK-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: eor z0.d, z2.d, z0.d +; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b @@ -389,17 +387,16 @@ ; CHECK-LABEL: fcmp_uge_v16f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q3, [x1] -; CHECK-NEXT: adrp x8, .LCPI18_0 ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z2.h -; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: eor z0.d, z0.d, z4.d -; CHECK-NEXT: eor z1.d, z1.d, z4.d -; CHECK-NEXT: stp q0, q1, [x2] +; CHECK-NEXT: eor z1.d, z1.d, z0.d +; CHECK-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: eor z0.d, z2.d, z0.d +; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b @@ -441,17 +438,16 @@ ; CHECK-LABEL: fcmp_ule_v16f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q3, [x1] -; CHECK-NEXT: adrp x8, .LCPI20_0 ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z0.h -; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI20_0] +; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: fcmgt p0.h, p0/z, z2.h, z3.h -; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: eor z0.d, z0.d, z4.d -; CHECK-NEXT: eor z1.d, z1.d, z4.d -; CHECK-NEXT: stp q0, q1, [x2] +; CHECK-NEXT: eor z1.d, z1.d, z0.d +; CHECK-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: eor z0.d, z2.d, z0.d +; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b @@ -493,17 +489,16 @@ ; CHECK-LABEL: fcmp_ord_v16f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q3, [x1] -; CHECK-NEXT: adrp x8, .LCPI22_0 ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fcmuo p1.h, p0/z, z1.h, z0.h -; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI22_0] +; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: fcmuo p0.h, p0/z, z2.h, z3.h -; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: eor z0.d, z0.d, z4.d -; CHECK-NEXT: eor z1.d, z1.d, z4.d -; CHECK-NEXT: stp q0, q1, [x2] +; CHECK-NEXT: eor z1.d, z1.d, z0.d +; CHECK-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: eor z0.d, z2.d, z0.d +; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll @@ -7,13 +7,12 @@ define void @fp_convert_combine_crash(<8 x float> *%a, <8 x i32> *%b) #0 { ; CHECK-LABEL: fp_convert_combine_crash: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: fmov z2.s, #8.00000000 ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q0, q2, [x0] -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z2.s +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z2.s ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s +; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z2.s ; CHECK-NEXT: fcvtzs z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll @@ -6,25 +6,15 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, i1 %mask) #0 { ; CHECK-LABEL: select_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: adrp x9, .LCPI0_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI0_0] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: ldr d2, [sp, #8] -; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %sel = select i1 %mask, <2 x half> %op1, <2 x half> %op2 ret <2 x half> %sel @@ -33,25 +23,15 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, i1 %mask) #0 { ; CHECK-LABEL: select_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: adrp x9, .LCPI1_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI1_0] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: ldr d2, [sp, #8] -; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %sel = select i1 %mask, <4 x half> %op1, <4 x half> %op2 ret <4 x half> %sel @@ -60,29 +40,15 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, i1 %mask) #0 { ; CHECK-LABEL: select_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: adrp x9, .LCPI2_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI2_0] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: strh w8, [sp, #6] -; CHECK-NEXT: strh w8, [sp, #4] -; CHECK-NEXT: strh w8, [sp, #2] -; CHECK-NEXT: strh w8, [sp] -; CHECK-NEXT: ldr q2, [sp] -; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %sel = select i1 %mask, <8 x half> %op1, <8 x half> %op2 ret <8 x half> %sel @@ -91,34 +57,20 @@ define void @select_v16f16(ptr %a, ptr %b, i1 %mask) #0 { ; CHECK-LABEL: select_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w2, #0x1 -; CHECK-NEXT: adrp x9, .LCPI3_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: ldr q1, [x0, #16] ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q3, [x1, #16] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI3_0] -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: strh w8, [sp, #6] -; CHECK-NEXT: strh w8, [sp, #4] -; CHECK-NEXT: strh w8, [sp, #2] -; CHECK-NEXT: strh w8, [sp] -; CHECK-NEXT: ldr q4, [sp] -; CHECK-NEXT: eor z5.d, z4.d, z5.d -; CHECK-NEXT: and z1.d, z1.d, z4.d +; CHECK-NEXT: mov z4.h, w8 +; CHECK-NEXT: bic z2.d, z2.d, z4.d ; CHECK-NEXT: and z0.d, z0.d, z4.d -; CHECK-NEXT: and z2.d, z2.d, z5.d -; CHECK-NEXT: and z3.d, z3.d, z5.d +; CHECK-NEXT: bic z3.d, z3.d, z4.d +; CHECK-NEXT: and z1.d, z1.d, z4.d ; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: orr z1.d, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %op1 = load volatile <16 x half>, ptr %a %op2 = load volatile <16 x half>, ptr %b @@ -130,22 +82,17 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) #0 { ; CHECK-LABEL: select_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: adrp x9, .LCPI4_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI4_0] -; CHECK-NEXT: stp w8, w8, [sp, #8] -; CHECK-NEXT: ldr d2, [sp, #8] -; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: mvn w9, w8 +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov z3.s, w9 ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %sel = select i1 %mask, <2 x float> %op1, <2 x float> %op2 ret <2 x float> %sel @@ -154,23 +101,17 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) #0 { ; CHECK-LABEL: select_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: adrp x9, .LCPI5_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI5_0] -; CHECK-NEXT: stp w8, w8, [sp, #8] -; CHECK-NEXT: stp w8, w8, [sp] -; CHECK-NEXT: ldr q2, [sp] -; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: mvn w9, w8 +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov z3.s, w9 ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %sel = select i1 %mask, <4 x float> %op1, <4 x float> %op2 ret <4 x float> %sel @@ -179,20 +120,15 @@ define void @select_v8f32(ptr %a, ptr %b, i1 %mask) #0 { ; CHECK-LABEL: select_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w2, #0x1 -; CHECK-NEXT: adrp x9, .LCPI6_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: ldr q1, [x0, #16] +; CHECK-NEXT: mvn w9, w8 ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q3, [x1, #16] -; CHECK-NEXT: stp w8, w8, [sp, #8] -; CHECK-NEXT: stp w8, w8, [sp] -; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI6_0] -; CHECK-NEXT: ldr q4, [sp] -; CHECK-NEXT: eor z5.d, z4.d, z5.d +; CHECK-NEXT: mov z4.s, w8 +; CHECK-NEXT: mov z5.s, w9 ; CHECK-NEXT: and z1.d, z1.d, z4.d ; CHECK-NEXT: and z0.d, z0.d, z4.d ; CHECK-NEXT: and z2.d, z2.d, z5.d @@ -200,7 +136,6 @@ ; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: orr z1.d, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %op1 = load volatile <8 x float>, ptr %a %op2 = load volatile <8 x float>, ptr %b @@ -213,13 +148,12 @@ ; CHECK-LABEL: select_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: mov x9, #-1 -; CHECK-NEXT: csetm x8, ne ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: fmov d3, x9 -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: csetm x8, ne +; CHECK-NEXT: mvn x9, x8 +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: mov z3.d, x9 ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -235,18 +169,14 @@ ; CHECK-NEXT: tst w0, #0x1 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: adrp x9, .LCPI8_0 ; CHECK-NEXT: csetm x8, ne -; CHECK-NEXT: stp x8, x8, [sp, #-16]! -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr q2, [sp] -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI8_0] +; CHECK-NEXT: mvn x9, x8 +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: mov z3.d, x9 ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: eor z3.d, z2.d, z3.d ; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %sel = select i1 %mask, <2 x double> %op1, <2 x double> %op2 ret <2 x double> %sel @@ -259,22 +189,18 @@ ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: csetm x8, ne ; CHECK-NEXT: ldr q1, [x0, #16] +; CHECK-NEXT: mvn x9, x8 ; CHECK-NEXT: ldr q2, [x1] -; CHECK-NEXT: adrp x9, .LCPI9_0 ; CHECK-NEXT: ldr q3, [x1, #16] -; CHECK-NEXT: stp x8, x8, [sp, #-16]! -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI9_0] -; CHECK-NEXT: ldr q5, [sp] -; CHECK-NEXT: eor z4.d, z5.d, z4.d -; CHECK-NEXT: and z1.d, z1.d, z5.d -; CHECK-NEXT: and z0.d, z0.d, z5.d -; CHECK-NEXT: and z2.d, z2.d, z4.d -; CHECK-NEXT: and z3.d, z3.d, z4.d +; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: mov z5.d, x9 +; CHECK-NEXT: and z1.d, z1.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z4.d +; CHECK-NEXT: and z2.d, z2.d, z5.d +; CHECK-NEXT: and z3.d, z3.d, z5.d ; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: orr z1.d, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %op1 = load volatile <4 x double>, ptr %a %op2 = load volatile <4 x double>, ptr %b diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll @@ -468,7 +468,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fcvtzs w8, d0 -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %res = fptoui <1 x double> %op1 to <1 x i16> ret <1 x i16> %res @@ -718,7 +719,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fcvtzu x8, d0 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %res = fptoui <1 x double> %op1 to <1 x i64> ret <1 x i64> %res @@ -1219,7 +1221,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fcvtzs w8, d0 -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %res = fptosi <1 x double> %op1 to <1 x i16> ret <1 x i16> %res @@ -1469,7 +1472,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %res = fptosi <1 x double> %op1 to <1 x i64> ret <1 x i64> %res diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll @@ -10,23 +10,18 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: mov z3.s, z2.s[1] -; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: fmov w10, s3 +; CHECK-NEXT: fmov w8, s2 +; CHECK-NEXT: fmov w9, s3 ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: adrp x8, .LCPI0_1 -; CHECK-NEXT: strh w9, [sp, #8] -; CHECK-NEXT: strh w10, [sp, #10] -; CHECK-NEXT: ldr d3, [sp, #8] -; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI0_1] -; CHECK-NEXT: lsl z3.h, p0/m, z3.h, z2.h -; CHECK-NEXT: asrr z2.h, p0/m, z2.h, z3.h -; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: strh w8, [sp, #8] +; CHECK-NEXT: strh w9, [sp, #10] +; CHECK-NEXT: ldr d2, [sp, #8] +; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: asr z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: add sp, sp, #16 @@ -38,19 +33,14 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask) #0 { ; CHECK-LABEL: select_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI1_0 -; CHECK-NEXT: adrp x9, .LCPI1_1 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI1_1] -; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z3.h -; CHECK-NEXT: asr z2.h, p0/m, z2.h, z3.h -; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: asr z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -61,20 +51,15 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask) #0 { ; CHECK-LABEL: select_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI2_0 -; CHECK-NEXT: adrp x9, .LCPI2_1 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z2.h, z2.b -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI2_0] -; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI2_1] -; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z3.h -; CHECK-NEXT: asr z2.h, p0/m, z2.h, z3.h -; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: asr z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -85,23 +70,19 @@ define void @select_v16f16(ptr %a, ptr %b) #0 { ; CHECK-LABEL: select_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x1] -; CHECK-NEXT: adrp x8, .LCPI3_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q3, q2, [x0] -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: fcmeq p1.h, p0/z, z2.h, z1.h -; CHECK-NEXT: fcmeq p0.h, p0/z, z3.h, z0.h -; CHECK-NEXT: mov z5.h, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z6.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: and z2.d, z2.d, z5.d -; CHECK-NEXT: eor z5.d, z5.d, z4.d -; CHECK-NEXT: eor z4.d, z6.d, z4.d -; CHECK-NEXT: and z3.d, z3.d, z6.d -; CHECK-NEXT: and z0.d, z0.d, z4.d -; CHECK-NEXT: and z1.d, z1.d, z5.d -; CHECK-NEXT: orr z0.d, z3.d, z0.d -; CHECK-NEXT: orr z1.d, z2.d, z1.d +; CHECK-NEXT: ldp q3, q2, [x1] +; CHECK-NEXT: fcmeq p1.h, p0/z, z1.h, z2.h +; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, z3.h +; CHECK-NEXT: mov z4.h, p1/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z5.h, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: bic z2.d, z2.d, z4.d +; CHECK-NEXT: bic z3.d, z3.d, z5.d +; CHECK-NEXT: and z0.d, z0.d, z5.d +; CHECK-NEXT: and z1.d, z1.d, z4.d +; CHECK-NEXT: orr z0.d, z0.d, z3.d +; CHECK-NEXT: orr z1.d, z1.d, z2.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x half>, ptr %a @@ -115,19 +96,14 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %mask) #0 { ; CHECK-LABEL: select_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: adrp x9, .LCPI4_1 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI4_1] -; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: asr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: lsl z2.s, p0/m, z2.s, #31 +; CHECK-NEXT: asr z2.s, p0/m, z2.s, #31 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -138,20 +114,15 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %mask) #0 { ; CHECK-LABEL: select_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: adrp x9, .LCPI5_1 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z2.s, z2.h -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI5_1] -; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: asr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: lsl z2.s, p0/m, z2.s, #31 +; CHECK-NEXT: asr z2.s, p0/m, z2.s, #31 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -162,23 +133,19 @@ define void @select_v8f32(ptr %a, ptr %b) #0 { ; CHECK-LABEL: select_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x1] -; CHECK-NEXT: adrp x8, .LCPI6_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q3, q2, [x0] -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI6_0] -; CHECK-NEXT: fcmeq p1.s, p0/z, z2.s, z1.s -; CHECK-NEXT: fcmeq p0.s, p0/z, z3.s, z0.s -; CHECK-NEXT: mov z5.s, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z6.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: and z2.d, z2.d, z5.d -; CHECK-NEXT: eor z5.d, z5.d, z4.d -; CHECK-NEXT: eor z4.d, z6.d, z4.d -; CHECK-NEXT: and z3.d, z3.d, z6.d -; CHECK-NEXT: and z0.d, z0.d, z4.d -; CHECK-NEXT: and z1.d, z1.d, z5.d -; CHECK-NEXT: orr z0.d, z3.d, z0.d -; CHECK-NEXT: orr z1.d, z2.d, z1.d +; CHECK-NEXT: ldp q3, q2, [x1] +; CHECK-NEXT: fcmeq p1.s, p0/z, z1.s, z2.s +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z3.s +; CHECK-NEXT: mov z4.s, p1/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z5.s, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: bic z2.d, z2.d, z4.d +; CHECK-NEXT: bic z3.d, z3.d, z5.d +; CHECK-NEXT: and z0.d, z0.d, z5.d +; CHECK-NEXT: and z1.d, z1.d, z4.d +; CHECK-NEXT: orr z0.d, z0.d, z3.d +; CHECK-NEXT: orr z1.d, z1.d, z2.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x float>, ptr %a @@ -193,13 +160,12 @@ ; CHECK-LABEL: select_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: mov x9, #-1 -; CHECK-NEXT: csetm x8, ne ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: fmov d3, x9 -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: csetm x8, ne +; CHECK-NEXT: mvn x9, x8 +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: mov z3.d, x9 ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -212,20 +178,15 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1> %mask) #0 { ; CHECK-LABEL: select_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI8_0 -; CHECK-NEXT: adrp x9, .LCPI8_1 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI8_0] -; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI8_1] -; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z3.d -; CHECK-NEXT: asr z2.d, p0/m, z2.d, z3.d -; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: lsl z2.d, p0/m, z2.d, #63 +; CHECK-NEXT: asr z2.d, p0/m, z2.d, #63 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -236,23 +197,19 @@ define void @select_v4f64(ptr %a, ptr %b) #0 { ; CHECK-LABEL: select_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x1] -; CHECK-NEXT: adrp x8, .LCPI9_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q3, q2, [x0] -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0] -; CHECK-NEXT: fcmeq p1.d, p0/z, z2.d, z1.d -; CHECK-NEXT: fcmeq p0.d, p0/z, z3.d, z0.d -; CHECK-NEXT: mov z5.d, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z6.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: and z2.d, z2.d, z5.d -; CHECK-NEXT: eor z5.d, z5.d, z4.d -; CHECK-NEXT: eor z4.d, z6.d, z4.d -; CHECK-NEXT: and z3.d, z3.d, z6.d -; CHECK-NEXT: and z0.d, z0.d, z4.d -; CHECK-NEXT: and z1.d, z1.d, z5.d -; CHECK-NEXT: orr z0.d, z3.d, z0.d -; CHECK-NEXT: orr z1.d, z2.d, z1.d +; CHECK-NEXT: ldp q3, q2, [x1] +; CHECK-NEXT: fcmeq p1.d, p0/z, z1.d, z2.d +; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, z3.d +; CHECK-NEXT: mov z4.d, p1/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z5.d, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: bic z2.d, z2.d, z4.d +; CHECK-NEXT: bic z3.d, z3.d, z5.d +; CHECK-NEXT: and z0.d, z0.d, z5.d +; CHECK-NEXT: and z1.d, z1.d, z4.d +; CHECK-NEXT: orr z0.d, z0.d, z3.d +; CHECK-NEXT: orr z1.d, z1.d, z2.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x double>, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll @@ -202,8 +202,8 @@ define <1 x i64> @insertelement_v1i64(<1 x i64> %op1) #0 { ; CHECK-LABEL: insertelement_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #5 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov z0.d, #5 // =0x5 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %r = insertelement <1 x i64> %op1, i64 5, i64 0 ret <1 x i64> %r diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll @@ -587,12 +587,10 @@ define <4 x i8> @abs_v4i8(<4 x i8> %op1) #0 { ; CHECK-LABEL: abs_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI42_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI42_0] -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8 ; CHECK-NEXT: abs z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -642,12 +640,10 @@ define <2 x i16> @abs_v2i16(<2 x i16> %op1) #0 { ; CHECK-LABEL: abs_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI46_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI46_0] -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16 +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16 ; CHECK-NEXT: abs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll @@ -10,15 +10,13 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 { ; CHECK-LABEL: sdiv_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z2.h -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: asr z1.h, p0/m, z1.h, z2.h -; CHECK-NEXT: asr z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #8 +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: asr z1.h, p0/m, z1.h, #8 +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8 ; CHECK-NEXT: sunpklo z1.s, z1.h ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: ptrue p0.s, vl4 @@ -139,15 +137,13 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) #0 { ; CHECK-LABEL: sdiv_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z2.s -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: asr z1.s, p0/m, z1.s, z2.s -; CHECK-NEXT: asr z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #16 +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16 +; CHECK-NEXT: asr z1.s, p0/m, z1.s, #16 +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16 ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -312,13 +308,11 @@ define <4 x i8> @udiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 { ; CHECK-LABEL: udiv_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI14_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI14_0] -; CHECK-NEXT: and z1.d, z1.d, z2.d -; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z0.h, z0.h, #0xff +; CHECK-NEXT: and z1.h, z1.h, #0xff ; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s @@ -438,13 +432,11 @@ define <2 x i16> @udiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) #0 { ; CHECK-LABEL: udiv_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI18_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI18_0] -; CHECK-NEXT: and z1.d, z1.d, z2.d -; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.s, z1.s, #0xffff +; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -605,26 +597,23 @@ define void @udiv_constantsplat_v8i32(<8 x i32>* %a) #0 { ; CHECK-LABEL: udiv_constantsplat_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI28_0 -; CHECK-NEXT: adrp x9, .LCPI28_1 -; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov w8, #8969 ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI28_0] -; CHECK-NEXT: adrp x8, .LCPI28_2 -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI28_1] -; CHECK-NEXT: movprfx z5, z1 -; CHECK-NEXT: umulh z5.s, p0/m, z5.s, z0.s -; CHECK-NEXT: sub z1.s, z1.s, z5.s -; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI28_2] -; CHECK-NEXT: sub z2.s, z2.s, z0.s -; CHECK-NEXT: lsr z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: lsr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: add z1.s, z1.s, z5.s -; CHECK-NEXT: add z0.s, z2.s, z0.s -; CHECK-NEXT: lsr z1.s, p0/m, z1.s, z4.s -; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z4.s -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: movk w8, #22765, lsl #16 +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: movprfx z3, z0 +; CHECK-NEXT: umulh z3.s, p0/m, z3.s, z2.s +; CHECK-NEXT: umulh z2.s, p0/m, z2.s, z1.s +; CHECK-NEXT: sub z0.s, z0.s, z3.s +; CHECK-NEXT: sub z1.s, z1.s, z2.s +; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #1 +; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #1 +; CHECK-NEXT: add z0.s, z0.s, z3.s +; CHECK-NEXT: add z1.s, z1.s, z2.s +; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #6 +; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #6 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, <8 x i32>* %a %res = udiv <8 x i32> %op1, diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll @@ -13,19 +13,17 @@ define void @sext_v8i1_v8i32(<8 x i1> %a, <8 x i32>* %out) #0 { ; CHECK-LABEL: sext_v8i1_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: uunpklo z0.h, z0.b -; CHECK-NEXT: uunpklo z2.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z1.s -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: asr z2.s, p0/m, z2.s, z1.s -; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: stp q2, q0, [x0] +; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #31 +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: asr z1.s, p0/m, z1.s, #31 +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret %b = sext <8 x i1> %a to <8 x i32> store <8 x i32> %b, <8 x i32>* %out @@ -42,19 +40,17 @@ define void @sext_v4i3_v4i64(<4 x i3> %a, <4 x i64>* %out) #0 { ; CHECK-LABEL: sext_v4i3_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI1_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: uunpklo z2.d, z0.s +; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z1.d -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: asr z2.d, p0/m, z2.d, z1.d -; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: stp q2, q0, [x0] +; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #61 +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #61 +; CHECK-NEXT: asr z1.d, p0/m, z1.d, #61 +; CHECK-NEXT: asr z0.d, p0/m, z0.d, #61 +; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret %b = sext <4 x i3> %a to <4 x i64> store <4 x i64> %b, <4 x i64>* %out @@ -188,19 +184,17 @@ define void @sext_v4i8_v4i64(<4 x i8> %a, <4 x i64>* %out) #0 { ; CHECK-LABEL: sext_v4i8_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI7_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: uunpklo z2.d, z0.s +; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_0] ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z1.d -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: asr z2.d, p0/m, z2.d, z1.d -; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: stp q2, q0, [x0] +; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #56 +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #56 +; CHECK-NEXT: asr z1.d, p0/m, z1.d, #56 +; CHECK-NEXT: asr z0.d, p0/m, z0.d, #56 +; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret %b = sext <4 x i8> %a to <4 x i64> store <4 x i64>%b, <4 x i64>* %out @@ -611,10 +605,8 @@ define void @zext_v4i8_v4i64(<4 x i8> %a, <4 x i64>* %out) #0 { ; CHECK-LABEL: zext_v4i8_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI23_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI23_0] -; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: and z0.h, z0.h, #0xff ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll @@ -16,12 +16,10 @@ define void @add_v32i8(ptr %a) #0 { ; CHECK-LABEL: add_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: add z1.b, z1.b, z0.b -; CHECK-NEXT: add z0.b, z2.b, z0.b -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: add z0.b, z0.b, #7 // =0x7 +; CHECK-NEXT: add z1.b, z1.b, #7 // =0x7 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i32 0 @@ -34,12 +32,10 @@ define void @add_v16i16(ptr %a) #0 { ; CHECK-LABEL: add_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI1_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: add z1.h, z1.h, z0.h -; CHECK-NEXT: add z0.h, z2.h, z0.h -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: add z0.h, z0.h, #15 // =0xf +; CHECK-NEXT: add z1.h, z1.h, #15 // =0xf +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 @@ -52,12 +48,10 @@ define void @add_v8i32(ptr %a) #0 { ; CHECK-LABEL: add_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI2_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI2_0] -; CHECK-NEXT: add z1.s, z1.s, z0.s -; CHECK-NEXT: add z0.s, z2.s, z0.s -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: add z0.s, z0.s, #31 // =0x1f +; CHECK-NEXT: add z1.s, z1.s, #31 // =0x1f +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 @@ -70,12 +64,10 @@ define void @add_v4i64(ptr %a) #0 { ; CHECK-LABEL: add_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: add z1.d, z1.d, z0.d -; CHECK-NEXT: add z0.d, z2.d, z0.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: add z0.d, z0.d, #63 // =0x3f +; CHECK-NEXT: add z1.d, z1.d, #63 // =0x3f +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 @@ -92,12 +84,10 @@ define void @and_v32i8(ptr %a) #0 { ; CHECK-LABEL: and_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: and z1.d, z1.d, z0.d -; CHECK-NEXT: and z0.d, z2.d, z0.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: and z0.b, z0.b, #0x7 +; CHECK-NEXT: and z1.b, z1.b, #0x7 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i32 0 @@ -110,12 +100,10 @@ define void @and_v16i16(ptr %a) #0 { ; CHECK-LABEL: and_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: and z1.d, z1.d, z0.d -; CHECK-NEXT: and z0.d, z2.d, z0.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: and z0.h, z0.h, #0xf +; CHECK-NEXT: and z1.h, z1.h, #0xf +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 @@ -128,12 +116,10 @@ define void @and_v8i32(ptr %a) #0 { ; CHECK-LABEL: and_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI6_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI6_0] -; CHECK-NEXT: and z1.d, z1.d, z0.d -; CHECK-NEXT: and z0.d, z2.d, z0.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: and z0.s, z0.s, #0x1f +; CHECK-NEXT: and z1.s, z1.s, #0x1f +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 @@ -146,12 +132,10 @@ define void @and_v4i64(ptr %a) #0 { ; CHECK-LABEL: and_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI7_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI7_0] -; CHECK-NEXT: and z1.d, z1.d, z0.d -; CHECK-NEXT: and z0.d, z2.d, z0.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: and z0.d, z0.d, #0x3f +; CHECK-NEXT: and z1.d, z1.d, #0x3f +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 @@ -168,13 +152,11 @@ define void @ashr_v32i8(ptr %a) #0 { ; CHECK-LABEL: ashr_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI8_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_0] -; CHECK-NEXT: asr z1.b, p0/m, z1.b, z0.b -; CHECK-NEXT: asrr z0.b, p0/m, z0.b, z2.b -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: asr z1.b, p0/m, z1.b, #7 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i32 0 @@ -187,13 +169,11 @@ define void @ashr_v16i16(ptr %a) #0 { ; CHECK-LABEL: ashr_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI9_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI9_0] -; CHECK-NEXT: asr z1.h, p0/m, z1.h, z0.h -; CHECK-NEXT: asrr z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: asr z1.h, p0/m, z1.h, #15 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 @@ -206,13 +186,11 @@ define void @ashr_v8i32(ptr %a) #0 { ; CHECK-LABEL: ashr_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI10_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0] -; CHECK-NEXT: asr z1.s, p0/m, z1.s, z0.s -; CHECK-NEXT: asrr z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: asr z1.s, p0/m, z1.s, #31 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 @@ -225,13 +203,11 @@ define void @ashr_v4i64(ptr %a) #0 { ; CHECK-LABEL: ashr_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI11_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI11_0] -; CHECK-NEXT: asr z1.d, p0/m, z1.d, z0.d -; CHECK-NEXT: asrr z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63 +; CHECK-NEXT: asr z1.d, p0/m, z1.d, #63 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 @@ -248,13 +224,11 @@ define void @icmp_eq_v32i8(ptr %a) #0 { ; CHECK-LABEL: icmp_eq_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI12_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI12_0] -; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z0.b -; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z0.b +; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #7 ; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, #7 ; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -270,13 +244,11 @@ define void @icmp_sge_v16i16(ptr %a) #0 { ; CHECK-LABEL: icmp_sge_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI13_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI13_0] -; CHECK-NEXT: cmpge p1.h, p0/z, z1.h, z0.h -; CHECK-NEXT: cmpge p0.h, p0/z, z2.h, z0.h +; CHECK-NEXT: cmpge p1.h, p0/z, z0.h, #15 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: cmpge p0.h, p0/z, z1.h, #15 ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -292,13 +264,11 @@ define void @icmp_sgt_v8i32(ptr %a) #0 { ; CHECK-LABEL: icmp_sgt_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI14_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI14_0] -; CHECK-NEXT: cmpgt p1.s, p0/z, z1.s, z0.s -; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, z0.s +; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, #-8 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: cmpgt p0.s, p0/z, z1.s, #-8 ; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -314,13 +284,11 @@ define void @icmp_ult_v4i64(ptr %a) #0 { ; CHECK-LABEL: icmp_ult_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI15_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI15_0] -; CHECK-NEXT: cmphi p1.d, p0/z, z0.d, z1.d -; CHECK-NEXT: cmphi p0.d, p0/z, z0.d, z2.d +; CHECK-NEXT: cmplo p1.d, p0/z, z0.d, #63 ; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: cmplo p0.d, p0/z, z1.d, #63 ; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -340,13 +308,11 @@ define void @lshr_v32i8(ptr %a) #0 { ; CHECK-LABEL: lshr_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI16_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_0] -; CHECK-NEXT: lsr z1.b, p0/m, z1.b, z0.b -; CHECK-NEXT: lsrr z0.b, p0/m, z0.b, z2.b -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: lsr z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: lsr z1.b, p0/m, z1.b, #7 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 @@ -359,13 +325,11 @@ define void @lshr_v16i16(ptr %a) #0 { ; CHECK-LABEL: lshr_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI17_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI17_0] -; CHECK-NEXT: lsr z1.h, p0/m, z1.h, z0.h -; CHECK-NEXT: lsrr z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: lsr z1.h, p0/m, z1.h, #15 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 @@ -378,13 +342,11 @@ define void @lshr_v8i32(ptr %a) #0 { ; CHECK-LABEL: lshr_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI18_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI18_0] -; CHECK-NEXT: lsr z1.s, p0/m, z1.s, z0.s -; CHECK-NEXT: lsrr z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #31 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 @@ -397,13 +359,11 @@ define void @lshr_v4i64(ptr %a) #0 { ; CHECK-LABEL: lshr_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI19_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI19_0] -; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z0.d -; CHECK-NEXT: lsrr z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: lsr z0.d, p0/m, z0.d, #63 +; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #63 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 @@ -420,13 +380,12 @@ define void @mul_v32i8(ptr %a) #0 { ; CHECK-LABEL: mul_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI20_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.b, #7 // =0x7 ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI20_0] -; CHECK-NEXT: mul z1.b, p0/m, z1.b, z0.b ; CHECK-NEXT: mul z0.b, p0/m, z0.b, z2.b -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: mul z1.b, p0/m, z1.b, z2.b +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 @@ -439,13 +398,12 @@ define void @mul_v16i16(ptr %a) #0 { ; CHECK-LABEL: mul_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI21_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.h, #15 // =0xf ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI21_0] -; CHECK-NEXT: mul z1.h, p0/m, z1.h, z0.h ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: mul z1.h, p0/m, z1.h, z2.h +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 @@ -458,13 +416,12 @@ define void @mul_v8i32(ptr %a) #0 { ; CHECK-LABEL: mul_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI22_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.s, #31 // =0x1f ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI22_0] -; CHECK-NEXT: mul z1.s, p0/m, z1.s, z0.s ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: mul z1.s, p0/m, z1.s, z2.s +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 @@ -477,13 +434,12 @@ define void @mul_v4i64(ptr %a) #0 { ; CHECK-LABEL: mul_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI23_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.d, #63 // =0x3f ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI23_0] -; CHECK-NEXT: mul z1.d, p0/m, z1.d, z0.d ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 @@ -500,12 +456,10 @@ define void @or_v32i8(ptr %a) #0 { ; CHECK-LABEL: or_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI24_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI24_0] -; CHECK-NEXT: orr z1.d, z1.d, z0.d -; CHECK-NEXT: orr z0.d, z2.d, z0.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: orr z0.b, z0.b, #0x7 +; CHECK-NEXT: orr z1.b, z1.b, #0x7 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 @@ -518,12 +472,10 @@ define void @or_v16i16(ptr %a) #0 { ; CHECK-LABEL: or_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI25_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI25_0] -; CHECK-NEXT: orr z1.d, z1.d, z0.d -; CHECK-NEXT: orr z0.d, z2.d, z0.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: orr z0.h, z0.h, #0xf +; CHECK-NEXT: orr z1.h, z1.h, #0xf +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 @@ -536,12 +488,10 @@ define void @or_v8i32(ptr %a) #0 { ; CHECK-LABEL: or_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI26_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI26_0] -; CHECK-NEXT: orr z1.d, z1.d, z0.d -; CHECK-NEXT: orr z0.d, z2.d, z0.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: orr z0.s, z0.s, #0x1f +; CHECK-NEXT: orr z1.s, z1.s, #0x1f +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 @@ -554,12 +504,10 @@ define void @or_v4i64(ptr %a) #0 { ; CHECK-LABEL: or_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI27_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI27_0] -; CHECK-NEXT: orr z1.d, z1.d, z0.d -; CHECK-NEXT: orr z0.d, z2.d, z0.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: orr z0.d, z0.d, #0x3f +; CHECK-NEXT: orr z1.d, z1.d, #0x3f +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 @@ -576,13 +524,11 @@ define void @shl_v32i8(ptr %a) #0 { ; CHECK-LABEL: shl_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI28_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI28_0] -; CHECK-NEXT: lsl z1.b, p0/m, z1.b, z0.b -; CHECK-NEXT: lslr z0.b, p0/m, z0.b, z2.b -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: lsl z1.b, p0/m, z1.b, #7 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 @@ -595,13 +541,11 @@ define void @shl_v16i16(ptr %a) #0 { ; CHECK-LABEL: shl_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI29_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI29_0] -; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z0.h -; CHECK-NEXT: lslr z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #15 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 @@ -614,13 +558,11 @@ define void @shl_v8i32(ptr %a) #0 { ; CHECK-LABEL: shl_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI30_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI30_0] -; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z0.s -; CHECK-NEXT: lslr z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #31 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 @@ -633,13 +575,11 @@ define void @shl_v4i64(ptr %a) #0 { ; CHECK-LABEL: shl_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI31_0 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI31_0] -; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z0.d -; CHECK-NEXT: lslr z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63 +; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #63 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 @@ -656,13 +596,12 @@ define void @smax_v32i8(ptr %a) #0 { ; CHECK-LABEL: smax_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI32_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.b, #7 // =0x7 ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI32_0] -; CHECK-NEXT: smax z1.b, p0/m, z1.b, z0.b ; CHECK-NEXT: smax z0.b, p0/m, z0.b, z2.b -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: smax z1.b, p0/m, z1.b, z2.b +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 @@ -675,13 +614,12 @@ define void @smax_v16i16(ptr %a) #0 { ; CHECK-LABEL: smax_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI33_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.h, #15 // =0xf ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI33_0] -; CHECK-NEXT: smax z1.h, p0/m, z1.h, z0.h ; CHECK-NEXT: smax z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: smax z1.h, p0/m, z1.h, z2.h +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 @@ -694,13 +632,12 @@ define void @smax_v8i32(ptr %a) #0 { ; CHECK-LABEL: smax_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI34_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.s, #31 // =0x1f ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI34_0] -; CHECK-NEXT: smax z1.s, p0/m, z1.s, z0.s ; CHECK-NEXT: smax z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: smax z1.s, p0/m, z1.s, z2.s +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 @@ -713,13 +650,12 @@ define void @smax_v4i64(ptr %a) #0 { ; CHECK-LABEL: smax_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI35_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.d, #63 // =0x3f ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI35_0] -; CHECK-NEXT: smax z1.d, p0/m, z1.d, z0.d ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: smax z1.d, p0/m, z1.d, z2.d +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 @@ -736,13 +672,12 @@ define void @smin_v32i8(ptr %a) #0 { ; CHECK-LABEL: smin_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI36_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.b, #7 // =0x7 ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI36_0] -; CHECK-NEXT: smin z1.b, p0/m, z1.b, z0.b ; CHECK-NEXT: smin z0.b, p0/m, z0.b, z2.b -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: smin z1.b, p0/m, z1.b, z2.b +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 @@ -755,13 +690,12 @@ define void @smin_v16i16(ptr %a) #0 { ; CHECK-LABEL: smin_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI37_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.h, #15 // =0xf ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI37_0] -; CHECK-NEXT: smin z1.h, p0/m, z1.h, z0.h ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: smin z1.h, p0/m, z1.h, z2.h +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 @@ -774,13 +708,12 @@ define void @smin_v8i32(ptr %a) #0 { ; CHECK-LABEL: smin_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI38_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.s, #31 // =0x1f ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI38_0] -; CHECK-NEXT: smin z1.s, p0/m, z1.s, z0.s ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: smin z1.s, p0/m, z1.s, z2.s +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 @@ -793,13 +726,12 @@ define void @smin_v4i64(ptr %a) #0 { ; CHECK-LABEL: smin_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI39_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.d, #63 // =0x3f ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI39_0] -; CHECK-NEXT: smin z1.d, p0/m, z1.d, z0.d ; CHECK-NEXT: smin z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: smin z1.d, p0/m, z1.d, z2.d +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 @@ -816,12 +748,10 @@ define void @sub_v32i8(ptr %a) #0 { ; CHECK-LABEL: sub_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI40_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI40_0] -; CHECK-NEXT: sub z1.b, z1.b, z0.b -; CHECK-NEXT: sub z0.b, z2.b, z0.b -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: sub z0.b, z0.b, #7 // =0x7 +; CHECK-NEXT: sub z1.b, z1.b, #7 // =0x7 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 @@ -834,12 +764,10 @@ define void @sub_v16i16(ptr %a) #0 { ; CHECK-LABEL: sub_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI41_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI41_0] -; CHECK-NEXT: sub z1.h, z1.h, z0.h -; CHECK-NEXT: sub z0.h, z2.h, z0.h -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: sub z0.h, z0.h, #15 // =0xf +; CHECK-NEXT: sub z1.h, z1.h, #15 // =0xf +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 @@ -852,12 +780,10 @@ define void @sub_v8i32(ptr %a) #0 { ; CHECK-LABEL: sub_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI42_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI42_0] -; CHECK-NEXT: sub z1.s, z1.s, z0.s -; CHECK-NEXT: sub z0.s, z2.s, z0.s -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: sub z0.s, z0.s, #31 // =0x1f +; CHECK-NEXT: sub z1.s, z1.s, #31 // =0x1f +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 @@ -870,12 +796,10 @@ define void @sub_v4i64(ptr %a) #0 { ; CHECK-LABEL: sub_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI43_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI43_0] -; CHECK-NEXT: sub z1.d, z1.d, z0.d -; CHECK-NEXT: sub z0.d, z2.d, z0.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: sub z0.d, z0.d, #63 // =0x3f +; CHECK-NEXT: sub z1.d, z1.d, #63 // =0x3f +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 @@ -892,13 +816,12 @@ define void @umax_v32i8(ptr %a) #0 { ; CHECK-LABEL: umax_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI44_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.b, #7 // =0x7 ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI44_0] -; CHECK-NEXT: umax z1.b, p0/m, z1.b, z0.b ; CHECK-NEXT: umax z0.b, p0/m, z0.b, z2.b -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: umax z1.b, p0/m, z1.b, z2.b +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 @@ -911,13 +834,12 @@ define void @umax_v16i16(ptr %a) #0 { ; CHECK-LABEL: umax_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI45_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.h, #15 // =0xf ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI45_0] -; CHECK-NEXT: umax z1.h, p0/m, z1.h, z0.h ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: umax z1.h, p0/m, z1.h, z2.h +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 @@ -930,13 +852,12 @@ define void @umax_v8i32(ptr %a) #0 { ; CHECK-LABEL: umax_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI46_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.s, #31 // =0x1f ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI46_0] -; CHECK-NEXT: umax z1.s, p0/m, z1.s, z0.s ; CHECK-NEXT: umax z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: umax z1.s, p0/m, z1.s, z2.s +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 @@ -949,13 +870,12 @@ define void @umax_v4i64(ptr %a) #0 { ; CHECK-LABEL: umax_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI47_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.d, #63 // =0x3f ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI47_0] -; CHECK-NEXT: umax z1.d, p0/m, z1.d, z0.d ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: umax z1.d, p0/m, z1.d, z2.d +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 @@ -972,13 +892,12 @@ define void @umin_v32i8(ptr %a) #0 { ; CHECK-LABEL: umin_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI48_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.b, #7 // =0x7 ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI48_0] -; CHECK-NEXT: umin z1.b, p0/m, z1.b, z0.b ; CHECK-NEXT: umin z0.b, p0/m, z0.b, z2.b -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: umin z1.b, p0/m, z1.b, z2.b +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 @@ -991,13 +910,12 @@ define void @umin_v16i16(ptr %a) #0 { ; CHECK-LABEL: umin_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI49_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.h, #15 // =0xf ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI49_0] -; CHECK-NEXT: umin z1.h, p0/m, z1.h, z0.h ; CHECK-NEXT: umin z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: umin z1.h, p0/m, z1.h, z2.h +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 @@ -1010,13 +928,12 @@ define void @umin_v8i32(ptr %a) #0 { ; CHECK-LABEL: umin_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI50_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.s, #31 // =0x1f ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI50_0] -; CHECK-NEXT: umin z1.s, p0/m, z1.s, z0.s ; CHECK-NEXT: umin z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: umin z1.s, p0/m, z1.s, z2.s +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 @@ -1029,13 +946,12 @@ define void @umin_v4i64(ptr %a) #0 { ; CHECK-LABEL: umin_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI51_0 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: mov z2.d, #63 // =0x3f ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI51_0] -; CHECK-NEXT: umin z1.d, p0/m, z1.d, z0.d ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: umin z1.d, p0/m, z1.d, z2.d +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 @@ -1052,12 +968,10 @@ define void @xor_v32i8(ptr %a) #0 { ; CHECK-LABEL: xor_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI52_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI52_0] -; CHECK-NEXT: eor z1.d, z1.d, z0.d -; CHECK-NEXT: eor z0.d, z2.d, z0.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: eor z0.b, z0.b, #0x7 +; CHECK-NEXT: eor z1.b, z1.b, #0x7 +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 @@ -1070,12 +984,10 @@ define void @xor_v16i16(ptr %a) #0 { ; CHECK-LABEL: xor_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI53_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI53_0] -; CHECK-NEXT: eor z1.d, z1.d, z0.d -; CHECK-NEXT: eor z0.d, z2.d, z0.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: eor z0.h, z0.h, #0xf +; CHECK-NEXT: eor z1.h, z1.h, #0xf +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 @@ -1088,12 +1000,10 @@ define void @xor_v8i32(ptr %a) #0 { ; CHECK-LABEL: xor_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI54_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI54_0] -; CHECK-NEXT: eor z1.d, z1.d, z0.d -; CHECK-NEXT: eor z0.d, z2.d, z0.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: eor z0.s, z0.s, #0x1f +; CHECK-NEXT: eor z1.s, z1.s, #0x1f +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 @@ -1106,12 +1016,10 @@ define void @xor_v4i64(ptr %a) #0 { ; CHECK-LABEL: xor_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI55_0 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI55_0] -; CHECK-NEXT: eor z1.d, z1.d, z0.d -; CHECK-NEXT: eor z0.d, z2.d, z0.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: eor z0.d, z0.d, #0x3f +; CHECK-NEXT: eor z1.d, z1.d, #0x3f +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll @@ -13,19 +13,15 @@ define <4 x i8> @smulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 { ; CHECK-LABEL: smulh_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: adrp x8, .LCPI0_1 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z2.h -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI0_1] -; CHECK-NEXT: asr z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: asr z1.h, p0/m, z1.h, z2.h +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #8 +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: asr z1.h, p0/m, z1.h, #8 ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z3.h +; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #4 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <4 x i16> undef, i16 4, i64 0 @@ -78,7 +74,6 @@ ; CHECK-LABEL: smulh_v32i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q1, q0, [x0] -; CHECK-NEXT: adrp x8, .LCPI3_0 ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: sunpklo z4.h, z1.b ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 @@ -95,23 +90,22 @@ ; CHECK-NEXT: sunpklo z2.h, z2.b ; CHECK-NEXT: mul z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: movprfx z2, z5 +; CHECK-NEXT: mul z2.h, p0/m, z2.h, z7.h ; CHECK-NEXT: movprfx z3, z4 ; CHECK-NEXT: mul z3.h, p0/m, z3.h, z6.h -; CHECK-NEXT: mul z5.h, p0/m, z5.h, z7.h -; CHECK-NEXT: movprfx z4, z5 -; CHECK-NEXT: lsr z4.h, p0/m, z4.h, z2.h -; CHECK-NEXT: lsr z3.h, p0/m, z3.h, z2.h -; CHECK-NEXT: lsr z1.h, p0/m, z1.h, z2.h -; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: lsr z1.h, p0/m, z1.h, #8 +; CHECK-NEXT: lsr z3.h, p0/m, z3.h, #8 +; CHECK-NEXT: lsr z2.h, p0/m, z2.h, #8 +; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #8 ; CHECK-NEXT: ptrue p0.b, vl8 ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b ; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b -; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b -; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b -; CHECK-NEXT: splice z2.b, p0, z2.b, z1.b -; CHECK-NEXT: splice z3.b, p0, z3.b, z0.b -; CHECK-NEXT: stp q2, q3, [x0] +; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b +; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b +; CHECK-NEXT: splice z3.b, p0, z3.b, z1.b +; CHECK-NEXT: splice z2.b, p0, z2.b, z0.b +; CHECK-NEXT: stp q3, q2, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, <32 x i8>* %a %op2 = load <32 x i8>, <32 x i8>* %b @@ -127,17 +121,15 @@ define <2 x i16> @smulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) #0 { ; CHECK-LABEL: smulh_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z2.s -; CHECK-NEXT: asr z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: asr z1.s, p0/m, z1.s, z2.s +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16 +; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #16 +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16 +; CHECK-NEXT: asr z1.s, p0/m, z1.s, #16 ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #16 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %1 = sext <2 x i16> %op1 to <2 x i32> @@ -368,17 +360,13 @@ define <4 x i8> @umulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 { ; CHECK-LABEL: umulh_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: adrp x9, .LCPI14_1 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI14_0] -; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI14_1] -; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z2.d +; CHECK-NEXT: and z0.h, z0.h, #0xff +; CHECK-NEXT: and z1.h, z1.h, #0xff ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z3.h +; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #4 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %1 = zext <4 x i8> %op1 to <4 x i16> @@ -427,7 +415,6 @@ ; CHECK-LABEL: umulh_v32i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q1, q0, [x0] -; CHECK-NEXT: adrp x8, .LCPI17_0 ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: uunpklo z4.h, z1.b ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 @@ -444,23 +431,22 @@ ; CHECK-NEXT: uunpklo z2.h, z2.b ; CHECK-NEXT: mul z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_0] +; CHECK-NEXT: movprfx z2, z5 +; CHECK-NEXT: mul z2.h, p0/m, z2.h, z7.h ; CHECK-NEXT: movprfx z3, z4 ; CHECK-NEXT: mul z3.h, p0/m, z3.h, z6.h -; CHECK-NEXT: mul z5.h, p0/m, z5.h, z7.h -; CHECK-NEXT: movprfx z4, z5 -; CHECK-NEXT: lsr z4.h, p0/m, z4.h, z2.h -; CHECK-NEXT: lsr z3.h, p0/m, z3.h, z2.h -; CHECK-NEXT: lsr z1.h, p0/m, z1.h, z2.h -; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: lsr z1.h, p0/m, z1.h, #8 +; CHECK-NEXT: lsr z3.h, p0/m, z3.h, #8 +; CHECK-NEXT: lsr z2.h, p0/m, z2.h, #8 +; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #8 ; CHECK-NEXT: ptrue p0.b, vl8 ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b ; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b -; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b -; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b -; CHECK-NEXT: splice z2.b, p0, z2.b, z1.b -; CHECK-NEXT: splice z3.b, p0, z3.b, z0.b -; CHECK-NEXT: stp q2, q3, [x0] +; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b +; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b +; CHECK-NEXT: splice z3.b, p0, z3.b, z1.b +; CHECK-NEXT: splice z2.b, p0, z2.b, z0.b +; CHECK-NEXT: stp q3, q2, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, <32 x i8>* %a %op2 = load <32 x i8>, <32 x i8>* %b @@ -476,17 +462,13 @@ define <2 x i16> @umulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) #0 { ; CHECK-LABEL: umulh_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: adrp x9, .LCPI18_1 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI18_0] -; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI18_1] -; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z2.d +; CHECK-NEXT: and z0.s, z0.s, #0xffff +; CHECK-NEXT: and z1.s, z1.s, #0xffff ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z3.s +; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #16 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %1 = zext <2 x i16> %op1 to <2 x i32> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll @@ -10,16 +10,14 @@ define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 { ; CHECK-LABEL: srem_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: ptrue p1.s, vl4 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z2.h -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: asr z1.h, p0/m, z1.h, z2.h -; CHECK-NEXT: asr z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #8 +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: asr z1.h, p0/m, z1.h, #8 +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8 ; CHECK-NEXT: sunpklo z2.s, z1.h ; CHECK-NEXT: sunpklo z3.s, z0.h ; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s @@ -325,13 +323,11 @@ define <4 x i8> @urem_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 { ; CHECK-LABEL: urem_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI13_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI13_0] -; CHECK-NEXT: and z1.d, z1.d, z2.d -; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z0.h, z0.h, #0xff +; CHECK-NEXT: and z1.h, z1.h, #0xff ; CHECK-NEXT: uunpklo z2.s, z1.h ; CHECK-NEXT: uunpklo z3.s, z0.h ; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll @@ -6,25 +6,15 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, i1 %mask) #0 { ; CHECK-LABEL: select_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: adrp x9, .LCPI0_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI0_0] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: ldr d2, [sp, #8] -; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %sel = select i1 %mask, <4 x i8> %op1, <4 x i8> %op2 ret <4 x i8> %sel @@ -33,29 +23,15 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) #0 { ; CHECK-LABEL: select_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: adrp x9, .LCPI1_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI1_0] -; CHECK-NEXT: strb w8, [sp, #15] -; CHECK-NEXT: strb w8, [sp, #14] -; CHECK-NEXT: strb w8, [sp, #13] -; CHECK-NEXT: strb w8, [sp, #12] -; CHECK-NEXT: strb w8, [sp, #11] -; CHECK-NEXT: strb w8, [sp, #10] -; CHECK-NEXT: strb w8, [sp, #9] -; CHECK-NEXT: strb w8, [sp, #8] -; CHECK-NEXT: ldr d2, [sp, #8] -; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: mov z2.b, w8 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %sel = select i1 %mask, <8 x i8> %op1, <8 x i8> %op2 ret <8 x i8> %sel @@ -64,37 +40,15 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) #0 { ; CHECK-LABEL: select_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: adrp x9, .LCPI2_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI2_0] -; CHECK-NEXT: strb w8, [sp, #15] -; CHECK-NEXT: strb w8, [sp, #14] -; CHECK-NEXT: strb w8, [sp, #13] -; CHECK-NEXT: strb w8, [sp, #12] -; CHECK-NEXT: strb w8, [sp, #11] -; CHECK-NEXT: strb w8, [sp, #10] -; CHECK-NEXT: strb w8, [sp, #9] -; CHECK-NEXT: strb w8, [sp, #8] -; CHECK-NEXT: strb w8, [sp, #7] -; CHECK-NEXT: strb w8, [sp, #6] -; CHECK-NEXT: strb w8, [sp, #5] -; CHECK-NEXT: strb w8, [sp, #4] -; CHECK-NEXT: strb w8, [sp, #3] -; CHECK-NEXT: strb w8, [sp, #2] -; CHECK-NEXT: strb w8, [sp, #1] -; CHECK-NEXT: strb w8, [sp] -; CHECK-NEXT: ldr q2, [sp] -; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: mov z2.b, w8 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %sel = select i1 %mask, <16 x i8> %op1, <16 x i8> %op2 ret <16 x i8> %sel @@ -103,42 +57,20 @@ define void @select_v32i8(ptr %a, ptr %b, i1 %mask) #0 { ; CHECK-LABEL: select_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w2, #0x1 -; CHECK-NEXT: adrp x9, .LCPI3_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: ldr q1, [x0, #16] ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q3, [x1, #16] -; CHECK-NEXT: strb w8, [sp, #15] -; CHECK-NEXT: strb w8, [sp, #14] -; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI3_0] -; CHECK-NEXT: strb w8, [sp, #13] -; CHECK-NEXT: strb w8, [sp, #12] -; CHECK-NEXT: strb w8, [sp, #11] -; CHECK-NEXT: strb w8, [sp, #10] -; CHECK-NEXT: strb w8, [sp, #9] -; CHECK-NEXT: strb w8, [sp, #8] -; CHECK-NEXT: strb w8, [sp, #7] -; CHECK-NEXT: strb w8, [sp, #6] -; CHECK-NEXT: strb w8, [sp, #5] -; CHECK-NEXT: strb w8, [sp, #4] -; CHECK-NEXT: strb w8, [sp, #3] -; CHECK-NEXT: strb w8, [sp, #2] -; CHECK-NEXT: strb w8, [sp, #1] -; CHECK-NEXT: strb w8, [sp] -; CHECK-NEXT: ldr q4, [sp] -; CHECK-NEXT: eor z5.d, z4.d, z5.d -; CHECK-NEXT: and z1.d, z1.d, z4.d +; CHECK-NEXT: mov z4.b, w8 +; CHECK-NEXT: bic z2.d, z2.d, z4.d ; CHECK-NEXT: and z0.d, z0.d, z4.d -; CHECK-NEXT: and z2.d, z2.d, z5.d -; CHECK-NEXT: and z3.d, z3.d, z5.d +; CHECK-NEXT: bic z3.d, z3.d, z4.d +; CHECK-NEXT: and z1.d, z1.d, z4.d ; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: orr z1.d, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %op1 = load volatile <32 x i8>, ptr %a %op2 = load volatile <32 x i8>, ptr %b @@ -150,22 +82,17 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, i1 %mask) #0 { ; CHECK-LABEL: select_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: adrp x9, .LCPI4_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI4_0] -; CHECK-NEXT: stp w8, w8, [sp, #8] -; CHECK-NEXT: ldr d2, [sp, #8] -; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: mvn w9, w8 +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov z3.s, w9 ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %sel = select i1 %mask, <2 x i16> %op1, <2 x i16> %op2 ret <2 x i16> %sel @@ -174,25 +101,15 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) #0 { ; CHECK-LABEL: select_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: adrp x9, .LCPI5_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI5_0] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: ldr d2, [sp, #8] -; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %sel = select i1 %mask, <4 x i16> %op1, <4 x i16> %op2 ret <4 x i16> %sel @@ -201,29 +118,15 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) #0 { ; CHECK-LABEL: select_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: adrp x9, .LCPI6_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI6_0] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: strh w8, [sp, #6] -; CHECK-NEXT: strh w8, [sp, #4] -; CHECK-NEXT: strh w8, [sp, #2] -; CHECK-NEXT: strh w8, [sp] -; CHECK-NEXT: ldr q2, [sp] -; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %sel = select i1 %mask, <8 x i16> %op1, <8 x i16> %op2 ret <8 x i16> %sel @@ -232,34 +135,20 @@ define void @select_v16i16(ptr %a, ptr %b, i1 %mask) #0 { ; CHECK-LABEL: select_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w2, #0x1 -; CHECK-NEXT: adrp x9, .LCPI7_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: ldr q1, [x0, #16] ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q3, [x1, #16] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI7_0] -; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: strh w8, [sp, #6] -; CHECK-NEXT: strh w8, [sp, #4] -; CHECK-NEXT: strh w8, [sp, #2] -; CHECK-NEXT: strh w8, [sp] -; CHECK-NEXT: ldr q4, [sp] -; CHECK-NEXT: eor z5.d, z4.d, z5.d -; CHECK-NEXT: and z1.d, z1.d, z4.d +; CHECK-NEXT: mov z4.h, w8 +; CHECK-NEXT: bic z2.d, z2.d, z4.d ; CHECK-NEXT: and z0.d, z0.d, z4.d -; CHECK-NEXT: and z2.d, z2.d, z5.d -; CHECK-NEXT: and z3.d, z3.d, z5.d +; CHECK-NEXT: bic z3.d, z3.d, z4.d +; CHECK-NEXT: and z1.d, z1.d, z4.d ; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: orr z1.d, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %op1 = load volatile <16 x i16>, ptr %a %op2 = load volatile <16 x i16>, ptr %b @@ -271,22 +160,17 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) #0 { ; CHECK-LABEL: select_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: adrp x9, .LCPI8_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI8_0] -; CHECK-NEXT: stp w8, w8, [sp, #8] -; CHECK-NEXT: ldr d2, [sp, #8] -; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: mvn w9, w8 +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov z3.s, w9 ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %sel = select i1 %mask, <2 x i32> %op1, <2 x i32> %op2 ret <2 x i32> %sel @@ -295,23 +179,17 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) #0 { ; CHECK-LABEL: select_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: adrp x9, .LCPI9_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI9_0] -; CHECK-NEXT: stp w8, w8, [sp, #8] -; CHECK-NEXT: stp w8, w8, [sp] -; CHECK-NEXT: ldr q2, [sp] -; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: csetm w8, ne +; CHECK-NEXT: mvn w9, w8 +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov z3.s, w9 ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %sel = select i1 %mask, <4 x i32> %op1, <4 x i32> %op2 ret <4 x i32> %sel @@ -320,20 +198,15 @@ define void @select_v8i32(ptr %a, ptr %b, i1 %mask) #0 { ; CHECK-LABEL: select_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: tst w2, #0x1 -; CHECK-NEXT: adrp x9, .LCPI10_0 -; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: csetm w8, ne ; CHECK-NEXT: ldr q1, [x0, #16] +; CHECK-NEXT: mvn w9, w8 ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q3, [x1, #16] -; CHECK-NEXT: stp w8, w8, [sp, #8] -; CHECK-NEXT: stp w8, w8, [sp] -; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI10_0] -; CHECK-NEXT: ldr q4, [sp] -; CHECK-NEXT: eor z5.d, z4.d, z5.d +; CHECK-NEXT: mov z4.s, w8 +; CHECK-NEXT: mov z5.s, w9 ; CHECK-NEXT: and z1.d, z1.d, z4.d ; CHECK-NEXT: and z0.d, z0.d, z4.d ; CHECK-NEXT: and z2.d, z2.d, z5.d @@ -341,7 +214,6 @@ ; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: orr z1.d, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %op1 = load volatile <8 x i32>, ptr %a %op2 = load volatile <8 x i32>, ptr %b @@ -354,13 +226,12 @@ ; CHECK-LABEL: select_v1i64: ; CHECK: // %bb.0: ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: mov x9, #-1 -; CHECK-NEXT: csetm x8, ne ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: fmov d3, x9 -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: csetm x8, ne +; CHECK-NEXT: mvn x9, x8 +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: mov z3.d, x9 ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -376,18 +247,14 @@ ; CHECK-NEXT: tst w0, #0x1 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: adrp x9, .LCPI12_0 ; CHECK-NEXT: csetm x8, ne -; CHECK-NEXT: stp x8, x8, [sp, #-16]! -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr q2, [sp] -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI12_0] +; CHECK-NEXT: mvn x9, x8 +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: mov z3.d, x9 ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: eor z3.d, z2.d, z3.d ; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %sel = select i1 %mask, <2 x i64> %op1, <2 x i64> %op2 ret <2 x i64> %sel @@ -400,22 +267,18 @@ ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: csetm x8, ne ; CHECK-NEXT: ldr q1, [x0, #16] +; CHECK-NEXT: mvn x9, x8 ; CHECK-NEXT: ldr q2, [x1] -; CHECK-NEXT: adrp x9, .LCPI13_0 ; CHECK-NEXT: ldr q3, [x1, #16] -; CHECK-NEXT: stp x8, x8, [sp, #-16]! -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI13_0] -; CHECK-NEXT: ldr q5, [sp] -; CHECK-NEXT: eor z4.d, z5.d, z4.d -; CHECK-NEXT: and z1.d, z1.d, z5.d -; CHECK-NEXT: and z0.d, z0.d, z5.d -; CHECK-NEXT: and z2.d, z2.d, z4.d -; CHECK-NEXT: and z3.d, z3.d, z4.d +; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: mov z5.d, x9 +; CHECK-NEXT: and z1.d, z1.d, z4.d +; CHECK-NEXT: and z0.d, z0.d, z4.d +; CHECK-NEXT: and z2.d, z2.d, z5.d +; CHECK-NEXT: and z3.d, z3.d, z5.d ; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: orr z1.d, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %op1 = load volatile <4 x i64>, ptr %a %op2 = load volatile <4 x i64>, ptr %b diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll @@ -10,16 +10,12 @@ define <4 x i8> @ashr_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 { ; CHECK-LABEL: ashr_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: adrp x9, .LCPI0_1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI0_1] -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: asr z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: and z1.h, z1.h, #0xff +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8 ; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -73,16 +69,12 @@ define <2 x i16> @ashr_v2i16(<2 x i16> %op1, <2 x i16> %op2) #0 { ; CHECK-LABEL: ashr_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: adrp x9, .LCPI4_1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI4_1] -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: asr z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16 +; CHECK-NEXT: and z1.s, z1.s, #0xffff +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16 ; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -226,13 +218,11 @@ define <4 x i8> @lshr_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 { ; CHECK-LABEL: lshr_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI14_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI14_0] -; CHECK-NEXT: and z1.d, z1.d, z2.d -; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.h, z1.h, #0xff +; CHECK-NEXT: and z0.h, z0.h, #0xff ; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -286,13 +276,11 @@ define <2 x i16> @lshr_v2i16(<2 x i16> %op1, <2 x i16> %op2) #0 { ; CHECK-LABEL: lshr_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI18_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI18_0] -; CHECK-NEXT: and z1.d, z1.d, z2.d -; CHECK-NEXT: and z0.d, z0.d, z2.d +; CHECK-NEXT: and z1.s, z1.s, #0xffff +; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -436,12 +424,10 @@ define <2 x i8> @shl_v2i8(<2 x i8> %op1, <2 x i8> %op2) #0 { ; CHECK-LABEL: shl_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI28_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI28_0] -; CHECK-NEXT: and z1.d, z1.d, z2.d +; CHECK-NEXT: and z1.s, z1.s, #0xff ; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -452,12 +438,10 @@ define <4 x i8> @shl_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 { ; CHECK-LABEL: shl_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI29_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI29_0] -; CHECK-NEXT: and z1.d, z1.d, z2.d +; CHECK-NEXT: and z1.h, z1.h, #0xff ; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll @@ -55,11 +55,9 @@ define <2 x float> @ucvtf_v2i16_v2f32(<2 x i16> %op1) #0 { ; CHECK-LABEL: ucvtf_v2i16_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI3_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -142,11 +140,9 @@ define <2 x double> @ucvtf_v2i16_v2f64(<2 x i16> %op1) #0 { ; CHECK-LABEL: ucvtf_v2i16_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI8_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI8_0] -; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -644,12 +640,10 @@ define <2 x float> @scvtf_v2i16_v2f32(<2 x i16> %op1) #0 { ; CHECK-LABEL: scvtf_v2i16_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI33_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI33_0] -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16 +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16 ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -720,12 +714,10 @@ define <2 x double> @scvtf_v2i16_v2f64(<2 x i16> %op1) #0 { ; CHECK-LABEL: scvtf_v2i16_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI37_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI37_0] -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16 +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll @@ -6,19 +6,14 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, <4 x i1> %mask) #0 { ; CHECK-LABEL: select_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: adrp x9, .LCPI0_1 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI0_1] -; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z3.h -; CHECK-NEXT: asr z2.h, p0/m, z2.h, z3.h -; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: asr z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -29,19 +24,14 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #0 { ; CHECK-LABEL: select_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI1_0 -; CHECK-NEXT: adrp x9, .LCPI1_1 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: ptrue p0.b, vl8 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI1_1] -; CHECK-NEXT: lsl z2.b, p0/m, z2.b, z3.b -; CHECK-NEXT: asr z2.b, p0/m, z2.b, z3.b -; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: lsl z2.b, p0/m, z2.b, #7 +; CHECK-NEXT: asr z2.b, p0/m, z2.b, #7 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -52,19 +42,14 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) #0 { ; CHECK-LABEL: select_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI2_0 -; CHECK-NEXT: adrp x9, .LCPI2_1 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 ; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI2_0] -; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI2_1] -; CHECK-NEXT: lsl z2.b, p0/m, z2.b, z3.b -; CHECK-NEXT: asr z2.b, p0/m, z2.b, z3.b -; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: lsl z2.b, p0/m, z2.b, #7 +; CHECK-NEXT: asr z2.b, p0/m, z2.b, #7 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -75,23 +60,19 @@ define void @select_v32i8(ptr %a, ptr %b) #0 { ; CHECK-LABEL: select_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x1] -; CHECK-NEXT: adrp x8, .LCPI3_0 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q3, q2, [x0] -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z0.b -; CHECK-NEXT: cmpeq p0.b, p0/z, z3.b, z1.b -; CHECK-NEXT: mov z5.b, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z6.b, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: and z2.d, z2.d, z5.d -; CHECK-NEXT: eor z5.d, z5.d, z4.d -; CHECK-NEXT: eor z4.d, z6.d, z4.d -; CHECK-NEXT: and z3.d, z3.d, z6.d -; CHECK-NEXT: and z1.d, z1.d, z4.d -; CHECK-NEXT: and z0.d, z0.d, z5.d -; CHECK-NEXT: orr z1.d, z3.d, z1.d -; CHECK-NEXT: orr z0.d, z2.d, z0.d +; CHECK-NEXT: ldp q3, q2, [x1] +; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z2.b +; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z3.b +; CHECK-NEXT: mov z4.b, p1/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z5.b, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: bic z2.d, z2.d, z4.d +; CHECK-NEXT: bic z3.d, z3.d, z5.d +; CHECK-NEXT: and z1.d, z1.d, z5.d +; CHECK-NEXT: and z0.d, z0.d, z4.d +; CHECK-NEXT: orr z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a @@ -105,19 +86,14 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, <2 x i1> %mask) #0 { ; CHECK-LABEL: select_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: adrp x9, .LCPI4_1 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI4_1] -; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: asr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: lsl z2.s, p0/m, z2.s, #31 +; CHECK-NEXT: asr z2.s, p0/m, z2.s, #31 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -128,19 +104,14 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) #0 { ; CHECK-LABEL: select_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: adrp x9, .LCPI5_1 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI5_1] -; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z3.h -; CHECK-NEXT: asr z2.h, p0/m, z2.h, z3.h -; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: asr z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -151,20 +122,15 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) #0 { ; CHECK-LABEL: select_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI6_0 -; CHECK-NEXT: adrp x9, .LCPI6_1 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z2.h, z2.b -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI6_0] -; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI6_1] -; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z3.h -; CHECK-NEXT: asr z2.h, p0/m, z2.h, z3.h -; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: asr z2.h, p0/m, z2.h, #15 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -175,23 +141,19 @@ define void @select_v16i16(ptr %a, ptr %b) #0 { ; CHECK-LABEL: select_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x1] -; CHECK-NEXT: adrp x8, .LCPI7_0 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q3, q2, [x0] -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI7_0] -; CHECK-NEXT: cmpeq p1.h, p0/z, z2.h, z0.h -; CHECK-NEXT: cmpeq p0.h, p0/z, z3.h, z1.h -; CHECK-NEXT: mov z5.h, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z6.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: and z2.d, z2.d, z5.d -; CHECK-NEXT: eor z5.d, z5.d, z4.d -; CHECK-NEXT: eor z4.d, z6.d, z4.d -; CHECK-NEXT: and z3.d, z3.d, z6.d -; CHECK-NEXT: and z1.d, z1.d, z4.d -; CHECK-NEXT: and z0.d, z0.d, z5.d -; CHECK-NEXT: orr z1.d, z3.d, z1.d -; CHECK-NEXT: orr z0.d, z2.d, z0.d +; CHECK-NEXT: ldp q3, q2, [x1] +; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z3.h +; CHECK-NEXT: mov z4.h, p1/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z5.h, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: bic z2.d, z2.d, z4.d +; CHECK-NEXT: bic z3.d, z3.d, z5.d +; CHECK-NEXT: and z1.d, z1.d, z5.d +; CHECK-NEXT: and z0.d, z0.d, z4.d +; CHECK-NEXT: orr z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a @@ -205,19 +167,14 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) #0 { ; CHECK-LABEL: select_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI8_0 -; CHECK-NEXT: adrp x9, .LCPI8_1 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI8_0] -; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI8_1] -; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: asr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: lsl z2.s, p0/m, z2.s, #31 +; CHECK-NEXT: asr z2.s, p0/m, z2.s, #31 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -228,20 +185,15 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) #0 { ; CHECK-LABEL: select_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI9_0 -; CHECK-NEXT: adrp x9, .LCPI9_1 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z2.s, z2.h -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI9_0] -; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI9_1] -; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: asr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: lsl z2.s, p0/m, z2.s, #31 +; CHECK-NEXT: asr z2.s, p0/m, z2.s, #31 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -252,23 +204,19 @@ define void @select_v8i32(ptr %a, ptr %b) #0 { ; CHECK-LABEL: select_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x1] -; CHECK-NEXT: adrp x8, .LCPI10_0 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q3, q2, [x0] -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI10_0] -; CHECK-NEXT: cmpeq p1.s, p0/z, z2.s, z0.s -; CHECK-NEXT: cmpeq p0.s, p0/z, z3.s, z1.s -; CHECK-NEXT: mov z5.s, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z6.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: and z2.d, z2.d, z5.d -; CHECK-NEXT: eor z5.d, z5.d, z4.d -; CHECK-NEXT: eor z4.d, z6.d, z4.d -; CHECK-NEXT: and z3.d, z3.d, z6.d -; CHECK-NEXT: and z1.d, z1.d, z4.d -; CHECK-NEXT: and z0.d, z0.d, z5.d -; CHECK-NEXT: orr z1.d, z3.d, z1.d -; CHECK-NEXT: orr z0.d, z2.d, z0.d +; CHECK-NEXT: ldp q3, q2, [x1] +; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z2.s +; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z3.s +; CHECK-NEXT: mov z4.s, p1/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z5.s, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: bic z2.d, z2.d, z4.d +; CHECK-NEXT: bic z3.d, z3.d, z5.d +; CHECK-NEXT: and z1.d, z1.d, z5.d +; CHECK-NEXT: and z0.d, z0.d, z4.d +; CHECK-NEXT: orr z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a @@ -283,13 +231,12 @@ ; CHECK-LABEL: select_v1i64: ; CHECK: // %bb.0: ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: mov x9, #-1 -; CHECK-NEXT: csetm x8, ne ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: fmov d3, x9 -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: eor z3.d, z2.d, z3.d +; CHECK-NEXT: csetm x8, ne +; CHECK-NEXT: mvn x9, x8 +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: mov z3.d, x9 ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -302,20 +249,15 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) #0 { ; CHECK-LABEL: select_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI12_0 -; CHECK-NEXT: adrp x9, .LCPI12_1 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI12_0] -; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI12_1] -; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z3.d -; CHECK-NEXT: asr z2.d, p0/m, z2.d, z3.d -; CHECK-NEXT: eor z3.d, z2.d, z4.d +; CHECK-NEXT: lsl z2.d, p0/m, z2.d, #63 +; CHECK-NEXT: asr z2.d, p0/m, z2.d, #63 +; CHECK-NEXT: bic z1.d, z1.d, z2.d ; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -326,23 +268,19 @@ define void @select_v4i64(ptr %a, ptr %b) #0 { ; CHECK-LABEL: select_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x1] -; CHECK-NEXT: adrp x8, .LCPI13_0 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q3, q2, [x0] -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI13_0] -; CHECK-NEXT: cmpeq p1.d, p0/z, z2.d, z0.d -; CHECK-NEXT: cmpeq p0.d, p0/z, z3.d, z1.d -; CHECK-NEXT: mov z5.d, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z6.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: and z2.d, z2.d, z5.d -; CHECK-NEXT: eor z5.d, z5.d, z4.d -; CHECK-NEXT: eor z4.d, z6.d, z4.d -; CHECK-NEXT: and z3.d, z3.d, z6.d -; CHECK-NEXT: and z1.d, z1.d, z4.d -; CHECK-NEXT: and z0.d, z0.d, z5.d -; CHECK-NEXT: orr z1.d, z3.d, z1.d -; CHECK-NEXT: orr z0.d, z2.d, z0.d +; CHECK-NEXT: ldp q3, q2, [x1] +; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z2.d +; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z3.d +; CHECK-NEXT: mov z4.d, p1/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z5.d, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: bic z2.d, z2.d, z4.d +; CHECK-NEXT: bic z3.d, z3.d, z5.d +; CHECK-NEXT: and z1.d, z1.d, z5.d +; CHECK-NEXT: and z0.d, z0.d, z4.d +; CHECK-NEXT: orr z1.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll @@ -10,12 +10,10 @@ define <4 x i8> @masked_load_v4i8(<4 x i8>* %src, <4 x i1> %mask) #0 { ; CHECK-LABEL: masked_load_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -27,12 +25,10 @@ define <8 x i8> @masked_load_v8i8(<8 x i8>* %src, <8 x i1> %mask) #0 { ; CHECK-LABEL: masked_load_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI1_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -44,12 +40,10 @@ define <16 x i8> @masked_load_v16i8(<16 x i8>* %src, <16 x i1> %mask) #0 { ; CHECK-LABEL: masked_load_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] -; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -117,20 +111,18 @@ ; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: strb w10, [sp, #8] ; CHECK-NEXT: strb w8, [sp, #7] -; CHECK-NEXT: adrp x8, .LCPI3_0 +; CHECK-NEXT: mov w8, #16 ; CHECK-NEXT: strb w4, [sp, #3] ; CHECK-NEXT: strb w3, [sp, #2] ; CHECK-NEXT: strb w2, [sp, #1] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: strb w1, [sp] -; CHECK-NEXT: mov w8, #16 -; CHECK-NEXT: ldp q2, q1, [sp] -; CHECK-NEXT: lsl z2.b, p0/m, z2.b, z0.b -; CHECK-NEXT: lsl z1.b, p0/m, z1.b, z0.b -; CHECK-NEXT: asr z1.b, p0/m, z1.b, z0.b -; CHECK-NEXT: asrr z0.b, p0/m, z0.b, z2.b -; CHECK-NEXT: cmpne p1.b, p0/z, z1.b, #0 -; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ldp q1, q0, [sp] +; CHECK-NEXT: lsl z1.b, p0/m, z1.b, #7 +; CHECK-NEXT: asr z1.b, p0/m, z1.b, #7 +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0 +; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0, x8] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -147,18 +139,16 @@ ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: mov z1.s, z0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: fmov w10, s1 -; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: str wzr, [sp, #12] +; CHECK-NEXT: mov z0.s, z0.s[1] ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: strh w9, [sp, #8] -; CHECK-NEXT: strh w10, [sp, #10] -; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: ldr d1, [sp, #8] -; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z0.h -; CHECK-NEXT: asrr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: strh w8, [sp, #8] +; CHECK-NEXT: strh w9, [sp, #10] +; CHECK-NEXT: ldr d0, [sp, #8] +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -171,12 +161,10 @@ define <4 x half> @masked_load_v4f16(<4 x half>* %src, <4 x i1> %mask) #0 { ; CHECK-LABEL: masked_load_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI5_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -188,13 +176,11 @@ define <8 x half> @masked_load_v8f16(<8 x half>* %src, <8 x i1> %mask) #0 { ; CHECK-LABEL: masked_load_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI6_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: uunpklo z0.h, z0.b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0] -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -206,19 +192,17 @@ define <16 x half> @masked_load_v16f16(<16 x half>* %src, <16 x i1> %mask) #0 { ; CHECK-LABEL: masked_load_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI7_0 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: uunpklo z2.h, z0.b -; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 +; CHECK-NEXT: uunpklo z1.h, z0.b ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 +; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #15 ; CHECK-NEXT: uunpklo z0.h, z0.b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_0] ; CHECK-NEXT: mov x8, #8 -; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z1.h -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: asr z2.h, p0/m, z2.h, z1.h -; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: cmpne p1.h, p0/z, z2.h, #0 +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: asr z1.h, p0/m, z1.h, #15 +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, #0 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1] @@ -232,12 +216,10 @@ define <2 x float> @masked_load_v2f32(<2 x float>* %src, <2 x i1> %mask) #0 { ; CHECK-LABEL: masked_load_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI8_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI8_0] -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -249,13 +231,11 @@ define <4 x float> @masked_load_v4f32(<4 x float>* %src, <4 x i1> %mask) #0 { ; CHECK-LABEL: masked_load_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI9_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0] -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -268,46 +248,44 @@ ; CHECK-LABEL: masked_load_v8f32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: mov z1.b, z0.b[3] ; CHECK-NEXT: mov z2.b, z0.b[2] -; CHECK-NEXT: adrp x8, .LCPI10_0 +; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: mov z3.b, z0.b[1] ; CHECK-NEXT: mov z4.b, z0.b[7] ; CHECK-NEXT: mov z5.b, z0.b[6] ; CHECK-NEXT: mov z6.b, z0.b[5] -; CHECK-NEXT: fmov w10, s1 +; CHECK-NEXT: fmov w9, s1 ; CHECK-NEXT: mov z0.b, z0.b[4] -; CHECK-NEXT: fmov w11, s2 -; CHECK-NEXT: strh w9, [sp, #-16]! +; CHECK-NEXT: fmov w10, s2 +; CHECK-NEXT: strh w8, [sp, #-16]! ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: fmov w9, s3 -; CHECK-NEXT: strh w10, [sp, #6] -; CHECK-NEXT: fmov w10, s4 -; CHECK-NEXT: strh w11, [sp, #4] -; CHECK-NEXT: fmov w11, s5 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI10_0] -; CHECK-NEXT: strh w9, [sp, #2] -; CHECK-NEXT: fmov w9, s6 -; CHECK-NEXT: strh w10, [sp, #14] -; CHECK-NEXT: fmov w10, s0 -; CHECK-NEXT: strh w11, [sp, #12] -; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: strh w9, [sp, #10] +; CHECK-NEXT: fmov w8, s3 +; CHECK-NEXT: strh w9, [sp, #6] +; CHECK-NEXT: fmov w9, s4 +; CHECK-NEXT: strh w10, [sp, #4] +; CHECK-NEXT: fmov w10, s5 +; CHECK-NEXT: strh w8, [sp, #2] +; CHECK-NEXT: fmov w8, s6 +; CHECK-NEXT: strh w9, [sp, #14] +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: strh w10, [sp, #12] +; CHECK-NEXT: strh w8, [sp, #10] ; CHECK-NEXT: mov x8, #4 -; CHECK-NEXT: strh w10, [sp, #8] -; CHECK-NEXT: ldp d0, d2, [sp] +; CHECK-NEXT: strh w9, [sp, #8] +; CHECK-NEXT: ldp d0, d1, [sp] ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: uunpklo z2.s, z2.h -; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z1.s +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #31 +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: asr z1.s, p0/m, z1.s, #31 ; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0 -; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0] -; CHECK-NEXT: asrr z1.s, p0/m, z1.s, z2.s ; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, x8, lsl #2] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret @@ -318,13 +296,11 @@ define <2 x double> @masked_load_v2f64(<2 x double>* %src, <2 x i1> %mask) #0 { ; CHECK-LABEL: masked_load_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0] -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63 +; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -336,20 +312,18 @@ define <4 x double> @masked_load_v4f64(<4 x double>* %src, <4 x i1> %mask) #0 { ; CHECK-LABEL: masked_load_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI12_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov x8, #2 ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: uunpklo z2.d, z0.s +; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0] ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: mov x8, #2 -; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z1.d -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: asr z2.d, p0/m, z2.d, z1.d -; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0 +; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #63 +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63 +; CHECK-NEXT: asr z1.d, p0/m, z1.d, #63 +; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63 +; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, x8, lsl #3] diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll @@ -10,16 +10,13 @@ define void @masked_store_v4i8(<4 x i8>* %dst, <4 x i1> %mask) #0 { ; CHECK-LABEL: masked_store_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: adrp x8, .LCPI0_1 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_1] -; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 -; CHECK-NEXT: st1b { z2.h }, p0, [x0] +; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: st1b { z0.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.masked.store.v4i8(<4 x i8> zeroinitializer, <4 x i8>* %dst, i32 8, <4 x i1> %mask) ret void @@ -28,16 +25,13 @@ define void @masked_store_v8i8(<8 x i8>* %dst, <8 x i1> %mask) #0 { ; CHECK-LABEL: masked_store_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI1_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: adrp x8, .LCPI1_1 -; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI1_1] -; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 -; CHECK-NEXT: st1b { z2.b }, p0, [x0] +; CHECK-NEXT: mov z0.b, #0 // =0x0 +; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.masked.store.v8i8(<8 x i8> zeroinitializer, <8 x i8>* %dst, i32 8, <8 x i1> %mask) ret void @@ -46,16 +40,13 @@ define void @masked_store_v16i8(<16 x i8>* %dst, <16 x i1> %mask) #0 { ; CHECK-LABEL: masked_store_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] -; CHECK-NEXT: adrp x8, .LCPI2_1 -; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI2_1] -; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 -; CHECK-NEXT: st1b { z2.b }, p0, [x0] +; CHECK-NEXT: mov z0.b, #0 // =0x0 +; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.masked.store.v16i8(<16 x i8> zeroinitializer, <16 x i8>* %dst, i32 8, <16 x i1> %mask) ret void @@ -113,31 +104,29 @@ ; CHECK-NEXT: strb w9, [sp, #21] ; CHECK-NEXT: ldr w9, [sp, #120] ; CHECK-NEXT: strb w10, [sp, #20] -; CHECK-NEXT: ldr w10, [sp, #104] +; CHECK-NEXT: ldr w10, [sp, #112] ; CHECK-NEXT: strb w8, [sp, #19] -; CHECK-NEXT: ldr w8, [sp, #112] +; CHECK-NEXT: ldr w8, [sp, #104] ; CHECK-NEXT: strb w4, [sp, #3] -; CHECK-NEXT: adrp x11, .LCPI3_0 -; CHECK-NEXT: strb w3, [sp, #2] ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: strb w3, [sp, #2] ; CHECK-NEXT: strb w2, [sp, #1] ; CHECK-NEXT: strb w1, [sp] -; CHECK-NEXT: ldr q0, [x11, :lo12:.LCPI3_0] ; CHECK-NEXT: strb w9, [sp, #18] -; CHECK-NEXT: strb w8, [sp, #17] -; CHECK-NEXT: adrp x8, .LCPI3_1 -; CHECK-NEXT: strb w10, [sp, #16] -; CHECK-NEXT: ldp q1, q2, [sp] -; CHECK-NEXT: lsl z1.b, p0/m, z1.b, z0.b -; CHECK-NEXT: asr z1.b, p0/m, z1.b, z0.b -; CHECK-NEXT: lsl z2.b, p0/m, z2.b, z0.b -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI3_1] +; CHECK-NEXT: strb w10, [sp, #17] +; CHECK-NEXT: strb w8, [sp, #16] ; CHECK-NEXT: mov w8, #16 -; CHECK-NEXT: asrr z0.b, p0/m, z0.b, z2.b -; CHECK-NEXT: cmpne p1.b, p0/z, z1.b, #0 +; CHECK-NEXT: ldp q0, q1, [sp] +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: lsl z1.b, p0/m, z1.b, #7 +; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0 +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 -; CHECK-NEXT: st1b { z3.b }, p0, [x0, x8] -; CHECK-NEXT: st1b { z3.b }, p1, [x0] +; CHECK-NEXT: mov z0.b, #0 // =0x0 +; CHECK-NEXT: st1b { z0.b }, p0, [x0, x8] +; CHECK-NEXT: st1b { z0.b }, p1, [x0] ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret call void @llvm.masked.store.v32i8(<32 x i8> zeroinitializer, <32 x i8>* %dst, i32 8, <32 x i1> %mask) @@ -150,22 +139,19 @@ ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: mov z1.s, z0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: fmov w10, s1 -; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: str wzr, [sp, #12] +; CHECK-NEXT: mov z0.s, z0.s[1] ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: strh w9, [sp, #8] -; CHECK-NEXT: strh w10, [sp, #10] -; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: ldr d1, [sp, #8] -; CHECK-NEXT: adrp x8, .LCPI4_1 -; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z0.h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_1] -; CHECK-NEXT: asrr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: strh w8, [sp, #8] +; CHECK-NEXT: strh w9, [sp, #10] +; CHECK-NEXT: ldr d0, [sp, #8] +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 -; CHECK-NEXT: st1h { z2.h }, p0, [x0] +; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret call void @llvm.masked.store.v2f16(<2 x half> zeroinitializer, <2 x half>* %dst, i32 8, <2 x i1> %mask) @@ -175,16 +161,13 @@ define void @masked_store_v4f16(<4 x half>* %dst, <4 x i1> %mask) #0 { ; CHECK-LABEL: masked_store_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI5_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: adrp x8, .LCPI5_1 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_1] -; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 -; CHECK-NEXT: st1h { z2.h }, p0, [x0] +; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.masked.store.v4f16(<4 x half> zeroinitializer, <4 x half>* %dst, i32 8, <4 x i1> %mask) ret void @@ -193,17 +176,14 @@ define void @masked_store_v8f16(<8 x half>* %dst, <8 x i1> %mask) #0 { ; CHECK-LABEL: masked_store_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI6_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: uunpklo z0.h, z0.b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0] -; CHECK-NEXT: adrp x8, .LCPI6_1 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_1] -; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 -; CHECK-NEXT: st1h { z2.h }, p0, [x0] +; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.masked.store.v8f16(<8 x half> zeroinitializer, <8 x half>* %dst, i32 8, <8 x i1> %mask) ret void @@ -212,25 +192,22 @@ define void @masked_store_v16f16(<16 x half>* %dst, <16 x i1> %mask) #0 { ; CHECK-LABEL: masked_store_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI7_0 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 +; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: uunpklo z1.h, z1.b -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_0] -; CHECK-NEXT: adrp x8, .LCPI7_1 -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI7_1] -; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z2.h ; CHECK-NEXT: mov x8, #8 -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: asr z1.h, p0/m, z1.h, z2.h -; CHECK-NEXT: asr z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #15 +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: asr z1.h, p0/m, z1.h, #15 +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15 ; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, #0 +; CHECK-NEXT: mov z1.h, #0 // =0x0 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 -; CHECK-NEXT: st1h { z3.h }, p1, [x0, x8, lsl #1] -; CHECK-NEXT: st1h { z3.h }, p0, [x0] +; CHECK-NEXT: st1h { z1.h }, p1, [x0, x8, lsl #1] +; CHECK-NEXT: st1h { z1.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.masked.store.v16f16(<16 x half> zeroinitializer, <16 x half>* %dst, i32 8, <16 x i1> %mask) ret void @@ -239,17 +216,14 @@ define void @masked_store_v4f32(<4 x float>* %dst, <4 x i1> %mask) #0 { ; CHECK-LABEL: masked_store_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI8_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0] -; CHECK-NEXT: adrp x8, .LCPI8_1 -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_1] -; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 -; CHECK-NEXT: st1w { z2.s }, p0, [x0] +; CHECK-NEXT: mov z0.s, #0 // =0x0 +; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.masked.store.v4f32(<4 x float> zeroinitializer, <4 x float>* %dst, i32 8, <4 x i1> %mask) ret void @@ -263,46 +237,42 @@ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z1.b, z0.b[7] ; CHECK-NEXT: mov z2.b, z0.b[6] -; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov z1.b, z0.b[5] -; CHECK-NEXT: fmov w10, s2 -; CHECK-NEXT: mov z2.b, z0.b[4] -; CHECK-NEXT: fmov w11, s1 -; CHECK-NEXT: adrp x8, .LCPI9_0 -; CHECK-NEXT: strh w9, [sp, #14] ; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: strh w10, [sp, #12] -; CHECK-NEXT: adrp x10, .LCPI9_1 -; CHECK-NEXT: strh w11, [sp, #10] -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_0] -; CHECK-NEXT: strh w9, [sp, #8] -; CHECK-NEXT: mov x9, #4 +; CHECK-NEXT: mov z2.b, z0.b[4] +; CHECK-NEXT: fmov w10, s1 +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: strh w8, [sp, #14] +; CHECK-NEXT: fmov w8, s2 +; CHECK-NEXT: strh w9, [sp, #12] +; CHECK-NEXT: mov z2.b, z0.b[3] +; CHECK-NEXT: strh w10, [sp, #10] +; CHECK-NEXT: mov z3.b, z0.b[2] +; CHECK-NEXT: strh w8, [sp, #8] +; CHECK-NEXT: mov z4.b, z0.b[1] ; CHECK-NEXT: ldr d1, [sp, #8] ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: ldr q3, [x10, :lo12:.LCPI9_1] -; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: mov z4.b, z0.b[3] -; CHECK-NEXT: mov z5.b, z0.b[2] -; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: mov z6.b, z0.b[1] -; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z2.s -; CHECK-NEXT: movprfx z0, z1 -; CHECK-NEXT: asr z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: mov x9, #4 +; CHECK-NEXT: fmov w10, s2 +; CHECK-NEXT: uunpklo z0.s, z1.h +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31 ; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0 -; CHECK-NEXT: fmov w10, s4 -; CHECK-NEXT: st1w { z3.s }, p1, [x0, x9, lsl #2] -; CHECK-NEXT: fmov w9, s5 +; CHECK-NEXT: mov z0.s, #0 // =0x0 +; CHECK-NEXT: st1w { z0.s }, p1, [x0, x9, lsl #2] +; CHECK-NEXT: fmov w9, s3 ; CHECK-NEXT: strh w8, [sp] -; CHECK-NEXT: fmov w8, s6 +; CHECK-NEXT: fmov w8, s4 ; CHECK-NEXT: strh w10, [sp, #6] ; CHECK-NEXT: strh w9, [sp, #4] ; CHECK-NEXT: strh w8, [sp, #2] -; CHECK-NEXT: ldr d0, [sp] -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: asr z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 -; CHECK-NEXT: st1w { z3.s }, p0, [x0] +; CHECK-NEXT: ldr d1, [sp] +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #31 +; CHECK-NEXT: asr z1.s, p0/m, z1.s, #31 +; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0 +; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret call void @llvm.masked.store.v8f32(<8 x float> zeroinitializer, <8 x float>* %dst, i32 8, <8 x i1> %mask) @@ -312,17 +282,14 @@ define void @masked_store_v2f64(<2 x double>* %dst, <2 x i1> %mask) #0 { ; CHECK-LABEL: masked_store_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI10_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI10_0] -; CHECK-NEXT: adrp x8, .LCPI10_1 -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI10_1] -; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63 +; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 -; CHECK-NEXT: st1d { z2.d }, p0, [x0] +; CHECK-NEXT: mov z0.d, #0 // =0x0 +; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.masked.store.v2f64(<2 x double> zeroinitializer, <2 x double>* %dst, i32 8, <2 x i1> %mask) ret void @@ -331,25 +298,22 @@ define void @masked_store_v4f64(<4 x double>* %dst, <4 x i1> %mask) #0 { ; CHECK-LABEL: masked_store_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov x8, #2 ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: uunpklo z2.d, z0.s +; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0] -; CHECK-NEXT: adrp x8, .LCPI11_1 ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI11_1] -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: mov x8, #2 -; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z1.d -; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: asrr z1.d, p0/m, z1.d, z2.d +; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #63 +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63 +; CHECK-NEXT: asr z1.d, p0/m, z1.d, #63 +; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63 ; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0 +; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 -; CHECK-NEXT: st1d { z3.d }, p1, [x0, x8, lsl #3] -; CHECK-NEXT: st1d { z3.d }, p0, [x0] +; CHECK-NEXT: st1d { z0.d }, p1, [x0, x8, lsl #3] +; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.masked.store.v4f64(<4 x double> zeroinitializer, <4 x double>* %dst, i32 8, <4 x i1> %mask) ret void diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll @@ -6,32 +6,30 @@ define i1 @ptest_v16i1(ptr %a, ptr %b) #0 { ; CHECK-LABEL: ptest_v16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 +; CHECK-NEXT: ldp q0, q1, [x0, #32] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q1, q2, [x0, #32] ; CHECK-NEXT: ptrue p1.h, vl4 -; CHECK-NEXT: ldp q3, q4, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: fcmne p2.s, p0/z, z2.s, z0.s -; CHECK-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: fcmne p2.s, p0/z, z1.s, z0.s +; CHECK-NEXT: ldp q2, q3, [x0] +; CHECK-NEXT: fcmne p2.s, p0/z, z1.s, #0.0 ; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: fcmne p2.s, p0/z, z4.s, z0.s -; CHECK-NEXT: fcmne p0.s, p0/z, z3.s, z0.s +; CHECK-NEXT: fcmne p2.s, p0/z, z0.s, #0.0 ; CHECK-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: splice z1.h, p1, z1.h, z2.h ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: uzp1 z2.h, z3.h, z3.h -; CHECK-NEXT: splice z2.h, p1, z2.h, z0.h -; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b -; CHECK-NEXT: uzp1 z0.b, z2.b, z2.b +; CHECK-NEXT: splice z0.h, p1, z0.h, z1.h +; CHECK-NEXT: fcmne p2.s, p0/z, z3.s, #0.0 +; CHECK-NEXT: fcmne p0.s, p0/z, z2.s, #0.0 +; CHECK-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h +; CHECK-NEXT: splice z3.h, p1, z3.h, z2.h +; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b +; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b +; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: orv b0, p0, z0.b +; CHECK-NEXT: orv b0, p0, z1.b ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret @@ -45,51 +43,49 @@ define i1 @ptest_or_v16i1(ptr %a, ptr %b) #0 { ; CHECK-LABEL: ptest_or_v16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: ldp q0, q1, [x0, #32] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q0, q2, [x0, #32] ; CHECK-NEXT: ptrue p1.h, vl4 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: ldp q3, q4, [x0] -; CHECK-NEXT: fcmne p2.s, p0/z, z2.s, z1.s -; CHECK-NEXT: fcmne p3.s, p0/z, z0.s, z1.s +; CHECK-NEXT: fcmne p3.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: ldp q2, q3, [x0] +; CHECK-NEXT: fcmne p2.s, p0/z, z1.s, #0.0 +; CHECK-NEXT: mov z1.s, p3/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z2.s, p3/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: splice z2.h, p1, z2.h, z0.h -; CHECK-NEXT: ldp q0, q5, [x1, #32] -; CHECK-NEXT: fcmne p2.s, p0/z, z4.s, z1.s -; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b -; CHECK-NEXT: mov z4.s, p2/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: fcmne p2.s, p0/z, z3.s, z1.s -; CHECK-NEXT: mov z3.s, p2/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: splice z1.h, p1, z1.h, z0.h +; CHECK-NEXT: fcmne p3.s, p0/z, z2.s, #0.0 +; CHECK-NEXT: mov z4.s, p3/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: fcmne p2.s, p0/z, z3.s, #0.0 ; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h +; CHECK-NEXT: ldp q3, q0, [x1, #32] +; CHECK-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h +; CHECK-NEXT: fcmne p3.s, p0/z, z3.s, #0.0 +; CHECK-NEXT: mov z3.s, p3/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: ldp q5, q6, [x1] +; CHECK-NEXT: fcmne p2.s, p0/z, z0.s, #0.0 ; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: fcmne p3.s, p0/z, z0.s, z1.s -; CHECK-NEXT: splice z3.h, p1, z3.h, z4.h -; CHECK-NEXT: fcmne p2.s, p0/z, z5.s, z1.s -; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b -; CHECK-NEXT: ldp q4, q5, [x1] ; CHECK-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: fcmne p2.s, p0/z, z5.s, z1.s -; CHECK-NEXT: fcmne p0.s, p0/z, z4.s, z1.s -; CHECK-NEXT: mov z5.s, p3/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: splice z3.h, p1, z3.h, z0.h +; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b +; CHECK-NEXT: uzp1 z1.b, z4.b, z4.b +; CHECK-NEXT: fcmne p2.s, p0/z, z6.s, #0.0 +; CHECK-NEXT: fcmne p0.s, p0/z, z5.s, #0.0 +; CHECK-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z4.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h +; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: splice z5.h, p1, z5.h, z0.h -; CHECK-NEXT: splice z4.h, p1, z4.h, z1.h -; CHECK-NEXT: ptrue p3.b, vl8 -; CHECK-NEXT: uzp1 z0.b, z5.b, z5.b -; CHECK-NEXT: uzp1 z1.b, z4.b, z4.b -; CHECK-NEXT: splice z3.b, p3, z3.b, z2.b -; CHECK-NEXT: splice z1.b, p3, z1.b, z0.b +; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h +; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b +; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b +; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b +; CHECK-NEXT: splice z3.b, p0, z3.b, z2.b +; CHECK-NEXT: orr z0.d, z1.d, z3.d ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: orr z0.d, z3.d, z1.d ; CHECK-NEXT: orv b0, p0, z0.b ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 @@ -114,51 +110,49 @@ define i1 @ptest_and_v16i1(ptr %a, ptr %b) #0 { ; CHECK-LABEL: ptest_and_v16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI2_0 +; CHECK-NEXT: ldp q0, q1, [x0, #32] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q0, q2, [x0, #32] ; CHECK-NEXT: ptrue p1.h, vl4 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] -; CHECK-NEXT: ldp q3, q4, [x0] -; CHECK-NEXT: fcmne p2.s, p0/z, z2.s, z1.s -; CHECK-NEXT: fcmne p3.s, p0/z, z0.s, z1.s +; CHECK-NEXT: fcmne p3.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: ldp q2, q3, [x0] +; CHECK-NEXT: fcmne p2.s, p0/z, z1.s, #0.0 +; CHECK-NEXT: mov z1.s, p3/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z2.s, p3/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: splice z2.h, p1, z2.h, z0.h -; CHECK-NEXT: ldp q0, q5, [x1, #32] -; CHECK-NEXT: fcmne p2.s, p0/z, z4.s, z1.s -; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b -; CHECK-NEXT: mov z4.s, p2/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: fcmne p2.s, p0/z, z3.s, z1.s -; CHECK-NEXT: mov z3.s, p2/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: splice z1.h, p1, z1.h, z0.h +; CHECK-NEXT: fcmne p3.s, p0/z, z2.s, #0.0 +; CHECK-NEXT: mov z4.s, p3/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: fcmne p2.s, p0/z, z3.s, #0.0 ; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h +; CHECK-NEXT: ldp q3, q0, [x1, #32] +; CHECK-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h +; CHECK-NEXT: fcmne p3.s, p0/z, z3.s, #0.0 +; CHECK-NEXT: mov z3.s, p3/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: ldp q5, q6, [x1] +; CHECK-NEXT: fcmne p2.s, p0/z, z0.s, #0.0 ; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: fcmne p3.s, p0/z, z0.s, z1.s -; CHECK-NEXT: splice z3.h, p1, z3.h, z4.h -; CHECK-NEXT: fcmne p2.s, p0/z, z5.s, z1.s -; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b -; CHECK-NEXT: ldp q4, q5, [x1] ; CHECK-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: fcmne p2.s, p0/z, z5.s, z1.s -; CHECK-NEXT: fcmne p0.s, p0/z, z4.s, z1.s -; CHECK-NEXT: mov z5.s, p3/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: splice z3.h, p1, z3.h, z0.h +; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b +; CHECK-NEXT: uzp1 z1.b, z4.b, z4.b +; CHECK-NEXT: fcmne p2.s, p0/z, z6.s, #0.0 +; CHECK-NEXT: fcmne p0.s, p0/z, z5.s, #0.0 +; CHECK-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z4.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h +; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: splice z5.h, p1, z5.h, z0.h -; CHECK-NEXT: splice z4.h, p1, z4.h, z1.h -; CHECK-NEXT: ptrue p3.b, vl8 -; CHECK-NEXT: uzp1 z0.b, z5.b, z5.b -; CHECK-NEXT: uzp1 z1.b, z4.b, z4.b -; CHECK-NEXT: splice z3.b, p3, z3.b, z2.b -; CHECK-NEXT: splice z1.b, p3, z1.b, z0.b +; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h +; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b +; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b +; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b +; CHECK-NEXT: splice z3.b, p0, z3.b, z2.b +; CHECK-NEXT: and z0.d, z1.d, z3.d ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: and z0.d, z3.d, z1.d ; CHECK-NEXT: andv b0, p0, z0.b ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll @@ -10,12 +10,10 @@ define <4 x i8> @bitreverse_v4i8(<4 x i8> %op) #0 { ; CHECK-LABEL: bitreverse_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %res = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> %op) @@ -64,12 +62,10 @@ define <2 x i16> @bitreverse_v2i16(<2 x i16> %op) #0 { ; CHECK-LABEL: bitreverse_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #16 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %res = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %op) @@ -200,28 +196,21 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %op) #0 { ; CHECK-LABEL: bswap_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: adrp x10, .LCPI14_2 -; CHECK-NEXT: adrp x9, .LCPI14_1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0] -; CHECK-NEXT: adrp x8, .LCPI14_3 -; CHECK-NEXT: ldr d3, [x10, :lo12:.LCPI14_2] -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: lsr z4.s, p0/m, z4.s, z1.s -; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI14_1] -; CHECK-NEXT: movprfx z5, z0 -; CHECK-NEXT: lsr z5.s, p0/m, z5.s, z2.s -; CHECK-NEXT: lslr z1.s, p0/m, z1.s, z0.s -; CHECK-NEXT: and z0.d, z0.d, z3.d -; CHECK-NEXT: and z3.d, z5.d, z3.d -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI14_3] -; CHECK-NEXT: orr z3.d, z3.d, z4.d -; CHECK-NEXT: orr z0.d, z1.d, z0.d -; CHECK-NEXT: orr z0.d, z0.d, z3.d -; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #24 +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #8 +; CHECK-NEXT: movprfx z3, z0 +; CHECK-NEXT: lsl z3.s, p0/m, z3.s, #24 +; CHECK-NEXT: and z0.s, z0.s, #0xff00 +; CHECK-NEXT: and z2.s, z2.s, #0xff00 +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #8 +; CHECK-NEXT: orr z1.d, z2.d, z1.d +; CHECK-NEXT: orr z0.d, z3.d, z0.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #16 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %res = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %op) @@ -231,14 +220,12 @@ define <4 x i16> @bswap_v4i16(<4 x i16> %op) #0 { ; CHECK-LABEL: bswap_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI15_0] -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: lsr z2.h, p0/m, z2.h, z1.h -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: orr z0.d, z0.d, z2.d +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: lsr z1.h, p0/m, z1.h, #8 +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %res = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %op) @@ -248,14 +235,12 @@ define <8 x i16> @bswap_v8i16(<8 x i16> %op) #0 { ; CHECK-LABEL: bswap_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI16_0 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: lsr z2.h, p0/m, z2.h, z1.h -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: orr z0.d, z0.d, z2.d +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: lsr z1.h, p0/m, z1.h, #8 +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %res = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %op) @@ -265,18 +250,16 @@ define void @bswap_v16i16(<16 x i16>* %a) #0 { ; CHECK-LABEL: bswap_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI17_0 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q2, q0, [x0] -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] -; CHECK-NEXT: movprfx z3, z0 -; CHECK-NEXT: lsr z3.h, p0/m, z3.h, z1.h -; CHECK-NEXT: movprfx z4, z2 -; CHECK-NEXT: lsr z4.h, p0/m, z4.h, z1.h -; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z1.h -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: orr z1.d, z2.d, z4.d -; CHECK-NEXT: orr z0.d, z0.d, z3.d +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: lsr z2.h, p0/m, z2.h, #8 +; CHECK-NEXT: movprfx z3, z1 +; CHECK-NEXT: lsr z3.h, p0/m, z3.h, #8 +; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #8 +; CHECK-NEXT: orr z1.d, z1.d, z3.d +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret %op = load <16 x i16>, <16 x i16>* %a @@ -288,26 +271,20 @@ define <2 x i32> @bswap_v2i32(<2 x i32> %op) #0 { ; CHECK-LABEL: bswap_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: adrp x10, .LCPI18_2 -; CHECK-NEXT: adrp x9, .LCPI18_1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI18_0] -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: lsr z4.s, p0/m, z4.s, z1.s -; CHECK-NEXT: ldr d3, [x10, :lo12:.LCPI18_2] -; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI18_1] -; CHECK-NEXT: movprfx z5, z0 -; CHECK-NEXT: lsr z5.s, p0/m, z5.s, z2.s -; CHECK-NEXT: and z5.d, z5.d, z3.d -; CHECK-NEXT: and z3.d, z0.d, z3.d -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z3 -; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z2.s -; CHECK-NEXT: orr z2.d, z5.d, z4.d +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #24 +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #8 +; CHECK-NEXT: movprfx z3, z0 +; CHECK-NEXT: lsl z3.s, p0/m, z3.s, #24 +; CHECK-NEXT: and z0.s, z0.s, #0xff00 +; CHECK-NEXT: and z2.s, z2.s, #0xff00 +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #8 +; CHECK-NEXT: orr z1.d, z2.d, z1.d +; CHECK-NEXT: orr z0.d, z3.d, z0.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d -; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %res = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %op) @@ -317,26 +294,20 @@ define <4 x i32> @bswap_v4i32(<4 x i32> %op) #0 { ; CHECK-LABEL: bswap_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI19_0 -; CHECK-NEXT: adrp x10, .LCPI19_2 -; CHECK-NEXT: adrp x9, .LCPI19_1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_0] -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: lsr z4.s, p0/m, z4.s, z1.s -; CHECK-NEXT: ldr q3, [x10, :lo12:.LCPI19_2] -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI19_1] -; CHECK-NEXT: movprfx z5, z0 -; CHECK-NEXT: lsr z5.s, p0/m, z5.s, z2.s -; CHECK-NEXT: and z5.d, z5.d, z3.d -; CHECK-NEXT: and z3.d, z0.d, z3.d -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z3 -; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z2.s -; CHECK-NEXT: orr z2.d, z5.d, z4.d +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #24 +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #8 +; CHECK-NEXT: movprfx z3, z0 +; CHECK-NEXT: lsl z3.s, p0/m, z3.s, #24 +; CHECK-NEXT: and z0.s, z0.s, #0xff00 +; CHECK-NEXT: and z2.s, z2.s, #0xff00 +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #8 +; CHECK-NEXT: orr z1.d, z2.d, z1.d +; CHECK-NEXT: orr z0.d, z3.d, z0.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d -; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %res = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %op) @@ -346,38 +317,33 @@ define void @bswap_v8i32(<8 x i32>* %a) #0 { ; CHECK-LABEL: bswap_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI20_0 -; CHECK-NEXT: adrp x9, .LCPI20_1 -; CHECK-NEXT: ldp q4, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI20_0] -; CHECK-NEXT: adrp x8, .LCPI20_2 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI20_1] +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #24 +; CHECK-NEXT: movprfx z3, z0 +; CHECK-NEXT: lsr z3.s, p0/m, z3.s, #8 +; CHECK-NEXT: movprfx z4, z1 +; CHECK-NEXT: lsr z4.s, p0/m, z4.s, #24 ; CHECK-NEXT: movprfx z5, z1 -; CHECK-NEXT: lsr z5.s, p0/m, z5.s, z0.s -; CHECK-NEXT: movprfx z6, z1 -; CHECK-NEXT: lsr z6.s, p0/m, z6.s, z2.s -; CHECK-NEXT: movprfx z7, z1 -; CHECK-NEXT: lsl z7.s, p0/m, z7.s, z0.s -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI20_2] -; CHECK-NEXT: movprfx z16, z4 -; CHECK-NEXT: lsr z16.s, p0/m, z16.s, z2.s -; CHECK-NEXT: and z1.d, z1.d, z3.d -; CHECK-NEXT: and z6.d, z6.d, z3.d -; CHECK-NEXT: and z16.d, z16.d, z3.d -; CHECK-NEXT: and z3.d, z4.d, z3.d -; CHECK-NEXT: orr z5.d, z6.d, z5.d -; CHECK-NEXT: movprfx z6, z4 -; CHECK-NEXT: lsr z6.s, p0/m, z6.s, z0.s -; CHECK-NEXT: lslr z0.s, p0/m, z0.s, z4.s -; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z2.s -; CHECK-NEXT: lslr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: orr z3.d, z16.d, z6.d +; CHECK-NEXT: lsr z5.s, p0/m, z5.s, #8 +; CHECK-NEXT: and z3.s, z3.s, #0xff00 +; CHECK-NEXT: and z5.s, z5.s, #0xff00 +; CHECK-NEXT: orr z2.d, z3.d, z2.d +; CHECK-NEXT: movprfx z3, z0 +; CHECK-NEXT: lsl z3.s, p0/m, z3.s, #24 +; CHECK-NEXT: orr z4.d, z5.d, z4.d +; CHECK-NEXT: movprfx z5, z1 +; CHECK-NEXT: lsl z5.s, p0/m, z5.s, #24 +; CHECK-NEXT: and z1.s, z1.s, #0xff00 +; CHECK-NEXT: and z0.s, z0.s, #0xff00 +; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #8 +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #8 +; CHECK-NEXT: orr z1.d, z5.d, z1.d +; CHECK-NEXT: orr z0.d, z3.d, z0.d +; CHECK-NEXT: orr z1.d, z1.d, z4.d ; CHECK-NEXT: orr z0.d, z0.d, z2.d -; CHECK-NEXT: orr z1.d, z7.d, z1.d -; CHECK-NEXT: orr z0.d, z0.d, z3.d -; CHECK-NEXT: orr z1.d, z1.d, z5.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret %op = load <8 x i32>, <8 x i32>* %a %res = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %op) @@ -388,46 +354,35 @@ define <1 x i64> @bswap_v1i64(<1 x i64> %op) #0 { ; CHECK-LABEL: bswap_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #56 -; CHECK-NEXT: mov w9, #40 -; CHECK-NEXT: mov w10, #65280 -; CHECK-NEXT: mov w11, #24 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov w8, #16711680 -; CHECK-NEXT: fmov d2, x9 -; CHECK-NEXT: mov w9, #8 -; CHECK-NEXT: fmov d3, x10 -; CHECK-NEXT: movprfx z7, z0 -; CHECK-NEXT: lsr z7.d, p0/m, z7.d, z1.d -; CHECK-NEXT: fmov d5, x8 -; CHECK-NEXT: mov w8, #-16777216 -; CHECK-NEXT: movprfx z16, z0 -; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z2.d -; CHECK-NEXT: fmov d4, x11 -; CHECK-NEXT: fmov d6, x9 -; CHECK-NEXT: and z16.d, z16.d, z3.d -; CHECK-NEXT: fmov d17, x8 -; CHECK-NEXT: orr z7.d, z16.d, z7.d -; CHECK-NEXT: movprfx z16, z0 -; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z4.d -; CHECK-NEXT: movprfx z18, z0 -; CHECK-NEXT: lsr z18.d, p0/m, z18.d, z6.d -; CHECK-NEXT: and z16.d, z16.d, z5.d -; CHECK-NEXT: and z5.d, z0.d, z5.d -; CHECK-NEXT: and z18.d, z18.d, z17.d -; CHECK-NEXT: and z17.d, z0.d, z17.d -; CHECK-NEXT: lslr z6.d, p0/m, z6.d, z17.d -; CHECK-NEXT: lslr z4.d, p0/m, z4.d, z5.d -; CHECK-NEXT: and z3.d, z0.d, z3.d -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: orr z16.d, z18.d, z16.d -; CHECK-NEXT: movprfx z1, z3 -; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z2.d -; CHECK-NEXT: orr z2.d, z4.d, z6.d -; CHECK-NEXT: orr z0.d, z0.d, z1.d -; CHECK-NEXT: orr z1.d, z16.d, z7.d +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56 +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #40 +; CHECK-NEXT: movprfx z3, z0 +; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #24 +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #8 +; CHECK-NEXT: mov z5.d, z0.d +; CHECK-NEXT: and z2.d, z2.d, #0xff00 +; CHECK-NEXT: and z3.d, z3.d, #0xff0000 +; CHECK-NEXT: orr z1.d, z2.d, z1.d +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: and z4.d, z4.d, #0xff000000 +; CHECK-NEXT: and z5.d, z5.d, #0xff000000 +; CHECK-NEXT: orr z3.d, z4.d, z3.d +; CHECK-NEXT: and z2.d, z2.d, #0xff0000 +; CHECK-NEXT: movprfx z4, z5 +; CHECK-NEXT: lsl z4.d, p0/m, z4.d, #8 +; CHECK-NEXT: movprfx z5, z0 +; CHECK-NEXT: lsl z5.d, p0/m, z5.d, #56 +; CHECK-NEXT: and z0.d, z0.d, #0xff00 +; CHECK-NEXT: lsl z2.d, p0/m, z2.d, #24 +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #40 +; CHECK-NEXT: orr z2.d, z2.d, z4.d +; CHECK-NEXT: orr z0.d, z5.d, z0.d +; CHECK-NEXT: orr z1.d, z3.d, z1.d ; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -439,46 +394,35 @@ define <2 x i64> @bswap_v2i64(<2 x i64> %op) #0 { ; CHECK-LABEL: bswap_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI22_0 -; CHECK-NEXT: adrp x9, .LCPI22_1 -; CHECK-NEXT: adrp x10, .LCPI22_2 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0] -; CHECK-NEXT: adrp x8, .LCPI22_3 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI22_1] -; CHECK-NEXT: adrp x9, .LCPI22_4 -; CHECK-NEXT: ldr q3, [x10, :lo12:.LCPI22_2] -; CHECK-NEXT: adrp x10, .LCPI22_5 -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI22_3] -; CHECK-NEXT: adrp x8, .LCPI22_6 -; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI22_4] -; CHECK-NEXT: movprfx z7, z0 -; CHECK-NEXT: lsr z7.d, p0/m, z7.d, z1.d -; CHECK-NEXT: movprfx z16, z0 -; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z2.d -; CHECK-NEXT: ldr q6, [x10, :lo12:.LCPI22_5] -; CHECK-NEXT: ldr q17, [x8, :lo12:.LCPI22_6] -; CHECK-NEXT: and z16.d, z16.d, z3.d -; CHECK-NEXT: orr z7.d, z16.d, z7.d -; CHECK-NEXT: movprfx z16, z0 -; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z4.d -; CHECK-NEXT: movprfx z18, z0 -; CHECK-NEXT: lsr z18.d, p0/m, z18.d, z6.d -; CHECK-NEXT: and z16.d, z16.d, z5.d -; CHECK-NEXT: and z18.d, z18.d, z17.d -; CHECK-NEXT: and z17.d, z0.d, z17.d -; CHECK-NEXT: and z5.d, z0.d, z5.d -; CHECK-NEXT: lslr z6.d, p0/m, z6.d, z17.d -; CHECK-NEXT: lslr z4.d, p0/m, z4.d, z5.d -; CHECK-NEXT: and z3.d, z0.d, z3.d -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: orr z16.d, z18.d, z16.d -; CHECK-NEXT: movprfx z1, z3 -; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z2.d -; CHECK-NEXT: orr z2.d, z4.d, z6.d -; CHECK-NEXT: orr z0.d, z0.d, z1.d -; CHECK-NEXT: orr z1.d, z16.d, z7.d +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56 +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #40 +; CHECK-NEXT: movprfx z3, z0 +; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #24 +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #8 +; CHECK-NEXT: mov z5.d, z0.d +; CHECK-NEXT: and z2.d, z2.d, #0xff00 +; CHECK-NEXT: and z3.d, z3.d, #0xff0000 +; CHECK-NEXT: orr z1.d, z2.d, z1.d +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: and z4.d, z4.d, #0xff000000 +; CHECK-NEXT: and z5.d, z5.d, #0xff000000 +; CHECK-NEXT: orr z3.d, z4.d, z3.d +; CHECK-NEXT: and z2.d, z2.d, #0xff0000 +; CHECK-NEXT: movprfx z4, z5 +; CHECK-NEXT: lsl z4.d, p0/m, z4.d, #8 +; CHECK-NEXT: movprfx z5, z0 +; CHECK-NEXT: lsl z5.d, p0/m, z5.d, #56 +; CHECK-NEXT: and z0.d, z0.d, #0xff00 +; CHECK-NEXT: lsl z2.d, p0/m, z2.d, #24 +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #40 +; CHECK-NEXT: orr z2.d, z2.d, z4.d +; CHECK-NEXT: orr z0.d, z5.d, z0.d +; CHECK-NEXT: orr z1.d, z3.d, z1.d ; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -490,75 +434,67 @@ define void @bswap_v4i64(<4 x i64>* %a) #0 { ; CHECK-LABEL: bswap_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI23_0 -; CHECK-NEXT: adrp x9, .LCPI23_1 -; CHECK-NEXT: adrp x10, .LCPI23_3 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI23_0] -; CHECK-NEXT: adrp x8, .LCPI23_2 -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI23_1] -; CHECK-NEXT: adrp x9, .LCPI23_4 -; CHECK-NEXT: ldr q5, [x10, :lo12:.LCPI23_3] -; CHECK-NEXT: adrp x10, .LCPI23_6 -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI23_2] -; CHECK-NEXT: adrp x8, .LCPI23_5 -; CHECK-NEXT: ldr q6, [x9, :lo12:.LCPI23_4] -; CHECK-NEXT: movprfx z16, z2 -; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z3.d -; CHECK-NEXT: ldr q17, [x10, :lo12:.LCPI23_6] -; CHECK-NEXT: movprfx z18, z2 -; CHECK-NEXT: lsr z18.d, p0/m, z18.d, z0.d -; CHECK-NEXT: ldr q7, [x8, :lo12:.LCPI23_5] -; CHECK-NEXT: movprfx z19, z2 -; CHECK-NEXT: lsr z19.d, p0/m, z19.d, z5.d -; CHECK-NEXT: movprfx z20, z2 -; CHECK-NEXT: lsr z20.d, p0/m, z20.d, z7.d -; CHECK-NEXT: and z16.d, z16.d, z4.d -; CHECK-NEXT: and z19.d, z19.d, z6.d -; CHECK-NEXT: and z20.d, z20.d, z17.d -; CHECK-NEXT: orr z16.d, z16.d, z18.d -; CHECK-NEXT: orr z18.d, z20.d, z19.d -; CHECK-NEXT: and z19.d, z2.d, z17.d -; CHECK-NEXT: and z20.d, z2.d, z6.d -; CHECK-NEXT: lsl z19.d, p0/m, z19.d, z7.d -; CHECK-NEXT: lsl z20.d, p0/m, z20.d, z5.d -; CHECK-NEXT: orr z16.d, z18.d, z16.d -; CHECK-NEXT: orr z18.d, z20.d, z19.d -; CHECK-NEXT: movprfx z19, z2 -; CHECK-NEXT: lsl z19.d, p0/m, z19.d, z0.d -; CHECK-NEXT: and z2.d, z2.d, z4.d -; CHECK-NEXT: movprfx z20, z1 -; CHECK-NEXT: lsr z20.d, p0/m, z20.d, z3.d -; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z3.d -; CHECK-NEXT: movprfx z21, z1 -; CHECK-NEXT: lsr z21.d, p0/m, z21.d, z0.d -; CHECK-NEXT: and z20.d, z20.d, z4.d -; CHECK-NEXT: orr z2.d, z19.d, z2.d -; CHECK-NEXT: orr z19.d, z20.d, z21.d -; CHECK-NEXT: movprfx z20, z1 -; CHECK-NEXT: lsr z20.d, p0/m, z20.d, z5.d -; CHECK-NEXT: movprfx z21, z1 -; CHECK-NEXT: lsr z21.d, p0/m, z21.d, z7.d -; CHECK-NEXT: and z20.d, z20.d, z6.d -; CHECK-NEXT: and z21.d, z21.d, z17.d -; CHECK-NEXT: and z17.d, z1.d, z17.d -; CHECK-NEXT: and z6.d, z1.d, z6.d -; CHECK-NEXT: lslr z7.d, p0/m, z7.d, z17.d -; CHECK-NEXT: lslr z5.d, p0/m, z5.d, z6.d -; CHECK-NEXT: lslr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: orr z20.d, z21.d, z20.d -; CHECK-NEXT: and z4.d, z1.d, z4.d -; CHECK-NEXT: movprfx z1, z4 -; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: orr z3.d, z5.d, z7.d -; CHECK-NEXT: orr z0.d, z0.d, z1.d -; CHECK-NEXT: orr z1.d, z20.d, z19.d +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #56 +; CHECK-NEXT: movprfx z3, z0 +; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #40 +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #24 +; CHECK-NEXT: movprfx z5, z0 +; CHECK-NEXT: lsr z5.d, p0/m, z5.d, #8 +; CHECK-NEXT: and z3.d, z3.d, #0xff00 +; CHECK-NEXT: and z4.d, z4.d, #0xff0000 +; CHECK-NEXT: and z5.d, z5.d, #0xff000000 +; CHECK-NEXT: orr z2.d, z3.d, z2.d +; CHECK-NEXT: orr z3.d, z5.d, z4.d +; CHECK-NEXT: mov z6.d, z0.d +; CHECK-NEXT: mov z7.d, z0.d +; CHECK-NEXT: movprfx z16, z0 +; CHECK-NEXT: lsl z16.d, p0/m, z16.d, #56 +; CHECK-NEXT: orr z2.d, z3.d, z2.d +; CHECK-NEXT: and z6.d, z6.d, #0xff000000 +; CHECK-NEXT: and z7.d, z7.d, #0xff0000 +; CHECK-NEXT: movprfx z3, z6 +; CHECK-NEXT: lsl z3.d, p0/m, z3.d, #8 +; CHECK-NEXT: movprfx z4, z7 +; CHECK-NEXT: lsl z4.d, p0/m, z4.d, #24 +; CHECK-NEXT: orr z3.d, z4.d, z3.d +; CHECK-NEXT: movprfx z4, z1 +; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #40 +; CHECK-NEXT: and z0.d, z0.d, #0xff00 +; CHECK-NEXT: movprfx z5, z1 +; CHECK-NEXT: lsr z5.d, p0/m, z5.d, #56 +; CHECK-NEXT: and z4.d, z4.d, #0xff00 +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #40 +; CHECK-NEXT: orr z4.d, z4.d, z5.d +; CHECK-NEXT: movprfx z5, z1 +; CHECK-NEXT: lsr z5.d, p0/m, z5.d, #24 +; CHECK-NEXT: movprfx z7, z1 +; CHECK-NEXT: lsr z7.d, p0/m, z7.d, #8 +; CHECK-NEXT: orr z0.d, z16.d, z0.d +; CHECK-NEXT: mov z6.d, z1.d +; CHECK-NEXT: mov z16.d, z1.d +; CHECK-NEXT: and z5.d, z5.d, #0xff0000 +; CHECK-NEXT: and z7.d, z7.d, #0xff000000 +; CHECK-NEXT: orr z5.d, z7.d, z5.d +; CHECK-NEXT: and z6.d, z6.d, #0xff000000 +; CHECK-NEXT: and z16.d, z16.d, #0xff0000 +; CHECK-NEXT: movprfx z7, z1 +; CHECK-NEXT: lsl z7.d, p0/m, z7.d, #56 +; CHECK-NEXT: and z1.d, z1.d, #0xff00 +; CHECK-NEXT: lsl z6.d, p0/m, z6.d, #8 +; CHECK-NEXT: lsl z16.d, p0/m, z16.d, #24 +; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #40 +; CHECK-NEXT: orr z6.d, z16.d, z6.d +; CHECK-NEXT: orr z1.d, z7.d, z1.d +; CHECK-NEXT: orr z4.d, z5.d, z4.d +; CHECK-NEXT: orr z1.d, z1.d, z6.d ; CHECK-NEXT: orr z0.d, z0.d, z3.d -; CHECK-NEXT: orr z2.d, z2.d, z18.d -; CHECK-NEXT: orr z0.d, z0.d, z1.d -; CHECK-NEXT: orr z1.d, z2.d, z16.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: orr z1.d, z1.d, z4.d +; CHECK-NEXT: orr z0.d, z0.d, z2.d +; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret %op = load <4 x i64>, <4 x i64>* %a %res = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %op) diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll @@ -6,12 +6,10 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1) #0 { ; CHECK-LABEL: sdiv_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8 +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8 ; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -61,12 +59,10 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1) #0 { ; CHECK-LABEL: sdiv_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16 +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16 ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll @@ -11,14 +11,8 @@ define <4 x i8> @splat_v4i8(i8 %a) #0 { ; CHECK-LABEL: splat_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: strh w0, [sp, #14] -; CHECK-NEXT: strh w0, [sp, #12] -; CHECK-NEXT: strh w0, [sp, #10] -; CHECK-NEXT: strh w0, [sp, #8] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: mov z0.h, w0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <4 x i8> undef, i8 %a, i64 0 %splat = shufflevector <4 x i8> %insert, <4 x i8> undef, <4 x i32> zeroinitializer @@ -28,18 +22,8 @@ define <8 x i8> @splat_v8i8(i8 %a) #0 { ; CHECK-LABEL: splat_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: strb w0, [sp, #15] -; CHECK-NEXT: strb w0, [sp, #14] -; CHECK-NEXT: strb w0, [sp, #13] -; CHECK-NEXT: strb w0, [sp, #12] -; CHECK-NEXT: strb w0, [sp, #11] -; CHECK-NEXT: strb w0, [sp, #10] -; CHECK-NEXT: strb w0, [sp, #9] -; CHECK-NEXT: strb w0, [sp, #8] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: mov z0.b, w0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <8 x i8> undef, i8 %a, i64 0 %splat = shufflevector <8 x i8> %insert, <8 x i8> undef, <8 x i32> zeroinitializer @@ -49,25 +33,8 @@ define <16 x i8> @splat_v16i8(i8 %a) #0 { ; CHECK-LABEL: splat_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: strb w0, [sp, #15] -; CHECK-NEXT: strb w0, [sp, #14] -; CHECK-NEXT: strb w0, [sp, #13] -; CHECK-NEXT: strb w0, [sp, #12] -; CHECK-NEXT: strb w0, [sp, #11] -; CHECK-NEXT: strb w0, [sp, #10] -; CHECK-NEXT: strb w0, [sp, #9] -; CHECK-NEXT: strb w0, [sp, #8] -; CHECK-NEXT: strb w0, [sp, #7] -; CHECK-NEXT: strb w0, [sp, #6] -; CHECK-NEXT: strb w0, [sp, #5] -; CHECK-NEXT: strb w0, [sp, #4] -; CHECK-NEXT: strb w0, [sp, #3] -; CHECK-NEXT: strb w0, [sp, #2] -; CHECK-NEXT: strb w0, [sp, #1] -; CHECK-NEXT: strb w0, [sp] -; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: mov z0.b, w0 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <16 x i8> undef, i8 %a, i64 0 %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer @@ -77,27 +44,8 @@ define void @splat_v32i8(i8 %a, <32 x i8>* %b) #0 { ; CHECK-LABEL: splat_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: strb w0, [sp, #15] -; CHECK-NEXT: strb w0, [sp, #14] -; CHECK-NEXT: strb w0, [sp, #13] -; CHECK-NEXT: strb w0, [sp, #12] -; CHECK-NEXT: strb w0, [sp, #11] -; CHECK-NEXT: strb w0, [sp, #10] -; CHECK-NEXT: strb w0, [sp, #9] -; CHECK-NEXT: strb w0, [sp, #8] -; CHECK-NEXT: strb w0, [sp, #7] -; CHECK-NEXT: strb w0, [sp, #6] -; CHECK-NEXT: strb w0, [sp, #5] -; CHECK-NEXT: strb w0, [sp, #4] -; CHECK-NEXT: strb w0, [sp, #3] -; CHECK-NEXT: strb w0, [sp, #2] -; CHECK-NEXT: strb w0, [sp, #1] -; CHECK-NEXT: strb w0, [sp] -; CHECK-NEXT: ldr q0, [sp] +; CHECK-NEXT: mov z0.b, w0 ; CHECK-NEXT: stp q0, q0, [x1] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %insert = insertelement <32 x i8> undef, i8 %a, i64 0 %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer @@ -108,11 +56,8 @@ define <2 x i16> @splat_v2i16(i16 %a) #0 { ; CHECK-LABEL: splat_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: stp w0, w0, [sp, #8] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: mov z0.s, w0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <2 x i16> undef, i16 %a, i64 0 %splat = shufflevector <2 x i16> %insert, <2 x i16> undef, <2 x i32> zeroinitializer @@ -122,14 +67,8 @@ define <4 x i16> @splat_v4i16(i16 %a) #0 { ; CHECK-LABEL: splat_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: strh w0, [sp, #14] -; CHECK-NEXT: strh w0, [sp, #12] -; CHECK-NEXT: strh w0, [sp, #10] -; CHECK-NEXT: strh w0, [sp, #8] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: mov z0.h, w0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <4 x i16> undef, i16 %a, i64 0 %splat = shufflevector <4 x i16> %insert, <4 x i16> undef, <4 x i32> zeroinitializer @@ -139,17 +78,8 @@ define <8 x i16> @splat_v8i16(i16 %a) #0 { ; CHECK-LABEL: splat_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: strh w0, [sp, #14] -; CHECK-NEXT: strh w0, [sp, #12] -; CHECK-NEXT: strh w0, [sp, #10] -; CHECK-NEXT: strh w0, [sp, #8] -; CHECK-NEXT: strh w0, [sp, #6] -; CHECK-NEXT: strh w0, [sp, #4] -; CHECK-NEXT: strh w0, [sp, #2] -; CHECK-NEXT: strh w0, [sp] -; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: mov z0.h, w0 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <8 x i16> undef, i16 %a, i64 0 %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer @@ -159,19 +89,8 @@ define void @splat_v16i16(i16 %a, <16 x i16>* %b) #0 { ; CHECK-LABEL: splat_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: strh w0, [sp, #14] -; CHECK-NEXT: strh w0, [sp, #12] -; CHECK-NEXT: strh w0, [sp, #10] -; CHECK-NEXT: strh w0, [sp, #8] -; CHECK-NEXT: strh w0, [sp, #6] -; CHECK-NEXT: strh w0, [sp, #4] -; CHECK-NEXT: strh w0, [sp, #2] -; CHECK-NEXT: strh w0, [sp] -; CHECK-NEXT: ldr q0, [sp] +; CHECK-NEXT: mov z0.h, w0 ; CHECK-NEXT: stp q0, q0, [x1] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %insert = insertelement <16 x i16> undef, i16 %a, i64 0 %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer @@ -182,11 +101,8 @@ define <2 x i32> @splat_v2i32(i32 %a) #0 { ; CHECK-LABEL: splat_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: stp w0, w0, [sp, #8] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: mov z0.s, w0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <2 x i32> undef, i32 %a, i64 0 %splat = shufflevector <2 x i32> %insert, <2 x i32> undef, <2 x i32> zeroinitializer @@ -196,11 +112,8 @@ define <4 x i32> @splat_v4i32(i32 %a) #0 { ; CHECK-LABEL: splat_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: stp w0, w0, [sp, #8] -; CHECK-NEXT: stp w0, w0, [sp] -; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: mov z0.s, w0 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <4 x i32> undef, i32 %a, i64 0 %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer @@ -210,13 +123,8 @@ define void @splat_v8i32(i32 %a, <8 x i32>* %b) #0 { ; CHECK-LABEL: splat_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: stp w0, w0, [sp, #8] -; CHECK-NEXT: stp w0, w0, [sp] -; CHECK-NEXT: ldr q0, [sp] +; CHECK-NEXT: mov z0.s, w0 ; CHECK-NEXT: stp q0, q0, [x1] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %insert = insertelement <8 x i32> undef, i32 %a, i64 0 %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer @@ -227,7 +135,8 @@ define <1 x i64> @splat_v1i64(i64 %a) #0 { ; CHECK-LABEL: splat_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: mov z0.d, x0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <1 x i64> undef, i64 %a, i64 0 %splat = shufflevector <1 x i64> %insert, <1 x i64> undef, <1 x i32> zeroinitializer @@ -237,9 +146,8 @@ define <2 x i64> @splat_v2i64(i64 %a) #0 { ; CHECK-LABEL: splat_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x0, x0, [sp, #-16]! -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: mov z0.d, x0 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <2 x i64> undef, i64 %a, i64 0 %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer @@ -249,11 +157,8 @@ define void @splat_v4i64(i64 %a, <4 x i64>* %b) #0 { ; CHECK-LABEL: splat_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x0, x0, [sp, #-16]! -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr q0, [sp] +; CHECK-NEXT: mov z0.d, x0 ; CHECK-NEXT: stp q0, q0, [x1] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %insert = insertelement <4 x i64> undef, i64 %a, i64 0 %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer @@ -268,12 +173,9 @@ define <2 x half> @splat_v2f16(half %a) #0 { ; CHECK-LABEL: splat_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: str h0, [sp, #10] -; CHECK-NEXT: str h0, [sp, #8] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <2 x half> undef, half %a, i64 0 %splat = shufflevector <2 x half> %insert, <2 x half> undef, <2 x i32> zeroinitializer @@ -283,14 +185,9 @@ define <4 x half> @splat_v4f16(half %a) #0 { ; CHECK-LABEL: splat_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: str h0, [sp, #14] -; CHECK-NEXT: str h0, [sp, #12] -; CHECK-NEXT: str h0, [sp, #10] -; CHECK-NEXT: str h0, [sp, #8] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <4 x half> undef, half %a, i64 0 %splat = shufflevector <4 x half> %insert, <4 x half> undef, <4 x i32> zeroinitializer @@ -300,17 +197,9 @@ define <8 x half> @splat_v8f16(half %a) #0 { ; CHECK-LABEL: splat_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: str h0, [sp, #14] -; CHECK-NEXT: str h0, [sp, #12] -; CHECK-NEXT: str h0, [sp, #10] -; CHECK-NEXT: str h0, [sp, #8] -; CHECK-NEXT: str h0, [sp, #6] -; CHECK-NEXT: str h0, [sp, #4] -; CHECK-NEXT: str h0, [sp, #2] -; CHECK-NEXT: str h0, [sp] -; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <8 x half> undef, half %a, i64 0 %splat = shufflevector <8 x half> %insert, <8 x half> undef, <8 x i32> zeroinitializer @@ -320,19 +209,9 @@ define void @splat_v16f16(half %a, <16 x half>* %b) #0 { ; CHECK-LABEL: splat_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: str h0, [sp, #14] -; CHECK-NEXT: str h0, [sp, #12] -; CHECK-NEXT: str h0, [sp, #10] -; CHECK-NEXT: str h0, [sp, #8] -; CHECK-NEXT: str h0, [sp, #6] -; CHECK-NEXT: str h0, [sp, #4] -; CHECK-NEXT: str h0, [sp, #2] -; CHECK-NEXT: str h0, [sp] -; CHECK-NEXT: ldr q0, [sp] +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: mov z0.h, h0 ; CHECK-NEXT: stp q0, q0, [x0] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %insert = insertelement <16 x half> undef, half %a, i64 0 %splat = shufflevector <16 x half> %insert, <16 x half> undef, <16 x i32> zeroinitializer @@ -343,11 +222,9 @@ define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) #0 { ; CHECK-LABEL: splat_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: stp s0, s0, [sp, #8] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 +; CHECK-NEXT: mov z0.s, s0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <2 x float> undef, float %a, i64 0 %splat = shufflevector <2 x float> %insert, <2 x float> undef, <2 x i32> zeroinitializer @@ -357,11 +234,9 @@ define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) #0 { ; CHECK-LABEL: splat_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: stp s0, s0, [sp, #8] -; CHECK-NEXT: stp s0, s0, [sp] -; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 +; CHECK-NEXT: mov z0.s, s0 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <4 x float> undef, float %a, i64 0 %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer @@ -371,13 +246,9 @@ define void @splat_v8f32(float %a, <8 x float>* %b) #0 { ; CHECK-LABEL: splat_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: stp s0, s0, [sp, #8] -; CHECK-NEXT: stp s0, s0, [sp] -; CHECK-NEXT: ldr q0, [sp] +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 +; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: stp q0, q0, [x0] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %insert = insertelement <8 x float> undef, float %a, i64 0 %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer @@ -397,9 +268,9 @@ define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) #0 { ; CHECK-LABEL: splat_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d0, d0, [sp, #-16]! -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: mov z0.d, d0 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %insert = insertelement <2 x double> undef, double %a, i64 0 %splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer @@ -409,11 +280,9 @@ define void @splat_v4f64(double %a, <4 x double>* %b) #0 { ; CHECK-LABEL: splat_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d0, d0, [sp, #-16]! -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr q0, [sp] +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: mov z0.d, d0 ; CHECK-NEXT: stp q0, q0, [x0] -; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %insert = insertelement <4 x double> undef, double %a, i64 0 %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer @@ -428,8 +297,7 @@ define void @splat_imm_v32i8(<32 x i8>* %a) #0 { ; CHECK-LABEL: splat_imm_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI24_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI24_0] +; CHECK-NEXT: mov z0.b, #1 // =0x1 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret %insert = insertelement <32 x i8> undef, i8 1, i64 0 @@ -441,8 +309,7 @@ define void @splat_imm_v16i16(<16 x i16>* %a) #0 { ; CHECK-LABEL: splat_imm_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI25_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI25_0] +; CHECK-NEXT: mov z0.h, #2 // =0x2 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret %insert = insertelement <16 x i16> undef, i16 2, i64 0 @@ -454,8 +321,7 @@ define void @splat_imm_v8i32(<8 x i32>* %a) #0 { ; CHECK-LABEL: splat_imm_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI26_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI26_0] +; CHECK-NEXT: mov z0.s, #3 // =0x3 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret %insert = insertelement <8 x i32> undef, i32 3, i64 0 @@ -467,8 +333,7 @@ define void @splat_imm_v4i64(<4 x i64>* %a) #0 { ; CHECK-LABEL: splat_imm_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI27_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI27_0] +; CHECK-NEXT: mov z0.d, #4 // =0x4 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret %insert = insertelement <4 x i64> undef, i64 4, i64 0 @@ -484,8 +349,7 @@ define void @splat_imm_v16f16(<16 x half>* %a) #0 { ; CHECK-LABEL: splat_imm_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI28_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI28_0] +; CHECK-NEXT: fmov z0.h, #5.00000000 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret %insert = insertelement <16 x half> undef, half 5.0, i64 0 @@ -497,8 +361,7 @@ define void @splat_imm_v8f32(<8 x float>* %a) #0 { ; CHECK-LABEL: splat_imm_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI29_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI29_0] +; CHECK-NEXT: fmov z0.s, #6.00000000 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret %insert = insertelement <8 x float> undef, float 6.0, i64 0 @@ -510,8 +373,7 @@ define void @splat_imm_v4f64(<4 x double>* %a) #0 { ; CHECK-LABEL: splat_imm_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI30_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI30_0] +; CHECK-NEXT: fmov z0.d, #7.00000000 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret %insert = insertelement <4 x double> undef, double 7.0, i64 0 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll @@ -6,9 +6,8 @@ define void @store_v4i8(<4 x i8>* %a) #0 { ; CHECK-LABEL: store_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: st1b { z0.h }, p0, [x0] ; CHECK-NEXT: ret store <4 x i8> zeroinitializer, <4 x i8>* %a @@ -18,8 +17,7 @@ define void @store_v8i8(<8 x i8>* %a) #0 { ; CHECK-LABEL: store_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI1_0 -; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret store <8 x i8> zeroinitializer, <8 x i8>* %a @@ -29,8 +27,7 @@ define void @store_v16i8(<16 x i8>* %a) #0 { ; CHECK-LABEL: store_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI2_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI2_0] +; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret store <16 x i8> zeroinitializer, <16 x i8>* %a @@ -40,8 +37,7 @@ define void @store_v32i8(<32 x i8>* %a) #0 { ; CHECK-LABEL: store_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret store <32 x i8> zeroinitializer, <32 x i8>* %a @@ -51,9 +47,8 @@ define void @store_v2i16(<2 x i16>* %a) #0 { ; CHECK-LABEL: store_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: mov z0.s, #0 // =0x0 ; CHECK-NEXT: st1h { z0.s }, p0, [x0] ; CHECK-NEXT: ret store <2 x i16> zeroinitializer, <2 x i16>* %a @@ -63,8 +58,7 @@ define void @store_v2f16(<2 x half>* %a) #0 { ; CHECK-LABEL: store_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI5_0] +; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: ret @@ -75,8 +69,7 @@ define void @store_v4i16(<4 x i16>* %a) #0 { ; CHECK-LABEL: store_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI6_0 -; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI6_0] +; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret store <4 x i16> zeroinitializer, <4 x i16>* %a @@ -86,8 +79,7 @@ define void @store_v4f16(<4 x half>* %a) #0 { ; CHECK-LABEL: store_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI7_0 -; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI7_0] +; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret store <4 x half> zeroinitializer, <4 x half>* %a @@ -97,8 +89,7 @@ define void @store_v8i16(<8 x i16>* %a) #0 { ; CHECK-LABEL: store_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI8_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_0] +; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret store <8 x i16> zeroinitializer, <8 x i16>* %a @@ -108,8 +99,7 @@ define void @store_v8f16(<8 x half>* %a) #0 { ; CHECK-LABEL: store_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI9_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI9_0] +; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret store <8 x half> zeroinitializer, <8 x half>* %a @@ -119,8 +109,7 @@ define void @store_v16i16(<16 x i16>* %a) #0 { ; CHECK-LABEL: store_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI10_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0] +; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret store <16 x i16> zeroinitializer, <16 x i16>* %a @@ -130,8 +119,7 @@ define void @store_v16f16(<16 x half>* %a) #0 { ; CHECK-LABEL: store_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI11_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI11_0] +; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret store <16 x half> zeroinitializer, <16 x half>* %a @@ -177,8 +165,7 @@ define void @store_v8i32(<8 x i32>* %a) #0 { ; CHECK-LABEL: store_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_0] +; CHECK-NEXT: mov z0.s, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret store <8 x i32> zeroinitializer, <8 x i32>* %a @@ -188,8 +175,7 @@ define void @store_v8f32(<8 x float>* %a) #0 { ; CHECK-LABEL: store_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI17_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI17_0] +; CHECK-NEXT: mov z0.s, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret store <8 x float> zeroinitializer, <8 x float>* %a @@ -199,7 +185,7 @@ define void @store_v1i64(<1 x i64>* %a) #0 { ; CHECK-LABEL: store_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d0, xzr +; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret store <1 x i64> zeroinitializer, <1 x i64>* %a @@ -237,8 +223,7 @@ define void @store_v4i64(<4 x i64>* %a) #0 { ; CHECK-LABEL: store_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI22_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI22_0] +; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret store <4 x i64> zeroinitializer, <4 x i64>* %a @@ -248,8 +233,7 @@ define void @store_v4f64(<4 x double>* %a) #0 { ; CHECK-LABEL: store_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI23_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI23_0] +; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret store <4 x double> zeroinitializer, <4 x double>* %a